wk10_lec_support(1)
Week 10 Lecture support¶
© Professor Yuefeng Li
Copyright By PowCoder代写 加微信 powcoder
Question 4¶
# A common similarity measure used for clustering users is the correlation measure.
# Typically, users are represented by their rating vectors over items.
# We use a list of lists, Rvs, to represent the rating vectors of all users.
# For example:
# I = [0,1,2,3] # 4 items in I, numbered from 0 to 3
# U = [0,1,2,3,4] # 5 users in U, numbered from 0 to 4
# Rvs = [[1,2,1,0],[0,1,1,1],[2,1,3,5],[1,0,2,0],[0,2,3,4]]
# 5 users’ rating vectors for 4 items, where 0 means the user has not yet rated the item.
# The function returns U, I and Uc (the correlation matrix that measures the similarity between users).
import math
def my_correlation(Rvs):
    """Compute the pairwise correlation matrix between users' rating vectors.

    Every entry of a rating vector (including 0) is used as a numeric value
    when computing the mean and the deviations.

    Args:
        Rvs: list of rating vectors, one list per user; ``Rvs[u][i]`` is the
            rating of user ``u`` for item ``i``. The number of items is taken
            from the first vector.

    Returns:
        A tuple ``(U, I, Uc)`` where ``U`` is the list of user indices, ``I``
        is the list of item indices and ``Uc[i][j]`` is the correlation
        between users ``i`` and ``j``. When the correlation is undefined
        because one of the two vectors has zero variance, ``Uc[i][j]`` is
        ``0.0`` (and ``1.0`` on the diagonal) instead of raising
        ``ZeroDivisionError``. An empty ``Rvs`` yields ``([], [], [])``.
    """
    U = list(range(len(Rvs)))
    if not Rvs:
        return (U, [], [])
    I = list(range(len(Rvs[0])))
    n_items = len(I)

    # Per-user statistics are computed once here rather than for every pair.
    deviations = []   # deviations[u][x] = Rvs[u][x] - mean rating of user u
    sq_sums = []      # sq_sums[u] = sum of squared deviations of user u
    for row in Rvs:
        mean = sum(row) / len(row) if row else 0.0
        dev = [row[x] - mean for x in range(n_items)]
        deviations.append(dev)
        sq_sums.append(sum([d ** 2 for d in dev]))

    count = len(U)
    Uc = [[1 for _ in range(count)] for _ in range(count)]
    # For each pair of users i and j; the matrix is symmetric, so each pair
    # is evaluated once and mirrored.
    for i in range(count):
        for j in range(i, count):
            denominator = math.sqrt(sq_sums[i] * sq_sums[j])
            if denominator == 0:
                # Zero variance: the correlation is undefined.
                value = 1.0 if i == j else 0.0
            else:
                numerator = sum(
                    [deviations[i][x] * deviations[j][x] for x in range(n_items)]
                )
                value = numerator / denominator
            Uc[i][j] = value
            Uc[j][i] = value
    return (U, I, Uc)
# Rating vectors of 5 users for 4 items
Rvs = [
    [1, 2, 1, 0],
    [0, 1, 1, 1],
    [2, 1, 3, 5],
    [1, 0, 2, 0],
    [0, 2, 3, 4],
]
U1, I1, Uc1 = my_correlation(Rvs)
print(Uc1)
[0, 1, 2, 3, 4]
[0, 1, 2, 3]
[[1.0, 0.0, -0.9561828874675149, 0.0, -0.47809144373375745], [0.0, 1.0, 0.29277002188455997, -0.17407765595569785, 0.8783100656536799], [-0.9561828874675149, 0.29277002188455997, 1.0, -0.050964719143762556, 0.7142857142857143], [0.0, -0.17407765595569785, -0.050964719143762556, 1.0, -0.15289415743128767], [-0.47809144373375745, 0.8783100656536799, 0.7142857142857143, -0.15289415743128767, 1.0]]
# my_cluster returns the clusters – a list of user lists, one list of user indices per cluster.
# Note: to quickly test the proposed data structures, we don’t actually call a clustering algorithm;
# we simply return a fixed clustering result.
def my_cluster(U, Uc):
    """Return a clustering of the users as a list of user-index lists.

    This is a stub: no clustering algorithm is run and the arguments are not
    inspected. A fixed result for the 5-user example is returned.

    Args:
        U: list of user indices (unused by the stub).
        Uc: user-user correlation matrix (unused by the stub).

    Returns:
        The fixed clustering ``[[0, 1, 3], [2, 4]]``.
    """
    C = [[0, 1, 3], [2, 4]]
    return C
# The predicted rating to unseen items for all users
def my_prediction(Rvs, C, U, I):
    """Fill in each user's unrated items with the mean rating of their cluster.

    An item is unrated for a user when its rating is 0. The prediction is
    the sum of the cluster members' ratings for that item divided by the
    cluster size.

    NOTE: ``Rvs`` is updated in place and users are processed in index
    order, so a prediction already written for an earlier user is read as
    that user's rating when predicting for later users in the same cluster.

    Args:
        Rvs: list of rating vectors, one per user; modified in place.
        C: list of clusters, each a list of user indices.
        U: list of users; only its length is used.
        I: list of items; only its length is used.

    Returns:
        The same ``Rvs`` object with the predictions filled in. Users that
        belong to no cluster are left unchanged.
    """
    n_users = len(U)
    n_items = len(I)
    # The unrated items are fixed up front, before any prediction is written.
    unseenI = [
        [i for i in range(n_items) if Rvs[u][i] == 0] for u in range(n_users)
    ]
    for u in range(n_users):
        # Find u's cluster. It is reset for every user so that a user in no
        # cluster never reuses the previous user's cluster. If u appears in
        # several clusters, the last one listed in C is used.
        cluster_u = None
        for members in C:
            if u in members:
                cluster_u = members
        if cluster_u is None:
            continue
        for i in unseenI[u]:
            Rvs[u][i] = (1 / len(cluster_u)) * sum([Rvs[u1][i] for u1 in cluster_u])
    return Rvs
# Test the definitions: cluster the users, then predict their unrated items
C1 = my_cluster(U=U1, Uc=Uc1)
unI = my_prediction(Rvs=Rvs, C=C1, U=U1, I=I1)
print(unI)
[[1, 2, 1, 0.3333333333333333], [0.6666666666666666, 1, 1, 1], [2, 1, 3, 5], [1, 1.0, 2, 0.4444444444444444], [1.0, 2, 3, 4]]
程序代写 CS代考 加微信: powcoder QQ: 1823890830 Email: powcoder@163.com