-
Notifications
You must be signed in to change notification settings - Fork 39
/
Copy pathdata_preprocessor.py
65 lines (49 loc) · 1.95 KB
/
data_preprocessor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import numpy as np
def _scatter_ratings(entries, rating_matrix, mask_matrix, user_ids, item_ids):
    """Write (user_idx, item_idx, rating) triples into the matrices and record the indices seen."""
    for user_idx, item_idx, rating in entries:
        rating_matrix[user_idx, item_idx] = rating
        mask_matrix[user_idx, item_idx] = 1
        user_ids.add(user_idx)
        item_ids.add(item_idx)


def read_rating(path, num_users, num_items, num_total_ratings, a, b, train_ratio):
    """Load ``ratings.dat`` and build dense full/train/test rating matrices.

    Each non-blank line of the file must have exactly four ``::``-separated
    fields: ``user::item::rating::<ignored>``.  User and item ids are
    treated as 1-based and shifted to 0-based matrix indices; ratings are
    parsed with ``int``.

    Args:
        path: Prefix that is concatenated directly with ``"ratings.dat"``
            (no separator is inserted, so a directory needs its trailing
            slash).
        num_users: Number of rows of every returned matrix.
        num_items: Number of columns of every returned matrix.
        num_total_ratings: Number of rating lines to split; a random
            permutation of ``range(num_total_ratings)`` selects which line
            goes to train and which to test.
        a: Value written into ``C`` at every observed (user, item) position.
        b: Value ``C`` holds everywhere else.
        train_ratio: Fraction of ``num_total_ratings`` assigned to the
            training split (truncated with ``int``).

    Returns:
        A 13-tuple ``(R, mask_R, C, train_R, train_mask_R, test_R,
        test_mask_R, num_train_ratings, num_test_ratings, user_train_set,
        item_train_set, user_test_set, item_test_set)`` where the ``*_R``
        arrays hold ratings, the ``*mask_R`` arrays hold 1 at observed
        positions, and the sets hold the 0-based user/item indices that
        occur in the corresponding split.

    Raises:
        ValueError: If a non-blank line does not have exactly four fields
            or a field is not an integer.
        IndexError: If ``num_total_ratings`` exceeds the number of rating
            lines, or an id falls outside the matrix dimensions.
    """
    # Parse the file once, inside a context manager so the handle is always
    # closed.  Blank lines (e.g. a trailing newline) are skipped instead of
    # failing on tuple unpacking.
    entries = []
    with open(path + "ratings.dat") as fp:
        for line in fp:
            if not line.strip():
                continue
            user, item, rating, _ = line.split("::")
            entries.append((int(user) - 1, int(item) - 1, int(rating)))

    shape = (num_users, num_items)
    R = np.zeros(shape)
    mask_R = np.zeros(shape)
    C = np.ones(shape) * b
    train_R = np.zeros(shape)
    test_R = np.zeros(shape)
    train_mask_R = np.zeros(shape)
    test_mask_R = np.zeros(shape)

    user_train_set = set()
    user_test_set = set()
    item_train_set = set()
    item_test_set = set()

    # Random split over line positions: the first `split_at` shuffled
    # positions form the training set, the remainder the test set.
    random_perm_idx = np.random.permutation(num_total_ratings)
    split_at = int(num_total_ratings * train_ratio)
    train_idx = random_perm_idx[:split_at]
    test_idx = random_perm_idx[split_at:]
    num_train_ratings = len(train_idx)
    num_test_ratings = len(test_idx)

    # Full matrices use every parsed line, independent of the split.
    for user_idx, item_idx, rating in entries:
        R[user_idx, item_idx] = rating
        mask_R[user_idx, item_idx] = 1
        C[user_idx, item_idx] = a

    _scatter_ratings(
        (entries[itr] for itr in train_idx),
        train_R, train_mask_R, user_train_set, item_train_set,
    )
    _scatter_ratings(
        (entries[itr] for itr in test_idx),
        test_R, test_mask_R, user_test_set, item_test_set,
    )

    return R, mask_R, C, train_R, train_mask_R, test_R, test_mask_R, num_train_ratings, num_test_ratings, \
        user_train_set, item_train_set, user_test_set, item_test_set