版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/qq_29992111/article/details/81085912
从HDFS获取输入流
/** Path of the user-preference file inside HDFS; passed to {@code fileSystem.open(new Path(...))}. */
private static final String HDFS_USERCF_PATH="/usercf/usercf";
/**
 * URI of the HDFS endpoint handed to {@code FileSystem.get}.
 * NOTE(review): "xx.xx.xx.xx" looks like a redacted placeholder host; replace with the real address.
 */
public static final String HDFS_PATH = "hdfs://xx.xx.xx.xx:9000";
/**
 * Opens the user-preference file ({@code HDFS_USERCF_PATH}) on the HDFS instance at
 * {@code HDFS_PATH}, connecting as user "root".
 *
 * <p>The caller owns the returned stream and must close it. The {@code FileSystem} handle
 * obtained here is intentionally not closed, because closing it would invalidate the
 * returned stream.
 *
 * @return an open stream over the file contents; never {@code null}
 * @throws IllegalStateException if the file system cannot be reached, the file cannot be
 *         opened, or the calling thread is interrupted while connecting; the original
 *         failure is attached as the cause
 */
public InputStream getHdfsFileInputStream(){
    Configuration configuration = new Configuration();
    // Explicitly register the implementation class for the "hdfs" scheme.
    configuration.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
    try {
        FileSystem fileSystem = FileSystem.get(new URI(HDFS_PATH), configuration, "root");
        return fileSystem.open(new Path(HDFS_USERCF_PATH));
    } catch (InterruptedException e) {
        // Restore the interrupt flag so code further up the stack can still observe it.
        Thread.currentThread().interrupt();
        throw new IllegalStateException(
                "Interrupted while opening " + HDFS_PATH + HDFS_USERCF_PATH, e);
    } catch (Exception e) {
        // Fail loudly with the real cause instead of returning null, which callers
        // would only trip over later as an unrelated NullPointerException.
        throw new IllegalStateException(
                "Failed to open " + HDFS_PATH + HDFS_USERCF_PATH, e);
    }
}
UserCF 使用String类型的userId
源数据格式如下(各列以制表符分隔):
userid itemid score
00164de5b22335dc1fa96e3430974dc2 553758065430 0.1
0022588261a857e4e237c1530ebb4e72 566155235963 0.1
00229d2d7fc3e73ea0f96625988e2a65 556508199842 0.1
00229d2d7fc3e73ea0f96625988e2a65 560583042108 0.1
002e7188e42a4a303aa1cebd6d81e9e4 545907540109 0.1
00313fd4e22058e846912f550b7688ff 561370351364 0.1
0036be6a025e71240548ea4265e002ac 43170449380 0.1
0037f8e18af8b6a7163438d9ea33a46f 40063916333 0.1
00388599e64f0dd2c4e9d70446406b2a 558843565468 0.1
004a4d509b3451a1ef696a986082a53b 36637606467 0.1
004a4d509b3451a1ef696a986082a53b 43936070094 0.1
004cc58dd0e974a4a329f6de45617097 548113254234 0.1
0053ba0971edf77d6dd567c8de019e5e 27583448359 0.1
005a89b8c3e8244051e198fe9b152ca4 564715367974 0.2
005cdade5234d107431958ca11ceca62 44019857894 0.1
0064445ecae687dbf803c3e6c379d1e3 39309371619 0.1
0072fde7c51a43269da2e690ff808b22 37016930337 0.1
0073cecc4dd81968d6fe10de0d8a8998 562637978196 0.2
007f35074f1aa9481a10284ec33fbe8a 553106888596 0.1
参考 Facebook 的做法,对 UserCF 算法中 String 类型的 userId 进行处理(将其映射为 long 类型的 ID)。
原文链接如下:facebook推荐算法
我的源代码如下:
/** Neighborhood size passed to {@code NearestNUserNeighborhood} in {@code userCF()}. */
final static int NEIGHBORHOOD_NUM = 2;
/** Maximum number of items requested per user from {@code Recommender.recommend} in {@code userCF()}. */
final static int RECOMMENDER_NUM = 3;
/** Path of the user-preference file inside HDFS; passed to {@code fileSystem.open(new Path(...))}. */
private static final String HDFS_USERCF_PATH="/usercf/usercf";
/**
 * URI of the HDFS endpoint handed to {@code FileSystem.get}.
 * NOTE(review): "xx.xx.xx.xx" looks like a redacted placeholder host; replace with the real address.
 */
public static final String HDFS_PATH = "hdfs://xx.xx.xx.xx:9000";
/**
 * Opens the user-preference file ({@code HDFS_USERCF_PATH}) on the HDFS instance at
 * {@code HDFS_PATH}, connecting as user "root".
 *
 * <p>The caller owns the returned stream and must close it. The {@code FileSystem} handle
 * obtained here is intentionally not closed, because closing it would invalidate the
 * returned stream.
 *
 * @return an open stream over the file contents; never {@code null}
 * @throws IllegalStateException if the file system cannot be reached, the file cannot be
 *         opened, or the calling thread is interrupted while connecting; the original
 *         failure is attached as the cause
 */
public InputStream getHdfsFileInputStream(){
    Configuration configuration = new Configuration();
    // Explicitly register the implementation class for the "hdfs" scheme.
    configuration.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
    try {
        FileSystem fileSystem = FileSystem.get(new URI(HDFS_PATH), configuration, "root");
        return fileSystem.open(new Path(HDFS_USERCF_PATH));
    } catch (InterruptedException e) {
        // Restore the interrupt flag so code further up the stack can still observe it.
        Thread.currentThread().interrupt();
        throw new IllegalStateException(
                "Interrupted while opening " + HDFS_PATH + HDFS_USERCF_PATH, e);
    } catch (Exception e) {
        // Fail loudly with the real cause instead of returning null, which callers
        // would only trip over later as an unrelated NullPointerException.
        throw new IllegalStateException(
                "Failed to open " + HDFS_PATH + HDFS_USERCF_PATH, e);
    }
}
/**
 * Runs user-based collaborative filtering over the preference file on HDFS and writes
 * each user's recommendations to Redis.
 *
 * <p>Steps: load the tab-separated {@code userId / itemId / score} rows, mapping every
 * String user ID to a long via {@link MemoryIDMigrator}; build an in-memory
 * {@code GenericDataModel}; compute up to {@code RECOMMENDER_NUM} recommendations per user
 * from the {@code NEIGHBORHOOD_NUM} nearest neighbours under Euclidean-distance similarity;
 * then {@code zadd} each recommended item (score = estimated preference) under the key
 * {@code USER_RECOMMENDER_KEY + <original String user ID>}.
 *
 * @throws Exception if the input cannot be read or parsed, or if Mahout or Redis fails
 */
public void userCF() throws Exception {
    MemoryIDMigrator idMigrator = new MemoryIDMigrator();
    DataModel model = new GenericDataModel(loadPreferences(idMigrator));

    // User similarity measure: Euclidean distance.
    UserSimilarity similarity = new EuclideanDistanceSimilarity(model);
    // Number of nearest neighbours considered per user.
    NearestNUserNeighborhood neighborhood =
            new NearestNUserNeighborhood(NEIGHBORHOOD_NUM, similarity, model);
    // User-based recommender built from the pieces above.
    Recommender recommender = new GenericUserBasedRecommender(model, neighborhood, similarity);

    // Walk every user ID known to the model.
    LongPrimitiveIterator userIds = model.getUserIDs();
    while (userIds.hasNext()) {
        long uid = userIds.nextLong();
        List<RecommendedItem> recommendations = recommender.recommend(uid, RECOMMENDER_NUM);
        if (CollectionUtils.isEmpty(recommendations)) {
            continue;
        }
        // Resolve the original String ID once per user rather than once per item.
        String canvasId = idMigrator.toStringID(uid);
        String redisKey = KeysEnum.USER_RECOMMENDER_KEY.getKey() + canvasId;
        for (RecommendedItem item : recommendations) {
            redisService.zadd(redisKey, item.getValue(), String.valueOf(item.getItemID()));
        }
        logger.info("canvasId("+canvasId+")推荐商品成功");
    }
}

/**
 * Reads all preference rows from HDFS into Mahout's per-user preference-array map and
 * records the long-to-String user ID mapping in {@code idMigrator}.
 *
 * <p>The HDFS stream is closed before this method returns, including when parsing fails.
 */
private FastByIDMap<PreferenceArray> loadPreferences(MemoryIDMigrator idMigrator) throws Exception {
    Map<Long, List<Preference>> preferencesByUser = new HashMap<>();
    try (InputStream inputStream = getHdfsFileInputStream()) {
        if (inputStream == null) {
            // Guard explicitly so a failed open is reported as such, not as an NPE
            // from inside InputStreamReader.
            throw new IllegalStateException("HDFS input stream is unavailable");
        }
        CSVParser parser = new CSVParser(new InputStreamReader(inputStream, "UTF-8"), '\t');
        String[] line;
        while ((line = parser.getLine()) != null) {
            String canvasId = line[0];
            long itemId = Long.parseLong(line[1]);
            float score = Float.parseFloat(line[2]);
            long userId = idMigrator.toLongID(canvasId);
            // Remember the reverse mapping so results can be reported by String ID.
            idMigrator.storeMapping(userId, canvasId);
            List<Preference> userPrefList = preferencesByUser.get(userId);
            if (userPrefList == null) {
                userPrefList = new ArrayList<>();
                preferencesByUser.put(userId, userPrefList);
            }
            userPrefList.add(new GenericPreference(userId, itemId, score));
        }
    }

    FastByIDMap<PreferenceArray> preferencesFastMap = new FastByIDMap<>();
    for (Map.Entry<Long, List<Preference>> entry : preferencesByUser.entrySet()) {
        preferencesFastMap.put(entry.getKey(), new GenericUserPreferenceArray(entry.getValue()));
    }
    return preferencesFastMap;
}