设为首页 加入收藏

TOP

mahout usercf 单机使用(从hdfs读取输入流,将用户推荐数据存入redis)
2019-04-23 00:11:19 】 浏览:73
Tags:mahout usercf 单机 使用 hdfs 读取 入流 用户 推荐 数据 存入 redis
版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/qq_29992111/article/details/81085912

从HDFS获取输入流

/** Path of the UserCF input file inside HDFS; opened by getHdfsFileInputStream(). */
private static final String HDFS_USERCF_PATH="/usercf/usercf";

/**
 * URI of the HDFS file system that getHdfsFileInputStream() connects to.
 * NOTE(review): the host is written as xx.xx.xx.xx in this listing and looks like a
 * redacted placeholder; substitute the real HDFS address before use.
 */
public static final String HDFS_PATH = "hdfs://xx.xx.xx.xx:9000";


/**
 * Opens the UserCF input file on HDFS and returns a stream over its contents.
 *
 * <p>Connects to the file system at {@code HDFS_PATH} as user {@code "root"} and opens
 * {@code HDFS_USERCF_PATH}. The stream is not closed here; the caller is expected to
 * close it when done.
 *
 * @return an open stream on the HDFS file; never {@code null}
 * @throws IllegalStateException if the file system cannot be reached or the file cannot
 *         be opened; the underlying failure is attached as the cause
 */
public  InputStream getHdfsFileInputStream(){
    Configuration configuration = new Configuration();
    // Register the implementation class explicitly for the hdfs:// scheme.
    configuration.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
    try {
        FileSystem fileSystem = FileSystem.get(new URI(HDFS_PATH), configuration, "root");
        return fileSystem.open(new Path(HDFS_USERCF_PATH));
    } catch (Exception e) {
        if (e instanceof InterruptedException) {
            // Keep the interrupt flag set so code further up the stack can still react to it.
            Thread.currentThread().interrupt();
        }
        // Fail loudly with the cause attached instead of printing the stack trace and
        // returning null, which callers only discover later as a NullPointerException.
        throw new IllegalStateException(
                "Failed to open HDFS file " + HDFS_PATH + HDFS_USERCF_PATH, e);
    }
}

UserCF 使用String类型的userId

源数据格式如下:

userid                              itemid          score            
00164de5b22335dc1fa96e3430974dc2    553758065430    0.1
0022588261a857e4e237c1530ebb4e72    566155235963    0.1
00229d2d7fc3e73ea0f96625988e2a65    556508199842    0.1
00229d2d7fc3e73ea0f96625988e2a65    560583042108    0.1
002e7188e42a4a303aa1cebd6d81e9e4    545907540109    0.1
00313fd4e22058e846912f550b7688ff    561370351364    0.1
0036be6a025e71240548ea4265e002ac    43170449380 0.1
0037f8e18af8b6a7163438d9ea33a46f    40063916333 0.1
00388599e64f0dd2c4e9d70446406b2a    558843565468    0.1
004a4d509b3451a1ef696a986082a53b    36637606467 0.1
004a4d509b3451a1ef696a986082a53b    43936070094 0.1
004cc58dd0e974a4a329f6de45617097    548113254234    0.1
0053ba0971edf77d6dd567c8de019e5e    27583448359 0.1
005a89b8c3e8244051e198fe9b152ca4    564715367974    0.2
005cdade5234d107431958ca11ceca62    44019857894 0.1
0064445ecae687dbf803c3e6c379d1e3    39309371619 0.1
0072fde7c51a43269da2e690ff808b22    37016930337 0.1
0073cecc4dd81968d6fe10de0d8a8998    562637978196    0.2
007f35074f1aa9481a10284ec33fbe8a    553106888596    0.1

借鉴 Facebook 在 UserCF 算法中对 String 类型 userId 的处理方式。

原文链接如下:facebook推荐算法

我的源代码如下:

/** Neighborhood size passed to NearestNUserNeighborhood in userCF(). */
final static int NEIGHBORHOOD_NUM = 2;

    /** Number of recommendations requested per user in userCF(). */
    final static int RECOMMENDER_NUM = 3;

    /** Path of the UserCF input file inside HDFS; opened by getHdfsFileInputStream(). */
    private static final String HDFS_USERCF_PATH="/usercf/usercf";

    /**
     * URI of the HDFS file system that getHdfsFileInputStream() connects to.
     * NOTE(review): the host is written as xx.xx.xx.xx in this listing and looks like a
     * redacted placeholder; substitute the real HDFS address before use.
     */
    public static final String HDFS_PATH = "hdfs://xx.xx.xx.xx:9000";


    /**
     * Opens the UserCF input file on HDFS and returns a stream over its contents.
     *
     * <p>Connects to the file system at {@code HDFS_PATH} as user {@code "root"} and opens
     * {@code HDFS_USERCF_PATH}. The stream is not closed here; the caller is expected to
     * close it when done.
     *
     * @return an open stream on the HDFS file; never {@code null}
     * @throws IllegalStateException if the file system cannot be reached or the file cannot
     *         be opened; the underlying failure is attached as the cause
     */
    public  InputStream getHdfsFileInputStream(){
        Configuration configuration = new Configuration();
        // Register the implementation class explicitly for the hdfs:// scheme.
        configuration.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
        try {
            FileSystem fileSystem = FileSystem.get(new URI(HDFS_PATH), configuration, "root");
            return fileSystem.open(new Path(HDFS_USERCF_PATH));
        } catch (Exception e) {
            if (e instanceof InterruptedException) {
                // Keep the interrupt flag set so code further up the stack can still react to it.
                Thread.currentThread().interrupt();
            }
            // Fail loudly with the cause attached instead of printing the stack trace and
            // returning null, which callers only discover later as a NullPointerException.
            throw new IllegalStateException(
                    "Failed to open HDFS file " + HDFS_PATH + HDFS_USERCF_PATH, e);
        }
    }

    /**
     * Computes user-based collaborative-filtering recommendations from the HDFS input
     * and hands them to {@code redisService}.
     *
     * <p>Steps:
     * <ol>
     *   <li>Read tab-separated rows from {@link #getHdfsFileInputStream()}; column 0 is the
     *       string user ID, column 1 the numeric item ID, column 2 the preference score.</li>
     *   <li>Translate each string user ID to a {@code long} with a {@code MemoryIDMigrator}
     *       and group the preferences per user into a {@code GenericDataModel}.</li>
     *   <li>For every user, request up to {@code RECOMMENDER_NUM} items from a
     *       {@code GenericUserBasedRecommender} (Euclidean-distance similarity,
     *       {@code NEIGHBORHOOD_NUM} nearest neighbours).</li>
     *   <li>Write each recommendation with {@code redisService.zadd} under the key
     *       {@code KeysEnum.USER_RECOMMENDER_KEY.getKey() + <string user ID>}, using the
     *       estimated preference as the score and the item ID as the member.</li>
     * </ol>
     *
     * @throws IllegalStateException if no input stream could be obtained
     * @throws Exception if reading or parsing the input, building the recommender, or
     *         writing a result fails
     */
    public void userCF() throws Exception {
        // Translates the string user IDs of the input to long IDs and back again.
        MemoryIDMigrator idMigrator = new MemoryIDMigrator();
        Map<Long, List<Preference>> preferecesOfUsers = new HashMap<>();

        InputStream source = getHdfsFileInputStream();
        if (source == null) {
            // Report the missing input explicitly rather than failing with a
            // NullPointerException inside InputStreamReader.
            throw new IllegalStateException(
                    "No input stream available for " + HDFS_PATH + HDFS_USERCF_PATH);
        }

        // try-with-resources closes the stream even when a row fails to parse, and releases
        // it before the recommendation phase, which no longer needs it.
        try (InputStream inputStream = source) {
            CSVParser parser = new CSVParser(new InputStreamReader(inputStream, "UTF-8"), '\t');
            String[] line;
            while ((line = parser.getLine()) != null) {
                String canvasId = line[0];
                long itemId = Long.parseLong(line[1]);
                float score = Float.parseFloat(line[2]);

                long userId = idMigrator.toLongID(canvasId);
                // Remember the reverse mapping so toStringID(userId) works when writing results.
                idMigrator.storeMapping(userId, canvasId);

                List<Preference> userPrefList = preferecesOfUsers.get(userId);
                if (userPrefList == null) {
                    userPrefList = new ArrayList<>();
                    preferecesOfUsers.put(userId, userPrefList);
                }
                userPrefList.add(new GenericPreference(userId, itemId, score));
            }
        }

        FastByIDMap<PreferenceArray> preferecesOfUsersFastMap = new FastByIDMap<>();
        for (Map.Entry<Long, List<Preference>> entry : preferecesOfUsers.entrySet()) {
            preferecesOfUsersFastMap.put(entry.getKey(), new GenericUserPreferenceArray(entry.getValue()));
        }

        DataModel model = new GenericDataModel(preferecesOfUsersFastMap);

        // User similarity metric: Euclidean distance.
        UserSimilarity user = new EuclideanDistanceSimilarity(model);

        // Number of nearest neighbours considered for each user.
        NearestNUserNeighborhood neighbor = new NearestNUserNeighborhood(NEIGHBORHOOD_NUM, user, model);

        // Build the user-based recommender.
        Recommender r = new GenericUserBasedRecommender(model, neighbor, user);

        // Iterate over every user ID known to the model.
        LongPrimitiveIterator iter = model.getUserIDs();
        while (iter.hasNext()) {
            long uid = iter.nextLong();
            List<RecommendedItem> list = r.recommend(uid, RECOMMENDER_NUM);
            if (CollectionUtils.isEmpty(list)) {
                continue;
            }
            // Resolve the string ID and the key once per user instead of once per item.
            String canvasId = idMigrator.toStringID(uid);
            String recommendKey = KeysEnum.USER_RECOMMENDER_KEY.getKey() + canvasId;
            for (RecommendedItem ritem : list) {
                redisService.zadd(recommendKey, ritem.getValue(), String.valueOf(ritem.getItemID()));
            }
            logger.info("canvasId("+canvasId+")推荐商品成功");
        }
    }
】【打印繁体】【投稿】【收藏】 【推荐】【举报】【评论】 【关闭】 【返回顶部
上一篇HDFS--梳理各个模块的功能与关系 下一篇HDFS Balance调优参数设置

最新文章

热门文章

Hot 文章

Python

C 语言

C++基础

大数据基础

linux编程基础

C/C++面试题目