Apriori算法Java实现示例(一)

2014-11-24 08:29:12 · 作者: · 浏览: 2
package xx;

import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;

public class Apriori {

	public static void main(String[] args) throws Exception {

		// 初始化事务集
		List> trans = new LinkedList>();
		trans.add(new ItemSet(new String[] { "I1", "I2", "I5" }));
		trans.add(new ItemSet(new String[] { "I2", "I4" }));
		trans.add(new ItemSet(new String[] { "I2", "I3" }));
		trans.add(new ItemSet(new String[] { "I1", "I2", "I4" }));
		trans.add(new ItemSet(new String[] { "I1", "I3" }));
		trans.add(new ItemSet(new String[] { "I2", "I3" }));
		trans.add(new ItemSet(new String[] { "I1", "I3" }));
		trans.add(new ItemSet(new String[] { "I1", "I2", "I3", "I5" }));
		trans.add(new ItemSet(new String[] { "I1", "I2", "I3" }));

		int MSF = 2; // 设定最小支持频次为2

		Map> rst = findFrequentItemSets(trans, MSF);

		// 输出频繁项集
		System.out.println("Frequent Item Sets:");
		for (Entry> entry : rst.entrySet()) {
			Integer itemSetSize = entry.getKey();
			System.out.printf("Frequent %d Item Sets:\n", itemSetSize);
			for (ItemSet set : entry.getValue())
				System.out.printf("%s, %d\n", set, set.frequence);
		}

		double MCONF = 0.6; // 设定最小置信度为60%

		Map directMap = new HashMap();
		for (Entry> entry : rst.entrySet()) {
			for (ItemSet set : entry.getValue())
				directMap.put(set, set);
		}

		// 根据频繁项集构造关联规则
		System.out.println();
		System.out.println("Association Rules:");
		for (Entry> entry : rst.entrySet()) {
			for (ItemSet set : entry.getValue()) {
				double cnt1 = directMap.get(set).frequence;
				List
subSets = set.listNotEmptySubItemSets(); for (ItemSet subSet : subSets) { int cnt2 = directMap.get(subSet).frequence; double conf = cnt1 / cnt2; if (cnt1 / cnt2 >= MCONF) { ItemSet remainSet = new ItemSet(); remainSet.addAll(set); remainSet.removeAll(subSet); System.out.printf("%s => %s, %.2f\n", subSet, remainSet, conf); } } } } } /** * 查找事务集中的所有频繁项集,返回Map为:L -> 所有频繁L项集的列表 */ static Map> findFrequentItemSets( Iterable> transIterable, int MSF) { Map> ret = new TreeMap>(); // 首先确定频繁1项集 Iterator> it = transIterable.iterator(); Set oneItemSets = findFrequentOneItemSets(it, MSF); ret.put(1, oneItemSets); int preItemSetSize = 1; Set preItemSets = oneItemSets; // 基于获得的所有频繁L-1项集迭代查找所有频繁L项集,直到不存在频繁L-1项集 while (!preItemSets.isEmpty()) { int curItemSetSize = preItemSetSize + 1; // 获取频繁L项集的所有候选L项集 List candidates = aprioriGenCandidates(preItemSets); // 扫描事务集以确定所有候选L项集出现的频次 it = transIterable.iterator(); while (it.hasNext()) { Set tran = it.next(); for (ItemSet candidate : candidates) if (tran.containsAll(candidate)) candidate.frequence++; } // 将出现频次不小于最小支持频次的候选L项集选为频繁L项集 Set curItemSets = new HashSet(); for (ItemSet candidate : candidates) if (candidate.frequence >= MSF) curItemSets.add(candidate); if (!curItemSets.isEmpty()) ret.put(curItemSetSize