package xx;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
public class Apriori {
public static void main(String[] args) throws Exception {
// 初始化事务集
List> trans = new LinkedList>();
trans.add(new ItemSet(new String[] { "I1", "I2", "I5" }));
trans.add(new ItemSet(new String[] { "I2", "I4" }));
trans.add(new ItemSet(new String[] { "I2", "I3" }));
trans.add(new ItemSet(new String[] { "I1", "I2", "I4" }));
trans.add(new ItemSet(new String[] { "I1", "I3" }));
trans.add(new ItemSet(new String[] { "I2", "I3" }));
trans.add(new ItemSet(new String[] { "I1", "I3" }));
trans.add(new ItemSet(new String[] { "I1", "I2", "I3", "I5" }));
trans.add(new ItemSet(new String[] { "I1", "I2", "I3" }));
int MSF = 2; // 设定最小支持频次为2
Map> rst = findFrequentItemSets(trans, MSF);
// 输出频繁项集
System.out.println("Frequent Item Sets:");
for (Entry> entry : rst.entrySet()) {
Integer itemSetSize = entry.getKey();
System.out.printf("Frequent %d Item Sets:\n", itemSetSize);
for (ItemSet set : entry.getValue())
System.out.printf("%s, %d\n", set, set.frequence);
}
double MCONF = 0.6; // 设定最小置信度为60%
Map directMap = new HashMap();
for (Entry> entry : rst.entrySet()) {
for (ItemSet set : entry.getValue())
directMap.put(set, set);
}
// 根据频繁项集构造关联规则
System.out.println();
System.out.println("Association Rules:");
for (Entry> entry : rst.entrySet()) {
for (ItemSet set : entry.getValue()) {
double cnt1 = directMap.get(set).frequence;
List
subSets = set.listNotEmptySubItemSets();
for (ItemSet subSet : subSets) {
int cnt2 = directMap.get(subSet).frequence;
double conf = cnt1 / cnt2;
if (cnt1 / cnt2 >= MCONF) {
ItemSet remainSet = new ItemSet();
remainSet.addAll(set);
remainSet.removeAll(subSet);
System.out.printf("%s => %s, %.2f\n", subSet,
remainSet, conf);
}
}
}
}
}
/**
* 查找事务集中的所有频繁项集,返回Map为:L -> 所有频繁L项集的列表
*/
static Map> findFrequentItemSets(
Iterable> transIterable, int MSF) {
Map> ret = new TreeMap>();
// 首先确定频繁1项集
Iterator> it = transIterable.iterator();
Set oneItemSets = findFrequentOneItemSets(it, MSF);
ret.put(1, oneItemSets);
int preItemSetSize = 1;
Set preItemSets = oneItemSets;
// 基于获得的所有频繁L-1项集迭代查找所有频繁L项集,直到不存在频繁L-1项集
while (!preItemSets.isEmpty()) {
int curItemSetSize = preItemSetSize + 1;
// 获取频繁L项集的所有候选L项集
List candidates = aprioriGenCandidates(preItemSets);
// 扫描事务集以确定所有候选L项集出现的频次
it = transIterable.iterator();
while (it.hasNext()) {
Set tran = it.next();
for (ItemSet candidate : candidates)
if (tran.containsAll(candidate))
candidate.frequence++;
}
// 将出现频次不小于最小支持频次的候选L项集选为频繁L项集
Set curItemSets = new HashSet();
for (ItemSet candidate : candidates)
if (candidate.frequence >= MSF)
curItemSets.add(candidate);
if (!curItemSets.isEmpty())
ret.put(curItemSetSize