Apriori算法实现(三)

2014-11-24 08:14:21 · 作者: · 浏览: 3
Iterator >> it = txDatabase.entrySet().iterator(); // 统计关联规则支持计数 int conditionToConclusionCnt = 0; // 关联规则(条件项集推出结论项集)计数 int conclusionToConditionCnt = 0; // 关联规则(结论项集推出条件项集)计数 int supCnt = 0; // 关联规则支持计数 while(it.hasNext()) { Map.Entry > entry = it.next(); Set txSet = entry.getValue(); Set set1 = new HashSet (); Set set2 = new HashSet (); set1.addAll(conditionSet); set1.removeAll(txSet); // 集合差运算:set-txSet if(set1.isEmpty()) { // 如果set为空,说明事务数据库中包含条件频繁项conditionSet // 计数 conditionToConclusionCnt++; } set2.addAll(conclusionSet); set2.removeAll(txSet); // 集合差运算:set-txSet if(set2.isEmpty()) { // 如果set为空,说明事务数据库中包含结论频繁项conclusionSet // 计数 conclusionToConditionCnt++; } if(set1.isEmpty() && set2.isEmpty()) { supCnt++; } } // 计算置信度 Float conditionToConclusionConf = new Float(supCnt)/new Float(conditionToConclusionCnt); if(conditionToConclusionConf>=minConf) { if(assiciationRules.get(conditionSet) == null) { // 如果不存在以该条件频繁项集为条件的关联规则 Set > conclusionSetSet = new HashSet >(); conclusionSetSet.add(conclusionSet); assiciationRules.put(conditionSet, conclusionSetSet); } else { assiciationRules.get(conditionSet).add(conclusionSet); } } Float conclusionToConditionConf = new Float(supCnt)/new Float(conclusionToConditionCnt); if(conclusionToConditionConf>=minConf) { if(assiciationRules.get(conclusionSet) == null) { // 如果不存在以该结论频繁项集为条件的关联规则 Set > conclusionSetSet = new HashSet >(); conclusionSetSet.add(conditionSet); assiciationRules.put(conclusionSet, conclusionSetSet); } else { assiciationRules.get(conclusionSet).add(conditionSet); } } } /** * 经过挖掘得到的频繁项集Map * * @return 挖掘得到的频繁项集集合 */ public Map >> getFreqItemSet() { return freqItemSet; } /** * 获取挖掘到的全部的频繁关联规则的集合 * @return 频繁关联规则集合 */ public Map , Set >> getAssiciationRules() { return assiciationRules; } }


测试类如下:

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;

import junit.framework.TestCase;
/**
* Apriori算法测试类
* 
* @author king
* @date 2013/07/28 
*/
public class AprioriTest extends TestCase {

	private Apriori apriori;
	private Map
  
   > txDatabase;
	private Float minSup = new Float(0.50);
	private Float minConf = new Float(0.70);
	
	public static void main(String []args) throws Exception {
		AprioriTest at = new AprioriTest();
		at.setUp();
		
		long from = System.currentTimeMillis();
		at.testGetFreqItemSet();
		long to = System.currentTimeMillis();
		System.out.println(耗时: + (to-from));
	
	}
	
	/**
	 * Prepares the fixture: asks buildData for the transaction data and creates the miner
	 * from the transaction database and the two thresholds.
	 *
	 * @throws Exception if the fixture cannot be prepared
	 */
	@Override
	protected void setUp() throws Exception {
		// Alternative fixture: create() fills txDatabase with a small hand-written data set.
		// create();
		// NOTE(review): buildData is not visible here - presumably it fills txDatabase from
		// the named file, with the first argument capping how many records are read; confirm.
		this.buildData(Integer.MAX_VALUE, "f_faqk_.dat");
		apriori = new Apriori(txDatabase, minSup, minConf);
	}
	
	/**
	* 构造模拟事务数据库txDatabase
	*/
	public void create() {
	   txDatabase = new HashMap
   
    >(); Set
    
      set1 = new TreeSet
     
      (); set1.add(A); set1.add(B); set1.add(C); set1.add(E); txDatabase.put(1, set1); Set
      
        set2 = new TreeSet
       
        (); set2.add(A); set2.add(B); set2.add(C); txDatabase.put(2, set2); Set
        
          set3 = new TreeSet
         
          (); set3.add(C); set3.add(D); txDatabase.put(3, set3); Set
          
            set4 = new TreeSet
           
            (); set4.add(A); set4.add(B); set4.add(E); txDatabase.put(