Chameleon两阶段聚类算法(二)

2015-11-21 01:44:36 · 作者: · 浏览: 11
= null; Point point2 = null; ArrayList edgeList = new ArrayList<>(); ArrayList pointList1 = (ArrayList) points.clone(); ArrayList pointList2 = null; Cluster c2 = null; try { c2 = (Cluster) otherCluster.clone(); pointList2 = c2.points; } catch (CloneNotSupportedException e) { // TODO Auto-generated catch block e.printStackTrace(); } int[] tempEdge; // 循环计算出每次的最近距离 while (count < n) { tempEdge = new int[2]; minDistance = Integer.MAX_VALUE; for (Point p1 : pointList1) { for (Point p2 : pointList2) { distance = p1.ouDistance(p2); if (distance < minDistance) { point1 = p1; point2 = p2; tempEdge[0] = p1.id; tempEdge[1] = p2.id; minDistance = distance; } } } pointList1.remove(point1); pointList2.remove(point2); edgeList.add(tempEdge); count++; } return edgeList; } @Override protected Object clone() throws CloneNotSupportedException { // TODO Auto-generated method stub //引用需要再次复制,实现深拷贝 ArrayList pointList = (ArrayList) this.points.clone(); Cluster cluster = new Cluster(id, pointList); return cluster; } }
算法工具类Chameleon.java:

?

?

package DataMining_Chameleon;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.text.MessageFormat;
import java.util.ArrayList;

/**
 * Chameleon 两阶段聚类算法工具类
 * 
 * @author lyq
 * 
 */
public class ChameleonTool {
	// 测试数据点文件地址
	private String filePath;
	// 第一阶段的k近邻的k大小
	private int k;
	// 簇度量函数阈值
	private double minMetric;
	// 总的坐标点的个数
	private int pointNum;
	// 总的连接矩阵的情况,括号表示的是坐标点的id号
	public static int[][] edges;
	// 点与点之间的边的权重
	public static double[][] weights;
	// 原始坐标点数据
	private ArrayList totalPoints;
	// 第一阶段产生的所有的连通子图作为最初始的聚类
	private ArrayList
initClusters; // 结果簇结合 private ArrayList resultClusters; public ChameleonTool(String filePath, int k, double minMetric) { this.filePath = filePath; this.k = k; this.minMetric = minMetric; readDataFile(); } /** * 从文件中读取数据 */ private void readDataFile() { File file = new File(filePath); ArrayList dataArray = new ArrayList(); try { BufferedReader in = new BufferedReader(new FileReader(file)); String str; String[] tempArray; while ((str = in.readLine()) != null) { tempArray = str.split(" "); dataArray.add(tempArray); } in.close(); } catch (IOException e) { e.getStackTrace(); } Point p; totalPoints = new ArrayList<>(); for (String[] array : dataArray) { p = new Point(array[0], array[1], array[2]); totalPoints.add(p); } pointNum = totalPoints.size(); } /** * 递归的合并小聚簇 */ private void combineSubClusters() { Cluster cluster = null; resultClusters = new ArrayList<>(); // 当最后的聚簇只剩下一个的时候,则退出循环 while (initClusters.size() > 1) { cluster = initClusters.get(0); combineAndRemove(cluster, initClusters); } } /** * 递归的合并聚簇和移除聚簇 * * @param clusterList */ private ArrayList combineAndRemove(Cluster cluster, ArrayList clusterList) { ArrayList remainClusters; double metric = 0; double maxMetric = -Integer.MAX_VALUE; Cluster cluster1 = null; Cluster cluster2 = null; for (Cluster c2 : clusterList) { if (cluster.id == c2.id) { continue; } metric = calMetricfunction(cluster, c2, 1); if (metric > maxMetric) { maxMetric = metric; cluster1 = cluster; cluster2 = c2; } } // 如果度量函数值超过阈值,则进行合并,继续搜寻可以合并的簇 if (maxMetric > minMetric) { clusterList.remove(cluster2); // 将边进行连接 connectClusterToCluster(cluster1, cluster2); // 将簇1和簇2合并 cluster1.points.addAll(cluster2.points); remainClusters = combineAndRemove(cluster1, clus