Chameleon两阶段聚类算法 - 数据库编程

= null; Point point2 = null; ArrayList edgeList = new ArrayList<>(); ArrayList pointList1 = (ArrayList) points.clone(); ArrayList pointList2 = null; Cluster c2 = null; try { c2 = (Cluster) otherCluster.clone(); pointList2 = c2.points; } catch (CloneNotSupportedException e) { // TODO Auto-generated catch block e.printStackTrace(); } int[] tempEdge; // 循环计算出每次的最近距离 while (count < n) { tempEdge = new int[2]; minDistance = Integer.MAX_VALUE; for (Point p1 : pointList1) { for (Point p2 : pointList2) { distance = p1.ouDistance(p2); if (distance < minDistance) { point1 = p1; point2 = p2; tempEdge[0] = p1.id; tempEdge[1] = p2.id; minDistance = distance; } } } pointList1.remove(point1); pointList2.remove(point2); edgeList.add(tempEdge); count++; } return edgeList; } @Override protected Object clone() throws CloneNotSupportedException { // TODO Auto-generated method stub //引用需要再次复制，实现深拷贝 ArrayList pointList = (ArrayList) this.points.clone(); Cluster cluster = new Cluster(id, pointList); return cluster; } }
算法工具类Chameleon.java:

package DataMining_Chameleon;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.text.MessageFormat;
import java.util.ArrayList;

/**
 * Chameleon 两阶段聚类算法工具类
 * 
 * @author lyq
 * 
 */
public class ChameleonTool {
	// 测试数据点文件地址
	private String filePath;
	// 第一阶段的k近邻的k大小
	private int k;
	// 簇度量函数阈值
	private double minMetric;
	// 总的坐标点的个数
	private int pointNum;
	// 总的连接矩阵的情况,括号表示的是坐标点的id号
	public static int[][] edges;
	// 点与点之间的边的权重
	public static double[][] weights;
	// 原始坐标点数据
	private ArrayList totalPoints;
	// 第一阶段产生的所有的连通子图作为最初始的聚类
	private ArrayList




 initClusters;
	// 结果簇结合
	private ArrayList resultClusters;

	public ChameleonTool(String filePath, int k, double minMetric) {
		this.filePath = filePath;
		this.k = k;
		this.minMetric = minMetric;

		readDataFile();
	}

	/**
	 * 从文件中读取数据
	 */
	private void readDataFile() {
		File file = new File(filePath);
		ArrayList dataArray = new ArrayList();

		try {
			BufferedReader in = new BufferedReader(new FileReader(file));
			String str;
			String[] tempArray;
			while ((str = in.readLine()) != null) {
				tempArray = str.split(" ");
				dataArray.add(tempArray);
			}
			in.close();
		} catch (IOException e) {
			e.getStackTrace();
		}

		Point p;
		totalPoints = new ArrayList<>();
		for (String[] array : dataArray) {
			p = new Point(array[0], array[1], array[2]);
			totalPoints.add(p);
		}
		pointNum = totalPoints.size();
	}

	/**
	 * 递归的合并小聚簇
	 */
	private void combineSubClusters() {
		Cluster cluster = null;

		resultClusters = new ArrayList<>();

		// 当最后的聚簇只剩下一个的时候，则退出循环
		while (initClusters.size() > 1) {
			cluster = initClusters.get(0);
			combineAndRemove(cluster, initClusters);
		}
	}

	/**
	 * 递归的合并聚簇和移除聚簇
	 * 
	 * @param clusterList
	 */
	private ArrayList combineAndRemove(Cluster cluster,
			ArrayList clusterList) {
		ArrayList remainClusters;
		double metric = 0;
		double maxMetric = -Integer.MAX_VALUE;
		Cluster cluster1 = null;
		Cluster cluster2 = null;

		for (Cluster c2 : clusterList) {
			if (cluster.id == c2.id) {
				continue;
			}

			metric = calMetricfunction(cluster, c2, 1);

			if (metric > maxMetric) {
				maxMetric = metric;
				cluster1 = cluster;
				cluster2 = c2;
			}
		}

		// 如果度量函数值超过阈值，则进行合并,继续搜寻可以合并的簇
		if (maxMetric > minMetric) {
			clusterList.remove(cluster2);
			// 将边进行连接
			connectClusterToCluster(cluster1, cluster2);
			// 将簇1和簇2合并
			cluster1.points.addAll(cluster2.points);
			remainClusters = combineAndRemove(cluster1, clus

Chameleon两阶段聚类算法(二)