|
w FileReader(file));
String str;
String[] tempArray;
while ((str = in.readLine()) != null) {
tempArray = str.split(" ");
for(String word: tempArray){
dataArray.add(word);
}
}
in.close();
} catch (IOException e) {
e.getStackTrace();
}
return dataArray;
}
/**
* 获取查询总数据
* @return
*/
public ArrayList getQueryDatas(){
return this.queryDatas;
}
/**
* 用位存储数据
*/
private void bitStoreData() {
long hashcode = 0;
bitStore = new BitSet(BIT_ARRAY_LENGTH);
for (String word : totalDatas) {
// 对每个词进行3次哈希求值,减少哈希冲突的概率
hashcode = BKDRHash(word);
hashcode %= BIT_ARRAY_LENGTH;
bitStore.set((int) hashcode, true);
hashcode = SDBMHash(word);
hashcode %= BIT_ARRAY_LENGTH;
bitStore.set((int) hashcode, true);
hashcode = DJBHash(word);
hashcode %= BIT_ARRAY_LENGTH;
bitStore.set((int) hashcode, true);
}
}
/**
* 进行数据的查询,判断原数据中是否存在目标查询数据
*/
public Map queryDatasByBF() {
boolean isExist;
long hashcode;
int pos1;
int pos2;
int pos3;
// 查询词的所属情况图
Map word2exist = new HashMap();
hashcode = 0;
isExist = false;
bitStoreData();
for (String word : queryDatas) {
isExist = false;
hashcode = BKDRHash(word);
pos1 = (int) (hashcode % BIT_ARRAY_LENGTH);
hashcode = SDBMHash(word);
pos2 = (int) (hashcode % BIT_ARRAY_LENGTH);
hashcode = DJBHash(word);
pos3 = (int) (hashcode % BIT_ARRAY_LENGTH);
// 只有在3个哈希位置都存在才算真的存在
if (bitStore.get(pos1) && bitStore.get(pos2) && bitStore.get(pos3)) {
isExist = true;
}
// 将结果存入map
word2exist.put(word, isExist);
}
return word2exist;
}
/**
* 进行数据的查询采用普通的过滤器方式就是,逐个查询
*/
public Map queryDatasByNF() {
boolean isExist = false;
// 查询词的所属情况图
Map word2exist = new HashMap();
// 遍历的方式去查找
for (String qWord : queryDatas) {
isExist = false;
for (String word : totalDatas) {
if (qWord.equals(word)) {
isExist = true;
break;
}
}
word2exist.put(qWord, isExist);
}
return word2exist;
}
/**
* BKDR字符哈希算法
*
* @param str
* @return
*/
private long BKDRHash(String str) {
int seed = 31; /* 31 131 1313 13131 131313 etc.. */
long hash = 0;
int i = 0;
for (i = 0; i < str.length(); i++) {
hash = (hash * seed) + (str.charAt(i));
}
hash = Math.abs(hash);
return hash;
}
/**
* SDB字符哈希算法
*
* @param str
* @return
*/
private long SDBMHash(String str) {
long hash = 0;
int i = 0;
for (i = 0; i < str.length(); i++) {
hash = (str.charAt(i)) + (hash << 6) + (hash << 16) - hash;
}
hash = Math.abs(hash);
return hash;
}
/**
* DJB字符哈希算法
*
* @param str
* @return
*/
private long DJBHash(String str) {
long hash = 5381;
int i = 0;
for (i = 0; i < str.length(); i++) {
hash = ((hash << 5) + hash) + (str.charAt(i));
}
hash = Math.abs(hash);
return hash;
}
}
场景测试类Client.java:
?
?
package BloomFilter;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Map;
/**
* BloomFileter布隆过滤器测试类
*
* @author lyq
*
*/
public class Client {
public static void main(String[] args) {
String filePath = "C:\\Users\\lyq\\Desktop\\icon\\input.txt";
String testFilePath = "C:\\Users\\lyq\\Desktop\\icon\\testInput.txt";
// 总的查询词数
int totalCount;
// 正确的结果数
int rightCount;
long startTime = 0;
long endTime = 0;
// 布隆过滤器查询结果
Map bfMap;
// 普通过滤器查询结果
Map nfMap;
//查询总数据
ArrayList queryDatas;
BloomFilterTool tool = new BloomFilterTool(filePath, testFilePath);
// 采用布隆过滤器的方式进行词的查询
startTime = System.currentTimeMillis();
bfMap = tool.queryDatasByBF();
endTime = System.currentTimeMillis();
System.out.println("BloomFilter算法耗时" + (endTime - startTime) + "ms");
/ |