Java实现双数组Trie树(DoubleArrayTrie,DAT) (六)

2014-11-24 08:07:36 · 作者: · 浏览: 3
heck.length) break;
if(Check[Base[p]+i] == p)
{
ret.add(i);
}
}
return ret;
}

private boolean TailContainString(int start,String s2)
{
for(int i=0;i {
if(s2.charAt(i) != Tail[i+start]) return false;
}

return true;
}
private boolean TailMatchString(int start,String s2)
{
s2 += END_CHAR;
for(int i=0;i {
if(s2.charAt(i) != Tail[i+start]) return false;
}
return true;
}


public void Insert(String s) throws Exception
{
s += END_CHAR;

int pre_p = 1;
int cur_p;
for(int i=0; i {
//获取状态位置
cur_p = Base[pre_p]+GetCharCode(s.charAt(i));
//如果长度超过现有,拓展数组
if (cur_p >= Base.length) Extend_Array();

//空闲状态
if(Base[cur_p] == 0 && Check[cur_p] == 0)
{
Base[cur_p] = -Pos;
Check[cur_p] = pre_p;
Pos = CopyToTailArray(s,i+1);
break;
}else
//已存在状态
if(Base[cur_p] > 0 && Check[cur_p] == pre_p)
{
pre_p = cur_p;
continue;
}else
//冲突 1:遇到 Base[cur_p]小于0的,即遇到一个被压缩存到Tail中的字符串
if(Base[cur_p] < 0 && Check[cur_p] == pre_p)
{
int head = -Base[cur_p];

if(s.charAt(i+1)== END_CHAR && Tail[head]==END_CHAR) //插入重复字符串
{
break;
}

//公共字母的情况,因为上一个判断已经排除了结束符,所以一定是2个都不是结束符
if (Tail[head] == s.charAt(i+1))
{
int avail_base = x_check(new Integer[]{GetCharCode(s.charAt(i+1))});
Base[cur_p] = avail_base;

Check[avail_base+GetCharCode(s.charAt(i+1))] = cur_p;
Base[avail_base+GetCharCode(s.charAt(i+1))] = -(head+1);
pre_p = cur_p;
continue;
}
else
{
//2个字母不相同的情况,可能有一个为结束符
int avail_base ;
avail_base = x_check(new Integer[]{GetCharCode(s.charAt(i+1)),GetCharCode(Tail[head])});

Base[cur_p] = avail_base;

Check[avail_base+GetCharCode(Tail[head])] = cur_p;
Check[avail_base+GetCharCode(s.charAt(i+1))] = cur_p;

//Tail 为END_FLAG 的情况
if(Tail[head] == END_CHAR)
Base[avail_base+GetCharCode(Tail[head])] = 0;
else
Base[avail_base+GetCharCode(Tail[head])] = -(head+1);
if(s.charAt(i+1) == END_CHAR)
Base[avail_base+GetCharCode(s.charAt(i+1))] = 0;
else
Base[avail_base+GetCharCode(s.charAt(i+1))] = -Pos;

Pos = CopyToTailArray(s,i+2);
break;
}
}else
//冲突2:当前结点已经被占用,需要调整pre的base
if(Check[cur_p] != pre_p)
{
ArrayList list1 = GetChildList(pre_p);
int toBeAdjust;
ArrayList list = null;
if(true)
{
toBeAdjust = pre_p;
list = list1;
}

int origin_base = Base[toBeAdjust];
list.add(GetCharCode(s.charAt(i)));
int avail_base = x_check((Integer[])list.toArray(new Integer[list.size()]));
list.remove(list.size()-1);

Base[toBeAdjust] = avail_base;
for(int j=0; j {
//BUG
int tmp1 = origin_base + list.get(j);
int tmp2 = avail_base + list.get(j);

Base[tmp2] = Base[tmp1];
Check[tmp2] = Check[tmp1];

//有后续
if(Base[tmp1] > 0)
{
ArrayList subsequence = GetChildList(tmp1);
for(int k=0; k {
Check[Base[tmp1]+subsequence.get(k)] = tmp2;
}
}

Base[tmp1] = 0;
Check[tmp1] = 0;
}

//更新新的cur_p
cur_p = Base[pre_p]+GetCharCode(s.charAt(i));

if(s.charAt(i) == END_CHAR)
Base[cur_p] = 0;
else
Base[cur_p] = -Pos;
Check[cur_p] = pre_p;