题目链接:http://poj.org/problem id=3294
题目大意:给定n个字符串,找出至少出现在n/2个字符串中的最长子串。
解题思路:本题可用后缀数组解决。先用倍增算法算出sa数组、rank数组再算出height数组,最后二分枚举长度,看是否符合出现在n/2个字符串中。题目并不难,但比较麻烦,首先要记录最长的字符串,作为二分查找的上届,然后在二分的时候要对height数组进行分组,将大于mid的分一组,看着一组中的子串都在哪些字符串中,用vis标记出现过的字符串,以免重复。还有一个地方要注意的是中间的分隔尽量不要一样,我就用了96个字符进行分隔,这可大幅度减少错误。
测试数据:
3
aaa
aaa
aaa
4
abcd
efgh
muck
nstv
3
abc
bcd
cde
3
abcdefg
bcdefgh
cdefghi
3
xxx
yyy
zzz
代码:
[cpp]
#include
#include
#define MIN 110
#define MAX 1100
#define INF 110000
char str[MIN][MAX];
int ans[MAX],tot,minlen,maxlen;
int sa[INF],ra[INF],h[INF],len;
int n,arr[INF],in[INF],vis[MIN];
int wa[INF],wb[INF],wv[INF],wn[INF];
int cmp(int *r,int ra,int rb,int l){
return r[ra] == r[rb] && r[ra+l] == r[rb+l];
}
void Da(int *r,int n,int m) {
int i,j,k,p,*t;
int *x = wa,*y = wb;
for (i = 0; i < m; ++i) wn[i] = 0;
for (i = 0; i < n; ++i) wn[x[i]=r[i]]++;
for (i = 1; i < m; ++i) wn[i] += wn[i-1];
for (i = n - 1; i >= 0; --i) sa[--wn[x[i]]] = i;
for (j = 1,p = 1; p < n; j *= 2,m = p) {
for (p = 0,i = n - j; i < n; ++i) y[p++] = i;
for (i = 0; i < n; ++i) if (sa[i] >= j) y[p++] = sa[i] - j;
for (i = 0; i < n; ++i) wv[i] = x[y[i]];
for (i = 0; i < m; ++i) wn[i] = 0;
for (i = 0; i < n; ++i) wn[wv[i]]++;
for (i = 1; i < m; ++i) wn[i] += wn[i-1];
for (i = n - 1; i >= 0; --i) sa[--wn[wv[i]]] = y[i];
t = x,x = y,y = t,p = 1;
for (x[sa[0]] = 0,i = 1; i < n; ++i)
x[sa[i]] = cmp(y,sa[i-1],sa[i],j) p - 1 : p++;
}
}
void CalHeight(int *r,int n) {
int i,j,k = 0;
for (i = 1; i <= n; ++i) ra[sa[i]] = i;
for (i = 0; i < n; h[ra[i++]] = k)
for (k k-- : 0,j = sa[ra[i]-1]; r[i+k] == r[j+k]; k++);
}
void Solve(int *r,int len,int *ans,int minlen) {
int i,j,num,beg,cnt,flag;
int low,high,mid,half;
maxlen = 0;
low = 1,high = minlen;
half = n / 2.0;
while (low <= high) {
cnt = 0;
mid = low + (high - low) / 2;
for (i = 1; i <= len; ++i){
if (h[i] < mid) {
memset(vis,0,sizeof(vis));
vis[in[sa[i]]] = 1,flag = 0; //flag标记使得当前开始的答案只记录一次
num = 1,beg = sa[i+1]; //beg从哪个地方开始,num表示子串出现在几个字符串中
}
else{
j = sa[i];
if (vis[in[j]] == 0 && arr[j] != '$')
num++,vis[in[j]] = 1;
if (num > half && !flag)
ans[cnt++] = beg,flag = 1;
}
}
if (cnt != 0) {
//符合情况
maxlen = mid;
tot = cnt;
low = mid + 1;
}
else high = mid - 1;
}
}
int main()
{
int i,j,k,c;
scanf("%d",&n);
do{
minlen = 0,tot = 0,c = 0;
memset(in,0,sizeof(in));
memset(arr,0,sizeof(arr));
memset(ra,0,sizeof(ra));
for (i = 0; i < n; ++i) {
scanf("%s",str[i]);
if (strlen(str[i]) > minlen);
minlen = strlen(str[i]);
}
for (i = k = 0; i < n; ++i) {
for (j = 0; str[i][j]; ++j)
arr[k] = str[i][j],in[k++] = i;
c = (c + 1) % 96,arr[k++] = c;
}
len = k - 1;
Da(arr,len + 1,150);
CalHeight(arr,len);
Solve(arr,len,ans,minlen);
if (n == 1) printf("%s\n",str[0]);
else if (maxlen == 0) printf(" \n");
else {
for (i = 0; i < tot; ++i)
for (j = ans[i]; j < ans[i] + maxlen; ++j)
printf(j == (ans[i] + maxlen - 1) "%c\n":"%c",arr[j]);
}
scanf("%d",&n);
if (n != 0) printf("\n");
}while (n);
}
本文ZeroClock原创,但可以转载,因为我们是兄弟。
摘