后缀数组简单总结(一)

2014-11-24 08:09:48 · 作者: · 浏览: 0

主要参考:

http://hi.baidu.com/ahnkftravhdhkyr/item/cc38703dd46547cd392ffab1

及cxlove博客

主要是论文《后缀数组――处理字符串的有力工具》一些题解和其它题目的主要题解


一般的模板:

#include
  
     
#include
   
     #include
    
      #include
     
       #include
      
        #include
       
         #include
        
          #include
         
           #include
           #include
           
             #define REP(i, n) for(int i=0; i
            
             = 0; i--) sa[--wn[x[i]]] = i; for (p = 1, j = 1; p < n; j <<= 1, m = p) { for (p = 0, i = n - j; i < n; i++) y[p++] = i; for (i = 0; i < n; i++) if (sa[i] >= j) y[p++] = sa[i] - j; for (i = 0; i < m; i++) wn[i] = 0; for (i = 0; i < n; i++) wn[wv[i] = x[y[i]]]++; for (i = 1; i < m; i++) wn[i] += wn[i - 1]; for (i = n - 1; i >= 0; i--) sa[--wn[wv[i]]] = y[i]; t = x; x = y; y = t; x[sa[0]] = 0; p = 1; for (i = 1; i < n; i++) x[sa[i]] = cmp(y, sa[i - 1], sa[i], j)   p - 1 : p++; } } void getHeight(int *r, int *sa, int n) { int i, j, k = 0; for (i = 1; i <= n; i++) { rank[sa[i]] = i; height[i] = 0; } for (i = 0;i < n; i++) { if (k) k--; j = sa[rank[i] - 1]; while (r[i + k] == r[j + k]) k++; height[rank[i]] = k; } } 
            
           
         
        
       
      
     
    
   
  


debuge:

debuge:
0.debuge
void debuge_sa()
{
    for (int i = 0; i <= n; i++)
    cout << i << ' ' << rank[i] << ' ' << height[i] << endl;
    cout << "^^^^^^^^^^^^^^" << endl << endl;

    for (int i = 0; i <= n; i++)
    cout << i << ' ' << sa[i] << endl;
    cout << "^^^^^^^^^^^^^^" << endl << endl;

    init_rmq(n);
    for (int i = 1; i <= n; i++)
    {
        for (int j = i; j <= n; j++)
            printf("%d ", rmq(i, j));
        printf("\n");
    }
}
/*
debuge_sa:
ABABABAB
0 4 0
1 8 0
2 3 2
3 7 4
4 2 6
5 6 0
6 1 1
7 5 3
8 0 5
^^^^^^^^^^^^^^


0 8
1 6
2 4
3 2
4 0
5 7
6 5
7 3
8 1
^^^^^^^^^^^^^^


0 2 2 2 0 0 0 0
2 4 4 0 0 0 0
4 6 0 0 0 0
0 0 0 0 0
0 1 1 1
1 3 3
3 5
0
*/


一些题目:

一些论文上的题目:
//1、求单个子串的不重复子串个数。SPOJ 694
int wa[MAXN], wb[MAXN], wv[MAXN], wn[MAXN];
char r[MAXN];
int a[MAXN], sa[MAXN], rank[MAXN], height[MAXN];
int main()
{
    int t,i;
    int n, m;
    cin >> t;
    while (t--)
    {
        RS(r);
        n = strlen(r);
        m = 256;
        REP(i, n)
            a[i] = r[i];
        a[n] = 0;
        build_sa(a, sa, n + 1, m);
        getHeight(a, sa, n);
        int ans = 0;
        FE(i, 1, n) ans += n - sa[i] - height[i];
        cout << ans << endl;
    }
    return 0;
}
/**
4、最长重复不重叠子串 PKU1743
题意:给出一个旋律,用n个数字[1,88]表示其音符,问它最长的主题长度是多少。
一个旋律的主题是一段至少出现过两次的不重叠音乐片段。
所谓重复出现,包括一段音乐全体加上某个数后再次出现。
如1 2 3 4 5和5 6 7 8 9是同一个音乐片段。主题长度至少为5.无解输出0。
*/
int wa[MAXN], wb[MAXN], wv[MAXN], wn[MAXN];
char r[MAXN];
int a[MAXN], sa[MAXN], rank[MAXN], height[MAXN];
bool check(int L, int n)
{
    int smin = sa[1];
    int smax = sa[1];
    for (int i = 2; i <= n; i++)
    {
        if (height[i] < L)
        {
            if (smax - smin >= L) return 1;
            smin = smax = sa[i];
        }
        else
        {
            smin = min(smin, sa[i]);
            smax = max(smax, sa[i]);
        }
    }
//    if (height[n] >= L && smax - smin >= L)///
//        return 1;
    return 0;
}

void solve(int n)
{
    int l = 0;
    int r = n / 2;
    while (l <= r)
    {
        int m = (l + r) >> 1;
        if (check(m, n)) l = m + 1;
        else r = m - 1;
    }
    WI(l);
}
int x[MAXN];
int main()
{
    int t,i;
    int n, m;
    while (~RI(n) && n)
    {
        REP(i, n) RI(x[i]);
        a[0] = 500;
        for (int i = 1; i < n; i++) a[i] = x[i] - x[i - 1] + 100;
        a[n] = 0;
        m = 501;
        build_sa(a, sa, n + 1, m);
        getHeight(a, sa, n);

        solve(n);
    }
    return 0;
}
/**
5、最长的出现k次的重复(可重叠)子串。 PKU3261
题目大意:给出n个数字组成的一个字符串,求最长的恰好出现k次的重复子串(可重叠)的字符串的长度。
分析:后缀数组一个经典的应用。先二分答案,然后分组。只要某一组包含的后缀数量大于等于k,表示有解。这个不难理解。等完成了论文里面的练习之后,我再写个总结笔记吧。
深刻体会到后缀数组的强大....
*/
int wa[MAXN], wb[MAXN], wv[MAXN], wn[MAXN];
int a[MAXN], sa[MAXN], rank[MAXN], height[MAXN];
char r[MAXN];
int n;

int K;
bool check(int k)
{
    int num = 1;
    FE(i, 2, n)
    {
        if (height[i] < k)
        {
            if (num >= K) return 1;
            num = 1;
        }
        else
            num++;
    }
    if (num >= k) return 1;
    return 0;
}

void solve()
{
    int l = 0, r = n, mid;
    while (l <= r)
    {
        mid = (l + r) >> 1;
        if (check(mid)) l = mid + 1;
        else r = mid - 1;
    }
    cout << r << endl;
}

map
  
   M;
int main()
{
    int x[MAXN];
    int T;
    while (~RII(n, K) && n)
    {
        M.clear();
        int MM = 1;
        REP(i, n)
        {
            RI(x[i]);
            if (M.count(x[i]) == 0) M[x[i]] = MM++;
        }
        REP(i, n) a[i] = M[x[i]];
        a[n]= 0;
        build_sa(a, sa, n + 1, MM + 5);
        getHeight(a, sa, n);
        solve();
    }
}
/***
7.求一个串最多由哪个串复制若干次得到 PKU2406 (!!!此代码是网上的)
题目大意:给出一个字符串s,则存在子串a,a重复k次后得到s。求k的最大值。
*/
#include
   
     #include
    
      using namespace std; const int maxn=3000010; int ws[maxn],wa[maxn],wb[maxn],wv[maxn],sa[maxn],rank[maxn],height[maxn],a[maxn],f[maxn]; char s[maxn]; //dc3 #define F(x) ((x)/3+((x)%3==1 0:tb)) #define G(x) ((x)
     
      =0;i--) b[--ws[wv[i]]]=a[i]; return; } void dc3(int *r,int *sa,int n,int m) { int i,j,*rn=r+n,*san=sa+n,ta=0,tb=(n+1)/3,tbc=0,p; r[n]=r[n+1]=0; for(i=0;i
      
       =1;i--){ f[i]=min; min=min
       
        (s[i]); a[n]=0; dc3(a,sa,n+1,123); cal(a,sa,n); rmq(height,n); printf("%d\n",work(n)); } return 0; } /*** 8.最长公共子串 Pku2774--Long Long Message 题目大意:给出两个字符串,求它们的最长公共子串的长度。 */ int wa[MAXN], wb[MAXN], wv[MAXN], wn[MAXN]; int a[MAXN], sa[MAXN], rank[MAXN], height[MAXN]; char r1[MAXN], r2[MAXN]; int nr1, nr2; bool check(int x, int y) { if (x > y) swap(x, y); if (x < nr1 && y > nr1) return 1; else return 0; } void solve() { int ans = 0; FE(i, 2, nr1 + nr2 + 1) { if (check(sa[i], sa[i - 1])) { if (height[i] > ans) ans = height[i]; } } cout << ans << endl; } int main() { while (scanf("%s%s", r1, r2) != EOF) { nr1 = strlen(r1); nr2 = strlen(r2); REP(i, nr1) a[i] = (int)r1[i]; a[nr1] = 1; REP(i, nr2) a[i + nr1 + 1] = (int)r2[i]; a[nr1 + nr2 + 2] = 0; build_sa(a, sa, nr1 + nr2 + 2, 256); getHeight(a, sa, nr1 + nr2); solve(); } } /** 9.重复次数最多的重复子串 SPOJ687--Repeats 题目大意:给出一个字符串,求该字符串中出现次数最多的连续重复子串。 */ int wa[MAXN], wb[MAXN], wv[MAXN], wn[MAXN]; char r[MAXN]; int a[MAXN], sa[MAXN], rank[MAXN], height[MAXN]; int h[MAXN][20]; int init_rmq(int n) { FE(i, 1, n) h[i][0] = height[i]; for (int j = 1; (1 << j) <= n; j++) { for (int i = 1; i + (1 << j) - 1 <= n; i++) { h[i][j] = min(h[i][j - 1], h[i + (1 << (j - 1))][j - 1]); } } } int rmq(int L, int R)///要求L!=R 且 L>=0 , R >= 0 { L = rank[L]; R = rank[R]; if (L > R) swap(L, R); L++; int k = 0; while ((1 << (k + 1)) <= (R - L + 1)) k++; return min(h[L][k], h[R - (1 << k) + 1][k]); } int getnum(int L, int n) { int ans = 1; for (int i = 0; i < n - L; i += L) { int dd = rmq(i, i + L); if (i && dd % L) { dd = L - (dd % L); dd = rmq(i - dd, i + L - dd); } ans = max(ans, dd / L + 1); } return ans; } void solve(int n) { int ans = 1; FE(i, 1, n - 1) { ans = max(ans, getnum(i, n)); } WI(ans); } int main() { int t,i; int n, m; int T; RI(T); while (T--) { RI(n); REP(i, n) scanf(" %c", &r[i]); REP(i, n ) a[i] = r[i]; a[n] = 0; m = 256; build_sa(a, sa, n + 1, m); getHeight(a, sa, n); init_rmq(n); solve(n); } return 0; } /** 10.多个串的公共子串问题 PKU3294 [后缀数组]Pku3294--Life Forms 题目大意:给出n个字符串,求一个最长的串,它出现在一半的串以上。若要多个,按字典顺序输出。 */ int wa[MAXN], wb[MAXN], wv[MAXN], wn[MAXN]; char r[MAXN]; int a[MAXN], sa[MAXN], rank[MAXN], height[MAXN]; //int h[MAXN][30]; int idx[MAXN]; inline int getid(int x) { return idx[sa[x]]; } bool check(int k, int t, int n) { int vis[110];///!!! CLR(vis, 0); int num = (t >> 1) + 1; int step = 1;/// int x = getid(1);/// vis[x] = 1;/// int tol = 1;/// FE(i, 2, n) { if (height[i] < k) { if (tol >= num) return 1; step++; x = getid(i); vis[x] = step; tol = 1; } else { x = getid(i); if (vis[x] != step) { vis[x] = step; tol++; } } } if (tol >= num && height[n - 1] >= k) return 1; return 0; } void print(int k, int t, int n) { int vis[110];///!!! CLR(vis, 0); int num = (t >> 1) + 1; int step = 1;/// int x = idx[sa[1]];/// vis[x] = 1;/// int tol = 1;/// FE(i, 2, n) { if (height[i] < k) { if (tol >= num) { int x = sa[i - 1];///!!! for (int j = x; j < x + k; j++) printf("%c", (char)a[j]); printf("\n"); } step++; x = idx[sa[i]]; vis[x] = step; tol = 1; } else { x =idx[sa[i]]; if (vis[x] != step) { vis[x] = step; tol++; } } } if (tol >= num && height[n - 1] >= k)///!!! { int x = sa[n - 1]; for (int j = x; j < x + k; j++) printf("%c", (char)a[j]); printf("\n"); } } void solve(int t, int n) { int l = 0, r = 1010, mid; while(l <= r) { mid = (l + r) >> 1; if (check(mid, t, n)) l = mid + 1; else r = mid - 1; } if (r == 0) printf(" \n"); el