Maximum repetition substring(后缀数组)

描述

传送门:POJ3963

The repetition number of a string is defined as the maximum number R such that the string can be partitioned into R same consecutive substrings. For example, the repetition number of ababab is $3$ and ababa is $1$.

Given a string containing lowercase letters, you are to find a substring of it with maximum repetition number.

Input

The input consists of multiple test cases. Each test case contains exactly one line, which gives a non-empty string consisting of lowercase letters. The length of the string will not be greater than $100,000$.

The last test case is followed by a line containing a #.

Output

For each test case, print a line containing the test case number( beginning with $1$) followed by the substring of maximum repetition number. If there are multiple substrings of maximum repetition number, print the lexicographically smallest one.

Sample Input

1
2
3
ccabababc
daabbccaa
#

Sample Output

1
2
Case 1: ababab
Case 2: aa

思路

  • 罗穗骞的论文题。这类问题先求出原串的后缀数组和高度数组,然后在高度数组上面做事情。
  • 先枚举循环节的长度$l$,然后求$suffix[i]$和$suffix[i+l]$的LCP,记为$k$(即向后匹配),出现次数则为$k/l+1$。但是这存在一个问题,论文中的图也反映了这一点,我们在求LCP的时候是向后匹配的,但是可能存在向前的匹配使得循环次数再多一次,所以我们还要再去向前匹配一下。
  • 由于要输出字典序最小的解,所以要把循环节长度记下来。最后只要在sa数组中跑一遍,因为sa数组本身就是按字典序的,所以第一个符合条件的就是字典序最小的了。

代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#include <bits/stdc++.h>
using namespace std;
#define clr(a, x) memset(a, x, sizeof(a))
#define mp(x, y) make_pair(x, y)
#define pb(x) push_back(x)
#define X first
#define Y second
#define fastin \
ios_base::sync_with_stdio(0); \
cin.tie(0);
typedef long long ll;
typedef long double ld;
typedef pair<int, int> PII;
typedef vector<int> VI;
const int INF = 0x3f3f3f3f;
const int mod = 1e9 + 7;
const double eps = 1e-6;

const int maxn = 1e5 + 5;
char s[maxn];
int sa[maxn], t[maxn], t2[maxn], c[maxn], rnk[maxn], height[maxn];

void build_sa(int m, int n)
{
n++;
int *x = t, *y = t2;
for (int i = 0; i < m; i++) c[i] = 0;
for (int i = 0; i < n; i++) c[x[i] = s[i]]++;
for (int i = 1; i < m; i++) c[i] += c[i - 1];
for (int i = n - 1; ~i; i--) sa[--c[x[i]]] = i;
for (int k = 1; k <= n; k <<= 1)
{
int p = 0;
for (int i = n - k; i < n; i++) y[p++] = i;
for (int i = 0; i < n; i++)
if (sa[i] >= k) y[p++] = sa[i] - k;
for (int i = 0; i < m; i++) c[i] = 0;
for (int i = 0; i < n; i++) c[x[y[i]]]++;
for (int i = 0; i < m; i++) c[i] += c[i - 1];
for (int i = n - 1; ~i; i--) sa[--c[x[y[i]]]] = y[i];
swap(x, y);
p = 1;
x[sa[0]] = 0;
for (int i = 1; i < n; i++)
x[sa[i]] = y[sa[i - 1]] == y[sa[i]] && y[sa[i - 1] + k] == y[sa[i] + k] ? p - 1 : p++;
if (p >= n) break;
m = p;
}
n--;
int k = 0;
for (int i = 0; i <= n; i++) rnk[sa[i]] = i;
for (int i = 0; i < n; i++)
{
if (k) k--;
int j = sa[rnk[i] - 1];
while (s[i + k] == s[j + k]) k++;
height[rnk[i]] = k;
}
}

int dp[maxn][30];
void initrmq(int n)
{
for (int i = 1; i <= n; i++)
dp[i][0] = height[i];
for (int j = 1; (1 << j) <= n; j++)
for (int i = 1; i + (1 << j) - 1 <= n; i++)
dp[i][j] = min(dp[i][j - 1], dp[i + (1 << (j - 1))][j - 1]);
}

int rmq(int l, int r)
{
if (l > r) swap(l, r);
int k = 0;
while ((1 << (k + 1)) <= r - l + 1) k++;
return min(dp[l][k], dp[r - (1 << k) + 1][k]);
}

int lcp(int a, int b)
{
a = rnk[a], b = rnk[b];
if (a > b) swap(a, b);
a++;
return rmq(a, b);
}

int main()
{
#ifndef ONLINE_JUDGE
freopen("1.in", "r", stdin);
freopen("1.out", "w", stdout);
#endif
int kase = 0;
while (~scanf("%s", s) && s[0] != '#')
{
int n = strlen(s);
build_sa(128, n);
/*for (int i = 0; i <= n; i++)
printf("%s\n", s + sa[i]);*/
initrmq(n);
int ans = 0;
vector<int> v;
for (int i = 1; i < n; i++)
for (int j = 0; j + i < n; j += i)
{
int tmp = lcp(j, j + i);
int sum = tmp / i + 1;
int p = j - (i - tmp % i);
if (p >= 0 && tmp % i && lcp(p, p + i) >= (i - tmp % i)) sum++;
if (sum > ans)
{
ans = sum;
v.clear();
v.pb(i);
}
else if (sum == ans)
v.pb(i);
}
int len = -1, st;
for (int i = 1; i <= n && len == -1; i++)
for (int j = 0; j < v.size(); j++)
{
int l = v[j];
if (lcp(sa[i], sa[i] + l) >= (ans - 1) * l)
{
len = l;
st = sa[i];
break;
}
}
s[st + len * ans] = '\0';
printf("Case %d: %s\n", ++kase, s + st);
}
return 0;
}
捐助作者
0%