string string string(后缀数组)

描述

传送门:2017 ACM/ICPC Asia Regional Shenyang Online

Uncle Mao is a wonderful ACMER. One day he met an easy problem, but Uncle Mao was so lazy that he left the problem to you. I hope you can give him a solution.
Given a string s, we define a substring that happens exactly $k$ times as an important string, and you need to find out how many substrings which are important strings.

Input

The first line contains an integer $T$ $(T≤100)$ implying the number of test cases.
For each test case, there are two lines:
the first line contains an integer $k$ $(k≥1)$ which is described above;
the second line contain a string s $(length(s)≤10^5)$.
It’s guaranteed that $∑length(s)≤2∗10^6$.

Output

For each test case, print the number of the important substrings in a line.

Sample Input

1
2
3
4
5
2
2
abcabc
3
abcabcabcabc

Sample Output

1
2
6
9

思路

  • 这类经典题目是在$height$数组上搞,所以我们先求原串的后缀数组。
  • 将$height$数组每相邻$k-1$个一看,这个区间的最小值就是排名相邻的$k$个后缀的LCP,那么这个LCP必然出现了$k$次,那么这个LCP的前缀也必然出现了至少$k$次。问题在于解决这些串是不是严格出现了$k$次,只需要判断这些$height$值的前一个和后一个的$height$值,(记做$x,y$)那么长度为$1 \sim max(x,y) $的LCP的前缀出现了至少$k+1$次,因此要去掉这一部分(当然你直接按前面的方法求得出现至少$k+1$次再减去至少出现$k$次的也可以)。
  • 但是还有一个坑点,就是如果$k=1$,$height$数组就不管用了,这个时候我们要把之前的RMQ求LCP变成$n-sa[i]$。
  • 值得注意的是这里维护区间最小值不一定要用RMQ,可以用单调队列来维护。

代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#include <bits/stdc++.h>
using namespace std;
#define clr(a, x) memset(a, x, sizeof(a))
#define mp(x, y) make_pair(x, y)
#define pb(x) push_back(x)
#define X first
#define Y second
#define fastin \
ios_base::sync_with_stdio(0); \
cin.tie(0);
typedef long long ll;
typedef long double ld;
typedef pair<int, int> PII;
typedef vector<int> VI;
const int INF = 0x3f3f3f3f;
const int mod = 1e9 + 7;
const double eps = 1e-6;

const int maxn = 1 << 17;
char s[maxn];
int sa[maxn], t[maxn], t2[maxn], c[maxn], rnk[maxn], height[maxn];
void build_sa(int m, int n)
{ // m为字符集大小(0~m-1),n为字符串的长度
n++;
int *x = t, *y = t2;
for (int i = 0; i < m; i++) c[i] = 0;
for (int i = 0; i < n; i++) c[x[i] = s[i]]++;
for (int i = 1; i < m; i++) c[i] += c[i - 1];
for (int i = n - 1; ~i; i--) sa[--c[x[i]]] = i;
for (int k = 1; k <= n; k <<= 1)
{
int p = 0;
for (int i = n - k; i < n; i++) y[p++] = i;
for (int i = 0; i < n; i++)
if (sa[i] >= k) y[p++] = sa[i] - k;
for (int i = 0; i < m; i++) c[i] = 0;
for (int i = 0; i < n; i++) c[x[y[i]]]++;
for (int i = 0; i < m; i++) c[i] += c[i - 1];
for (int i = n - 1; ~i; i--) sa[--c[x[y[i]]]] = y[i];
swap(x, y);
p = 1;
x[sa[0]] = 0;
for (int i = 1; i < n; i++)
x[sa[i]] = y[sa[i - 1]] == y[sa[i]] && y[sa[i - 1] + k] == y[sa[i] + k] ? p - 1 : p++;
if (p >= n) break;
m = p;
}
int k = 0;
n--;
for (int i = 0; i <= n; i++) rnk[sa[i]] = i;
for (int i = 0; i < n; i++)
{
if (k) k--;
int j = sa[rnk[i] - 1];
while (s[i + k] == s[j + k]) k++;
height[rnk[i]] = k;
}
}

int dp[maxn][30];
void initrmq(int n)
{
for (int i = 1; i <= n; i++)
dp[i][0] = height[i];
for (int j = 1; (1 << j) <= n; j++)
for (int i = 1; i + (1 << j) - 1 <= n; i++)
dp[i][j] = min(dp[i][j - 1], dp[i + (1 << (j - 1))][j - 1]);
}
int rmq(int l, int r)
{
int k = 0;
while ((1 << (k + 1)) <= r - l + 1) k++;
return min(dp[l][k], dp[r - (1 << k) + 1][k]);
}
int lcp(int a, int b)
{
a = rnk[a], b = rnk[b];
if (a > b) swap(a, b);
a++;
return rmq(a, b);
}

int main()
{
#ifndef ONLINE_JUDGE
freopen("1.in", "r", stdin);
freopen("1.out", "w", stdout);
#endif
int T;
scanf("%d", &T);
while (T--)
{
int k;
scanf("%d", &k);
scanf("%s", s);
int n = strlen(s);
build_sa(128, n);
ll ans = 0;
height[n + 1] = 0;
if (k == 1)
for (int i = 1; i <= n; i++)
ans += max(0, n - sa[i] - max(height[i], height[i + 1]));
else
{
initrmq(n);
for (int i = 1; i <= n - k + 1; i++)
ans += max(0, rmq(i + 1, i + k - 1) - max(height[i], height[i + k]));
}
printf("%lld\n", ans);
}
return 0;
}
捐助作者