Common Substrings(后缀数组+单调栈)

描述

传送门:POJ3415

A substring of a string $T$ is defined as: $T(i, k)= T_iT_{i +1}…T_{i+k-1}$, $1≤ i≤ i+k-1≤|T|$.
Given two strings $A$, $B$ and one integer $K$, we define $S$, a set of triples $(i, j, k)$:$$S =\lbrace ( i, j, k)| k≥ K, A(i,k)= B(j,k)\rbrace $$
You are to give the value of $|S|$ for specific $A$, $B$ and $K$.

Input

The input file contains several blocks of data. For each block, the first line contains one integer $K$, followed by two lines containing strings $A$ and $B$, respectively. The input file is ended by $K=0$.

$1 ≤ |A|, |B| ≤ 10^5$
$1 ≤ K ≤ min\lbrace |A|, |B|\rbrace $
Characters of $A$ and $B$ are all Latin letters.

Output

For each case, output an integer $|S|$.

思路

  • 先把两个串连接起来,中间用一个没有出现的字符分开(这类题目都是这个套路)。然后求后缀数组和$height$数组。对$height$数组进行分组使得某个组内的$height$值都不小于$k$。
  • 遍历一遍$height$数组,每遇到一个$A$的后缀就统计一下它前面的$B$的后缀能组成多少公共子串,同理$A$和$B$反一下再扫一遍,就得到了答案。
  • 但是这个统计是$O(n^2)$的。由于两个后缀的LCP是它们的区间最小值,所以可以维护一个自底向上递增的单调栈,同时维护栈中的总和和已经在栈中每一个元素的出现次数。每一次弹出,相当于就是用较小的$height$去替换了较大的。具体的细节看代码细细体会~

代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#include <cstdio>
#include <cstring>
#include <stack>
#include <vector>
using namespace std;
#define clr(a, x) memset(a, x, sizeof(a))
#define mp(x, y) make_pair(x, y)
#define pb(x) push_back(x)
#define X first
#define Y second
#define fastin \
ios_base::sync_with_stdio(0); \
cin.tie(0);
typedef long long ll;
typedef long double ld;
typedef pair<int, int> PII;
typedef vector<int> VI;
const int INF = 0x3f3f3f3f;
const int mod = 1e9 + 7;
const double eps = 1e-6;

const int maxn = 1 << 18;
char s[maxn];
int sa[maxn], t[maxn], t2[maxn], c[maxn], rnk[maxn], height[maxn];

void build_sa(int m, int n)
{
n++;
int *x = t, *y = t2;
for (int i = 0; i < m; i++) c[i] = 0;
for (int i = 0; i < n; i++) c[x[i] = s[i]]++;
for (int i = 1; i < m; i++) c[i] += c[i - 1];
for (int i = n - 1; ~i; i--) sa[--c[x[i]]] = i;
for (int k = 1; k <= n; k <<= 1)
{
int p = 0;
for (int i = n - k; i < n; i++) y[p++] = i;
for (int i = 0; i < n; i++)
if (sa[i] >= k) y[p++] = sa[i] - k;
for (int i = 0; i < m; i++) c[i] = 0;
for (int i = 0; i < n; i++) c[x[y[i]]]++;
for (int i = 0; i < m; i++) c[i] += c[i - 1];
for (int i = n - 1; ~i; i--) sa[--c[x[y[i]]]] = y[i];
swap(x, y);
p = 1;
x[sa[0]] = 0;
for (int i = 1; i < n; i++)
x[sa[i]] = y[sa[i - 1]] == y[sa[i]] && y[sa[i - 1] + k] == y[sa[i] + k] ? p - 1 : p++;
if (p >= n) break;
m = p;
}
n--;
int k = 0;
for (int i = 0; i <= n; i++) rnk[sa[i]] = i;
for (int i = 0; i < n; i++)
{
if (k) k--;
int j = sa[rnk[i] - 1];
while (s[i + k] == s[j + k]) k++;
height[rnk[i]] = k;
}
}

ll solve(int k, int n, int len, bool flag)
{
ll ret = 0;
ll sum[maxn];
int h[maxn], cnt[maxn];
sum[0] = h[0] = cnt[0] = 0;
int top = 0;
int tmp;
for (int i = 1; i <= n; i++)
{
tmp = 0;
while (top && h[top] > height[i])
tmp += cnt[top--];
if (tmp && height[i] >= k)
{
ll Sum = sum[top] + (ll)(height[i] - k + 1) * tmp;
// printf("push: %lld\n", Sum);
sum[++top] = Sum, h[top] = height[i], cnt[top] = tmp;
}
if ((sa[i] > len) ^ flag)
h[++top] = INF, cnt[top] = 1;
else
ret += sum[top];
}
// printf("ret = %lld\n", ret);
return ret;
}

int main()
{
#ifndef ONLINE_JUDGE
freopen("1.in", "r", stdin);
freopen("1.out", "w", stdout);
#endif
int k;
while (~scanf("%d", &k) && k)
{
scanf("%s", s);
int len = strlen(s);
s[len] = '$';
scanf("%s", s + len + 1);
int n = strlen(s);
// printf("%s\n", s);
build_sa(128, n);
ll ans = solve(k, n, len, 0) + solve(k, n, len, 1);
printf("%lld\n", ans);
}
return 0;
}
捐助作者
0%