【题解】hihocoder - 1419 - 后缀数组四·重复旋律4

题目

http://hihocoder.com/problemset/problem/1419

题意

如果一个字符串是由某个长度为 \(l\) 的字符串重复 \(k\) 次得到的,那么称之为\((k,l)\)-重复的。

求一个字符串中 \(k\) 最大的 \((k,l)\) 重复的子串。

题解

  • 题目中的提示用的是后缀数组,复杂度 \(O(n\log n)\)。这里用后缀自动机做这道经典后缀数组题。
  • 如果存在一个 \((k,l)\) 重复的子串,那么一定存在一个状态 \(s\) 使得 \(len(s) \ge (k-1)\times l\),而且 \(EndPos\) 集合中有两个元素的差恰好为 \(l\)
  • 反过来一个状态 \(s\) 所能对应的最大的 \(k\)\(len(s)/d_{min}+1\),其中 \(d_{min}\)\(EndPos\) 集合中最小元素差。
  • 为了求出 \(d_{min}\),有两种方法:
    • 用 STL 中的 set 在树上进行启发式合并,复杂度 \(O(n\log^2 n)\)(其实快得很)。注意不需要在每个状态遍历 \(EndPos\) 集合来计算 \(d_{min}\),如果相同的 \(d\) 在子树中出现过,那么肯定是之前的 \(k\) 计算得到的结果更大(因为 \(len\) 更大),所以只需考虑插入后所形成的新的(更小的) \(d\)
    • 线段树查询子树上最小元素差,当然要借助一下 dfs 序,复杂度 \(O(n \log n)\)

代码1

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#include <bits/stdc++.h>
using namespace std;
using LL = long long;
#define FOR(i, x, y) for (decay<decltype(y)>::type i = (x), _##i = (y); i < _##i; ++i)
#define FORD(i, x, y) for (decay<decltype(x)>::type i = (x), _##i = (y); i > _##i; --i)
#ifdef zerol
#define dbg(args...) do { cout << "\033[32;1m" << #args << " -> "; err(args); } while (0)
#else
#define dbg(...)
#endif
void err() { cout << "\033[39;0m" << endl; }
template<template<typename...> class T, typename t, typename... Args>
void err(T<t> a, Args... args) { for (auto x: a) cout << x << ' '; err(args...); }
template<typename T, typename... Args>
void err(T a, Args... args) { cout << a << ' '; err(args...); }
// -----------------------------------------------------------------------------
const int maxn = 1E5 + 100, INF = 1E9;

namespace SAM {
const int M = maxn << 1;
int t[M][26], len[M] = {-1}, fa[M], sz = 2, last = 1;
set<int> pos[M];
void ins(int ch, int _pos) {
int p = last, np = last = sz++;
len[np] = len[p] + 1; pos[np].insert(_pos);
for (; p && !t[p][ch]; p = fa[p])
t[p][ch] = np;
if (!p) { fa[np] = 1; return; }
int q = t[p][ch];
if (len[p] + 1 == len[q]) fa[np] = q;
else {
int nq = sz++; len[nq] = len[p] + 1;
memcpy(t[nq], t[q], sizeof t[0]);
fa[nq] = fa[q];
fa[np] = fa[q] = nq;
for (; t[p][ch] == q; p = fa[p])
t[p][ch] = nq;
}
}

int c[maxn] = {1}, a[M];
void rsort() {
FOR (i, 1, sz) c[len[i]]++;
FOR (i, 1, maxn) c[i] += c[i - 1];
FOR (i, 1, sz) a[--c[len[i]]] = i;
}

}

using namespace SAM;

int merge(set<int>& a, set<int>& b) {
if (a.size() < b.size()) a.swap(b);
int ret = INF;
for (auto& x: b) {
auto it = a.insert(x).first, it2 = it;
if (it != a.begin()) ret = min(ret, x - *--it);
if (++it2 != a.end()) ret = min(ret, *it2 - x);
}
return ret;
}

int min_d[M];
char s[maxn];
int main() {
scanf("%s", s);
FOR (i, 0, strlen(s)) ins(s[i] - 'a', i);
rsort();
memset(min_d, 0x3f, sizeof min_d);
int ans = 1;
FORD (i, sz - 1, 1) {
int u = a[i];
ans = max(ans, len[u] / min_d[u] + 1);
min_d[fa[u]] = min(min_d[fa[u]], merge(pos[fa[u]], pos[u]));
}
cout << ans << endl;
}

代码2

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#include <bits/stdc++.h>
using namespace std;
using LL = long long;
#define FOR(i, x, y) for (decay<decltype(y)>::type i = (x), _##i = (y); i < _##i; ++i)
#define FORD(i, x, y) for (decay<decltype(x)>::type i = (x), _##i = (y); i > _##i; --i)
#ifdef zerol
#define dbg(args...) do { cout << "\033[32;1m" << #args << " -> "; err(args); } while (0)
#else
#define dbg(...)
#endif
void err() { cout << "\033[39;0m" << endl; }
template<template<typename...> class T, typename t, typename... Args>
void err(T<t> a, Args... args) { for (auto x: a) cout << x << ' '; err(args...); }
template<typename T, typename... Args>
void err(T a, Args... args) { cout << a << ' '; err(args...); }
// -----------------------------------------------------------------------------
const int maxn = 1E5 + 100, INF = 1E9;

namespace TREE {
struct P {
int minv, maxv, d;
P(): minv(INF), maxv(-INF), d(INF) {}
P(int v): minv(v), maxv(v), d(INF) {}
};
template<typename T>
P operator & (T&& a, T&& b) {
P r;
r.minv = min(a.minv, b.minv);
r.maxv = max(a.maxv, b.maxv);
r.d = min(min(a.d, b.d), min(abs(a.minv - b.maxv), abs(a.maxv - b.minv)));
return r;
}
P p[maxn << 3];
int n;
#define ls o * 2, l, (l + r) / 2
#define rs o * 2 + 1, (l + r) / 2 + 1, r
void build(int* v, int o = 1, int l = 1, int r = n) {
if (l == r) { p[o] = v[l] != -1 ? P(v[l]) : P(); return; }
build(v, ls); build(v, rs);
p[o] = p[o * 2] & p[o * 2 + 1];
};
P query(int ql, int qr, int o = 1, int l = 1, int r = n) {
if (ql > r || l > qr) return P();
if (ql <= l && r <= qr) return p[o];
return query(ql, qr, ls) & query(ql, qr, rs);
}
}

namespace SAM {
const int M = maxn << 1;
int t[M][26], len[M] = {-1}, fa[M], sz = 2, last = 1, pos[M], pos_idx[M];
vector<int> G[M];
void ins(int ch, int _pos) {
int p = last, np = last = sz++;
len[np] = len[p] + 1; pos[np] = _pos;
for (; p && !t[p][ch]; p = fa[p])
t[p][ch] = np;
if (!p) { fa[np] = 1; return; }
int q = t[p][ch];
if (len[p] + 1 == len[q]) fa[np] = q;
else {
int nq = sz++; len[nq] = len[p] + 1;
memcpy(t[nq], t[q], sizeof t[0]);
fa[nq] = fa[q];
fa[np] = fa[q] = nq;
for (; t[p][ch] == q; p = fa[p])
t[p][ch] = nq;
}
}

int clk = 0, in[M], out[M];
void dfs(int u) {
in[u] = ++clk;
pos_idx[in[u]] = pos[u];
for (int& v: G[u]) dfs(v);
out[u] = clk;
}

int go() {
int ret = 1;
FOR (i, 2, sz) G[fa[i]].push_back(i);
dfs(1);
TREE::n = sz - 1;
TREE::build(pos_idx);
FOR (i, 1, sz)
ret = max(ret, len[i] / TREE::query(in[i], out[i]).d + 1);
return ret;
}

}

using namespace SAM;

char s[maxn];
int main() {
scanf("%s", s);
memset(pos, -1, sizeof pos);
FOR (i, 0, strlen(s)) ins(s[i] - 'a', i);
cout << go() << endl;
}