【题解】HackerRank-Find Strings

题目

https://www.hackerrank.com/challenges/find-strings/problem

题意

求若干个串的所有本质不同子串中第 k 大的子串。

题解

解法 1

  • 建广义后缀自动机(反向插入),然后按字典序建后缀树。
  • 跑一遍 dfs 预处理子树能表示的字符串数量,然后询问在树上跑就好了,由于字符集比较小,也不会出现菊花之类的状况。

代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#include <bits/stdc++.h>
using namespace std;
using LL = long long;
#define FOR(i, x, y) for (decay<decltype(y)>::type i = (x), _##i = (y); i < _##i; ++i)
#define FORD(i, x, y) for (decay<decltype(x)>::type i = (x), _##i = (y); i > _##i; --i)
#ifdef zerol
#define dbg(args...) do { cout << "\033[34;1m" << #args << " -> "; err(args); } while (0)
#else
#define dbg(...)
#endif
void err() { cout << "\033[39;0m" << endl; }
template<template<typename...> class T, typename t, typename... Args>
void err(T<t> a, Args... args) { for (auto x: a) cout << x << ' '; err(args...); }
template<typename T, typename... Args>
void err(T a, Args... args) { cout << a << ' '; err(args...); }
// -----------------------------------------------------------------------------
const int M = 50 * 2 * 2000 + 100;
const int N = 2000 + 5;
string a[N];

int t[M][26], len[M] = {0}, fa[M], sz = 2, last = 1;
char* one[M];
void ins(int ch, char* pp) {
int p = last, np = 0, nq = 0, q = -1;
if (!t[p][ch]) {
np = sz++; one[np] = pp;
len[np] = len[p] + 1;
for (; p && !t[p][ch]; p = fa[p]) t[p][ch] = np;
}
if (!p) fa[np] = 1;
else {
q = t[p][ch];
if (len[p] + 1 == len[q]) fa[np] = q;
else {
nq = sz++; len[nq] = len[p] + 1; one[nq] = one[q];
memcpy(t[nq], t[q], sizeof t[0]);
fa[nq] = fa[q];
fa[np] = fa[q] = nq;
for (; t[p][ch] == q; p = fa[p]) t[p][ch] = nq;
}
}
last = np ? np : nq ? nq : q;
}
int up[M], c[256] = {2}, aa[M];
vector<int> G[M];

void rsort() {
FOR (i, 1, 256) c[i] = 0;
FOR (i, 2, sz) up[i] = *(one[i] + len[fa[i]]);
FOR (i, 2, sz) c[up[i]]++;
FOR (i, 1, 256) c[i] += c[i - 1];
FOR (i, 2, sz) aa[--c[up[i]]] = i;
FOR (i, 2, sz) G[fa[aa[i]]].push_back(aa[i]);
}

int s[M];
void dfs(int u) {
if (!u) return;
s[u] = len[u] - len[fa[u]];
for (int& v: G[u]) {
dfs(v); s[u] += s[v];
}
}

vector<char> ans;
void go(int u, int k) {
FOR (i, len[fa[u]] + 1, len[u] + 1) {
ans.push_back(*(one[u] + i - 1));
if (--k == 0) return;
}
int t = 0;
while (s[G[u][t]] < k) { k -= s[G[u][t]]; ++t; }
assert(t < G[u].size());
go(G[u][t], k);
}


int main() {
#ifdef zerol
freopen("in", "r", stdin);
#endif
int n; cin >> n;
FOR (i, 0, n) {
cin >> a[i]; last = 1;
FORD (j, (int)a[i].length() - 1, -1) ins(a[i][j] - 'a', &a[i][j]);
}
rsort();
dfs(1);
int Qn; cin >> Qn;
while (Qn--) {
int k; cin >> k;
ans.clear();
if (k > s[1]) { puts("INVALID"); continue; }
go(1, k);
for (char& c: ans) putchar(c);
puts("");
}
}

解法 2

  • 同样是后缀自动机,这次是在自动机上跑而不是后缀树上跑。似乎代码能短不少。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#include <bits/stdc++.h>
using namespace std;
using LL = long long;
#define FOR(i, x, y) for (decay<decltype(y)>::type i = (x), _##i = (y); i < _##i; ++i)
#define FORD(i, x, y) for (decay<decltype(x)>::type i = (x), _##i = (y); i > _##i; --i)
#ifdef zerol
#define dbg(args...) do { cout << "\033[32;1m" << #args << " -> "; err(args); } while (0)
#else
#define dbg(...)
#endif
void err() { cout << "\033[39;0m" << endl; }
template<template<typename...> class T, typename t, typename... Args>
void err(T<t> a, Args... args) { for (auto x: a) cout << x << ' '; err(args...); }
template<typename T, typename... Args>
void err(T a, Args... args) { cout << a << ' '; err(args...); }
// -----------------------------------------------------------------------------
const int M = 50 * 2 * 2000 + 100;
const int N = 2000 + 5;
string a[N];

int t[M][26], len[M] = {0}, fa[M], sz = 2, last = 1;
void ins(int ch) {
int p = last, np = 0, nq = 0, q = -1;
if (!t[p][ch]) {
np = sz++;
len[np] = len[p] + 1;
for (; p && !t[p][ch]; p = fa[p]) t[p][ch] = np;
}
if (!p) fa[np] = 1;
else {
q = t[p][ch];
if (len[p] + 1 == len[q]) fa[np] = q;
else {
nq = sz++; len[nq] = len[p] + 1;
memcpy(t[nq], t[q], sizeof t[0]);
fa[nq] = fa[q];
fa[np] = fa[q] = nq;
for (; t[p][ch] == q; p = fa[p]) t[p][ch] = nq;
}
}
last = np ? np : nq ? nq : q;
}

LL cnt[M];
void dfs(int u) {
if (cnt[u]) return; cnt[u] = 1;
FOR (c, 0, 26) {
int v = t[u][c]; if (!v) continue;
dfs(v); cnt[u] += cnt[v];
}
}
void go(int u, int c, int k) {
if (c != -1) putchar(c + 'a');
if (--k == 0) return;
FOR (c, 0, 26) {
int v = t[u][c]; if (!v) continue;
if (cnt[v] < k) k -= cnt[v];
else { go(v, c, k); return; }
}
}

int main() {
int n; cin >> n;
FOR (i, 0, n) {
cin >> a[i]; last = 1;
FOR (j, 0, a[i].length()) ins(a[i][j] - 'a');
}
dfs(1);
int Qn; cin >> Qn;
while (Qn--) {
int k; cin >> k; ++k;
if (k > cnt[1]) puts("INVALID");
else { go(1, -1, k); puts(""); }
}
}