2022年1月24日,第十天
1. 题目链接:P3763 [TJOI2017]DNA
A m a z i n g ! ! ! Amazing!!! Amazing!!! 这题居然有四种思路解决。
:多项解决方案,时间复杂 O ( n l o g n ) O(nlog\ n) O(nlogn),总共跑了 12 12 12 次 F F T FFT FFT ,常数太大,打开 O 2 O2 O2 才 A C AC AC。由于 D N A DNA DNA 序列最多有 4 4 4 对于不同的字符,我们可以分别考虑以下实践和每个字符 c c c ,对于两个串 s , t s,\ t s,t ,长度分别为 n , mn,\ m n, m,则有 a [ i ] = [ s [ i ] = = c ] , b [ i ] = [ t [ i ] = = c ] a[i] = [s[i]==c],\ b[i]=[t[i]==c] a[i]=[s[i]==c], b[i]=[t[i]==c] , a [ i ] a[i] a[i] 和 b [ i ] b[i] b[i] 都是多项式,则有每一个位置的匹配数量为 ∑ i = 0 m − 1 a [ i + k ] ⋅ b [ i ] \displaystyle\sum_{i = 0}^{m - 1}a[i+k]\cdot b[i] i=0∑m−1a[i+k]⋅b[i] ,我们发现这东西很像 F F T FFT FFT ,我们使用一个技巧,将 b [ i ] b[i] b[i] 翻转,令 T [ i ] = b [ m − 1 − i ] T[i] = b[m - 1-i] T[i]=b[m−1−i] ,那么 ∑ i = 0 m − 1 a [ i + k ] ⋅ T [ m − 1 − i ] \displaystyle\sum_{i = 0}^{m - 1}a[i + k]\cdot T[m - 1 - i] i=0∑m−1a[i+k]⋅T[m−1−i] ,下标和为定值 m − 1 + k m - 1 + k m−1+k ,直接上 N T T NTT NTT 或 F F T FFT FFT 即可。
#include <bits/stdc++.h>
#define ull unsigned long long
#define ll long long
#define re register
#define endl '\n'
using namespace std;
const int MOD = 998244353;
const int inf = 0x3f3f3f3f;
const double eps = 1e-8;
const double Pi = acos(-1.0);
const int N = 4e5 + 5;
struct node {
double x, y;
node (double x=0, double y=0): x(x), y(y) {
}
node operator * (node r) {
return node (x*r.x - y*r.y, x*r.y + y*r.x); }
node operator - (node r) {
return node (x - r.x, y - r.y); }
node operator + (node r) {
return node (x + r.x, y + r.y); }
}a[N], b[N];
int rev[N], bit, lim;
void fft (node* f, int flag) {
for (int i = 0; i < lim; i ++)
if (rev[i] > i) swap (f[i], f[rev[i]]);
for (int k = 1; k < lim; k <<= 1) {
node wnk = node (cos (Pi/k), flag * sin (Pi/k));
for (int i = 0; i < lim; i += k << 1) {
node w = node (1, 0);
for (int j = 0; j < k; j ++, w = w * wnk) {
node nx = f[i + j], ny = w * f[i + j + k];
f[i + j] = nx + ny;
f[i + j + k] = nx - ny;
}
}
}
if (flag == -1) for (int i = 0; i < lim; i ++) f[i].x /= lim;
}
char ch[] = {
'A', 'G', 'C', 'T'};
char s[N], t[N];
int ans[N], n, m;
int solve () {
n = strlen (s), m = strlen (t);
for (lim = 1, bit = 0; lim < n + m; lim <<= 1) bit ++;
for (int i = 0; i < lim; i ++)
rev[i] = (rev[i >> 1] >> 1) | ((i & 1) << (bit - 1));
for (int i = 0; i < lim; i ++) ans[i] = 0;
for (int j = 0; j < 4; j ++) {
for (int i = 0; i < n; i ++) a[i] = node (s[i] == ch[j], 0);
for (int i = n; i < lim; i ++) a[i] = node ();
for (int i = 0; i < m; i ++) b[i] = node (t[m - i - 1] == ch[j], 0);
for (int i = m; i < lim; i ++) b[i] = node ();
fft (a, 1), fft (b, 1);
for (int i = 0; i < lim; i ++) a[i] = a[i] * b[i];
fft (a, -1);
for (int i = 0; i < n - m + 1; i ++)
ans[i] += int(a[i + m - 1].x + 0.5);
}
int res = 0;
for (int i = 0; i < n - m + 1; i ++)
if (m - ans[i] <= 3) res ++;
return res;
}
int main() {
int T; scanf ("%d", &T);
while (T --) {
scanf ("%s%s", s, t);
printf ("%d\n", solve ());
}
return 0;
}
:后缀自动机解决,时间复杂度 O ( n ) O(n) O(n) 。根据后缀自动机的性质,从源点出发,得到的状态是原串的子串,每一个点的 e n d p o s endpos endpos 大小就是该子串出现的次数。由此我们直接 d f s dfs dfs ,在允许有 3 3 3 次跳过的前提下线性求出贡献。
#include <bits/stdc++.h> #define ull unsigned long long #define ll long long #define re register #define endl '\n' using namespace std; const int MOD = 998244353; const int inf = 0x3f3f3f3f; const double eps = 1e-8; const double Pi = acos(-1.0); const int N = 2e5 + 5; struct node { int link, len, ch[26]; }st[N << 1]; int sz, last, sum[N]; void sam_init () { for (int i = 0; i <= sz; i ++) { st[i].len = st[i].link = sum[i] = 0; memset (st[i].ch, 0, sizeof (st[i].ch)); } sz = last = 1; } void sam_extend (int c) { int p = last, cur = ++ sz; st[cur].len = st[p].len + 1, last = cur, sum[cur] = 1; for ( ; p && ! st[p].ch[c]; p = st[p].link) st[p].ch[c] = cur; if (! p) st[cur].link = 1; else { int q = st[p].ch[c]; if (st[p].len + 1 == st[q].len) st[cur].link = q; else { int clone = ++ sz; st[clone].len = st[p].len + 1; st[clone].link = st[q].link; memcpy (st[clone].ch, st[q].ch, sizeof (st[q].ch)); for ( ; p && st[p].ch[c] == q; p = st[p].link) st[p].ch[c] = clone; st[cur].link = st[q].link = clone; } } } char ch[] = { 'A', 'C', 'G', 'T'}; char s[N], t[N]; int ns, nt, a[N], b[N], ans; void dfs (int u, int j, int tot) { if (j > nt) return void(ans += sum[u]); for (int i = 0; i < 4; i ++) { int v = ch[i] - 'A'; if (st[u].ch[v] == 0) continue; if (ch[i] == t[j]) dfs (st[u].ch[v], j + 1, tot); else if (tot < 3) dfs (st[u].ch[v], j + 1, tot + 1); } } int main() { int T; scanf ("%d", &T); while (