[题解]CF1334G-Substring Search

题意

\(p\)为从\(\sum\)\(\sum\)的双射(\(\sum = \{a,b,...,z\})\). 给出两个字符串\(s,t\)(由于个人习惯,此处s,t的含义和原题相反)且满足\((1 \leq |t| \leq |s| \leq 2 \cdot10^5)\). 求对于\(s\)的每个长度为\(|t|\)的子串\(s'\), \(s'\)\(t\)是否匹配. 输出一个长为\(|s| - |t| + 1\)的01串, 其中第\(i\)位为1表示\(s[i...i+|t| - 1]\)\(t\)匹配.

匹配在此题定义为: 设两个长度相等的串\(s',t\), 对全部\(i = 0...|t|-1\), 有\(s'[i] == t[i]\)\(s'[i] == p(t[i])\)

题解

首先我们设匹配函数 \[ F(x) = \sum_{i = x,j = i - x}^{|t| + x - 1}(s[i] - t[j])^2(s[i] - f(t[j]))^2 \] 容易看出, 当且仅当\(F(x) == 0\)时, \(s[x...x+|t| - 1]\)\(t\)匹配. 将和式内部展开后化简, 可以发现展开后每一项都可化为 \(\sum_{i,j = 1}^na_ib_j\)的形式. 我们用经典套路把\(t\)反转后, 便可化为卷积形式用FFT求解.

但是这个式子好像要做七次DFT和IDFT, 太麻烦. 我们尝试乱搞一下: \[ G(x) = \sum_{i = x,j = i-x}^{|t| + x - 1}(s[i] - t[j])(s[i] - f(t[j])) \] 展开后有: \[ G(x) = \sum_{i = x}^{|t| + x - 1}s[i]^2 - \sum_{i = x,j = i-x}^{|t| + x - 1}s[i]\cdot f(t[j]) - \sum_{i = x,j = i-x}^{|t| + x - 1}s[i]\cdot t[j] + \sum_{j = 0}^{|t| - 1} t[j] \cdot f(t[j]) \]

首项和末项用前缀和处理, 剩下两项可以分别进行一次卷积求出.

把平方去掉后由于存在负数项, 因而某些不能被匹配的位置也可能有\(G(x) = 0\). 我们将a-z随机赋上一个权值后再进行计算, 便能大大减少冲突的概率. (我不知道这种做法实际上是否正确, 更不能保证不会被卡, 但这种做法足够通过此题). 由于FFT在计算时会损失精度, 我们使用NTT进行求解.

时间复杂度\(|s|\log |s|\)

代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
//
#include <bits/stdc++.h>
using namespace std;
typedef long long ll;
#define endl '\n'
const int maxn = 6e6+10;
const int p = 998244353;
int Pow(int x,int d){
int tans = 1;
if(d == 0)return 1%p;
int a = Pow(x,d/2);
tans = 1ll*a*a%p;
if(d%2)tans = 1ll*tans*x%p;
return tans%p;
}
int rev[maxn];
void init(vector<int> & x,int n){
int size = 1;
while(size <= n)size *= 2;
x.assign(size*2,0);
}
void NTT_init(int lim){
for (int i = 0; i < lim; ++i) rev[i] = (i & 1) * (lim >> 1) + (rev[i >> 1] >> 1);
}
void NTT(vector<int> & A, int opt) {
int lim = A.size();
int i, j, k, m, gn, g, tmp;
for(i = 0; i < lim; ++i)if (rev[i] < i) swap(A[i], A[rev[i]]);
for(m = 2; m <= lim; m <<= 1) {
k = m >> 1;
gn = Pow(3,(p - 1) / m);
for(i = 0; i < lim; i += m) {
g = 1;
for (j = 0; j < k; ++j, g = 1ll * g * gn % p) {
tmp = 1ll * A[i + j + k] * g % p;
A[i + j + k] = (A[i + j] - tmp + p) % p;
A[i + j] = (A[i + j] + tmp) % p;
}
}
}
if(opt == -1){
auto it = A.begin();it++;
reverse(it,A.end());
int inv = Pow(lim,p-2);
for(i = 0; i < lim; ++i) A[i] = 1ll * A[i] * inv % p;
}
}
typedef vector<int> Poly;
Poly A,B;
int per[30];
int w[maxn][5];
int rnd[30];
int ans[maxn];
mt19937 Rand(19260817);
signed main(){
ios::sync_with_stdio(0);
cin.tie(0);
string s,t;
srand(time(NULL));
for(int i = 0;i < 26;i++)cin >> per[i],per[i]--,rnd[i] = Rand()%p;
cin >> t >> s;
int n = s.size(),m = t.size();
Poly A,B;
init(A,s.size());
init(B,s.size());
NTT_init(A.size());
for(int i = 0;i < n;i++)w[i][0] = (1ll*rnd[s[i] - 'a']*rnd[s[i] - 'a']%p + (i?w[i-1][0]:0))%p;
for(int i = 0;i < m;i++)w[i][3] = (1ll*rnd[t[i] - 'a']*rnd[per[t[i] - 'a']]%p + (i?w[i-1][3]:0))%p;
for(int i = 0;i < n;i++)A[i] = rnd[s[i] - 'a'];
for(int i = 0;i < m;i++)B[m - i - 1] = rnd[t[i] - 'a'];
NTT(A,1);
NTT(B,1);
for(int i = 0;i < (int)A.size();i++)A[i] = (1ll*A[i]*B[i])%p;
NTT(A,-1);
for(int i = 0;i < (int)A.size();i++)w[i][1] = A[i];
init(A,s.size());
init(B,s.size());
for(int i = 0;i < n;i++)A[i] = rnd[s[i] - 'a'];
for(int i = 0;i < m;i++)B[m - i - 1] = rnd[per[t[i] - 'a']];
NTT(A,1);
NTT(B,1);
for(int i = 0;i < (int)A.size();i++)A[i] = (1ll*A[i]*B[i])%p;
NTT(A,-1);
for(int i = 0;i < (int)A.size();i++)w[i][2] = A[i];
for(int i = m-1;i < n;i++){
ll A = (w[i][0] - (i - m >= 0?w[i-m][0]:0) + p)%p,D = w[m-1][3]%p;
ll B = w[i][1],C = w[i][2];
if((((A - B + p)%p - C + p) % p + D)%p == 0)cout << "1";
else cout << "0";
}
return 0;
}