[题解]-CF954I Yet Another String Matching Problem

孩子的第一道卷积题

题意

假设两个相等长度的字符串\(u,v\). 每次可以进行一次操作将\(u,v\)中某种字符同时全部变为另一种字符, 设\(F(u,v)\)为使得\(u,v\)相等的最小操作次数.

现在给出两个字符串\(s,t\), 且保证\(|s| \geq |t|\). 求对于\(s\)的全部长度为\(|t|\)的子串\(s'\), \(F(s',t)\)的值各为多少.

保证\(\sum = \{\text{a,b,c,d,e,f}\}\), \(1 \leq |t| \leq |s| \leq 1.25\cdot 10^5\)

题解

首先求解\(F(u,v)\). 我们建一张图, 图中节点为字母\(a...f\). 对所有的\(i = 1...|u|\), 连边\((u[i],v[i])\). 显然答案最小值为该图任意生成树边的数目, 可以简单地用并查集求解. 暴力求解复杂度为\(O(|s|^2)\).

考虑优化. 设\(n = |s|,m = |t|.\)\(f[c][d][idx]\)\(s[idx- m + 1...idx]\)中字符\(c\)\(t\)中字符\(d\)下标(从0开始)一一对应的数目. 例如\(s = \text{bbcdefa}, t = \text{ddcb}\). 则\(f['b']['d'][3] = 2\).

求出\(f[c][d][idx]\)后, 对于每个\(f[c][d][idx] > 0\), 从\(c\)\(d\)连一条边, 再按\(F(u,v)\)的方法求解, 便能求出每个\(idx\)的答案.

下面是FFT处理这类字符串匹配问题的套路做法:

假设我们需要求\(f[c][d]\). 设多项式\(g,h\). 令\(g(i) = [s[i] == c],h(i) = [t[j] == d]\). 此后将\(h\)翻转, 我们发现\(g\)\(h\)的卷积: \[ (g \ast h)[i] = \sum_{j = 0}^i g[j]\cdot h[i-j] \] 恰好是我们想要的\(f[c][d][]\).

枚举每个\(c,d \in \sum\), 便能高效求解问题. 时间复杂度\(|\sum|^2n\log n\)

优化: 容易发现\(g\)\(h\)各只有\(6\)种, 因此我们只需做\(12\)次(而非\(36\)次)DFT. 并且\(c == d\)的情况显然对答案没有贡献, 无需考虑.

代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
//
#include <bits/stdc++.h>
using namespace std;
typedef long long ll;
#define endl '\n'
typedef double ld;
typedef complex<ld> cld;
typedef vector<cld> Poly;
const int maxn = 6e6+10;
const ld PI = acos(-1.0);
int rev[maxn];
void init(Poly & A,int n){
A.clear();
A.shrink_to_fit();
int size = 1;
while(size <= n)size *= 2;
A.assign(size*2,0);
}
void FFT_init(Poly A){
int n = A.size();
int S = log2(n);
for(int i = 0;i < n;i++)rev[i] = (rev[i>>1] >> 1) | ((i & 1) << (S - 1));
}
void FFT(Poly & A,int type){
int n = A.size();
for(int i = 0;i < n;i++)if(i < rev[i])swap(A[i],A[rev[i]]);
for(int i = 1;i < n;i *= 2){
cld Wn(cos(PI/i),type*sin(PI/i));
for(int j = 0;j < n;j += i*2){
cld W(1,0);
for(int k = 0;k < i;k++){
cld facx = A[j+k],facy = W*A[j+k+i];
A[j+k] = facx + facy;
A[j+k+i] = facx - facy;
W *= Wn;
}
}
}
if(type == -1)for(int i = 0;i < (int)A.size();i++)A[i].real((int)(A[i].real()/A.size() + 0.5));
}
vector<int>f[10][10];
Poly DFTA[10],DFTB[10];
int father[10];
void init(){
for(int i = 0;i < 10;i++)father[i] = i;
}
int findset(int e){
return father[e] == e?e:father[e] = findset(father[e]);
}
inline void un(int x,int y){
father[findset(x)] = findset(y);
}
bool check(int u,int v){
return findset(u) == findset(v);
}
int n,m;
int cal(int idx){
init();
int cnt = 0;
for(int i = 'a';i <= 'f';i++){
for(int j = 'a';j <= 'f';j++)if(i != j){
int u = i - 'a',v = j - 'a';
if(f[u][v][idx] <= 0)continue;
if(!check(u,v))cnt++;
un(u,v);
}
}
return cnt;
}
signed main(){
ios::sync_with_stdio(0);
cin.tie(0);
string s,t;
cin >> s >> t;
Poly A,B;
n = s.size(),m = t.size();
init(A,n);
FFT_init(A);
for(char c = 'a';c <= 'f';c++){
init(DFTA[c - 'a'],n);
init(DFTB[c - 'a'],n);
for(int i = 0;i < n;i++)if(s[i] == c)DFTA[c - 'a'][i] = 1;
for(int j = 0;j < m;j++)if(t[j] == c)DFTB[c - 'a'][m - j - 1] = 1;
FFT(DFTA[c - 'a'],1);
FFT(DFTB[c - 'a'],1);
}
for(char c = 'a';c <= 'f';c++){
for(char d = 'a';d <= 'f';d++)if(c != d){
A = DFTA[c - 'a'],B = DFTB[d - 'a'];
for(int i = 0;i < (int)A.size();i++)A[i] *= B[i];
FFT(A,-1);
for(int i = 0;i < (int)A.size();i++)f[c - 'a'][d - 'a'].push_back((int)A[i].real());
}
}
for(int i = m-1;i < n;i++)cout << cal(i) << " ";
return 0;
}