// http://acm.hdu.edu.cn/showproblem.php?pid=2222
// 题意:给出 n 个关键词和一个长的字符串,要找出字符串里面出现了多少个关键词。
// 思路:AC自动机,就是基于字典树的KMP算法,网上博客写得很详细。还有要注意的就是这题的测试数据估计改了,动态分配内存会MLE,只能开数组了。
#include <cstdio>
#include <cstring>
#include <iostream>
using namespace std;
// One state of the trie / automaton. Nodes refer to each other by integer
// index rather than by pointer; index 0 is used as the "no node" value.
struct node {
    int next[26];        // child index for each letter 'a'..'z'; 0 = no child
    int counts;          // keywords ending at this node; query() sets it to -1 once visited
    int fail;            // index of the node to fall back to on a mismatch
    static int newnode;  // index that the next call to Newnode() will hand out
};
// BFS queue of node indices used by Automaton(); same capacity as the node pool.
int que[250010];
// Statically allocated node pool; every node is addressed by its index here.
// NOTE(review): the capacity presumably reflects the problem's bound on the
// total number of trie nodes — confirm against the input limits.
node tr[250010];
// Shared input buffer: main() reads each keyword and then the text into it.
char str[1000010];
// Next unused index in tr. Starts at 1 so that index 0 can mean "no node".
int node::newnode = 1;
//分配没有使用过的下标
int Newnode(){
int pos = node::newnode++;
tr[pos].counts = tr[pos].fail =0;
for(int i = 0; i < 26; i++){
tr[pos].next[i] = 0;
}
return pos;
}
// Inserts one keyword into the trie.
//   rt  - index of the trie root in tr
//   str - NUL-terminated keyword
// Returns rt unchanged.
// Only the letters 'a'..'z' have a slot in next[26]. A keyword containing any
// other character is skipped as a whole, because using it as an index would
// read and write outside the array.
int Build(int rt, char *str)
{
    // Validate first so that a rejected keyword leaves no partial path behind.
    for(int k = 0; str[k]; k++){
        if(str[k] < 'a' || str[k] > 'z')
            return rt;
    }

    int pr = rt;
    for(int i = 0; str[i]; i++){
        int pos = str[i] - 'a';
        if(!tr[pr].next[pos]){              // 0 means the child does not exist yet
            tr[pr].next[pos] = Newnode();
        }
        pr = tr[pr].next[pos];
    }
    tr[pr].counts++;    // one more keyword ends at this node
    return rt;
}
// Computes the fail link of every node reachable from rt with a breadth-first
// traversal, so a node's parent always has its fail link set before the node
// itself is processed.
//   rt - index of the trie root in tr
// Relies on index 0 meaning "no node" and on the root's own fail being 0,
// which is how Newnode() initialises it.
void Automaton(int rt)
{
    int head = 0, tail = 0;
    que[tail++] = rt;
    while(head != tail){
        int pr = que[head++];
        for(int i = 0; i < 26; i++){
            int son = tr[pr].next[i];       // child of pr on letter i
            if(son == 0)                    // indices are ints: compare with 0, not NULL
                continue;
            if(pr == rt){
                // A mismatch right below the root can only fall back to the root.
                tr[son].fail = rt;
            }
            else{
                // Walk the parent's fail chain until a node with a child on
                // letter i is found, or the chain runs out (temp becomes 0).
                int temp = tr[pr].fail;
                while(temp != 0 && !tr[temp].next[i])
                    temp = tr[temp].fail;
                tr[son].fail = (temp != 0) ? tr[temp].next[i] : rt;
            }
            que[tail++] = son;
        }
    }
}
// Scans the text through the automaton and returns how many inserted keywords
// occur in it, each keyword entry being counted at most once.
//   rt  - index of the automaton root in tr
//   str - NUL-terminated text
// Side effect: every node visited while collecting matches gets counts = -1,
// so the automaton must be rebuilt before it can be queried again.
// A character outside 'a'..'z' has no slot in next[26]; it cannot be part of
// any stored keyword, so matching simply restarts from the root.
int query(int rt, char *str)
{
    int pr = rt;
    int sum = 0;
    for(int i = 0; str[i]; i++){
        if(str[i] < 'a' || str[i] > 'z'){
            pr = rt;
            continue;
        }
        int pos = str[i] - 'a';
        // Fall back along fail links until some state can consume this letter.
        while(!tr[pr].next[pos] && pr != rt)
            pr = tr[pr].fail;
        pr = (tr[pr].next[pos] == 0) ? rt : tr[pr].next[pos];

        // Collect every keyword ending here, following fail links. A node
        // already marked -1 means the rest of its chain was collected before.
        for(int temp = pr; temp != rt && tr[temp].counts != -1; temp = tr[temp].fail){
            sum += tr[temp].counts;
            tr[temp].counts = -1;
        }
    }
    return sum;
}
// Reads the number of test cases; for each case reads n keywords, builds the
// automaton, reads the text and prints the number of keywords found in it.
// Every scanf result is checked so that malformed or truncated input ends the
// program instead of running on uninitialised counters or stale buffer data.
int main()
{
    int Test;
    if(scanf("%d", &Test) != 1)
        return 0;
    while(Test--){
        int n;
        if(scanf("%d", &n) != 1)
            break;
        node::newnode = 1;          // reuse the node pool from the start for each case
        int rt = Newnode();
        for(int i = 0; i < n; i++){
            if(scanf("%s", str) != 1)
                break;
            rt = Build(rt, str);
        }
        Automaton(rt);
        if(scanf("%s", str) != 1)
            break;
        printf("%d\n", query(rt, str));
    }
    return 0;
}