基于TensorFlow框架的CNN文本分类任务_TensorFlow用CNN实现文本处理-CSDN博客

本文介绍了使用卷积神经网络(CNN)进行文本分类的实践过程，包括数据预处理（确保相同长度和向量维度）、构建考虑不同卷积核大小的模型、特征图的池化以及多特征的融合，最终训练并展示模型性能。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

# CNN文本分类实战
# 1：文本数据预处理，必须都是相同长度，相同向量维度
# 2：构建卷积模型，注意卷积核大小的设计
# 3：将卷积后的特征图池化成一个特征
# 4：将多种特征拼接在一起，准备完成分类任务
import warnings
warnings.filterwarnings("ignore")
import numpy as np
import matplotlib.pyplot as plt
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Hyperparameters shared by the data pipeline and the model definition.
num_features = 3000          # handed to the IMDB loader as num_words
sequence_length = 300        # maxlen applied to every review by pad_sequences
embedding_dimension = 100    # second axis of the model's Input shape

# Load the IMDB train/test splits, restricted by num_features.
imdb = keras.datasets.imdb
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=num_features)

# Pad (or truncate) both sample sets to the common length sequence_length.
x_train, x_test = (
    pad_sequences(samples, maxlen=sequence_length)
    for samples in (x_train, x_test)
)

# Several convolution kernel sizes; each size corresponds to a number of words.
filter_sizes = [3, 4, 5]
def convolution():
    inn = layers.Input(shape=(sequence_length, embedding_dimension, 1))#3维的
    cnns = []
    for size in filter_siz