• 大小: 7.03MB
    文件类型: .zip
    金币: 1
    下载: 0 次
    发布日期: 2023-10-23
  • 语言: Python
  • 标签:

资源简介

淘宝商品评价及新闻评论的情感倾向分析,使用 Python 实现。模型包括 RNN 和 CNN。

资源截图

代码片段和文件信息

# Third-party dependencies.
import numpy as np
from sklearn.model_selection import train_test_split

# Project-local package shipped in the same archive as this demo.
from TextClassification import DataPreprocess, TextClassification, load_data

# single target

# load data
#-----------------------------------
data = load_data(name='single')
x = data['evaluation']
# Wrap every label in its own one-element list, so y holds one list per sample.
y = [[label] for label in data['label']]

# data process
#-----------------------------------
# This section only demonstrates the standalone preprocessing helpers; the
# training step below passes the raw texts with x_need_preprocess=True and
# does not consume x_seq / x_vec.
process = DataPreprocess()

# Settings forwarded to text2seq. They must be defined before the call,
# otherwise the script stops with a NameError.
# NOTE(review): these values are demo placeholders -- confirm them against
# the signature and defaults of DataPreprocess.text2seq.
tokenizer = None  # presumably makes text2seq fit a new tokenizer -- TODO confirm
num_words = 2000
maxlen = 30

# cut texts
x_cut = process.cut_texts(texts=x, need_cut=True, word_len=2, savepath=None)
# texts to sequence
# ('tokenizer_savapah' is the keyword spelling used by the call in this demo;
# it is kept unchanged because it has to match the library's parameter name.)
x_seq = process.text2seq(texts_cut=x_cut, tokenizer=tokenizer, tokenizer_savapah=None,
                         num_words=num_words, maxlen=maxlen, batchsize=10000)
# list to array
x_seq = np.array(x_seq)

# texts to word vector
# NOTE(review): the call is made on the preprocessing object and is given the
# cut texts, since the parameter is named texts_cut and no classifier exists
# yet at this point of the script. Confirm that text2vec is provided by
# DataPreprocess.
x_word_vec = process.text2vec(texts_cut=x_cut, sg=1, size=128, window=5, min_count=1)
# texts vector: average the word vectors of each text into a single vector
x_vec = np.array([sum(vectors) / len(vectors) for vectors in x_word_vec])

# train model
#------------------------------------
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

model = TextClassification()
model.fit(x=X_train, y=y_train, method='CNN', model=None,
          x_need_preprocess=True, y_need_preprocess=True,
          epochs=10, batchsize=128, output_type='single')
label_set = model.label_set
y_predict = model.predict(x=X_test, x_need_preprocess=True)
y_predict_label = model.label2toptag(predictions=y_predict, labelset=label_set)
# Accuracy: share of test samples whose predicted label equals the true label.
print(sum(predicted == expected
          for predicted, expected in zip(y_predict_label, y_test)) / len(y_predict))



# multiple target

# load data
#-----------------------------------
data = load_data(name='multiple')
# Each record supplies its 'fact' entry as the input text and its
# 'accusation' entry as the target.
x = [record['fact'] for record in data]
y = [record['accusation'] for record in data]

# train model
#------------------------------------
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

model = TextClassification()
model.fit(x=X_train, y=y_train, method='CNN', model=None,
          x_need_preprocess=True, y_need_preprocess=True,
          epochs=10, batchsize=128, output_type='multiple')
label_set = model.label_set
y_predict = model.predict(x=X_test, x_need_preprocess=True)
y_predict_label = model.label2tag(predictions=y_predict, labelset=label_set)
# Exact-match accuracy: a sample counts as correct only when its predicted
# value compares equal to the corresponding y_test entry as a whole.
print(sum(predicted == expected
          for predicted, expected in zip(y_predict_label, y_test)) / len(y_predict))

 属性            大小     日期    时间   名称
----------- ---------  ---------- -----  ----
     目录           0  2018-07-06 08:28  Text-Classification-master\
     文件          66  2018-07-06 08:28  Text-Classification-master\.gitattributes
     文件        1045  2018-07-06 08:28  Text-Classification-master\.gitignore
     文件        5155  2018-07-06 08:28  Text-Classification-master\README.md
     目录           0  2018-07-06 08:28  Text-Classification-master\TextClassification\
     文件        6138  2018-07-06 08:28  Text-Classification-master\TextClassification\DataPreprocess.py
     文件        6770  2018-07-06 08:28  Text-Classification-master\TextClassification\TextClassification.py
     文件         127  2018-07-06 08:28  Text-Classification-master\TextClassification\__init__.py
     目录           0  2018-07-06 08:28  Text-Classification-master\TextClassification\data\
     文件    21503783  2018-07-06 08:28  Text-Classification-master\TextClassification\data\data_multiple.json
     文件      949081  2018-07-06 08:28  Text-Classification-master\TextClassification\data\data_single.csv
     文件         413  2018-07-06 08:28  Text-Classification-master\TextClassification\load_data.py
     目录           0  2018-07-06 08:28  Text-Classification-master\TextClassification\models\
     文件        2072  2018-07-06 08:28  Text-Classification-master\TextClassification\models\CNN.py
     文件        2048  2018-07-06 08:28  Text-Classification-master\TextClassification\models\RNN.py
     文件         669  2018-07-06 08:28  Text-Classification-master\TextClassification\models\SklearnClf.py
     文件          76  2018-07-06 08:28  Text-Classification-master\TextClassification\models\__init__.py
     文件        2205  2018-07-06 08:28  Text-Classification-master\demo.py
     目录           0  2018-07-06 08:28  Text-Classification-master\demo\
     目录           0  2018-07-06 08:28  Text-Classification-master\demo\.idea\
     文件       15324  2018-07-06 08:28  Text-Classification-master\demo\.idea\workspace.xml
     文件         874  2018-07-06 08:28  Text-Classification-master\demo\demo_net_multiple.py
     文件        2183  2018-07-06 08:28  Text-Classification-master\demo\demo_net_multiple_use_process.py
     文件         862  2018-07-06 08:28  Text-Classification-master\demo\demo_net_single.py
     文件        2161  2018-07-06 08:28  Text-Classification-master\demo\demo_net_single_use_process.py
     文件        1309  2018-07-06 08:28  Text-Classification-master\demo\demo_sklearn.py
     目录           0  2018-07-06 08:28  Text-Classification-master\picture\
     文件      308331  2018-07-06 08:28  Text-Classification-master\picture\data_multiple.png
     文件       61071  2018-07-06 08:28  Text-Classification-master\picture\data_single.png

评论

共有 条评论