资源简介

该段Tensorflow代码可用于文本分类,和情感分类。其主要特点是,在同一份代码中,同时实现两张张量图,一张用于训练,另一张用于测试。并做交叉验证。

资源截图

代码片段和文件信息

#!/usr/bin/env python3
# -*- coding:utf-8 -*-
"""
@Author:zhoukaiyin
@Time:2017/7/22
"""
import glob
import os
import pickle

import gensim
import numpy as np
import tensorflow as tf
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.model_selection import KFold
def data2index():
    """Build word<->index lookup tables over the ../BIO vocabulary.

    Indices start at 1: index 0 is reserved for the random/OOV embedding
    row that w2embed() prepends to the embedding matrix.

    Returns:
        (w2label, label2w): dicts mapping word->index and index->word.
    """
    words = list(get_word())
    # enumerate from 1 so index 0 stays free for the OOV embedding row
    w2label = {w: i for i, w in enumerate(words, 1)}
    label2w = {i: w for i, w in enumerate(words, 1)}
    return w2label, label2w

def get_word():
    """Collect the vocabulary from every file under ../BIO/.

    Each non-empty line is tab-separated; the first field is the token.

    Returns:
        set of unique tokens.
    """
    words = []
    files = glob.glob("../BIO/*")
    for file in files:
        # use a context manager so the handle is closed even on error
        with open(file, 'r') as rf:
            for line in rf:
                line = line.strip()
                if len(line) != 0:
                    w = line.split('\t')[0]
                    words.append(w)
    return set(words)

def get_phara():
    """Load labelled paragraphs and their hand-crafted features.

    Reads the per-document feature dict from h_features.pkl, the
    filename->label map from ../label/result.txt, then for each file
    under ../BIO/ whose label is not 'unknown' collects its token
    sequence (first tab-separated column of each non-empty line).

    Returns:
        (pharas, labels, human_feature): parallel lists of token lists,
        int labels, and feature vectors.
    """
    pharas = []
    labels = []
    human_feature = []
    files = glob.glob("../BIO/*")
    # SECURITY NOTE(review): pickle.load and eval on file contents are
    # unsafe if these files can come from an untrusted source.
    with open("h_features.pkl", 'rb') as hf:
        features = pickle.load(hf, encoding="bytes")
        m_features = eval(str(features))
    with open('../label/result.txt', 'r') as pf:
        dom = pf.read()
        labe_dir = eval(dom)
    for file in files:
        phara = []
        name = os.path.basename(file)
        label = labe_dir[name]
        if label != 'unknown':
            human_feature.append(m_features[name])
            label = int(label)
            with open(file, 'r') as rf:
                for line in rf:
                    line = line.strip()
                    if len(line) != 0:
                        w = line.split('\t')[0]
                        phara.append(w)
            labels.append(label)
            pharas.append(phara)
    return pharas, labels, human_feature

def w2embed():
    """Build the embedding matrix from GloVe vectors and persist it.

    Row 0 is a random vector used as the OOV fallback; rows 1..N follow
    the indices produced by data2index(). For a word missing from the
    GloVe model, the embeddings of its '-'-separated parts are summed;
    if any part is also missing, the word falls back to the row-0 vector.

    Side effects:
        writes embed.pkl (embedding list) and w2label.pkl (word->index).
    """
    w2label, label2w = data2index()
    model = gensim.models.KeyedVectors.load_word2vec_format("glove200.txt", binary=False)
    embeding = []
    dim = model.vector_size
    # random OOV row at index 0
    add = np.random.randn(dim)
    embeding.append(add)
    count = 0   # words not found directly in the model
    ncount = 0  # words whose hyphen-parts were not all found either
    for i in range(1, len(w2label) + 1):
        word = label2w[i].lower()
        try:
            embed = model[word]
        except KeyError:
            count += 1
            embed = np.zeros((dim))
            word_list = word.split('-')
            try:
                # sum the embeddings of the hyphen-separated sub-words
                # (separate loop variable: don't shadow the outer index i)
                for part in word_list:
                    embed += model[part]
            except KeyError:
                ncount += 1
                embed = embeding[0]
        embeding.append(embed)
    print("一共有{}个单词没有被找到有{}个重新被找到!".format(count, count - ncount))
    # context managers replace the original manual open/close pairs
    with open("embed.pkl", 'wb') as wf, open("w2label.pkl", 'wb') as wwf:
        pickle.dump(w2label, wwf)
        pickle.dump(embeding, wf)

def load_data():
    rrf = open(“w2label.pkl“‘rb‘)
    w2label = pickle.load(rrfencoding=“bytes“)
    index_pharas = []
    labels = []
    pharas clsshuman_feature= get_phara()
    for iphara in enumerate(pharas):
        ph

评论

共有 条评论