  • 大小: 2.18MB
  • 文件类型: .zip
  • 金币: 1
  • 下载: 0 次
  • 发布日期: 2023-09-11
  • 语言: Python
  • 标签:

资源简介

NLP之旅(包含NLP文章/代码集锦)

资源截图

代码片段和文件信息

import keras

from .word_embeddings import Wordembeddings
from .glove import GloVeembeddings

'''
    Source: https://github.com/rgsachin/CoVe
'''


class CoVeembeddings(Wordembeddings):
    """CoVe encoder: a Keras model applied on top of GloVe word embeddings.

    ``load_model`` loads a Keras model file and a ``GloVeembeddings`` helper;
    ``encode`` turns sentences into GloVe vectors and feeds them to the Keras
    model.

    Args:
        word_embeddings_dir: Directory handed to the GloVe helper's
            ``load_model`` as ``dest_dir``.
        handle_oov: Forwarded unchanged to ``GloVeembeddings``.
        oov_vector_type: Forwarded unchanged to ``GloVeembeddings``.
        padding: Forwarded unchanged to ``GloVeembeddings``.
        pad_vector_type: Forwarded unchanged to ``GloVeembeddings``.
        max_sequence_length: Forwarded unchanged to ``GloVeembeddings``.
        tokenizer: Callable applied to each sentence in ``encode``. When
            ``None`` the instance's ``_tokenizer_space`` is used
            (presumably inherited from the base class -- it is not defined
            in this class).
        verbose: Passed to the base-class constructor.
    """

    COVE_MODEL_KERAS_URL = 'https://github.com/rgsachin/CoVe/raw/master/Keras_CoVe.h5'

    def __init__(self,
                 word_embeddings_dir,
                 handle_oov=True, oov_vector_type='random',
                 padding=True, pad_vector_type='random',
                 max_sequence_length=50, tokenizer=None,
                 verbose=0):
        super().__init__(verbose=verbose)

        # Previously a caller-supplied tokenizer was silently dropped because
        # only the ``None`` case assigned ``self.tokenizer``.
        if tokenizer is None:
            self.tokenizer = self._tokenizer_space
        else:
            self.tokenizer = tokenizer

        self.word_embeddings_dir = word_embeddings_dir
        self.handle_oov = handle_oov
        self.oov_vector_type = oov_vector_type
        self.padding = padding
        self.pad_vector_type = pad_vector_type
        self.max_sequence_length = max_sequence_length

    def load_model(self, dest_dir, src=None, trainable=True, verbose=0):
        """Load the Keras CoVe model and the GloVe embeddings it relies on.

        Args:
            dest_dir: Destination directory passed to ``self.download``.
            src: Location of the Keras model file; defaults to
                ``COVE_MODEL_KERAS_URL``.
            trainable: Accepted for interface compatibility; not used by
                this method.
            verbose: Forwarded to the GloVe helper's ``load_model``.
        """
        if src is None:
            src = self.COVE_MODEL_KERAS_URL

        # ``download`` is not defined in this class (presumably inherited);
        # its return value is used as the path of the model file.
        file_path = self.download(
            src=src, dest_dir=dest_dir, dest_file=None, uncompress=False)

        self.model = keras.models.load_model(file_path)

        self.word_embs_model = GloVeembeddings(
            handle_oov=self.handle_oov, oov_vector_type=self.oov_vector_type,
            padding=self.padding, pad_vector_type=self.pad_vector_type,
            max_sequence_length=self.max_sequence_length)
        self.word_embs_model.load_model(
            dest_dir=self.word_embeddings_dir, process=False, verbose=verbose)

    def encode(self, x, tokenize=True):
        """Return the Keras model's predictions for the sentences in ``x``.

        Args:
            x: Iterable of sentences, or of already-tokenized sentences when
                ``tokenize`` is false.
            tokenize: When true, ``self.tokenizer`` is applied to every
                element of ``x`` before the GloVe lookup.

        Returns:
            The result of ``self.model.predict`` on the GloVe encoding of
            the tokens. ``load_model`` must have been called first, since it
            is what sets ``self.model`` and ``self.word_embs_model``.
        """
        if tokenize:
            tokens = [self.tokenizer(sentence) for sentence in x]
        else:
            tokens = x

        x_embs = self.word_embs_model.encode(tokens)

        return self.model.predict(x_embs)
    
        

 属性            大小     日期    时间   名称
----------- ---------  ---------- -----  ----
     目录           0  2019-07-30 04:44  nlp-master\
     文件           5  2019-07-30 04:44  nlp-master\.gitignore
     文件       15499  2019-07-30 04:44  nlp-master\README.md
     目录           0  2019-07-30 04:44  nlp-master\aion\
     目录           0  2019-07-30 04:44  nlp-master\aion\embeddings\
     文件        1961  2019-07-30 04:44  nlp-master\aion\embeddings\cove.py
     文件        2877  2019-07-30 04:44  nlp-master\aion\embeddings\doc2vec.py
     文件         838  2019-07-30 04:44  nlp-master\aion\embeddings\document_embeddings.py
     文件        4723  2019-07-30 04:44  nlp-master\aion\embeddings\elmo.py
     文件        3042  2019-07-30 04:44  nlp-master\aion\embeddings\embeddings.py
     文件        2797  2019-07-30 04:44  nlp-master\aion\embeddings\glove.py
     文件        2746  2019-07-30 04:44  nlp-master\aion\embeddings\infersent.py
     目录           0  2019-07-30 04:44  nlp-master\aion\embeddings\infersent_lib\
     文件          83  2019-07-30 04:44  nlp-master\aion\embeddings\infersent_lib\.gitignore
     文件       19332  2019-07-30 04:44  nlp-master\aion\embeddings\infersent_lib\LICENSE
     文件        8103  2019-07-30 04:44  nlp-master\aion\embeddings\infersent_lib\README.md
     文件        3175  2019-07-30 04:44  nlp-master\aion\embeddings\infersent_lib\data.py
     目录           0  2019-07-30 04:44  nlp-master\aion\embeddings\infersent_lib\dataset\
     文件        1969  2019-07-30 04:44  nlp-master\aion\embeddings\infersent_lib\dataset\get_data.bash
     文件        2192  2019-07-30 04:44  nlp-master\aion\embeddings\infersent_lib\dataset\tokenizer.sed
     目录           0  2019-07-30 04:44  nlp-master\aion\embeddings\infersent_lib\encoder\
     文件       48444  2019-07-30 04:44  nlp-master\aion\embeddings\infersent_lib\encoder\demo.ipynb
     文件        2602  2019-07-30 04:44  nlp-master\aion\embeddings\infersent_lib\encoder\extract_features.py
     文件          12  2019-07-30 04:44  nlp-master\aion\embeddings\infersent_lib\encoder\models.py
     文件      580976  2019-07-30 04:44  nlp-master\aion\embeddings\infersent_lib\encoder\samples.txt
     文件       32989  2019-07-30 04:44  nlp-master\aion\embeddings\infersent_lib\models.py
     文件        2517  2019-07-30 04:44  nlp-master\aion\embeddings\infersent_lib\mutils.py
     文件       11112  2019-07-30 04:44  nlp-master\aion\embeddings\infersent_lib\train_nli.py
     文件         838  2019-07-30 04:44  nlp-master\aion\embeddings\sentence_embeddings.py
     文件        4872  2019-07-30 04:44  nlp-master\aion\embeddings\skip_thoughts.py
     文件        1682  2019-07-30 04:44  nlp-master\aion\embeddings\word_embeddings.py
............此处省略39个文件信息

评论

共有 条评论