利用网络上公开的数据构建一个小型的证券知识图谱/知识库

大小: 55.85MB

文件类型: .gz

金币: 2

下载: 1 次

发布日期: 2023-05-29
语言: 其他
标签: 知识图谱 知识库

高速下载

资源简介

资源截图

小图 / 大图

代码片段和文件信息

import os
import csv
import hashlib


def get_md5(string):
    """Return the hexadecimal MD5 digest of a string.

    The text is encoded as UTF-8 before hashing, so non-ASCII input
    (e.g. Chinese names) is hashed deterministically.

    Args:
        string: The text to hash.

    Returns:
        The MD5 digest as a 32-character lowercase hexadecimal string.
    """
    return hashlib.md5(string.encode("utf-8")).hexdigest()


def build_executive(executive_prep, executive_import):
    """Create an 'executive' file in csv format that can be imported into Neo4j.

    format -> person_id:ID,name,gender,age:int,:LABEL
    label -> Person

    The first row of the input is treated as a header and skipped; rows
    with fewer than three columns are ignored. Only the first three
    columns of each row are used (written out as name, gender, age).

    Args:
        executive_prep: Path of the comma-separated input file.
        executive_import: Path of the csv file to write.
    """
    print('Writing to {} file...'.format(os.path.basename(executive_import)))
    # newline='' lets the csv module control line endings itself, as the
    # csv documentation requires for files passed to reader()/writer().
    with open(executive_prep, 'r', encoding='utf-8', newline='') as file_prep, \
            open(executive_import, 'w', encoding='utf-8', newline='') as file_import:
        file_prep_csv = csv.reader(file_prep, delimiter=',')
        file_import_csv = csv.writer(file_import, delimiter=',')

        headers = ['person_id:ID', 'name', 'gender', 'age:int', ':LABEL']
        file_import_csv.writerow(headers)

        next(file_prep_csv, None)  # skip the header row of the input
        for row in file_prep_csv:
            if len(row) < 3:
                continue
            name, gender, age = row[0], row[1], row[2]
            # The node id is the md5 of the comma-joined 'name', 'gender'
            # and 'age', so identical people map to the same id.
            info_id = get_md5('{},{},{}'.format(name, gender, age))
            file_import_csv.writerow([info_id, name, gender, age, 'Person'])
    print('- done.')


def build_stock（stock_industry_prep stock_concept_prep stock_import）:
    “““Create an ‘stock‘ file in csv format that can be imported into Neo4j.
    format -> company_id:IDnamecode:LABEL
    label -> CompanyST
    “““
    print（‘Writing to {} file...‘.format（stock_import.split（‘/‘）[-1]））
    stock = set（）  # ‘codename‘

    with open（stock_industry_prep ‘r‘ encoding=‘utf-8‘） as file_prep:
        file_prep_csv = csv.reader（file_prep delimiter=‘‘）
        for i row in enumerate（file_prep_csv）:
            if i == 0:
                continue
            code_name = ‘{}{}‘.format（row[0] row[1].replace（‘ ‘ ‘‘））
            stock.add（code_name）

    with open（stock_concept_prep ‘r‘ encoding=‘utf-8‘） as file_prep:
        file_prep_csv = csv.reader（file_prep delimiter=‘‘）
        for i row in enumerate（file_prep_csv）:
            if i == 0:
                continue
            code_name = ‘{}{}‘.format（row[0] row[1].replace（‘ ‘ ‘‘））
            stock.add（code_name）

    with open（stock_import ‘w‘ encoding=‘utf-8‘） as file_import:
        file_import_csv = csv.writer（file_import delimiter=‘‘）
        headers = [‘stock_id:ID‘ ‘name‘ ‘code‘ ‘:LABEL‘]
        file_import_csv.writerow（headers）
        for s in stock:
            split = s.split（‘‘）
            ST = False  # ST flag
            states = [‘*ST‘ ‘ST‘ ‘S*ST‘ ‘SST‘]
            info = []
            for state in states:
                if split[1].startswith（state）:
                    ST = True
                    split[1] = split[1].replace（state ‘‘）

上一篇：机器学习，概率模型和深度学习的讲义（1500+页）和视频链接
下一篇：淘宝买的ecshop生鲜模板，简单修改了一些错误

共有条评论

利用网络上公开的数据构建一个小型的证券知识图谱/知识库

资源简介

资源截图

代码片段和文件信息

评论

相关资源