• 大小: 18.08MB
    文件类型: .rar
    金币: 1
    下载: 0 次
    发布日期: 2023-08-02
  • 语言: C#
  • 标签: c#  余弦算法  相似度  

资源简介

采用 TF 余弦算法对 Word 文档内容进行相似度比较,同时也支持图片的相似度比较;比较结果按相似度降序排序显示,并展示相似度最高的 2 篇文档。

资源截图

代码片段和文件信息

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.IO;
using Word = Microsoft.Office.Interop.Word;
using NPOI.XWPF.UserModel;

namespace TFIDFMeasure
{
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
            
        }
        //文件名
        public List FieldName = new List();
        //文件路径
        public List FieldPath = new List();
        //是否上传了文件
        public bool IsUpLoad = false;
        //相似度比较结果
        public List str = new List();
        public List num = new List();
        public List num1 = new List();

        /// 
        /// 检测按钮事件
        /// 

        /// 
        /// 
        private void button1_Click(object sender EventArgs e)
        {
            if (IsUpLoad == true)
            {
                List note = Doc2Text(FieldPath);
                for (int i = 0; i < note.Count; i++)
                {
                    for (int j = i + 1; j < note.Count; j++)
                    {
                        string name = note[i].Name + “&“ + note[j].Name;
                        string s1 = note[i].Note;
                        string s2 = note[j].Note;
                        Similarity sl = new Similarity();
                        double d = sl.getSimilarity(s1 s2);
                        IAnalyser analyser = new SimHashAnalyser();
                        var l = analyser.GetLikenessValue(s1 s2);
                        double likeness = d;
                        if (System.Math.Abs(d * 100 - l * 100) < 20)
                        {
                            likeness = l;
                        }
                        str.Add(name);
                        num.Add(d);
                        num1.Add(likeness);
                        this.textBox1.Text += string.Format(“文章:{0},文章:{1},相似度:{2}“ name.Split(‘&‘)[0] name.Split(‘&‘)[1] d)+“\r\n“;
                        this.textBox2.Text += string.Format(“文章:{0},文章:{1},相似度:{2}“ name.Split(‘&‘)[0] name.Split(‘&‘)[1] likeness) + “\r\n“;
                        if (System.Math.Abs(d * 100 - likeness * 100) < 10)
                        {
                            this.textBox3.Text += string.Format(“文章:{0},文章:{1}相似度误差小于10%,结果较为准确!“ name.Split(‘&‘)[0] name.Split(‘&‘)[1]) + “\r\n“;
                        }
                        else
                        {
                            this.textBox3.Text += string.Format(“文章:{0},文章:{1}相似度最终结果为:{2}!“ name.Split(‘&‘)[0] name.Split(‘&‘)[1](decimal)(d+likeness)/2) + “\r\n“;
                        }
                    }
                }
            }
            els

 属性            大小     日期    时间   名称
----------- ---------  ---------- -----  ----

     文件     396288  2018-02-28 09:08  TFIDFMeasure\bin\Debug\NPOI.v2.0.OOxml.dll

     文件      84480  2017-07-31 11:47  TFIDFMeasure\bin\Debug\NPOI.v2.0.Openxml4Net.dll

     文件      25600  2018-04-18 15:10  TFIDFMeasure\bin\Debug\TFIDFMeasure.exe

     文件      54784  2018-04-18 15:10  TFIDFMeasure\bin\Debug\TFIDFMeasure.pdb

     文件      11600  2018-04-18 15:21  TFIDFMeasure\bin\Debug\TFIDFMeasure.vshost.exe

     文件        490  2017-03-19 05:00  TFIDFMeasure\bin\Debug\TFIDFMeasure.vshost.exe.manifest

     文件      11513  2018-04-18 15:23  TFIDFMeasure\Form1.cs

     文件       7015  2018-04-17 18:31  TFIDFMeasure\Form1.Designer.cs

     文件       6020  2018-04-17 18:31  TFIDFMeasure\Form1.resx

     文件       5420  2018-04-08 14:58  TFIDFMeasure\obj\x86\Debug\DesignTimeResolveAssemblyReferences.cache

     文件       6650  2018-04-18 15:10  TFIDFMeasure\obj\x86\Debug\DesignTimeResolveAssemblyReferencesInput.cache

     文件     407040  2018-04-09 09:04  TFIDFMeasure\obj\x86\Debug\Interop.Microsoft.Office.Core.dll

     文件     434688  2018-04-09 08:58  TFIDFMeasure\obj\x86\Debug\Interop.Office.dll

     文件        722  2018-04-18 15:21  TFIDFMeasure\obj\x86\Debug\TFIDFMeasure.csproj.FileListAbsolute.txt

     文件        975  2018-04-17 18:32  TFIDFMeasure\obj\x86\Debug\TFIDFMeasure.csproj.GenerateResource.Cache

     文件        870  2018-04-09 09:04  TFIDFMeasure\obj\x86\Debug\TFIDFMeasure.csproj.ResolveComReference.cache

     文件      37119  2018-04-18 15:02  TFIDFMeasure\obj\x86\Debug\TFIDFMeasure.csprojResolveAssemblyReference.cache

     文件      25600  2018-04-18 15:10  TFIDFMeasure\obj\x86\Debug\TFIDFMeasure.exe

     文件        180  2018-04-17 18:32  TFIDFMeasure\obj\x86\Debug\TFIDFMeasure.Form1.resources

     文件      54784  2018-04-18 15:10  TFIDFMeasure\obj\x86\Debug\TFIDFMeasure.pdb

     文件        180  2018-04-17 18:21  TFIDFMeasure\obj\x86\Debug\TFIDFMeasure.Properties.Resources.resources

     文件        493  2018-04-08 14:44  TFIDFMeasure\Program.cs

     文件       1356  2018-04-08 14:44  TFIDFMeasure\Properties\AssemblyInfo.cs

     文件       2876  2018-04-08 14:44  TFIDFMeasure\Properties\Resources.Designer.cs

     文件       5612  2018-04-08 14:44  TFIDFMeasure\Properties\Resources.resx

     文件       1099  2018-04-08 14:44  TFIDFMeasure\Properties\Settings.Designer.cs

     文件        249  2018-04-08 14:44  TFIDFMeasure\Properties\Settings.settings

     文件       4446  2018-04-17 18:06  TFIDFMeasure\SimHashAnalyser.cs

     文件      11097  2018-04-09 08:21  TFIDFMeasure\Similarity.cs

     文件       4621  2018-04-17 18:21  TFIDFMeasure\TFIDFMeasure.csproj

............此处省略19个文件信息

评论

共有 条评论