• 大小: 59.37M
    文件类型: .zip
    金币: 1
    下载: 0 次
    发布日期: 2023-05-14
  • 语言: 其他
  • 标签: 其他  

资源简介

Diff-FSPM-master.zip

资源截图

代码片段和文件信息

#! /usr/bin/env python
# encoding: utf-8

'''
Created on Apr 24, 2014

@summary: Diff-FSPM Algorithm
    - Usage: python Main.py
    - Input parameters: epsilon, l_opt, dataset, min_sup

@author: Lu Guoqing

'''

import sys

# Extend the module search path before the project imports below; the order
# matters because those imports are resolved through these directories.
sys.path.append('conf')
sys.path.append('lib')

import Sanitizer
from Reconstruction import *
from NGramSet import *
from GetOptLength import *
from base.dp_log import dplog
import dp_conf as conf


def init():
    '''
    @summary: Initialization

    Initializes the shared ``dplog`` logger with the log file path taken
    from ``conf.LOG_FILE``.
    '''
    dplog.init_logger(conf.LOG_FILE)


def Diff_FSPM():
    '''
    @summary: Diff-FSPM algorithm

    The Diff-FSPM algorithm consists of the following 3 steps:
        - Local transformation of the original sequence dataset
            - obtain the optimal sequence length l_opt        (done)
            - truncate the original sequence dataset          (done)
        - Level-order traversal building the perturbed closed prefix
          sequence tree
            - pruning by the min_sup constraint
            - pruning by the closed equivalence relation
            - predicted count value vs. noisy count value
        - Described as mining the FSP tree; in practice the result set is
          output directly

    All inputs are read from the ``conf`` module (dataset, epsilon, n_max,
    min_sup and the output path templates). ``conf.l_opt`` is overwritten
    with the value returned by ``GetOptSeqLength``. Returns nothing; the
    intermediate and final n-gram sets are written through ``dump``.
    '''
    # NOTE(review): this "Begin" message is formatted with conf.l_opt before
    # it is recomputed on the next statement, so it reports whatever value
    # conf held on entry -- presumably a configured default; confirm.
    dplog.info( " === Phase 1: Decomposing input sequence dataset to n-grams (%d<=n<=%d) Begin ===" % (1, conf.l_opt) )
    conf.l_opt = GetOptSeqLength(conf.dataset, conf.epsilon, mechanism="Exponential")
    ngram_set = NGramSet( int(conf.l_opt), N_max=int(conf.n_max) )
    ngram_set.load_dataset( conf.dataset, conf.dataset_ngrams % (conf.l_opt) )
    dplog.info( " === Phase 1: Decomposing input sequence dataset to n-grams (%d<=n<=%d) End ===" % (1, conf.l_opt) )

    dplog.info( " === Phase 2: Sanitizing n-grams to build noisy frequent sequential patterns Tree Begin ===" )
    # The sanitizer's return value replaces the original n-gram set.
    ngram_set = Sanitizer.ngram( ngram_set, conf.n_max, conf.epsilon, conf.l_opt, conf.min_sup )
    ngram_set.dump( conf.dataset_noisy % (conf.l_opt, conf.epsilon) )
    dplog.info( " === Phase 2: Sanitizing n-grams to build noisy frequent sequential patterns Tree End ===" )

    dplog.info( " === Phase 3: Synthetic frequent sequential patterns from santized n-grams Begin ===" )
    factory = Reconstruction( ngram_set, conf.min_sup )
    factory.extend()
    # The result is dumped from the factory's own n-gram set after extend().
    factory.ngramset.dump( conf.dataset_result % (conf.l_opt, conf.epsilon) )
    dplog.info( " === Phase 3: Synthetic frequent sequential patterns from santized n-grams End ===" )


def main():
    '''
    @summary: main entry

    Initializes logging, logs a start banner and the run parameters read
    from ``conf`` (dataset, epsilon, min_sup), runs ``Diff_FSPM`` and then
    logs an end banner.
    '''

    init()

    # Banner lines are sized to the banner text so the frame matches it.
    logstr = "+"*8 + "    Start Diff-FSPM Algorithm    " + "+"*8
    dplog.info("")
    dplog.info("+" * len(logstr))
    dplog.info(logstr)
    dplog.info("+" * len(logstr))
    dplog.info("")

    dplog.debug("original sequence database : (%s)"%(conf.dataset))
    dplog.debug("differential privacy budget : (%s)"%(conf.epsilon))
    dplog.debug("minmum support value : (%s)"%(conf.min_sup))

    Diff_FSPM()

    logstr = "+"*8 + "     End Diff-FSPM Algorithm     " + "+"*8
    dplog.info("")
    dplog.info("+" * len(logstr))
    # The closing banner text goes through dplog.success rather than info.
    dplog.success(logstr)
    dplog.info("+" * len(logstr))
    dplog.info("")

    
# Run the algorithm only when this file is executed as a script.
if __name__ == "__main__":

    main()

 属性            大小     日期    时间   名称
----------- ---------  ---------- -----  ----
     目录           0  2014-05-22 03:04  Diff-FSPM-master\
     文件         483  2014-05-22 03:04  Diff-FSPM-master\.gitattributes
     文件        2643  2014-05-22 03:04  Diff-FSPM-master\.gitignore
     文件        3051  2014-05-22 03:04  Diff-FSPM-master\Main.py
     文件         580  2014-05-22 03:04  Diff-FSPM-master\README.md
     文件        3460  2014-05-22 03:04  Diff-FSPM-master\Reconstruction.py
     文件        7847  2014-05-22 03:04  Diff-FSPM-master\Sanitizer.py
     目录           0  2014-05-22 03:04  Diff-FSPM-master\algorithm\
     目录           0  2014-05-22 03:04  Diff-FSPM-master\algorithm\ngramspan\
     文件        1906  2014-05-22 03:04  Diff-FSPM-master\algorithm\ngramspan\Main.py
     目录           0  2014-05-22 03:04  Diff-FSPM-master\algorithm\ngramspan\data\
     文件        5691  2014-05-22 03:04  Diff-FSPM-master\algorithm\ngramspan\data\msnbc-original-20grams.dat
     文件       13219  2014-05-22 03:04  Diff-FSPM-master\algorithm\ngramspan\data\msnbc-original-32grams.dat
     文件        3508  2014-05-22 03:04  Diff-FSPM-master\algorithm\ngramspan\data\msnbc-original-5grams.dat
     文件    11591683  2014-05-22 03:04  Diff-FSPM-master\algorithm\ngramspan\data\msnbc.dat
     目录           0  2014-05-22 03:04  Diff-FSPM-master\algorithm\ngramspan\lib\
     文件        6659  2014-05-22 03:04  Diff-FSPM-master\algorithm\ngramspan\lib\NGramSet.py
     文件         565  2014-05-22 03:04  Diff-FSPM-master\algorithm\ngramspan\lib\ProgressBar.py
     文件        1097  2014-05-22 03:04  Diff-FSPM-master\algorithm\ngramspan\lib\Utils.py
     文件           0  2014-05-22 03:04  Diff-FSPM-master\algorithm\ngramspan\lib\__init__.py
     目录           0  2014-05-22 03:04  Diff-FSPM-master\algorithm\utility\
     文件         160  2014-05-22 03:04  Diff-FSPM-master\algorithm\utility\run.sh
     文件        6041  2014-05-22 03:04  Diff-FSPM-master\algorithm\utility\utility.py
     目录           0  2014-05-22 03:04  Diff-FSPM-master\conf\
     文件           0  2014-05-22 03:04  Diff-FSPM-master\conf\__init__.py
     文件        1536  2014-05-22 03:04  Diff-FSPM-master\conf\dp_conf.py
     目录           0  2014-05-22 03:04  Diff-FSPM-master\data\
     目录           0  2014-05-22 03:04  Diff-FSPM-master\data\input\
     文件      957429  2014-05-22 03:04  Diff-FSPM-master\data\input\BMS1_spmf.seq
     文件     4022055  2014-05-22 03:04  Diff-FSPM-master\data\input\T10I4D100K.dat
     文件    35509823  2014-05-22 03:04  Diff-FSPM-master\data\input\accidents.dat
............此处省略89个文件信息

评论

共有 条评论