资源简介

天池大赛第六名。这个赛题不是典型的分类、聚类问题,而是时间序列问题;这类时间序列问题的关键在于学习出周期性和趋势。为了达到学出周期性和趋势的目标,可以试用统计学中典型的时间序列模型,如 STL 分解和 ARIMA。这类模型的优点是便捷,弱点是像个黑盒,不太便于添加更多特征。

资源截图

代码片段和文件信息

########################## fit ##################################################
import csv
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as pltf
from sklearn.linear_model import Ridge
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
import statsmodels.api as sm
from statsmodels.tsa.arima_model import ARIMA

from pp import *

def fitJA(j, start_date_rank):
    """Plot one artist's daily series together with polynomial ridge fits.

    Reads ``artists_play_inday[j]`` (not defined in this block; presumably
    provided by ``from pp import *`` -- confirm), builds a dense per-day
    series for day indices ``start_date_rank .. 182``, scatters it, and
    overlays the predictions of ``PolynomialFeatures(degree) + Ridge``
    pipelines evaluated on day indices ``start_date_rank .. 243``.

    Args:
        j: Index into ``artists_play_inday``.
        start_date_rank: First day index to include in the fit and the plot.

    Returns:
        None. The function prints the intermediate series and shows a
        matplotlib figure.
    """
    pltf.clf()

    # Each record is used as (value, day_index): element 0 is stored at the
    # slot given by element 1.
    # NOTE(review): slicing by ``start_date_rank`` assumes one record per day,
    # ordered by day index -- confirm against how pp builds this list.
    records = artists_play_inday[j][start_date_rank:]
    print(records)

    # Dense series; days without a record keep the value 0.
    apcount = [0] * (183 - start_date_rank)
    for record in records:
        apcount[record[1] - start_date_rank] = record[0]
    print(apcount)

    d_train = np.arange(start_date_rank, 183)
    c_train = np.asarray(apcount)

    # Column-vector versions of the day indices, as the pipeline expects a
    # 2-D feature matrix.
    D_train = d_train[:, np.newaxis]
    d_test_plot = np.arange(start_date_rank, 244)
    D_test_plot = d_test_plot[:, np.newaxis]

    pltf.scatter(d_train, c_train, label="training points")

    # NOTE(review): the degree list was reconstructed as 1, 2, 3 from a
    # source whose separators were lost -- confirm against the repository.
    for degree in [1, 2, 3]:
        model = make_pipeline(PolynomialFeatures(degree), Ridge())
        model.fit(D_train, c_train)
        c_test_plot = model.predict(D_test_plot)
        pltf.plot(d_test_plot, c_test_plot, label="degree %d" % degree)

    # Legend laid out as a horizontal strip above the axes.
    pltf.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=5,
                mode="expand", borderaxespad=0.)
    pltf.show()


def test(degree):
    """Score a polynomial ridge fit of the given degree on a hold-out split.

    For each of the first 50 entries of ``artists_play_inday`` (not defined
    in this block; presumably provided by ``from pp import *`` -- confirm),
    a dense 184-day series is built, the first 122 days are used to fit
    ``PolynomialFeatures(degree) + Ridge`` and the remaining days are
    predicted. Per artist the function computes:

    * ``error_rate_j``: root mean square of the relative error between the
      ceiled prediction and the true value (a true value of 0 is treated
      as 1 to avoid dividing by zero),
    * ``weight_j``: square root of the sum of the true hold-out values,
    * ``f_j = (1 - error_rate_j) * weight_j``,

    and ``F`` is the sum of all ``f_j``.

    Args:
        degree: Polynomial degree passed to ``PolynomialFeatures``.

    Returns:
        None. The per-artist lists and the total ``F`` are printed.
    """
    error_rate_of_artist = []
    weight_of_artist = []
    f_of_artist = []
    F = 0.0

    for j in range(0, 50):
        # Dense series; each record is used as (value, day_index) and days
        # without a record keep the value 0.
        apcount = [0] * 184
        for record in artists_play_inday[j]:
            apcount[record[1]] = record[0]

        apdate = np.arange(0, 184)
        x = apdate[:122]
        x_test = apdate[122:]
        y = np.asarray(apcount[:122])
        y_test_true = np.asarray(apcount[122:])

        # The pipeline expects 2-D feature matrices (one column: day index).
        X = x[:, np.newaxis]
        X_test = x_test[:, np.newaxis]

        model = make_pipeline(PolynomialFeatures(degree), Ridge())
        model.fit(X, y)
        y_test_pred = model.predict(X_test)

        error_rate_pow2_sum = 0.0
        weight = 0.0
        for predicted, actual in zip(y_test_pred, y_test_true):
            y_true = actual
            if y_true == 0:
                y_true = 1  # deal with divide by zero

            relative_error = (
                float(int(math.ceil(predicted)) - y_true) / float(y_true)
            )
            error_rate_pow2_sum += relative_error ** 2
            # The weight uses the unmodified true value (zeros stay zero).
            weight += actual

        error_rate_j = math.sqrt(error_rate_pow2_sum / float(len(x_test)))
        error_rate_of_artist.append(error_rate_j)
        weight_j = math.sqrt(weight)
        weight_of_artist.append(weight_j)
        f_j = (1 - error_rate_j) * weight_j
        f_of_artist.append(f_j)
        F += f_j

    # Single-argument print calls give the same "label value" output under
    # both Python 2 and Python 3.
    print('degree %s' % (degree,))
    print('error_rate_of_artist %s' % (error_rate_of_artist,))
    print('weight_of_artist %s' % (weight_of_artist,))
    print('f_of_artist %s' % (f_of_artist,))
    print('F %s' % (F,))


def pred(degree):
    predict_file_path = “./data/mars_tianchi_artist_plays_predict.csv“
    fp = open(predict_file_path ‘

 属性            大小     日期    时间   名称
----------- ---------  ---------- -----  ----
     目录           0  2016-06-19 14:09  AliMusicTrendPredict-master\
     文件          81  2016-06-19 14:09  AliMusicTrendPredict-master\.gitignore
     文件       12328  2016-06-19 14:09  AliMusicTrendPredict-master\F.png
     文件       16782  2016-06-19 14:09  AliMusicTrendPredict-master\README.md
     目录           0  2016-06-19 14:09  AliMusicTrendPredict-master\s1d1\
     文件         562  2016-06-19 14:09  AliMusicTrendPredict-master\s1d1\0605_degreeOfEachArtist.txt
     目录           0  2016-06-19 14:09  AliMusicTrendPredict-master\s1d1\data\
     文件           0  2016-06-19 14:09  AliMusicTrendPredict-master\s1d1\data\.placeholder
     文件       11334  2016-06-19 14:09  AliMusicTrendPredict-master\s1d1\le.py
     目录           0  2016-06-19 14:09  AliMusicTrendPredict-master\s1d1\pic\
     文件      149287  2016-06-19 14:09  AliMusicTrendPredict-master\s1d1\pic\artist_trend_0.png
     文件       75044  2016-06-19 14:09  AliMusicTrendPredict-master\s1d1\pic\artist_trend_predict_0.png
     文件      112406  2016-06-19 14:09  AliMusicTrendPredict-master\s1d1\pic\song_trend_0.png
     文件       22432  2016-06-19 14:09  AliMusicTrendPredict-master\s1d1\pp.py
     目录           0  2016-06-19 14:09  AliMusicTrendPredict-master\s1d2\
     文件       56254  2016-06-19 14:09  AliMusicTrendPredict-master\s1d2\artist_34_play_diffrate.png
     文件       80439  2016-06-19 14:09  AliMusicTrendPredict-master\s1d2\artist_34_play_pred.png
     文件       39008  2016-06-19 14:09  AliMusicTrendPredict-master\s1d2\artist_34_play_trend.png
     文件       73673  2016-06-19 14:09  AliMusicTrendPredict-master\s1d2\artist_99_play.png
     文件       90596  2016-06-19 14:09  AliMusicTrendPredict-master\s1d2\artist_99_play_month_seasonal.png
     文件       54569  2016-06-19 14:09  AliMusicTrendPredict-master\s1d2\artist_99_play_trend.png
     文件      104886  2016-06-19 14:09  AliMusicTrendPredict-master\s1d2\artist_99_play_week_seasonal.png
     目录           0  2016-06-19 14:09  AliMusicTrendPredict-master\s1d2\data\
     文件           0  2016-06-19 14:09  AliMusicTrendPredict-master\s1d2\data\.placeholder
     文件       65163  2016-06-19 14:09  AliMusicTrendPredict-master\s1d2\iterate.dat
     文件       22349  2016-06-19 14:09  AliMusicTrendPredict-master\s1d2\le2.py
     目录           0  2016-06-19 14:09  AliMusicTrendPredict-master\s1d2\pic_trend\
     文件           0  2016-06-19 14:09  AliMusicTrendPredict-master\s1d2\pic_trend\.placeholder
     文件       63669  2016-06-19 14:09  AliMusicTrendPredict-master\s1d2\pic_trend\0.png
     文件       43664  2016-06-19 14:09  AliMusicTrendPredict-master\s1d2\pic_trend\1.png
     文件       88492  2016-06-19 14:09  AliMusicTrendPredict-master\s1d2\pic_trend\10.png
............此处省略201个文件信息

评论

共有 条评论