资源简介

完整的 Python 项目,可直接运行。利用 Python 爬虫爬取今日头条后台数据,然后使用 Flask 框架实现自己的后台,通过爬虫获取今日头条数据,并用 HTML 实现前端来显示数据。网站 UI 的一级界面为自行实现,仿照今日头条网站。

资源截图

代码片段和文件信息

#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
Created on 2017-8-29

@author: Administrator
'''
import urllib2
import urllib
from bs4 import BeautifulSoup
from Type import NewsType
from NewsModel import NewsInfo
import json
# Base address of the Toutiao PC feed API; getNews appends the url-encoded
# query string directly after the trailing '?'.
URL = 'http://www.toutiao.com/api/pc/feed/?'
# Module-level collection of the NewsInfo objects created by getNews.
newsSet = set()
def getNews(nextNewstime, Newstype):
    '''Fetch one page of the Toutiao PC feed and collect its single-mode items.

    Builds the query string from the category and the paging timestamp,
    requests URL with browser-like headers, parses the JSON reply and adds a
    NewsInfo to the module-level newsSet for every item whose 'single_mode'
    field equals True.

    Args:
        nextNewstime: value sent as 'max_behot_time' and
            'max_behot_time_tmp'; it is the value returned by the previous
            call (the __main__ block passes 0 for the first call).
        Newstype: value sent as the 'category' query parameter
            (the __main__ block passes NewsType.NEWS_HOT).

    Returns:
        The 'max_behot_time' value found under the reply's 'next' entry, to
        be passed as nextNewstime on the following call. None when the reply
        carries no such entry.
    '''
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36',
        'Referer': 'http://www.toutiao.com/ch/news_hot/',
    }
    params = {
        'category': Newstype,
        'utm_source': 'toutiao',
        'widen': 1,
        # Paging timestamp, returned by the previous request.
        'max_behot_time': nextNewstime,
        'max_behot_time_tmp': nextNewstime,
        # The key must not carry a leading space, otherwise it is encoded
        # as '+tadrequire' and no longer matches the intended parameter.
        'tadrequire': 'true',
        'as': 'as:A145E9AAB5D2B44',  # purpose unknown
        'cp': '59A5C23B14C4EE1',  # purpose unknown
    }
    request = urllib2.Request(URL + urllib.urlencode(params), headers=headers)
    response = urllib2.urlopen(request)
    try:
        d = json.load(response)
    finally:
        # Always release the HTTP connection, even if the body is not JSON.
        response.close()

    # A reply without 'data' or 'next' is treated as an empty page instead of
    # crashing on None.
    items = d.get('data') or []
    nextTime = d.get('next') or {}
    nextNewstime = nextTime.get("max_behot_time")

    for new in items:
        if new.get('single_mode') == True:
            # NOTE(review): the key "tittle" is kept as written; if the feed
            # items actually use "title" this always yields None -- confirm.
            tittle = new.get("tittle")
            abstract = new.get("abstract")
            image_url = new.get("image_url")
            group_id = new.get('group_id')
            source = new.get('source')
            source_url = new.get('source_url')
            # NOTE(review): source_url is passed twice, as in the original
            # call; verify against the NewsInfo constructor signature.
            newInfo = NewsInfo(tittle, abstract, image_url, group_id,
                               source, source_url, source_url)
            newsSet.add(newInfo)
    return nextNewstime
if __name__ == '__main__':
    # Fetch the first page of the NewsType.NEWS_HOT category, starting from
    # paging timestamp 0.
    time = getNews(0, NewsType.NEWS_HOT)
    # Further pages can be fetched by feeding the returned timestamp back in:
#    time = getNews(time, NewsType.NEWS_HOT)
#    time = getNews(time, NewsType.NEWS_HOT)

 属性            大小     日期    时间   名称
----------- ---------  ---------- -----  ----
     目录           0  2017-09-07 17:08  ATodatNews\
     文件         381  2017-08-29 11:56  ATodatNews\.project
     文件         434  2017-08-29 11:56  ATodatNews\.pydevproject
     目录           0  2017-08-29 19:13  ATodatNews\.settings\
     文件         192  2017-09-13 15:44  ATodatNews\.settings\org.eclipse.core.resources.prefs
     目录           0  2017-09-07 17:05  ATodatNews\dist\
     文件         142  2017-09-07 17:05  ATodatNews\dist\cookie.txt
     目录           0  2017-09-07 17:04  ATodatNews\dist\static\
     目录           0  2017-09-07 17:04  ATodatNews\dist\static\css\
     文件         127  2017-09-06 14:36  ATodatNews\dist\static\css\global.css
     文件        3586  2017-09-07 15:47  ATodatNews\dist\static\css\home_css.css
     目录           0  2017-09-07 17:04  ATodatNews\dist\static\images\
     文件      112556  2017-09-06 15:34  ATodatNews\dist\static\images\guanggao.png
     文件        3480  2017-08-30 11:37  ATodatNews\dist\static\images\logo.png
     文件        5069  2017-09-05 20:12  ATodatNews\dist\static\images\navi_bar.png
     目录           0  2017-09-07 17:04  ATodatNews\dist\static\js\
     文件        6846  2017-09-05 12:01  ATodatNews\dist\static\js\md5.js
     目录           0  2017-09-07 17:04  ATodatNews\dist\templates\
     文件       32856  2017-09-07 15:48  ATodatNews\dist\templates\home.html
     文件         398  2017-09-06 11:46  ATodatNews\dist\templates\newfile.html
     文件    14948438  2017-09-07 17:03  ATodatNews\dist\toutiao.exe
     目录           0  2017-08-29 19:18  ATodatNews\src\
     目录           0  2017-09-13 15:44  ATodatNews\src\mySpider\
     文件        1866  2017-08-29 19:16  ATodatNews\src\mySpider\Main.py
     文件       12315  2017-09-06 20:37  ATodatNews\src\mySpider\NewSpider.py
     文件        9909  2017-09-06 20:39  ATodatNews\src\mySpider\NewSpider.pyc
     文件        4642  2017-09-07 12:13  ATodatNews\src\mySpider\NewsModel.py
     文件        4863  2017-09-07 12:13  ATodatNews\src\mySpider\NewsModel.pyc
     文件         331  2017-08-29 18:03  ATodatNews\src\mySpider\Type.py
     文件         799  2017-08-29 19:14  ATodatNews\src\mySpider\Type.pyc
     文件           0  2017-08-29 19:13  ATodatNews\src\mySpider\__init__.py
............此处省略20个文件信息

评论

共有 条评论