• 大小: 65.37MB
    文件类型: .zip
    金币: 1
    下载: 0 次
    发布日期: 2023-07-18
  • 语言: Python
  • 标签:

资源简介

淘宝天猫商品数据抓取,代码和exe都在里面

资源截图

代码片段和文件信息

#!/usr/bin/python3.4
# -*- coding: utf-8 -*-

import requests
import json
import time
import os

# One shared HTTP session so that cookies and pooled connections are reused
# by every request issued from this module.
session = requests.session()

# Headers sent with every request made through ``session`` in this module.
# NOTE(review): the user-agent still contains a literal ``%s`` placeholder
# for the Chrome version; nothing visible here fills it in -- confirm
# whether it is formatted elsewhere before relying on it.
headers = {
    'accept': '*/*',
    'accept-encoding': 'gzip, deflate, br',
    'accept-language': 'zh-CN,zh;q=0.9',
    'referer': 'https://uland.taobao.com/semm/tbsearch?refpid=mm_26632258_3504122_32554087&keyword=%E5%A5%B3%E8'
               '%A3%85'
               '&rewriteQuery=1&a=mi={imei}&sms=baidu&idfa={'
               'idfa}&clk1=abab6283306413775910d4b0b37ca047&upsid=abab6283306413775910d4b0b37ca047',
    'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/%s Mobile Safari/537.36',
}

# 这是新的方法,获取详情页的信息需要如下网址参考
# https://blog.csdn.net/github_38782597/article/details/82563477

def getJson(page, keyword):
    """Fetch one page of search results for *keyword* and add detail data.

    The search endpoint is asked for 60 items starting at offset
    ``60 * page`` and answers with a JSONP payload.  The payload is
    unwrapped, the ``result.item`` list is extracted and passed to
    ``getDetail``.

    Args:
        page: Page index; multiplied by 60 to form the ``offset`` query
            parameter.
        keyword: Search keyword, inserted into the URL as given (it is not
            escaped here).

    Returns:
        A dict ``{"listItem": <value returned by getDetail>}``.

    Raises:
        ValueError: If the response is not a ``name(...)`` JSONP wrapper or
            the wrapped text is not valid JSON (``json.JSONDecodeError`` is
            a ``ValueError`` subclass).
        KeyError: If the decoded payload lacks ``result`` or ``item``.
    """
    url = (
        'https://odin.re.taobao.com/m/Nwalltbuad?sbid=sem2_kgb_activity&ignore=CATID%2CRANKINFO%2CMATCHTYPE&pvid=_TL'
        '-41832&refpid=mm_26632258_3504122_32554087&clk1=abab6283306413775910d4b0b37ca047&idfa=%7Bidfa%7D&pid'
        '=430680_1006&keyword=' + keyword + '&count=60&offset=' + str(60 * page) + '&relacount=8&t=1535075213992'
        '&callback'
        '=mn17jsonp1535075213992'
    )
    r = session.get(url=url, headers=headers)
    html = r.text

    # Unwrap the JSONP envelope ``callback( ... )``.  Searching for the last
    # ")" instead of blindly dropping the final character tolerates a
    # trailing ";" or newline after the closing parenthesis.
    start = html.find('(')
    end = html.rfind(')')
    if start == -1 or end <= start:
        raise ValueError('search response is not a JSONP payload')
    datas = json.loads(html[start + 1:end])['result']['item']

    # Enrich each listing with detail-page data (stock, shop name,
    # favourite count).
    detailInfo = getDetail(datas)
    return {"listItem": detailInfo}

def getDetail(datas):
    detailInfo = []
    for item in datas:
        try:
            resource_id = item[“RESOURCEID“]
            url = r‘https://h5api.m.taobao.com/h5/mtop.taobao.detail.getdetail/6.0/?jsv=2.4.8&appKey=12574478&t=1535083295045‘ \
                  r‘&sign=ef22a6dc765bd6ce86d36e2ba9a6cc33&api=mtop.taobao.detail.getdetail&v=6.0&dataType=jsonp&ttid=2017‘ \
                  r‘%40taobao_h5_6.6.0&AntiCreep=true&type=jsonp&callback=mtopjsonp2&data=%7B%22itemNumId%22%3A%22‘ + str(
                resource_id) + r‘%22%7D ‘
            r = session.get(url=url headers=headers)
            html = r.text
            start = html.find(‘(‘)
            datas = (json.loads(html[start + 1:-1]))[‘data‘]
            # 店铺各种信息:名称:shopName、链接:taoShopUrl
            item = dict(item **datas[‘seller‘])
            # 库存
            quantity = json.loads(datas[‘apiStack‘][0][‘value‘])[‘skuCore‘][‘sku2info‘][‘0‘][‘quantity‘]
            item[“quantity“]=quantity
            # 收藏
            favcount=datas[‘item‘][‘favcount‘]
            item[‘favcount‘]=favcount
            # 基本信息
            item[‘groupProps‘] = datas[‘props‘][‘groupProps‘][0][‘基本信息‘]
            # 店铺所在地
            address=json.loads(datas[‘apiStack‘][0][‘value‘])[‘delivery‘][‘from‘]
            item[‘address‘] = address
            # 店铺得分
            score=0
    

 属性            大小     日期    时间   名称
----------- ---------  ---------- -----  ----
     目录           0  2019-05-17 13:31  tbtmGoods-master\
     文件           7  2019-05-17 13:31  tbtmGoods-master\.gitignore
     目录           0  2019-05-17 13:31  tbtmGoods-master\.idea\
     文件         686  2019-05-17 13:31  tbtmGoods-master\.idea\misc.xml
     文件         270  2019-05-17 13:31  tbtmGoods-master\.idea\modules.xml
     文件         519  2019-05-17 13:31  tbtmGoods-master\.idea\tbtmGoods.iml
     文件         180  2019-05-17 13:31  tbtmGoods-master\.idea\vcs.xml
     文件       30299  2019-05-17 13:31  tbtmGoods-master\.idea\workspace.xml
     文件        5120  2019-05-17 13:31  tbtmGoods-master\README.md
     目录           0  2019-05-17 13:31  tbtmGoods-master\exe\
     目录           0  2019-05-17 13:31  tbtmGoods-master\exe\pys\
     目录           0  2019-05-17 13:31  tbtmGoods-master\exe\pys\__pycache__\
     文件        3235  2019-05-17 13:31  tbtmGoods-master\exe\pys\__pycache__\htmlWeb.cpython-35.pyc
     目录           0  2019-05-17 13:31  tbtmGoods-master\exe\pys\build\
     目录           0  2019-05-17 13:31  tbtmGoods-master\exe\pys\build\htmlWeb\
     文件       99736  2019-05-17 13:31  tbtmGoods-master\exe\pys\build\htmlWeb\Analysis-00.toc
     文件       17370  2019-05-17 13:31  tbtmGoods-master\exe\pys\build\htmlWeb\EXE-00.toc
     文件    31628997  2019-05-17 13:31  tbtmGoods-master\exe\pys\build\htmlWeb\PKG-00.pkg
     文件       16351  2019-05-17 13:31  tbtmGoods-master\exe\pys\build\htmlWeb\PKG-00.toc
     文件     5041155  2019-05-17 13:31  tbtmGoods-master\exe\pys\build\htmlWeb\PYZ-00.pyz
     文件       84203  2019-05-17 13:31  tbtmGoods-master\exe\pys\build\htmlWeb\PYZ-00.toc
     文件      788776  2019-05-17 13:31  tbtmGoods-master\exe\pys\build\htmlWeb\base_library.zip
     文件        1014  2019-05-17 13:31  tbtmGoods-master\exe\pys\build\htmlWeb\htmlWeb.exe.manifest
     文件       24076  2019-05-17 13:31  tbtmGoods-master\exe\pys\build\htmlWeb\warn-htmlWeb.txt
     文件     1247221  2019-05-17 13:31  tbtmGoods-master\exe\pys\build\htmlWeb\xref-htmlWeb.html
     目录           0  2019-05-17 13:31  tbtmGoods-master\exe\pys\dist\
     文件    31902405  2019-05-17 13:31  tbtmGoods-master\exe\pys\dist\htmlWeb.exe
     目录           0  2019-05-17 13:31  tbtmGoods-master\exe\pys\dist\json\
     目录           0  2019-05-17 13:31  tbtmGoods-master\exe\pys\dist\json\20190517\
     文件      288711  2019-05-17 13:31  tbtmGoods-master\exe\pys\dist\json\20190517\1558056699.json
     目录           0  2019-05-17 13:31  tbtmGoods-master\exe\pys\dist\static\
............此处省略20个文件信息

评论

共有 条评论