• 大小: 6.26KB
    文件类型: .rar
    金币: 1
    下载: 0 次
    发布日期: 2024-05-09
  • 语言: Python
  • 标签: 购物  实例  

资源简介


资源截图

代码片段和文件信息

import requests
from bs4 import BeautifulSoup
from lxml import etree
from urllib import request
from urllib.parse import quote urlencode
import csv
import copy
import re

def getDataByUrl2(url):
    #headers = {‘User-Agent‘: ‘Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox    /23.0‘}
    headers = {‘authority‘: ‘search.jd.com‘
            ‘method‘: ‘GET‘
            ‘path‘: ‘/s_new.php?keyword=%E6%89%8B%E6%9C%BA&enc=utf-8&qrst=1&rt=1&stop=1&vt=2&wq=%E6%89%8B%E6%9C%BA&cid2=653&cid3=655&page=4&s=84&scrolling=y&log_id=1529828108.22071&tpl=3_M&show_items=76519277367120705686874192526001239593418245549693893501742146265774952648054355373457574483120617607769327957336429596306652833872572246889274256224768461‘
            ‘scheme‘: ‘https‘
            ‘referer‘: ‘https://search.jd.com/Search?keyword=%E6%89%8B%E6%9C%BA&enc=utf-8&qrst=1&rt=1&stop=1&vt=2&wq=%E6%89%8B%E6%9C%BA&cid2=653&cid3=655&page=3&s=58&click=0‘
            ‘user-agent‘: ‘Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML like Gecko) Chrome/66.0.3359.139 Safari/537.36‘
            ‘x-requested-with‘: ‘xmlHttpRequest‘
            ‘Cookie‘:‘qrsc=3; pinId=RAGa4xMoVrs; xtest=1210.cf6b6759; ipLocation=%u5E7F%u4E1C; _jrda=5; TrackID=1aUdbc9HHS2MdEzabuYEyED1iDJaLWwBAfGBfyIHJZCLWKfWaB_KHKIMX9Vj9_2wUakxuSLAO9AFtB2U0SsAD-mXIh5rIfuDiSHSNhZcsJvg; shshshfpa=17943c91-d534-104f-a035-6e1719740bb6-1525571955; shshshfpb=2f200f7c5265e4af999b95b20d90e6618559f7251020a80ea1aee61500; cn=0; 3AB9D23F7A4B3C9B=QFOFIDQSIC7TZDQ7U4RPNYNFQN7S26SFCQQGTC3YU5UZQJZUBNPEXMX7O3R7SIRBTTJ72AXC4S3IJ46ESBLTNHD37U; ipLoc-djd=19-1607-3638-3638.608841570; __jdu=930036140; user-key=31a7628c-a9b2-44b0-8147-f10a9e597d6f; areaId=19; __jdv=122270672|direct|-|none|-|1529893590075; PCSYCityID=25; mt_xid=V2_52007VwsQU1xaVVoaSClUA2YLEAdbWk5YSk9MQAA0BBZOVQ0ADwNLGlUAZwQXVQpaAlkvShhcDHsCFU5eXENaGkIZWg5nAyJQbVhiWR9BGlUNZwoWYl1dVF0%3D; __jdc=122270672; shshshfp=72ec41b59960ea9a26956307465948f6; rkv=V0700; __jda=122270672.930036140.-.1529979524.1529984840.85; __jdb=122270672.1.930036140|85.1529984840; shshshsID=f797fbad20f4e576e9c30d1c381ecbb1_1_1529984840145‘
    }
    req = request.Request(url=url headers=headers)
    pageSource = request.urlopen(req).read().decode(‘utf-8‘ errors=‘ignore‘)
    with open(‘data.txt‘ ‘w‘) as f:
        f.write(pageSource)
    soup = BeautifulSoup(pageSource ‘lxml‘)
    return soup

def getDataByUrl(url):
    #headers = {‘User-Agent‘: ‘Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox    /23.0‘}
    headers = {‘authority‘: ‘search.jd.com‘
            ‘method‘: ‘GET‘
            ‘path‘: ‘/s_new.php?keyword=%E6%89%8B%E6%9C%BA&enc=utf-8&qrst=1&rt=1&stop=1&vt=2&wq=%E6%89%8B%E6%9C%BA&cid2=653&cid3=655&page=4&s=84&scrolling=y&log_id=1529828108.22071&tpl=3_M&show_items=765192773671207056868741925260012395934182455496938935017421462657749526480543553734575744831206176077693279573364295963066528338725722468892742562

评论

共有 条评论