• 大小: 6KB
    文件类型: .py
    金币: 1
    下载: 0 次
    发布日期: 2021-05-07
  • 语言: Python
  • 标签: python  

资源简介

若要自行修改本爬虫，需要具备一定的编程基础，可参考：http://blog.csdn.net/gcs1024/article/details/78559488

资源截图

代码片段和文件信息

import os
import random
import time as t
import urllib.parse
import urllib.request
from time import time, strftime, localtime

import pytesseract
import requests
from PIL import Image
# Questionnaire id, kept as a string because it is concatenated into wjx.cn URLs.
qid = "16454455"
# Passed to post() as ``rnqian``; presumably the leading part of a random
# request token -- its use lies outside the visible code, TODO confirm.
rnqian = "2063096382"
def download(qid, header, i):
    """Fetch the anti-spam image for questionnaire ``qid`` and save it as ``<i>.gif``.

    Args:
        qid: Questionnaire id as a string; concatenated into the request URL.
        header: Dict of HTTP headers handed to ``urllib.request.Request``.
        i: Integer used to name the output file (``'%d.gif' % i``), which is
            written relative to the current working directory.

    Raises:
        urllib.error.URLError: If the request fails.
        OSError: If the output file cannot be written.
    """
    # The ``t`` query parameter carries the current time in milliseconds.
    url = 'https://www.wjx.cn/AntiSpamImageGen.aspx?q=' + qid + '&t=' + str(int(time() * 1000))
    req = urllib.request.Request(url, headers=header)
    # Context managers close the response and the file even if read/write fails.
    with urllib.request.urlopen(req) as resp:
        data = resp.read()
    with open('%d.gif' % i, 'wb') as pic:
        pic.write(data)
def binarizing(img, threshold=30):
    """Binarise a grey image in place and return it.

    Every pixel strictly greater than ``threshold`` becomes 255; every other
    pixel becomes 0.

    Args:
        img: Grey image exposing ``size`` as ``(width, height)`` and ``load()``
            returning a pixel accessor indexed as ``pixdata[x, y]``.
        threshold: Cut-off level; defaults to 30, the value that was
            previously hard-coded.

    Returns:
        The same ``img`` object, modified in place.
    """
    pixdata = img.load()
    w, h = img.size
    for y in range(h):
        for x in range(w):
            pixdata[x, y] = 255 if pixdata[x, y] > threshold else 0
    return img
def depoint(img):
    """Remove isolated dark specks from a grey image in place and return it.

    For every pixel that is not on the image border, count how many of its
    four direct neighbours (up, down, left, right) are brighter than 245.
    If more than two are, the pixel is set to 255.

    Pixels are processed row by row and written back immediately, so a pixel
    changed earlier in the scan is seen as bright by the pixels after it.

    Args:
        img: Grey image exposing ``size`` as ``(width, height)`` and ``load()``
            returning a pixel accessor indexed as ``pixdata[x, y]``.

    Returns:
        The same ``img`` object, modified in place.
    """
    pixdata = img.load()
    w, h = img.size
    # Skip the outermost rows/columns so every neighbour lookup stays in bounds.
    for y in range(1, h - 1):
        for x in range(1, w - 1):
            neighbours = (
                pixdata[x, y - 1],
                pixdata[x, y + 1],
                pixdata[x - 1, y],
                pixdata[x + 1, y],
            )
            count = sum(1 for value in neighbours if value > 245)
            if count > 2:
                pixdata[x, y] = 255
    return img
def shibie(img, threshold=140):
    """Run OCR on an image and return the recognised text.

    The image is converted to mode ``'L'``, then mapped through a 256-entry
    lookup table (levels below ``threshold`` -> 0, all others -> 1) into mode
    ``'1'``, and the result is passed to ``pytesseract.image_to_string``.

    Per the original author's note, this is only suitable for simple code
    images.

    Args:
        img: Image object supporting ``convert('L')``.
        threshold: Grey level separating the two output values; defaults to
            140, the value that was previously hard-coded.

    Returns:
        The recognised text with surrounding whitespace stripped. The text is
        also printed to stdout.
    """
    imgry = img.convert('L')
    table = [0 if level < threshold else 1 for level in range(256)]
    out = imgry.point(table, '1')
    # Run OCR once and reuse the result for both the print and the return.
    text = str(pytesseract.image_to_string(out)).strip()
    print(text)
    return text
def post(qidrnqiani):
timeg=str(int(time() * 1000))
t.sleep(10)
timep=str(int(time() * 1000))
ip=str(random.randint(14))+‘.‘+str(random.randint(14))+‘.‘+str(random.randint(14))+‘.‘+str(random.randint(14))
rnhou=str(random.randint(1000000099999999))
headerget={
‘Host‘: ‘www.wjx.cn‘
‘Connection‘: ‘keep-alive‘
‘X-Forwarded-For‘: ip
‘User-Agent‘: ‘Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML like Gecko)  Chrome/62.0.3202.89 Safari/537.36 EXT/6d8a2f10c62d11e7gqpxa53987ed19aa47e3/2.4‘
‘Accept‘: ‘image/webpimage/apngimage/**/*;q=0.8‘
‘Referer‘: ‘https://www.wjx.cn/jq/‘+qid+‘.aspx‘
‘Accept-Encoding‘: ‘gzip deflate br‘
‘Accept-Language‘: ‘zh-CNzh;q=0.9‘
‘Cookie‘: ‘.ASPXANONYMOUS=Se6Dlf-S0wEkAAAAMzEyZGYyZmUtYzBmYi00YWM3LWIyMTEtMTEzZWI0YzkzMmZhi6xL6iHoMTghIlPoznFqbYuLd1s1; spiderregkey=www.wjx.cn%c2%a7%c2%a71; baidutgkey=%u95EE%u5377%u661FBH%7C2%7Cbaidu; _uab_collina=151065406900158178719624; SojumpSurvey=01022D8896C0612BD508FE2D28A847832BD508000670002D00740065007300740000012F00FF29B0D12A4780F0718D63D71441EC14F08F69B611;  lllogcook=1; LastCheckUpdateDate=1; ASP.NET_SessionId=4mbujabo1zx2a1imb0pw40k0; _umdata=C234BF9D3AFA6FE7FD70ECA73142BFB1DAA8AC4CAD8E980472CE17B2B4815B078B6B64C8E7D1428ACD43AD3E795C914CB6CD457CEA3135697A8EEEB6A2679E66; LastActivityJoin=16276361101135441472; Hm_lvt_21be24c80829bd7a683b2c536fcf520b=1510624314151065385915106588821510665316;  Hm_lpvt_21be24c80829bd7a683b2c536fcf520b=‘+timeg
‘RA-Ver‘: ‘2.4‘
‘RA-Sid‘: ‘6d8a2f10c62d11e7gqpxa53987ed19aa47e3‘

评论

共有 条评论