人人贷爬虫代码

大小: 6KB

文件类型: .py

金币: 1

下载: 1 次

发布日期: 2021-06-06
语言: Python
标签: python 爬虫

高速下载

资源简介

用于爬取人人贷网站信息，在之前的代码基础上进行了新的更新

资源截图

小图大图

代码片段和文件信息

# -*- coding: utf-8 -*-
"""
Created on Mon Aug 13 11:10:39 2018

@author: 95647
"""
from selenium import webdriver
import time
import json
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
from selenium.webdriver.firefox.options import Options  # options object used to run Firefox headless
import requests
from pandas import DataFrame
import threading

# Reference point for measuring elapsed run time.
# time.clock() was removed in Python 3.8; time.perf_counter() is its replacement.
time_start = time.perf_counter()

# Alternative driver (disabled): webdriver.PhantomJS(executable_path=...)

# User-Agent header copied from the browser's developer tools (F12).
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'}

# Credentials of an account that is already registered on the site.
username = "******"          # username
password = u"*****"     # password
# driver = webdriver.Firefox()

# Start Firefox without a visible window; this driver is shared by the
# functions below.
options = Options()
options.add_argument('-headless')
# ``options=`` replaces the deprecated ``firefox_options=`` keyword.
driver = webdriver.Firefox(options=options)

def LoginRRD(username, password):
    """Log in to renrendai.com using the module-level ``driver``.

    Opens the login page, switches to the password-login tab, fills in the
    credentials and clicks the submit button. Any exception raised along the
    way is caught and printed rather than propagated, so callers never see a
    failure from this function.

    Note: 'login successful!' is printed when the click and the following
    wait complete without raising; the page is not inspected to confirm that
    the login was actually accepted.

    Args:
        username: Account name typed into the ``login_username`` field.
        password: Password typed into the ``J_pass_input`` field.

    Returns:
        None.
    """
    # Imported locally so this function does not need a new file-level import.
    # find_element(By.…) replaces the find_element_by_* helpers, which were
    # removed in Selenium 4.3, and is also available in older releases.
    from selenium.webdriver.common.by import By

    try:
        print(u'ready loging renrendai website...')
        driver.get("https://www.renrendai.com/login")
        # Switch the form to password login.
        login_in_pswd = driver.find_element(By.CLASS_NAME, "tab-password")
        login_in_pswd.click()
        time.sleep(2)
        driver.find_element(By.ID, "login_username").send_keys(username)
        time.sleep(0.5)
        driver.find_element(By.ID, "J_pass_input").send_keys(password)
        time.sleep(0.5)
        # Submit button, located by absolute XPath.
        driver.find_element(
            By.XPATH,
            r"""/html/body/div[2]/div/div/div[2]/div[2]/div/div[1]/button""",
        ).click()
        # Wait a few seconds for the user home page to load; crawling
        # straight away makes the site report "not logged in".
        time.sleep(2)
        print(u'login successful!')
    except Exception as e:
        print("Error:", e)
    finally:
        print(u'End Login!\n')

loanid_e =[]
def parse_userinfo（loanididx）: #defin def to analysis borrower informations
    # global login_status
    global loanid_e
    login_status =False
    urll=“https://www.renrendai.com/loan-%s.html“%str（loanid）
    driver.get（urll）
    html = BeautifulSoup（driver.page_source‘lxml‘）
    # f= open（“htm%s.txt“%idx“w“）
    # f.write（html.decode（“utf-8“）.replace（‘\xa9‘“@“））
    # f.close
    info = html.findAll（‘div‘ class_=“loan-user-info“）  # 这个地方的命名经常修改
    try:
        userinfo = {}
        items = info[0].findAll（‘span‘{“class“:“pr20“}）
    except:
        loanid_e.append（loanid）
    else:    
        for item in items:
            var = item.get_text（）
            value = item.parent.text.replace（var““）
            userinfo[var]=value
    data = pd.Dataframe（userinfoi

上一篇：华为挑战赛装箱问题解决
下一篇：Head First python 第二版源代码

共有条评论

人人贷爬虫代码

资源简介

资源截图

代码片段和文件信息

评论

相关资源