资源简介

爬取网站数据,基于 selenium.webdriver

资源截图

代码片段和文件信息

# Module author (typographic quotes replaced with plain ASCII quotes so the
# literal parses).
__author__ = 'fandechun'
import datetime
import re
import sys
import time
import uuid
import pymysql
from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.action_chains import ActionChains
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC

def find_element(driver, locater, timeout=10, poll_frequency=0.5):
    """Wait until the element identified by *locater* is present, then return it.

    Thin wrapper around ``WebDriverWait(...).until`` combined with
    ``expected_conditions.presence_of_element_located``.

    Args:
        driver: WebDriver instance to poll.
        locater: ``(By.<strategy>, value)`` tuple identifying the element.
        timeout: Maximum number of seconds to wait. Defaults to 10, the
            value that was previously hard-coded.
        poll_frequency: Seconds to sleep between polls. Defaults to 0.5,
            the value that was previously hard-coded.

    Returns:
        The element returned by the wait condition.

    Raises:
        selenium.common.exceptions.TimeoutException: if the condition is not
            met within *timeout* seconds (raised by ``WebDriverWait.until``).
    """
    wait = WebDriverWait(driver, timeout, poll_frequency)
    return wait.until(EC.presence_of_element_located(locater))

def isElementExist(driver, xpath1):
    """Report whether an element matching *xpath1* can currently be located.

    Args:
        driver: WebDriver (or element) exposing ``find_element(by, value)``.
        xpath1: XPath expression to look up.

    Returns:
        ``True`` if the lookup succeeds, ``False`` if it raises.
    """
    try:
        # Use the generic locator API; the ``find_element_by_*`` shortcuts
        # are no longer available in current Selenium releases.
        driver.find_element(By.XPATH, xpath1)
    except Exception:
        # Best-effort probe: any lookup failure is treated as "not present".
        # Unlike a bare ``except``, this lets KeyboardInterrupt/SystemExit
        # propagate.
        return False
    return True

def get_by_xpath_if_existed(driver, xpath1):
    """Return the element matching *xpath1*, or a placeholder string.

    Args:
        driver: WebDriver (or element) exposing ``find_element(by, value)``.
        xpath1: XPath expression to look up.

    Returns:
        The located element, or the string ``'未定'`` ("undetermined") when
        the lookup raises. Callers therefore receive either an element or a
        plain ``str``.
    """
    try:
        # Single lookup through the generic locator API (the original looked
        # the element up twice via the ``find_element_by_xpath`` shortcut).
        return driver.find_element(By.XPATH, xpath1)
    except Exception:
        # Best-effort: any lookup failure yields the placeholder value.
        return '未定'

def get_by_class_if_existed(driver, column):
    """Return the element with class name *column*, or a placeholder string.

    Args:
        driver: WebDriver (or element) exposing ``find_element(by, value)``.
        column: CSS class name to look up.

    Returns:
        The located element, or the string ``'未定'`` ("undetermined") when
        the lookup raises. Callers therefore receive either an element or a
        plain ``str``.
    """
    try:
        # Single lookup through the generic locator API (the original looked
        # the element up twice via the ``find_element_by_class_name``
        # shortcut).
        return driver.find_element(By.CLASS_NAME, column)
    except Exception:
        # Best-effort: any lookup failure yields the placeholder value.
        return '未定'

def get_by_id_if_existed(driver, column):
    """Return the element whose id is *column*, or a placeholder string.

    Args:
        driver: WebDriver (or element) exposing ``find_element(by, value)``.
        column: Element id to look up.

    Returns:
        The located element, or the string ``'未定'`` ("undetermined") when
        the lookup raises. Callers therefore receive either an element or a
        plain ``str``.
    """
    try:
        # Single lookup through the generic locator API (the original looked
        # the element up twice via the ``find_element_by_id`` shortcut).
        return driver.find_element(By.ID, column)
    except Exception:
        # Best-effort: any lookup failure yields the placeholder value.
        return '未定'

class MySqlSession:
    """Holds one pymysql connection and inserts scraped house records."""

    def __init__(self, host='localhost', port=3306, user='root',
                 passwd='123', db='python', charset='utf8'):
        """Open the MySQL connection.

        Every argument defaults to the value that used to be hard-coded, so
        ``MySqlSession()`` connects exactly as before.

        Args:
            host: Database server host name.
            port: Database server TCP port.
            user: Login user name.
            passwd: Login password.
            db: Name of the database to use.
            charset: Connection character set.
        """
        self.con = pymysql.connect(
            host=host,
            port=port,
            user=user,
            passwd=passwd,
            db=db,
            charset=charset,
        )

    def _insert(self, sql, params):
        """Run one parameterized INSERT; commit on success, roll back on error."""
        # Database cursor.
        cue = self.con.cursor()
        try:
            cue.execute(sql, params)
        except Exception as e:
            # Best-effort insert: report the failure and keep going.
            print(f"插入数据库失败:{e}")
            self.con.rollback()
        else:
            self.con.commit()
        finally:
            # Release the cursor whether or not the statement succeeded.
            cue.close()

    def insert_house_summary_info(self, house_name, other_name, house_price,
                                  address, open_date, house_layouts):
        """Insert one row into ``d_house_summary``.

        A fresh 32-character hex id (a UUID1 without dashes) is generated and
        passed as the first value, followed by the arguments in order. The
        statement names no columns, so the values are matched to the table's
        columns positionally.
        NOTE(review): assumes the table has exactly seven columns in this
        order — confirm against the schema.

        Failures are printed and rolled back rather than raised.
        """
        house_id = uuid.uuid1().hex
        sql = 'insert into d_house_summary values (%s,%s,%s,%s,%s,%s,%s)'
        self._insert(sql, [house_id, house_name, other_name, house_price,
                           address, open_date, house_layouts])

    def insert_house_details_info(self, house_name, building_area, house_layout):
        """Insert one row into ``d_house_details``.

        A fresh 32-character hex id (a UUID1 without dashes) is generated and
        passed as the first value, followed by the arguments in order. The
        statement names no columns, so the values are matched to the table's
        columns positionally.
        NOTE(review): assumes the table has exactly four columns in this
        order — confirm against the schema.

        Failures are printed and rolled back rather than raised.
        """
        layout_id = uuid.uuid1().hex
        sql = 'insert into d_house_details values (%s,%s,%s,%s)'
        self._insert(sql, [layout_id, house_name, building_area, house_layout])
# Launch Chrome and open the listing page that the rest of the script works on.
browser = webdriver.Chrome()
browser.get("https://jn.fang.anjuke.com/loupan/")

######## Check whether the page has opened ########
# Block until the element with id 'search-btn' is present in the page.
locater = (By.ID, 'search-btn')
find_element(browser, locater)
# Keep the handle of the window that is current at this point.
shouye = browser.current_window_handle
########查询条件########
########这个下拉列表框比较特殊,无法通过正常的方法选择,只能通过模拟鼠标操作来点击。且需要放在最前面,通过点击其它查询条件触发检索########
elemen

评论

共有 条评论