• 大小: 30KB
    文件类型: .zip
    金币: 2
    下载: 1 次
    发布日期: 2021-05-16
  • 语言: Html/CSS
  • 标签: 安全  ZoomEye  爬虫  

资源简介

# getZoomEye

## 目的

从 https://www.zoomeye.org/ 网站抓取数据。

## 使用方法

1. 安装依赖库 selenium,下载 webdriver 并配置环境变量。
2. 修改配置文件 config.ini:
   - query=  # 搜索的关键字
   - pagenum=  # 抓取的页数
   - type=  # 抓取的类型,填写 web 或 host
3. 运行 getZoomeye.py。

## 其它

- zoomeye 在未登录状态下,仅显示十页内容。
- chrome driver 下载地址:http://chromedriver.storage.googleapis.com/index.html?path=2.22/

资源截图

代码片段和文件信息

#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
author : heyanglv
date : 2016/7/25
'''
import sys 
import ConfigParser
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Python 2 only: make UTF-8 the interpreter-wide default codec so implicit
# str/unicode conversions of scraped links do not raise UnicodeError.
# reload(sys) is needed because site.py removes sys.setdefaultencoding at
# startup.  Python 3 has neither name and already defaults to UTF-8, so the
# hack is skipped there.
if sys.version_info[0] == 2:
    reload(sys)
    sys.setdefaultencoding('utf8')

def main():
    """Scrape ZoomEye search-result links and append them to a text file.

    Reads ``query``, ``pagenum`` and ``type`` from the ``QUERY`` section of
    ``./config.ini``, loads result pages 1..``pagenum`` in Chrome through
    Selenium, and appends every result link to ``<type>result.txt`` while
    echoing it to stdout.

    When ``type`` is ``host`` only the leading ``http(s)://<IPv4>`` part of
    each link is kept; links without such a prefix are skipped.

    Any exception (bad config, Selenium timeout, I/O error) propagates to
    the caller after the browser has been shut down.
    """
    # Local import: the file's top-level import block does not include re.
    import re

    config = ConfigParser.ConfigParser()
    config.read('./config.ini')
    query_string = config.get('QUERY', 'query')
    pagenum = int(config.get('QUERY', 'pagenum'))
    _type = config.get('QUERY', 'type')
    query_url = "https://www.zoomeye.org/search?q="

    # Compiled once, outside the loops.  Dots are escaped so only a literal
    # dotted quad matches.
    host_pattern = re.compile(r'https?://\d{1,3}(?:\.\d{1,3}){3}')

    #driver = webdriver.Firefox()
    driver = webdriver.Chrome()
    #driver = webdriver.PhantomJS()

    try:
        with open(_type + "result.txt", "a+") as f:
            # Pages are 1-based; pagenum is the number of pages to fetch,
            # so the upper bound is inclusive.
            for num in range(1, pagenum + 1):
                query_page = (query_url + query_string
                              + "&p=" + str(num) + "&t=" + _type)
                driver.get(query_page)
                # Block (up to 30 s) until the footer exists -- presumably
                # the signal that the result list has been rendered.
                WebDriverWait(driver, 30).until(
                    EC.presence_of_element_located(
                        (By.XPATH, "//footer[@class='site-footer']")))
                result = driver.find_element(
                    By.XPATH, r".//div[@class='result-list']")
                links = result.find_elements(By.XPATH, r"./ul/li/h3/a")
                for link in links:
                    _href = link.get_attribute('href')
                    if _type == 'host':
                        res = host_pattern.findall(_href.strip())
                        if not res:
                            # Not an IP-based link: skip it in host mode.
                            continue
                        _href = res[0]
                    print(_href)
                    f.write(_href + "\n")
    finally:
        # Always end the browser session, even when scraping fails.
        driver.quit()
# Run the scraper only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

 属性            大小     日期    时间   名称
----------- ---------  ---------- -----  ----
     目录           0  2016-08-05 11:45  getZoomEye\
     文件        6148  2016-08-05 16:57  getZoomEye\.DS_Store
     目录           0  2018-06-14 16:37  __MACOSX\
     目录           0  2018-06-14 16:37  __MACOSX\getZoomEye\
     文件         120  2016-08-05 16:57  __MACOSX\getZoomEye\._.DS_Store
     目录           0  2016-08-04 13:34  getZoomEye\.git\
     文件          11  2016-08-04 13:32  getZoomEye\.git\COMMIT_EDITMSG
     文件         312  2016-07-31 21:08  getZoomEye\.git\config
     文件          73  2016-07-31 21:08  getZoomEye\.git\description
     文件         100  2016-08-05 02:47  getZoomEye\.git\FETCH_HEAD
     文件          23  2016-07-31 21:08  getZoomEye\.git\HEAD
     目录           0  2016-07-31 21:08  getZoomEye\.git\hooks\
     文件         478  2016-07-31 21:08  getZoomEye\.git\hooks\applypatch-msg.sample
     文件         896  2016-07-31 21:08  getZoomEye\.git\hooks\commit-msg.sample
     文件         189  2016-07-31 21:08  getZoomEye\.git\hooks\post-update.sample
     文件         424  2016-07-31 21:08  getZoomEye\.git\hooks\pre-applypatch.sample
     文件        1642  2016-07-31 21:08  getZoomEye\.git\hooks\pre-commit.sample
     文件        1348  2016-07-31 21:08  getZoomEye\.git\hooks\pre-push.sample
     文件        4951  2016-07-31 21:08  getZoomEye\.git\hooks\pre-rebase.sample
     文件        1239  2016-07-31 21:08  getZoomEye\.git\hooks\prepare-commit-msg.sample
     文件        3610  2016-07-31 21:08  getZoomEye\.git\hooks\update.sample
     文件         457  2016-08-04 13:32  getZoomEye\.git\index
     目录           0  2016-07-31 21:08  getZoomEye\.git\info\
     文件         240  2016-07-31 21:08  getZoomEye\.git\info\exclude
     目录           0  2016-08-04 13:32  getZoomEye\.git\lfs\
     目录           0  2016-08-04 13:32  getZoomEye\.git\lfs\objects\
     目录           0  2016-08-04 13:32  getZoomEye\.git\lfs\objects\logs\
     目录           0  2016-08-04 13:32  getZoomEye\.git\lfs\tmp\
     目录           0  2016-08-04 13:32  getZoomEye\.git\lfs\tmp\objects\
     目录           0  2016-07-31 21:08  getZoomEye\.git\logs\
     文件         683  2016-08-04 13:32  getZoomEye\.git\logs\HEAD
............此处省略54个文件信息

评论

共有 条评论