宜配屋

python使用scrapy解析js示例

yipeiwu_com5年前 (2020-03-06)Python基础

from selenium import selenium

class MySpider(CrawlSpider):
    name = 'cnbeta'
    allowed_domains = ['cnbeta.com']
    start_urls = ['//www.jb51.net']

    rules = (
        # Extract links matching 'category.php' (but not matching 'subsection.php')
        # and follow links from them (since no callback means follow=True by default).
        Rule(SgmlLinkExtractor(allow=('/articles/.*\.htm', )),
             callback='parse_page', follow=True),

# Extract links matching 'item.php' and parse them with the spider's method parse_item
)

    def __init__(self):
        CrawlSpider.__init__(self)
        self.verificationErrors = []
        self.selenium = selenium("localhost", 4444, "*firefox", "//www.jb51.net")
        self.selenium.start()

    def __del__(self):
        self.selenium.stop()
        print self.verificationErrors
        CrawlSpider.__del__(self)

    def parse_page(self, response):
        self.log('Hi, this is an item page! %s' % response.url)
        sel = Selector(response)
        from webproxy.items import WebproxyItem

        sel = self.selenium
        sel.open(response.url)
        sel.wait_for_page_to_load("30000")
        import time

time.sleep(2.5)

python使用scrapy解析js示例

相关文章

Django中的Signal代码详解

python socket 超时设置 errno 10054

详解Python的单元测试

matlab中实现矩阵删除一行或一列的方法

windows上安装python3教程以及环境变量配置详解

© YiPeiWu.com 【宜配屋】粤ICP备17031333号

Powered By Z-BlogPHP. Theme by TOYEAN.

宜配屋

python使用scrapy解析js示例

相关文章

Django中的Signal代码详解

python socket 超时设置 errno 10054

详解Python的单元测试

matlab中实现矩阵删除一行或一列的方法

windows上安装python3教程以及环境变量配置详解

© YiPeiWu.com 【宜配屋】 粤ICP备17031333号 var _hmt = _hmt || [];(function() { var hm = document.createElement("script"); hm.src = "https://hm.baidu.com/hm.js?8aa60ae04b767b2af31903508928acc0"; var s = document.getElementsByTagName("script")[0]; s.parentNode.insertBefore(hm, s);})();

Powered By Z-BlogPHP. Theme by TOYEAN.

© YiPeiWu.com 【宜配屋】粤ICP备17031333号