关于虫师的pdf中python+selenium的多进程执行测试用例失败的问题
发布网友
发布时间:2022-05-17 19:16
我来回答
共2个回答
热心网友
时间:2023-11-03 17:33
#之前用过如下代码爬过豆瓣用户,效率还可以
#好像叫gevent协程模块来着,你可以试试。
#!/usr/bin/env python
#coding: utf-8
from selenium import webdriver
from selenium.webdriver.phantomjs.service import Service as PhantomJSService
from gevent import monkey
from BeautifulSoup import BeautifulSoup
monkey.patch_all()
import gevent
import sys
import time
def _encoded_text(tag):
    """Return the UTF-8 encoded ``.string`` of *tag*, or '' when it is missing."""
    # ``sub.a`` / ``sub.span`` evaluate to None when the div has no such child,
    # and ``.string`` is None when the tag does not hold a single text node.
    if tag is None or tag.string is None:
        return ''
    return tag.string.encode('utf8')


def doJob(urls,name):
    """Visit every URL in *urls* and save the scraped rows to done/<name>.txt.

    For each page, every ``<div class="name">`` contributes one output line of
    the form ``<a text>,<span text>``. URLs whose page has no such div are
    printed to stdout so they can be inspected afterwards.

    Args:
        urls: iterable of page URLs (strings); blank entries are skipped.
        name: base name of the output file written under the ``done/`` folder.
    """
    service_args = [
    ]
    browser = webdriver.PhantomJS(executable_path=r'D:\TestProject\phantomjs\bin\phantomjs.exe',service_args=service_args)
    try:
        # ``with`` closes the output file and ``finally`` stops the PhantomJS
        # process even when a page load or parse raises midway through.
        with open('done/'+name+'.txt','w') as wr:
            for url in urls:
                if not url.strip():
                    # Splitting a link file on newlines leaves empty entries.
                    continue
                browser.get(url)
                # Fixed pause after navigation - presumably to let the page
                # finish rendering before the source is read.
                time.sleep(1)
                soup = BeautifulSoup(browser.page_source.encode('utf-8'))
                findNames = soup.findAll('div',attrs={'class':'name'})
                # findAll returns an empty list (never None) when nothing
                # matches, so test for emptiness to report the URL.
                if not findNames:
                    print(url)
                for sub in findNames:
                    n = _encoded_text(sub.a)
                    pl = _encoded_text(sub.span)
                    wr.write(n+','+pl+'\n')
    finally:
        browser.quit()
# Categories to crawl, in the order their greenlets are spawned. Each one is
# read from "<category>.link" and written by doJob to "done/<category>.txt".
categories = ('culture','travel','ent','fashion','life','tech')

# Maps category name -> list of page URLs loaded from its .link file.
files = {'culture':[],'travel':[],'ent':[],'fashion':[],'life':[],'tech':[]}
for key_fn in files:
    with open(key_fn + '.link','r') as f:
        # Drop blank lines (including the empty string left after the final
        # newline) so no empty URL is handed to the browser.
        files[key_fn] = [line.strip() for line in f.read().split('\n') if line.strip()]

# One greenlet per category; joinall blocks until all of them have finished.
gevent.joinall([
    gevent.spawn(doJob,files[key_fn],key_fn)
    for key_fn in categories
])
热心网友
时间:2023-11-03 17:33
兄弟,你解决这个问题了没?