# coding: utf-8
"""
@Time : 11/7/2022 13:42
@Author: fff
@File: test2.py
@Software: PyCharm
"""
import asyncio,Class_function,random
from playwright.async_api import async_playwright
class spider:
def __init__(self):
    """Create the helper object and the empty per-crawl result containers."""
    # Project helper; the other methods in this class use it for logging,
    # timestamps, MD5 hashing and screenshot paths.
    self.Core_Function = Class_function.Class_function()
    # Result buckets, all starting out empty. The event handlers of this
    # class append to request_list, response_list and HTML_list.
    self.page_result_list, self.request_list, self.response_list = [], [], []
    self.list_url, self.HTML_list = [], []
async def click_function(self, page, poll_interval=0.5, max_polls=3):
    """Simulate a user clicking through the page.

    Stops any in-flight loading, then walks the DOM inside the browser and
    calls ``click()`` on every node that exposes it (at most 1500 clicks).
    Afterwards the in-page click counter is polled a bounded number of
    times to give click-triggered activity a moment to happen.

    :param page: Playwright page (only ``page.evaluate`` is used).
    :param poll_interval: seconds to sleep between two counter polls.
    :param max_polls: maximum number of sleeps before giving up waiting.
    :return: None. Any exception is logged through ``Core_Function`` and
        swallowed so a failing page does not abort the crawl.
    """
    try:
        # Halt pending loads before clicking. The script also evaluates to
        # the number of elements on the page, but that value is not used.
        await page.evaluate('''
            window.stop();
            num222=document.querySelectorAll('*').length;
            num222;
        ''')
        # `num111` is deliberately left as an implicit global in the page so
        # the polling loop below can read it with a separate evaluate call.
        # `target` is blanked before each click, presumably so links do not
        # open in a new tab/window.
        await page.evaluate('''
            window.scrollBy(1920, 50);
            treeWalker = document.createTreeWalker(document);
            num111=0
            while (treeWalker.nextNode() && num111<1500) {
                console.log("[*] processing node " + treeWalker.currentNode.tagName + ' ' + treeWalker.currentNode.id);
                if (treeWalker.currentNode.click) {
                    treeWalker.currentNode.target='';
                    treeWalker.currentNode.click();
                    num111=num111+1;
                }
            }
        ''')
        # Bounded wait: stop as soon as enough clicks were counted, or after
        # `max_polls` sleeps. The previous guard (`flag_num < 3`) was
        # inverted and always left the loop after the very first sleep.
        polls = 0
        while await page.evaluate('num111') < 666:
            if polls >= max_polls:
                break
            await asyncio.sleep(poll_interval)
            polls += 1
    except Exception as e:
        self.Core_Function.callback_logging().error(e)
async def hook_requset(self, route):
    """Route hook: replay the target request and capture other navigations.

    * A request whose URL equals ``self.target_request['url']`` is let
      through, overridden with the stored headers/method (and body, when
      the stored body is not the ``'Null'`` placeholder).
    * Any other navigation request (except ``about:blank``) is recorded in
      ``self.request_list`` and then aborted, so the page under test is
      never navigated away from.
    * Everything else (sub-resources, ``about:blank``) continues untouched.

    :param route: Playwright ``Route`` for the intercepted request.
    :return: None
    """
    request = route.request
    target = self.target_request

    if request.url == target['url']:
        if target['body'] != 'Null':
            await route.continue_(headers=target['headers'], method=target['method'],
                                  post_data=target['body'])
        elif target['method'] == 'GET':
            await route.continue_(headers=target['headers'], method=target['method'])
        else:
            await route.continue_()
        return

    if request.url == 'about:blank' or not request.is_navigation_request():
        await route.continue_()
        return

    # Decide from the intercepted request itself whether there is a body to
    # store. The earlier code checked the *target* request's body here, so
    # form submissions were dropped whenever the target had no body.
    body = 'Null' if request.method == 'GET' else request.post_data
    if body is not None:
        self.request_list.append({"headers": request.headers, "method": request.method,
                                  "url": request.url, "body": body})
    await route.abort(error_code='aborted')
async def handle_popup(self, page):
    """Close a new window/tab that was opened as a result of a click.

    :param page: the newly opened Playwright page.
    :return: None
    """
    await page.close()
async def handle_dialog(self, dialog):
    """Dismiss a browser dialog (alert and the like).

    :param dialog: the Playwright dialog object to dismiss.
    :return: None
    """
    await dialog.dismiss()
async def handle_network_http_request(self, request):
    """Record an HTTP request issued by the page.

    Requests for images, stylesheets, websockets, media and fonts are
    ignored. Every other request is appended to ``self.request_list`` as a
    dict with the keys ``body``, ``url``, ``headers``, ``method``, ``time``,
    ``describe`` and ``status``; a missing body is stored as the string
    ``'Null'``.

    :param request: Playwright request object.
    :return: None. Exceptions are logged through ``Core_Function`` and
        swallowed so a single bad request does not break the handler.
    """
    try:
        if request.resource_type in ('image', 'stylesheet', 'websocket', 'media', 'font'):
            return
        post_data = request.post_data
        self.request_list.append({
            # 'Null' is the placeholder this class uses for "no body".
            'body': 'Null' if post_data is None else post_data,
            'url': request.url,
            'headers': request.headers,
            'method': request.method,
            'time': self.Core_Function.callback_time(0),
            'describe': 'Null',
            'status': 0,
        })
    except Exception as e:
        self.Core_Function.callback_logging().error(e)
async def handle_http_response(self, response):
    """Record the response that belongs to the target request.

    Only responses whose request URL equals ``self.target_request['url']``
    and whose status is one of 200, 301, 302, 404 or 500 are handled. For
    those, one entry is appended to ``self.response_list`` (target request
    data plus status code, response headers and the body's MD5) and one to
    ``self.HTML_list`` (the body text with the same MD5).

    :param response: Playwright response object.
    :return: None. Exceptions are logged through ``Core_Function`` and
        swallowed.
    """
    try:
        target = self.target_request
        if response.request.url != target['url']:
            return
        if response.status not in (200, 301, 302, 404, 500):
            return

        try:
            html = await response.text()
        except Exception:
            # The body of a redirect response cannot be read, so without
            # this fallback every 301/302 was lost to the outer handler
            # even though both codes are explicitly accepted above.
            if response.status not in (301, 302):
                raise
            html = ''
        html_md5 = self.Core_Function.md5_convert(html)

        html_data = {
            'html': html,
            'html_md5': html_md5,
            'time': self.Core_Function.callback_time(0),
            'status': 0,
        }
        response_data = {
            'body': target['body'],
            'url': target['url'],
            'headers': target['headers'],
            'method': target['method'],
            'http_status_code': response.status,
            'headers_response': response.headers,
            'html_md5': html_md5,
            'time': self.Core_Function.callback_time(0),
            'describe': 'Null',
            'status': 0,
        }
        self.response_list.append(response_data)
        self.HTML_list.append(html_data)
    except Exception as e:
        self.Core_Function.callback_logging().error(e)
async def page_data(self,page,request):
'''
;页面信息获取
:param page:
:param request:
:return:
'''
html=await page.content()
html_md5 = self.Core_Function.md5_convert(html)
html_data={}
html_data['html'] = html
html_data['html_md5'] = html_md5
html_data['time'] = self.Core_Function.callback_time(0)
html_data['status'] = 0
request['status'] = 0
request['html_md5'] = html_md5
request['title'] = await page.title()
print(request['title'])
await page.evaluate('''
list_href=[]
window.open = function(url) { console.log("new link: " + url);list_href.push(url); };
window.close = function () { return false; };
''')
await page.evaluate('''
list_href=[]
for(i=0;i<document.getElementsByTagName("a").length;i++){
list_href.push(document.getElementsByTagName("a")[i].href); //输出该页面的所有链接。
}
''')
# print(await page.content())
filename_img = "%s/%s.jpeg" % (self.Core_Function.create_image_path(), ''.join(
random.sample('ABCDEFGHIJKLMNOPQRSTUVWXYZ012345678zyxwvutsrqponmlkjihgfedcba', 15)))
try:
await page.screenshot(path=filename_img, type='jpeg', quality=15)
except Exception as error:
filename_img

Wis57
- 粉丝: 439
最新资源
- 深圳市地铁龙岗线工程建设项目管理咨询及设计监理服务安装装修策划报告.doc
- 软件测试从业人员的调查报告.doc
- (源码)基于物联网的智能灌溉系统.zip
- 基于Python和Flask框架开发的轻量级数据可视化大屏展示系统-支持多页面切换和实时数据渲染-适用于企业数据监控和业务分析场景-包含数据看板-空气质量监测-计算机性能指标等模块.zip
- 基于PLC控制系统的Z3040型摇臂钻床改造.doc
- 运输企业如何在区块链助力下飞速发展---副本.pptx
- Azure数据与AI架构师手册精华
- (源码)基于React框架的技术实践项目.zip
- (源码)基于Go语言的哆啦助手GPT.zip
- 基于SpringBootVue3MyBatis的家纺用品电子商务平台-包含床品套件-被芯枕芯-窗帘地毯-毛巾浴巾等全品类家纺商品在线销售系统-支持商品分类展示-购物车管理-订单.zip
- 基于Qt的图像识别项目
- (源码)基于STM32U5的USB存储设备管理系统.zip
- (源码)基于LTARK技术的开源电子模块项目.zip
- (源码)基于Vue2框架的前端开发学习项目.zip
- (源码)基于uniapp框架的uniapp2wxpack.zip
- 基于SpringBoot和Vue的全功能社区活动素材管理系统-包含用户注册登录论坛活动公告资讯图片视频素材收藏留言报名个人中心管理后台管理员用户管理素材管理活动管理报名管理论坛管理.zip
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈


