import base64
import os
import random
import json
import re
import time
import datetime
import execjs
import requests
from retrying import retry
from fake_useragent import UserAgent
from urllib.parse import urlencode
from loguru import logger
# 忽略requests证书警告
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
PATH = os.path.dirname(os.path.abspath(__file__)) # 获取绝对路径
from dy_slide_verify import Verify
COUNT = 1
def run_time(func):
    """Decorator that logs the start time, end time and duration of ``func``.

    The wrapper forwards all positional and keyword arguments to ``func``
    unchanged and returns whatever ``func`` returns.  If ``func`` raises,
    the exception propagates and the end/elapsed lines are not logged.

    Args:
        func: The callable to time.

    Returns:
        A wrapper with the same call signature as ``func``.
    """
    import functools  # function-scope import keeps this block self-contained

    # functools.wraps keeps __name__, __doc__ and __wrapped__ of the decorated
    # function, so logs, tracebacks and introspection show the real function
    # instead of "new_func".
    @functools.wraps(func)
    def new_func(*args, **kwargs):
        start_time = datetime.datetime.now()
        logger.info("程序开始时间:{}".format(start_time))
        res = func(*args, **kwargs)
        end_time = datetime.datetime.now()
        logger.info("程序结束时间:{}".format(end_time))
        # The difference is a timedelta, so it is rendered like "0:00:01.234567".
        logger.info("程序执行用时:{}s".format((end_time - start_time)))
        return res

    return new_func
class sessions(requests.Session):
    """``requests.Session`` that fills in request defaults.

    Every request gets ``timeout=30`` and ``cookies=''`` unless the caller
    passes those keyword arguments explicitly.
    """

    def request(self, *args, **kwargs):
        """Apply the default keyword arguments, then delegate to the parent."""
        for option, fallback in (('timeout', 30), ('cookies', '')):
            # setdefault leaves caller-supplied values untouched.
            kwargs.setdefault(option, fallback)
        return super().request(*args, **kwargs)
class DouYinComment(object):
def __init__(self):
    """Register the log file sink, create the HTTP session and the cookie buffer."""
    # Registers "file_X.log" as a loguru sink with retention="1 days".
    # NOTE(review): this runs once per instance, so creating several
    # instances registers the same sink several times.
    logger.add("file_X.log", retention="1 days")
    self.session = sessions()
    # Cookie string: get_cookie() overwrites it and get_passport_csrf_token()
    # appends to it.  Starting from '' means appending works even when
    # get_cookie() has not run yet (previously that raised AttributeError).
    self.cookie = ''
@retry(stop_max_attempt_number=3)
def _parse_url(self, url, headers=None, params=None):
    """Send a GET request and retry until the response body is non-empty.

    Up to 19 attempts are made.  An attempt fails when the request (or the
    cookie refresh) raises, or when ``response.text`` is empty; an empty
    body is followed by a one second pause.  Whenever the attempt counter
    is a multiple of five and the request itself did not raise,
    ``self.get_cookie()`` is called before the body is inspected.

    Args:
        url: Address to fetch.
        headers: Optional request headers.
        params: Optional query parameters; a falsy value sends none.

    Returns:
        The response object of the first attempt with a non-empty body, or
        ``None`` when every attempt failed.
    """
    # NOTE(review): every ``Exception`` is handled inside the loop and
    # exhaustion returns None rather than raising, so the @retry wrapper
    # has little to act on here -- confirm whether that is intended.
    count = 1
    while count < 20:
        try:
            # ``params or None`` mirrors the former if/else: falsy params
            # are simply not sent, so one call covers both cases.
            response = requests.get(url, headers=headers, params=params or None, verify=False,
                                    allow_redirects=False, timeout=20)
            if count % 5 == 0:
                self.get_cookie()
            if response.text:
                return response
            logger.info('数据获取失败,重试...')
            time.sleep(1)
        except Exception as e:
            # Previously swallowed silently; keep retrying but leave a trace.
            logger.warning(f'request attempt {count} for {url} failed: {e!r}')
        count += 1
    logger.error(f'no usable response from {url} after {count - 1} attempts')
    return None
def get_passport_csrf_token(self):
    """Fetch the passport CSRF token and append it to ``self.cookie``.

    Calls the SSO ``check_login`` endpoint through ``self.session``, reads
    the ``passport_csrf_token_default`` value from the ``Set-Cookie``
    response header and appends ``passport_csrf_token=<value>;`` to
    ``self.cookie``.

    Raises:
        RuntimeError: If the response carries no
            ``passport_csrf_token_default`` cookie.
    """
    # The URL, origin and referer previously pointed at the host
    # "ptop.only.wip.la:443" with the real host moved into the path, which
    # contradicts the "authority" header below; they are restored to the
    # douyin.com hosts.
    # NOTE(review): the "^"-laden values of "sec-ch-ua",
    # "sec-ch-ua-platform" and the "service" parameter look like
    # shell-escape residue from a copied curl command; they are left
    # untouched because the intended values cannot be confirmed here.
    headers = {
        "authority": "sso.douyin.com",
        "accept": "application/json, text/plain, */*",
        "accept-language": "zh-CN,zh;q=0.9",
        "cache-control": "no-cache",
        "origin": "https://www.douyin.com",
        "pragma": "no-cache",
        "referer": "https://www.douyin.com/",
        "sec-ch-ua": "^\\^Chromium^^;v=^\\^110^^, ^\\^Not",
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": "^\\^Windows^^",
        "sec-fetch-dest": "empty",
        "sec-fetch-mode": "cors",
        "sec-fetch-site": "same-site",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36"
    }
    url = "https://sso.douyin.com/check_login/"
    params = {
        "service": "https:^%^2F^%^2Fwww.douyin.com",
        "aid": "6383",
        "account_sdk_source": "sso",
        "language": "zh",
        "sdk_version": "2.1.3"
    }
    response = self.session.get(url, headers=headers, params=params)
    # A response without Set-Cookie yields None; fall back to '' so the
    # regex search reports "not found" instead of raising TypeError.
    set_cookie = response.headers.get('Set-Cookie') or ''
    token_match = re.search(r'passport_csrf_token_default=(.*?);', set_cookie, re.S | re.M)
    if not token_match:
        logger.error('token: 提取失败')
        # The former bare ``raise`` had no active exception and therefore
        # surfaced as an opaque RuntimeError; keep the type, add a message.
        raise RuntimeError('passport_csrf_token_default not found in Set-Cookie header')
    # getattr guards against the attribute not existing yet, e.g. when this
    # method runs before get_cookie() has stored a cookie.
    self.cookie = getattr(self, 'cookie', '') + f'passport_csrf_token={token_match.group(1)};'
@retry(stop_max_attempt_number=3)
def get_cookie(self):
    """Obtain an ``s_v_web_id`` cookie by passing the slide verification.

    Each attempt requests a fixed post-list URL, reads the
    ``x-vc-bdturing-parameters`` response header, base64-decodes it into
    JSON and hands its ``fp`` and ``detail`` fields to
    ``Verify().verify``.  When the verifier answers with code 200,
    ``self.cookie`` is set to ``s_v_web_id=<fp>;`` and the method returns.

    At most 10 attempts are made.  Every kind of failure now counts
    towards that limit; previously only a missing header did, so a
    persistently failing verifier could loop forever.  After the last
    failed attempt the method blocks on ``input()`` and then returns
    ``None``.
    """
    # Loop-invariant, so built once.  The address previously pointed at the
    # host "ptop.only.wip.la:443" with "https/www.douyin.com" pushed into
    # the path; the host is restored, the query string is unchanged.
    url = 'https://www.douyin.com/aweme/v1/web/aweme/post/?device_platform=webapp&aid=6383&channel=channel_pc_web&sec_user_id=MS4wLjABAAAA_py8TGmFe6t8KDY04LU0JH9Yr9ml54dCjRFi0mc1lwI&max_cursor=1644497505000&locate_item_id=7064150578586193188&locate_query=false&show_live_replay_strategy=1&count=10&publish_video_strategy_type=2&pc_client_type=1&version_code=170400&version_name=17.4.0&cookie_enabled=true&screen_width=1680&screen_height=1050&browser_language=zh-CN&browser_platform=Win32&browser_name=Chrome&browser_version=108.0.0.0&browser_online=true&engine_name=Blink&engine_version=108.0.0.0&os_name=Windows&os_version=10&cpu_core_num=4&device_memory=8&platform=PC&downlink=10&effective_type=4g&round_trip_time=100&webid=7180319539912607284&msToken=mgDDRa5pg4ZXS8pPtZSYMAQSePr3lEy0rZ_jCzpOcY2c7opOKqNwSSR1cz991481ddf0AQAl2S4aTTBs19-O4VwM9K3JfMVB6-Q-tn2k2LqJVlgOl-2sdA==&X-Bogus=DFSzswVYA9sANryXSkh5PRXAIQR0'
    for _ in range(10):
        try:
            # A fresh session per attempt, closed as soon as the headers
            # are read so connections do not pile up across retries.
            with requests.session() as session:
                h = session.get(url, verify=False, allow_redirects=False, timeout=20).headers
            x_vc_bdturing_parameters = h.get('x-vc-bdturing-parameters')
            if not x_vc_bdturing_parameters:
                logger.info('提取:x_vc_bdturing_parameters 失败,重试!')
                time.sleep(random.randint(3, 5))
                continue
            verify_data = json.loads(base64.b64decode(x_vc_bdturing_parameters).decode("utf-8"))
            fp = verify_data.get("fp")
            detail = verify_data.get("detail")
            logger.info(f"成功提取:{fp}, 开始验证")
            try:
                # The verifier can fail intermittently; log and try again.
                msg = Verify().verify(fp, detail)
                logger.info(msg)
            except Exception as e:
                logger.info(f"{e}")
                continue
            if msg.get('code') != 200:
                logger.info(f"{msg.get('message')},重试")
                continue
            logger.info(f"ck s_v_web_id:{fp}, {msg.get('message')}")
            self.cookie = f's_v_web_id={fp};'
            return
        except Exception as e:
            logger.info(f'提取:x_vc_bdturing_parameters 出错:{e}')
            time.sleep(random.randint(3, 5))
            continue
    # All attempts failed: pause on stdin so an operator notices, as before.
    input('没有提取到fp')
def _get_xb(self, params):
    """Compute the X-Bogus value for ``params``.

    Reads the ``x-b.js`` script (resolved against the current working
    directory), compiles it with ``execjs`` and calls its ``Xb`` function
    with the URL-encoded form of ``params``.

    Args:
        params: Query parameters in any form accepted by ``urlencode``.

    Returns:
        Whatever the script's ``Xb`` function returns.
    """
    # Load the signing script on every call.
    with open('x-b.js', encoding='utf-8') as script_file:
        signer = execjs.compile(script_file.read())
    return signer.call('Xb', urlencode(params))
def get_aweme_list(self, sec_user_id):
"""
根据作者ID获取所有视频ID
"""
headers = {
"authority": "www.douyin.com",
"accept": "application/json, text/plain, */*",
"accept-language": "zh-CN,zh;q=0.9",
"bd-ticket-guard-client-csr": "LS0tLS1CRUdJTiBDRVJUSUZJQ0FURSBSRVFVRVNULS0tLS0NCk1JSUJEekNCdFFJQkFEQW5NUXN3Q1FZRFZRUUdFd0pEVGpFWU1CWUdBMVVFQXd3UFltUmZkR2xqYTJWMFgyZDENCllYSmtNRmt3RXdZSEtvWkl6ajBDQVFZSUtvWkl6ajBEQVFjRFFnQUVWdHJwOUhyOTdwRCttcGVxcTZIZzBUanUNCnJQRVpGSVQzajBTUGFQNGVGaXRzeHU5U3U2ZWJFWHVDNDVlYkMxbExFVlBGVXNPZFF6TWlsTjFmWThDdlZxQXMNCk1Db0dDU3FHU0liM0RRRUpEakVkTUJzd0dRWURWUjBSQkJJd0VJSU9kM2QzTG1SdmRYbHBiaTVqYjIwd0NnWUkNCktvWkl6ajBFQXdJRFNRQXdSZ0loQU5WOWlTOUVzVGszem5KOFprTDVNKzNZTk11NTRRNnF6Qm5kUy9Yd1Y3b1INCkFpRUFxbERLTkcrcUMyMjBBQ1B2Z1IrVlI2VWh3RXhUOEZTS0N2LzU2clBrMmNzPQ0KLS0tLS1FTkQgQ0VSVElGSUNBVEUgUkVRVUVTVC0tLS0tDQo=",
"bd-ticket-guard-version": "2",
"cache-control": "no-cache",
"pragma": "no-cache",
"referer": "https://www.douyin.com/",
"sec-ch-ua": "^\\^Chromium^^;v=^\\^110^^, ^\\^Not",
- 1
- 2
- 3
- 4
前往页