前言
1.该代码只是为了练习爬虫而写的,非恶意代码,如有侵权,会及时删除;
2.该网站的视频本身是可以下载的,使用代码目的只是为了方便下载视频。
1 获取视频地址的加密信息
# mp4地址是加密在源代码里面的
def get_encode(self):
    """Fetch the video page and return its encrypted metadata string.

    Requests ``self.url`` with ``self.headers`` and extracts the value of
    the ``"encrptedVideoMeta"`` field embedded in the page source.

    Returns:
        str: The raw (still encrypted) metadata text.

    Raises:
        ValueError: If the page source does not contain the field.
    """
    # The context manager releases the connection even if reading fails.
    with requests.get(url=self.url, headers=self.headers) as resp:
        page = resp.text
    # Flags must be given to re.compile(): the second positional argument
    # of a compiled pattern's search() is a start offset, not a flag.
    pattern = re.compile(r'"encrptedVideoMeta":"(.*?)"', re.S)
    match = pattern.search(page)
    if match is None:
        raise ValueError('encrptedVideoMeta not found in page: %s' % self.url)
    return match.group(1)
2 解密
# 两次解密,先base64解密,然后再用位运算解密
def get_decode(self, text, key='guanghui456'):
    """Decrypt the metadata and store the title and video URL on ``self``.

    The text is base64-decoded, every character is XOR-ed with the
    repeating ``key``, and the result is parsed as JSON.

    Args:
        text: Encrypted metadata string (base64 text).
        key: XOR key that is cycled over the decoded characters.

    Raises:
        ValueError: If ``key`` is empty.

    Side effects:
        Sets ``self.name`` from the ``title`` field and ``self.mp4_url``
        from the ``url`` of the last ``clarityUrl`` entry.
    """
    if not key:
        raise ValueError('key must not be empty')
    decoded = base64.b64decode(text.encode()).decode()
    key_len = len(key)
    # Join once instead of growing a string with += inside the loop.
    plain = ''.join(
        chr(ord(ch) ^ ord(key[i % key_len])) for i, ch in enumerate(decoded)
    )
    meta = json.loads(plain)
    self.name = meta['title']
    # The last entry is used; presumably the highest clarity -- TODO confirm.
    self.mp4_url = meta['clarityUrl'][-1]['url']
3 下载视频
def downLoad(self):
    """Download ``self.mp4_url`` and save it as ``video/<name>.mp4``.

    The response is streamed to disk in chunks so the whole video is never
    held in memory. Characters that are not allowed in file names are
    replaced with ``_`` in the output file name.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # exist_ok avoids the check-then-create race of isdir() + mkdir().
    os.makedirs('video', exist_ok=True)
    # Titles may contain path separators or other characters that are
    # invalid in file names on common file systems.
    safe_name = re.sub(r'[\\/:*?"<>|\r\n]', '_', self.name)
    with requests.get(url=self.mp4_url, headers=self.headers, stream=True) as resp:
        # Fail instead of saving an error page as an .mp4 file.
        resp.raise_for_status()
        with open('video/%s.mp4' % safe_name, mode='wb') as f:
            for chunk in resp.iter_content(chunk_size=1024 * 1024):
                f.write(chunk)
    print('%s下载完成' % self.name)
4 完整代码
"""
好看视频的短视频的播放地址在源代码里面,但是加密了,所以需要对加密内容进行解密
需要进行两次解密,1是base64解密,2是位运算解密
"""
import requests
import base64
import json
import re
import os
class Application:
    """Download a single Haokan short video from its page URL.

    The MP4 address is embedded in the page source in encrypted form, so
    the flow is: fetch the page, extract the encrypted metadata, decrypt it
    (base64 followed by a repeating-key XOR), then download the video.
    """

    def __init__(self, url):
        """Store the page URL and the request headers used for all requests.

        Args:
            url: Address of the video page.
        """
        self.url = url
        self.headers = {
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36 Edg/133.0.0.0'
        }

    def get_encode(self):
        """Fetch the video page and return its encrypted metadata string.

        Returns:
            str: Value of the ``"encrptedVideoMeta"`` field in the page.

        Raises:
            ValueError: If the page source does not contain the field.
        """
        # The context manager releases the connection even if reading fails.
        with requests.get(url=self.url, headers=self.headers) as resp:
            page = resp.text
        # Flags must be given to re.compile(): the second positional
        # argument of a compiled pattern's search() is a start offset.
        pattern = re.compile(r'"encrptedVideoMeta":"(.*?)"', re.S)
        match = pattern.search(page)
        if match is None:
            raise ValueError('encrptedVideoMeta not found in page: %s' % self.url)
        return match.group(1)

    def get_decode(self, text, key='guanghui456'):
        """Decrypt the metadata and store the title and video URL.

        The text is base64-decoded, every character is XOR-ed with the
        repeating ``key``, and the result is parsed as JSON.

        Args:
            text: Encrypted metadata string (base64 text).
            key: XOR key that is cycled over the decoded characters.

        Raises:
            ValueError: If ``key`` is empty.

        Side effects:
            Sets ``self.name`` from the ``title`` field and ``self.mp4_url``
            from the ``url`` of the last ``clarityUrl`` entry.
        """
        if not key:
            raise ValueError('key must not be empty')
        decoded = base64.b64decode(text.encode()).decode()
        key_len = len(key)
        # Join once instead of growing a string with += inside the loop.
        plain = ''.join(
            chr(ord(ch) ^ ord(key[i % key_len])) for i, ch in enumerate(decoded)
        )
        meta = json.loads(plain)
        self.name = meta['title']
        # The last entry is used; presumably the highest clarity -- TODO confirm.
        self.mp4_url = meta['clarityUrl'][-1]['url']

    def downLoad(self):
        """Download ``self.mp4_url`` and save it as ``video/<name>.mp4``.

        The response is streamed to disk in chunks so the whole video is
        never held in memory. Characters that are not allowed in file names
        are replaced with ``_`` in the output file name.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        # exist_ok avoids the check-then-create race of isdir() + mkdir().
        os.makedirs('video', exist_ok=True)
        # Titles may contain path separators or other characters that are
        # invalid in file names on common file systems.
        safe_name = re.sub(r'[\\/:*?"<>|\r\n]', '_', self.name)
        with requests.get(url=self.mp4_url, headers=self.headers, stream=True) as resp:
            # Fail instead of saving an error page as an .mp4 file.
            resp.raise_for_status()
            with open('video/%s.mp4' % safe_name, mode='wb') as f:
                for chunk in resp.iter_content(chunk_size=1024 * 1024):
                    f.write(chunk)
        print('%s下载完成' % self.name)

    def main(self):
        """Run the full pipeline: fetch, decrypt, download."""
        text_ = self.get_encode()
        self.get_decode(text_)
        self.downLoad()
if __name__ == '__main__':
    # Ask for the video page URL; the prompt shows the expected URL shape.
    main_url = input('输入https://haokan.baidu.com/v?vid=*************&tab=recommend:').strip()
    if not main_url:
        # Stop early instead of failing later with an opaque request error.
        raise SystemExit('未输入视频地址')
    app = Application(main_url)
    app.main()