Python 笔记(三) 采集某音视频
# -*- coding: utf-8 -*-
"""Download one short video: fetch its page, extract the media URL, save an MP4.

How it works:
1. Send a request and fetch the page data.
2. Parse the data and save the result.

Notes carried over from the original comments:
- For multi-video collection, analyse how the page URL changes per video.
- Inside the page, the video ``src`` value is a redirect link: locate it,
  match it with a regular expression, URL-decode it, then write the
  downloaded bytes to an MP4 file.
"""
import csv
import re
from time import time

import parsel
import requests

# NOTE(review): this is a private, pip-vendored module and is not used by the
# code visible here; it is kept only so nothing outside this view breaks.
from pip._vendor.rich import json

# (.*?) lazily captures everything between the two literal anchors.
_TITLE_PATTERN = re.compile(r'<title data-react-helmet="true">(.*?)</title>')
_VIDEO_SRC_PATTERN = re.compile(r'src(.*?)"},{"')

# Characters that cannot appear in a file name on common file systems.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\r\n\t]')

# Seconds to wait for the server before giving up instead of hanging forever.
_REQUEST_TIMEOUT = 30


def _first_match(pattern, text, what):
    """Return the first captured group of ``pattern`` in ``text``.

    Raises:
        ValueError: if the pattern does not occur, e.g. because the page
            layout changed or the request was answered with a block page.
    """
    match = pattern.search(text)
    if match is None:
        raise ValueError('could not find %s in the page' % what)
    return match.group(1)


def _safe_filename(title):
    """Turn a page title into a usable file name (without extension)."""
    cleaned = _UNSAFE_FILENAME_CHARS.sub('_', title).strip(' .')
    # An all-punctuation or empty title would otherwise yield just ".mp4".
    return cleaned or 'video'


def main():
    """Fetch the hard-coded video page and save the video as ``<title>.mp4``.

    Prints the page title, the raw (still URL-encoded) source fragment, the
    decoded download URL, and finally the elapsed wall-clock time.

    Raises:
        ValueError: if the title or the video source cannot be found.
        requests.RequestException: on network errors, timeouts, or HTTP
            error statuses.
    """
    start = time()

    # Single video link.
    # NOTE(review): the URL appears redacted ("htt***in.com") in this copy;
    # replace it with the real link, including the scheme, before running.
    url = 'htt***in.com/video/7102976099893726468'
    headers = {
        # NOTE(review): hard-coded session cookie (one entry, "dou***com",
        # also looks redacted). Prefer loading it from configuration.
        'cookie': 'ttwid=1|hG0kpelVMqzSZfkO_rtj8iOALM-zNSkUnkVPKEZhGHM|1653807941|05d1a7f8d3b87e942cb7b78042736355b3d237751ac6fa0e8d1d6f648190c1d3; dou***com; strategyABtestKey=1653807944.929; passport_csrf_token=af41f908b3561447aada38684bc61922; passport_csrf_token_default=af41f908b3561447aada38684bc61922; _tea_utm_cache_2285=undefined; ttcid=14e69838859a41ca8c73be39d792b9bb18; _tea_utm_cache_6383=undefined; _tea_utm_cache_1300=undefined; THEME_STAY_TIME="299505"; IS_HIDE_THEME_CHANGE="1"; pwa_guide_count="3"; __ac_nonce=0629329ae00490204570a; __ac_signature=_02B4Z6wo00f01lP-nZAAAIDC0.xn0ZCu3hJT3pkAAPZ756XEuILz-YmH3p5as5tBwCjH9QBCFROw-9LYkUt9.nCeebXVx1DzPNVd-yBewu1woRj.AQw6XuAwAUqiscgaSQ0Bp7Azhm0MG79F32; home_can_add_dy_2_desktop="1"; msToken=oElEdtCwAY-oW727oex-ylY5s6bhyXc0vl7_JLS1O2D-qjfr1K0Z3ceAqNKNFC_H_JsJOox2nj0wbOlvYzeR-LfoVK_CLwwjJkBvEvVGu7p54EjJhngpk1I=; tt_scid=VQSplI87N418e17p3XKlO7S.AQhepL2rDMS9QAV8KqFVGDUu5.DTWdrH8mmWziSP3f35; AVATAR_FULL_LOGIN_GUIDE_ITA_TIMESTAMP="1653811641788"; AVATAR_FULL_LOGIN_GUIDE_ITA_COUNT="1"; s_v_web_id=verify_l3r0rjv0_JzCyMEuL_W5M8_4yOk_97B4_RH2THzWfqjNG; VIDEO_FILTER_MEMO_SELECT={"expireTime":1654416467488,"type":0}; msToken=9-2NQXP7MDpVtGJKKVA72dTFmnP_LHga9H_QseAfKSL5FO2Cw12c0_vJK416ciyO9a0ag6F5oClvZK3c5QNZXugD8VLVfZyJniqQWK_nff8re2EgqSzxyNU=',
        # Basic browser identification.
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.93 Safari/537.36',
    }

    # Fetch the video page; fail early on an HTTP error instead of parsing
    # an error page.
    response = requests.get(url=url, headers=headers, timeout=_REQUEST_TIMEOUT)
    response.raise_for_status()

    # Extract the title.
    title = _first_match(_TITLE_PATTERN, response.text, 'the page title')
    print(title)

    # Extract the initial, still URL-encoded source fragment.
    video_url = _first_match(_VIDEO_SRC_PATTERN, response.text, 'the video source')
    print(video_url)

    # URL-decode the fragment; the leading '":"' separator left over from the
    # embedded JSON is swapped for the scheme to form an absolute URL.
    video_url_1 = requests.utils.unquote(video_url).replace('":"', 'https:')
    print(video_url_1)

    # Request the binary video data.
    video_response = requests.get(
        url=video_url_1, headers=headers, timeout=_REQUEST_TIMEOUT
    )
    video_response.raise_for_status()

    # Write the file; the title is sanitised because it may contain
    # characters that are not allowed in file names.
    with open(_safe_filename(title) + '.mp4', mode='wb') as f:
        f.write(video_response.content)

    end = time()
    print('总共消耗%.2f秒.' % (end - start))


# Collect Douyin videos.
if __name__ == '__main__':
    main()