import time
import requests
import pandas as pd
import random
# Scrape the upload list of one Bilibili account into a pandas DataFrame.
#
# For every result page the script calls the "x/space/arc/search" endpoint,
# copies a fixed set of fields from each entry of data.list.vlist into a
# plain dict, and finally builds `video_df` with one row per video.

# Headers sent with every API call: a mobile Safari user agent plus a
# bilibili.com referer.
headers = {
    "user-agent":
        "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1",
    "Referer": "https://www.bilibili.com/",
}

# One dict per video, in the order the API returned them.
video_data = []

# range(1, 2) fetches page 1 only; widen the range to fetch more pages.
for page in range(1, 2):
    api_url = f'https://api.bilibili.com/x/space/arc/search?mid=10330740&ps=30&tid=0&pn={page}&keyword=&order=pubdate&jsonp=jsonp'

    # Without an explicit timeout, requests waits forever on a stalled
    # connection; 10 seconds is ample for a single JSON page.
    response = requests.get(api_url, headers=headers, timeout=10)
    # Fail on 4xx/5xx here instead of trying to JSON-decode an error page.
    response.raise_for_status()
    res = response.json()

    # A payload that lacks data.list.vlist would otherwise surface as a bare
    # KeyError/TypeError with no hint about which page or response caused it.
    try:
        video_list = res['data']['list']['vlist']
    except (KeyError, TypeError) as err:
        raise RuntimeError(
            f"unexpected API response for page {page}: {res!r}"
        ) from err

    for video in video_list:
        videos_dic = {
            "title": video['title'],
            "img": video["pic"],
            'video_time': video['length'],
            'play_num': video['play'],
            'video_description': video['description'],
            'author': video['author'],
            'comment_num': video['comment'],
            # The public watch URL is the fixed prefix plus the video's bvid.
            'play_url': 'https://www.bilibili.com/video/' + video['bvid'],
        }
        video_data.append(videos_dic)

    # Random 2-5 second pause after each page to space out requests.
    time.sleep(random.randint(2, 5))

# One row per scraped video; columns follow the key order used above.
video_df = pd.DataFrame(video_data)
# 【Python】B站详细信息抓取
# 关注
# 打赏