import requests
from bs4 import BeautifulSoup
# Page whose sidebar tree (div#cartree) lists the new-energy car brands.
url = 'https://car.autohome.com.cn/diandongche/index.html'
# Browser-like request headers sent along with the GET request.
headers = {
    'Referer': 'https://car.autohome.com.cn/',
    'Sec-Fetch-Mode': 'no-cors',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36',
}
# Result of the crawl: top-level brand name -> absolute URL of the brand page.
brand_data = {}
# Prefix used to turn the hrefs found in the tree into absolute URLs.
_SITE_ROOT = 'https://car.autohome.com.cn'


def _extract_brand_links(html):
    """Return (brand name, absolute URL) pairs found under div#cartree.

    Args:
        html: Decoded HTML text of the brand index page.

    Returns:
        A list of (name, url) tuples in document order, or None when the
        page contains no div with id "cartree".
    """
    soup = BeautifulSoup(html, "lxml")
    car_tree = soup.find('div', id='cartree')
    if car_tree is None:
        return None
    links = []
    for item in car_tree.find_all('li'):
        for anchor in item.find_all('a'):
            href = anchor.get('href')
            if not href:
                # An anchor without a target cannot yield a brand URL; skip
                # it rather than abort the whole crawl.
                continue
            links.append((anchor.text.strip(), _SITE_ROOT + href))
    return links


try:
    # The timeout keeps the script from blocking forever on a stalled server.
    r = requests.get(url, headers=headers, timeout=10)
except requests.RequestException as exc:
    # Only network/HTTP-layer failures are handled here; programming errors
    # and Ctrl-C are no longer silently swallowed.
    print("爬取失败!", exc)
else:
    if r.status_code == 200:
        # Decode with the encoding detected from the page content so the
        # Chinese text does not come out garbled.
        r.encoding = r.apparent_encoding
        found = _extract_brand_links(r.text)
        if found is None:
            print("爬取失败!", "div#cartree not found")
        else:
            for name, link in found:
                print("一级品牌数据:", name, "=", link)
                brand_data[name] = link
    else:
        # Report unexpected statuses instead of finishing silently with an
        # empty result.
        print("爬取失败!", "HTTP", r.status_code)
完整源代码下载:
某车汽车论坛汽车数据爬虫源代码及数据(Python 文档类资源,CSDN 下载):1. 根据所有新能源汽车品牌列表及链接地址,分别获取对应子品牌及车系数据列表;2. 根据所有车系列表数据分……(摘要在此处被截断)。更多下载资源、学习资料请访问 CSDN 下载频道:https://download.csdn.net/download/weixin_56516468/84861598