#!/usr/bin/env python
# -*- coding:utf-8 -*-
# author:xjl
# datetime:2019/12/30 19:39
# software: PyCharm
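"""Hand-written examples of downloading resources from a target URL.

Steps:
1. Find the target URL.
2. Create a local folder.
3. Save the downloaded files into that folder.

Known issue: the downloaded images sometimes cannot be opened; the cause has
not been identified yet.
"""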
import requests
import urllib
from urllib.request import urlretrieve
import re
import os
from bs4 import BeautifulSoup
def file_multi_down(url):
    """Download every matching Pixabay JPEG referenced by the page at *url*.

    The page is fetched, its ``<img>`` tags whose ``src`` matches
    ``https://cdn.pixabay.com/photo/...jpg`` are collected, and each image is
    saved as ``image/<index>.jpg``, where ``<index>`` is the position of the
    tag in the match list.  The ``image`` directory is created when missing.
    Images that cannot be fetched are reported and skipped.

    Args:
        url: Address of the HTML page to scan for images.

    Raises:
        requests.exceptions.RequestException: If the page itself cannot be
            fetched within the timeout.
    """
    html = requests.get(url, timeout=10).text  # fetch the page markup
    # ``html`` is already decoded text, so ``from_encoding`` would be ignored
    # (with a warning) by BeautifulSoup; it is therefore not passed.
    soup = BeautifulSoup(html, 'html.parser')
    # Matches tags such as
    # <img src="https://cdn.pixabay.com/photo/.../name.jpg">
    pic_nodes = soup.find_all('img', src=re.compile(r'^https://cdn.pixabay.com/photo/.*?jpg$'))
    print(pic_nodes)
    # Make sure the output directory exists before writing into it.
    os.makedirs('image', exist_ok=True)
    for i, node in enumerate(pic_nodes):
        img = node['src']
        try:
            pic = requests.get(img, timeout=10)  # 10-second timeout per image
            # Treat HTTP errors as failures so that an error page is never
            # saved under a .jpg name.
            pic.raise_for_status()
        except requests.exceptions.RequestException:
            print('当前图片无法下载')
            # Skip this image; without this the previous response (or an
            # unbound name) would be written below.
            continue
        file_name = "image/" + str(i) + ".jpg"  # build the image file name
        print(file_name)
        # Store the image bytes locally.
        with open(file_name, 'wb') as fp:
            fp.write(pic.content)
# Download a single image from the target site (plain requests.get).
def image_upload(IMAGE_URL):
    """Fetch *IMAGE_URL* with ``requests`` and save the body to ``./image1.png``.

    The response object is printed before the body is written.

    Args:
        IMAGE_URL: Address of the image to download.

    Raises:
        requests.exceptions.RequestException: If the request fails, times
            out, or the server answers with an error status.
    """
    # A timeout keeps the call from hanging forever on an unresponsive host.
    r = requests.get(IMAGE_URL, timeout=10)
    print(r)
    # Fail loudly rather than saving an error page under an image name.
    r.raise_for_status()
    with open('./image1.png', 'wb') as f:
        f.write(r.content)
# Download a single image from the target site (urllib's urlretrieve).
def urllib_download(IMAGE_URL, filename='./image2.png'):
    """Save the resource at *IMAGE_URL* to *filename* using ``urlretrieve``.

    Args:
        IMAGE_URL: Address of the image to download.
        filename: Destination path; defaults to ``./image2.png``.
    """
    urlretrieve(IMAGE_URL, filename)
# Download a single image from the target site (streamed in chunks).
def chunk_download(IMAGE_URL):
    """Stream *IMAGE_URL* to ``./image3.png`` chunk by chunk.

    Args:
        IMAGE_URL: Address of the image to download.

    Raises:
        requests.exceptions.RequestException: If the request fails, times
            out, or the server answers with an error status.
    """
    # Using the response as a context manager releases the connection even
    # when writing fails part-way through the stream.
    with requests.get(IMAGE_URL, stream=True, timeout=10) as r:
        # Do not save an error page under an image name.
        r.raise_for_status()
        with open('./image3.png', 'wb') as f:
            # 8 KiB chunks avoid the overhead of thousands of tiny writes.
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
# Download an arbitrary resource (e.g. a zip archive) from the target site.
def zip_down(url, filename="./tomcat.zip", retries=3):
    """Download *url* to *filename*, retrying when the transfer is truncated.

    Args:
        url: Address of the resource to download.
        filename: Destination path; defaults to ``./tomcat.zip``.
        retries: How many additional attempts to make after a truncated
            download before giving up.

    Raises:
        urllib.error.ContentTooShortError: If the download is still
            truncated after all retries have been used.
    """
    # Imported locally: the exception lives in ``urllib.error``, not directly
    # on the ``urllib`` package.
    from urllib.error import ContentTooShortError

    attempts_left = retries
    while True:
        try:
            urlretrieve(url, filename)
        except ContentTooShortError:
            if attempts_left <= 0:
                raise
            attempts_left -= 1
            print('Network conditions is not good.Reloading.')
        else:
            return
if __name__ == '__main__':
    # Single-image download examples (uncomment one to try it).
    image_url = "http://image.nationalgeographic.com.cn/2017/1122/20171122113404332.jpg"
    # image_upload(image_url)
    # urllib_download(image_url)
    # chunk_download(image_url)
    zip_resource = "http://mirror.bit.edu.cn/apache/tomcat/tomcat-8/v8.5.50/bin/apache-tomcat-8.5.50-windows-x64.zip"
    # zip_down(zip_resource)
    # Batch-download multiple images from a page.
    url = 'https://pixabay.com/'
    file_multi_down(url)
# Article title: "Python - downloading resources from a target URL"