源站:https://fache.co/
脚本:
from bs4 import BeautifulSoup
import threading, time, requests, os, urllib3
import json
import sys
# Turn off the warnings urllib3 would otherwise emit for requests made by this script.
requests.packages.urllib3.disable_warnings()
# Root listing pages on the source site.
# NOTE(review): no function visible in this file reads this module-level list;
# downTask() binds its own local `urls` from get_urls() instead.
urls = ['https://fache.co/pic/', 'https://fache.co/pic2/', 'https://fache.co/pic3/']
# HTTP headers passed to every requests call below (page fetches and image downloads).
headers = {
    # The two adjacent string literals are concatenated into one User-Agent value.
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/65.0.3325.181 Safari/537.36',
}
class StoppableThread(threading.Thread):
    """Thread that carries a cooperative stop flag.

    The flag is a ``threading.Event``: ``stop()`` sets it and ``stopped()``
    reports whether it has been set. Setting the flag does not interrupt the
    thread by itself; the target function has to poll ``stopped()``.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments to ``threading.Thread`` and create the stop flag."""
        super().__init__(*args, **kwargs)
        self._stop_event = threading.Event()

    def stop(self):
        """Raise the stop flag."""
        self._stop_event.set()

    def stopped(self):
        """Return True once ``stop()`` has been called, False before that."""
        return self._stop_event.is_set()
def mkdir(path):
    """Ensure the directory *path* exists and return *path* unchanged.

    Missing parent directories are created as well. The call is safe to make
    from several threads at once: ``exist_ok=True`` lets the operating system
    arbitrate instead of a separate "check, then create" step that can race.

    :param path: directory to create if it is not there yet
    :return: the same *path* that was passed in
    :raises OSError: if the directory cannot be created (for example when
        *path* already exists as a regular file)
    """
    os.makedirs(path, exist_ok=True)
    return path
def creat_soup(url):
    """Fetch *url* and return its HTML parsed as a BeautifulSoup object.

    The request is sent with the module-level ``headers`` and a 30 second
    timeout. The response body is decoded as UTF-8 before parsing.

    :param url: link of the page to download
    :return: ``BeautifulSoup`` tree built with the ``html.parser`` backend
    :raises requests.exceptions.RequestException: when the page cannot be
        fetched. The error is printed and then re-raised; previously it was
        swallowed and the function failed one line later with an
        ``UnboundLocalError`` on ``response``, hiding the real cause.
    """
    # The context manager closes the session (and its pooled connections)
    # once the page has been read.
    with requests.Session() as session:
        try:
            response = session.get(url, headers=headers, timeout=30)
        except requests.exceptions.RequestException as e:
            print(e)
            raise
        response.encoding = 'UTF-8'
        return BeautifulSoup(response.text, 'html.parser')
def get_urls():
    """Load and return the contents of ``fache.json`` from the working directory.

    downTask() iterates over the returned value and treats each item as a
    page URL.

    The file is opened explicitly as UTF-8. Without an encoding argument
    ``open`` uses the locale's default codec, which on some systems is not
    UTF-8 and then mis-decodes or rejects non-ASCII JSON text.

    :return: the deserialized JSON document
    :raises OSError: if ``fache.json`` cannot be opened
    :raises json.JSONDecodeError: if the file is not valid JSON
    """
    with open('fache.json', 'r', encoding='utf-8') as f:
        return json.load(f)
def get_single_imgs(url):
    """Collect the page title and all image URLs found on the page at *url*.

    :param url: link of the page to scan
    :return: dict with two keys: ``'title'`` (text of the ``<title>`` tag, or
        *url* itself when the page has no ``<title>``) and ``'imgs'`` (list of
        image URLs in document order)
    """
    # Local import keeps this block self-contained; urllib.parse is stdlib.
    from urllib.parse import urljoin

    soup = creat_soup(url)

    # A page without <title> used to crash with AttributeError on None.
    title_tag = soup.find('title')
    title = title_tag.text if title_tag is not None else url

    # Skip <img> tags that carry no src attribute (they used to raise
    # KeyError) and resolve relative sources against the page URL so they can
    # be downloaded later; absolute URLs pass through urljoin unchanged.
    imgs = [
        urljoin(url, tag['src'])
        for tag in soup.find_all('img')
        if tag.get('src')
    ]
    return {'title': title, 'imgs': imgs}
# Maps characters that Windows rejects in file names, plus the ASCII control
# characters, to '_' so a page title can be used as a directory name.
_UNSAFE_DIR_CHARS = str.maketrans(
    {ch: '_' for ch in '\\/:*?"<>|' + ''.join(map(chr, range(32)))}
)


def _safe_dirname(title):
    """Return *title* with characters unusable in a directory name replaced by '_'."""
    return title.translate(_UNSAFE_DIR_CHARS)


def saveImages(title, imgs):
    """Download every URL in *imgs* into ``./fache/<title>/`` as ``1.jpg``, ``2.jpg``, ...

    The download is skipped when the target folder already exists and holds
    exactly as many entries as there are image URLs. Individual images that
    fail to download or to be written are reported and skipped, so one bad
    link does not abort the rest of the album.

    :param title: album title; used in progress messages and, after replacing
        characters that are invalid in directory names, as the folder name
    :param imgs: sequence of image URLs
    :return: ``False`` when the album was skipped as already downloaded,
        otherwise ``None``
    """
    imgPath = os.path.join(os.path.abspath('./'), 'fache', _safe_dirname(title))

    # Check for existence BEFORE listing: listing a folder that does not exist
    # yet raised FileNotFoundError on every first-time download.
    if os.path.isdir(imgPath) and len(os.listdir(imgPath)) == len(imgs):
        print('Passed:'+title+'已经下载!')
        return False

    os.makedirs(imgPath, exist_ok=True)
    count = 0  # number of images actually written (used to start at 1)
    print('开始下载:'+title+',共计图片:'+str(len(imgs)) +'张')
    for index, img in enumerate(imgs, start=1):
        filename = os.path.join(imgPath, str(index) + '.jpg')
        try:
            html = requests.get(img, headers=headers, timeout=30)
            # Do not save an HTTP error page under an image file name.
            html.raise_for_status()
            with open(filename, 'wb') as handle:
                handle.write(html.content)
        except (requests.exceptions.RequestException, OSError) as e:
            # Best effort: report the failure and move on to the next image.
            print(e)
            continue
        count += 1
    print('Finisded! '+title+'结束,共计下载'+str(count)+'张图片')
def downTask():
    """Download the images of every page listed by get_urls(), one thread per page.

    For each URL the page is scanned with get_single_imgs() and a
    StoppableThread running saveImages() is started; the function then waits
    for all started threads to finish.

    A page that cannot be scanned is reported and skipped so the remaining
    pages are still processed. On Ctrl-C (or SystemExit) every started thread
    gets its stop flag set and the process exits.

    NOTE(review): stop() only sets a flag; saveImages() as written in this
    file never polls stopped(), so running downloads are not cut short.
    """
    thread_list = []
    try:
        for url in get_urls():
            try:
                single = get_single_imgs(url)
            except Exception as e:
                # Boundary of the per-page work: any scraping failure
                # (network error, unexpected markup) skips just this page.
                print(e)
                continue
            th = StoppableThread(target=saveImages,
                                 args=(single['title'], single['imgs']))
            th.start()
            thread_list.append(th)
        for th in thread_list:
            th.join()
    except (KeyboardInterrupt, SystemExit):
        # Signal every thread started so far, not only the most recent one;
        # iterating the list is also safe when no thread was started yet.
        for th in thread_list:
            th.stop()
        sys.exit()
# Run the downloader only when this file is executed as a script, not when it is imported.
if __name__=="__main__":
    downTask()
本文由 admin 创作,采用 知识共享署名4.0 国际许可协议进行许可
本站文章除注明转载/出处外,均为本站原创或翻译,转载前请务必署名
最后编辑时间为: Sep 9, 2020 at 09:12 am