自己写着玩的一个小脚本,用于下载百度图片。
import re
import os
import requests
import hashlib
def dowmloadPic(html, keyword):
    """Extract image URLs from a result page and append them to ``downText.txt``.

    Every ``"objURL":"..."`` value found in *html* is written as one line of
    the form ``<md5 of url> <keyword> <url>``. The images themselves are not
    downloaded; only their URLs are recorded.

    Args:
        html: Text of the search-result page to scan.
        keyword: Search term stored alongside each URL.

    Returns:
        1 if *html* contains no image URL (nothing is written), otherwise 0.
    """
    pic_urls = re.findall(r'"objURL":"(.*?)",', html, re.S)
    if not pic_urls:
        return 1

    # Open the log once for the whole page instead of once per URL, and pin
    # the encoding so non-ASCII keywords do not depend on the platform locale.
    with open('downText.txt', 'a', encoding='utf-8') as log:
        for count, pic_url in enumerate(pic_urls, start=1):
            print(count, end=',')  # running counter, all on one console line
            digest = hashlib.md5(pic_url.encode('utf-8')).hexdigest()
            log.write(digest + ' ' + keyword + ' ' + pic_url + '\n')

    print('\n')
    return 0
if __name__ == '__main__':
    # Interactive entry point: ask for a keyword and a page count, fetch each
    # Baidu image result page in turn and hand its text to dowmloadPic().
    word = input('enter a key word:')
    # Any page count below 1 is treated as a single page.
    page = max(1, int(input('enter the page:')))
    search_url = 'http://image.baidu.com/search/flip'

    for p in range(1, page + 1):
        print(word + ',第[' + str(p) + ']页:')
        # Build the query from scratch for every page. Appending '&pn=...' to
        # the same URL string each time would pile up pn parameters
        # (&pn=0&pn=20&pn=40...). Passing params also lets requests
        # percent-encode the keyword.
        query = {
            'tn': 'baiduimage',
            'ie': 'utf-8',
            'word': word,
            'ct': '201965323',
            'v': 'flip',
            'pn': (p - 1) * 20,  # result offset advances by 20 per page
        }
        # The timeout keeps a stalled connection from hanging the script.
        response = requests.get(search_url, params=query, timeout=10)
        result = response.content.decode('utf-8')
        # A non-zero code means the page held no image URLs, so stop early.
        if dowmloadPic(result, word):
            print('无相关数据,提前退出程序')
            break

    print('程序结束')
您可能感兴趣的文章:python生成器/yield协程/gevent写简单的图片下载器功能示例