文章目录 [ 隐藏 ]
爬虫其实很简单,只要用心,很快就能掌握这门技术。我们上一篇讲了怎么加载花瓣网美女图片,这一章我们增加界面功能,并把项目转成exe可执行文件。
爬虫我已经做好了,可以从下面的地址下载:
可以输入关键字,点击确定,就可以下载相关图片。
链接: https://pan.baidu.com/s/1gfxcysF 密码: t8wz
实现原理与难点
- 实现原理
- 界面编程
- 把项目转换成exe可执行文件
实现原理
这里我们用到面向对象的思想,项目共有三个文件:
- DownloadHuaban.py, 实现PyQt5界面
- UtilsRequest.py, 实现网络请求和下载图片
- Huaban.py 实现逻辑判断和网址获取
界面编程
界面编程我们用到PyQt5技术,如果对PyQt5不了解,可以查看:http://code.py40.com/face
python项目转exe可执行文件
我们需要使用PyInstaller模块把python转exe文件。
关于PyInstaller的使用方法请查看教程Python转exe
执行命令(完整命令见文末“Python转exe文件”一节):
下面是源码
PyQt界面代码
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 |
# -*- coding: utf-8 -*-
'''
auth:py40.com
download picture from huaban
'''
import sys,os
import threading
import webbrowser
from PyQt5.QtGui import QIcon
from PyQt5.QtWidgets import QApplication, QWidget,QTextBrowser, QPushButton,QDesktopWidget,QLabel,QLineEdit,QTextEdit,QCheckBox,QGridLayout,QFileDialog,QVBoxLayout,QMessageBox
import Huaban


class DownloadHuaban(QWidget):
    """Main window of the Huaban image downloader.

    Lets the user pick a save directory and a search keyword, then runs
    ``Huaban.Huaban.downloadhuaban`` on a background thread so the window
    stays responsive while pages and images are fetched.
    """

    current_file_dictory = ""
    huban = ''
    item_name = "花瓣网图片下载器 1.0"

    def __init__(self):
        super().__init__()
        self.current_file_dictory = os.path.split(os.path.realpath(__file__))[0]
        self.huban = Huaban.Huaban()
        # Worker thread of the download in progress (None until the first start).
        self.t1 = None
        self.initUI()

    def initUI(self):
        """Build the widgets and layouts, then show the window."""
        tips = QLabel("作者:大猫")
        self.tips_1 = QLabel("网站:<a href='http://code.py40.com'>http://code.py40.com</a>")
        self.tips_1.setOpenExternalLinks(True)

        # Each empty spacer row gets its own QLabel: a widget can only live in
        # one layout cell, so reusing a single label for several cells (as the
        # previous version did) does not produce several spacer rows.
        gridDescript = QGridLayout()
        gridDescript.addWidget(tips, 1, 0)
        gridDescript.addWidget(self.tips_1, 2, 0)
        gridDescript.addWidget(QLabel(), 3, 0)
        gridDescript.addWidget(QLabel(), 4, 0)

        tips_savefile = QLabel("图片保存路径")
        tips_key = QLabel("搜索关键字")
        tips_guolv = QLabel("是否过滤掉普通图片")

        # The save-path button doubles as the display of the current path.
        self.btn_savefile = QPushButton(self.huban.file_save_path, self)
        self.ed_bieming = QLineEdit('花瓣')
        self.cb_guolv = QCheckBox()
        self.btn_ok = QPushButton('开始下载', self)
        self.btn_pause = QPushButton('暂停下载', self)

        self.btn_savefile.clicked.connect(self.btn_savefile_Clicked)
        self.btn_ok.clicked.connect(self.btn_ok_Clicked)
        self.btn_pause.clicked.connect(self.btn_pause_Clicked)

        grid = QGridLayout()
        grid.setSpacing(10)
        grid.addWidget(tips_savefile, 1, 0)
        grid.addWidget(self.btn_savefile, 1, 1)
        grid.addWidget(tips_key, 2, 0)
        grid.addWidget(self.ed_bieming, 2, 1)
        grid.addWidget(tips_guolv, 3, 0)
        grid.addWidget(self.cb_guolv, 3, 1)
        grid.addWidget(QLabel(), 4, 0)

        gridBtn = QGridLayout()
        gridBtn.setSpacing(10)
        gridBtn.addWidget(self.btn_ok, 1, 0)
        gridBtn.addWidget(self.btn_pause, 1, 1)

        vbox = QVBoxLayout()
        vbox.addLayout(gridDescript)
        vbox.addLayout(grid)
        vbox.addLayout(gridBtn)
        self.setLayout(vbox)

        self.resize(250, 150)
        self.center()
        self.setWindowTitle(self.item_name)
        self.setWindowIcon(QIcon('icon/icon.ico'))
        self.show()

    def center(self):
        """Move the window to the centre of the available desktop area."""
        qr = self.frameGeometry()
        cp = QDesktopWidget().availableGeometry().center()
        qr.moveCenter(cp)
        self.move(qr.topLeft())

    def btn_savefile_Clicked(self):
        """Ask for a target directory and show it on the save-path button."""
        filename = QFileDialog.getExistingDirectory(self, directory=self.huban.file_save_path)
        print(filename)
        # An empty string means the dialog was cancelled; keep the current
        # path instead of blanking the button.
        if filename:
            self.btn_savefile.setText(filename)

    def btn_pause_Clicked(self):
        """Ask the crawler to stop and tell the user."""
        self.huban.stopDownLoadHuban()
        self.showTipsDialog(text="下载已暂停")

    def btn_ok_Clicked(self):
        """Apply the path/keyword from the form and start the download thread."""
        print("oncliuck")
        self.huban.ru.is_pasue = False
        save_file = self.btn_savefile.text()
        if save_file:
            self.huban.file_save_path = save_file
        self.huban.text_keyword = self.ed_bieming.text()

        # A worker is still running: clearing the pause flag above is enough,
        # starting a second thread would crawl the same pages twice.
        if self.t1 is not None and self.t1.is_alive():
            return

        # Pass the bound method itself. Writing ``target=self.huban.downloadhuaban()``
        # would run the whole crawl right here on the GUI thread and hand
        # Thread a target of None.
        self.t1 = threading.Thread(target=self.huban.downloadhuaban, daemon=True)
        self.t1.start()

    def showTipsDialog(self, text):
        """Show ``text`` in a modal "提示" message box."""
        # QMessageBox.about is a static call that shows the box itself and
        # returns nothing, so there is no object to call .show() on.
        QMessageBox.about(self, "提示", text)

    def tips_1_Clicked(self, url):
        """Open the project website in the default browser (``url`` is unused)."""
        webbrowser.open("http://code.py40.com")


if __name__ == '__main__':
    app = QApplication(sys.argv)
    a = DownloadHuaban()
    sys.exit(app.exec_())
Huaban.py
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
import re,os
import UtilsRequest


class Huaban():
    """Crawl Huaban search result pages and download the matching images.

    Attributes are class-level defaults that the GUI overwrites on the
    instance before starting a download.
    """

    file_save_path = "D:/work/python/pic"
    text_keyword = "航空"
    page_nums = 0
    down_photo_num = 0  # number of images downloaded so far
    ru = ""

    # Extracts (pin_id, key, like_count, repin_count) from the JSON embedded in
    # the search page. Raw string so that ``\d`` is a regex escape rather than
    # an invalid string escape.
    _PIN_PATTERN = re.compile(
        r'{"pin_id":(\d*?),.*?"key":"(.*?)",.*?"like_count":(\d*?),.*?"repin_count":(\d*?),.*?}',
        re.S)

    def __init__(self):
        super().__init__()
        self.ru = UtilsRequest.UtilsRequest()

    def downloadhuaban(self):
        """Download images page by page until paused or no more results.

        Each iteration requests the search page numbered ``self.page_nums``
        for ``self.text_keyword``, saves every sufficiently popular image
        under ``<file_save_path>/<text_keyword>/<pin_id>.jpg`` and then moves
        on to the next page. Runs as a loop rather than calling itself once
        per page, so long crawls cannot exhaust the recursion limit.
        """
        while not self.ru.is_pasue:
            urlhuaban = "http://huaban.com/search/?q=%s&per_page=20&wfl=1&page=%d"
            urlhuaban = urlhuaban % (self.text_keyword, self.page_nums)
            file_save_path = self.file_save_path + "/" + self.text_keyword + "/"
            print("*******************************************************************")
            print("请求网址:", urlhuaban)
            self.page_nums += 1
            os.makedirs(file_save_path, exist_ok=True)

            text = self.ru.requestpageText(urlhuaban)
            # requestpageText yields None when paused or when the request
            # failed; treat that like a page without results.
            items = self._PIN_PATTERN.findall(text) if text else []
            if not items:
                print("*******************************************************************")
                print("共下载图片%d张" % self.down_photo_num)
                print("下载资源结束~~~~~~~~~~~~~或未找到资源")
                return
            print(items)

            for item in items:
                # Stop immediately on pause instead of walking the rest of
                # the page and counting images that were never fetched.
                if self.ru.is_pasue:
                    return
                max_pin_id = item[0]
                x_key = item[1]
                x_like_count = int(item[2])
                x_repin_count = int(item[3])
                # Same filter as before: the old
                # "(repin > 10 and like > 10) or repin > 10 or like > 1"
                # reduces to these two terms.
                if x_repin_count > 10 or x_like_count > 1:
                    print("开始下载第{0}张图片".format(self.down_photo_num))
                    url_image = "http://hbimg.b0.upaiyun.com/"
                    url_item = url_image + x_key
                    filename = file_save_path + str(max_pin_id) + ".jpg"
                    if os.path.isfile(filename):
                        print("文件存在:", filename)
                        continue
                    self.ru.downfile(filename, url_item)
                    # Count only images that actually reached the disk.
                    if os.path.isfile(filename):
                        self.down_photo_num += 1

    def stopDownLoadHuban(self):
        """Set the shared pause flag; the download loop stops at its next check."""
        self.ru.is_pasue = True
UtilsRequest.py
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
import os
import requests,time
from PyQt5.QtWidgets import QApplication


class UtilsRequest():
    """HTTP helper: fetch page text with retries and stream files to disk."""

    # network timeout, in seconds
    time_out = 30
    # number of retries after a failed request
    request_nums = 5
    # pause flag (name kept as-is because other modules set it directly)
    is_pasue = False

    def __init__(self):
        super().__init__()

    def requestpageText(self, url):
        """Return the body of ``url`` decoded as UTF-8, or None.

        Makes one attempt plus up to ``request_nums`` retries, sleeping five
        seconds between attempts. Returns None when paused or when every
        attempt failed.

        The previous version reset its retry counter on every recursive call,
        so a failing request retried forever, and it dropped the result of a
        successful retry.
        """
        head = {
            'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'}
        attempts = self.request_nums + 1
        for attempt in range(attempts):
            if self.is_pasue:
                return None
            try:
                page = requests.get(url, headers=head, timeout=self.time_out)
            except requests.RequestException as e:
                print("联网失败了...重试中", e)
                # No point sleeping after the final attempt.
                if attempt < attempts - 1:
                    time.sleep(5)
                    print("暂停结束")
                continue
            page.encoding = "utf-8"
            QApplication.processEvents()
            print("获取网页数据成功")
            return page.text
        return None

    def downfile(self, file, url):
        """Stream ``url`` into the local path ``file``.

        Returns True when the file was written completely, False when paused
        or when the download failed. Callers that ignore the return value
        keep working as before.
        """
        if self.is_pasue:
            return False
        print("开始下载:", file, url)
        try:
            with requests.get(url, stream=True, timeout=self.time_out) as r:
                # Do not save an HTTP error page under an image file name.
                r.raise_for_status()
                with open(file, 'wb') as fd:
                    # iter_content() defaults to 1-byte chunks; use a real
                    # buffer size so a download is not one write per byte.
                    for chunk in r.iter_content(chunk_size=8192):
                        fd.write(chunk)
                        QApplication.processEvents()
            return True
        except (requests.RequestException, OSError) as e:
            print("下载失败了", e)
            # Remove a partial file so it is not mistaken for a finished
            # download the next time the same image is encountered.
            try:
                os.remove(file)
            except OSError:
                pass
            return False
Python转exe文件
好了,目前我们的程序只能在装有Python环境的电脑上使用。如果不安装Python,怎么在Windows环境下使用呢?这就需要我们使用PyInstaller模块把Python程序转成exe文件。
关于PyInstaller的使用方法请查看教程Python转exe
执行命令:
1 2 |
E:\python\python\Scripts>pyinstaller.exe -F E:\python\python_tools.git\trunk\huanbanwang\client\DownloadHuaban.py
等命令执行完毕,可以看到我们的exe文件,这个exe文件可以在windows系统上运行,不用安装python环境。
未经允许不得转载:Python在线学习 » Python爬虫-爬取花瓣网图片三(增加界面)