pyqt5 + pyinstaller 制作爬虫小程序

摘要:
环境:mac、python3.7、pyqt5、pyinstaller。ps:主要是熟悉pyqt5,加入了单选框、输入框、文本框、文件夹选择框及日历下拉框。效果图(见下文)。pyqt5 主程序文件:# -*- coding: utf-8 -*- # @Author: Mehaei # @Date: 2019-07-10 13:02:56 # @Last Modified by: Mehaei # @Last Modified time: 2019-07-15 16:43:18

环境:mac python3.7 pyqt5 pyinstaller

ps: 主要是熟悉pyqt5, 加入了单选框 输入框 文本框 文件夹选择框及日历下拉框

效果图:

pyqt5 + pyinstaller 制作爬虫小程序第1张

pyqt5 主程序文件 

# -*- coding: utf-8 -*-
# @Author: Mehaei
# @Date:   2019-07-10 13:02:56
# @Last Modified by:   Mehaei
# @Last Modified time: 2019-07-15 16:43:18
import os
import uuid
import sys
import time
import json
from PyQt5.QtGui import QRegExpValidator, QIntValidator
from PyQt5.QtCore import QDate, QBasicTimer, QRegExp
from PyQt5.QtWidgets import (QWidget, QDesktopWidget, QApplication, 
                            QMessageBox, QPushButton, QLabel, QLineEdit, QGridLayout, QComboBox,
                            QDateTimeEdit, QFileDialog, QProgressBar, QTextEdit)
 
 
from worker import Worker
 
 
class Example(QWidget):
    """Main window of the Amazon crawl tool.

    Collects the crawl parameters (product url, proxy type, count, time
    range and output directory) from the form and passes them to
    ``Worker``. The "Shop" button fetches the product info and shows it as
    JSON; the "Review" button then crawls reviews for that product.
    """

    # Output directory used when the user has not picked one.
    DEFAULT_DATA_DIR = "./data/"

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.initUI()

    def initUI(self):
        """Create every widget, arrange them in a grid and show the window."""
        # Main window size
        self.resize(500, 400)
        self.center()
        self.cwd = os.getcwd()
        # Initialised here so get_shop never has to probe for the attribute.
        self.dir_choose = self.DEFAULT_DATA_DIR

        url = QLabel('url')
        proxy = QLabel("proxy")
        count = QLabel("count")

        start_time = QLabel("start_time")
        end_time = QLabel("end_time")

        data_dir = QLabel("data_dir")

        shop_info = QLabel("shop_info")
        self.shopEdit = QTextEdit()

        # Directory chooser button
        self.btn_chooseDir = QPushButton(self)
        self.btn_chooseDir.setObjectName("btn_chooseDir")
        self.btn_chooseDir.setText("choose dir")
        self.btn_chooseDir.clicked.connect(self.slot_btn_chooseDir)

        # URL regex validation, Amazon only. The dots in the host name are
        # escaped so they match a literal "." instead of any character.
        url_regex = QRegExp(r'https?://www\.amazon\..+')
        url_line_re = QRegExpValidator(self)
        url_line_re.setRegExp(url_regex)

        self.urlEdit = QLineEdit()
        self.urlEdit.setPlaceholderText("Please product url")
        self.urlEdit.setValidator(url_line_re)

        # Drop-down box for the proxy type
        self.proxyCom = QComboBox()
        self.proxyCom.addItem("adsl(default)")
        self.proxyCom.addItem("None")

        # Count field, restricted to integers in 1..50000
        self.countEdit = QLineEdit()
        self.countEdit.setText("100")
        int_limit = QIntValidator(self)
        int_limit.setRange(1, 50000)
        self.countEdit.setValidator(int_limit)

        # The display format below is the one time_to_time_stamp parses.
        self.startdateEdit = QDateTimeEdit(QDate.currentDate(), self)
        self.startdateEdit.setDisplayFormat("yyyy-MM-dd HH:mm:ss")
        self.startdateEdit.setCalendarPopup(True)
        self.startdateEdit.dateChanged.connect(self.get_start_date)

        self.enddateEdit = QDateTimeEdit(QDate.currentDate(), self)
        self.enddateEdit.setDisplayFormat("yyyy-MM-dd HH:mm:ss")
        self.enddateEdit.setCalendarPopup(True)
        self.enddateEdit.dateChanged.connect(self.get_end_date)

        # Progress bar: it is placed in the layout below, so it must exist
        # before grid.addWidget is called. Nothing in this class updates
        # its value yet.
        self.pbar = QProgressBar(self)

        self.shopbtn = QPushButton('Shop', self)
        self.shopbtn.clicked.connect(self.get_shop)

        self.reviewbtn = QPushButton('Review', self)
        self.reviewbtn.clicked.connect(self.get_review)

        grid = QGridLayout()
        grid.setSpacing(5)

        grid.addWidget(url, 1, 0)
        grid.addWidget(self.urlEdit, 1, 1, 1, 4)

        grid.addWidget(proxy, 2, 0)
        grid.addWidget(self.proxyCom, 2, 1)

        grid.addWidget(count, 2, 2, 1, 2)
        grid.addWidget(self.countEdit, 2, 4)

        grid.addWidget(start_time, 3, 0)
        grid.addWidget(self.startdateEdit, 3, 1)

        grid.addWidget(end_time, 3, 2, 1, 2)
        grid.addWidget(self.enddateEdit, 3, 4)

        grid.addWidget(data_dir, 4, 0)
        grid.addWidget(self.btn_chooseDir, 4, 1)

        grid.addWidget(shop_info, 5, 0)
        grid.addWidget(self.shopEdit, 5, 1, 5, 5)

        grid.addWidget(self.pbar, 10, 0, 1, 5)
        grid.addWidget(self.shopbtn, 11, 0, 1, 2)
        grid.addWidget(self.reviewbtn, 11, 3, 1, 2)

        self.setLayout(grid)

        self.setWindowTitle('Amazon Crawl')
        self.show()

    def center(self):
        """Move the window so it is centred on the available desktop area."""
        qr = self.frameGeometry()
        cp = QDesktopWidget().availableGeometry().center()
        qr.moveCenter(cp)
        self.move(qr.topLeft())

    def get_start_date(self):
        """Return the date-time currently shown in the start-time editor."""
        return self.startdateEdit.dateTime()

    def get_end_date(self):
        """Return the date-time currently shown in the end-time editor."""
        return self.enddateEdit.dateTime()

    def slot_btn_chooseDir(self):
        """Ask the user for the data directory and show it on the button.

        Returns False when the dialog is cancelled; in that case the
        previously selected directory is kept instead of being replaced by
        an empty string.
        """
        chosen = QFileDialog.getExistingDirectory(self,
                                                  "Choose data save dir",
                                                  self.cwd)  # starting path
        if chosen == "":
            return False
        self.dir_choose = chosen
        self.btn_chooseDir.setText(chosen)

    def warning(self, title, content):
        """Show a modal warning box with the given title and text."""
        QMessageBox.warning(self, title, content)

    def get_shop(self):
        """Fetch the product info for the entered url and display it as JSON.

        Returns False (after showing a warning) when the url field is empty.
        """
        product_url = self.urlEdit.text()
        if not product_url:
            self.warning("Url is Null", "Please input product url")
            return False

        params = {
            "id": str(uuid.uuid4()).replace("-", ""),
            "url": product_url,
            "proxy_type": self.proxyCom.currentText(),
            "count": self.countEdit.text(),
            "start_time": self.time_to_time_stamp(self.startdateEdit.text()),
            "end_time": self.time_to_time_stamp(self.enddateEdit.text()),
            "data_save_dir": self.dir_choose or self.DEFAULT_DATA_DIR,
        }
        self.work = Worker(**params)
        self.shop_detail = self.work.start(shop=True, product_detail=None)
        self.shopEdit.setText(json.dumps(self.shop_detail, indent=4))

    def get_review(self):
        """Crawl reviews for the product fetched by get_shop and report it.

        Returns False (after showing a warning) when get_shop has not
        produced product info yet.
        """
        if not hasattr(self, "shop_detail"):
            self.warning("Product info is Null", "Please get product info")
            return False

        self.work.start(shop=False, product_detail=self.shop_detail)

        # NOTE(review): the statistics are read from the worker created in
        # get_shop; this assumes Worker exposes `cralwer_data_num` and
        # `file_data_pname` - confirm against worker.py.
        crawled = getattr(self.work, "cralwer_data_num", 0)
        saved_to = getattr(self.work, "file_data_pname", "")
        if crawled:
            message = "%s review crawl done, count:%s, Save to: %s" % (
                self.urlEdit.text(), crawled, saved_to)
        else:
            message = "%s review crawl done, count:%s" % (
                self.urlEdit.text(), crawled)
        QMessageBox.information(self, "Review done", message)

    def closeEvent(self, event):
        """Ask for confirmation before the window is closed."""
        reply = QMessageBox.question(self, 'Message',
                                     "Are you sure to quit?",
                                     QMessageBox.Yes | QMessageBox.No,
                                     QMessageBox.No)
        if reply == QMessageBox.Yes:
            event.accept()
        else:
            event.ignore()

    def time_to_time_stamp(self, time_value):
        """Convert a "YYYY-MM-DD HH:MM:SS" string to epoch milliseconds.

        The string is parsed with time.strptime and converted with
        time.mktime, i.e. it is interpreted as local time.
        """
        time_array = time.strptime(time_value, "%Y-%m-%d %H:%M:%S")
        return int(time.mktime(time_array) * 1000)

        
 if __name__ == '__main__':
    app = QApplication(sys.argv)
    ex = Example()
    sys.exit(app.exec_())

仅个人学习参考, 如有疑问,欢迎交流

--------------------------------

免责声明:文章转载自《pyqt5 + pyinstaller 制作爬虫小程序》仅用于学习参考。如对内容有疑问,请及时联系本站处理。

上篇:qdoc 简介 下篇:c++ 数组元素拷贝到容器(copy)

宿迁高防,2C2G15M,22元/月;香港BGP,2C5G5M,25元/月 雨云优惠码:MjYwNzM=

相关文章

python-requests.post方法中data与json参数区别

 GET 在通过requests.get()进行GET请求时,key1=value1&key2=value2可以写成字典形式在params中传参 from common.baseRquest import BaseRquest url ='http://10.162.16.5:8072/contents/list' parm = { "con...

UMeditor百度富文本编辑器的使用

批量上传的图片在线管理没法查看图片 是因为jar包本身的Bug,这里暂时做了个替换展示。就是找到Img.js 然后搜索 img.set 替换下就好了 var url=list[i].url ; url=url.replace("D:/JavaWorkSpace/.metadata/.me_tcat7/webapps/WebEUEdit",""); //i...

通过wireshark获取应用接口并使用爬虫爬取网站数据(二)

接上文 找到接口之后连续查看了几个图片,结果发现图片都很小,于是用手机下载了一个用wireshark查看了一下url 之前接口的是 imges_min下载的时候变成了images soga,知道之后立马试了一下 果然有效, 但是总不能一个一个的查看下载吧 于是连夜写了个java爬虫 下面是代码 packagecom.feng.main; importj...

QT5:类总结

一.QObject 类 QObject::d_ptr QObject::staticMetaObject QObject::staticQtMetaObject QObject::tr() QObject::event() QObject::parent() QObject::thread() QObject::connect() QObje...

[转]JAVA读取外部资源的方法

在java代码中经常有读取外部资源的要求:如配置文件等等,通常会把配置文件放在classpath下或者在web项目中放在web-inf下. 1.从当前的工作目录中读取: try { BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream("wkdi...

xxl-job编写GLUE(Java)运行模式,带执行参数(url)

packagecom.xxl.job.service.handler; importcom.xxl.job.core.log.XxlJobLogger; importcom.xxl.job.core.biz.model.ReturnT; importcom.xxl.job.core.handler.IJobHandler; public class q...