# 从中国期货监控中心爬取每日交易数据
#
# 发布时间: 2023-07-11 14:25:47  作者: 宏图英雄

# 中国期货监控中心 爬取 每日交易数据

import os
import time
import traceback
from share import SI
from datetime import datetime, timedelta, date
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver import ChromeOptions
from PySide2.QtWidgets import QMessageBox
from PySide2.QtCore import Qt
from PySide2.QtGui import QIcon
from VeriCodeFromBhshare import getLocalVericode           # 从 www.bhshare.cn 进行 图片 验证码识别
from VeriCodeFromBaidu import getwords as bdgetgetwords    # 从 百度 进行 图片 验证码识别
# 中国期货监控中心-投资者查询服务系统

DAILY_SETTLEMENT_RESULT_CODE_OK = 1001  # query result is normal
DAILY_SETTLEMENT_RESULT_CODE_UNKNOW = 1002  # unknown error
DAILY_SETTLEMENT_RESULT_CODE_NOT_TRADE = 1003  # not a trading day
DAILY_SETTLEMENT_RESULT_CODE_NOT_REGISTER = 1004  # not registered yet

class NotRegistered(Exception):
    """Signals that a queried date has no settlement report available."""

class DailySettlementQueryError(Exception):
    """Signals an unrecognised notice in reply to a daily settlement query."""

class CfmmcBrowser(object):
    """Selenium-driven downloader for the CFMMC investor service site.

    Constructing an instance runs the whole job: it starts Chrome, logs in at
    ``https://investorservice.cfmmc.com/login.do`` (the captcha picture is
    recognised through the OCR helpers imported by this module), requests the
    daily settlement report for every date in ``self.newDateList`` and finally
    quits the browser.
    """

    # Name of the captcha screenshot, stored in the current working directory.
    _captchaFile = 'imgVeriCode.png'

    def __init__(self, userID, password, saveto='./', datelist=None):
        """Start Chrome, log in and download the requested settlement reports.

        :param userID: account id typed into the ``userID`` login field.
        :param password: password typed into the ``password`` login field.
        :param saveto: directory configured as Chrome's download directory.
        :param datelist: optional iterable of ``datetime`` objects to query;
            see :meth:`_buildDateList` for the filtering and fallback rules.
        """
        super(CfmmcBrowser, self).__init__()
        self.userID = userID
        self.password = password
        self.saveto = saveto
        # OCR service settings; they are not read by any method of this class.
        self.ocrTOKEN = '424a894cd'  # token source: http://www.bhshare.cn/imgcode/gettoken
        self.ocrURL = 'http://www.bhshare.cn/imgcode/'  # OCR endpoint
        self.newDateList = self._buildDateList(datelist)
        print(self.newDateList)
        self.__cfmmcUrl = 'https://investorservice.cfmmc.com/login.do'

        chromePath = os.path.join(SI.appPath, 'chromedriver.exe')
        # Only ``options=`` is passed: when the deprecated ``chrome_options=``
        # is given as well, Selenium uses it and ignores ``options``.
        self.browser = webdriver.Chrome(executable_path=chromePath,
                                        options=self._buildChromeOptions())
        try:
            if self.login():
                try:
                    self.downloadDailySettlement()
                except DailySettlementQueryError:
                    # Already reported by downloadDailySettlement().
                    pass
                except Exception:
                    # Best effort: report the failure instead of hiding it,
                    # but do not let it escape the constructor.
                    traceback.print_exc()
        finally:
            # Always release the browser, also when the login fails or raises.
            self.browser.quit()
            self._removeCaptchaImage()

    @staticmethod
    def _buildDateList(datelist):
        """Return the dates to query as ``'YYYY-MM-DD'`` strings.

        Only ``datetime`` instances that lie at least one day in the past are
        kept, in their original order. When nothing is left, the last three
        days are used, plus today when the current hour is greater than 17.

        :param datelist: iterable of candidate dates, or ``None``.
        :return: list of date strings.
        """
        today = datetime.today()
        cutoff = today - timedelta(days=1)
        selected = [
            candidate.strftime('%Y-%m-%d')
            for candidate in (datelist or [])
            if isinstance(candidate, datetime) and candidate <= cutoff
        ]
        if selected:
            return selected

        selected = [
            (today - timedelta(days=daysBack)).strftime('%Y-%m-%d')
            for daysBack in (3, 2, 1)
        ]
        # NOTE(review): the original note says today's data can be queried
        # from 5 p.m., but ``hour > 17`` only holds from 18:00 on - confirm.
        if today.hour > 17:
            selected.append(today.strftime('%Y-%m-%d'))
        return selected

    def _buildChromeOptions(self):
        """Return one ChromeOptions object holding every browser setting."""
        options = ChromeOptions()
        options.add_experimental_option("prefs", {"download.default_directory": self.saveto})
        options.add_experimental_option('excludeSwitches', ['enable-logging'])
        options.add_argument('--disable-extensions')
        options.add_argument('-ignore-certificate-errors')
        # The original switch contained a stray space ('-ignore -ssl-errors').
        options.add_argument('--ignore-ssl-errors')
        options.add_argument('--disable-gpu')
        options.add_argument('--no-sandbox')
        if SI.chromepath:
            options.binary_location = SI.chromepath
        return options

    def _removeCaptchaImage(self):
        """Delete the captcha screenshot from the working directory, if any."""
        picfile = os.path.join(os.getcwd(), self._captchaFile)
        if os.path.exists(picfile):
            os.remove(picfile)

    def login(self):
        """
        Open the login page, fill in the credentials and the captcha, submit.

        :return: bool, True when the page title element found after submitting
            contains '客户交易结算日报'; False when the recognised captcha is
            not a 6-character alphanumeric string or the title lookup fails.
        """
        self.browser.get(self.__cfmmcUrl)

        userField = self.browser.find_element(By.NAME, 'userID')
        userField.clear()
        userField.send_keys(self.userID)
        self.browser.find_element(by=By.NAME, value='password').send_keys(self.password)

        # Screenshot only the captcha element and hand the file to the OCR helpers.
        imgfile = os.path.join(os.getcwd(), self._captchaFile)
        self.browser.find_element(By.ID, "imgVeriCode").screenshot(imgfile)
        try:
            vericode = getLocalVericode(imgfile)  # OCR via www.bhshare.cn
            if vericode == 'error':
                # First service failed: fall back to the Baidu OCR helper.
                print(' Bhshare  error ! ')
                try:
                    vericode = bdgetgetwords(imgfile).getwords()['words_result'][0]['words']
                except (KeyError, IndexError, TypeError):
                    # No usable recognition result: handled as a failed login below.
                    vericode = ''
        finally:
            self._removeCaptchaImage()

        # Submit only a 6-character alphanumeric recognition result.
        if not (isinstance(vericode, str) and vericode.isalnum() and len(vericode) == 6):
            return False

        vericodeField = self.browser.find_element(by=By.NAME, value='vericode')
        vericodeField.clear()
        vericodeField.send_keys(vericode)
        self.browser.find_element(by=By.CLASS_NAME, value='buttonLogin').click()

        try:
            title = self.browser.find_element(by=By.CLASS_NAME, value='page-title-text').text
        except Exception:
            # Title element unavailable after submitting: treat as not logged in.
            return False
        return '客户交易结算日报' in title

    def downloadDailySettlement(self):
        """
        Download the settlement report of every date in ``self.newDateList``.

        A ``NotRegistered`` raised for one date ends the loop silently; a
        ``DailySettlementQueryError`` is printed and re-raised.
        """
        print(' login success ! ')

        try:
            for tdate in self.newDateList:
                # One query per day.
                print(tdate)
                self.downloadSettlementByDate(tdate)
        except NotRegistered:
            # Nothing more to query.
            pass
        except DailySettlementQueryError:
            traceback.print_exc()
            raise

    def downloadSettlementByDate(self, tdate):
        """
        Query one trade date and open the link found in the result table.

        :param tdate: trade date as a 'YYYY-MM-DD' string.
        :return: None
        :raises DailySettlementQueryError: the page showed an unrecognised notice.
        :raises NotRegistered: the page reported that no data was submitted
            for that day.
        """
        # Fill in the date and submit the query form.
        customerForm = self.browser.find_element(by=By.NAME, value='customerForm')
        tradeDate = customerForm.find_element(by=By.NAME, value='tradeDate')
        tradeDate.clear()
        time.sleep(0.3)
        tradeDate.send_keys(tdate)
        tradeDate.submit()
        time.sleep(1)

        code = self.checkDailySettlementQueryNotice()
        if code == DAILY_SETTLEMENT_RESULT_CODE_UNKNOW:
            raise DailySettlementQueryError()
        if code == DAILY_SETTLEMENT_RESULT_CODE_NOT_REGISTER:
            raise NotRegistered()
        if code != DAILY_SETTLEMENT_RESULT_CODE_OK:
            # Not a trading day: the caller moves on to the next date.
            return

        links = self.browser.find_elements(
            by=By.XPATH, value='//*[@id="waitBody"]/table/tbody/tr[1]/td/a')
        if links:
            # Navigate to the href directly; the original author noted that
            # clicking the link has no effect.
            self.browser.get(links[0].get_attribute('href'))
            time.sleep(1)

    def checkDailySettlementQueryNotice(self):
        """
        Classify the notice shown in the ``waitBody`` element after a query.

        :return: one of the ``DAILY_SETTLEMENT_RESULT_CODE_*`` constants;
            ``..._OK`` when no ``li`` notice element is present.
        """
        try:
            noticeEle = self.browser.find_element(by=By.ID, value='waitBody').find_element(By.TAG_NAME, 'li')
        except NoSuchElementException:
            # No notice at all.
            return DAILY_SETTLEMENT_RESULT_CODE_OK

        noticeText = noticeEle.text
        # Notice: the futures company has not submitted that day's data.
        if '的交易结算报告,原因是期货公司未向监控中心报送该日数据' in noticeText:
            return DAILY_SETTLEMENT_RESULT_CODE_NOT_REGISTER
        # Notice: not a trading day, choose another trade date.
        if '为非交易日,请重新选择交易日期' in noticeText:
            return DAILY_SETTLEMENT_RESULT_CODE_NOT_TRADE
        return DAILY_SETTLEMENT_RESULT_CODE_UNKNOW

def alarmMessageBox(MESSAGE):
    """Show an information dialog titled "提示" with a single OK button.

    The dialog is created without a parent (a module-level function has no
    ``self`` to pass) and is displayed with ``exec_()``, the same way
    ``questMessage`` displays its dialog.

    :param MESSAGE: text shown in the dialog.
    :return: None
    """
    box = QMessageBox(QMessageBox.Information, "提示", MESSAGE, QMessageBox.Ok)
    box.setWindowModality(Qt.NonModal)
    box.exec_()


def questMessage(tltie, MESSAGE, btn1, btn2):
    """Show a question dialog with one or two relabelled buttons.

    :param tltie: window title of the dialog.
    :param MESSAGE: text shown in the dialog.
    :param btn1: label put on the Yes button.
    :param btn2: label put on the No button; ``None`` shows only the Yes button.
    :return: True when the clicked button is the Yes button, otherwise False.
    """
    twoButtons = btn2 is not None

    dialog = QMessageBox()
    dialog.setWindowIcon(QIcon("images/rlylogo.ico"))
    dialog.setWindowModality(Qt.NonModal)
    dialog.setIcon(QMessageBox.Question)
    dialog.setWindowTitle(tltie)
    dialog.setText(MESSAGE)
    dialog.setStandardButtons(
        (QMessageBox.Yes | QMessageBox.No) if twoButtons else QMessageBox.Yes)

    # Style and relabel the confirming button.
    yesButton = dialog.button(QMessageBox.Yes)
    yesButton.setStyleSheet("background: url(images/logInbg.png) no-repeat;background-color: transparent;\
            border: none;\
            cursor: pointer;\
            width: 120px;\
            height: 32px;\
            color: #FFF;\
            font-weight: bold;\
            text-align: center;\
            hover{background-color:rdb(255,93,52);}")
    yesButton.setText(btn1)

    # Style and relabel the rejecting button when one was requested.
    if twoButtons:
        noButton = dialog.button(QMessageBox.No)
        noButton.setStyleSheet("background: url(images/logInbg.png) no-repeat;\
                       border: none;\
                       cursor: pointer;\
                       width: 120px;\
                       height: 32px;\
                       color: #FFF;\
                       font-weight: bold;\
                       text-align: center;")
        noButton.setText(btn2)

    dialog.exec_()
    return bool(dialog.clickedButton() == yesButton)

def is_valid_date(str):
    """Tell whether *str* is a date string in ``%Y-%m-%d`` format.

    :param str: value to check (the name shadows the builtin; it is kept so
        that keyword callers keep working).
    :return: True when ``datetime.strptime`` accepts the value, False when it
        is malformed or not a string.
    """
    try:
        # ``datetime`` is the class imported from the datetime module, so the
        # parser is ``datetime.strptime``, not ``datetime.datetime.strptime``.
        datetime.strptime(str, "%Y-%m-%d")
    except (ValueError, TypeError):
        return False
    return True

if __name__ == '__main__':
    # Script entry point: read an optional date range from fmsconfig.ini in
    # the working directory, then run one CfmmcBrowser session per account.
    import configparser as config

    def _readOption(parser, name):
        """Return option *name* of the DEFAULT section, or None when absent."""
        if parser.has_option("DEFAULT", name):
            return parser.get("DEFAULT", name)
        return None

    # Defined up front so that a missing config file cannot leave them unset.
    startDate = None
    endDate = None

    setup_cfg = os.path.join(os.getcwd(), 'fmsconfig.ini')
    if os.path.exists(setup_cfg):
        # ConfigParser replaces the SafeConfigParser alias removed in Python 3.12.
        parser = config.ConfigParser()
        try:
            with open(setup_cfg, "r") as f:
                parser.read_file(f)
        except IOError:
            # Unreadable file: fall back to the default range below.
            pass
        else:
            startDate = _readOption(parser, 'startDate')
            endDate = _readOption(parser, 'endDate')

    if startDate is not None:
        startDate = datetime.strptime(startDate, "%Y-%m-%d")
    if endDate is not None:
        endDate = datetime.strptime(endDate, "%Y-%m-%d")

    if startDate is None or endDate is None:
        # Default range: the last SI.getDataDays days up to now. One timestamp
        # is used for both ends so the inclusive range below does not depend
        # on the clock resolution between two calls.
        endDate = datetime.today()
        startDate = endDate - timedelta(days=int(SI.getDataDays))

    accounts = ['013588768358']  # '022116501650', '0022988787', '013588768358'
    saveto = SI.input_path
    os.makedirs(saveto, exist_ok=True)

    if startDate <= endDate:
        # Every day from endDate back to startDate (inclusive), newest first.
        dayCount = (endDate - startDate).days
        datelist = [endDate - timedelta(days=daysBack) for daysBack in range(dayCount + 1)]
        for account in accounts:
            CfmmcBrowser(account, SI.cfmmcpsw, saveto=saveto, datelist=datelist)