# python html 今日概念资金流入前20名-526互联

#!/usr/bin/env python
# coding=utf-8


from selenium import webdriver  # 导入模块
import time
from lxml import etree  # lxml库是一个HTML、XML的解析器
import tushare as ts
import requests, json
import threading
import random


def get_init_driver():
    """Start a headless Chrome session and return its WebDriver.

    The wall-clock time spent launching the browser is printed to stdout.

    Returns:
        The ``webdriver.Chrome`` instance created with the ``headless``
        start-up argument.
    """
    started_at = time.time()

    # ChromeOptions carries the start-up flags; 'headless' runs Chrome
    # without a visible window.
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument('headless')
    browser = webdriver.Chrome(options=chrome_options)

    elapsed = float(time.time() - started_at)
    print("初始化driver花时：% f 秒" % elapsed)
    return browser


# 关闭driver
def get_close_driver(driver):
    """Shut down the browser session owned by *driver*.

    ``quit()`` is used instead of ``close()``: ``close()`` only closes the
    current window, whereas ``quit()`` closes every window and ends the
    WebDriver session, so no browser or driver process is left running.

    Args:
        driver: The WebDriver instance to shut down, e.g. the one returned
            by ``get_init_driver``.
    """
    driver.quit()


# 今日板块行业资金流入前20名
# 近5日，近10日行业资金流入：https://data.eastmoney.com/bkzj/hy.html?stat=10
# http://quote.eastmoney.com/center/boardlist.html#industry_board
# https://data.eastmoney.com/bkzj/hy.html
def get_today_industry_hy_html(driver, number):
    """Collect second-column texts from the Eastmoney industry fund-flow table.

    Loads https://data.eastmoney.com/bkzj/hy.html in *driver*, reads the text
    nodes of the second cell of table rows 3 .. ``number - 1`` (1-based; the
    first two rows are always skipped), appends the lists returned by
    ``get_today_industry_hy_api(6)`` and removes duplicates.

    If the table is not present in the page source (for example because the
    page has not finished rendering), a notice is printed and only the API
    data is returned instead of raising ``IndexError``.

    Args:
        driver: Selenium WebDriver used to load the page.
        number: Exclusive upper bound on the 1-based row index that is read.

    Returns:
        list: De-duplicated strings in arbitrary order (built from a ``set``).
    """
    started_at = time.time()
    driver.get("https://data.eastmoney.com/bkzj/hy.html")
    page = etree.HTML(driver.page_source)

    # xpath() yields an empty list when nothing matches, so guard the lookup
    # rather than indexing [0] blindly.
    tables = page.xpath("//div[@class='dataview-body']/table")
    if tables:
        rows = tables[0].xpath('.//tr')
    else:
        print("页面中未找到行业资金流表格，仅使用接口数据")
        rows = []

    names = []
    for row_no, row in enumerate(rows, start=1):
        if row_no <= 2:
            continue  # first two rows are skipped (presumably header rows)
        if row_no >= number:
            # Rows are visited in order, so no later row can qualify.
            break
        for col_no, cell in enumerate(row, start=1):
            if col_no == 2:
                # Only the second column is wanted; strip surrounding spaces.
                names.extend(text.strip(" ") for text in cell.xpath(".//text()"))
                break

    # The reported duration covers the page scrape only; it is taken before
    # the API calls below.
    finished_at = time.time()

    names.extend(get_today_industry_hy_api(6))
    # NOTE(review): this second call passes the same argument as the first, so
    # it adds nothing new unless the API result changed in between. The
    # original locals were named hy_list_5 / hy_list_10 -- confirm whether
    # 5-day and 10-day variants were intended.
    names.extend(get_today_industry_hy_api(6))

    unique_names = list(set(names))
    print("查询实时行业(6倍数据更新一次)花时：% .3f 秒" % float(finished_at - started_at))
    return unique_names


# 今日概念资金流入前20名
# http://quote.eastmoney.com/center/boardlist.html#concept_board
def get_now_gn_html(driver, number):
    """Collect second-column texts from the Eastmoney concept fund-flow table.

    Loads https://data.eastmoney.com/bkzj/gn.html in *driver* and reads the
    text nodes of the second cell of table rows 3 .. ``number - 1`` (1-based;
    the first two rows are always skipped). A few alias names are appended
    when certain concepts are present, the lists returned by
    ``get_now_gn_api_type(6, 'f3')`` are merged in, the result is passed
    through ``basic_rule_1.is_array_not_in_array`` and de-duplicated.

    If the table is not present in the page source (for example because the
    page has not finished rendering), a notice is printed and processing
    continues with an empty scrape result instead of raising ``IndexError``.

    Args:
        driver: Selenium WebDriver used to load the page.
        number: Exclusive upper bound on the 1-based row index that is read.

    Returns:
        list: De-duplicated strings in arbitrary order (built from a ``set``).
    """
    driver.get("https://data.eastmoney.com/bkzj/gn.html")
    page = etree.HTML(driver.page_source)

    # xpath() yields an empty list when nothing matches, so guard the lookup
    # rather than indexing [0] blindly.
    tables = page.xpath("//div[@class='dataview-body']/table")
    if tables:
        rows = tables[0].xpath('.//tr')
    else:
        print("页面中未找到概念资金流表格，仅使用接口数据")
        rows = []

    concepts = []
    for row_no, row in enumerate(rows, start=1):
        if row_no <= 2:
            continue  # first two rows are skipped (presumably header rows)
        if row_no >= number:
            # Rows are visited in order, so no later row can qualify.
            break
        for col_no, cell in enumerate(row, start=1):
            if col_no == 2:
                # Only the second column is wanted; strip surrounding spaces.
                concepts.extend(text.strip(" ") for text in cell.xpath(".//text()"))
                break

    # Add alternative spellings of some concepts so that later matching can
    # hit whichever naming is used elsewhere.
    if basic_rule_1.is_string_in_array(concepts, "CPO"):
        concepts.append("共封装光学（CPO）")
        concepts.append("CPO概念")
    if basic_rule_1.is_string_in_array(concepts, "算力概念"):
        concepts.append("东数西算（算力）")
    if basic_rule_1.is_string_in_array(concepts, "光通信模块"):
        concepts.append("共封装光学（CPO）")
    # (The original carried a note here about removing yesterday's limit-up
    # boards; no code for that step exists in this function.)

    # Per the original comment the field codes are: 'f62' = current,
    # 'f164' = last 5 days, 'f3' = price change.
    concepts.extend(get_now_gn_api_type(6, 'f3'))
    # NOTE(review): this second call is identical to the first although the
    # original labelled the two results as 5-day and 10-day lists -- confirm
    # which field codes were intended.
    concepts.extend(get_now_gn_api_type(6, 'f3'))

    # Per the original comment, this filters out limit-up concept entries.
    concepts = basic_rule_1.is_array_not_in_array(concepts, None)

    # Drop duplicates (ordering is not preserved).
    return list(set(concepts))


def get_now_gn_df_html(driver, ts_code):
    """Return the concept list for one stock from Eastmoney's F10 page.

    Loads the "CoreConception" page for the stock, takes the 10th ``<p>`` text
    node of the document and splits it on spaces. When the page has no
    ``div.summary`` element or fewer than ten paragraph texts, a notice is
    printed and ``get_stock_gn(ts_code)`` is used instead (previously a
    missing ``div.summary`` raised ``IndexError``). The result is finally
    passed through ``basic_rule_1.is_array_not_in_array``.

    Args:
        driver: Selenium WebDriver used to load the page.
        ts_code: Stock code; characters 0-5 are used as the numeric code and
            characters 7-8 as the market prefix (assumes a form such as
            ``"000001.SZ"`` -- TODO confirm against callers).

    Returns:
        Whatever ``basic_rule_1.is_array_not_in_array`` returns for the
        collected concept list.
    """
    # Eastmoney expects the market prefix first, e.g. "SZ" + "000001".
    symbol = ts_code[7:9] + ts_code[0:6]
    driver.get(
        "https://emweb.securities.eastmoney.com/PC_HSF10/CoreConception/Index?type=web&code=" + symbol)
    page = etree.HTML(driver.page_source)

    summaries = page.xpath("//div[@class='summary']")
    # NOTE(review): "//p/text()" is an absolute path, so it matches <p> text
    # anywhere in the document, not only inside div.summary. It is kept
    # unchanged because the hard-coded paragraph index below depends on it.
    paragraphs = summaries[0].xpath("//p/text()") if summaries else []

    concepts = []
    if len(paragraphs) >= 10:
        # The 10th paragraph text holds the space-separated concept names.
        concepts = paragraphs[9].strip(" ").strip('\n').split(" ")

    if not concepts:
        print("在东方财富网没有到到概念数据，在Tushare查询")
        concepts = get_stock_gn(ts_code)

    # Per the original comment, this filters out limit-up concept entries.
    return basic_rule_1.is_array_not_in_array(concepts, None)