浙江省高校教师教育理论培训考试练习题库

发布时间:2023-09-07 17:10:38 作者:未配妥剑,已入江湖

题库爬虫方案

# %%
import openpyxl
import requests
from bs4 import BeautifulSoup
import operator
from functools import reduce
# Question-bank pages on the same host; uncomment exactly one URL to pick
# the subject to scrape.
# URL = 'http://www.hzwolf.com/dxxlx.htm'  # University Psychology
# URL = 'http://www.hzwolf.com/gdjyfg.htm'  # Higher Education Laws and Regulations
# URL = 'http://www.hzwolf.com/gdjyx.htm'  # Higher Education Studies
URL = 'http://www.hzwolf.com/jsllx.htm'  # Teacher Ethics
# Bound the request so an unresponsive server cannot hang the script, and
# fail on HTTP errors instead of parsing an error page as a question bank.
res = requests.get(URL, timeout=30)
res.raise_for_status()
# %%
# Decode with GB18030 rather than strict GB2312: it accepts every byte
# sequence GB2312 does plus the GBK/GB18030 extensions that pages labelled
# "gb2312" frequently contain, so those characters are not replaced.
res.encoding = 'GB18030'
html = res.text
soup = BeautifulSoup(html, 'html.parser')
# %%
# Collect the stripped text of every <span> that has a single string child;
# spans whose .string is None are skipped.
items = soup.find_all('span')
item_str = [span.string.strip() for span in items if span.string is not None]
# %%
# Number of extracted strings; also appended below as the end sentinel.
len_str = len(item_str)
# %%
# Positions of the section headers: strings whose characters 2..4 name one of
# the three question types. The trailing len_str closes the last section.
i_slice = [
    pos
    for pos, text in enumerate(item_str)
    if text[2:5] in ('单选题', '多选题', '判断题')
]
i_slice.append(len_str)
# %%
# Group the body of every section by the question type named in its header.
#
# Every entry of i_slice except the trailing end sentinel is the index of a
# header string whose characters 2..4 are one of the three type names, so the
# header text itself decides the bucket. This replaces the positional
# "section number modulo 3" rule, which mislabels questions whenever the page
# does not strictly cycle single -> multi -> judge.
item_str_single = []
item_str_multi = []
item_str_judge = []
_buckets = {
    '单选题': item_str_single,
    '多选题': item_str_multi,
    '判断题': item_str_judge,
}
# %%
# Pair each header index with the next boundary; the slice skips the header
# line itself. extend() flattens in place, so a type with no section simply
# stays an empty list instead of raising from reduce() on an empty sequence.
for _start, _end in zip(i_slice, i_slice[1:]):
    _buckets[item_str[_start][2:5]].extend(item_str[_start + 1:_end])
# %%
# print(item_str_judge)
# %%
def _split_questions(lines):
    """Split a flat list of strings into de-duplicated question records.

    A string that starts with a digit and is shorter than 5 characters is
    treated as a question-number marker (presumably something like "12." --
    confirm against the scraped page). Each marker opens a new record that
    runs up to the next marker; the marker itself is dropped. Strings before
    the first marker are ignored.

    Args:
        lines: Stripped strings of one question type, in page order.

    Returns:
        A list of tuples, one per distinct record, in first-seen order.
    """
    # line[:1] instead of line[0] so an empty string cannot raise IndexError.
    starts = [
        pos
        for pos, line in enumerate(lines)
        if line[:1].isdigit() and len(line) < 5
    ]
    starts.append(len(lines))
    records = [tuple(lines[a + 1:b]) for a, b in zip(starts, starts[1:])]
    # dict.fromkeys removes duplicates while keeping page order; a set would
    # make the row order (and the numbers written into the cards) vary
    # between runs.
    return list(dict.fromkeys(records))


def _write_questions(sheet, lines, label, row):
    """Append one row per distinct question in *lines* to *sheet*.

    Column 1 receives a zero-padded running number, *label*, and every field
    of the record except the second, joined by newlines. Column 2 receives
    the second field (presumably the answer -- confirm against the page).

    Args:
        sheet: openpyxl worksheet to write into.
        lines: Stripped strings of one question type, in page order.
        label: Question-type name embedded in the column-1 text.
        row: Index of the last row already written (0 for an empty sheet).

    Returns:
        The index of the last row written, to be passed to the next call.
    """
    for fields in _split_questions(lines):
        if not fields:
            # A marker immediately followed by another marker: nothing to write.
            continue
        row += 1
        if len(fields) > 1:
            sheet.cell(row=row, column=2).value = fields[1]
        # Records with a single field keep column 2 empty instead of raising.
        front = [fields[0], *fields[2:]]
        sheet.cell(row=row, column=1).value = (
            f'{row:05d}_{label}\n' + '\n'.join(front)
        )
    return row


# Build the workbook: single-choice, multiple-choice and true/false questions
# are written in that order with one continuous row counter k.
file = openpyxl.Workbook()
sheet1 = file.active
k = 0
for _lines, _label in (
    (item_str_single, '单选题'),
    (item_str_multi, '多选题'),
    (item_str_judge, '判断题'),
):
    k = _write_questions(sheet1, _lines, _label, k)
file.save('card.xlsx')