Day 20 20.3 数据库之Python操作MySQL

发布时间 2023-04-02 20:26:37作者: Chimengmeng

Python操作MySQL

import pymysql

# 打开数据库连接

db = pymysql.connect(host='localhost', user='root', passwd='...', port=3306,datebase='...')
print('连接成功!')

# 使用 cursor() 方法创建一个游标对象 cursor
cursor = db.cursor()

# 使用 execute()  方法执行 SQL 查询
cursor.execute("SELECT VERSION()")

# 使用 fetchone() 方法获取单条数据.
data = cursor.fetchone()

print("Database version : %s " % data)

# 关闭数据库连接
db.close()

class版本

import pymysql


class DBHandler:
    def __init__(self, host, port, user, password,
                 database, charset, **kwargs):
        # 连接数据库服务器
        self.conn = pymysql.connect(host=host, port=port, user=user, password=password,
                                    database=database, cursorclass=pymysql.cursors.DictCursor,
                                    charset=charset, **kwargs)
        # 获取游标
        self.cursor = self.conn.cursor()

    def query(self, sql, args=None, one=True):
        self.cursor.execute(sql, args)
        if one:
            return self.cursor.fetchone()
        else:
            return self.cursor.fetchall()

    def close(self):
        self.cursor.close()
        self.conn.close()


if __name__ == "__main__":
    db = DBHandler(host='127.0.0.1', port=3306,
                   user='root', password='....',
                   database='uric', charset='utf8')
    sql = 'SELECT VERSION()'
    data = db.query(sql)
    print(data)

爬虫小说存储案例

import requests
from lxml import etree
import pymysql


class Spider(object):
    def __init__(self, host, port, user, password,
                 database, charset, **kwargs):
        # 连接数据库服务器
        self.conn = pymysql.connect(host=host, port=port, user=user, password=password,
                                    database=database, cursorclass=pymysql.cursors.DictCursor,
                                    charset=charset, **kwargs)
        # 获取游标
        self.cursor = self.conn.cursor()

        # 初始化表、
        self.init_table()

        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
        }

        self.session = requests.session()

    def exec(self, sql, args=None, one=True):
        self.cursor.execute(sql, args)
        self.conn.commit()

        if one:
            return self.cursor.fetchone()
        else:
            return self.cursor.fetchall()

    def close(self):
        self.cursor.close()
        self.conn.close()

    def init_table(self):

        # 使用 execute()  方法执行 SQL 查询
        sql1 = """
           create table if not exists book(
           id int primary key auto_increment,
           bookName varchar(32),
           coverImg varchar(255),
           authorPenName varchar(32)
           )character set=utf8; 
           """
        self.exec(sql1)

        sql2 = """
               create table if not exists chapter(
               id int primary key auto_increment ,
               chapter_name varchar(32),
               chapter_content text,
               book_id INT NOT NULL
               )character set=utf8;
               """
        self.exec(sql2)

    def login(self):
        self.session.post("https://passport.17k.com/ck/user/login", data={
            "loginName": ".....",
            "password": "...."
        }, headers=self.headers)

    def get_shelf_books(self):
        '''
        params:
        :return: data [{},{},{}]
        '''
        res = self.session.get("https://user.17k.com/ck/author/shelf?page=1&appKey=2406394919")

        res.encoding = "utf8"
        # print(res.text)
        data = res.json().get("data")

        for book_dict in data:
            self.handle_one_book(book_dict)

    def handle_one_book(self, book_dict):
        # 循环处理每一本书
        print(book_dict)
        bookId = book_dict.get("bookId")
        bookName = book_dict.get("bookName")
        coverImg = book_dict.get("coverImg")
        authorPenName = book_dict.get("authorPenName")

        sql = f"""insert into book (bookName,coverImg,authorPenName) values (
            "{bookName}","{coverImg}","{authorPenName}");"""
        print("sql:::", sql)
        self.exec(sql)

        # 爬取每一章的文本信息并下载
        self.get_chapters(bookId)

    def get_chapters(self, book_id):

        # 爬虫每一本书架书籍的章节页面
        res = requests.get(f"https://www.17k.com/list/{book_id}.html")
        res.encoding = "utf8"
        # 解析该书籍的章节页面中章节链接
        selector = etree.HTML(res.text)
        items = selector.xpath('//dl[@class="Volume"][position()>1]/dd/a')

        for item in items:
            self.handle_one_chapter(item, book_id)

    def handle_one_chapter(self, item, book_id):
        # 每一本书籍的每一章节的信息
        chapter_href = item.xpath("./@href")[0]
        chapter_name = item.xpath("./span/text()")[0].strip()
        # 爬取章节内容
        res = requests.get("https://www.17k.com" + chapter_href)
        res.encoding = "utf8"
        chapter_html = res.text
        print(chapter_html)
        selector = etree.HTML(res.text)
        chapter_content_list = selector.xpath(
            '//div[contains(@class,"content")]/div[@class="p"]/p[position()<last()]/text()')
        chapter_content_str = "\n".join(chapter_content_list)
        # 章节进行下载,写入到一个文件中

        sql = f"""insert into chapter (chapter_name,chapter_content,book_id) values (
                  "{chapter_name}",'{chapter_content_str}',"{book_id}");"""
        print("sql:::", sql)
        self.exec(sql)

    def run(self):
        # (1) 模拟登录,获取Cookie
        self.login()
        # (2) 爬取书架上的书籍信息
        self.get_shelf_books()


if __name__ == "__main__":
    s = Spider(host='127.0.0.1', port=3306,
               user='root', password='.....',
               database='xiaoshuo', charset='utf8')

    s.run()