HBase CRUD client

发布时间: 2023-07-11 22:11:32 | 作者: LexLuc
# requirement.txt
happybase==1.2.0
"""
hbase.py
"""

# -*- coding: utf-8 -*-

import happybase

from config.hbase_config import HAPPYBASE_HBASE
# HAPPYBASE_HBASE = {
#    "host": "xxx.xxx.xxx.xxx",
#    "port": ???,
#    "size": ?,
# }

from tasks.common_constant import log

# Module-level connection pool shared by importers of this module. It is built
# from the HAPPYBASE_HBASE settings as a side effect of importing this file.
hbase_pool = happybase.ConnectionPool(**HAPPYBASE_HBASE)


class HbaseCURD:
    """Small create/read/write/delete helper on top of a happybase pool.

    Every operation borrows a connection from ``self.hbase_pool`` for the
    duration of the call and first checks whether the target table is listed
    by ``connection.tables()``. When that check fails the method logs a
    message and returns ``None`` instead of raising.
    """

    def __init__(self, hbase_config=None, hbase_pool=None):
        """Bind the helper to a connection pool.

        Args:
            hbase_config: Keyword arguments for ``happybase.ConnectionPool``.
                When given, a new pool is created and ``hbase_pool`` is
                ignored.
            hbase_pool: An already constructed pool to reuse. Only consulted
                when ``hbase_config`` is ``None``.

        Raises:
            ValueError: If neither ``hbase_config`` nor ``hbase_pool`` is
                supplied.
        """
        self.hbase_config = hbase_config
        if hbase_config is not None:
            self.hbase_pool = happybase.ConnectionPool(**hbase_config)
        elif hbase_pool is not None:
            self.hbase_pool = hbase_pool
        else:
            # ValueError is still caught by callers that catch Exception.
            raise ValueError('HbaseCURD init error')

    @staticmethod
    def _table_exists(connection, table_name):
        """Return True when ``table_name`` is listed by ``connection.tables()``."""
        # The listing is compared against byte strings, so normalise the
        # name; names that are already bytes are accepted as-is.
        if not isinstance(table_name, bytes):
            table_name = table_name.encode()
        return table_name in connection.tables()

    def save_to_hbase(self, table_name, data, batch_size=5000, timeout=120):
        """Write ``data`` into ``table_name`` through a batch.

        Args:
            table_name: Name of the target table.
            data: Mapping of row key -> dict of column/value pairs; each item
                is passed to ``batch.put``.
            batch_size: Forwarded to ``table.batch``.
            timeout: Forwarded to ``hbase_pool.connection``.

        Returns:
            None. If the table is not listed, an error is logged and nothing
            is written.
        """
        with self.hbase_pool.connection(timeout) as connection:
            if not self._table_exists(connection, table_name):
                log.error('save to hbsae fail, hbase table %s not exist' % table_name)
                return
            table = connection.table(table_name)
            with table.batch(batch_size=batch_size) as bat:
                for row_key, kv_pairs in data.items():
                    bat.put(row_key, kv_pairs)

    def create_hbase_table(self, table_name, families=None):
        """Create ``table_name`` unless it is already listed.

        Args:
            table_name: Name of the table to create.
            families: Column family definitions forwarded to
                ``connection.create_table``. When ``None``, a single column
                family literally named ``'families'`` with ``max_versions=1``
                is used.

        Returns:
            None. If the table is already listed, a warning is logged and
            nothing is created.
        """
        with self.hbase_pool.connection() as connection:
            if self._table_exists(connection, table_name):
                log.warning('hbase table %s exist, create table fail' % table_name)
                return
            if families is None:
                families = {'families': dict(max_versions=1)}
            connection.create_table(table_name, families=families)

    def delete_hbase_table(self, table_name):
        """Disable and delete ``table_name`` if it is listed.

        Returns:
            None. If the table is not listed, a warning is logged.
        """
        with self.hbase_pool.connection() as connection:
            if not self._table_exists(connection, table_name):
                log.warning('hbase table %s not exist, delete table fail' % table_name)
                return
            connection.delete_table(table_name, disable=True)

    def scan(self, table_name, columns=None, filter=None, limit=None, batch_size=5000, timeout=120):
        """Scan ``table_name`` and return an iterator over its rows.

        The existence check runs immediately. The rows themselves are fetched
        lazily: the returned iterator borrows a pooled connection when
        iteration starts and gives it back when the iterator is exhausted or
        closed, so the scanner never runs on a connection that has already
        been returned to the pool. Consume (or ``close()``) the iterator
        promptly so the connection is released.

        Args:
            table_name: Name of the table to scan.
            columns: Forwarded to ``table.scan``.
            filter: Forwarded to ``table.scan``.
            limit: Forwarded to ``table.scan``.
            batch_size: Forwarded to ``table.scan``.
            timeout: Forwarded to ``hbase_pool.connection``.

        Returns:
            An iterator over whatever ``table.scan`` yields, or ``None``
            (after logging a warning) when the table is not listed.
        """
        with self.hbase_pool.connection(timeout) as connection:
            table_found = self._table_exists(connection, table_name)
        if not table_found:
            log.warning('hbase table %s not exist, get data fail' % table_name)
            return None
        return self._iter_scan(table_name, columns, filter, limit, batch_size, timeout)

    def _iter_scan(self, table_name, columns, filter, limit, batch_size, timeout):
        """Yield scan results while holding a pooled connection."""
        # Keeping the ``with`` block inside the generator ties the lifetime
        # of the borrowed connection to the lifetime of the iteration.
        with self.hbase_pool.connection(timeout) as connection:
            table = connection.table(table_name)
            for row in table.scan(columns=columns, filter=filter,
                                  batch_size=batch_size, limit=limit):
                yield row