duckdb

发布时间 2023-10-26 17:32:09 作者: 月渊
# %%
import pandas as pd
import glob
import time
import duckdb
# %%
# Open a DuckDB connection with default settings (no database path given);
# the cells below reuse it.
conn = duckdb.connect()
# %%
# Preview the first 10 rows read from the files matching '*.csv' and report
# the wall-clock time the query took.
preview_sql = """
            select * from read_csv_auto('*.csv',header=True)
            limit 10
            """
cur_time = time.time()
df = conn.execute(preview_sql).df()
elapsed = time.time() - cur_time
print(f"time:{elapsed}")
print(df)
# %%
# Expose the pandas DataFrame to DuckDB under the name "df_view".
conn.register("df_view", df)
# %%
# Ask DuckDB to describe the registered view and show the answer as a
# DataFrame (kept as the cell's final expression so the notebook displays it).
describe_sql = "describe df_view"
conn.execute(describe_sql).df()
# %%
# Report how many null values each column holds. Printed explicitly: a bare
# expression that is not the last statement of a cell is evaluated and then
# silently discarded, so the counts would otherwise never be shown.
print(df.isnull().sum())
# Drop only the rows in which every column is null; rows with at least one
# value are kept.
df = df.dropna(how="all")
# %%
# Count the rows of `df`.
# NOTE(review): only "df_view" is registered on the connection in this file,
# so this presumably relies on DuckDB resolving the Python variable `df` by
# name — confirm against the DuckDB version in use.
row_count_sql = "select count(1) from df"
conn.execute(row_count_sql).df()
# %%
# Read the same DataFrame through a common table expression named `a`.
cte_sql = """
with a as (select * from df)
             select * from a
             """
conn.execute(cte_sql).df()
# %%
# Combine the files matching '*.csv' into the single Parquet file
# 'aaa.parquet'.
# NOTE(review): the following cell writes 'a.csv' with a relative path, so a
# re-run of this cell would presumably pick that export up through the
# '*.csv' glob as well — confirm this is intended.
csv_to_parquet_sql = "copy (from read_csv_auto('*.csv',header=True)) to 'aaa.parquet'"
conn.execute(csv_to_parquet_sql)
# %%
# Write the Parquet data back out as 'a.csv', comma-delimited with a header.
parquet_to_csv_sql = "copy (from 'aaa.parquet') to 'a.csv'(DELIMITER ',', HEADER)"
conn.execute(parquet_to_csv_sql)
# %%
# List the tables visible on this connection as a DataFrame.
show_tables_sql = "SHOW TABLES;"
conn.execute(show_tables_sql).df()
# %%
import pandas as pd
import glob
import time
import duckdb
import sqlite3
# Load 'a.csv' through a fresh DuckDB connection into a pandas DataFrame.
conn = duckdb.connect()
df = conn.execute(
    """
            select * from read_csv_auto('a.csv',header=True)
            """
).df()
# Copy the DataFrame into the SQLite file 'database.db' as table
# "table_name", replacing any existing table of that name and leaving the
# DataFrame index out of the written columns.
conn3 = sqlite3.connect('database.db')
try:
    df.to_sql('table_name', conn3, if_exists='replace', index=False)
finally:
    # Close the SQLite handle even when the write raises; otherwise the
    # connection (and its lock on database.db) would leak.
    conn3.close()