pandas使用
发布时间 2023-05-17 15:51:16 作者: 春树&暮云
使用笔记
- 基于某列concat或merge
- df = pd.merge(df_raw, df_ret, on="text")
- 交集
- df_join = df1.merge(df2, how="inner", left_on="key1", right_on="key2")
- 差集
- df_diff = df1[~df1["key1"].isin(df2["key2"])]
- replace
- df1.key1.str.replace(r'[^\w\s]+', '', regex=True).str.upper()
- split数据集
dc_p1_train = dc_p1_sample.sample(frac=train_frac, random_state=RANDOM_STATE)
dc_p1_dev = dc_p1_sample.drop(dc_p1_train.index)
dc_p1_test = dc_p1_dev.sample(frac=dev_test_frac, random_state=RANDOM_STATE)
dc_p1_dev = dc_p1_dev.drop(dc_p1_test.index)