pandas之filter

发布时间 2023-08-07 09:48:48作者: Rong_Z

数据准备

import pandas as pd


product_info = {
    "订单号": [ "2951110000099262111", "2181910000909928191", "2194560000121355545", "1194560000121311126", "1483160000121315483"],
    "数量": [92, 61, 66, 33, 15],
    "价格(USD)": [230, 122, 150, 190, 200],
    "状态": ["Not Delivered", "Not Delivered", "Not Delivered", "Not Shipped", "Not Delivered"],
    "订单日期": ["2022-02-12", "2022-03-02", "2022-01-22", "2022-02-22", "2022-02-15"],
    "订单编号": ["444111", "444122", "444132", "444003", "444244"],
}

df = pd.DataFrame(product_info, index=['one', 'two', 'three', 'four', 'five'])
print(df)
'''
                       订单号  数量  价格(USD)             状态        订单日期    订单编号
one    2951110000099262111  92      230  Not Delivered  2022-02-12  444111
two    2181910000909928191  61      122  Not Delivered  2022-03-02  444122
three  2194560000121355545  66      150  Not Delivered  2022-01-22  444132
four   1194560000121311126  33      190    Not Shipped  2022-02-22  444003
five   1483160000121315483  15      200  Not Delivered  2022-02-15  444244
'''

方法介绍

按columns过滤

全列名过滤

# 过滤出列名为“订单号”和“订单编号”列
res_df = df.filter(items=["订单号", '订单编号'])
print(res_df)
'''
                       订单号    订单编号
one    2951110000099262111  444111
two    2181910000909928191  444122
three  2194560000121355545  444132
four   1194560000121311126  444003
five   1483160000121315483  444244
'''

模糊过滤

# 过滤出列名中包含“订单”的所有列
res_df = df.filter(like='订单', axis=1)
print(res_df)
'''
                       订单号        订单日期    订单编号
one    2951110000099262111  2022-02-12  444111
two    2181910000909928191  2022-03-02  444122
three  2194560000121355545  2022-01-22  444132
four   1194560000121311126  2022-02-22  444003
five   1483160000121315483  2022-02-15  444244
'''

正则过滤

# 过滤出以"订单"开头的所有列
res_df = df.filter(regex='^订单', axis=1)
print(res_df)
'''
                       订单号        订单日期    订单编号
one    2951110000099262111  2022-02-12  444111
two    2181910000909928191  2022-03-02  444122
three  2194560000121355545  2022-01-22  444132
four   1194560000121311126  2022-02-22  444003
five   1483160000121315483  2022-02-15  444244
'''

按index过滤

全名称过滤

# 过滤出index为"one", "four"的行
res_df = df.filter(items=["one", 'four'], axis=0)
print(res_df)
'''
                      订单号  数量  价格(USD)             状态        订单日期    订单编号
one   2951110000099262111  92      230  Not Delivered  2022-02-12  444111
four  1194560000121311126  33      190    Not Shipped  2022-02-22  444003
'''

模糊过滤

# 过滤出index包含"e"的所有行
res_df = df.filter(like='e', axis=0)
print(res_df)
'''
                       订单号  数量  价格(USD)             状态        订单日期    订单编号
one    2951110000099262111  92      230  Not Delivered  2022-02-12  444111
three  2194560000121355545  66      150  Not Delivered  2022-01-22  444132
five   1483160000121315483  15      200  Not Delivered  2022-02-15  444244
'''

正则过滤

# 过滤出以"t"开头的所有行
res_df = df.filter(regex='^t', axis=0)
print(res_df)
'''
                       订单号  数量  价格(USD)             状态        订单日期    订单编号
two    2181910000909928191  61      122  Not Delivered  2022-03-02  444122
three  2194560000121355545  66      150  Not Delivered  2022-01-22  444132
'''

# 过滤出以"t"开头或以"f"开头的所有行
res_df = df.filter(regex='^t|^f', axis=0)
print(res_df)
'''
                       订单号  数量  价格(USD)             状态        订单日期    订单编号
two    2181910000909928191  61      122  Not Delivered  2022-03-02  444122
three  2194560000121355545  66      150  Not Delivered  2022-01-22  444132
four   1194560000121311126  33      190    Not Shipped  2022-02-22  444003
five   1483160000121315483  15      200  Not Delivered  2022-02-15  444244
'''