Python:在 Stata 的变量标签、变量名、变量值标签中查找关键字

发布时间:2023-04-17 16:59:25 作者:myrj
import pyreadstat as pyreadstat
import pandas as pd
import json,sys

def dumca(ab):
    """Return the variable (column) names stored in a Stata file.

    Args:
        ab: Path to the ``.dta`` file, including the extension.

    Returns:
        The ``column_names`` attribute of the pyreadstat metadata object.
    """
    # Only the metadata is needed here, so skip loading the data rows.
    _, meta = pyreadstat.read_dta(ab, metadataonly=True)
    return meta.column_names
# Search by variable label
def cha(dtname, gjz):
    """Find variables whose variable label contains a keyword.

    Args:
        dtname: Path to the Stata file WITHOUT the ``.dta`` extension
            (the extension is appended here).
        gjz: Keyword to look for (plain substring match).

    Returns:
        A single string with the matching variable names separated by one
        space, in the order they appear in the label mapping; an empty
        string when nothing matches.
    """
    # Labels live in the file metadata, so the data rows are not loaded.
    _, meta = pyreadstat.read_dta(dtname + ".dta", metadataonly=True)
    labels = meta.column_names_to_labels
    # Variables without a label carry None and are skipped.
    matches = [
        name
        for name, label in labels.items()
        if name is not None and label is not None and gjz in label
    ]
    # Join directly instead of stripping punctuation out of str(list).
    return " ".join(matches)
# Search by variable content
def chaa(dtname, gjz):
    """Find variables that hold at least one value containing a keyword.

    Every non-missing value of every column is converted with ``str`` and
    tested for the keyword as a substring.

    Args:
        dtname: Path to the Stata file WITHOUT the ``.dta`` extension
            (the extension is appended here).
        gjz: Keyword to look for (plain substring match).

    Returns:
        A single string with the matching variable names separated by one
        space, in column order; an empty string when nothing matches.
    """
    dataframe, _ = pyreadstat.read_dta(dtname + ".dta")
    matches = []
    for column in dataframe.columns:
        # Scan ALL rows of the column. The previous loop was bounded by the
        # number of columns, so only the first few rows were ever examined.
        # Missing values are dropped so NaN is not matched as the text "nan";
        # legitimate falsy values such as 0 are still searched.
        values = dataframe[column].dropna()
        # any() stops at the first hit, like the original early break.
        if any(gjz in str(value) for value in values):
            matches.append(column)
    return " ".join(matches)
# Search by value-label content
def chaaa(dtname, gjz):
    """Find variables that have a value label containing a keyword.

    Args:
        dtname: Path to the Stata file WITHOUT the ``.dta`` extension
            (the extension is appended here).
        gjz: Keyword to look for (plain substring match).

    Returns:
        A single string with the matching variable names separated by one
        space, in the order of the file's column names; an empty string
        when nothing matches.
    """
    # Value labels are metadata; read the file once and skip the data rows.
    _, meta = pyreadstat.read_dta(dtname + ".dta", metadataonly=True)
    value_labels = meta.variable_value_labels
    matches = []
    for name in meta.column_names:
        # Variables without value labels have no entry in the mapping.
        labels = value_labels.get(name)
        if not labels:
            continue
        # Empty labels are ignored, as before.
        if any(label and gjz in label for label in labels.values()):
            matches.append(name)
    return " ".join(matches)

# Interactive driver: ask for a .dta path (without extension) and one or more
# space-separated keywords, then print the variables whose label matches each
# keyword, one output line per keyword.
# Example values:
#   file = "d:\\statashu\\cfps\\cfps2010adult_202008"
#   gjza = "性别"
file=input('请输入dta文件位置及名称(不用输入.dta):')
gjza=input('请输入要查找的关键字:')
# split() with no argument collapses repeated/leading/trailing whitespace, so
# no empty keyword is produced (an empty keyword would match every label).
gjj=gjza.split()
for iv in gjj:
    print(cha(file,iv))