"""Search the variables of a Stata ``.dta`` file by keyword.

Three lookups are provided; each returns the matching variable names as one
space-separated string:

* ``cha``   -- match against variable labels
* ``chaa``  -- match against the data values stored in each variable
* ``chaaa`` -- match against value-label texts

Run as a script, it prompts for a file path (without the ``.dta`` suffix) and
one or more whitespace-separated keywords, then prints the ``cha`` result for
each keyword on its own line.
"""
import json
import sys

import pandas as pd
import pyreadstat as pyreadstat


def _join_names(names) -> str:
    """Return the variable names in *names* as one space-separated string."""
    return " ".join(names)


def dumca(ab: str) -> list:
    """Return the column (variable) names of a Stata file.

    Args:
        ab: Path handed to ``pyreadstat.read_dta`` unchanged, so it must
            already include the ``.dta`` extension.

    Returns:
        The ``column_names`` list from the file's metadata.
    """
    # Only the names are needed, so skip loading the data rows.
    _, meta = pyreadstat.read_dta(ab, metadataonly=True)
    return meta.column_names


# Search by variable label.
def cha(dtname: str, gjz: str) -> str:
    """Find variables whose variable label contains a keyword.

    Args:
        dtname: Path of the Stata file WITHOUT the ``.dta`` extension.
        gjz: Keyword to look for (plain substring match).

    Returns:
        Matching variable names, space-separated; ``""`` when none match.
    """
    # Variable labels live in the metadata; the data rows are not needed.
    _, meta = pyreadstat.read_dta(dtname + ".dta", metadataonly=True)
    labels = meta.column_names_to_labels
    matches = [
        name
        for name, label in labels.items()
        if name is not None and label is not None and gjz in label
    ]
    return _join_names(matches)


# Search by variable content.
def chaa(dtname: str, gjz: str) -> str:
    """Find variables that hold at least one value containing a keyword.

    Every row of every column is examined. Each non-missing value is
    converted with ``str()`` and tested for *gjz* as a substring; missing
    values are skipped so they cannot match through their ``"nan"`` text.

    Args:
        dtname: Path of the Stata file WITHOUT the ``.dta`` extension.
        gjz: Keyword to look for (plain substring match).

    Returns:
        Matching variable names, space-separated; ``""`` when none match.
    """
    dataframe, _ = pyreadstat.read_dta(dtname + ".dta")
    matches = []
    for column in dataframe.columns:
        values = dataframe[column].dropna()
        # any() stops at the first hit, so a matching column is not
        # scanned further than necessary.
        if any(gjz in str(value) for value in values):
            matches.append(column)
    return _join_names(matches)


# Search by value-label content.
def chaaa(dtname: str, gjz: str) -> str:
    """Find variables that have a value label containing a keyword.

    Args:
        dtname: Path of the Stata file WITHOUT the ``.dta`` extension.
        gjz: Keyword to look for (plain substring match).

    Returns:
        Matching variable names in column order, space-separated; ``""``
        when none match.
    """
    _, meta = pyreadstat.read_dta(dtname + ".dta")
    value_labels = meta.variable_value_labels
    matches = []
    for name in meta.column_names:
        labels = value_labels.get(name)
        if not labels:
            # Variable has no value labels attached.
            continue
        if any(text and gjz in text for text in labels.values()):
            matches.append(name)
    return _join_names(matches)


def main() -> None:
    """Prompt for a file and keywords, then print the label matches."""
    file = input('请输入dta文件位置及名称(不用输入.dta):')
    gjza = input('请输入要查找的关键字:')
    # split() without an argument drops empty pieces, so repeated spaces do
    # not produce an empty keyword (which would match every labelled variable).
    for keyword in gjza.split():
        print(cha(file, keyword))


if __name__ == "__main__":
    main()