Stata / Python 文书筛选

发布时间：2023-10-15 19:44:07　作者：myrj
// Convert every CSV in the 2018 judgment-data folder into a .dta file that
// holds only first-instance ("一审") cases. Output files are numbered
// d:\te\xa5, d:\te\xa6, ... because the counter starts at 4 and is bumped
// before each file is processed.
//
// NOTE(review): the 1012a directory is created before the `cd`, so it is
// relative to whatever the working directory was at that moment.
// `fs` is not a built-in command; it has to be installed separately.
capture mkdir 1012a
cd D:\te\pan\2018年裁判文书数据_马克数据网
fs *.csv
local shu = 4
foreach csvfile in `r(files)' {
    local ++shu
    import delimited "`csvfile'", clear
    // Try to keep the expected columns by name.
    capture keep 案件名称 所属地区 案件类型 案件类型编码 审理程序 裁判日期 案由 全文
    if _rc != 0 {
        // The named columns were not found: rename every variable to the
        // value stored in its first observation (no column `keep` is
        // re-applied in this branch).
        foreach col of varlist _all {
            local header = `col'[1]
            rename `col' `header'
        }
    }
    keep if 审理程序 == "一审"
    save d:\te\xa`shu', replace
}
// Fill a new string variable `sheng` with the first entry of the lookup
// dataset sheng1.dta (variable `sheng`) that matches 所属地区, then save the
// result.
//
// Changes relative to the previous version of this fragment:
//   * the `replace` statement had an unterminated string literal;
//   * an unmatched closing brace after the first `save` was removed (its
//     opening loop is not present in this file);
//   * the lookup dataset is read once instead of being swapped in and out
//     with preserve/use/restore for every single observation;
//   * the per-match `disp` debug output is gone; `replace` reports the
//     number of changes per lookup entry instead.

// Rename every variable to the value held in its first observation.
// NOTE(review): this assumes observation 1 of the data in memory still
// contains the column headers - confirm before running.
foreach var of varlist _all {
    local cx = `var'[1]
    rename `var' `cx'
}

// Read the lookup entries into locals prov1 ... provN, keeping file order so
// that "first match wins" follows the row order of sheng1.dta.
preserve
use sheng1, clear
local nprov = _N
forvalues j = 1/`nprov' {
    local prov`j' = sheng[`j']
}
restore

gen sheng = ""
forvalues j = 1/`nprov' {
    // Only rows that are still empty are touched, so an earlier lookup entry
    // takes precedence over a later one.
    replace sheng = "`prov`j''" if sheng == "" & regexm(所属地区, "`prov`j''")
}
save d:\te\xx`shu', replace


save 1012a\a1, replace


// Loop over d:\te\pan\a1 ... a4: load each file, write `cv' into variable
// `shi` for observation `i', and save the result as a<iv>x.
//
// NOTE(review): this fragment looks incomplete as it stands -
//   * `restore` below has no matching `preserve` inside this loop;
//   * the locals `i' and `cv' are not assigned anywhere inside this loop;
//   * the final closing brace has no matching opening brace in this loop.
// Presumably a per-observation loop header was lost; confirm against the
// original do-file before running.
forvalues iv=1/4{
    use d:\te\pan\a`iv',clear

// Disabled lookup: when `bz' is 0, scan 1001\quanguoshengshi row by row and
// set `cv' to the `shi` value of the first row whose `shi` or `xian` value
// matches `cc' or `ccc'.
//     if `bz'==0{
//         use 1001\quanguoshengshi,clear
//     local k1=_N
//     forvalues ii=1/`k1'{
//         local k11=shi[`ii']
//         local k22=xian[`ii']
//        
//         disp "`cc'"
//         if regexm("`cc'","`k11'"){
//             local cv="`k11'"
//             continue,break
//         }
//         if regexm("`ccc'","`k11'"){
//             local cv="`k11'"
//             continue,break
//         }
//         if "`cv'"==""{
//             if regexm("`cc'","`k22'"){
//             local cv="`k11'"
//             continue,break
//         }
//         }
//         if "`cv'"==""{
//             if regexm("`ccc'","`k22'"){
//             local cv="`k11'"
//             continue,break
//         }
//         }
//     }
//     }
    
    // Return to the preserved dataset and store the match for observation `i'.
    restore
    replace shi="`cv'" in `i'
    // Progress output: the observation number just processed.
    disp `i'
}
// NOTE(review): this save sits after the brace that closes the forvalues
// loop, so `iv' is no longer being iterated here - confirm intended placement.
save d:\te\pan\a`iv'x,replace
}
import pandas as pd
import dask.dataframe as dd
import os,glob,sys
def find_first_name(text, names):
    """Return the first entry of ``names`` that occurs inside ``text``.

    Args:
        text: String to search in.
        names: Iterable of candidate strings, tried in order.

    Returns:
        The first non-empty candidate that is a substring of ``text``, or
        ``""`` when none matches.
    """
    for name in names:
        # An empty name is a substring of everything and would end the search
        # before any real name is tried, so it is skipped.
        if name and name in text:
            return name
    return ""


def tag_province_from_fulltext():
    """Tag the CSV files in the current directory with a ``sheng`` column.

    Every file whose *file name* ends in ``.csv`` and contains ``ta`` is read,
    its ``sheng`` column is set to the first name from ``shengx.dta`` (column
    ``sheng``) found in the row's ``全文`` text, its ``shi`` column is set to
    ``""``, and the result is written to ``ua<N>x.csv`` in the current
    directory, where N counts the processed files starting at 1.

    Prints one line per file: the input path and the number of matched rows.
    """
    directory = os.getcwd()
    # Sorted so that the output numbering is reproducible between runs.
    files = sorted(glob.glob(directory + "/*"))
    names = None
    au = 0
    for file in files:
        # Test the file name only; testing the whole path would let a parent
        # directory such as ".../data/" make every CSV look like a match.
        base = os.path.basename(file)
        if not (base.endswith(".csv") and "ta" in base):
            continue
        au += 1
        df = pd.read_csv(file)
        if names is None:
            # Read the lookup table once, and only when a file needs it.
            lookup = pd.read_stata('shengx.dta')
            names = [n for n in lookup["sheng"] if isinstance(n, str) and n]
        # Build the whole column in one pass instead of per-row .loc writes.
        matches = [find_first_name(str(text), names) for text in df["全文"]]
        df["sheng"] = matches
        df["shi"] = ""
        df.to_csv("ua" + str(au) + "x.csv")
        print(file, sum(1 for m in matches if m))


tag_province_from_fulltext()

            
import pandas as pd
import dask.dataframe as dd
import os,glob,sys
def find_first_city(text, pairs):
    """Return the first ``(sheng, shi)`` pair whose ``shi`` occurs in ``text``.

    Args:
        text: String to search in.
        pairs: Iterable of ``(sheng, shi)`` tuples, tried in order.

    Returns:
        The first pair whose non-empty ``shi`` is a substring of ``text``, or
        ``("", "")`` when none matches.
    """
    for sheng, shi in pairs:
        # An empty name is a substring of everything; skip it so that it
        # cannot end the search before a real name is tried.
        if shi and shi in text:
            return sheng, shi
    return "", ""


def tag_city_from_region():
    """Tag the CSV files in the current directory with ``sheng`` and ``shi``.

    Every file whose name ends in ``.csv`` is read. For each row, the first
    ``shi`` value from ``shengw.dta`` that occurs in ``所属地区`` is looked up.
    The matching row's ``sheng`` value goes into the ``sheng`` column and the
    ``shi`` value into the ``shi`` column (both ``""`` when nothing matches).
    Previously the matched ``shi`` value was written into ``sheng`` and the
    ``shi`` column was never filled. The result is written to ``ta<N>x.csv``
    in the current directory, where N counts the processed files from 1.

    Prints one line per file: the input path and the number of matched rows.

    NOTE(review): output files also end in ``.csv``, so a second run in the
    same directory picks them up as inputs - confirm whether that is wanted.
    """
    directory = os.getcwd()
    # Sorted so that the output numbering is reproducible between runs.
    files = sorted(glob.glob(directory + "/*"))
    pairs = None
    au = 0
    for file in files:
        if not os.path.basename(file).endswith(".csv"):
            continue
        au += 1
        df = pd.read_csv(file)
        if pairs is None:
            # Read the lookup table once, and only when a file needs it.
            lookup = pd.read_stata('shengw.dta')
            pairs = [
                (sheng if isinstance(sheng, str) else "", shi)
                for sheng, shi in zip(lookup["sheng"], lookup["shi"])
                if isinstance(shi, str) and shi
            ]
        hits = [find_first_city(str(region), pairs) for region in df["所属地区"]]
        df["sheng"] = [hit[0] for hit in hits]
        df["shi"] = [hit[1] for hit in hits]
        df.to_csv("ta" + str(au) + "x.csv")
        print(file, sum(1 for hit in hits if hit[1]))


tag_city_from_region()