常用的Python代码片段(地理相关)

发布时间 2023-11-15 19:18:51作者: GEOLI

把pandas的dataframe转为geopandas的地理格式(df to geodf)

def df2gdf(df, lon_col='longitude', lat_col='latitude', epsg=4326, crs=None):
    gdf = gpd.GeoDataFrame(df)
    gdf['geometry'] = gpd.points_from_xy(x=df[lon_col], y=df[lat_col])
    gdf.geometry = gdf['geometry']
    if crs is None:
        gdf.set_crs(epsg=epsg, inplace=True)
    else:
        gdf.set_crs(crs=crs, inplace=True)
    return gdf

重采样

from osgeo import gdal
import glob
import os
os.chdir("/mnt/f/analysis/")
def resample(image, new_xres, new_yres, save_path):
    """
    Decrease the pixel size of the raster.
    Args:
        new_xres (int): desired resolution in x-direction
        new_yres (int): desired resolution in y-direction
        save_path (str): filepath to where the output file should be stored
    Returns: Nothing, it writes a raster file with decreased resolution.
    """
    props = image.GetGeoTransform()
    print('current pixel xsize:', props[1], 'current pixel ysize:', -props[-1])
    options = gdal.WarpOptions(options=['tr'], xRes=new_xres, yRes=new_yres)
    newfile = gdal.Warp(save_path, image, options=options)
    newprops = newfile.GetGeoTransform()
    print('new pixel xsize:', newprops[1], 'new pixel ysize:', -newprops[-1])
    print('file saved in ' + save_path)
IN_DIR = "CDL-clip"
OUT_DIR = "CDL_resample"
if not os.path.exists(OUT_DIR):
    os.mkdir(OUT_DIR)
for i in glob.glob(f"{IN_DIR}/*.tif"):
    print(f"[{i}]","* "*5)
    resample(gdal.Open(i), 0.008983152841195214,0.008983152841195214, i.replace(IN_DIR,OUT_DIR))

栅格运算

os.system(f"gdal_calc.py --calc \(B=={CROP_CLASS}\)*A --outfile {out_filename} \
        -A {IN_FILENAMES[i]} -B {MASK_FILENAMES[i]} \
        --extent union  --A_band 1 --B_band 1  --quiet --NoDataValue=0")

但是在windows下的gdal运行出错,在ubuntu系统内运行成功

读取地理栅格图像

def read_tif(tif_path, display=False):
    dataset = rio.open(tif_path)
   
    if display:
        print(f"width:{dataset.width}, height:{dataset.height}, bands:{dataset.count}")
       
        types = {i: dtype for i, dtype in zip(dataset.indexes, dataset.dtypes)}
        for k in types.keys():
            print(f"band_{k}: {types[k]}")
       
        print(f"{dataset.crs}")
        print(dataset.transform)
   
    return dataset
# 根据经纬度,从栅格中提取值(适用于EPSG:4326,其他坐标系未做测试)
def extract_value_from_tif(rs, lon,lat):
    '''
    geometry: geodataframe的geometry列
    rs: 栅格数据
    '''
    x = lon
    y = lat
    row, col = rs.index(x,y)
    value = rs.read(1)[row,col]
    return value

批量裁剪

import geopandas as gpd
import rasterio as rio
#from rasterio.mask import mask
import rasterio.mask  
from geopandas import GeoSeries
import os
def handle_grid(shpdatafile, rasterfile, out_file):
    """
    handle gird
    :param shpdatafile: path of shpfile
    :param rasterfile: path of rasterfile
    :param out_file
    :return:
    """
    shpdata = gpd.GeoDataFrame.from_file(shpdatafile)
    # print(shpdata.crs)
    # shpdata.crs = {'init': 'epsg:4326'}
    print(shpdata.crs)
    rasterdata = rio.open(rasterfile)
    print(rasterdata.crs)
    print(f"total {len(shpdata)}")
    for i in range(0, len(shpdata)):
        # getting vector data features
        print(f"current {i}th",end="")
        geo = shpdata.geometry[i]
        out_name = shpdata.gridid[i]
        #out_name = str(i)
        feature = [geo.__geo_interface__]
        # try:
        # Using feature to get building mask tif
        # print("running mask...")
        out_image, out_transform = rio.mask.mask(rasterdata, feature, all_touched=True, crop=True, nodata=rasterdata.nodata)
        #out_image, out_transform = rio.mask.mask(rasterdata, feature, all_touched=True, invert=True, nodata=rasterdata.nodata)
        # get tif Value值,and convert to list
        out_meta = rasterdata.meta.copy()
        out_meta.update({"driver": "GTiff",
                            "height": out_image.shape[1],
                            "width": out_image.shape[2],
                            "transform": out_transform})
        band_mask = rasterio.open(os.path.join(out_file, f"{out_name}.tif"), "w", **out_meta)
        # print(out_image)
        band_mask.write(out_image)
        # except Exception as e:
        #    print(e)
        #    pass
           
           
shp = "/home/data/jyx/population-5-5/run-data-result/urban_vitality/beijing_unicom/bj3857.shp"
raster = "/home/data/jyx/population-5-5/rs/uv_sentinel/beijing.tif"
out = "/home/data/jyx/population-5-5/rs/uv_sentinel/split_tif/"
handle_grid(shp,raster,out)

log_resolver.ipynb用来绘图

原因是通过程序绘制的图更规范,有一些指标,如散点图的拟合公式、\(R^2\)等 ,以前在excel中需要点点点才能显示,在观察、分析大量log文件时,很繁琐,不利于发现规律。
而在程序写好之后,log可以很直观的看到,并进行对比。并且由于程序制图较规范,便于不同实验之间进行比较,美观度也还可以,后续略作调整即可用于论文插图
以下代码实现了plt的density scatter.

def density_scatter( x , y, ax = None, sort = True, bins = 20, is_cbar=True, **kwargs )   :
    """
    Scatter plot colored by 2d histogram
    """
    if ax is None :
        fig , ax = plt.subplots()
    data , x_e, y_e = np.histogram2d( x, y, bins = bins, density = True )
    z = interpn( ( 0.5*(x_e[1:] + x_e[:-1]) , 0.5*(y_e[1:]+y_e[:-1]) ) , data , np.vstack([x,y]).T , method = "splinef2d", bounds_error = False)
    #To be sure to plot all data
    z[np.where(np.isnan(z))] = 0.0
    # Sort the points by density, so that the densest points are plotted last
    if sort :
        idx = z.argsort()
        x, y, z = x[idx], y[idx], z[idx]
    ax.scatter( x, y, c=z, **kwargs )
    ax.grid(linestyle='--',linewidth=0.5)
    norm = Normalize(vmin = np.min(z), vmax = np.max(z))
    if is_cbar:
        cbar = plt.colorbar(cm.ScalarMappable(norm = norm), ax=ax)
        cbar.ax.set_ylabel('Density')
    return ax