hotspot

发布时间 2023-07-03 11:56:02作者: Bonjour_!
点击查看代码
import argparse
import scanpy as sc
import hotspot
import numpy as np
import mplscience
import matplotlib
import matplotlib.pyplot as plt
import pdb

def ParserInput():
    parser = argparse.ArgumentParser(description='hotspot.space')
    parser.add_argument("--h5ad","-a",required=True, dest='h5ad',help='h5ad path',type=str)
    parser.add_argument("--outdir","-o",required=True, dest='outdir',help='result directory',type=str)
    parser.add_argument("--species","-s",required=False, dest='species',help='species,hsa or mmu',type=str,default="hsa")
    parser.add_argument("--prefix","-p",required=False, dest='prefix',help='result file prefix',type=str,default="sample")
    parser.add_argument("--threads","-t",required=False, dest='threads',help='threads num',type=int,default=4)
    parser.add_argument("--mingenes","-g",required=False, dest='mingenes',help='min genes per spot',type=int,default=50)
    parser.add_argument("--mincells","-c",required=False, dest='mincells',help='min cells per spot',type=int,default=1)
    args = parser.parse_args()
    return args

#filter spot and renormalize the data for expression viz on plots
def PreProcess(adata,min_genes,min_cells):
    sc.pp.filter_cells(adata, min_genes=min_genes)
    sc.pp.filter_genes(adata, min_cells=min_cells)
    adata.obs["total_counts"] = np.asarray(adata.X.sum(1)).ravel()
    adata.layers["csc_counts"] = adata.X.tocsc()
    sc.pp.normalize_total(adata)
    sc.pp.log1p(adata)
    return adata

# Create the Hotspot object and the neighborhood graph
def RunHotspot(adata):
    hs = hotspot.Hotspot(
        adata,
        layer_key="csc_counts",
        model='danb',
        latent_obsm_key="spatial",
        umi_counts_obs_key="total_counts",
    )
    hs.create_knn_graph(weighted_graph=True, n_neighbors=30,)
    return hs

'''
@compute autocorrelations for each gene
@Grouping genes into spatial modules
@Compute pair-wise local correlations between these genes
'''
def ModuleDef(hs,jobs):
    hs_results = hs.compute_autocorrelations(jobs=jobs)
    hs_genes = hs_results.head(6000).index
    lcz = hs.compute_local_correlations(hs_genes, jobs=jobs)
    modules = hs.create_modules(min_gene_threshold=20, core_only=False, fdr_threshold=0.05)
    return modules

def main():
    args = ParserInput()
    outdir = args.outdir
    prefix = args.prefix
    jobs = args.threads
    min_genes= args.mingenes
    min_cells = args.mincells
    adata = sc.read(args.h5ad)
    adata = PreProcess(adata,min_genes,min_cells)
    hs = RunHotspot(adata)
    modules = ModuleDef(hs,jobs)
    modules.value_counts()
    fig = plt.figure()
    hs.plot_local_correlations()
    plt.savefig(outdir+"/"+prefix+'.module.pdf')

    results = hs.results.join(hs.modules)
    # Show the top genes for a module
    for module in results.Module.unique().tolist():
        if(not np.isnan(module)):
            result = results.loc[results.Module == module]
            result.sort_values('Z', ascending=False).head(10)
            result.to_csv(outdir+"/"+prefix+".module."+str(module)+".csv")

            # Plot top gene expression in spatial position
            cmap = matplotlib.colors.LinearSegmentedColormap.from_list(
               'grays', ['#DDDDDD', '#000000'])
            genes = result.sort_values('Z', ascending=False).head(6).index
            fig = plt.figure()
            with mplscience.style_context():
                sc.pl.spatial(
                    adata,
                    color=genes,
                    cmap=cmap,
                    frameon=False,
                    vmin='p0',
                    vmax='p95',
                    spot_size=30,
                )
            plt.savefig(outdir+"/"+prefix+'.module.'+str(module)+'.topgene.pdf')

if __name__ == '__main__':
    main()