STAMP lung cancer SMI with DISCO
Analysis on Human Lung SMI Data
In [28]:
                    Copied!
                    
                    
                import scanpy as sc
import numpy as np
import sctm
import squidpy as sq
import pandas as pd
import seaborn as sns
import scvi
%load_ext autoreload
%autoreload 2
import scanpy as sc
import numpy as np
import sctm
import squidpy as sq
import pandas as pd
import seaborn as sns
import scvi
%load_ext autoreload
%autoreload 2
        
        The autoreload extension is already loaded. To reload it, use: %reload_ext autoreload
In this tutorial, we demonstrate how to analyse single cell level nanostring smi data. We transferred our data from the Giotto object located here https://nanostring.com/products/cosmx-spatial-molecular-imager/nsclc-ffpe-dataset. The processed dataset can be found at https://zenodo.org/records/10988053.
In [ ]:
                    Copied!
                    
                    
                adata = sc.read_h5ad(
    "../../../../STAMP/Reproducibility/ProcessedData/SMI_Lung/Lung5-3.h5ad"
)
adata.var_names_make_unique()
adata = sc.read_h5ad(
    "../../../../STAMP/Reproducibility/ProcessedData/SMI_Lung/Lung5-3.h5ad"
)
adata.var_names_make_unique()
        
        In [ ]:
                    Copied!
                    
                    
                adata.obsm['spatial'] = adata.obsm['spatial_raw']
adata.obsm['spatial'] = adata.obsm['spatial_raw']
        
        In [ ]:
                    Copied!
                    
                    
                sc.pl.spatial(adata, spot_size = 0.03, color = 'cell_type')
sc.pl.spatial(adata, spot_size = 0.03, color = 'cell_type')
        
        In [ ]:
                    Copied!
                    
                    
                sc.pp.filter_cells(adata, min_counts=50)
sctm.pp.filter_genes(adata, 0.01)
sc.pp.highly_variable_genes(adata, n_top_genes=600, flavor="seurat_v3")
sc.pp.filter_cells(adata, min_counts=50)
sctm.pp.filter_genes(adata, 0.01)
sc.pp.highly_variable_genes(adata, n_top_genes=600, flavor="seurat_v3")
        
        In [ ]:
                    Copied!
                    
                    
                # Build neighborhood graph
sq.gr.spatial_neighbors(adata, n_neighs=round(1/1000 * adata.n_obs))
# Build neighborhood graph
sq.gr.spatial_neighbors(adata, n_neighs=round(1/1000 * adata.n_obs))
        
        In [ ]:
                    Copied!
                    
                    
                n_topics = 15
# Only hvgs and fit a total of 30 topics here
sctm.seed.seed_everything(0)
#
model = sctm.stamp.STAMP(
    adata[:, adata.var.highly_variable],
    n_topics=n_topics,
    gene_likelihood = "nb")
# uses gpu by default  to use cpu use device="cpu"
model.train(learning_rate = 0.01)
topic_prop = model.get_cell_by_topic()
beta = model.get_feature_by_topic()
for i in topic_prop.columns:
    adata.obs[i] = topic_prop[i]
n_topics = 15
# Only hvgs and fit a total of 30 topics here
sctm.seed.seed_everything(0)
#
model = sctm.stamp.STAMP(
    adata[:, adata.var.highly_variable],
    n_topics=n_topics,
    gene_likelihood = "nb")
# uses gpu by default  to use cpu use device="cpu"
model.train(learning_rate = 0.01)
topic_prop = model.get_cell_by_topic()
beta = model.get_feature_by_topic()
for i in topic_prop.columns:
    adata.obs[i] = topic_prop[i]
        
        Computing background frequencies
0%| | 0/800 [00:00<?, ?it/s]/home/chengwei/miniconda3/envs/torch/lib/python3.9/site-packages/pyro/primitives.py:443: UserWarning: encoder.norm_topic.0.weight was not registered in the param store because requires_grad=False. You can silence this warning by calling my_module.train(False) warnings.warn( Epoch Loss:241.257: 10%|████████▌ | 80/800 [1:16:56<11:35:13, 57.94s/it]
In [31]:
                    Copied!
                    
                    
                topic_prop = model.get_cell_by_topic()
beta = model.get_feature_by_topic()
# Add it into adata to visualize
for i in topic_prop.columns:
    adata.obs[i] = topic_prop[i]
topic_prop = model.get_cell_by_topic()
beta = model.get_feature_by_topic()
# Add it into adata to visualize
for i in topic_prop.columns:
    adata.obs[i] = topic_prop[i]
        
        In [32]:
                    Copied!
                    
                    
                sctm.pl.spatial(adata, color = topic_prop.columns, size = 7, vmax = 'p99')
sctm.pl.spatial(adata, color = topic_prop.columns, size = 7, vmax = 'p99')
        
        Analysis of topics with DISCO. In general, disco is very good at human celltypes that are not cancer related. Alternatively, we can also run other analysis such as over-enrichment analysis as well
In [33]:
                    Copied!
                    
                    
                celltypes = sctm.analysis.get_topic_disco(beta)
celltypes = sctm.analysis.get_topic_disco(beta)
        
        [Parallel(n_jobs=1)]: Done 3280 tasks | elapsed: 2.8min [Parallel(n_jobs=1)]: Done 3280 tasks | elapsed: 2.8min [Parallel(n_jobs=1)]: Done 3280 tasks | elapsed: 2.8min [Parallel(n_jobs=1)]: Done 3280 tasks | elapsed: 2.9min [Parallel(n_jobs=1)]: Done 3280 tasks | elapsed: 2.9min [Parallel(n_jobs=1)]: Done 3280 tasks | elapsed: 2.9min [Parallel(n_jobs=1)]: Done 3280 tasks | elapsed: 2.9min
In [37]:
                    Copied!
                    
                    
                filtered = [] 
for i in celltypes.keys():
    # Filter out the low overlap
    df = celltypes[i].loc[celltypes[i].overlap > 5]
    # Filter only in lung
    df = df.loc[df['name'].str.contains("lung")]
    df["Topic"] = i
    filtered.append(df.head(3))
filtered = [] 
for i in celltypes.keys():
    # Filter out the low overlap
    df = celltypes[i].loc[celltypes[i].overlap > 5]
    # Filter only in lung
    df = df.loc[df['name'].str.contains("lung")]
    df["Topic"] = i
    filtered.append(df.head(3))
        
        In [38]:
                    Copied!
                    
                    
                filtered = pd.concat(filtered)
filtered
filtered = pd.concat(filtered)
filtered
        
        Out[38]:
| pval | or | name | gene | background | overlap | geneset | Topic | |
|---|---|---|---|---|---|---|---|---|
| 139 | 0.0 | 51.430 | Goblet cell vs All others in lung | EPCAM,S100P,AGR2,PSCA,ERBB3,KRT19,CEACAM6,KRT17 | 9264 | 8 | 166 | Topic1 | 
| 143 | 0.0 | 36.123 | Airway basal cell vs All others in lung | SLC2A1,EPCAM,DDR1,AGR2,CDH1,KRT19,KRT17,S100P,... | 9264 | 10 | 348 | Topic1 | 
| 141 | 0.0 | 32.913 | Club cell vs All others in lung | EPCAM,SPINK1,AGR2,KRT19,ERBB3,CDH1 | 9264 | 6 | 166 | Topic1 | 
| 275 | 0.0 | 163.079 | Mast cell vs All others in lung | RGS1,RGS2,IL1RL1,CPA3,KIT,HPGDS,PTGS1,TPSB2,TP... | 9264 | 17 | 344 | Topic2 | 
| 455 | 0.0 | 109.297 | GZMK CD8 T cell vs All others in lung | CD2,CD8A,DUSP2,TIGIT,FYB1,GZMK,GZMA,CD69,CCL5,... | 9264 | 17 | 497 | Topic3 | 
| 456 | 0.0 | 73.825 | GZMB CD8 T cell vs All others in lung | CD2,CD8A,GZMA,CCL5,GZMK,STAT4,ITK,KLRB1,TIGIT,... | 9264 | 15 | 398 | Topic3 | 
| 195 | 0.0 | 157.969 | Plasma cell vs All others in lung | IGKC,CD38,JCHAIN,MZB1,IRF4,CD27,IGHG2,IGHA1,IG... | 9264 | 14 | 157 | Topic4 | 
| 192 | 0.0 | 59.285 | Plasma cell vs Memory B cell in lung | MZB1,JCHAIN,XBP1,IGHM,IGHA1,TNFRSF17,IGHG1,DUS... | 9264 | 14 | 387 | Topic4 | 
| 494 | 0.0 | 85.616 | Memory B cell vs All others in lung | CD52,FCRLA,SELL,CD74,LTB,HLA-DQA1,MS4A1,IGHD,I... | 9264 | 16 | 368 | Topic5 | 
| 324 | 0.0 | 118.625 | Vascular smooth muscle cell vs All others in lung | IGFBP5,IGFBP7,SPARCL1,PDGFRB,GPX3,CALD1,BGN,TA... | 9264 | 17 | 461 | Topic6 | 
| 323 | 0.0 | 100.241 | Pericyte vs All others in lung | RGS5,IGFBP7,SPARCL1,PDGFRB,GPX3,CALD1,BGN,COL4... | 9264 | 17 | 538 | Topic6 | 
| 334 | 0.0 | 70.929 | ADAMDEC1+ADAM28+ fibroblast vs All others in lung | COL3A1,COL5A2,COL6A3,COL1A2,COL5A1,CDH11,COL1A... | 9264 | 15 | 413 | Topic7 | 
| 237 | 0.0 | 64.018 | Capillary EC vs All others in lung | SPARCL1,RAMP3,FZD4,ESAM,VWF,FLT1,COL4A1,COL4A2... | 9264 | 18 | 849 | Topic8 | 
| 240 | 0.0 | 50.222 | Venous EC vs All others in lung | ACKR1,RGS5,SPARCL1,GPX3,RAMP3,FZD4,ESAM,VWF,FL... | 9264 | 17 | 776 | Topic8 | 
| 180 | 0.0 | 118.039 | Goblet cell vs All others in lung | TACSTD2,S100P,CLDN4,PSCA,LCN2,MMP7,KRT7,KRT8,K... | 9264 | 13 | 166 | Topic10 | 
| 183 | 0.0 | 42.944 | Airway basal cell vs All others in lung | EPHA2,TACSTD2,CLDN4,KRT8,KRT18,KRT19,KRT17,S10... | 9264 | 12 | 348 | Topic10 | 
| 167 | 0.0 | 33.483 | Goblet cell vs AT2 in lung | LCN2,S100P,CEACAM6,PSCA,TACSTD2,KRT19,KRT7,KRT... | 9264 | 10 | 303 | Topic10 | 
| 350 | 0.0 | 190.913 | LYVE1 macrophage vs All others in lung | C1QA,C1QC,C1QB,FCER1G,MERTK,SELENOP,CD14,HLA-D... | 9264 | 19 | 478 | Topic11 | 
| 326 | 0.0 | 176.581 | LYVE1 macrophage vs cDC2 in lung | SELENOP,C1QA,C1QB,C1QC,MS4A4A,CD163,PSAP,CD14,... | 9264 | 18 | 337 | Topic11 | 
| 328 | 0.0 | 99.321 | LYVE1 macrophage vs CD14 monocyte in lung | C1QC,C1QA,C1QB,SELENOP,MRC1,HLA-DQA1,HLA-DRB1,... | 9264 | 18 | 571 | Topic11 | 
| 169 | 0.0 | 35.909 | Goblet cell vs All others in lung | PIGR,S100P,AGR2,MMP7,KRT19,EPHA2 | 9264 | 6 | 166 | Topic12 | 
| 163 | 0.0 | 19.108 | Goblet cell vs AT2 in lung | S100P,AGR2,KRT19,PIGR,LYZ,MMP7 | 9264 | 6 | 303 | Topic12 | 
| 271 | 0.0 | 46.090 | Goblet cell vs Airway basal cell in lung | SLPI,CXCL17,CLU,LTF,CXCL2,KRT7,SERPINA1,MMP7 | 9264 | 8 | 155 | Topic13 | 
| 279 | 0.0 | 45.459 | AT2 vs All others in lung | CHI3L1,ITGB6,LAMP3,CXCL2,DMBT1,SLPI,CXCL17,EFN... | 9264 | 12 | 330 | Topic13 | 
| 280 | 0.0 | 34.935 | Goblet cell vs All others in lung | LTF,MMP7,KRT7,SLPI,CXCL17,AQP3,CXCL2 | 9264 | 7 | 166 | Topic13 | 
| 526 | 0.0 | 401.837 | LYVE1 macrophage vs All others in lung | CD14,CSF1R,CD74,HLA-DRA,HLA-DRB1,HLA-DQA1,HLA-... | 9264 | 20 | 478 | Topic14 | 
| 533 | 0.0 | 259.446 | cDC2 vs All others in lung | RGS1,CSF1R,CD74,HLA-DRA,HLA-DRB1,HLA-DQA1,HLA-... | 9264 | 19 | 361 | Topic14 | 
| 293 | 0.0 | 181.388 | CFD+MGP+ fibroblast vs All others in lung | PDGFRA,COL1A2,PTGDS,MGP,LUM,DCN,MMP2,CDH11,COL... | 9264 | 18 | 477 | Topic15 | 
| 660 | 0.0 | 88.800 | CFD+MGP+ fibroblast vs Pericyte in lung | LUM,MGP,DCN,MMP2,PTGDS,PDGFRA,COL1A1,CDH11,COL... | 9264 | 17 | 601 | Topic15 | 
Plot out some of the top gene modules and topics
In [40]:
                    Copied!
                    
                    
                # Plot out Mast cells and top genes "Topic2"
sctm.pl.spatial(adata, color = ["Topic2"] + beta.nlargest(3, "Topic2").index.tolist(), size = 7, vmax ='p99')
# Plot out Mast cells and top genes "Topic2"
sctm.pl.spatial(adata, color = ["Topic2"] + beta.nlargest(3, "Topic2").index.tolist(), size = 7, vmax ='p99')
        
        In [42]:
                    Copied!
                    
                    
                # Plot out CAF and top genes "Topic7"
sctm.pl.spatial(adata, color = ["Topic7"] + beta.nlargest(3, "Topic7").index.tolist(), size = 7, vmax ='p99')
# Plot out CAF and top genes "Topic7"
sctm.pl.spatial(adata, color = ["Topic7"] + beta.nlargest(3, "Topic7").index.tolist(), size = 7, vmax ='p99')
        
        In [43]:
                    Copied!
                    
                    
                # Plot out tumor core and top genes "Topic1"
sctm.pl.spatial(adata, color = ["Topic1"] + beta.nlargest(3, "Topic1").index.tolist(), size = 7, vmax ='p99')
# Plot out tumor core and top genes "Topic1"
sctm.pl.spatial(adata, color = ["Topic1"] + beta.nlargest(3, "Topic1").index.tolist(), size = 7, vmax ='p99')
        
        In [44]:
                    Copied!
                    
                    
                # Plot out tumor edge and top genes "Topic12"
sctm.pl.spatial(adata, color = ["Topic12"] + beta.nlargest(3, "Topic12").index.tolist(), size = 7, vmax ='p99')
# Plot out tumor edge and top genes "Topic12"
sctm.pl.spatial(adata, color = ["Topic12"] + beta.nlargest(3, "Topic12").index.tolist(), size = 7, vmax ='p99')