STAMP Multi Sample DLPFC

In this part, we focus on batch-correction for multi-sample Human DLPFC. The prepocessed data can be accessible and downloaded via https://zenodo.org/records/10988053.

In [1]:

                
                    Copied!
                    
                        
                        
                    
                    

            
import scanpy as sc
import numpy as np
import sctm
import squidpy as sq
import pandas as pd
import seaborn as sns
import anndata as ad

# Please install scib metrics to run this example
# https://scib-metrics.readthedocs.io/en/stable/
import scib_metrics
from scib_metrics.benchmark import Benchmarker

%load_ext autoreload
%autoreload 2
import scanpy as sc
import numpy as np
import sctm
import squidpy as sq
import pandas as pd
import seaborn as sns
import anndata as ad

# Please install scib metrics to run this example
# https://scib-metrics.readthedocs.io/en/stable/
import scib_metrics
from scib_metrics.benchmark import Benchmarker

%load_ext autoreload
%autoreload 2

/home/chengwei/miniconda3/envs/torch/lib/python3.9/site-packages/geopandas/_compat.py:124: UserWarning: The Shapely GEOS version (3.11.1-CAPI-1.17.1) is incompatible with the GEOS version PyGEOS was compiled with (3.10.4-CAPI-1.16.2). Conversions between both will be slow.
  warnings.warn(
/home/chengwei/miniconda3/envs/torch/lib/python3.9/site-packages/spatialdata/__init__.py:9: UserWarning: Geopandas was set to use PyGEOS, changing to shapely 2.0 with:

	geopandas.options.use_pygeos = True

If you intended to use PyGEOS, set the option to False.
  _check_geopandas_using_shapely()

In [2]:

                
                    Copied!
                    
adata = sc.read_h5ad(
    "../../../../STAMP/Reproducibility/ProcessedData/Visium_DLPFC/adata.h5ad"
)
adata = sc.read_h5ad(
    "../../../../STAMP/Reproducibility/ProcessedData/Visium_DLPFC/adata.h5ad"
)

In [3]:

                
                    Copied!
                    
adata
# The data  already have spatial neighbors built with default scanpy parameters
adata
# The data  already have spatial neighbors built with default scanpy parameters

Out[3]:

AnnData object with n_obs × n_vars = 47681 × 4000
    obs: 'in_tissue', 'array_row', 'array_col', 'library_id', 'layer', 'Topic1', 'Topic2', 'Topic3', 'Topic4', 'Topic5', 'Topic6', 'Topic7', 'Topic8', 'Topic9', 'Topic10', '_scvi_batch', '_scvi_labels'
    var: 'highly_variable_rank', 'highly_variable'
    uns: '_scvi_manager_uuid', '_scvi_uuid', 'beta', 'neighbors', 'spatial', 'spatial_neighbors', 'topic_prop', 'umap'
    obsm: 'X_pca', 'X_scvi', 'X_stamp', 'X_umap', 'spatial'
    layers: 'counts'
    obsp: 'connectivities', 'distances', 'spatial_connectivities', 'spatial_distances'

In [4]:

                
                    Copied!
                    
                        
                        
                    
                    

            
# In this case, we use the SGC, since we want the embeddings to be smooth
sctm.seed.seed_everything(0)

model = sctm.stamp.STAMP(
    adata,
    n_topics = 10,
    # layer="count",
    categorical_covariate_keys=["library_id"],
    mode="sgc",
    gene_likelihood="nb")

model.train(learning_rate = 0.01)
# In this case, we use the SGC, since we want the embeddings to be smooth
sctm.seed.seed_everything(0)

model = sctm.stamp.STAMP(
    adata,
    n_topics = 10,
    # layer="count",
    categorical_covariate_keys=["library_id"],
    mode="sgc",
    gene_likelihood="nb")

model.train(learning_rate = 0.01)

Computing background frequencies

  0%|                                                                                                                        | 0/800 [00:00<?, ?it/s]/home/chengwei/miniconda3/envs/torch/lib/python3.9/site-packages/pyro/primitives.py:443: UserWarning: encoder.norm_topic.0.weight was not registered in the param store because requires_grad=False. You can silence this warning by calling my_module.train(False)
  warnings.warn(
/home/chengwei/miniconda3/envs/torch/lib/python3.9/site-packages/pyro/primitives.py:443: UserWarning: encoder.norm_topic.1.weight was not registered in the param store because requires_grad=False. You can silence this warning by calling my_module.train(False)
  warnings.warn(
/home/chengwei/miniconda3/envs/torch/lib/python3.9/site-packages/pyro/primitives.py:443: UserWarning: encoder.norm_topic.2.weight was not registered in the param store because requires_grad=False. You can silence this warning by calling my_module.train(False)
  warnings.warn(
/home/chengwei/miniconda3/envs/torch/lib/python3.9/site-packages/pyro/primitives.py:443: UserWarning: encoder.norm_topic.3.weight was not registered in the param store because requires_grad=False. You can silence this warning by calling my_module.train(False)
  warnings.warn(
/home/chengwei/miniconda3/envs/torch/lib/python3.9/site-packages/pyro/primitives.py:443: UserWarning: encoder.norm_topic.4.weight was not registered in the param store because requires_grad=False. You can silence this warning by calling my_module.train(False)
  warnings.warn(
/home/chengwei/miniconda3/envs/torch/lib/python3.9/site-packages/pyro/primitives.py:443: UserWarning: encoder.norm_topic.5.weight was not registered in the param store because requires_grad=False. You can silence this warning by calling my_module.train(False)
  warnings.warn(
/home/chengwei/miniconda3/envs/torch/lib/python3.9/site-packages/pyro/primitives.py:443: UserWarning: encoder.norm_topic.6.weight was not registered in the param store because requires_grad=False. You can silence this warning by calling my_module.train(False)
  warnings.warn(
/home/chengwei/miniconda3/envs/torch/lib/python3.9/site-packages/pyro/primitives.py:443: UserWarning: encoder.norm_topic.7.weight was not registered in the param store because requires_grad=False. You can silence this warning by calling my_module.train(False)
  warnings.warn(
/home/chengwei/miniconda3/envs/torch/lib/python3.9/site-packages/pyro/primitives.py:443: UserWarning: encoder.norm_topic.8.weight was not registered in the param store because requires_grad=False. You can silence this warning by calling my_module.train(False)
  warnings.warn(
/home/chengwei/miniconda3/envs/torch/lib/python3.9/site-packages/pyro/primitives.py:443: UserWarning: encoder.norm_topic.9.weight was not registered in the param store because requires_grad=False. You can silence this warning by calling my_module.train(False)
  warnings.warn(
/home/chengwei/miniconda3/envs/torch/lib/python3.9/site-packages/pyro/primitives.py:443: UserWarning: encoder.norm_topic.10.weight was not registered in the param store because requires_grad=False. You can silence this warning by calling my_module.train(False)
  warnings.warn(
/home/chengwei/miniconda3/envs/torch/lib/python3.9/site-packages/pyro/primitives.py:443: UserWarning: encoder.norm_topic.11.weight was not registered in the param store because requires_grad=False. You can silence this warning by calling my_module.train(False)
  warnings.warn(
Epoch Loss:1614.948:  16%|█████████████▎                                                                       | 125/800 [1:01:52<5:34:06, 29.70s/it]

Early Stopping

In [5]:

                
                    Copied!
                    
topic_prop = model.get_cell_by_topic()
beta = model.get_feature_by_topic()
for i in topic_prop.columns:
    adata.obs[i] =  topic_prop[i]
topic_prop = model.get_cell_by_topic()
beta = model.get_feature_by_topic()
for i in topic_prop.columns:
    adata.obs[i] =  topic_prop[i]

In [6]:

                
                    Copied!
                    
adata1 = adata[adata.obs.library_id == "151673"]
sctm.pl.spatial(adata1, color = topic_prop.columns, size = 40)
adata1 = adata[adata.obs.library_id == "151673"]
sctm.pl.spatial(adata1, color = topic_prop.columns, size = 40)

In [7]:

                
                    Copied!
                    
adata.obsm["X_stamp"] = topic_prop.values
adata.obsm["X_stamp"] = topic_prop.values

In [8]:

                
                    Copied!
                    
adata.obs['library_id'] = adata.obs['library_id'].astype('category')
adata.obs['layer'] = adata.obs['layer'].astype('category')
adata.obs['library_id'] = adata.obs['library_id'].astype('category')
adata.obs['layer'] = adata.obs['layer'].astype('category')

In [9]:

                
                    Copied!
                    
# Remove the NA ones so we can benchmark
adata = adata[~adata.obs.layer.isna()]
# Remove the NA ones so we can benchmark
adata = adata[~adata.obs.layer.isna()]

In [10]:

                
                    Copied!
                    
# Add abit of jitter due to some problems with the current scib metrics
adata.obsm["X_stamp"] = np.random.standard_normal((47329, 10)) * 1e-8 + adata.obsm["X_stamp"]
# Add abit of jitter due to some problems with the current scib metrics
adata.obsm["X_stamp"] = np.random.standard_normal((47329, 10)) * 1e-8 + adata.obsm["X_stamp"]

In [11]:

                
                    Copied!
                    
                        
                        
                    
                    

            
bm = Benchmarker(
    adata,
    batch_key="library_id",
    label_key="layer",
    embedding_obsm_keys=["X_stamp", "X_pca"],
    n_jobs=6,
)
bm.benchmark()
bm = Benchmarker(
    adata,
    batch_key="library_id",
    label_key="layer",
    embedding_obsm_keys=["X_stamp", "X_pca"],
    n_jobs=6,
)
bm.benchmark()

Computing neighbors:   0%|                                                                                                     | 0/2 [00:00<?, ?it/s]/home/chengwei/miniconda3/envs/torch/lib/python3.9/site-packages/umap/distances.py:1063: NumbaDeprecationWarning: The 'nopython' keyword argument was not supplied to the 'numba.jit' decorator. The implicit default value for this argument is currently False, but it will be changed to True in Numba 0.59.0. See https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit for details.
  @numba.jit()
/home/chengwei/miniconda3/envs/torch/lib/python3.9/site-packages/umap/distances.py:1071: NumbaDeprecationWarning: The 'nopython' keyword argument was not supplied to the 'numba.jit' decorator. The implicit default value for this argument is currently False, but it will be changed to True in Numba 0.59.0. See https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit for details.
  @numba.jit()
/home/chengwei/miniconda3/envs/torch/lib/python3.9/site-packages/umap/distances.py:1086: NumbaDeprecationWarning: The 'nopython' keyword argument was not supplied to the 'numba.jit' decorator. The implicit default value for this argument is currently False, but it will be changed to True in Numba 0.59.0. See https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit for details.
  @numba.jit()
/home/chengwei/miniconda3/envs/torch/lib/python3.9/site-packages/umap/umap_.py:660: NumbaDeprecationWarning: The 'nopython' keyword argument was not supplied to the 'numba.jit' decorator. The implicit default value for this argument is currently False, but it will be changed to True in Numba 0.59.0. See https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit for details.
  @numba.jit()
Computing neighbors: 100%|████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [05:00<00:00, 150.25s/it]
Embeddings:   0%|                                                                                                              | 0/2 [00:00<?, ?it/s]
Metrics:   0%|                                                                                                                | 0/10 [00:00<?, ?it/s]
Metrics:   0%|                                                                             | 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels]INFO:root:isolated labels: no more than 8 batches per label
INFO:jax._src.xla_bridge:Unable to initialize backend 'cuda': module 'jaxlib.xla_extension' has no attribute 'GpuAllocatorConfig'
INFO:jax._src.xla_bridge:Unable to initialize backend 'rocm': module 'jaxlib.xla_extension' has no attribute 'GpuAllocatorConfig'
INFO:jax._src.xla_bridge:Unable to initialize backend 'tpu': INVALID_ARGUMENT: TpuPlatform is not available.
WARNING:jax._src.xla_bridge:No GPU/TPU found, falling back to CPU. (Set TF_CPP_MIN_LOG_LEVEL=0 and rerun for more info.)

Metrics:  10%|██████▉                                                              | 1/10 [00:14<02:13, 14.79s/it, Bio conservation: isolated_labels]
Metrics:  10%|█████▌                                                 | 1/10 [00:14<02:13, 14.79s/it, Bio conservation: nmi_ari_cluster_labels_kmeans]
Metrics:  20%|███████████                                            | 2/10 [00:19<01:09,  8.64s/it, Bio conservation: nmi_ari_cluster_labels_kmeans]
Metrics:  20%|█████████████▌                                                      | 2/10 [00:19<01:09,  8.64s/it, Bio conservation: silhouette_label]
Metrics:  30%|████████████████████▍                                               | 3/10 [00:33<01:19, 11.42s/it, Bio conservation: silhouette_label]
Metrics:  30%|██████████████████████▌                                                    | 3/10 [00:33<01:19, 11.42s/it, Bio conservation: clisi_knn]
Metrics:  40%|██████████████████████████████                                             | 4/10 [00:35<00:46,  7.70s/it, Bio conservation: clisi_knn]
Metrics:  40%|███████████████████████████▏                                        | 4/10 [00:35<00:46,  7.70s/it, Batch correction: silhouette_batch]
Metrics:  50%|██████████████████████████████████                                  | 5/10 [00:44<00:39,  7.88s/it, Batch correction: silhouette_batch]
Metrics:  50%|█████████████████████████████████████▌                                     | 5/10 [00:44<00:39,  7.88s/it, Batch correction: ilisi_knn]
Metrics:  60%|█████████████████████████████████████████████                              | 6/10 [00:44<00:22,  5.51s/it, Batch correction: ilisi_knn]
Metrics:  60%|██████████████████████████████████████████                            | 6/10 [00:44<00:22,  5.51s/it, Batch correction: kbet_per_label]
Metrics:  70%|█████████████████████████████████████████████████                     | 7/10 [01:32<00:57, 19.17s/it, Batch correction: kbet_per_label]
Metrics:  70%|██████████████████████████████████████████████▏                   | 7/10 [01:32<00:57, 19.17s/it, Batch correction: graph_connectivity]
Metrics:  80%|████████████████████████████████████████████████████████              | 8/10 [01:32<00:38, 19.17s/it, Batch correction: pcr_comparison]
Embeddings:  50%|███████████████████████████████████████████████████                                                   | 1/2 [01:33<01:33, 93.52s/it]
Metrics:   0%|                                                                                                                | 0/10 [00:00<?, ?it/s]
                                                                                                                                                     
Metrics:   0%|                                                                             | 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels]INFO:root:isolated labels: no more than 8 batches per label

Metrics:  10%|██████▉                                                              | 1/10 [01:35<14:16, 95.20s/it, Bio conservation: isolated_labels]
Metrics:  10%|█████▌                                                 | 1/10 [01:35<14:16, 95.20s/it, Bio conservation: nmi_ari_cluster_labels_kmeans]
Metrics:  20%|███████████                                            | 2/10 [01:41<05:43, 42.92s/it, Bio conservation: nmi_ari_cluster_labels_kmeans]
Metrics:  20%|█████████████▌                                                      | 2/10 [01:41<05:43, 42.92s/it, Bio conservation: silhouette_label]
Metrics:  30%|████████████████████▍                                               | 3/10 [03:06<07:16, 62.30s/it, Bio conservation: silhouette_label]
Metrics:  30%|██████████████████████▌                                                    | 3/10 [03:06<07:16, 62.30s/it, Bio conservation: clisi_knn]
Metrics:  40%|██████████████████████████████                                             | 4/10 [03:07<03:48, 38.05s/it, Bio conservation: clisi_knn]
Metrics:  40%|███████████████████████████▏                                        | 4/10 [03:07<03:48, 38.05s/it, Batch correction: silhouette_batch]
Metrics:  50%|██████████████████████████████████                                  | 5/10 [03:28<02:39, 31.91s/it, Batch correction: silhouette_batch]
Metrics:  50%|█████████████████████████████████████▌                                     | 5/10 [03:28<02:39, 31.91s/it, Batch correction: ilisi_knn]
Metrics:  60%|█████████████████████████████████████████████                              | 6/10 [03:29<01:25, 21.31s/it, Batch correction: ilisi_knn]
Metrics:  60%|██████████████████████████████████████████                            | 6/10 [03:29<01:25, 21.31s/it, Batch correction: kbet_per_label]
Metrics:  70%|█████████████████████████████████████████████████                     | 7/10 [04:12<01:25, 28.55s/it, Batch correction: kbet_per_label]
Metrics:  70%|██████████████████████████████████████████████▏                   | 7/10 [04:12<01:25, 28.55s/it, Batch correction: graph_connectivity]
Metrics:  80%|████████████████████████████████████████████████████▊             | 8/10 [04:13<00:38, 19.50s/it, Batch correction: graph_connectivity]
Metrics:  80%|████████████████████████████████████████████████████████              | 8/10 [04:13<00:38, 19.50s/it, Batch correction: pcr_comparison]
Embeddings: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [05:47<00:00, 173.60s/it]

In [13]:

                
                    Copied!
                    
bm.plot_results_table(min_max_scale=False)
bm.plot_results_table(min_max_scale=False)

Out[13]:

<plottable.table.Table at 0x7f8d0c5ee0d0>