Interacting with scDEED Flagged Neighborhoods

In [1]:

Copied!

import scanpy as sc

counts = sc.datasets.pbmc3k().X.todense().T
%load_ext rpy2.ipython
import scanpy as sc

counts = sc.datasets.pbmc3k().X.todense().T
%load_ext rpy2.ipython

In [2]:

Copied!





%%R -i counts

library(tidyverse)
library(Seurat)
library(scDEED)

perplexity <- 40
data <- CreateSeuratObject(counts) |>
    FindVariableFeatures() |>
    NormalizeData() |>
    ScaleData() |>
    RunPCA() |>
    RunTSNE(perplexity=perplexity)
%%R -i counts

library(tidyverse)
library(Seurat)
library(scDEED)

perplexity <- 40
data <- CreateSeuratObject(counts) |>
    FindVariableFeatures() |>
    NormalizeData() |>
    ScaleData() |>
    RunPCA() |>
    RunTSNE(perplexity=perplexity)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.2     ✔ tibble    3.3.0
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.1.0     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

    WARNING: The R package "reticulate" only fixed recently
    an issue that caused a segfault when used with rpy2:
    https://github.com/rstudio/reticulate/pull/1188
    Make sure that you use a version of that package that includes
    the fix.

Loading required package: SeuratObject
Loading required package: sp
‘SeuratObject’ was built under R 4.5.0 but the current version is
4.5.1; it is recomended that you reinstall ‘SeuratObject’ as the ABI
for R may have changed

Attaching package: ‘SeuratObject’

The following objects are masked from ‘package:base’:

    intersect, t


Attaching package: ‘scDEED’

The following object is masked from ‘package:stats’:

    optimize

Warning: Data is of class matrix. Coercing to dgCMatrix.
Finding variable features for layer counts
Calculating gene variances
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Normalizing layer: counts
Performing log-normalization
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Centering and scaling data matrix

  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |======================================================================| 100%
PC_ 1 
Positive:  Feature28818, Feature30430, Feature10609, Feature10611, Feature30934, Feature16745, Feature21054, Feature18999, Feature1956, Feature2225 
	   Feature32216, Feature29429, Feature31964, Feature1958, Feature1846, Feature31959, Feature23264, Feature18773, Feature18149, Feature17465 
	   Feature13509, Feature13383, Feature30115, Feature25893, Feature1903, Feature23057, Feature22827, Feature19264, Feature25268, Feature2483 
Negative:  Feature19155, Feature10608, Feature24758, Feature1636, Feature26263, Feature12205, Feature19180, Feature2301, Feature26825, Feature13131 
	   Feature15831, Feature8890, Feature28833, Feature2760, Feature4088, Feature7587, Feature13779, Feature8887, Feature15412, Feature13126 
	   Feature13862, Feature19991, Feature532, Feature4134, Feature7224, Feature18406, Feature9383, Feature31076, Feature20238, Feature11419 
PC_ 2 
Positive:  Feature30657, Feature18945, Feature23289, Feature10681, Feature10678, Feature10682, Feature23927, Feature27508, Feature10680, Feature9799 
	   Feature10701, Feature10696, Feature10685, Feature10679, Feature10700, Feature10694, Feature2243, Feature21419, Feature10608, Feature17729 
	   Feature23476, Feature26139, Feature25915, Feature31683, Feature18431, Feature7803, Feature7465, Feature30509, Feature2121, Feature25909 
Negative:  Feature31076, Feature17450, Feature28833, Feature8890, Feature22590, Feature7345, Feature19180, Feature4047, Feature22589, Feature7170 
	   Feature26840, Feature2238, Feature26825, Feature2301, Feature2317, Feature16816, Feature16916, Feature17424, Feature7587, Feature19539 
	   Feature32158, Feature1965, Feature16089, Feature24758, Feature7593, Feature3295, Feature11888, Feature2318, Feature32017, Feature11419 
PC_ 3 
Positive:  Feature10681, Feature30657, Feature27508, Feature10682, Feature10701, Feature9799, Feature10700, Feature18945, Feature10680, Feature10679 
	   Feature10678, Feature10685, Feature23289, Feature23927, Feature10694, Feature10696, Feature21419, Feature2243, Feature25915, Feature17729 
	   Feature23476, Feature7465, Feature23451, Feature26139, Feature31683, Feature18431, Feature12405, Feature4028, Feature7669, Feature9601 
Negative:  Feature7697, Feature7696, Feature4865, Feature9825, Feature12535, Feature19923, Feature6459, Feature2538, Feature29270, Feature14587 
	   Feature10421, Feature19444, Feature27143, Feature20143, Feature5519, Feature15067, Feature10867, Feature20164, Feature27373, Feature13865 
	   Feature10828, Feature10241, Feature10110, Feature31597, Feature14266, Feature22002, Feature22527, Feature10390, Feature28980, Feature26183 
PC_ 4 
Positive:  Feature10681, Feature10421, Feature7696, Feature4865, Feature30657, Feature27508, Feature7697, Feature12535, Feature9825, Feature10682 
	   Feature18945, Feature9799, Feature6459, Feature10701, Feature19923, Feature2538, Feature10867, Feature20143, Feature19444, Feature10685 
	   Feature14587, Feature29270, Feature15067, Feature10680, Feature10700, Feature27143, Feature5519, Feature10678, Feature23289, Feature20164 
Negative:  Feature17040, Feature1958, Feature1963, Feature1965, Feature4028, Feature1956, Feature24758, Feature13126, Feature1901, Feature31959 
	   Feature232, Feature4088, Feature16745, Feature21054, Feature1636, Feature1957, Feature18938, Feature8803, Feature1903, Feature15831 
	   Feature13127, Feature19354, Feature16089, Feature19155, Feature10611, Feature13131, Feature7692, Feature595, Feature4026, Feature26245 
PC_ 5 
Positive:  Feature10608, Feature17040, Feature15831, Feature17443, Feature4088, Feature24011, Feature1636, Feature19254, Feature4582, Feature8803 
	   Feature24758, Feature25589, Feature8033, Feature20638, Feature27657, Feature27929, Feature20905, Feature13779, Feature25893, Feature22260 
	   Feature27827, Feature9383, Feature16960, Feature2760, Feature17357, Feature13127, Feature925, Feature11512, Feature13865, Feature532 
Negative:  Feature22590, Feature7345, Feature31076, Feature4047, Feature17450, Feature26840, Feature28833, Feature7170, Feature8890, Feature16816 
	   Feature22589, Feature2317, Feature19180, Feature32158, Feature16916, Feature26825, Feature7593, Feature2318, Feature1958, Feature26839 
	   Feature30430, Feature7587, Feature1737, Feature9855, Feature1956, Feature2225, Feature22798, Feature31959, Feature232, Feature1957

In [3]:

Copied!





%%R

embeddings <- Embeddings(data, "tsne")
normalized_counts <- GetAssayData(data, layer = "scale.data") |>
    as.matrix() |>
    t()
%%R

embeddings <- Embeddings(data, "tsne")
normalized_counts <- GetAssayData(data, layer = "scale.data") |>
    as.matrix() |>
    t()

In [4]:

Copied!

import numpy as np
import rpy2.robjects

embeddings = np.array(rpy2.robjects.globalenv['embeddings'])
normalized_counts = np.array(rpy2.robjects.globalenv["normalized_counts"])
import numpy as np
import rpy2.robjects

embeddings = np.array(rpy2.robjects.globalenv['embeddings'])
normalized_counts = np.array(rpy2.robjects.globalenv["normalized_counts"])

In [5]:

Copied!





%%R

K <- 8
result <- scDEED(data, K = K, reduction.method = 'tsne', rerun = F, perplexity = perplexity)
dubious <- result$full_results |>
    filter(perplexity == perplexity) |>
    pull(dubious_cells) |>
    str_split(",")

dubious <- as.integer(dubious[[1]])

data <- FindNeighbors(data, features = VariableFeatures(data), k.param = 50)
G <- data@graphs$RNA_nn
N <- map(dubious, ~ which(G[.x, ] > 0)) |>
    set_names(dubious)
%%R

K <- 8
result <- scDEED(data, K = K, reduction.method = 'tsne', rerun = F, perplexity = perplexity)
dubious <- result$full_results |>
    filter(perplexity == perplexity) |>
    pull(dubious_cells) |>
    str_split(",")

dubious <- as.integer(dubious[[1]])

data <- FindNeighbors(data, features = VariableFeatures(data), k.param = 50)
G <- data@graphs$RNA_nn
N <- map(dubious, ~ which(G[.x, ] > 0)) |>
    set_names(dubious)

[1] "Permuting data"
  |======================================================================| 100%[1] "Permutation finished"

Warning: Number of dimensions changing from 50 to 8
Computing nearest neighbor graph
Computing SNN
In addition: Warning messages:
1: The `slot` argument of `GetAssayData()` is deprecated as of SeuratObject 5.0.0.
ℹ Please use the `layer` argument instead.
ℹ The deprecated feature was likely used in the scDEED package.
  Please report the issue to the authors.
This warning is displayed once every 8 hours.
Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
generated. 
2: The `slot` argument of `SetAssayData()` is deprecated as of SeuratObject 5.0.0.
ℹ Please use the `layer` argument instead.
ℹ The deprecated feature was likely used in the scDEED package.
  Please report the issue to the authors.
This warning is displayed once every 8 hours.
Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
generated.

In [6]:

Copied!





from distortions.geometry import Geometry, bind_metric, local_distortions

geom = Geometry("brute", laplacian_method="geometric", affinity_kwds={"radius": 20}, adjacency_kwds={"radius": 50}, laplacian_kwds={"scaling_epps": 5})
H, Hvv, Hs = local_distortions(embeddings, normalized_counts, geom)
embeddings = bind_metric(embeddings, Hvv, Hs)
N = rpy2.robjects.globalenv["N"]
N_dict = {int(key): list(val) for key, val in N.items()}
from distortions.geometry import Geometry, bind_metric, local_distortions

geom = Geometry("brute", laplacian_method="geometric", affinity_kwds={"radius": 20}, adjacency_kwds={"radius": 50}, laplacian_kwds={"scaling_epps": 5})
H, Hvv, Hs = local_distortions(embeddings, normalized_counts, geom)
embeddings = bind_metric(embeddings, Hvv, Hs)
N = rpy2.robjects.globalenv["N"]
N_dict = {int(key): list(val) for key, val in N.items()}

In [7]:

Copied!





from distortions.visualization import dplot

plots = {}
plots["scdeed_distort"] = dplot(embeddings, width=440, height=440)\
    .mapping(x="embedding_0", y="embedding_1")\
    .geom_ellipse(radiusMax=10, radiusMin=1)\
    .inter_edge_link(N=N_dict, stroke="#F25E7A", highlightColor="#C83F58", strokeWidth=.4, highlightStrokeWidth=5, threshold=10, backgroundOpacity=0.5)
from distortions.visualization import dplot

plots = {}
plots["scdeed_distort"] = dplot(embeddings, width=440, height=440)\
    .mapping(x="embedding_0", y="embedding_1")\
    .geom_ellipse(radiusMax=10, radiusMin=1)\
    .inter_edge_link(N=N_dict, stroke="#F25E7A", highlightColor="#C83F58", strokeWidth=.4, highlightStrokeWidth=5, threshold=10, backgroundOpacity=0.5)

In [8]:

Copied!

#[p.save(f"../paper/figures/{k}.svg") for k, p in plots.items()]
#[p.save(f"../paper/figures/{k}.svg") for k, p in plots.items()]

In [9]:

Copied!

[display(p) for p in plots.values()]
[display(p) for p in plots.values()]

dplot(dataset=[{'embedding_0': 5.783105882455072, 'embedding_1': -12.634558999702302, 'x0': -0.303412665270283…

Out[9]:

[None]

In [ ]: