Comparing t-SNE Hyperparameters in the Hydra Atlas
In [1]:
Copied!
import random
import tempfile
import urllib.request
from pathlib import Path

import numpy as np
import scanpy as sc

# --- Configuration ---------------------------------------------------------
SEED = 20250409
DATA_URL = "https://uwmadison.box.com/shared/static/gp1d9akvyq5r7a0rzxw27n86zlqe0d7f.h5ad"
N_TOP_GENES = 1000   # highly variable genes kept as features
N_CELLS = 2000       # cells kept in the random subsample
n_neighbors = 50     # lowercase name kept on purpose: later cells read `n_neighbors`

# The subsample below is drawn with NumPy, so NumPy's generator has to be
# seeded; seeding only the stdlib `random` module leaves the draw different on
# every run. The stdlib seed is kept for any code that relies on it.
random.seed(SEED)
rng = np.random.default_rng(SEED)

# --- Load ------------------------------------------------------------------
# Download into a private temporary directory (tempfile.mktemp is deprecated
# and race-prone). read_h5ad is called without `backed`, so the data is read
# into memory before the directory is removed on leaving the `with` block.
with tempfile.TemporaryDirectory() as tmp_dir:
    h5ad_path = Path(tmp_dir) / "hydra.h5ad"
    urllib.request.urlretrieve(DATA_URL, str(h5ad_path))
    adata = sc.read_h5ad(str(h5ad_path))

# --- Preprocess ------------------------------------------------------------
adata.X = adata.X.todense()
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)
sc.pp.highly_variable_genes(adata, n_top_genes=N_TOP_GENES)

# --- Subsample cells and restrict to the highly variable genes --------------
ix = rng.choice(adata.n_obs, size=N_CELLS, replace=False)
# .copy() materialises the subset so the calls below write into a real
# AnnData object rather than a view of the full dataset.
adata = adata[ix, adata.var.highly_variable].copy()

# --- Neighbour graph and t-SNE embedding -----------------------------------
sc.pp.neighbors(adata, n_neighbors=n_neighbors)
sc.tl.tsne(adata, n_pcs=30, perplexity=500)
In [2]:
Copied!
from distortions.geometry import Geometry, bind_metric, local_distortions

# Raw t-SNE coordinates as stored by scanpy. Kept under its own name so that
# `embedding` only ever refers to the table returned by bind_metric below.
tsne_coords = adata.obsm["X_tsne"].copy()

# Affinity radius: three times the mean of the stored neighbour distances.
radius = 3 * np.mean(adata.obsp["distances"].data)
geom = Geometry(
    "brute",
    laplacian_method="geometric",
    affinity_kwds={"radius": radius},
    adjacency_kwds={"n_neighbors": n_neighbors},
    laplacian_kwds={"scaling_epps": 5},
)

# H is reused by the isometry plot; Hvv and Hs are only passed to bind_metric.
# NOTE(review): presumably per-cell metric estimates and their
# singular vectors / values -- confirm against the distortions docs.
H, Hvv, Hs = local_distortions(tsne_coords, adata.X, geom)
embedding = bind_metric(tsne_coords, Hvv, Hs)

# Label each cell with the second "-"-separated token of its `ident` string.
embedding["ident"] = adata.obs["ident"].str.split("-").str[1].values
In [3]:
Copied!
import altair as alt

# Use the VegaFusion data transformer for this chart's data.
alt.data_transformers.enable("vegafusion")

# Plain scatter of the embedding, with the same axis titles as the
# distortion plots later in the notebook so the figures can be compared.
alt.Chart(embedding).mark_circle(opacity=1).encode(
    x=alt.X("embedding_0", title="t-SNE 1"),
    y=alt.Y("embedding_1", title="t-SNE 2"),
).properties(width=400, height=400)
Out[3]:
In [4]:
Copied!
from distortions.visualization import dplot
from distortions.geometry import neighborhoods

# Registry of every figure built in this notebook, keyed by output file stem.
plots = {}

# NOTE(review): presumably flags neighbourhoods that the t-SNE layout has
# broken apart -- confirm the meaning of threshold / outlier_factor / frame
# against the distortions documentation.
N = neighborhoods(
    adata,
    threshold=.2,
    outlier_factor=3,
    embed_key="X_tsne",
    frame=[100, 100],
    method="window",
)

# Ellipse glyphs plus interactive links between the neighbourhoods in N.
plots["hydra_link_80"] = (
    dplot(embedding, width=440, height=440)
    .mapping(x="embedding_0", y="embedding_1")
    .inter_edge_link(
        N=N,
        threshold=3,
        stroke="#F25E7A",
        highlightColor="#F25E7A",
        backgroundOpacity=0.2,
        strokeWidth=0.2,
        opacity=0.6,
    )
    .geom_ellipse(radiusMax=10, radiusMin=.8)
    .labs(x="t-SNE 1", y="t-SNE 2")
)
plots["hydra_link_80"]
Out[4]:
dplot(dataset=[{'embedding_0': -5.127078056335449, 'embedding_1': -6.697624206542969, 'x0': -0.833874391295024…
In [5]:
Copied!
# Scale every entry of H by the overall mean of H. The mean is computed once
# up front; it does not change between keys, so recomputing it inside the
# comprehension only repeated the same full pass over H for every cell.
h_mean = H.mean()
metrics = {k: H[k] / h_mean for k in range(len(H))}

# Ellipse glyphs with the interactive isometry view driven by `metrics`.
plots["hydra_isometry"] = (
    dplot(embedding, width=440, height=440)
    .mapping(x="embedding_0", y="embedding_1")
    .inter_isometry(
        metrics=metrics,
        metrics_bw=.05,
        transformation_bw=.1,
        stroke="#dcdcdc",
    )
    .geom_ellipse(radiusMax=10, radiusMin=.8)
    .labs(x="t-SNE 1", y="t-SNE 2")
)
plots["hydra_isometry"]
Out[5]:
dplot(dataset=[{'embedding_0': -5.127078056335449, 'embedding_1': -6.697624206542969, 'x0': -0.833874391295024…
In [6]:
Copied!
from distortions.geometry import neighborhood_distances

# NOTE(review): presumably pairs each neighbour distance in the original
# space with the matching distance in the t-SNE layout -- confirm against
# the distortions documentation.
dists = neighborhood_distances(adata, embed_key="X_tsne")

# Ellipse glyphs with the interactive boxplot view of `dists`.
plots["hydra_boxplot"] = (
    dplot(embedding, width=550, height=440)
    .mapping(x="embedding_0", y="embedding_1")
    .geom_ellipse(radiusMax=8, radiusMin=.5)
    .inter_boxplot(dists=dists, strokeWidth=0.2)
    .labs(x="t-SNE 1", y="t-SNE 2")
)
plots["hydra_boxplot"]
Out[6]:
dplot(dataset=[{'embedding_0': -5.127078056335449, 'embedding_1': -6.697624206542969, 'x0': -0.833874391295024…
In [7]:
Copied!
# Optional export, disabled by default: uncomment the line below to write
# every figure in `plots` to ../paper/figures/<name>.svg.
#[p.save(f"../paper/figures/{k}.svg") for k, p in plots.items()]
In [8]:
Copied!
# Render every stored figure in order. A plain loop is used instead of a list
# comprehension so the cell has no value of its own and does not echo a list
# of None results after the figures.
for p in plots.values():
    display(p)
dplot(dataset=[{'embedding_0': -5.127078056335449, 'embedding_1': -6.697624206542969, 'x0': -0.833874391295024…
dplot(dataset=[{'embedding_0': -5.127078056335449, 'embedding_1': -6.697624206542969, 'x0': -0.833874391295024…
dplot(dataset=[{'embedding_0': -5.127078056335449, 'embedding_1': -6.697624206542969, 'x0': -0.833874391295024…
Out[8]:
[None, None, None]
In [ ]:
Copied!