Analyzing Topology Failures with Interlocking Links
In [1]:
Copied!
import numpy as np
import pandas as pd
from distortions.geometry import Geometry, bind_metric, local_distortions
import numpy as np
import pandas as pd
from distortions.geometry import Geometry, bind_metric, local_distortions
In [2]:
Copied!
import math
def rotate(x, y, z, angle=0.4):
    """Rotate the point ``(x, y, z)`` about the x-axis.

    Parameters
    ----------
    x, y, z : float
        Coordinates of the point to rotate.
    angle : float, optional
        Rotation angle in radians. Defaults to 0.4, the value that used to be
        hard-coded, so existing three-argument calls behave exactly as before.

    Returns
    -------
    list of float
        ``[u, v, w]`` with ``u = x`` unchanged,
        ``v = cos(angle) * y + sin(angle) * z`` and
        ``w = -sin(angle) * y + cos(angle) * z``.
    """
    cos_val = math.cos(angle)
    sin_val = math.sin(angle)
    # The x coordinate is left untouched; only the (y, z) plane is rotated.
    return [x, cos_val * y + sin_val * z, -sin_val * y + cos_val * z]
def link_data(n, offset=1):
    """Sample two interlocking rings (a "link") in 3-D.

    Ported from
    https://github.com/kevinrobinson/umap-playground/blob/master/public/assets/demo-datas.js#L311

    Parameters
    ----------
    n : int
        Number of points sampled on each ring.
    offset : float, optional
        Shift applied to the x coordinate of the second ring before rotation.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(2 * n, 3)``. Rows alternate between the two rings:
        even rows come from the ring in the x-y plane, odd rows from the
        shifted ring in the x-z plane. Every point is passed through
        ``rotate``.
    """
    points = []
    for i in range(n):
        t = 2 * math.pi * i / n
        sin_t = math.sin(t)
        cos_t = math.cos(t)
        points.append(rotate(cos_t, sin_t, 0))
        points.append(rotate(offset + cos_t, 0, sin_t))
    # reshape keeps the three coordinate columns even when no points were
    # generated (n <= 0), so callers can always index data[:, 0].
    return np.array(points, dtype=float).reshape(-1, 3)
import math
def rotate(x, y, z, angle=0.4):
    """Rotate the point ``(x, y, z)`` about the x-axis.

    Parameters
    ----------
    x, y, z : float
        Coordinates of the point to rotate.
    angle : float, optional
        Rotation angle in radians. Defaults to 0.4, the value that used to be
        hard-coded, so existing three-argument calls behave exactly as before.

    Returns
    -------
    list of float
        ``[u, v, w]`` with ``u = x`` unchanged,
        ``v = cos(angle) * y + sin(angle) * z`` and
        ``w = -sin(angle) * y + cos(angle) * z``.
    """
    cos_val = math.cos(angle)
    sin_val = math.sin(angle)
    # The x coordinate is left untouched; only the (y, z) plane is rotated.
    return [x, cos_val * y + sin_val * z, -sin_val * y + cos_val * z]
def link_data(n, offset=1):
    """Sample two interlocking rings (a "link") in 3-D.

    Ported from
    https://github.com/kevinrobinson/umap-playground/blob/master/public/assets/demo-datas.js#L311

    Parameters
    ----------
    n : int
        Number of points sampled on each ring.
    offset : float, optional
        Shift applied to the x coordinate of the second ring before rotation.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(2 * n, 3)``. Rows alternate between the two rings:
        even rows come from the ring in the x-y plane, odd rows from the
        shifted ring in the x-z plane. Every point is passed through
        ``rotate``.
    """
    points = []
    for i in range(n):
        t = 2 * math.pi * i / n
        sin_t = math.sin(t)
        cos_t = math.cos(t)
        points.append(rotate(cos_t, sin_t, 0))
        points.append(rotate(offset + cos_t, 0, sin_t))
    # reshape keeps the three coordinate columns even when no points were
    # generated (n <= 0), so callers can always index data[:, 0].
    return np.array(points, dtype=float).reshape(-1, 3)
In [3]:
Copied!
from anndata import AnnData
import scanpy as sc
# Points per ring, and the neighbor count used for the kNN graph below and
# for the Geometry adjacency settings later in the notebook.
M = 100
n_neighbors = 50

# Two interlocking rings: 2 * M points in 3-D.
data = link_data(M)
adata = AnnData(X=data, obs=pd.DataFrame(range(2 * M)))

# Pass the shared constant instead of repeating the literal 50, so the scanpy
# graph and the Geometry object cannot drift apart.
sc.pp.neighbors(adata, n_neighbors=n_neighbors)
sc.tl.umap(adata)
embedding = adata.obsm["X_umap"].copy()
from anndata import AnnData
import scanpy as sc
# Points per ring, and the neighbor count used for the kNN graph below and
# for the Geometry adjacency settings later in the notebook.
M = 100
n_neighbors = 50

# Two interlocking rings: 2 * M points in 3-D.
data = link_data(M)
adata = AnnData(X=data, obs=pd.DataFrame(range(2 * M)))

# Pass the shared constant instead of repeating the literal 50, so the scanpy
# graph and the Geometry object cannot drift apart.
sc.pp.neighbors(adata, n_neighbors=n_neighbors)
sc.tl.umap(adata)
embedding = adata.obsm["X_umap"].copy()
In [4]:
Copied!
# Affinity radius: three times the mean of the entries stored in scanpy's
# neighbor-distance matrix.
radius = 3 * np.mean(adata.obsp["distances"].data)
geom = Geometry(
    "brute",
    laplacian_method="geometric",
    affinity_kwds={"radius": radius},
    adjacency_kwds={"n_neighbors": n_neighbors},
    laplacian_kwds={"scaling_epps": 5},
)

# Re-read the raw UMAP coordinates from adata rather than reusing `embedding`:
# this cell rebinds `embedding` to the bind_metric result, so feeding it back
# in would make the cell fail or drift when it is executed a second time.
umap_coords = adata.obsm["X_umap"].copy()
H, Hvv, Hs = local_distortions(umap_coords, data, geom)
embedding = bind_metric(umap_coords, Hvv, Hs)
# First original coordinate, used as the color variable in the plots.
embedding["x_orig"] = data[:, 0]
# Affinity radius: three times the mean of the entries stored in scanpy's
# neighbor-distance matrix.
radius = 3 * np.mean(adata.obsp["distances"].data)
geom = Geometry(
    "brute",
    laplacian_method="geometric",
    affinity_kwds={"radius": radius},
    adjacency_kwds={"n_neighbors": n_neighbors},
    laplacian_kwds={"scaling_epps": 5},
)

# Re-read the raw UMAP coordinates from adata rather than reusing `embedding`:
# this cell rebinds `embedding` to the bind_metric result, so feeding it back
# in would make the cell fail or drift when it is executed a second time.
umap_coords = adata.obsm["X_umap"].copy()
H, Hvv, Hs = local_distortions(umap_coords, data, geom)
embedding = bind_metric(umap_coords, Hvv, Hs)
# First original coordinate, used as the color variable in the plots.
embedding["x_orig"] = data[:, 0]
In [5]:
Copied!
from distortions.geometry import neighborhoods
from distortions.visualization import dplot
# Neighborhood structure handed to the plot's inter_edge_link layer.
N = neighborhoods(adata, threshold=0.01, outlier_factor=2)

# Parenthesized chain instead of backslash continuations; the chain is still
# the cell's last expression, so the notebook renders it.
(
    dplot(embedding, width=900, height=500)
    .mapping(x="embedding_0", y="embedding_1", color="x_orig")
    .inter_edge_link(N=N, threshold=1)
    .geom_ellipse()
    .scale_color()
)
from distortions.geometry import neighborhoods
from distortions.visualization import dplot
# Neighborhood structure handed to the plot's inter_edge_link layer.
N = neighborhoods(adata, threshold=0.01, outlier_factor=2)

# Parenthesized chain instead of backslash continuations; the chain is still
# the cell's last expression, so the notebook renders it.
(
    dplot(embedding, width=900, height=500)
    .mapping(x="embedding_0", y="embedding_1", color="x_orig")
    .inter_edge_link(N=N, threshold=1)
    .geom_ellipse()
    .scale_color()
)
Out[5]:
dplot(dataset=[{'embedding_0': 9.539716720581055, 'embedding_1': 3.1580326557159424, 'x0': -0.8638632514305921…
In [6]:
Copied!
# Repeat the embedding + distortion pipeline for several ring separations and
# collect one plot per offset.
# NOTE(review): `geom` (and the radius inside it) is reused from the earlier
# cell rather than recomputed per offset -- confirm this is intended.
plots = []
for offset in np.linspace(0.5, 2.5, 5):
    data = link_data(M, offset=offset)
    adata = AnnData(X=data, obs=pd.DataFrame(range(2 * M)))
    # Use the shared n_neighbors constant so the scanpy graph stays consistent
    # with the adjacency settings given to `geom`.
    sc.pp.neighbors(adata, n_neighbors=n_neighbors)
    sc.tl.umap(adata)
    embedding = adata.obsm["X_umap"].copy()

    H, Hvv, Hs = local_distortions(embedding, data, geom)
    embedding = bind_metric(embedding, Hvv, Hs)
    embedding["x_orig"] = data[:, 0]
    N = neighborhoods(adata, threshold=0.2, outlier_factor=2)
    # NOTE(review): `metrics` is not read by any visible cell; kept so that
    # notebook-level state is unchanged.
    metrics = {k: H[k] for k in range(len(H))}

    plots.append(
        dplot(embedding, width=400, height=400)
        .mapping(x="embedding_0", y="embedding_1", color="x_orig")
        .geom_ellipse()
        .inter_edge_link(N=N, threshold=1)
        .labs(title=f"{offset} Units Apart")
        .scale_color()
    )
# Repeat the embedding + distortion pipeline for several ring separations and
# collect one plot per offset.
# NOTE(review): `geom` (and the radius inside it) is reused from the earlier
# cell rather than recomputed per offset -- confirm this is intended.
plots = []
for offset in np.linspace(0.5, 2.5, 5):
    data = link_data(M, offset=offset)
    adata = AnnData(X=data, obs=pd.DataFrame(range(2 * M)))
    # Use the shared n_neighbors constant so the scanpy graph stays consistent
    # with the adjacency settings given to `geom`.
    sc.pp.neighbors(adata, n_neighbors=n_neighbors)
    sc.tl.umap(adata)
    embedding = adata.obsm["X_umap"].copy()

    H, Hvv, Hs = local_distortions(embedding, data, geom)
    embedding = bind_metric(embedding, Hvv, Hs)
    embedding["x_orig"] = data[:, 0]
    N = neighborhoods(adata, threshold=0.2, outlier_factor=2)
    # NOTE(review): `metrics` is not read by any visible cell; kept so that
    # notebook-level state is unchanged.
    metrics = {k: H[k] for k in range(len(H))}

    plots.append(
        dplot(embedding, width=400, height=400)
        .mapping(x="embedding_0", y="embedding_1", color="x_orig")
        .geom_ellipse()
        .inter_edge_link(N=N, threshold=1)
        .labs(title=f"{offset} Units Apart")
        .scale_color()
    )
In [7]:
Copied!
# Render each collected plot. A plain loop is used because display() returns
# None; a list comprehension would make the cell echo a list of None values.
for p in plots:
    display(p)
# Render each collected plot. A plain loop is used because display() returns
# None; a list comprehension would make the cell echo a list of None values.
for p in plots:
    display(p)
dplot(dataset=[{'embedding_0': 3.289219617843628, 'embedding_1': 13.244303703308105, 'x0': -0.6242006037121905…
dplot(dataset=[{'embedding_0': 9.539716720581055, 'embedding_1': 3.1580326557159424, 'x0': -0.8638632514305921…
dplot(dataset=[{'embedding_0': 12.08263111114502, 'embedding_1': 9.085368156433105, 'x0': -0.4536952649302679,…
dplot(dataset=[{'embedding_0': 0.22946549952030182, 'embedding_1': 0.14115360379219055, 'x0': -0.5723637280364…
dplot(dataset=[{'embedding_0': 7.340658664703369, 'embedding_1': 4.244654655456543, 'x0': -0.8520414657269931,…
Out[7]:
[None, None, None, None, None]