Two Types of Distortion in a Variable Density Swiss Roll
In [1]:
Copied!
import numpy as np
from sklearn.manifold import TSNE
from baselines.scripts.variable_density_swiss_roll import non_uniform_swiss
import numpy as np
from sklearn.manifold import TSNE
from baselines.scripts.variable_density_swiss_roll import non_uniform_swiss
In [2]:
Copied!
# Experiment configuration read by the cells below.
n_samples = 1500  # passed positionally to non_uniform_swiss (presumably the sample count -- confirm)
noise = 0.0  # scale of the Gaussian perturbation added to X; 0.0 makes X_noisy equal to X
random_state = 20251106  # seed shared by np.random, the noise RandomState and t-SNE
K = 2  # forwarded as `K=` to non_uniform_swiss; its meaning is defined by that helper
In [3]:
Copied!
# Seed NumPy's global generator before sampling the swiss roll.
# NOTE(review): presumably non_uniform_swiss draws from np.random's global
# state, which is why the global seed is set here -- confirm against the helper.
np.random.seed(random_state)
X, t = non_uniform_swiss(2, 1, 0.2, n_samples, pi0=0.25, K=K, dim=3)

# The perturbation uses its own seeded RandomState, so it is independent of
# however many draws the generator helper consumed from the global state.
rng = np.random.RandomState(random_state)
X_noisy = X + noise * rng.normal(size=X.shape)
In [4]:
Copied!
# Fit t-SNE once on the (optionally perturbed) points and keep the 2-D
# coordinates; the integer random_state fixes the initialisation.
tsne = TSNE(
    n_components=2,
    perplexity=50,
    random_state=random_state,
    learning_rate='auto',
)
X_emb = tsne.fit_transform(X_noisy)
In [5]:
Copied!
import pandas as pd
import altair as alt

# One row per sample: the two t-SNE coordinates plus `t`, the second value
# returned by non_uniform_swiss, which is used for colouring.
df = pd.DataFrame({'x': X_emb[:, 0], 'y': X_emb[:, 1], 't': t})

# Parenthesised chain instead of backslash continuations; as the last
# expression of the cell it is what the notebook renders.
(
    alt.Chart(df)
    .mark_circle(size=30)
    .encode(
        x=alt.X('x', title='t-SNE 1'),
        y=alt.Y('y', title='t-SNE 2'),
        color=alt.Color('t:Q', title='unrolled coordinate', scale=alt.Scale(scheme='spectral')),
    )
    .properties(title=f"t-SNE embedding of Swiss Roll (noise={noise})")
)
Out[5]:
In [44]:
Copied!
from distortions.geometry import (
    Geometry,
    bind_metric,
    local_distortions,
    neighborhood_distances,
    neighborhoods,
)
from distortions.visualization import dplot
from anndata import AnnData
from sklearn.neighbors import NearestNeighbors

n_neighbors = 40

# Container holding the input-space points, their kNN distance graph and the
# t-SNE coordinates; it is reused by the plotting cell that follows.
adata = AnnData(X=X_noisy)
nn = NearestNeighbors(n_neighbors=n_neighbors, metric="euclidean").fit(X_noisy)
knn_graph = nn.kneighbors_graph(X_noisy, mode="distance")  # sparse CSR matrix
adata.obsp["distances"] = knn_graph
adata.obsm["X_tsne"] = X_emb

# Radius is half the mean of the stored kNN distances.
# NOTE(review): the fitted data is passed back in as the query, so each point
# is likely returned as its own neighbour at distance 0 and those zeros would
# be part of `.data`, pulling the mean down -- confirm this is intended.
radius = 0.5 * np.mean(adata.obsp["distances"].data)
geom = Geometry(affinity_kwds={"radius": radius}, adjacency_kwds={"n_neighbors": n_neighbors})

# Only Hvv and Hs are consumed here; H is kept so the name stays available.
H, Hvv, Hs = local_distortions(X_emb, X_noisy, geom)
embedding = bind_metric(X_emb, Hvv, Hs)
embedding["t"] = t  # attach the colouring variable used by the plot
In [47]:
Copied!
# Inputs for the two interactive layers, both computed from the AnnData
# object using the embedding stored under "X_tsne".
distances = neighborhood_distances(adata, "X_tsne")
N = neighborhoods(adata, threshold=.2, outlier_factor=9, embed_key="X_tsne")

# Build the figure as one parenthesised chain (no backslash continuations):
# ellipse glyphs, edge links driven by N, and a boxplot driven by distances.
plot = (
    dplot(embedding, height=400, width=600)
    .mapping(x="embedding_0", y="embedding_1", color="t")
    .geom_ellipse(radiusMin=1, radiusMax=25)
    .inter_edge_link(
        N=N,
        strokeWidth=.2,
        opacity=0.9,
        threshold=10,
        stroke="#F25E7A",
        highlightColor="#C83F58",
        backgroundOpacity=0.6,
    )
    .inter_boxplot(dists=distances, outlier_iqr=10, highlightColor="#F25E7A", strokeWidth=0.4)
)
plot
Out[47]:
dplot(dataset=[{'embedding_0': -13.072385787963867, 'embedding_1': -25.01403045654297, 'x0': -0.94523381485337…
In [ ]:
Copied!