Comparing Embeddings vs. RMetric Stability
In [1]:
Copied!
import numpy as np

# One random seed per t-SNE run; the two resulting embeddings are compared below.
seeds = [
    20251106,
    20251108,
]
In [2]:
Copied!
import pandas as pd

N = 1500
K = 2
noise = 0.0

# Read the table for the chosen noise level once, then split it by column:
# column 3 becomes `t` (used as the colour value in the plots below) and
# columns 0-2 become the 3-D coordinates.
swiss_table = pd.read_csv(f"./baselines/data/swiss_noise_{noise}.csv").values
t = swiss_table[:, 3]
X_noisy = swiss_table[:, :3]
X = X_noisy
Data Generation¶
Here is a 3-D scatter plot of the Swiss roll data, colored by the roll parameter $t$.
In [3]:
Copied!
import matplotlib.pyplot as plt

# 3-D scatter of the three coordinate columns, coloured by t.
fig, ax = plt.subplots(figsize=(10, 8), subplot_kw={"projection": "3d"})
xs, ys, zs = X_noisy[:, 0], X_noisy[:, 1], X_noisy[:, 2]
sc = ax.scatter(xs, ys, zs, c=t, s=20, alpha=0.7, cmap='viridis')
fig.colorbar(sc, ax=ax, label='t')
plt.show()
In [4]:
Copied!
import altair as alt
import pandas as pd


def plot_swiss_emb(X_emb, t):
    """Scatter a two-column embedding, coloured by `t`.

    Parameters
    ----------
    X_emb : array-like with two columns, plotted as 'x' and 'y'.
    t : per-row values stored in a 't' column and mapped to colour.

    Returns
    -------
    A 400x300 Altair chart of circle marks (size 60).
    """
    points = pd.DataFrame(X_emb, columns=['x', 'y']).assign(t=t)
    circles = alt.Chart(points).mark_circle(size=60)
    return circles.encode(x='x', y='y', color='t').properties(width=400, height=300)
$t$-SNE Distortions¶
In [5]:
Copied!
from sklearn.manifold import TSNE

# Fit the same t-SNE configuration once per seed; only random_state differs.
# (set to init='random' for more variation)
tsne_runs = []
for seed in seeds:
    tsne = TSNE(n_components=2, perplexity=100, random_state=seed, learning_rate='auto')
    tsne_runs.append(tsne.fit_transform(X_noisy))
Z1, Z2 = tsne_runs
Center both embeddings, then apply an orthogonal Procrustes rotation to align the second embedding with the first.
In [6]:
Copied!
from scipy.linalg import orthogonal_procrustes

# Centre both embeddings so that the alignment only has to find a
# rotation/reflection, not a translation.
Z1 = Z1 - Z1.mean(axis=0)
Z2 = Z2 - Z2.mean(axis=0)

# orthogonal_procrustes(A, B) returns the orthogonal matrix that best maps A
# onto B in the least-squares sense; its second return value is not needed
# here. The alignment is applied exactly once.
rotation, _ = orthogonal_procrustes(Z2, Z1)
Z2 = Z2 @ rotation
In [7]:
Copied!
plots = [plot_swiss_emb(Z, t) for Z in (Z1, Z2)]

# A plain loop is used for the display side effect; a list comprehension would
# make the cell echo a meaningless `[None, None]` result.
for p in plots:
    display(p)
Out[7]:
[None, None]
In [8]:
Copied!
from distortions.geometry import Geometry, bind_metric, local_distortions, neighborhoods
from distortions.visualization import dplot
from anndata import AnnData
from sklearn.neighbors import NearestNeighbors


def distortion_plot(Z, X, t, n_neighbors=40, geom_radius=None, threshold=0.1, outlier_factor=2):
    """Build the interactive distortion plot for one embedding of `X`.

    Parameters
    ----------
    Z : embedding coordinates; stored under ``obsm["X_tsne"]`` and passed to
        ``local_distortions`` / ``bind_metric``.
    X : original data; used for the kNN distance graph and the AnnData object.
    t : per-sample values attached to the plotted table as column ``"t"``.
    n_neighbors : neighbour count for both the kNN graph and the ``Geometry``
        adjacency settings.
    geom_radius : radius handed to the ``Geometry`` affinity settings. When
        ``None`` (the default) it is set to half the mean of the stored kNN
        distances. Previously any value passed here was silently overwritten
        by that data-driven value; an explicit value is now honoured.
    threshold, outlier_factor : forwarded unchanged to ``neighborhoods``.

    Returns
    -------
    tuple ``(plot, H, embedding, N)``: the ``dplot`` object, the first output
    of ``local_distortions``, the table returned by ``bind_metric`` (with the
    added ``"t"`` column) and the result of ``neighborhoods``.
    """
    adata = AnnData(X=X)
    nn = NearestNeighbors(n_neighbors=n_neighbors, metric="euclidean").fit(X)
    adata.obsp["distances"] = nn.kneighbors_graph(X, mode="distance")
    adata.obsm["X_tsne"] = Z

    if geom_radius is None:
        geom_radius = 0.5 * np.mean(adata.obsp["distances"].data)
    geom = Geometry(affinity_kwds={"radius": geom_radius}, adjacency_kwds={"n_neighbors": n_neighbors})
    H, Hvv, Hs = local_distortions(Z, X, geom)

    # Cap Hs at its 99th percentile (in place) so that a few extreme values do
    # not dominate; presumably these drive the ellipse sizes — TODO confirm.
    hq = np.quantile(Hs, 0.99)
    Hs[Hs > hq] = hq

    embedding = bind_metric(Z, Hvv, Hs)
    embedding["t"] = t
    neighbor_links = neighborhoods(adata, threshold=threshold, outlier_factor=outlier_factor, embed_key="X_tsne")

    pal = ['#B776A6', '#BAC4A2']
    plot = (
        dplot(embedding, height=350, width=450)
        .mapping(x="embedding_0", y="embedding_1", color="t")
        .inter_edge_link(N=neighbor_links, strokeWidth=.2, opacity=0.9, threshold=10, stroke="#363E59", highlightColor="#363E59", backgroundOpacity=0.6)
        .geom_ellipse(radiusMin=1, radiusMax=10)
        .labs(x='t-SNE 1', y='t-SNE 2')
        .scale_color(scheme=pal)
    )
    return plot, H, embedding, neighbor_links
In [9]:
Copied!
import time

# Time one full distortion-plot build (display included) and record the
# elapsed seconds for this noise level. perf_counter is monotonic, so the
# measurement cannot be skewed by system clock adjustments.
start_time = time.perf_counter()
display(distortion_plot(Z1, X_noisy, t)[0])
elapsed = time.perf_counter() - start_time
with open(f"baselines/data/runtime_dist_{noise}.txt", "w") as f:
    f.write(str(elapsed))
dplot(dataset=[{'embedding_0': -17.553518295288086, 'embedding_1': -4.967970371246338, 'x0': -0.97378945252753…
In [10]:
Copied!
# One (plot, H, embedding, N) tuple per aligned embedding.
distortion_data = [distortion_plot(Z, X_noisy, t) for Z in (Z1, Z2)]

# Display with a plain loop so the cell does not echo `[None, None]`.
for result in distortion_data:
    display(result[0])
dplot(dataset=[{'embedding_0': -17.553518295288086, 'embedding_1': -4.967970371246338, 'x0': -0.97378945252753…
dplot(dataset=[{'embedding_0': -17.55352020263672, 'embedding_1': -4.967971324920654, 'x0': -0.973802412845542…
Out[10]:
[None, None]
In [11]:
Copied!
import os

# Figure output directory. The default is the original author's location;
# set the DISTORTIONS_FIGURE_DIR environment variable to redirect it on
# another machine instead of editing the notebook.
save_dir = os.environ.get(
    "DISTORTIONS_FIGURE_DIR",
    "/Users/krissankaran/Desktop/collaborations/distortions-project/distortions-dev/paper/figures/",
)
# NOTE(review): both disabled lines below save the same plot object under two
# different file names — confirm which variant each file should hold.
#distortion_data[0][0].save(f"{save_dir}/swiss_roll_baseline_no_interact_{noise}.svg")
#distortion_data[0][0].save(f"{save_dir}/swiss_roll_baseline_interact_{noise}.svg")
Neighbor Distance Preservation¶
In [12]:
Copied!
from scipy.spatial.distance import cdist
from sklearn.neighbors import NearestNeighbors

# Pairwise distances inside each embedding, then their element-wise ratios.
D1 = cdist(Z1, Z1)
D2 = cdist(Z2, Z2)
# The diagonals are 0/0 and become NaN. Those entries are never used below
# (self is excluded from the neighbour set), so only the "invalid" warning is
# silenced; a genuine division by zero off the diagonal still warns.
with np.errstate(invalid="ignore"):
    R = D1 / D2
    R_inv = D2 / D1

# Nearest neighbours in the original space X.
n_neighbors = 15
nn = NearestNeighbors(n_neighbors=n_neighbors, metric='euclidean').fit(X)
knn_indices = nn.kneighbors(X, return_distance=False)

# The first returned neighbour is dropped as "self", leaving n_neighbors - 1
# neighbours per point (assumes each point is its own nearest neighbour,
# i.e. no exact duplicates in X — TODO confirm).
n = X.shape[0]
row_idx = np.arange(n)[:, None]
nbr_idx = knn_indices[:, 1:]

# Mask M: M[i, j] = 1 if j is among i's nearest neighbours (excluding self).
M = np.zeros((n, n), dtype=int)
M[row_idx, nbr_idx] = 1

# Per-point variance of the distance ratio over that point's neighbours,
# computed for R and for its reciprocal; V_max keeps the larger of the two.
V = np.var(R[row_idx, nbr_idx], axis=1)
V_inv = np.var(R_inv[row_idx, nbr_idx], axis=1)
V_max = np.maximum(V, V_inv)
In [13]:
Copied!
alt.data_transformers.enable("vegafusion")

# Flatten R to 1-D for plotting, keeping only finite, strictly positive
# ratios: the diagonal of R is 0/0 (NaN) and has no meaningful log.
R_flat = R[np.isfinite(R) & (R > 0)]
R_df = pd.DataFrame({'R': np.log10(R_flat)})

alt.Chart(R_df).mark_bar().encode(
    alt.X('R', bin=alt.Bin(maxbins=100), title='Distance Ratio R (log_{10})'),
    alt.Y('count()', title='Frequency')
).properties(
    width=400, height=250,
    title='Histogram of Distance Ratios log_{10}(R)'
)
Out[13]:
In [14]:
Copied!
from scipy.linalg import fractional_matrix_power, logm

Hs1 = distortion_data[0][1]  # shape: (n, 2, 2)
Hs2 = distortion_data[1][1]

# n_H: Frobenius norm of the per-point difference, divided by the product of
# the two per-point Frobenius norms.
norm = 'fro'
H_instability = np.linalg.norm(Hs1 - Hs2, ord=norm, axis=(1, 2))
Hs1_norm = np.linalg.norm(Hs1, ord=norm, axis=(1, 2))
Hs2_norm = np.linalg.norm(Hs2, ord=norm, axis=(1, 2))
stability_data = pd.DataFrame({
    "v_d": V_max,
    "n_H": H_instability / (Hs1_norm * Hs2_norm),
    "t": t
})

# n_H_det: |log det(Hs1[i]) - log det(Hs2[i])| for each i.
# np.linalg.det operates on the whole stack of matrices at once.
n = Hs1.shape[0]
n_H_det = np.abs(np.log(np.linalg.det(Hs1)) - np.log(np.linalg.det(Hs2)))
stability_data["n_H_det"] = n_H_det

# n_H_sim: (1/2) * || log( Hs1[i]^{-1/2} @ Hs2[i] @ Hs1[i]^{-1/2} ) ||_F.
n_H_sim = np.empty(n)
for i in range(n):
    H1_inv_sqrt = fractional_matrix_power(Hs1[i], -0.5)
    sim_log = logm(H1_inv_sqrt @ Hs2[i] @ H1_inv_sqrt)
    n_H_sim[i] = 0.5 * np.linalg.norm(sim_log, ord='fro')
stability_data["n_H_sim"] = n_H_sim
In [15]:
Copied!
n = Hs1.shape[0]


def _max_sqrt_sv_gap(h1, h2):
    """Largest |sqrt(s) - 1| over the singular values s of inv(h1) @ h2."""
    _, sing_vals, _ = np.linalg.svd(np.linalg.inv(h1) @ h2)
    return np.max(np.abs(np.sqrt(sing_vals) - 1))


# One value per sample, in the same row order as Hs1 / Hs2.
HH_sv = np.array([_max_sqrt_sv_gap(Hs1[k], Hs2[k]) for k in range(n)], dtype=float)
stability_data["HH_sv"] = HH_sv
In [16]:
Copied!
# Keys of the first run's neighborhoods result, shifted down by one, mark the
# rows flagged as distorted (presumably the keys are 1-based sample ids —
# TODO confirm against `neighborhoods`).
N_keys_0 = {key - 1 for key in distortion_data[0][3].keys()}
stability_data["distorted"] = stability_data.index.isin(N_keys_0)
In [17]:
Copied!
embedding_list = [result[2] for result in distortion_data]
groups = ["seed1", "seed2"]

# Values shared by both tables: sample ids come from the first table's index,
# and V is the natural log of the neighbour-ratio variance.
sample_ids = embedding_list[0].index
log_v = np.log(V_max)
for emb, group in zip(embedding_list, groups):
    emb["sample"] = sample_ids
    emb["group"] = group
    emb["V"] = log_v
In [18]:
Copied!
# Stack both seeds' embedding tables so they can be overlaid in one plot.
combined_embedding = pd.concat(embedding_list)

# Coloured by V (log neighbour-ratio variance). The axes are labelled t-SNE
# because these coordinates come from the t-SNE fits, matching the labels
# used inside distortion_plot.
plot_var = (
    dplot(combined_embedding, height=350, width=450)
    .mapping(x="embedding_0", y="embedding_1", color="V")
    .geom_ellipse(opacity=0.9, radiusMin=1, radiusMax=20, stroke=True)
    .scale_color(stroke=True)
    .labs(x="t-SNE 1", y="t-SNE 2")
)

# Same overlay, coloured by which seed produced the point.
plot_group = (
    dplot(combined_embedding, height=350, width=450)
    .mapping(x="embedding_0", y="embedding_1", color="group")
    .geom_ellipse(opacity=0.9, radiusMin=1, radiusMax=20, stroke=True)
    .scale_color(stroke=True, scheme=["green", "purple"])
    .labs(x="t-SNE 1", y="t-SNE 2")
)
In [19]:
Copied!
# Render the variance-coloured overlay as the cell output.
plot_var
Out[19]:
dplot(dataset=[{'embedding_0': -17.553518295288086, 'embedding_1': -4.967970371246338, 'x0': -0.97378945252753…
In [20]:
Copied!
# Render the seed-coloured overlay as the cell output.
plot_group
Out[20]:
dplot(dataset=[{'embedding_0': -17.553518295288086, 'embedding_1': -4.967970371246338, 'x0': -0.97378945252753…
In [21]:
Copied!
import os
from pathlib import Path

# Output directory for the exported figure. The default is the original
# author's location; set DISTORTIONS_OUTPUT_DIR to write somewhere else.
overlay_dir = Path(os.environ.get("DISTORTIONS_OUTPUT_DIR", "/Users/krissankaran/Downloads"))
#plot_var.save(str(overlay_dir / "v3.svg"))
plot_group.save(str(overlay_dir / "v4.svg"))
In [22]:
Copied!
def make_stability_scatter(y, y_title=None, y_scale='log', color='distorted', color_title=None, color_range=None, size=20):
    """Scatter a stability metric against `v_d`, highlighting flagged rows.

    Reads the module-level `stability_data` table.

    Parameters
    ----------
    y : column of `stability_data` plotted on the y axis.
    y_title : y-axis title; defaults to `y`. Also appended to the chart title.
    y_scale : Altair scale type for the y axis (default 'log').
    color : name of the column (expected to be boolean) that splits rows into
        a base layer and a highlighted layer drawn on top of it.
    color_title : accepted for backward compatibility; unused, because the two
        layers use fixed mark colours and therefore have no legend.
    color_range : two colours, `[base, highlighted]`. Defaults to
        `['black', 'red']`. Previously this argument was ignored.
    size : circle mark size.

    Returns
    -------
    A layered 400x300 Altair chart: base points, highlighted points, and a
    dashed grey y = x reference line spanning the range of `v_d`.
    """
    if y_title is None:
        y_title = y
    if color_range is None:
        color_range = ['black', 'red']
    base_color, highlight_color = color_range

    # Split the rows so the highlighted points are drawn over the base points.
    flagged = stability_data[color].astype(bool)
    df_base = stability_data[~flagged]
    df_highlight = stability_data[flagged]

    # NOTE(review): the x title says "SD", but `v_d` is filled from np.var
    # elsewhere in this notebook — confirm which of the two is intended.
    enc = [
        alt.X('v_d', title="SD of $d_i/d'_i$ ratios among neighbors", scale=alt.Scale(type='log')),
        alt.Y(y, title=y_title, scale=alt.Scale(type=y_scale)),
    ]
    scatter_base = alt.Chart(df_base).mark_circle(size=size, color=base_color).encode(*enc)
    scatter_highlight = alt.Chart(df_highlight).mark_circle(size=size, color=highlight_color).encode(*enc)

    # y = x reference line over the observed range of v_d.
    line_data = pd.DataFrame({
        'v_d': np.linspace(stability_data['v_d'].min(), stability_data['v_d'].max(), 100)
    })
    line_data[y] = line_data['v_d']
    line = alt.Chart(line_data).mark_line(color='gray', strokeDash=[4,4]).encode(
        x='v_d',
        y=y
    )

    return (scatter_base + scatter_highlight + line).properties(
        width=400, height=300,
        title=f'Neighbor Preservation vs. Distortion Stability: {y_title}'
    )
In [23]:
Copied!
# Show the per-sample stability table (pandas truncates the middle rows).
stability_data
Out[23]:
| v_d | n_H | t | n_H_det | n_H_sim | HH_sv | distorted | |
|---|---|---|---|---|---|---|---|
| 0 | 3.574513e-18 | 0.001657 | 11.489719 | 0.000073 | 0.000073 | 0.000074 | False |
| 1 | 3.146408e-14 | 0.000024 | 6.187302 | 0.000005 | 0.000002 | 0.000002 | False |
| 2 | 1.894028e-14 | 0.003019 | 10.729005 | 0.000256 | 0.000101 | 0.000097 | False |
| 3 | 7.504273e-15 | 0.000091 | 7.555652 | 0.000019 | 0.000010 | 0.000012 | True |
| 4 | 2.735290e-15 | 0.000602 | 8.282046 | 0.000035 | 0.000018 | 0.000022 | False |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 935 | 1.630904e-14 | 0.000056 | 13.811979 | 0.000040 | 0.000021 | 0.000021 | False |
| 936 | 3.844885e-14 | 0.000042 | 13.368451 | 0.000010 | 0.000011 | 0.000010 | False |
| 937 | 7.932017e-14 | 0.000026 | 13.001232 | 0.000010 | 0.000004 | 0.000004 | False |
| 938 | 1.314958e-14 | 0.000181 | 12.456238 | 0.000022 | 0.000016 | 0.000016 | False |
| 939 | 1.155698e-14 | 0.000008 | 13.616978 | 0.000002 | 0.000002 | 0.000002 | False |
940 rows × 7 columns
In [24]:
Copied!
# Settings shared by all four scatter plots.
scatter_style = dict(color='distorted', color_range=['black', 'red'], size=20)

# The axis titles are raw strings so that backslashes are kept literally
# rather than being parsed as (invalid) escape sequences.

# Frobenius norm of log-matrix similarity
plot_n_H_sim = make_stability_scatter(
    y='n_H_sim',
    y_title=r"(1/2) * ||\log H^{-1/2} (H') H^{-1/2}||_{F}",
    **scatter_style
)

# Absolute log-determinant difference
plot_n_H_det = make_stability_scatter(
    y='n_H_det',
    y_title=r"|\log|H| - \log|H'||",
    **scatter_style
)

# Normalized Frobenius norm of the difference
plot_n_H = make_stability_scatter(
    y='n_H',
    y_title="normalized ||H - H'||_{F}",
    **scatter_style
)

# Singular value difference
plot_HH_sv = make_stability_scatter(
    y='HH_sv',
    y_title=r"\max(|\sqrt{\lambda_1}-1|,|\sqrt{\lambda_2}-1|)",
    **scatter_style
)
In [25]:
Copied!
import os
from pathlib import Path

# Output directory for the exported PNGs. The default is the original
# author's location; set DISTORTIONS_OUTPUT_DIR to write somewhere else.
png_dir = Path(os.environ.get("DISTORTIONS_OUTPUT_DIR", "/Users/krissankaran/Downloads"))

# Saved in the same order as before, one file per metric.
for stem, chart in [
    ("n_H_sim", plot_n_H_sim),
    ("n_H_det", plot_n_H_det),
    ("HH_sv", plot_HH_sv),
    ("n_H", plot_n_H),
]:
    chart.save(str(png_dir / f"{stem}.png"))
In [26]:
Copied!
def make_t_scatter(metric, title, scale_type='log', size=40):
    """Scatter one `stability_data` column against `t`, coloured by `distorted`.

    Parameters
    ----------
    metric : column of the module-level `stability_data` shown on the y axis.
    title : y-axis title.
    scale_type : Altair scale type for the y axis (default 'log').
    size : circle mark size.

    Returns
    -------
    A 400x300 Altair chart with black (False) and red (True) points.
    """
    # Rows with distorted == True are sorted last (presumably so they are
    # drawn on top — TODO confirm Altair's draw order).
    ordered = stability_data.sort_values("distorted", ascending=True)
    t_domain = [ordered['t'].min(), ordered['t'].max()]

    x_enc = alt.X('t', title='Swiss Roll Parameter', scale=alt.Scale(domain=t_domain))
    y_enc = alt.Y(metric, scale=alt.Scale(type=scale_type), title=title)
    color_enc = alt.Color(
        'distorted:N',
        title='Distorted',
        scale=alt.Scale(domain=[False, True], range=['black', 'red']),
    )

    circles = alt.Chart(ordered).mark_circle(size=size)
    return circles.encode(x=x_enc, y=y_enc, color=color_enc).properties(width=400, height=300)
In [27]:
Copied!
# Log-matrix similarity metric against the Swiss roll parameter.
make_t_scatter(
    'n_H_sim',
    r"(1/2) * ||\log H^{-1/2} (H') H^{-1/2}||_{F}",
)
Out[27]:
In [28]:
Copied!
# Singular-value metric against the Swiss roll parameter. The label now
# matches what HH_sv holds (both terms subtract 1) and is a raw string so the
# backslashes are not parsed as escape sequences.
make_t_scatter(
    'HH_sv',
    r"\max(|\sqrt{\lambda_1}-1|,|\sqrt{\lambda_2}-1|)",
)
Out[28]:
In [ ]:
Copied!