Commit 1b5b1885 by Florent Chatelain

up nb

parent 184521a1
 ... @@ -54,10 +54,10 @@ ... @@ -54,10 +54,10 @@ "The data matrix is $X\\in \\mathbb{C}^{p \\times n}$ where $p$ and $n$ are the feature and sample size resp.\n", "The data matrix is $X\\in \\mathbb{C}^{p \\times n}$ where $p$ and $n$ are the feature and sample size resp.\n", "\n", "\n", "Then \n", "Then \n", "$$K = \\left[ \\frac 1p (X \\odot S)' (X \\odot S) \\right] \\odot B$$\n", "$$K = \\left[ \\frac 1p (X \\odot S)' (X \\odot S) \\right] \\odot B$$\n", "is the $n\\times n$ *two-way punctured* kernel matrix where:\n", "is the $n\\times n$ *two-way punctured* kernel matrix where:\n", " - $S$ is the Bernoulli iid $(p \\times n)$ random matrix to select the **data** entries with rate eS\n", " - matrix $S$ is the Bernoulli iid $(p \\times n)$ random matrix to select the **data** entries with rate eS\n", " - $B$ is the Bernoulli iid $(n \\times n)$ random matrix to select the **kernel** entries with rate eB" " - matrix $B$ is the Bernoulli iid $(n \\times n)$ random matrix to select the **kernel** entries with rate eB" ] ] }, }, { { ... @@ -67,7 +67,9 @@ ... @@ -67,7 +67,9 @@ "## Simulations\n", "## Simulations\n", "\n", "\n", "#### Figure 1.\n", "#### Figure 1.\n", "Eigenvalue distribution $\\nu_n$ of $K$ versus limit measure $\\nu$, for $p=200$, $n=4\\,000$, $x_i\\sim .4 \\mathcal N(\\mu_1,I_p)+.6\\mathcal N(\\mu_2,I_p)$ for $[\\mu_1^T,\\mu_2^T]^T \\sim\\mathcal{N}(0, \\frac1p \\left[\\begin{smallmatrix} 10 & 5.5 \\\\ 5.5 & 15\\end{smallmatrix}\\right]\\otimes I_p)$; $\\varepsilon_S=.2$, $\\varepsilon_B=.4$. Sample vs theoretical spikes in blue vs red circles. The two humps'' remind the semi-circular and Marcenko-Pastur laws." "Eigenvalue distribution $\\nu_n$ of $K$ versus limit measure $\\nu$, for $p=200$, $n=4\\,000$, $x_i\\sim .4 \\mathcal N(\\mu_1,I_p)+.6\\mathcal N(\\mu_2,I_p)$ for \n", "$[\\mu_1^T,\\mu_2^T]^T \\sim\\mathcal{N}(0, \\frac1p \\left[\\begin{smallmatrix} 10 & 5.5 \\\\ 5.5 & 15\\end{smallmatrix}\\right]\\otimes I_p)$; \n", "$\\varepsilon_S=.2$, $\\varepsilon_B=.4$. Sample vs theoretical spikes in blue vs red circles. The two \"humps\" remind the semi-circular and Marcenko-Pastur laws." ] ] }, }, { { ... ...
 %% Cell type:markdown id: tags: %% Cell type:markdown id: tags: # # #
*"Two-way kernel matrix puncturing: towards resource-efficient PCA and spectral clustering"*
#
*"Two-way kernel matrix puncturing: towards resource-efficient PCA and spectral clustering"*
##
-- Supplementary Material --
##
-- Supplementary Material --
##
-- Python Codes of main article figures --
##
-- Python Codes of main article figures --
## Preamble: useful packages and functions ## Preamble: useful packages and functions %% Cell type:code id: tags: %% Cell type:code id: tags:  python  python import numpy as np import numpy as np import scipy as sp import scipy as sp import matplotlib.pyplot as plt import matplotlib.pyplot as plt import scipy.linalg as lin import scipy.linalg as lin import scipy.stats as stats import scipy.stats as stats import scipy.sparse.linalg import scipy.sparse.linalg import scipy.special import scipy.special import pandas as pd import pandas as pd import seaborn as sns import seaborn as sns from numpy.random import default_rng from numpy.random import default_rng rng = default_rng(0) rng = default_rng(0) %load_ext autoreload %load_ext autoreload %autoreload 2 %autoreload 2 # user's functions from the local .py file # user's functions from the local .py file from punctutils import * from punctutils import * %matplotlib inline %matplotlib inline plt.rcParams.update({"font.size": 12}) plt.rcParams.update({"font.size": 12})   %% Cell type:markdown id: tags: %% Cell type:markdown id: tags: ## Two-way puncturing of the kernel matrix ## Two-way puncturing of the kernel matrix The data matrix is $X\in \mathbb{C}^{p \times n}$ where $p$ and $n$ are the feature and sample size resp. The data matrix is $X\in \mathbb{C}^{p \times n}$ where $p$ and $n$ are the feature and sample size resp. Then Then $$K = \left[ \frac 1p (X \odot S)' (X \odot S) \right] \odot B$$ $$K = \left[ \frac 1p (X \odot S)' (X \odot S) \right] \odot B$$ is the $n\times n$ *two-way punctured* kernel matrix where: is the $n\times n$ *two-way punctured* kernel matrix where: - $S$ is the Bernoulli iid $(p \times n)$ random matrix to select the **data** entries with rate eS - matrix $S$ is the Bernoulli iid $(p \times n)$ random matrix to select the **data** entries with rate eS - $B$ is the Bernoulli iid $(n \times n)$ random matrix to select the **kernel** entries with rate eB - matrix $B$ is the Bernoulli iid $(n \times n)$ random matrix to select the **kernel** entries with rate eB %% Cell type:markdown id: tags: %% Cell type:markdown id: tags: ## Simulations ## Simulations #### Figure 1. #### Figure 1. Eigenvalue distribution $\nu_n$ of $K$ versus limit measure $\nu$, for $p=200$, $n=4\,000$, $x_i\sim .4 \mathcal N(\mu_1,I_p)+.6\mathcal N(\mu_2,I_p)$ for $[\mu_1^T,\mu_2^T]^T \sim\mathcal{N}(0, \frac1p \left[\begin{smallmatrix} 10 & 5.5 \\ 5.5 & 15\end{smallmatrix}\right]\otimes I_p)$; $\varepsilon_S=.2$, $\varepsilon_B=.4$. Sample vs theoretical spikes in blue vs red circles. The two humps'' remind the semi-circular and Marcenko-Pastur laws. Eigenvalue distribution $\nu_n$ of $K$ versus limit measure $\nu$, for $p=200$, $n=4\,000$, $x_i\sim .4 \mathcal N(\mu_1,I_p)+.6\mathcal N(\mu_2,I_p)$ for $[\mu_1^T,\mu_2^T]^T \sim\mathcal{N}(0, \frac1p \left[\begin{smallmatrix} 10 & 5.5 \\ 5.5 & 15\end{smallmatrix}\right]\otimes I_p)$; $\varepsilon_S=.2$, $\varepsilon_B=.4$. Sample vs theoretical spikes in blue vs red circles. The two "humps" remind the semi-circular and Marcenko-Pastur laws. %% Cell type:code id: tags: %% Cell type:code id: tags:  python  python # Generation of the mean vectors for each sample # Generation of the mean vectors for each sample p = 200 # dimension size p = 200 # dimension size n = 4000 # sample size n = 4000 # sample size c0 = p / n # dimension/sample size ratio c0 = p / n # dimension/sample size ratio # Set the covariance for the two mean vectorss # Set the covariance for the two mean vectorss cov_mu = np.array([[10, 5.5], [5.5, 15]]) / p cov_mu = np.array([[10, 5.5], [5.5, 15]]) / p mus = gen_synth_mus(p=p, n=n, cov_mu=cov_mu) mus = gen_synth_mus(p=p, n=n, cov_mu=cov_mu) # Set the proportion for each of the two classes # Set the proportion for each of the two classes cs = [0.4, 0.6] cs = [0.4, 0.6] # Generate the noisy data matrix and the spikes matrices # Generate the noisy data matrix and the spikes matrices X, ells, vM = gen_synth_X(p, n, mus, cs) X, ells, vM = gen_synth_X(p, n, mus, cs) # Puncturing settings # Puncturing settings eS = 0.2 # data puncturing ratio eS = 0.2 # data puncturing ratio eB = 0.4 # kernel puncturing ratio eB = 0.4 # kernel puncturing ratio b = 1 # kernel matrix diagonal entry b = 1 # kernel matrix diagonal entry # Empirical Spectrum # Empirical Spectrum lambdas = puncture_eigs(X, eB, eS, b)[0] lambdas = puncture_eigs(X, eB, eS, b)[0] xmin = min(np.min(lambdas) * 0.8, np.min(lambdas) * 1.2) # accounting negative min xmin = min(np.min(lambdas) * 0.8, np.min(lambdas) * 1.2) # accounting negative min xmax = np.max(lambdas) * 1.2 xmax = np.max(lambdas) * 1.2 xs, density = lsd(eB, eS, c0, b, xmin=xmin, xmax=xmax, nsamples=200) xs, density = lsd(eB, eS, c0, b, xmin=xmin, xmax=xmax, nsamples=200) isolated_eig_0 = spike(eB, eS, c0, ells[0], b=1)[0] isolated_eig_0 = spike(eB, eS, c0, ells[0], b=1)[0] isolated_eig_1 = spike(eB, eS, c0, ells[1], b=1)[0] isolated_eig_1 = spike(eB, eS, c0, ells[1], b=1)[0] f, ax = plt.subplots(1, 1) f, ax = plt.subplots(1, 1) sns.histplot( sns.histplot( lambdas.flatten(), # color="blue", cbar_kws={'edgecolor': 'darkblue'}, lambdas.flatten(), # color="blue", cbar_kws={'edgecolor': 'darkblue'}, stat="density", stat="density", ax=ax, ax=ax, ) ) plt.plot(xs, density, "r", label="Limiting density") plt.plot(xs, density, "r", label="Limiting density") plt.plot( plt.plot( isolated_eig_0, 0.002, "og", fillstyle="none", label=r"limiting spikes $\rho_{1,2}$" isolated_eig_0, 0.002, "og", fillstyle="none", label=r"limiting spikes $\rho_{1,2}$" ) ) plt.plot(isolated_eig_1, 0.002, "og", fillstyle="none") plt.plot(isolated_eig_1, 0.002, "og", fillstyle="none") plt.plot(lambdas[-1], 0.002, "ob", label=r"Largest eigvals") plt.plot(lambdas[-1], 0.002, "ob", label=r"Largest eigvals") plt.plot(lambdas[-2], 0.002, "ob") plt.plot(lambdas[-2], 0.002, "ob") plt.legend() plt.legend() plt.xlim([-2, 5]) plt.xlim([-2, 5]) _ = plt.show() _ = plt.show()   %%%% Output: display_data %%%% Output: display_data [Hidden Image Output] [Hidden Image Output] %% Cell type:markdown id: tags: %% Cell type:markdown id: tags: #### Figure 2. #### Figure 2. Illustration of Theorem 2: asymptotic sample-population eigenvector alignment for $\mathcal L=\ell \in\mathbb R$, as a function of the information strength'' $\ell$. Various values of $(\varepsilon_S,\varepsilon_B,c_0)$ indicated in legend. Black dashed lines indicate the limiting (small $\varepsilon_S,\varepsilon_B$) phase transition threshold $\Gamma=(\varepsilon_S^2\varepsilon_Bc_0^{-1})^{-\frac12}$. As $\varepsilon_S,\varepsilon_B\to 0$, performance curves coincide when $\varepsilon_B\varepsilon_S^2c_0^{-1}$ is constant (plain versus dashed set of curves). Illustration of Theorem 2: asymptotic sample-population eigenvector alignment for $\mathcal L=\ell \in\mathbb R$, as a function of the information strength'' $\ell$. Various values of $(\varepsilon_S,\varepsilon_B,c_0)$ indicated in legend. Black dashed lines indicate the limiting (small $\varepsilon_S,\varepsilon_B$) phase transition threshold $\Gamma=(\varepsilon_S^2\varepsilon_Bc_0^{-1})^{-\frac12}$. As $\varepsilon_S,\varepsilon_B\to 0$, performance curves coincide when $\varepsilon_B\varepsilon_S^2c_0^{-1}$ is constant (plain versus dashed set of curves). %% Cell type:code id: tags: %% Cell type:code id: tags:  python  python eSeBc0s = [ eSeBc0s = [ (0.1, 0.1, 1), (0.1, 0.1, 1), (0.2, 0.025, 1), (0.2, 0.025, 1), (0.1, 0.2, 2), (0.1, 0.2, 2), (0.05, 0.05, 1), (0.05, 0.05, 1), (0.1, 0.0125, 1), (0.1, 0.0125, 1), (0.05, 0.1, 2), (0.05, 0.1, 2), ] ] ells = np.linspace(1, 400, 100) ells = np.linspace(1, 400, 100) plt.figure(figsize=(8, 4)) plt.figure(figsize=(8, 4)) for eS, eB, c0 in eSeBc0s: for eS, eB, c0 in eSeBc0s: plt.plot( plt.plot( ells, ells, [spike(eB, eS, c0, ell)[1] for ell in ells], [spike(eB, eS, c0, ell)[1] for ell in ells], label="({},{},{})".format(eS, eB, c0), label="({},{},{})".format(eS, eB, c0), ) ) plt.xlabel(r"$\ell$") plt.xlabel(r"$\ell$") plt.ylabel(r"$\zeta$") plt.ylabel(r"$\zeta$") plt.grid("On") plt.grid("On") _ = plt.legend() _ = plt.legend()   %%%% Output: display_data %%%% Output: display_data [Hidden Image Output] [Hidden Image Output] %% Cell type:markdown id: tags: %% Cell type:markdown id: tags: #### Figure 3. #### Figure 3. Phase transition curves $F(\ell)=0$ for $\mathcal L=\ell\in\mathbb R$ and varying values of $\ell$, for $c_0=.05$. Above each phase transition curve, a spike eigenvalue is found away from the support of $\nu$. For large $\ell$, a wide range of $\varepsilon_B$'s (resp., $\varepsilon_S$) is admissible at virtually no performance loss. Here, also, sparser $B$ matrices are more effective than sparser $S$ matrices. Phase transition curves $F(\ell)=0$ for $\mathcal L=\ell\in\mathbb R$ and varying values of $\ell$, for $c_0=.05$. Above each phase transition curve, a spike eigenvalue is found away from the support of $\nu$. For large $\ell$, a wide range of $\varepsilon_B$'s (resp., $\varepsilon_S$) is admissible at virtually no performance loss. Here, also, sparser $B$ matrices are more effective than sparser $S$ matrices. %% Cell type:code id: tags: %% Cell type:code id: tags:  python  python ells = [1, 2, 5] # spike powers ells = [1, 2, 5] # spike powers c0 = 0.05 c0 = 0.05 f, ax = plt.subplots(1, 2, figsize=(12, 4), sharey=True) f, ax = plt.subplots(1, 2, figsize=(12, 4), sharey=True) for ell in ells: for ell in ells: eBs, eSs = phase_transition(c0, ell, res=1e-3) eBs, eSs = phase_transition(c0, ell, res=1e-3) ax[0].axes.plot(eBs, eSs, label=r"$\ell$={}".format(ell)) ax[0].axes.plot(eBs, eSs, label=r"$\ell$={}".format(ell)) ax[1].axes.plot(eSs, eBs, label=r"$\ell$={}".format(ell)) ax[1].axes.plot(eSs, eBs, label=r"$\ell$={}".format(ell)) ax[0].axes.set_xlabel(r"$\varepsilon_B$") ax[0].axes.set_xlabel(r"$\varepsilon_B$") ax[0].axes.set_ylabel(r"$\varepsilon_S$") ax[0].axes.set_ylabel(r"$\varepsilon_S$") ax[0].axes.grid("On") ax[0].axes.grid("On") ax[1].axes.set_xlabel(r"$\varepsilon_S$") ax[1].axes.set_xlabel(r"$\varepsilon_S$") ax[1].axes.set_ylabel(r"$\varepsilon_B$") ax[1].axes.set_ylabel(r"$\varepsilon_B$") ax[1].axes.set_ylim([0, 0.3]) ax[1].axes.set_ylim([0, 0.3]) ax[1].axes.grid("On") ax[1].axes.grid("On") ax[1].axes.legend() ax[1].axes.legend() _ = plt.show() _ = plt.show()   %%%% Output: display_data %%%% Output: display_data [Hidden Image Output] [Hidden Image Output] %% Cell type:markdown id: tags: %% Cell type:markdown id: tags: #### Figure 4: #### Figure 4: Two-way punctured matrices $K$ for **(left)** $(\varepsilon_S,\varepsilon_B)=(.2,1)$ or **(right)** $(\varepsilon_S,\varepsilon_B)=(1,.04)$, with $c_0=\frac12$, $n=4\,000$, $p=2\,000$, $b=0$. Clustering setting with $x_i\sim .4\mathcal N(\mu_1,I_p)+.6\mathcal N(\mu_2,I_p)$ for $[\mu_1^T,\mu_2^T]^T\sim \mathcal N(0,\frac1p[\begin{smallmatrix} 20 & 12 \\ 12 & 30\end{smallmatrix}]\otimes I_p)$. **(Top)** first $100\times 100$ absolute entries of $K$ (white for zero); **(Middle)** spectrum of $K$, theoretical limit, and isolated eigenvalues; **(Bottom)** second dominant eigenvector $\hat v_2$ of $K$ against theoretical average in red. **As confirmed by theory, although (top) $K$ is dense for $\varepsilon_B=1$ and sparse for $\varepsilon_B=.04$ ($96\%$ empty) and (middle) the spectra strikingly differ, (bottom) since $\varepsilon_S^2\varepsilon_Bc_0^{-1}$ is constant, the eigenvector alignment $|\hat v_2^T v_2|^2$ is the same in both cases.** Two-way punctured matrices $K$ for **(left)** $(\varepsilon_S,\varepsilon_B)=(.2,1)$ or **(right)** $(\varepsilon_S,\varepsilon_B)=(1,.04)$, with $c_0=\frac12$, $n=4\,000$, $p=2\,000$, $b=0$. Clustering setting with $x_i\sim .4\mathcal N(\mu_1,I_p)+.6\mathcal N(\mu_2,I_p)$ for $[\mu_1^T,\mu_2^T]^T\sim \mathcal N(0,\frac1p[\begin{smallmatrix} 20 & 12 \\ 12 & 30\end{smallmatrix}]\otimes I_p)$. **(Top)** first $100\times 100$ absolute entries of $K$ (white for zero); **(Middle)** spectrum of $K$, theoretical limit, and isolated eigenvalues; **(Bottom)** second dominant eigenvector $\hat v_2$ of $K$ against theoretical average in red. **As confirmed by theory, although (top) $K$ is dense for $\varepsilon_B=1$ and sparse for $\varepsilon_B=.04$ ($96\%$ empty) and (middle) the spectra strikingly differ, (bottom) since $\varepsilon_S^2\varepsilon_Bc_0^{-1}$ is constant, the eigenvector alignment $|\hat v_2^T v_2|^2$ is the same in both cases.** %% Cell type:code id: tags: %% Cell type:code id: tags:  python  python # Generation of the mean vectors for each sample # Generation of the mean vectors for each sample p = 2000 # dimension size p = 2000 # dimension size n = 4000 # sample size n = 4000 # sample size c0 = p / n # dimension/sample size ratio c0 = p / n # dimension/sample size ratio # Set the covariance for the two mean vectorss # Set the covariance for the two mean vectorss cov_mu = np.array([[20, 12], [12, 30]]) / p cov_mu = np.array([[20, 12], [12, 30]]) / p mus = gen_synth_mus(p=p, n=n, cov_mu=cov_mu) mus = gen_synth_mus(p=p, n=n, cov_mu=cov_mu) # Set the proportion for each of the two classes # Set the proportion for each of the two classes cs = [0.4, 0.6] cs = [0.4, 0.6] n0 = int(n * cs[0]) n0 = int(n * cs[0]) # Generate the noisy data matrix and the spikes matrices # Generate the noisy data matrix and the spikes matrices X, ells, vM = gen_synth_X(p, n, mus, cs) X, ells, vM = gen_synth_X(p, n, mus, cs) # Puncturing settings # Puncturing settings eS = 0.2 # data puncturing ratio eS = 0.2 # data puncturing ratio eB = 0.04 # kernel puncturing ratio eB = 0.04 # kernel puncturing ratio b = 0 # kernel matrix diagonal entry b = 0 # kernel matrix diagonal entry f, ax = plt.subplots(3, 2, figsize=(12, 8)) f, ax = plt.subplots(3, 2, figsize=(12, 8)) # S-punctured spectrum # S-punctured spectrum B, _, _ = mask_B(n, 1, is_diag=b) # mask for diag entries B, _, _ = mask_B(n, 1, is_diag=b) # mask for diag entries X_S = puncture_X(X, eS) # punctured data mat X_S = puncture_X(X, eS) # punctured data mat K_S = puncture_K_vanilla(X_S, B) # remove diag entries K_S = puncture_K_vanilla(X_S, B) # remove diag entries l, u_S = sp.sparse.linalg.eigsh(K_S, k=2, which="LA", tol=0, return_eigenvectors=True) l, u_S = sp.sparse.linalg.eigsh(K_S, k=2, which="LA", tol=0, return_eigenvectors=True) u_S = u_S[:, 1].ravel() #  second principal eigvect u_S = u_S[:, 1].ravel() #  second principal eigvect u_S = u_S * np.sign(u_S[:n0].mean() - u_S[n0:].mean()) u_S = u_S * np.sign(u_S[:n0].mean() - u_S[n0:].mean()) lambdas_S = np.linalg.eigvalsh(K_S.todense()) lambdas_S = np.linalg.eigvalsh(K_S.todense()) #  B-punctured spectrum #  B-punctured spectrum B, _, _ = mask_B(n, eB, is_diag=b) B, _, _ = mask_B(n, eB, is_diag=b) K_B = puncture_K_vanilla(X, B) #  punctured kernel mat K_B = puncture_K_vanilla(X, B) #  punctured kernel mat l, u_B = sp.sparse.linalg.eigsh(K_B, k=2, which="LA", tol=0, return_eigenvectors=True) l, u_B = sp.sparse.linalg.eigsh(K_B, k=2, which="LA", tol=0, return_eigenvectors=True) u_B = u_B[:, 1].ravel() # second principal eigvect u_B = u_B[:, 1].ravel() # second principal eigvect u_B = u_B * np.sign(u_B[:n0].mean() - u_B[n0:].mean()) u_B = u_B * np.sign(u_B[:n0].mean() - u_B[n0:].mean()) lambdas_B = np.linalg.eigvalsh(K_B.todense()) lambdas_B = np.linalg.eigvalsh(K_B.todense()) # S-punctured figs # S-punctured figs ax[0, 0].imshow( ax[0, 0].imshow( np.abs(K_S[:100, :100].todense()), interpolation="nearest", cmap="gray_r" np.abs(K_S[:100, :100].todense()), interpolation="nearest", cmap="gray_r" ) ) ax[0, 0].axis("off") ax[0, 0].axis("off") disp_eigs(ax[1:, 0], lambdas_S, u_S, 1, eS, c0, n0, ells, b, vM) disp_eigs(ax[1:, 0], lambdas_S, u_S, 1, eS, c0, n0, ells, b, vM) ax[1, 0].axes.set_ylim([0, 2]) ax[1, 0].axes.set_ylim([0, 2]) # B-punctured figs # B-punctured figs ax[0, 1].imshow( ax[0, 1].imshow( np.abs(K_B[:100, :100].todense()), interpolation="nearest", cmap="gray_r" np.abs(K_B[:100, :100].todense()), interpolation="nearest", cmap="gray_r" ) ) ax[0, 1].axis("off") ax[0, 1].axis("off") disp_eigs(ax[1:, 1], lambdas_B, u_B, eB, 1, c0, n0, ells, b, vM) disp_eigs(ax[1:, 1], lambdas_B, u_B, eB, 1, c0, n0, ells, b, vM)   %%%% Output: display_data %%%% Output: display_data [Hidden Image Output] [Hidden Image Output] %% Cell type:markdown id: tags: %% Cell type:markdown id: tags: #### Figure 5. #### Figure 5. Limiting probability of error of spectral clustering of $\mathcal N(\pm\mu,I_p)$ with equal class sizes on $K$: as a function of $\varepsilon_B$ for fixed $\ell=\|\mu\|^2=50$ (top), and $\varepsilon_S$ for fixed $\ell=50$ (bottom). Simulations (single realization) in markers for $p=n=4\,000$ ($\color{blue}\times$) and $p=n=8\,000$ ($\color{blue}+$). Very good fit between theory and practice for not too small $\varepsilon_S,\varepsilon_B$ . Limiting probability of error of spectral clustering of $\mathcal N(\pm\mu,I_p)$ with equal class sizes on $K$: as a function of $\varepsilon_B$ for fixed $\ell=\|\mu\|^2=50$ (top), and $\varepsilon_S$ for fixed $\ell=50$ (bottom). Simulations (single realization) in markers for $p=n=4\,000$ ($\color{blue}\times$) and $p=n=8\,000$ ($\color{blue}+$). Very good fit between theory and practice for not too small $\varepsilon_S,\varepsilon_B$ . %% Cell type:code id: tags: %% Cell type:code id: tags:  python  python p = 4000 p = 4000 ns = np.array([2000, 2000]) ns = np.array([2000, 2000]) n0 = ns[0] n0 = ns[0] n = np.sum(ns) n = np.sum(ns) cs = ns / n cs = ns / n c0 = p / n c0 = p / n power = 50 power = 50 mu = rng.normal(size=(p,)) mu = rng.normal(size=(p,)) mu = mu / np.linalg.norm(mu) * np.sqrt(power) mu = mu / np.linalg.norm(mu) * np.sqrt(power) mus = np.array([mu, -mu]).T mus = np.array([mu, -mu]).T X, ells, _ = gen_synth_X(p, n, mus, cs) X, ells, _ = gen_synth_X(p, n, mus, cs) ell = np.max(ells) ell = np.max(ells) f, ax = plt.subplots(2, 1, figsize=(8, 8), sharex=True) f, ax = plt.subplots(2, 1, figsize=(8, 8), sharex=True) ## Perf vs epsilon_B (fixed epsilon_S) ## Perf vs epsilon_B (fixed epsilon_S) res = 1e-1 res = 1e-1 eSs = [1 / 10, 1 / 20] eSs = [1 / 10, 1 / 20] eBs = np.linspace(res, 1, int(1 / res)) eBs = np.linspace(res, 1, int(1 / res)) res_thin = 1e-2 res_thin = 1e-2 eSs_thin = [1 / 10, 1 / 20] eSs_thin = [1 / 10, 1 / 20] eBs_thin = np.linspace(res_thin, 1, int(1 / res_thin)) eBs_thin = np.linspace(res_thin, 1, int(1 / res_thin)) perf_sim = np.zeros((len(eBs), len(eSs))) perf_sim = np.zeros((len(eBs), len(eSs))) perf_th = np.zeros((len(eBs_thin), len(eSs_thin))) perf_th = np.zeros((len(eBs_thin), len(eSs_thin))) for iS, eS in enumerate(eSs): for iS, eS in enumerate(eSs): for iB, eB in enumerate(eBs): for iB, eB in enumerate(eBs): u = puncture_eigs(X, eB, eS, b=1, sparsity=1)[1] u = puncture_eigs(X, eB, eS, b=1, sparsity=1)[1] overlap = 1 / n * (np.sum(u[:n0] > 0) + np.sum(u[n0:] < 0)) overlap = 1 / n * (np.sum(u[:n0] > 0) + np.sum(u[n0:] < 0)) perf_sim[iB, iS] = np.min([overlap, 1 - overlap]) perf_sim[iB, iS] = np.min([overlap, 1 - overlap]) ax[0].axes.plot( ax[0].axes.plot( eBs, perf_sim[:, iS], "xb", label=r"Simulation, $\varepsilon_S=${}".format(eS) eBs, perf_sim[:, iS], "xb", label=r"Simulation, $\varepsilon_S=${}".format(eS) ) ) for iS, eS in enumerate(eSs_thin): for iS, eS in enumerate(eSs_thin): for iB, eB in enumerate(eBs_thin): for iB, eB in enumerate(eBs_thin): zeta = spike(eB, eS, c0, ell, b=1)[1] zeta = spike(eB, eS, c0, ell, b=1)[1] perf_th[iB, iS] = qfunc(np.sqrt(zeta / (1 - zeta))) perf_th[iB, iS] = qfunc(np.sqrt(zeta / (1 - zeta))) ax[0].axes.plot( ax[0].axes.plot( eBs_thin, perf_th[:, iS], "r", label=r"Theory, $\varepsilon_S=${}".format(eS) eBs_thin, perf_th[:, iS], "r", label=r"Theory, $\varepsilon_S=${}".format(eS) ) ) ax[0].axes.set_xlabel(r"$\varepsilon_B$") ax[0].axes.set_xlabel(r"$\varepsilon_B$") ax[0].axes.set_ylabel(r"$P_e$") ax[0].axes.set_ylabel(r"$P_e$") ax[0].axes.grid("On") ax[0].axes.grid("On") ax[0].axes.legend() ax[0].axes.legend() ## Perf vs epsilon_S (fixed epsilon_B) ## Perf vs epsilon_S (fixed epsilon_B) eBs = [1 / 100, 1 / 400] eBs = [1 / 100, 1 / 400] eSs = np.linspace(res, 1, int(1 / res)) eSs = np.linspace(res, 1, int(1 / res)) res_thin = 1e-2 res_thin = 1e-2 eBs_thin = [1 / 100, 1 / 400] eBs_thin = [1 / 100, 1 / 400] eSs_thin = np.linspace(res_thin, 1, int(1 / res_thin)) eSs_thin = np.linspace(res_thin, 1, int(1 / res_thin)) perf_sim = np.zeros((len(eSs), len(eBs))) perf_sim = np.zeros((len(eSs), len(eBs))) perf_th = np.zeros((len(eSs_thin), len(eBs_thin))) perf_th = np.zeros((len(eSs_thin), len(eBs_thin))) for iB, eB in enumerate(eBs): for iB, eB in enumerate(eBs): for iS, eS in enumerate(eSs): for iS, eS in enumerate(eSs): u = puncture_eigs(X, eB, eS, b=1, sparsity=1)[1] u = puncture_eigs(X, eB, eS, b=1, sparsity=1)[1] overlap = 1 / n * (np.sum(u[:n0] > 0) + np.sum(u[n0:] < 0)) overlap = 1 / n * (np.sum(u[:n0] > 0) + np.sum(u[n0:] < 0)) perf_sim[iS, iB] = np.min([overlap, 1 - overlap]) perf_sim[iS, iB] = np.min([overlap, 1 - overlap]) ax[1].axes.plot( ax[1].axes.plot( eSs, perf_sim[:, iB], "xb", label=r"Simulation, $\varepsilon_B=${}".format(eB) eSs, perf_sim[:, iB], "xb", label=r"Simulation, $\varepsilon_B=${}".format(eB) ) ) for iB, eB in enumerate(eBs_thin): for iB, eB in enumerate(eBs_thin): for iS, eS in enumerate(eSs_thin): for iS, eS in enumerate(eSs_thin): zeta = spike(eB, eS, c0, ell, b=1)[1] zeta = spike(eB, eS, c0, ell, b=1)[1] perf_th[iS, iB] = qfunc(np.sqrt(zeta / (1 - zeta))) perf_th[iS, iB] = qfunc(np.sqrt(zeta / (1 - zeta))) ax[1].axes.plot( ax[1].axes.plot( eSs_thin, perf_th[:, iB], "r", label=r"Theory, $\varepsilon_B=${}".format(eB) eSs_thin, perf_th[:, iB], "r", label=r"Theory, $\varepsilon_B=${}".format(eB) ) ) ax[1].axes.set_xlabel(r"$\varepsilon_S$") ax[1].axes.set_xlabel(r"$\varepsilon_S$") ax[1].axes.set_ylabel(r"$P_e$") ax[1].axes.set_ylabel(r"$P_e$") ax[1].axes.grid("On") ax[1].axes.grid("On") ax[1].axes.legend() ax[1].axes.legend()   %%%% Output: execute_result %%%% Output: execute_result %%%% Output: display_data %%%% Output: display_data [Hidden Image Output] [Hidden Image Output] %% Cell type:markdown id: tags: %% Cell type:markdown id: tags: #### Figure 6 #### Figure 6 Examples of **MNIST-fashion images**, trouser instances **(top row)**, pullover instances **(bottom row)**. Examples of **MNIST-fashion images**, trouser instances **(top row)**, pullover instances **(bottom row)**. **Note:** the GAN data we generated and used in the submitted paper are too voluminous to be included in the supplementary material and is not publicly and anonymously available. For this reason, we are using below the smaller, publicly available, MNIST-fashion real word data set. However **the conclusions obtained for this dataset are very similar to those drawn in the paper for the GAN data.** **Note:** the GAN data we generated and used in the submitted paper are too voluminous to be included in the supplementary material and is not publicly and anonymously available. For this reason, we are using below the smaller, publicly available, MNIST-fashion real word data set. However **the conclusions obtained for this dataset are very similar to those drawn in the paper for the GAN data.** %% Cell type:code id: tags: %% Cell type:code id: tags:  python  python from tensorflow.keras.datasets import fashion_mnist from tensorflow.keras.datasets import fashion_mnist (X, y), _ = fashion_mnist.load_data() (X, y), _ = fashion_mnist.load_data() selected_labels = [1, 2] selected_labels = [1, 2] nb_im = 5 # number of images to display for each class nb_im = 5 # number of images to display for each class im0 = X[y == selected_labels[0]] im0 = X[y == selected_labels[0]] im1 = X[y == selected_labels[1]] im1 = X[y == selected_labels[1]] f, ax = plt.subplots(2, 5, figsize=(10, 4), sharex=True, sharey=True) f, ax = plt.subplots(2, 5, figsize=(10, 4), sharex=True, sharey=True) for i in range(nb_im): for i in range(nb_im): ax[0, i].axes.imshow(im0[i, :, :].squeeze(), interpolation="nearest", cmap="gray_r") ax[0, i].axes.imshow(im0[i, :, :].squeeze(), interpolation="nearest", cmap="gray_r") ax[0, i].axes.axis("off") ax[0, i].axes.axis("off") ax[1, i].axes.imshow(im1[i, :, :].squeeze(), interpolation="nearest", cmap="gray_r") ax[1, i].axes.imshow(im1[i, :, :].squeeze(), interpolation="nearest", cmap="gray_r") ax[1, i].axes.axis("off") ax[1, i].axes.axis("off")   %%%% Output: display_data %%%% Output: display_data [Hidden Image Output] [Hidden Image Output] %% Cell type:markdown id: tags: %% Cell type:markdown id: tags: #### Figure 7 #### Figure 7 Empirical classification errors for $2$-class (balanced) MNIST-fashion images (trouser vs pullover), with $n=512$ (**top**) and $n=2048$ (**bottom**). **Theoretically predicted plateau''-behavior observed for all $\varepsilon_B$ not too small**. Empirical classification errors for $2$-class (balanced) MNIST-fashion images (trouser vs pullover), with $n=512$ (**top**) and $n=2048$ (**bottom**). **Theoretically predicted plateau''-behavior observed for all $\varepsilon_B$ not too small**. %% Cell type:code id: tags: %% Cell type:code id: tags:  python  python nbMC = 40 nbMC = 40 df_1, _ = get_perf_clustering(n0=256, nbMC=nbMC) df_1, _ = get_perf_clustering(n0=256, nbMC=nbMC) df_2, _ = get_perf_clustering(n0=1024, nbMC=nbMC) df_2, _ = get_perf_clustering(n0=1024, nbMC=nbMC)   %%%% Output: stream %%%% Output: stream [progression 40/40] [progression 40/40] %% Cell type:code id: tags: %% Cell type:code id: tags:  python  python f, ax = plt.subplots(2, 1, figsize=(8, 8), sharex=True) f, ax = plt.subplots(2, 1, figsize=(8, 8), sharex=True) plot_perf_clustering(df_1, n0, ax[0]) plot_perf_clustering(df_1, n0, ax[0]) plot_perf_clustering(df_2, n0, ax[1]) plot_perf_clustering(df_2, n0, ax[1])   %%%% Output: display_data %%%% Output: display_data [Hidden Image Output] [Hidden Image Output] %% Cell type:markdown id: tags: %% Cell type:markdown id: tags: #### Figure 8 #### Figure 8 Sample vs limiting spectra and dominant eigenvector of $K$ for 2-class MNIST-fashion images (trouser vs pullover); **(left)** $\varepsilon_S=\varepsilon_B=1$ (error rate: $\mathbb P_e=.09$); **(right)** $\varepsilon_S=0.02$, $\varepsilon_B=0.2$ ($\mathbb P_e=.12$). **Surprisingly good fit between sample and predicted isolated eigenvalue/eigenvector in all cases; as for spectral measure, significant prediction improvement as $\varepsilon_S,\varepsilon_B\to 0$** Sample vs limiting spectra and dominant eigenvector of $K$ for 2-class MNIST-fashion images (trouser vs pullover); **(left)** $\varepsilon_S=\varepsilon_B=1$ (error rate: $\mathbb P_e=.09$); **(right)** $\varepsilon_S=0.02$, $\varepsilon_B=0.2$ ($\mathbb P_e=.12$). **Surprisingly good fit between sample and predicted isolated eigenvalue/eigenvector in all cases; as for spectral measure, significant prediction improvement as $\varepsilon_S,\varepsilon_B\to 0$** %% Cell type:code id: tags: %% Cell type:code id: tags:  python  python eB = 0.2 eB = 0.2 eS = 0.02 eS = 0.02 X, n0, n1, ell = gen_MNIST_vectors(n0=2048) X, n0, n1, ell = gen_MNIST_vectors(n0=2048) (p, n) = X.shape (p, n) = X.shape c0 = p / n c0 = p / n eigvals_punct = puncture_eigs(X, eB, eS, b=1, sparsity=0)[0] eigvals_punct = puncture_eigs(X, eB, eS, b=1, sparsity=0)[0] u_punct = puncture_eigs(X, eB, eS, b=1, sparsity=1)[1] u_punct = puncture_eigs(X, eB, eS, b=1, sparsity=1)[1] u_punct = np.sign(u_punct[:n0].mean() - u_punct[n0:].mean()) * u_punct u_punct = np.sign(u_punct[:n0].mean() - u_punct[n0:].mean()) * u_punct overlap = 1 / n * (np.sum(u_punct[:n0] > 0) + np.sum(u_punct[n0:] < 0)) overlap = 1 / n * (np.sum(u_punct[:n0] > 0) + np.sum(u_punct[n0:] < 0)) print("Punctured (right) error rate == {:.2f}".format(1 - overlap)) print("Punctured (right) error rate == {:.2f}".format(1 - overlap)) eigvals_full = puncture_eigs(X, eB=1, eS=1, b=1, sparsity=0)[0] eigvals_full = puncture_eigs(X, eB=1, eS=1, b=1, sparsity=0)[0] u_full = puncture_eigs(X, eB=1, eS=1, b=1, sparsity=1)[1] u_full = puncture_eigs(X, eB=1, eS=1, b=1, sparsity=1)[1] u_full = np.sign(u_full[:n0].mean() - u_full[n0:].mean()) * u_full u_full = np.sign(u_full[:n0].mean() - u_full[n0:].mean()) * u_full overlap = 1 / n * (np.sum(u_full[:n0] > 0) + np.sum(u_full[n0:] < 0)) overlap = 1 / n * (np.sum(u_full[:n0] > 0) + np.sum(u_full[n0:] < 0)) print("Full (left) error rate == {:.2f}".format(1 - overlap)) print("Full (left) error rate == {:.2f}".format(1 - overlap)) # Truncate the eigvals dist to show the histrogram # Truncate the eigvals dist to show the histrogram lmax = 15 lmax = 15 print( print( "Full (left): ratio of eigenvalues < {:d} == {:d}/{:d}".format( "Full (left): ratio of eigenvalues < {:d} == {:d}/{:d}".format( lmax, np.sum(eigvals_full < lmax), n lmax, np.sum(eigvals_full < lmax), n ) ) ) ) f, axes = plt.subplots(2, 2, figsize=(12, 10)) f, axes = plt.subplots(2, 2, figsize=(12, 10)) # Puncturing # Puncturing disp_eigs(axes[:, 1], eigvals_punct, u_punct, eB, eS, c0, n0, ell, b=1, vM=None) disp_eigs(axes[:, 1], eigvals_punct, u_punct, eB, eS, c0, n0, ell, b=1, vM=None) # Full # Full disp_eigs_full(axes[:, 0], eigvals_full, u_full, c0, ell, lmax=lmax, b=1) disp_eigs_full(axes[:, 0], eigvals_full, u_full, c0, ell, lmax=lmax, b=1) plt.show() plt.show()   %%%% Output: stream %%%% Output: stream Punctured (right) error rate == 0.12 Punctured (right) error rate == 0.12 Full (left) error rate == 0.10 Full (left) error rate == 0.10 Full (left): ratio of eigenvalues < 15 == 4083/4096 Full (left): ratio of eigenvalues < 15 == 4083/4096 Full case: largest sample eigval == 981.74, lagest limiting spike == 783.62 Full case: largest sample eigval == 981.74, lagest limiting spike == 783.62 %%%% Output: display_data %%%% Output: display_data [Hidden Image Output] [Hidden Image Output] %% Cell type:code id: tags: %% Cell type:code id: tags:  python  python   ... ...
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!