diff options
| author | jeanpouget-abadie <jean.pougetabadie@gmail.com> | 2015-01-29 17:33:32 -0500 |
|---|---|---|
| committer | jeanpouget-abadie <jean.pougetabadie@gmail.com> | 2015-01-29 17:33:32 -0500 |
| commit | 752ca266a44613a2b505f4f81cfed38cfd8fa5d4 (patch) | |
| tree | 22fb5513294a98355cf4284d5591577132a4b1af /paper | |
| parent | f6a02eeb1e006510bc2221eaac38681bc918c2c1 (diff) | |
| download | cascades-752ca266a44613a2b505f4f81cfed38cfd8fa5d4.tar.gz | |
linear threshold section
Diffstat (limited to 'paper')
| -rw-r--r-- | paper/paper.tex | 4 | ||||
| -rw-r--r-- | paper/sections/assumptions.tex | 2 | ||||
| -rw-r--r-- | paper/sections/intro.tex | 2 | ||||
| -rw-r--r-- | paper/sections/model.tex | 30 | ||||
| -rw-r--r-- | paper/sections/results.tex | 3 | ||||
| -rw-r--r-- | paper/sparse.bib | 67 |
6 files changed, 81 insertions, 27 deletions
diff --git a/paper/paper.tex b/paper/paper.tex index b172a5d..930e8ba 100644 --- a/paper/paper.tex +++ b/paper/paper.tex @@ -116,6 +116,10 @@ Summarize contributions: \label{sec:assumptions} \input{sections/assumptions.tex} +\section{Linear Threshold Model} +\label{sec:linear_threshold} +\input{sections/linear_threshold.tex} + \section{Experiments} \section{Discussion} diff --git a/paper/sections/assumptions.tex b/paper/sections/assumptions.tex index 14b16d4..8b7e3ee 100644 --- a/paper/sections/assumptions.tex +++ b/paper/sections/assumptions.tex @@ -19,7 +19,7 @@ As mentioned previously, it is intuitive that the irrepresentability condition i \begin{proposition} \label{prop:irrepresentability} -If the irrepresentability condition holds with $\epsilon > \frac{2}{3}$, then the restricted eigenvalue condition holds with constant $\gamma_n \geq (1 - 3(1 -\epsilon))^2 \lambda_{\min}^2n/(4s)$, where $\lambda_{\min} > 0$ is the smallest eigenvalue of $Q^*_{S,S}$, on which the results of \cite{Daneshmand:2014} also depend. +If the irrepresentability condition holds with $\epsilon > \frac{2}{3}$, then the restricted eigenvalue condition holds with constant $\gamma_n \geq \frac{ (1 - 3(1 -\epsilon))^2 \lambda_{\min}^2}{4s}n$, where $\lambda_{\min} > 0$ is the smallest eigenvalue of $Q^*_{S,S}$, on which the results of \cite{Daneshmand:2014} also depend. \end{proposition} diff --git a/paper/sections/intro.tex b/paper/sections/intro.tex index 8edf341..aa65b5d 100644 --- a/paper/sections/intro.tex +++ b/paper/sections/intro.tex @@ -12,7 +12,7 @@ Throughout this paper, we focus on the two main diffusion processes, outlined in The study of edge prediction in graph has been an active field of research for over a decade. \cite{GomezRodriguez:2010} was one of the first papers to study graph prediction from cascades. They introduce the {\scshape netinf} algorithm, which approximates the likelihood of cascades represented as a continuous process. The algorithm was later improved/modified in later work. Beside validation on generic networks, {\scshape netinf} is not known to have any theoretical recovery guarantees. \cite{Netrapalli:2012} studied solely the independent cascade model and obtained the first ${\cal O}(\Delta^2 \log m)$ guarantee on general networks. The algorithm is based around the same likelihood function we suggest, without the $\ell1$-norm penalty. However, the analysis depended strongly on a restrictive {\it correlation decay} assumption, which strongly restricts the number of new infections at every step. In this restricted setting, they show a complex lower bound, which is roughly $\Omega(\Delta \log (m/\Delta))$ lower bound for perfect support recovery with constant probability. -The work of \cite{Abrahao:13} study the same continous-model framework as \cite{GomezRodriguez:2010} and obtain a ${\cal O}(\Delta^9 \log^2 \Delta \log m)$ support recovery algorithm. Their work also studies the information leveraged by different `parts' of the cascade, showing that a surprisingly important amount of information can be gleaned from the first `infections' of the cascade. We will reach a similar conclusion in section~\ref{sec:assumptions}. However, their model supposes a single-source model, where only one source is selected at random, which is less realistic in practice. Oftentimes, the `patient 0' is unknown to us, and a multi-source model intuitively models the more common situation of a delayed observation of the cascade. +The work of \cite{Abrahao:13} study the same continuous-model framework as \cite{GomezRodriguez:2010} and obtain a ${\cal O}(\Delta^9 \log^2 \Delta \log m)$ support recovery algorithm. Their work also studies the information leveraged by different `parts' of the cascade, showing that a surprisingly important amount of information can be gleaned from the first `infections' of the cascade. We will reach a similar conclusion in section~\ref{sec:assumptions}. However, their model supposes a single-source model, where only one source is selected at random, which is less realistic in practice. Oftentimes, the `patient 0' is unknown to us, and a multi-source model intuitively models the more common situation of a delayed observation of the cascade. The recent work of \cite{Daneshmand:2014} is very similar to our own, they consider a $\ell1$-norm penalty to their objective function, adapting standard results from sparse recovery to obtain a ${\cal O}(\Delta^3 \log m)$ algorithm under an irrepresentability condition. With stronger assumptions, they match the \cite{Netrapalli:2012} bound of ${\cal O}(\Delta^2 \log m)$, by exploiting a similar proof technique based around the KKT conditions of the objective function. Their work has the merit of studying a general framework of continuous functions. Similarly to \cite{Abrahao:13}, they place themselves in the restrictive single-source context. diff --git a/paper/sections/model.tex b/paper/sections/model.tex index 98712d7..25a0a47 100644 --- a/paper/sections/model.tex +++ b/paper/sections/model.tex @@ -26,16 +26,16 @@ problems. \subsection{Independent Cascade Model} -In the independent cascade model, nodes can be either uninfected, active or -infected. All nodes start either as uninfected or active. At each time step -$t$, for each edge $(i,j)$ where $j$ is uninfected and $i$ is active, $i$ +In the independent cascade model, nodes can be either susceptible, active or +infected. All nodes start either as susceptible or active. At each time step +$t$, for each edge $(i,j)$ where $j$ is susceptible and $i$ is active, $i$ attempts to infect $j$ with probability $p_{i,j}\in[0,1]$. If $i$ succeeds, $j$ will become active at time step $t+1$. Regardless of $i$'s success, node $i$ will be infected at time $t+1$: nodes stay active for only one time step. The cascade continues until no active nodes remain. If we denote by $X^t$ the indicator variable of the set of active nodes at time -step $t-1$, then if $j$ is uninfected at time step $t-1$, we have: +step $t-1$, then if $j$ is susceptible at time step $t-1$, we have: \begin{displaymath} \P\big[X^{t+1}_j = 1\,|\, X^{t}\big] = 1 - \prod_{i = 1}^m (1 - p_{i,j})^{X^t_i}. @@ -50,25 +50,7 @@ as: \end{equation} where we defined $\theta_j\defeq (\Theta_{1,j},\ldots,\Theta_{m,j})$. -\subsection{Linear Threshold Model} - -In the Linear Threshold Model, each node $j\in V$ has a threshold $t_j$ drawn -uniformly from the interval $[0,1]$. Furthermore, there is a weight -$\Theta_{i,j}\in[0,1]$ for each edge $(i,j)$. We assume that the weights are -such that for each node $j\in V$, $\sum_{i=1}^m \Theta_{i,j} \leq 1$. - -Nodes can be either uninfected or active. At each time step, each uninfected -node $j$ becomes active if the sum of the weights $\Theta_{i,j}$ for $i$ an -active parent of $j$ is larger than $j$'s threshold $t_j$. Denoting by $X^t$ -the indicator variable of the set of active nodes at time step $t-1$, if -$j\in V$ is uninfected at time step $t-1$, then: -\begin{equation} - \label{eq:lt} - \tag{LT} - \P\big[X^{t+1}_j = 1\,|\, X^{t}\big] = \sum_{i=1}^m \Theta_{i,j}X^t_i - = \inprod{\theta_j}{X^t} -\end{equation} -where we defined again $\theta_j\defeq (\Theta_{1,j},\ldots,\Theta_{m,j})$. +\subsection{Generalized Linear Models} \subsection{Maximum Likelihood Estimation} @@ -104,7 +86,7 @@ a separate optimization program: \end{equation} Furthermore, the state evolution of a node $j\in V$ has the same structure in -both models: the transition from uninfected to active at time step $t+1$ is +both models: the transition from susceptible to active at time step $t+1$ is controlled by a Bernoulli variable whose parameter can be written $f(\inprod{\theta_j}{x^t})$ for some function $f$. Hence, if we denote by $n_j$ the first step at which $j$ becomes active, we can rewrite the MLE program diff --git a/paper/sections/results.tex b/paper/sections/results.tex index 449ae02..965447b 100644 --- a/paper/sections/results.tex +++ b/paper/sections/results.tex @@ -26,7 +26,8 @@ Interestingly, finding such an upper-bound is commonly studied in sparse recover \begin{equation} \nonumber -\forall X \in {\cal C}, \| \Sigma X \|_2^2 \geq \gamma_n \|X\|_2^2 \qquad \text{\bf (RE)} +\forall X \in {\cal C}, \| \Sigma X \|_2^2 \geq \gamma_n \|X\|_2^2 +\tag{RE} \end{equation} We compare this condition to the irrepresentability condition used in prior work in section~\ref{sec:lowerbound}. We cite the following theorem from \cite{Negahban:2009} diff --git a/paper/sparse.bib b/paper/sparse.bib index 11209ee..7d88004 100644 --- a/paper/sparse.bib +++ b/paper/sparse.bib @@ -158,3 +158,70 @@ year = {2006}, doi = {10.1198/016214506000000735}, URL = {http://dx.doi.org/10.1198/016214506000000735}, } + +@article{Jacques:2013, + author = {Laurent Jacques and + Jason N. Laska and + Petros T. Boufounos and + Richard G. Baraniuk}, + title = {Robust 1-Bit Compressive Sensing via Binary Stable Embeddings of Sparse + Vectors}, + journal = {{IEEE} Transactions on Information Theory}, + volume = {59}, + number = {4}, + pages = {2082--2102}, + year = {2013}, + url = {http://dx.doi.org/10.1109/TIT.2012.2234823}, + doi = {10.1109/TIT.2012.2234823}, + timestamp = {Tue, 09 Apr 2013 19:57:48 +0200}, + biburl = {http://dblp.uni-trier.de/rec/bib/journals/tit/JacquesLBB13}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + +@inproceedings{Boufounos:2008, + author = {Petros Boufounos and + Richard G. Baraniuk}, + title = {1-Bit compressive sensing}, + booktitle = {42nd Annual Conference on Information Sciences and Systems, {CISS} + 2008, Princeton, NJ, USA, 19-21 March 2008}, + pages = {16--21}, + year = {2008}, + url = {http://dx.doi.org/10.1109/CISS.2008.4558487}, + doi = {10.1109/CISS.2008.4558487}, + timestamp = {Wed, 15 Oct 2014 17:04:27 +0200}, + biburl = {http://dblp.uni-trier.de/rec/bib/conf/ciss/BoufounosB08}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + +@inproceedings{Gupta:2010, + author = {Ankit Gupta and + Robert Nowak and + Benjamin Recht}, + title = {Sample complexity for 1-bit compressed sensing and sparse classification}, + booktitle = {{IEEE} International Symposium on Information Theory, {ISIT} 2010, + June 13-18, 2010, Austin, Texas, USA, Proceedings}, + pages = {1553--1557}, + year = {2010}, + url = {http://dx.doi.org/10.1109/ISIT.2010.5513510}, + doi = {10.1109/ISIT.2010.5513510}, + timestamp = {Thu, 15 Jan 2015 17:11:50 +0100}, + biburl = {http://dblp.uni-trier.de/rec/bib/conf/isit/GuptaNR10}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + +@article{Plan:2014, + author = {Yaniv Plan and + Roman Vershynin}, + title = {Dimension Reduction by Random Hyperplane Tessellations}, + journal = {Discrete {\&} Computational Geometry}, + volume = {51}, + number = {2}, + pages = {438--461}, + year = {2014}, + url = {http://dx.doi.org/10.1007/s00454-013-9561-6}, + doi = {10.1007/s00454-013-9561-6}, + timestamp = {Tue, 11 Feb 2014 13:48:56 +0100}, + biburl = {http://dblp.uni-trier.de/rec/bib/journals/dcg/PlanV14}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + |
