Merge branch 'master' of https://github.com/jeanpouget-abadie/cracking_cascades

author: jeanpouget-abadie <jean.pougetabadie@gmail.com> 2015-05-18 19:19:51 +0200
committer: jeanpouget-abadie <jean.pougetabadie@gmail.com> 2015-05-18 19:19:51 +0200
commit: d60899ced622c27ad69f7aa9a1e0cd66d273901a (patch)
tree: 0c9d962679ac936cc60270cf9016f98ed09dd7d4
parent: e41204334108b4ad40246f3d81435a40ff484837 (diff)
parent: 2ba75084c8d7f230fcd8fe2fe506b8d8e71f2de1 (diff)
download: cascades-d60899ced622c27ad69f7aa9a1e0cd66d273901a.tar.gz
2 files changed, 57 insertions, 56 deletions
diff --git a/paper/sections/model.tex b/paper/sections/model.tex
index 01c7860..26e1a8d 100644
--- a/paper/sections/model.tex
+++ b/paper/sections/model.tex
@@ -121,22 +121,24 @@ time step $t$, then if $j$ is susceptible at time step $t+1$, we have:
     \P\big[X^{t+1}_j = 1\,|\, X^{t}\big]
     = 1 - \prod_{i = 1}^m {(1 - p_{i,j})}^{X^t_i}.
 \end{displaymath}
-Defining $\Theta_{i,j} \defeq \log(1-p_{i,j})$, this can be rewritten as:
+Defining $\Theta_{i,j} \defeq \log(\frac{1}{1-p_{i,j}})$, this can be rewritten as:
 \begin{equation}\label{eq:ic}
     \tag{IC}
-    \P\big[X^{t+1}_j = 1\,|\, X^{t}\big]
-    = 1 - \prod_{i = 1}^m e^{\Theta_{i,j}X^t_i}
-    = 1 - e^{\inprod{\Theta_j}{X^t}}
+    \begin{split}
+    \P\big[X^{t+1}_j &= 1\,|\, X^{t}\big]
+    = 1 - \prod_{i = 1}^m e^{-\Theta_{i,j}X^t_i}\\
+    &= 1 - e^{-\inprod{\Theta_j}{X^t}}
+\end{split}
 \end{equation}
 
 Therefore, the independent cascade model is a Generalized Linear Cascade model
-with inverse link function $f : z \mapsto 1 - e^z$.
+with inverse link function $f : z \mapsto 1 - e^{-z}$.
 
 Note that to write the Independent Cascade Model as a Generalized Linear
 Cascade Model, we had to introduce the change of variable $\Theta_{i,j}
-= \log(1-p_{i,j})$. The recovery results in Section~\ref{sec:results} hold on
-the $\Theta_j$ parameters. Fortunately, the following lemma shows that the
-recovery error on $\Theta_j$ is an upper bound on the error on the original
+= \log(\frac{1}{1-p_{i,j}})$. The recovery results in Section~\ref{sec:results}
+hold on the $\Theta_j$ parameters. Fortunately, the following lemma shows that
+the recovery error on $\Theta_j$ is an upper bound on the error on the original
 $p_j$ parameters.
 
 \begin{lemma}
@@ -144,7 +146,7 @@ $p_j$ parameters.
 \end{lemma}
 \begin{proof}
 Using the inequality $\forall x>0, \; \log x \geq 1 - \frac{1}{x}$, we have
-$|\log (1 - p) - \log (1-p')| \geq \max(1 - \frac{1-p}{1-p'},
+$|\log (\frac{1}{1 - p}) - \log (\frac{1}{1-p'})| \geq \max(1 - \frac{1-p}{1-p'},
 1 - \frac{1-p'}{1-p}) \geq \max( p-p', p'-p)$.
 \end{proof}
 
@@ -176,8 +178,8 @@ independent cascade model with exponential transmission function (CICE)
 of~\cite{GomezRodriguez:2010, Abrahao:13, Daneshmand:2014}. Assume that the
 temporal resolution of the discretization is $\varepsilon$, \emph{i.e.} all
 nodes whose (continuous) infection time is within the interval $[k\varepsilon,
-    (k+1)\varepsilon)$ are considered infected at (discrete) time step $t$.
-    pLet $X^k$ be the indicator vector of the set of nodes `infected' before or
+    (k+1)\varepsilon)$ are considered infected at (discrete) time step $t$. Let
+    $X^k$ be the indicator vector of the set of nodes `infected' before or
     during the $k^{th}$ time interval.  Note that contrary to the discrete-time
     independent cascade model, $X^k_j = 1 \implies X^{k+1}_j = 1$, that is,
     there is no immune state and nodes remain contagious forever.
@@ -288,11 +290,43 @@ until node $i$ becomes contagious, after which its behavior is deterministic.
 Contrary to prior work, our results depend on the number of
 measurements and not the number of cascades.
 
-A sufficient condition for program~\eqref{eq:pre-mle} to be a convex program is
-that $\mathcal{L}_i$ is a concave function. This will be the case if, for
-example, $f$ and $(1-f)$ are both log-concave functions. Remember that
-a twice-differentiable function $f$ is log concave iff. $f''f \leq f'^2$. It is
+\paragraph{Regularity assumptions}
+
+We would like program~\eqref{eq:pre-mle} to be a convex program in order to
+solve it efficiently. A sufficient condition is to
+assume that $\mathcal{L}_i$ is a concave function. This will be the case if,
+for example, $f$ and $(1-f)$ are both log-concave functions. Remember that
+a twice-differentiable function $f$ is log-concave iff. $f''f \leq f'^2$. It is
 easy to verify this property for $f$ and $(1-f)$ in the Independent Cascade
 Model and Voter Model.
 
-{\color{red} TODO:~talk about the different constraints}
+Furthermore, the data-dependent bounds in Section~\ref{sec:main_theorem} will
+require the following regularity assumption on the inverse link function $f$:
+\begin{equation}
+  \tag{LF}
+  \max \big\{ | (\log f)'(z_x) |, |(\log (1-f))'(z_x) | \big\}
+  \leq \frac{1}{\alpha}
+\end{equation}
+for some $\alpha\in(0, 1)$ and for all $z_x\defeq\inprod{\theta^*}{x}$ such that
+$f(z_x)\notin\{0,1\}$.
+
+In the voter model, $\frac{f'(z)}{f(z)} = \frac{1}{z}$ and
+$\frac{f'(z)}{(1-f)(z)} =
+\frac{1}{1-z}$. Hence (LF) will hold as soon as $\alpha\leq \Theta_{i,j}\leq
+1-\alpha$ for all $(i,j)\in E$ which is always satisfied for some $\alpha$ for
+non-isolated nodes.  In the Independent Cascade Model, $\frac{f'(z)}{f(z)} =
+\frac{1}{e^{z}-1}$ and $\frac{f'(z)}{(1-f)(z)} = 1$. Hence (LF) holds as soon
+as $p_{i,j}\geq \alpha$ for all $(i,j)\in E$ which is always satisfied for some
+$\alpha\in(0,1)$.
+
+For the data-independent bound of Proposition~\ref{prop:fi}, we will require the
+following additional regularity assumption:
+\begin{equation}
+  \tag{LF2}
+  \max \big\{ | (\log f)''(z_x) |, |(\log (1-f))''(z_x) | \big\}
+  \leq \frac{1}{\alpha}
+\end{equation}
+for some $\alpha\in(0, 1)$ and for all $z_x\defeq\inprod{\theta^*}{x}$ such
+that $f(z_x)\notin\{0,1\}$. It is again easy to see that this condition is
+verified for the Independent Cascade Model and the Voter model for the same
+$\alpha\in(0,1)$.
diff --git a/paper/sections/results.tex b/paper/sections/results.tex
index 7fca661..2292ce3 100644
--- a/paper/sections/results.tex
+++ b/paper/sections/results.tex
@@ -44,31 +44,9 @@ A discussion of the $(S,\gamma)$-{\bf(RE)} assumption in the context of
 generalized linear cascade models can be found in Section~\ref{sec:re}. In our
 setting we require that the {\bf(RE)}-condition holds for the Hessian of the
 log-likelihood function $\mathcal{L}$: it essentially captures the fact that
-the binary vectors of the set of active nodes (\emph{i.e} the measurement) are
+the binary vectors of the set of active nodes (\emph{i.e} the measurements) are
 not \emph{too} collinear.
 
-{\color{red} Rewrite the minimal assumptions necessary}
-We will also need the following assumption on the inverse link function $f$ of
-the generalized linear cascade model:
-%\begin{equation}
-    %\tag{LF}
-    %\max\left(\left|\frac{f'}{f}(\inprod{\theta^*}{x})\right|,
-    %\left|\frac{f'}{1-f}(\inprod{\theta^*}{x})\right|\right)\leq
-    %\frac{1}{\alpha}
-%\end{equation}
-\begin{equation}
-  \tag{LF}
-  \max \left( \| (\log f)' \|_{\infty}, \|(\log (1-f))' \|_{\infty} \right)
-  \leq \frac{1}{\alpha}
-\end{equation}
-for some $\alpha\in(0, 1)$ and for all $x\in\reals^m$ such that
-$f(\inprod{\theta^*}{x})\notin\{0,1\}$.
-
-In the voter model, $\frac{f'}{f}(z) = \frac{1}{z}$ and $\frac{f'}{f}(1-z) =
-\frac{1}{1-z}$. Hence (LF) will hold as soon as $\alpha\leq \Theta_{i,j}\leq
-1-\alpha$ for all $(i,j)\in E$ which is always satisfied for some $\alpha$ for
-non-isolated nodes.  In the Independent Cascade Model, $\frac{f'}{f}(z) =
-\frac{1}{1-e^{-z}}$ and $\frac{f'}{1-f}(z) = -1$ and (LF) is not restrictive.
 
 \begin{comment}
 Remember that in this case we have $\Theta_{i,j} = \log(1-p_{i,j})$. These
@@ -231,7 +209,7 @@ As before, edge recovery is a consequence of upper-bounding $\|\theta^* - \hat
         \frac{16}{\epsilon^2}\| \theta^* - \theta^*_{\lfloor
     s\rfloor}\|_1\right)s\log m
 \end{equation}
-then similarly: ${\cal S}^*_{\eta + \epsilon} \subset \hat {\cal S}_\eta
+then: ${\cal S}^*_{\eta + \epsilon} \subset \hat {\cal S}_\eta
 \subset {\cal S}^*$ w.p. at least $1-\frac{1}{m}$.
 \end{corollary}
 
@@ -267,8 +245,8 @@ then
 \end{displaymath}
 and the $(S, \gamma)$-({\bf RE}) condition on the Hessian in
 Theorem~\ref{thm:main} and Theorem~\ref{thm:approx_sparse} reduces to a
-condition on the \emph{gram matrix} of the observations $X^T X =
-\frac{1}{|\mathcal{T}|}\sum_{t\in\mathcal{T}}x^t(x^t)^T$ with $\gamma \leftarrow
+condition on the \emph{Gram matrix} of the observations $X^T X =
+\frac{1}{|\mathcal{T}|}\sum_{t\in\mathcal{T}}x^t(x^t)^T$ for $\gamma' \defeq
 \gamma\cdot c$.
 
 \paragraph{(RE) with high probability}
@@ -287,7 +265,7 @@ probability.
 
 If $f$ and $1-f$ are strictly log-convex, then the previous observations yields
 the following natural interpretation of the {\bf(RE)}-condition: the expected
-\emph{gram matrix} $A \equiv \mathbb{E}[X^T X]$ is a matrix whose entry
+\emph{Gram matrix} $A \equiv \mathbb{E}[X^T X]$ is a matrix whose entry
 $a_{i,j}$ is the average fraction of times node $i$ and node $j$ are infected
 at the same time during several runs of the cascade process. Note that the
 diagonal terms $a_{i,i}$ are simply the average fraction of times the
@@ -295,24 +273,13 @@ corresponding node $i$ is infected.
 
 Therefore, under an assumption which only involves the probabilistic model and
 not the actual observations, Proposition~\ref{prop:fi} allows us to draw the
-same conclusion as in Theorem~\ref{thm:main}. We will need the following
-additional assumptions on the inverse link function $f$:
-\begin{equation}
-    \tag{LF2}
-    \|f'\|_{\infty} \leq M
-    \text{ and }
-    \max\left(\left|\frac{f''}{1-f}\right|,
-    \left|\frac{f''}{f}\right|\right)
-    \leq\frac{1}{\alpha}
-\end{equation}
-whenever $f(\inprod{\theta^*}{x})\notin\{0,1\}$. These conditions are once
-again non restrictive in the (IC) model and (V) model.
+same conclusion as in Theorem~\ref{thm:main}.
 
 \begin{proposition}
     \label{prop:fi}
     Suppose $\E[\nabla^2\mathcal{L}(\theta^*)]$ verifies the $(S,\gamma)$-{\bf
     (RE)} condition and assume {\bf (LF)} and {\bf (LF2)}. For $\delta> 0$, if
-    $n^{1-\delta}\geq \frac{M+2}{21\gamma\alpha}s^2\log m $, then
+    $n^{1-\delta}\geq \frac{1}{28\gamma\alpha}s^2\log m $, then
     $\nabla^2\mathcal{L}(\theta^*)$ verifies the $(S,\frac{\gamma}{2})$-(RE)
     condition, w.p $\geq 1-e^{-n^\delta\log m}$.
 \end{proposition}
author	jeanpouget-abadie <jean.pougetabadie@gmail.com>	2015-05-18 19:19:51 +0200
committer	jeanpouget-abadie <jean.pougetabadie@gmail.com>	2015-05-18 19:19:51 +0200
commit	d60899ced622c27ad69f7aa9a1e0cd66d273901a (patch)
tree	0c9d962679ac936cc60270cf9016f98ed09dd7d4
parent	e41204334108b4ad40246f3d81435a40ff484837 (diff)
parent	2ba75084c8d7f230fcd8fe2fe506b8d8e71f2de1 (diff)
download	cascades-d60899ced622c27ad69f7aa9a1e0cd66d273901a.tar.gz