aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorjeanpouget-abadie <jean.pougetabadie@gmail.com>2015-05-18 19:19:51 +0200
committerjeanpouget-abadie <jean.pougetabadie@gmail.com>2015-05-18 19:19:51 +0200
commitd60899ced622c27ad69f7aa9a1e0cd66d273901a (patch)
tree0c9d962679ac936cc60270cf9016f98ed09dd7d4
parente41204334108b4ad40246f3d81435a40ff484837 (diff)
parent2ba75084c8d7f230fcd8fe2fe506b8d8e71f2de1 (diff)
downloadcascades-d60899ced622c27ad69f7aa9a1e0cd66d273901a.tar.gz
Merge branch 'master' of https://github.com/jeanpouget-abadie/cracking_cascades
-rw-r--r--paper/sections/model.tex66
-rw-r--r--paper/sections/results.tex47
2 files changed, 57 insertions, 56 deletions
diff --git a/paper/sections/model.tex b/paper/sections/model.tex
index 01c7860..26e1a8d 100644
--- a/paper/sections/model.tex
+++ b/paper/sections/model.tex
@@ -121,22 +121,24 @@ time step $t$, then if $j$ is susceptible at time step $t+1$, we have:
\P\big[X^{t+1}_j = 1\,|\, X^{t}\big]
= 1 - \prod_{i = 1}^m {(1 - p_{i,j})}^{X^t_i}.
\end{displaymath}
-Defining $\Theta_{i,j} \defeq \log(1-p_{i,j})$, this can be rewritten as:
+Defining $\Theta_{i,j} \defeq \log(\frac{1}{1-p_{i,j}})$, this can be rewritten as:
\begin{equation}\label{eq:ic}
\tag{IC}
- \P\big[X^{t+1}_j = 1\,|\, X^{t}\big]
- = 1 - \prod_{i = 1}^m e^{\Theta_{i,j}X^t_i}
- = 1 - e^{\inprod{\Theta_j}{X^t}}
+ \begin{split}
+ \P\big[X^{t+1}_j &= 1\,|\, X^{t}\big]
+ = 1 - \prod_{i = 1}^m e^{-\Theta_{i,j}X^t_i}\\
+ &= 1 - e^{-\inprod{\Theta_j}{X^t}}
+\end{split}
\end{equation}
Therefore, the independent cascade model is a Generalized Linear Cascade model
-with inverse link function $f : z \mapsto 1 - e^z$.
+with inverse link function $f : z \mapsto 1 - e^{-z}$.
Note that to write the Independent Cascade Model as a Generalized Linear
Cascade Model, we had to introduce the change of variable $\Theta_{i,j}
-= \log(1-p_{i,j})$. The recovery results in Section~\ref{sec:results} hold on
-the $\Theta_j$ parameters. Fortunately, the following lemma shows that the
-recovery error on $\Theta_j$ is an upper bound on the error on the original
+= \log(\frac{1}{1-p_{i,j}})$. The recovery results in Section~\ref{sec:results}
+hold on the $\Theta_j$ parameters. Fortunately, the following lemma shows that
+the recovery error on $\Theta_j$ is an upper bound on the error on the original
$p_j$ parameters.
\begin{lemma}
@@ -144,7 +146,7 @@ $p_j$ parameters.
\end{lemma}
\begin{proof}
Using the inequality $\forall x>0, \; \log x \geq 1 - \frac{1}{x}$, we have
-$|\log (1 - p) - \log (1-p')| \geq \max(1 - \frac{1-p}{1-p'},
+$|\log (\frac{1}{1 - p}) - \log (\frac{1}{1-p'})| \geq \max(1 - \frac{1-p}{1-p'},
1 - \frac{1-p'}{1-p}) \geq \max( p-p', p'-p)$.
\end{proof}
@@ -176,8 +178,8 @@ independent cascade model with exponential transmission function (CICE)
of~\cite{GomezRodriguez:2010, Abrahao:13, Daneshmand:2014}. Assume that the
temporal resolution of the discretization is $\varepsilon$, \emph{i.e.} all
nodes whose (continuous) infection time is within the interval $[k\varepsilon,
- (k+1)\varepsilon)$ are considered infected at (discrete) time step $t$.
- pLet $X^k$ be the indicator vector of the set of nodes `infected' before or
+ (k+1)\varepsilon)$ are considered infected at (discrete) time step $t$. Let
+ $X^k$ be the indicator vector of the set of nodes `infected' before or
during the $k^{th}$ time interval. Note that contrary to the discrete-time
independent cascade model, $X^k_j = 1 \implies X^{k+1}_j = 1$, that is,
there is no immune state and nodes remain contagious forever.
@@ -288,11 +290,43 @@ until node $i$ becomes contagious, after which its behavior is deterministic.
Contrary to prior work, our results depend on the number of
measurements and not the number of cascades.
-A sufficient condition for program~\eqref{eq:pre-mle} to be a convex program is
-that $\mathcal{L}_i$ is a concave function. This will be the case if, for
-example, $f$ and $(1-f)$ are both log-concave functions. Remember that
-a twice-differentiable function $f$ is log concave iff. $f''f \leq f'^2$. It is
+\paragraph{Regularity assumptions}
+
+We would like program~\eqref{eq:pre-mle} to be a convex program in order to
+solve it efficiently. A sufficient condition is to
+assume that $\mathcal{L}_i$ is a concave function. This will be the case if,
+for example, $f$ and $(1-f)$ are both log-concave functions. Remember that
+a twice-differentiable function $f$ is log-concave iff. $f''f \leq f'^2$. It is
easy to verify this property for $f$ and $(1-f)$ in the Independent Cascade
Model and Voter Model.
-{\color{red} TODO:~talk about the different constraints}
+Furthermore, the data-dependent bounds in Section~\ref{sec:main_theorem} will
+require the following regularity assumption on the inverse link function $f$:
+\begin{equation}
+ \tag{LF}
+ \max \big\{ | (\log f)'(z_x) |, |(\log (1-f))'(z_x) | \big\}
+ \leq \frac{1}{\alpha}
+\end{equation}
+for some $\alpha\in(0, 1)$ and for all $z_x\defeq\inprod{\theta^*}{x}$ such that
+$f(z_x)\notin\{0,1\}$.
+
+In the voter model, $\frac{f'(z)}{f(z)} = \frac{1}{z}$ and
+$\frac{f'(z)}{(1-f)(z)} =
+\frac{1}{1-z}$. Hence (LF) will hold as soon as $\alpha\leq \Theta_{i,j}\leq
+1-\alpha$ for all $(i,j)\in E$ which is always satisfied for some $\alpha$ for
+non-isolated nodes. In the Independent Cascade Model, $\frac{f'(z)}{f(z)} =
+\frac{1}{e^{z}-1}$ and $\frac{f'(z)}{(1-f)(z)} = 1$. Hence (LF) holds as soon
+as $p_{i,j}\geq \alpha$ for all $(i,j)\in E$ which is always satisfied for some
+$\alpha\in(0,1)$.
+
+For the data-independent bound of Proposition~\ref{prop:fi}, we will require the
+following additional regularity assumption:
+\begin{equation}
+ \tag{LF2}
+ \max \big\{ | (\log f)''(z_x) |, |(\log (1-f))''(z_x) | \big\}
+ \leq \frac{1}{\alpha}
+\end{equation}
+for some $\alpha\in(0, 1)$ and for all $z_x\defeq\inprod{\theta^*}{x}$ such
+that $f(z_x)\notin\{0,1\}$. It is again easy to see that this condition is
+verified for the Independent Cascade Model and the Voter model for the same
+$\alpha\in(0,1)$.
diff --git a/paper/sections/results.tex b/paper/sections/results.tex
index 7fca661..2292ce3 100644
--- a/paper/sections/results.tex
+++ b/paper/sections/results.tex
@@ -44,31 +44,9 @@ A discussion of the $(S,\gamma)$-{\bf(RE)} assumption in the context of
generalized linear cascade models can be found in Section~\ref{sec:re}. In our
setting we require that the {\bf(RE)}-condition holds for the Hessian of the
log-likelihood function $\mathcal{L}$: it essentially captures the fact that
-the binary vectors of the set of active nodes (\emph{i.e} the measurement) are
+the binary vectors of the set of active nodes (\emph{i.e} the measurements) are
not \emph{too} collinear.
-{\color{red} Rewrite the minimal assumptions necessary}
-We will also need the following assumption on the inverse link function $f$ of
-the generalized linear cascade model:
-%\begin{equation}
- %\tag{LF}
- %\max\left(\left|\frac{f'}{f}(\inprod{\theta^*}{x})\right|,
- %\left|\frac{f'}{1-f}(\inprod{\theta^*}{x})\right|\right)\leq
- %\frac{1}{\alpha}
-%\end{equation}
-\begin{equation}
- \tag{LF}
- \max \left( \| (\log f)' \|_{\infty}, \|(\log (1-f))' \|_{\infty} \right)
- \leq \frac{1}{\alpha}
-\end{equation}
-for some $\alpha\in(0, 1)$ and for all $x\in\reals^m$ such that
-$f(\inprod{\theta^*}{x})\notin\{0,1\}$.
-
-In the voter model, $\frac{f'}{f}(z) = \frac{1}{z}$ and $\frac{f'}{f}(1-z) =
-\frac{1}{1-z}$. Hence (LF) will hold as soon as $\alpha\leq \Theta_{i,j}\leq
-1-\alpha$ for all $(i,j)\in E$ which is always satisfied for some $\alpha$ for
-non-isolated nodes. In the Independent Cascade Model, $\frac{f'}{f}(z) =
-\frac{1}{1-e^{-z}}$ and $\frac{f'}{1-f}(z) = -1$ and (LF) is not restrictive.
\begin{comment}
Remember that in this case we have $\Theta_{i,j} = \log(1-p_{i,j})$. These
@@ -231,7 +209,7 @@ As before, edge recovery is a consequence of upper-bounding $\|\theta^* - \hat
\frac{16}{\epsilon^2}\| \theta^* - \theta^*_{\lfloor
s\rfloor}\|_1\right)s\log m
\end{equation}
-then similarly: ${\cal S}^*_{\eta + \epsilon} \subset \hat {\cal S}_\eta
+then: ${\cal S}^*_{\eta + \epsilon} \subset \hat {\cal S}_\eta
\subset {\cal S}^*$ w.p. at least $1-\frac{1}{m}$.
\end{corollary}
@@ -267,8 +245,8 @@ then
\end{displaymath}
and the $(S, \gamma)$-({\bf RE}) condition on the Hessian in
Theorem~\ref{thm:main} and Theorem~\ref{thm:approx_sparse} reduces to a
-condition on the \emph{gram matrix} of the observations $X^T X =
-\frac{1}{|\mathcal{T}|}\sum_{t\in\mathcal{T}}x^t(x^t)^T$ with $\gamma \leftarrow
+condition on the \emph{Gram matrix} of the observations $X^T X =
+\frac{1}{|\mathcal{T}|}\sum_{t\in\mathcal{T}}x^t(x^t)^T$ for $\gamma' \defeq
\gamma\cdot c$.
\paragraph{(RE) with high probability}
@@ -287,7 +265,7 @@ probability.
If $f$ and $1-f$ are strictly log-convex, then the previous observations yields
the following natural interpretation of the {\bf(RE)}-condition: the expected
-\emph{gram matrix} $A \equiv \mathbb{E}[X^T X]$ is a matrix whose entry
+\emph{Gram matrix} $A \equiv \mathbb{E}[X^T X]$ is a matrix whose entry
$a_{i,j}$ is the average fraction of times node $i$ and node $j$ are infected
at the same time during several runs of the cascade process. Note that the
diagonal terms $a_{i,i}$ are simply the average fraction of times the
@@ -295,24 +273,13 @@ corresponding node $i$ is infected.
Therefore, under an assumption which only involves the probabilistic model and
not the actual observations, Proposition~\ref{prop:fi} allows us to draw the
-same conclusion as in Theorem~\ref{thm:main}. We will need the following
-additional assumptions on the inverse link function $f$:
-\begin{equation}
- \tag{LF2}
- \|f'\|_{\infty} \leq M
- \text{ and }
- \max\left(\left|\frac{f''}{1-f}\right|,
- \left|\frac{f''}{f}\right|\right)
- \leq\frac{1}{\alpha}
-\end{equation}
-whenever $f(\inprod{\theta^*}{x})\notin\{0,1\}$. These conditions are once
-again non restrictive in the (IC) model and (V) model.
+same conclusion as in Theorem~\ref{thm:main}.
\begin{proposition}
\label{prop:fi}
Suppose $\E[\nabla^2\mathcal{L}(\theta^*)]$ verifies the $(S,\gamma)$-{\bf
(RE)} condition and assume {\bf (LF)} and {\bf (LF2)}. For $\delta> 0$, if
- $n^{1-\delta}\geq \frac{M+2}{21\gamma\alpha}s^2\log m $, then
+ $n^{1-\delta}\geq \frac{1}{28\gamma\alpha}s^2\log m $, then
$\nabla^2\mathcal{L}(\theta^*)$ verifies the $(S,\frac{\gamma}{2})$-(RE)
condition, w.p $\geq 1-e^{-n^\delta\log m}$.
\end{proposition}