% (removed cgit web-view artifacts that preceded the LaTeX source)
\documentclass[10pt]{article}
\usepackage[T1]{fontenc}
\usepackage[utf8]{inputenc}
\usepackage{amsmath, amssymb, amsthm, bbm, fullpage}
\DeclareMathOperator{\E}{\mathbb{E}}
\let\P\relax
\DeclareMathOperator{\P}{\mathbb{P}}
\newcommand{\ex}[1]{\E\left[#1\right]}
\newcommand{\prob}[1]{\P\left[#1\right]}
\date{}
\author{}
\newtheorem{claim}{Claim}
\newtheorem{proposition}{Proposition}
\newtheorem{lemma}{Lemma}

\title{Supplementary Material \\ {\small Inferring Graphs from Cascades: A Sparse Recovery Framework}}

\begin{document}

\maketitle


\subsection*{Learnability and Optimization}

Let $G = (V, E)$ be a weighted directed graph. Without loss of generality, we
can assign a weight $p_{u,v} \in [0,1]$ to every possible edge $(u,v) \in V^2$.
Let $m$ be the number of non-zero edges of $G$. Let $\sigma_G(S, p)$ be the
influence of the set $S \subseteq V$ in $G$ under the IC model with parameters
$p$. 

Recall from \cite{Kempe:03} that:
\begin{equation}
    \sigma_G(S, p) = \sum_{v\in V} \P_{G_p}\big[r_{G_p}(S\leadsto v)\big]
\end{equation}
where $G_p$ is a random graph where each edge $(u,v)\in E$ appears with
probability $p_{u,v}$ and $r_{G_p}(S\leadsto v)$ is the indicator variable of
\emph{there exists a path from $S$ to $v$ in $G_p$}.

\begin{claim}
\label{cla:oracle}
If for all $(u,v) \in V^2$, $p_{u,v} \geq p'_{u,v} \geq p_{u,v}
- \frac{1}{\alpha m}$, then:
\begin{displaymath}
    \forall S \subseteq V,\, \sigma_{G}(S, p) \geq \sigma_{G}(S,p') \geq (1
    - 1/\alpha) \sigma_{G}(S,p)
\end{displaymath}
\end{claim}

\begin{proof}
    We define two coupled random graphs $G_p$ and $G_p'$ as follows: for each
    edge $(u,v)\in E$, draw a uniform random variable $\mathcal{U}_{u,v}\in
    [0,1]$. Include $(u,v)$ in $G_p$ (resp. $G_{p'}$) iff
    $\mathcal{U}_{u,v}\leq p_{u,v}$ (resp. $\mathcal{U}_{u,v}\leq p'_{u,v}$).
    It is clear from the construction that each edge $(u,v)$ will be present in
    $G_p$ (resp. $G_p'$) with probability $p_{u,v}$ (resp. $p'_{u,v}$). Hence:
    \begin{displaymath}
        \sigma_G(S, p) = \sum_{v\in V} \P_{G_p}\big[r_{G_p}(S\leadsto v)\big]
        \text{  and  }
        \sigma_G(S, p') = \sum_{v\in V} \P_{G_p'}\big[r_{G_p'}(S\leadsto v)\big]
    \end{displaymath}

    By construction, $G_{p'}$ is always a subgraph of $G_p$, i.e.,
    $r_{G_{p'}}(S\leadsto v)\leq r_{G_p}(S\leadsto v)$. This proves the left-hand
    side of the Claim's inequality.

    The probability that an edge is present in $G_p$ but not in $G_p'$ is at
    most $\frac{1}{\alpha m}$, so with probability $\left(1-\frac{1}{\alpha
    m}\right)^m$, $G_p$ and $G_p'$ have the same set of edges. This implies that:
    \begin{displaymath}
        \P_{G_{p'}}\big[r_{G_p'}(S\leadsto v)\big]\geq
        \left(1-\frac{1}{\alpha m}\right)^m\P_{G_{p}}\big[r_{G_p}(S\leadsto v)\big]
    \end{displaymath}
    which proves the right-hand side of the claim after observing that
    $\left(1-\frac{1}{\alpha m}\right)^m\geq 1-\frac{1}{\alpha}$ with equality
    iff $m=1$.
\end{proof}

 We can use Claim~\ref{cla:oracle} to find a constant factor approximation
 algorithm for maximising influence on $G$ by maximising influence on $G'$. For
 $k \in \mathbb{N}^*$, let $O_k \in \arg\max_{|S| \leq k} \sigma_G(S)$ and
 $\sigma_{G}^* = \sigma_G(O_k)$, where we omit $k$ when it is unambiguous.


\begin{proposition}
\label{prop:approx_optim}
Suppose we have an unknown graph $G$ and a known graph $G'$ such that $V = V'$ and for all $(u,v) \in V^2, |p_{u,v} - p_{u,v}'| \leq  \frac{1}{\alpha m}$. Then for all $k \in \mathbb{N}^*$, we can find a set $\hat O_k$ such that $\sigma_{G}(\hat O_k) \geq (1 - e^{\frac{2}{\alpha} - 1}) \sigma^*_{G}$.
\end{proposition}

\begin{proof}
For every edge $(u,v) \in V^2$, let $\hat p_{u,v} = p'_{u,v} - \frac{1}{\alpha m}$ and let $\hat G$ be the graph with edge weights $\hat p$. Since $p_{u,v} \geq \hat p_{u,v} \geq p_{u,v} - \frac{2}{\alpha m}$, we are now in the conditions of Claim~\ref{cla:oracle} with $\alpha \leftarrow \alpha/2$. We return the set $\hat O_k$ obtained by greedy maximisation on $\hat G$. It is a classic exercise to show then that $\sigma_G(\hat O_k) \geq (1 - e^{\frac{2}{\alpha} - 1})\,\sigma^*_G$ (see Pset 1, CS284r).
\end{proof}

A small note on the approximation factor: it is only $>0$ for $\alpha > 2$. Note that $\alpha \geq 7 \implies 1 - e^{\frac{2}{\alpha} - 1} \geq \frac{1}{2}$ and that it converges to $(1 - 1/e)$ which is the best possible polynomial-time approximation ratio to influence maximisation unless $P = NP$. Also note that in the limit of large $m$, $(1 -\frac{1}{\alpha m})^m \rightarrow \exp(-1/\alpha)$ and the approximation ratio goes to $1 - \exp(-\exp(-2/\alpha))$.

\subsection*{Obtaining $\| p - p^*\|_{\infty} \leq 1/2m$}

Note that with $n \geq C \frac{s^2}{\gamma \alpha} \log m$ measurements for $C$ an absolute constant, we have for every node $i$, $$\|\theta_i - \theta_i^*\|_2 \leq \frac{6}{\gamma}\sqrt{\frac{s \log m}{\alpha n^{1-\delta}}}$$ with probability $1 - 2e^{-n^\delta \log m}$.

Suppose we want this for every node. Then, by using the first measurements of every cascade where a node isn't infected at $t=0$, which occurs with probability $1 - p_{\text{init}}$ (we'll ignore this for now), we have by union bound, for $n \geq C \frac{\max(s)^2}{\gamma \alpha} \log m$ measurements:
$$\|\theta - \theta^*\|_{2} \leq \frac{6}{\gamma}\sqrt{\frac{|E| \log m}{\alpha n^{1-\delta}}}$$ 
with probability $1 - 2e^{-(n^{\delta} - 1) \log m}$ (by union bound). Note that we could also have reasoned with the $\|\cdot\|_{\infty}$-norm. In that case, we have for $\Delta := \max(s)$, with the same number of measurements and the same probability:
$$\|\theta - \theta^*\|_{\infty} \leq \frac{6}{\gamma}\sqrt{\frac{\Delta\log m}{\alpha n^{1-\delta}}}$$

Suppose $n^{1-\delta} \geq \frac{36 C \Delta |E|}{\gamma^2 \alpha} \log m$, then with high probability:
$$|\theta_{ji} - \theta^*_{ji}| \leq \| \theta - \theta^* \|_{\infty} \leq \frac{1}{\sqrt{|E|C}}$$

We can now show that $f_{\theta^*} \geq \frac{1}{\sqrt{C}} f_{\theta}$ in the independent cascade model.




\subsection*{(RE) with high probability}

\subsubsection*{(RE) for the gram matrix}

Recall the expression of the Hessian:
\begin{equation}
\nonumber
    \nabla^2\mathcal{L}(\theta^*)
    = \frac{1}{|\mathcal{T}|}\sum_{t\in\mathcal{T}}x^t(x^t)^T
    \bigg[x_i^{t+1}\frac{f''f-f'^2}{f^2}(\theta^* \cdot x^t)\\
    -(1-x_i^{t+1})\frac{f''(1-f) + f'^2}{(1-f)^2}(\theta^* \cdot x^t)\bigg]
\end{equation}

This can be rewritten as $X J X^T$ where $J$ is a diagonal matrix with coefficients either $g_1$ or $g_2$. Under certain regularity conditions for $f$ (such as strong log-concavity), we can consider only the gram matrix: $\Delta^T X^T J X \Delta \geq \gamma_1 \Delta^T X^T X \Delta$

Denote by $p_i$ the probability that node $i$ is infected at the beginning of the cascade. Let $\mathbb{P}(i)$ be the probability that $i$ is infected at one point during a cascade. Let $\mathbb{P}(S)$ for any set $S \subseteq V$ be the probability that $S$ is infected (all at once) at one point during a cascade. The diagonal of the Gram matrix can be written as:
\begin{equation}
\nonumber
c_i := \mathbb{P}(i) = p_i - \sum_{S \subseteq \mathcal{N}(i)} (-1)^{|S|} \mathbb{P}(S) \prod_{j \in S} w_{ji}  =  p_i + \sum_{j \in \mathcal{N}(i)} w_{ji}c_j + \dots
\end{equation}
Note that we have the following bounds for $c_i$:

\begin{align}
c_i &\leq p_i + \sum_{j \in \mathcal{N}(i)} w_{ji} c_j \tag{PageRank}\\
c_i &\geq p_i + \max_{j \in \mathcal{N}(i)} w_{ji} c_j \tag{?}
\end{align}

\paragraph{Simple claim}

If we assume that every node has an independent probability $p_i \in (0,1)$ to be a seed node, then by taking the first measurement of every cascade, we get a matrix of the form

$[[p_1, p_1 p_2, \dots],[p_1 p_2, p_2, p_2 p_3, \dots] \dots]$

which can be written as $D \times (A + (I - D))$ where $D = \operatorname{diag}(p_i)$ and $A_{i,j} = p_j$ for all $i$.

Since $\det(D(A + (I-D))) \geq \det(D)\det(A) + \det(D)\det(I-D) \geq \min_i \big(p_i (1-p_i)\big)^m > 0$, we have our $\gamma > 0$.


\subsection*{Relaxed (RE) with high probability}

\subsection*{Lower Bound}


\bibliography{./sparse}
\bibliographystyle{plain}

\end{document}