% (removed: stray line-number artifact introduced by file extraction)
\documentclass[final]{beamer}
\usepackage[utf8]{inputenc}
\usepackage[scale=1.6]{beamerposter} % Use the beamerposter package for laying out the poster
\usetheme{confposter} % Use the confposter theme supplied with this template
\usepackage{color, bbm}
\setbeamercolor{block title}{fg=dblue,bg=white} % Colors of the block titles
\setbeamercolor{block body}{fg=black,bg=white} % Colors of the body of blocks
\setbeamercolor{block alerted title}{fg=white,bg=dblue!70} % Colors of the highlighted block titles
\setbeamercolor{block alerted body}{fg=black,bg=dblue!10} % Colors of the body of highlighted blocks
% Many more colors are available for use in beamerthemeconfposter.sty
%-----------------------------------------------------------
% Define the column widths and overall poster size
% To set effective sepwid, onecolwid and twocolwid values, first choose how many columns you want and how much separation you want between columns
% In this template, the separation width chosen is 0.024 of the paper width and a 4-column layout
% onecolwid should therefore be (1-(# of columns+1)*sepwid)/# of columns e.g. (1-(4+1)*0.024)/4 = 0.22
% Set twocolwid to be (2*onecolwid)+sepwid = 0.464
% Set threecolwid to be (3*onecolwid)+2*sepwid = 0.708
\newlength{\sepwid}
\newlength{\onecolwid}
\newlength{\twocolwid}
\newlength{\threecolwid}
\setlength{\paperwidth}{48in} % A0 width: 46.8in
\setlength{\paperheight}{40in} % A0 height: 33.1in
\setlength{\sepwid}{0.024\paperwidth} % Separation width (white space) between columns
\setlength{\onecolwid}{0.22\paperwidth} % Width of one column
\setlength{\twocolwid}{0.464\paperwidth} % Width of two columns
\setlength{\threecolwid}{0.708\paperwidth} % Width of three columns
\setlength{\topmargin}{-1in} % Reduce the top margin size
%-----------------------------------------------------------
\usepackage{graphicx} % Required for including images
\usepackage{booktabs} % Top and bottom rules for tables
%----------------------------------------------------------------------------------------
% TITLE SECTION
%----------------------------------------------------------------------------------------
\title{Sparse Recovery for Graph Reconstruction} % Poster title
\author{Eric Balkanski, Jean Pouget-Abadie} % Author(s)
\institute{Harvard University} % Institution(s)
%----------------------------------------------------------------------------------------
\begin{document}
\addtobeamertemplate{block end}{}{\vspace*{2ex}} % White space under blocks
\addtobeamertemplate{block alerted end}{}{\vspace*{2ex}} % White space under highlighted (alert) blocks
\setlength{\belowcaptionskip}{2ex} % White space under figures
\setlength\belowdisplayshortskip{2ex} % White space under equations
\begin{frame}[t] % The whole poster is enclosed in one beamer frame
\begin{columns}[t] % The whole poster consists of three major columns, the second of which is split into two columns twice - the [t] option aligns each column's content to the top
\begin{column}{\sepwid}\end{column} % Empty spacer column
\begin{column}{\onecolwid} % The first column
%----------------------------------------------------------------------------------------
% INTRODUCTION
%----------------------------------------------------------------------------------------
%\vspace{- 15.2 cm}
%\begin{center}
%{\includegraphics[height=7em]{logo.png}} % First university/lab logo on the left
%\end{center}
%\vspace{4.6 cm}
\begin{block}{Problem Statement}
\begin{center}
\textbf{How can we reconstruct a graph on which observed cascades spread?}
\end{center}
\end{block}
%{\bf Graph Reconstruction}:
%\begin{itemize}
%\item \{${\cal G}, \vec p$\}: directed graph, edge probabilities
%\item $F$: cascade generating model
%\item ${\cal M} := F\{{\cal G}, \vec p\}$: cascade
%\end{itemize}
%{\bf Objective}:
%\begin{itemize}
%\item Find algorithm which computes $F^{-1}({\cal M}) = \{{\cal G}, \vec p\}$ w.h.p., i.e. recovers graph from cascades.
%\end{itemize}
%{\bf Approach}
%\begin{itemize}
%\item Frame graph reconstruction as a {\it Sparse Recovery} problem for two cascade generating models.
%\end{itemize}
%Given a set of observed cascades, the \textbf{graph reconstruction problem} consists of finding the underlying graph on which these cascades spread. We assume that these cascades come from the classical \textbf{Independent Cascade Model} where at each time step, newly infected nodes infect each of their neighbor with some probability.
%In previous work, this problem has been formulated in different ways, including a convex optimization and a maximum likelihood problem. However, there is no known algorithm for graph reconstruction with theoretical guarantees and with a reasonable required sample size.
%We formulate a novel approach to this problem in which we use \textbf{Sparse Recovery} to find the edges in the unknown underlying network. Sparse Recovery is the problem of finding the sparsest vector $x$ such that $\mathbf{M x =b}$. In our case, for each node $i$, we wish to recover the vector $x = p_i$ where $p_{i_j}$ is the probability that node $j$ infects node $i$ if $j$ is active. To recover this vector, we are given $M$, where row $M_{t,k}$ indicates which nodes are infected at time $t$ in observed cascade $k$, and $b$, where $b_{t+1,k}$ indicates if node $i$ is infected at time $t+1$ in cascade $k$. Since most nodes have a small number of neighbors in large networks, we can assume that these vectors are sparse. Sparse Recovery is a well studied problem which can be solved efficiently and with small error if $M$ satisfies certain properties. In this project, we empirically study to what extent $M$ satisfies the Restricted Isometry Property.
%---------------------------------------------------------------------------------
%---------------------------------------------------------------------------------
\begin{block}{Voter Model}
\begin{figure}
\centering
\includegraphics[width=0.6\textwidth]{images/voter.png}
\end{figure}
\vspace{0.5 cm}
\textbf{Description}
\vspace{0.5 cm}
\begin{itemize}
\item $\mathbb{P}(\text{{\color{blue}blue} at } t=0) = p_{\text{init}}$
\item $\mathbb{P}(\text{{\color{blue}blue} at } t+1) = \frac{\text{Number of {\color{blue}blue} neighbors}}{\text{Total number of neighbors}}$
\end{itemize}
\vspace{0.5 cm}
\textbf{Sparse Recovery Formulation}
\vspace{0.5 cm}
To recover the neighbors of $v_1$, observe which nodes are {\color{red} red} (1) or {\color{blue} blue} (0) at time step $t$:
\begin{align*}
&v_2 \hspace{0.2 cm} v_3 \hspace{0.2 cm} v_4 \hspace{0.2 cm} v_5 &\\
\vspace{1 cm}
M = & \left( \begin{array}{cccc}
0 & 0 & 1 & 1 \\
1 & 1 & 0 & 0 \\
\end{array} \right) & \begin{array}{l} \hspace{ - 5cm}
\text{time step 0} \\
\hspace{ - 5cm} \text{time step 1} \\
\end{array}
\end{align*}
and which color $v_1$ is at time step $t+1$ due to $M$:
\begin{align*}
b_1 = & \left( \begin{array}{c}
1 \\
1 \\
\end{array} \right) & \begin{array}{l} \hspace{ - 5cm}
\text{time step 1} \\
\hspace{ - 5cm} \text{time step 2} \\
\end{array}
\end{align*}
Then,
\begin{equation}
\boxed{M \vec x_1 = \vec b_1 + \epsilon \nonumber}
\end{equation}
where $(\vec x_{1})_j := \frac{1}{\deg(1)} \cdot \mathbbm{1}\left[\text{$j$ parent of $1$ in } \mathcal{G}\right]$
\end{block}
%---------------------------------------------------------------------------------
%---------------------------------------------------------------------------------
%---------------------------------------------------------------------------------
%---------------------------------------------------------------------------------
%---------------------------------------------------------------------------------
%---------------------------------------------------------------------------------
\end{column} % End of the first column
\begin{column}{\sepwid}\end{column} % Empty spacer column
\begin{column}{\onecolwid} % The second column
%----------------------------------------------------------------------------------------
% INDEPENDENT CASCADES MODEL
%----------------------------------------------------------------------------------------
\begin{block}{Independent Cascades Model}
\begin{figure}
\centering
\includegraphics[width=0.6\textwidth]{images/icc.png}
\end{figure}
\vspace{0.5 cm}
\textbf{Description}
\vspace{0.5 cm}
\begin{itemize}
\item Three possible states: {\color{blue} susceptible}, {\color{red} infected}, {\color{yellow} inactive }
\item $\mathbb{P}(\text{infected at } t=0) = p_{\text{init}}$
\item Infected node $j$ infects its susceptible neighbors $i$ with probability $p_{j,i}$ independently
\end{itemize}
\vspace{0.5 cm}
\textbf{Sparse Recovery Formulation}
\vspace{0.5 cm}
To recover the neighbors of $v_5$, observe which nodes are {\color{red} red} (1), {\color{blue} blue} (0), or {\color{yellow} yellow} (0) at time step $t$:
\begin{align*}
&v_1 \hspace{0.2 cm} v_2 \hspace{0.2 cm} v_3 \hspace{0.2 cm} v_4 \\
\vspace{1 cm}
M = & \left( \begin{array}{cccc}
1 & 0 & 0 & 0 \\
0 & 1 & 1 & 0 \\
\end{array} \right) \begin{array}{l} \hspace{ 1cm}
\text{time step 0} \\
\hspace{ 1cm} \text{time step 1} \\
\end{array}
\end{align*}
and if $M$ caused $v_5$ to be infected at time step $t+1$:
\begin{align*}
b_5 = & \left( \begin{array}{c}
0 \\
1 \\
\end{array} \right) \begin{array}{l} \hspace{ 1cm}
\text{time step 1} \\
\hspace{ 1cm} \text{time step 2} \\
\end{array}
\end{align*}
Then,
\begin{equation}
\boxed{e^{M \vec \theta_5} = (1 - \vec b_5) + \epsilon} \nonumber
\end{equation}
where $(\vec \theta_5)_j := \log ( 1 - p_{j,5}) $
\vspace{1 cm}
This problem is a \textbf{Noisy Sparse Recovery} problem, which has been studied extensively. Here, the vectors $\vec \theta_i$ are $\deg(i)$-sparse.
\end{block}
%----------------------------------------------------------------------------------------
%----------------------------------------------------------------------------------------
% MIP
%----------------------------------------------------------------------------------------
% \begin{block}{RIP property}
% %The Restricted Isometry Property (RIP) characterizes a quasi-orthonormality of the measurement matrix M on sparse vectors.
% For all k, we define $\delta_k$ as the best possible constant such that for all unit-normed ($l$2) and k-sparse vectors $x$
% \begin{equation}
% 1-\delta_k \leq \|Mx\|^2_2 \leq 1 + \delta_k
% \end{equation}
% In general, the smaller $\delta_k$ is, the better we can recover $k$-sparse vectors $x$!
% \end{block}
%----------------------------------------------------------------------------------------
\end{column}
\begin{column}{\sepwid}\end{column} % Empty spacer column
\begin{column}{\onecolwid} % The third column
%----------------------------------------------------------------------------------------
\begin{block}{Algorithms}
\textbf{Voter Model}
\begin{itemize}
\item Solve for each node $i$:
\begin{equation}
\min_{\vec x_i} \|\vec x_i\|_1 + \lambda \|M \vec x_i - \vec b_i \|_2 \nonumber
\end{equation}
\end{itemize}
\textbf{Independent Cascade Model}
\begin{itemize}
\item Solve for each node $i$:
\begin{equation}
\min_{\vec \theta_i} \|\vec \theta_i\|_1 + \lambda \|e^{M \vec \theta_i} - (1 - \vec b_i) \|_2 \nonumber
\end{equation}
\end{itemize}
\end{block}
\begin{block}{Restricted Isometry Property (RIP)}
\textbf{Definition}
\begin{itemize}
\item Characterizes a quasi-orthonormality of M on sparse vectors.
\item The RIP constant $\delta_k$ is the best possible constant such that for all unit-normed ($\ell_2$) and $k$-sparse vectors $x$:
\begin{equation}
1-\delta_k \leq \|Mx\|^2_2 \leq 1 + \delta_k \nonumber
\end{equation}
\item The smaller $\delta_k$ is, the better we can recover $k$-sparse vectors $x$.
\end{itemize}
\end{block}
\begin{block}{Theoretical Guarantees}
With small RIP constants $(\delta \leq 0.25)$ for $M$ and some assumption on the noise $\epsilon$:
\textbf{Theorem \cite{candes}}
If node $i$ has degree $\Delta$ and $n_{\text{rows}}(M) \geq C_1 \mu \Delta \log n$, then, w.h.p.,
$$\| \hat x - x^* \|_2 \leq C (1 + \log^{3/2}(n))\sqrt{\frac{\Delta \log n}{n_{\text{rows}}(M) }}$$
\end{block}
%----------------------------------------------------------------------------------------
% RESULTS
%----------------------------------------------------------------------------------------
\begin{block}{RIP Experiments}
\begin{center}
\begin{table}
\begin{tabular}{c | c | c | c | c }
& $c$ = 100, &$c$ = 1000,& $c$ = 100, &$c$ = 1000,\\
& $i$ = 0.1& $i$ = 0.1& $i$ = 0.05& $i$ = 0.05\\
\hline
$\delta_4$ & 0.54 & 0.37 &0.43&0.23 \\
\end{tabular}
\caption{RIP constant for a small graph. Here, $c$ is the number of cascades and $i$ is $p_{\text{init}}$.}
\end{table}
\end{center}
\end{block}
%----------------------------------------------------------------------------------------
\end{column} % End of the second column
\begin{column}{\sepwid}\end{column} % Empty spacer column
\begin{column}{\onecolwid} % The fourth column
%----------------------------------------------------------------------------------------
% OVERALL COMPARISON
%----------------------------------------------------------------------------------------
%\vspace{- 14.2 cm}
%\begin{center}
%{\includegraphics[height=7em]{cmu_logo.png}} % First university/lab logo on the left
%\end{center}
%\vspace{4 cm}
\begin{alertblock}{Experimental Results}
% TODO(review): this block is empty — experimental results (plots/tables) appear to be missing
\end{alertblock}
%----------------------------------------------------------------------------------------
%----------------------------------------------------------------------------------------
% CONCLUSION
%----------------------------------------------------------------------------------------
\begin{block}{Conclusion}
\begin{center}
{\bf Graph reconstruction can naturally be expressed as Sparse Recovery. Understanding properties of $M$, for example RIP, leads to theoretical guarantees on the reconstruction.}
\end{center}
\end{block}
%----------------------------------------------------------------------------------------
% REFERENCES
%----------------------------------------------------------------------------------------
\begin{block}{References}
\begin{thebibliography}{42}
\bibitem{candes}
Candès, E., and Plan, Y.
\newblock \textit{A Probabilistic and RIPless Theory of Compressed Sensing}.
\newblock IEEE Transactions on Information Theory, 57(11):7235--7254,
\newblock 2011.
\end{thebibliography}
\end{block}
%----------------------------------------------------------------------------------------
\end{column} % End of the third column
\end{columns} % End of all the columns in the poster
\end{frame} % End of the enclosing frame
\end{document}
% (removed: stray artifact introduced by file extraction)