Diffstat (limited to 'poster_abstract/main.tex')
| -rw-r--r-- | poster_abstract/main.tex | 163 |
1 files changed, 158 insertions, 5 deletions
diff --git a/poster_abstract/main.tex b/poster_abstract/main.tex
index 45f4aad..5c17bf8 100644
--- a/poster_abstract/main.tex
+++ b/poster_abstract/main.tex
@@ -1,7 +1,34 @@
-\documentclass{sig-alternate-2013}
+\documentclass[a4]{sig-alternate-2013}
+\usepackage[utf8x]{inputenc}
+\usepackage{amsmath, amsfonts, amssymb, amsthm, bbm}
+\usepackage{verbatim}
+\newcommand{\reals}{\mathbb{R}}
+\newcommand{\ints}{\mathbb{N}}
+\renewcommand{\O}{\mathcal{O}}
+\DeclareMathOperator{\E}{\mathbb{E}}
+\let\P\relax
+\DeclareMathOperator{\P}{\mathbb{P}}
+\newcommand{\ex}[1]{\E\left[#1\right]}
+\newcommand{\prob}[1]{\P\left[#1\right]}
+\newcommand{\inprod}[2]{#1 \cdot #2}
+\newcommand{\defeq}{\equiv}
+\DeclareMathOperator*{\argmax}{argmax}
+\DeclareMathOperator*{\argmin}{argmin}
-\permission{Permission to make digital or hard copies of part or all of this work for personal or classroom use is granted without fee provided that copies are not made or distributed for profit or commercial advantage, and that copies bear this notice and the full citation on the first page. Copyrights for third-party components of this work must be honored. For all other uses, contact the owner/author(s). Copyright is held by the author/owner(s).}
+\newtheorem{theorem}{Theorem}
+\newtheorem{lemma}{Lemma}
+\newtheorem{corollary}{Corollary}
+\newtheorem{remark}{Remark}
+\newtheorem{proposition}{Proposition}
+\newtheorem{definition}{Definition}
+
+\permission{Permission to make digital or hard copies of part or all of this
+work for personal or classroom use is granted without fee provided that copies
+are not made or distributed for profit or commercial advantage, and that copies
+bear this notice and the full citation on the first page. Copyrights for
+third-party components of this work must be honored. For all other uses,
+contact the owner/author(s). Copyright is held by the author/owner(s).}
\conferenceinfo{WWW 2015 Companion,}{May 18--22, 2015, Florence, Italy.}
\copyrightetc{ACM \the\acmcopyr}
\crdata{978-1-4503-3473-0/15/05. \\
@@ -11,7 +38,6 @@ http://dx.doi.org/10.1145/2740908.2744107}
\widowpenalty = 10000
\title{Inferring Graphs from Cascades:\\ A Sparse Recovery Framework}
-\subtitle{[Extended Abstract]
%\titlenote{A full version of this paper is available as \textit{Author's Guide to Preparing ACM SIG Proceedings Using \LaTeX$2_\epsilon$\ and BibTeX} at \texttt{www.acm.org/eaddress.htm}}}
\numberofauthors{2}
@@ -31,16 +57,143 @@ Thibaut Horel\\
\maketitle
\begin{abstract}
+In the Graph Inference problem, one seeks to recover the edges of an unknown
+graph from the observations of cascades propagating over this graph. We
+approach this problem from the sparse recovery perspective. We introduce
+a general model of cascades, including the voter model and the independent
+cascade model, for which we provide the first algorithm that recovers the
+graph's edges with high probability from ${\cal O}(s\log m)$ measurements, where
+$s$ is the maximum degree of the graph and $m$ is the number of nodes.
+Furthermore, we show that our algorithm also recovers the edge weights (the
+parameters of the diffusion process) and remains robust when the signal is only
+approximately sparse. Finally, we prove an almost matching lower bound of
+$\Omega(s\log\frac{m}{s})$ and validate our approach empirically on synthetic
+graphs.
\end{abstract}
\category{I.2.6}{Artificial Intelligence}{Learning}[Parameter Learning]
\terms{Theory, Performance, Measurements}
\keywords{Graph Inference; Cascades; Sparse Recovery}
+\section{Introduction}
+
+A recent line of research has focused on the Graph Inference Problem:
+recovering the edges of an unknown graph from observations of a diffusion
+process propagating on this graph \cite{Netrapalli:2012} CITE. The Independent
+Cascade Model CITE is a well-known example of such a diffusion process, in
+which the edges between a given vertex and its parents are weighted; the
+weights are parameters of the model and capture how much influence a parent
+vertex has over its child vertex.
+
+Here, we propose a sparse recovery framework to not only solve the Graph
+Inference Problem, but also recover the unknown weights of the diffusion
+process. The parallel with sparse recovery problems is as follows: for a given
+vertex, the signal to recover is a vector of weights, the “influence” of its
+(unknown) parents in the graph. This signal is observed indirectly through
+instances of the diffusion process: the only information available to the
+observer is the time step at which each vertex in the graph becomes “infected”
+by the diffusion process. The two main challenges in applying sparse recovery
+tools to this problem are: (1) contrary to a very common assumption, the
+measurements produced by a diffusion process are correlated; (2) most diffusion
+processes lead to non-linear sparse recovery problems.
+
+In what follows, we first present a general class of discrete-time diffusion
+processes which encompasses the Independent Cascade Model and the Voter Model
+CITE. For this class of diffusion processes, despite the aforementioned
+challenges, we show how to recover the unknown parameters with a convergence
+rate $O(s\log m/\sqrt{n})$, where $s$ is the in-degree of a given vertex, $m$ is
+the number of vertices in the graph and $n$ is the number of observations. In
+particular, given $\Omega(s\log m)$ measurements, we are able to recover the
+edges of the graph. Finally, we validate our approach experimentally by
+comparing its performance to state-of-the-art algorithms on synthetic data.
+
+\section{Model}
+
+Let us consider a graph $G = (V, E)$ with $|V| = m$, where the set of edges $E$
+is unknown. For a given vertex $j$, the cascade model is parametrized by
+a vector $\theta_j\in\reals^m$ whose $i$-th coordinate captures the
+“influence” of vertex $i$ on $j$. This influence is 0 if $i$ is
+not a parent of $j$.
+
+A cascade is characterized by the propagation of a “contagious” state in
+discrete time steps. Initially, each vertex is contagious with probability
+$p_{\text{init}}$. Let us denote by $X^0$ the indicator vector of the initially
+contagious vertices.
+Denoting by $X^t$ the indicator vector of the set of
+vertices which are contagious at time step $t$, the probability that $j$ will
+be contagious at time $t+1$ is given by:
+\begin{equation}
+ \label{eq:glm}
+ \mathbb{P}(X^{t+1}_j = 1|X^t)
+ = f(\theta_j \cdot X^t)
+\end{equation}
+where $f: \reals \rightarrow [0,1]$. Conditioned on $X^t$, this
+evolution happens independently for each vertex at time $t+1$.
+
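+% Editor's note: the two examples below are supplied to illustrate
+% \eqref{eq:glm}; the parametrizations are standard but assumed here rather
+% than taken from the draft.
+For example, the Independent Cascade Model fits this framework: if $p_{i,j}$
+denotes the probability that a contagious vertex $i$ infects $j$, then writing
+$\theta_{j,i} \defeq -\log(1 - p_{i,j})$ gives
+\begin{equation*}
+  \mathbb{P}(X^{t+1}_j = 1|X^t) = 1 - \prod_{i}(1 - p_{i,j})^{X^t_i}
+  = 1 - e^{-\inprod{\theta_j}{X^t}},
+\end{equation*}
+that is, \eqref{eq:glm} with $f(z) = 1 - e^{-z}$. The Voter Model, in which
+$j$ copies the state of a parent chosen uniformly at random, corresponds to
+the identity link $f(z) = z$ with $\theta_{j,i} = 1/\deg(j)$ for every parent
+$i$ of $j$.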
+
+\section{Results}
+
+For a given vertex $i$, we are given a set of measurements $(X^t,
+X^{t+1}_i)_{t\in\mathcal{T}_i}$ generated from \eqref{eq:glm}. We estimate
+$\theta_i$ via $\ell_1$-regularized maximum likelihood estimation:
+\begin{equation}\label{eq:pre-mle}
+ \hat{\theta}_i \in \argmax_{\theta_i} \mathcal{L}_i(\theta_i\,|\,x^1,\ldots,x^n)
+ - \lambda\|\theta_i\|_1
+\end{equation}
+where $\mathcal{L}_i$ is the log-likelihood of $\theta_i$ given the
+observations:
+\begin{multline*}
+ \mathcal{L}_i(\theta_i\,|\,x^1,\ldots,x^n) = \frac{1}{|{\cal T}_i|}
+ \sum_{t\in {\cal T}_i } \Big[ x_i^{t+1}\log f(\inprod{\theta_i}{x^{t}}) \\
+ + (1 - x_i^{t+1})\log\big(1-f(\inprod{\theta_i}{x^t})\big)\Big]
+\end{multline*}
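+% Editor's note: worked instance added for illustration, using the Independent
+% Cascade link $f(z) = 1 - e^{-z}$ from the example above.
+For instance, with the Independent Cascade link $f(z) = 1 - e^{-z}$ we have
+$\log\big(1 - f(z)\big) = -z$, and the objective specializes to
+\begin{equation*}
+  \mathcal{L}_i(\theta_i\,|\,x^1,\ldots,x^n) = \frac{1}{|{\cal T}_i|}
+  \sum_{t\in {\cal T}_i} \Big[ x_i^{t+1}
+  \log\big(1 - e^{-\inprod{\theta_i}{x^t}}\big)
+  - (1 - x_i^{t+1})\,\inprod{\theta_i}{x^t}\Big],
+\end{equation*}
+which is concave in $\theta_i$, so that \eqref{eq:pre-mle} is a convex program.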
+
+We will need the following assumptions:
+\begin{enumerate}
+ \item $\log f$ and $\log (1-f)$ are concave functions.
+ \item $\log f$ and $\log (1-f)$ have derivatives bounded in absolute value
+ by $\frac{1}{\alpha}$ for some $\alpha > 0$.
+ \item denoting by $S$ the support of the true vector of parameters
+ $\theta_i^*$, define $\mathcal{C}(S)\defeq
+ \{X\in\reals^m\,:\,\|X\|_1\leq 1\text{ and } \|X_{S^c}\|_1\leq
+ 3\|X_S\|_1\}$. We assume that:
+\begin{equation*}
+ \forall X \in \mathcal{C}(S),\quad -X^T \nabla^2\mathcal{L}_i(\theta_i^*)\,X
+ \geq \gamma \|X\|_2^2
+\end{equation*}
+for some $\gamma>0$.
+\end{enumerate}
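+
+% Editor's note: the following check of Assumption 1 and the remark on
+% Assumption 3 are editor-supplied, for illustration.
+As a sanity check, Assumption 1 holds for both examples of the previous
+section: for $f(z) = 1 - e^{-z}$, $\log(1 - f(z)) = -z$ is linear and
+$\frac{d}{dz}\log f(z) = \frac{e^{-z}}{1 - e^{-z}}$ is decreasing, so both
+functions are concave; for $f(z) = z$, both $\log z$ and $\log(1 - z)$ are
+concave on $(0,1)$. Assumption 3 is a restricted strong concavity condition on
+the log-likelihood, analogous to the restricted eigenvalue conditions of the
+sparse recovery literature.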
+
+\begin{theorem}
+\label{thm:main}
+Assume 1.\ to 3.\ above hold and define $n\defeq |\mathcal{T}_i|$. For any
+$\delta\in(0,1)$, let $\hat{\theta}_i$ be a solution of \eqref{eq:pre-mle}
+with $\lambda \defeq 2\sqrt{\frac{\log m}{\alpha n^{1 - \delta}}}$. Then:
+\begin{equation}
+ \label{eq:sparse}
+ \|\hat \theta_i - \theta^*_i \|_2
+ \leq \frac{6}{\gamma} \sqrt{\frac{s \log m}{\alpha n^{1-\delta}}}
+\quad
+\text{w.p.}\;1-\frac{1}{e^{n^\delta \log m}}
+\end{equation}
+\end{theorem}
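+
+% Editor's note: the following remark on edge recovery is an editor-supplied
+% consequence of Theorem~\ref{thm:main}, not taken from the draft.
+In particular, whenever the right-hand side of \eqref{eq:sparse} is smaller
+than half of the smallest non-zero coordinate of $\theta_i^*$, thresholding
+the entries of $\hat{\theta}_i$ at that level recovers the support of
+$\theta_i^*$ exactly, that is, the set of parents of $i$. Treating $\alpha$,
+$\gamma$ and the minimum edge weight as constants, this happens as soon as
+$n^{1-\delta} = \Omega(s\log m)$.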
+
+Assumption 3. above can be replaced by the following data-independent
+assumption:
+\begin{enumerate}
+ \item[3'.] $\log f$ and $\log (1-f)$ are $\varepsilon$-concave and the
+ expected Hessian $H = \lim_{n\rightarrow\infty}\frac{1}{n}X^TX$ has
+ its smallest eigenvalue bounded from below by $\gamma>0$, where $X$ is
+ the $n\times m$ design matrix whose $k$-th row is $X^k$.
+\end{enumerate}
+at the cost of a slightly reduced convergence rate.
+\section{Experiments}
+
%\section{Acknowledgments}
-%\bibliographystyle{abbrv}
-%\bibliography{sigproc}
+\bibliographystyle{abbrv}
+\bibliography{sparse}
%\balancecolumns
\end{document}
