\documentclass[a4]{sig-alternate-2013}
\usepackage[utf8x]{inputenc}
\usepackage{amsmath, amsfonts, amssymb, amsthm, bbm}
\usepackage{verbatim}

\newcommand{\reals}{\mathbb{R}}
\newcommand{\ints}{\mathbb{N}}
\renewcommand{\O}{\mathcal{O}}
\DeclareMathOperator{\E}{\mathbb{E}}
\let\P\relax
\DeclareMathOperator{\P}{\mathbb{P}}
\newcommand{\ex}[1]{\E\left[#1\right]}
\newcommand{\prob}[1]{\P\left[#1\right]}
\newcommand{\inprod}[2]{#1 \cdot #2}
\newcommand{\defeq}{\equiv}
\DeclareMathOperator*{\argmax}{argmax}
\DeclareMathOperator*{\argmin}{argmin}

\newtheorem{theorem}{Theorem}
\newtheorem{lemma}{Lemma}
\newtheorem{corollary}{Corollary}
\newtheorem{remark}{Remark}
\newtheorem{proposition}{Proposition}
\newtheorem{definition}{Definition}

\permission{Permission to make digital or hard copies of part or all of this work for personal or classroom use is granted without fee provided that copies are not made or distributed for profit or commercial advantage, and that copies bear this notice and the full citation on the first page. Copyrights for third-party components of this work must be honored. For all other uses, contact the owner/author(s). Copyright is held by the author/owner(s).}
\conferenceinfo{WWW 2015 Companion,}{May 18--22, 2015, Florence, Italy.}
\copyrightetc{ACM \the\acmcopyr}
\crdata{978-1-4503-3473-0/15/05. \\ http://dx.doi.org/10.1145/2740908.2744107}

\clubpenalty=10000
\widowpenalty=10000

\title{Inferring Graphs from Cascades:\\ A Sparse Recovery Framework}

\numberofauthors{2}
\author{
\alignauthor Jean Pouget-Abadie\\
\affaddr{Harvard University}\\
\email{pougetabadie@g.harvard.edu}
\alignauthor Thibaut Horel\\
\affaddr{Harvard University}\\
\email{thorel@seas.harvard.edu}
}

\begin{document}
\maketitle

\begin{abstract}
In the Graph Inference problem, one seeks to recover the edges of an unknown graph from observations of cascades propagating over this graph. We approach this problem from the sparse recovery perspective. We introduce a general model of cascades, which includes the voter model and the independent cascade model, and for which we provide the first algorithm that recovers the graph's edges with high probability from $\O(s\log m)$ measurements, where $s$ is the maximum degree of the graph and $m$ is the number of nodes. Furthermore, we show that our algorithm also recovers the edge weights (the parameters of the diffusion process) and is robust in the context of approximate sparsity. Finally, we prove an almost matching lower bound of $\Omega(s\log\frac{m}{s})$ and validate our approach empirically on synthetic graphs.
\end{abstract}

\category{I.2.6}{Artificial Intelligence}{Learning}[Parameter Learning]
%\terms{Theory, Performance, Measurements}
%\keywords{Graph Inference; Cascades; Sparse Recovery}

\section{Introduction}

A recent line of research has focused on the Graph Inference problem: recovering the edges of an unknown graph from observations of a diffusion process propagating over this graph \cite{Netrapalli:2012}. The Independent Cascade model is a well-known example of such a diffusion process: the edges between a given vertex and its parents are weighted, and the weights are parameters of the model which capture how much influence a parent vertex has over its child.

In this work, we propose a sparse recovery framework which not only solves the Graph Inference problem, but also recovers the unknown weights of the diffusion process. The parallel with sparse recovery is the following: for a given vertex, the signal to recover is its vector of incoming edge weights, the ``influence'' of its (unknown) parents in the graph. This signal is observed indirectly through instances of the diffusion process: the only information available to the observer is the time step at which each vertex of the graph becomes ``infected.'' The two main challenges in applying sparse recovery tools to this problem are that (1) contrary to a very common assumption, the measurements given by a diffusion process are correlated, and (2) most diffusion processes lead to non-linear sparse recovery problems. In what follows, we first present a general class of discrete-time diffusion processes which encompasses the Independent Cascade model and the Voter model. For this class of diffusion processes, despite the aforementioned challenges, we show how to recover the unknown parameters at a convergence rate of $\O(\sqrt{s\log m/n})$, where $s$ is the in-degree of a given vertex, $m$ is the number of vertices in the graph, and $n$ is the number of observations. In particular, given $\Omega(s\log m)$ measurements, we are able to recover the edges of the graph. Finally, we validate our approach experimentally by comparing its performance to state-of-the-art algorithms on synthetic data.

\section{Model}

Consider a graph $G = (V, E)$ with $|V| = m$, where the set of edges $E$ is unknown. For a given vertex $j$, the cascade model is parametrized by a vector $\theta_j \in \reals^m$ whose $i$-th coordinate captures the ``influence'' of vertex $i$ on $j$; this influence is $0$ whenever $i$ is not a parent of $j$. A cascade is characterized by the propagation of a ``contagious'' state in discrete time steps. Initially, each vertex is contagious with probability $p_{\text{init}}$, and we denote by $X^0$ the indicator vector of the initially contagious vertices. Denoting by $X^t$ the indicator vector of the set of vertices which are contagious at time step $t$, the probability that vertex $j$ is contagious at time $t+1$ is given by:
\begin{equation}
\label{eq:glm}
\P(X^{t+1}_j = 1 \,|\, X^t) = f(\inprod{\theta_j}{X^t})
\end{equation}
where $f: \reals \rightarrow [0,1]$. Conditioned on $X^t$, this evolution happens independently for each vertex at time $t+1$.
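To illustrate the generality of \eqref{eq:glm}, we briefly sketch how two classical processes fit this framework; below, $p_{ij}$ and $w_{ij}$ denote the edge parameters of each process. In a discrete-time variant of the Independent Cascade model, each contagious vertex $i$ attempts to infect $j$ independently with probability $p_{ij}$ at every step, so that
\begin{equation*}
\P(X^{t+1}_j = 1 \,|\, X^t) = 1 - \prod_{i\,:\,X^t_i = 1}(1 - p_{ij}) = 1 - e^{-\inprod{\theta_j}{X^t}}
\end{equation*}
with $(\theta_j)_i \defeq -\log(1 - p_{ij})$; this is \eqref{eq:glm} with $f(x) = 1 - e^{-x}$. In the Voter model, vertex $j$ adopts the state of a parent chosen at random, parent $i$ being chosen with probability $w_{ij}$ (where $\sum_i w_{ij} = 1$), so that $\P(X^{t+1}_j = 1\,|\,X^t) = \inprod{\theta_j}{X^t}$ with $(\theta_j)_i \defeq w_{ij}$; this is \eqref{eq:glm} with the identity function $f(x) = x$.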
\section{Results}

For a given vertex $i$, we are given a set of measurements $(X^t, X^{t+1}_i)_{t\in\mathcal{T}_i}$ generated from \eqref{eq:glm}. We estimate $\theta_i$ via $\ell_1$-regularized maximum likelihood estimation:
\begin{equation}\label{eq:pre-mle}
\hat{\theta}_i \in \argmax_{\theta_i}\; \mathcal{L}_i(\theta_i\,|\,x^1,\ldots,x^n) - \lambda\|\theta_i\|_1
\end{equation}
where $\mathcal{L}_i$ is the log-likelihood of $\theta_i$ given the observations:
\begin{multline*}
\mathcal{L}_i(\theta_i\,|\,x^1,\ldots,x^n) = \frac{1}{|{\cal T}_i|} \sum_{t\in {\cal T}_i} \Big( x_i^{t+1}\log f(\inprod{\theta_i}{x^{t}}) \\
+ (1 - x_i^{t+1})\log\big(1-f(\inprod{\theta_i}{x^t})\big) \Big)
\end{multline*}
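To make \eqref{eq:pre-mle} concrete, consider the Independent Cascade instantiation $f(x) = 1 - e^{-x}$ sketched above; since $\log(1 - f(x)) = -x$ in this case, the log-likelihood reduces to
\begin{equation*}
\mathcal{L}_i(\theta_i\,|\,x^1,\ldots,x^n) = \frac{1}{|\mathcal{T}_i|}\sum_{t\in\mathcal{T}_i}\Big( x_i^{t+1}\log\big(1 - e^{-\inprod{\theta_i}{x^t}}\big) - (1 - x_i^{t+1})\,\inprod{\theta_i}{x^t} \Big)
\end{equation*}
Both terms of the summand are concave in $\theta_i$, so \eqref{eq:pre-mle} is a convex optimization problem, amenable to standard first-order methods.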
We will need the following assumptions:
\begin{enumerate}
\item $\log f$ and $\log(1-f)$ are concave functions.
\item $\log f$ and $\log(1-f)$ have derivatives bounded in absolute value by $\frac{1}{\alpha}$ for some $\alpha > 0$.
\item Denoting by $S$ the support of the true parameter vector $\theta_i^*$, define $\mathcal{C}(S) \defeq \{X\in\reals^m\,:\,\|X\|_1\leq 1\text{ and } \|X_{S^c}\|_1\leq 3\|X_S\|_1\}$. We assume that:
\begin{equation*}
\forall X \in \mathcal{C}(S),\quad -X^T \nabla^2\mathcal{L}_i(\theta_i^*)\,X \geq \gamma \|X\|_2^2
\end{equation*}
for some $\gamma>0$.
\end{enumerate}

\begin{theorem}
\label{thm:main}
Suppose that assumptions 1--3 above hold and define $n\defeq |\mathcal{T}_i|$. For any $\delta\in(0,1)$, let $\hat{\theta}_i$ be a solution of \eqref{eq:pre-mle} with $\lambda \defeq 2\sqrt{\frac{\log m}{\alpha n^{1 - \delta}}}$. Then:
\begin{equation}
\label{eq:sparse}
\|\hat{\theta}_i - \theta^*_i \|_2 \leq \frac{6}{\gamma} \sqrt{\frac{s \log m}{\alpha n^{1-\delta}}} \quad \text{w.p.}\; 1 - e^{-n^\delta \log m}
\end{equation}
\end{theorem}

Assumption 3 above can be replaced by the following data-independent assumption:
\begin{enumerate}
\item[3'.] $\log f$ and $\log (1-f)$ are $\varepsilon$-concave, and the expected Hessian $H = \lim_{n\rightarrow\infty} \frac{1}{n}X^TX$ has its smallest eigenvalue bounded from below by some $\gamma>0$, where $X$ is the $n\times m$ design matrix whose $k$-th row is $X^k$.
\end{enumerate}
at the cost of a slightly slower convergence rate.

\section{Experiments}

%\section{Acknowledgments}
\bibliographystyle{abbrv}
\bibliography{sparse}
%\balancecolumns
\end{document}