\documentclass[10pt]{article} \usepackage{fullpage} \usepackage[utf8x]{inputenc} \usepackage{amsmath,amsfonts,amsthm} \usepackage[english]{babel} \usepackage[capitalize, noabbrev]{cleveref} \usepackage{algorithm} \usepackage{algpseudocode} \usepackage{graphicx} \newenvironment{enumerate*}% {\vspace{-2ex} \begin{enumerate} % \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}% {\end{enumerate}} \newenvironment{itemize*}% {\vspace{-2ex} \begin{itemize} % \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}% {\end{itemize}} \newenvironment{description*}% {\vspace{-2ex} \begin{description} % \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}% {\end{description}} \DeclareMathOperator{\E}{\mathbb{E}} \let\P\relax \DeclareMathOperator{\P}{\mathbb{P}} \newcommand{\ex}[1]{\E\left[#1\right]} \newcommand{\prob}[1]{\P\left[#1\right]} \newcommand{\inprod}[1]{\left\langle #1 \right\rangle} \newcommand{\R}{\mathbf{R}} \newcommand{\N}{\mathbf{N}} \newcommand{\C}{\mathcal{C}} \newcommand{\eps}{\varepsilon} \newcommand{\eqdef}{\mathbin{\stackrel{\rm def}{=}}} \newcommand{\llbracket}{[\![} \newtheorem{theorem}{Theorem} \newtheorem{lemma}{Lemma} \author{Thibaut Horel} \title{CS 28r Problem Set 1 -- Solutions} \date{October 29, 2014} \begin{document} \maketitle \section*{Problem 1} \paragraph{(a)} We first prove that submodularity implies the following property: \begin{equation}\label{eq:sub} \forall S,T\subseteq N,\; f(S\cup T) + f(S\cap T) \leq f(S) + f(T). \end{equation} For this consider $A = S\cap T$, and $B = T\setminus S$. For an arbitrary ordering $B = \{b_1,\ldots, b_m\}$ of the elements in $B$, let us write $B_i = \{b_1,\ldots, b_i\}$ with the convention that $B_0 = \emptyset$. 
Then one can write: \begin{displaymath} f(T) - f(A) = \sum_{i=1}^m f(A\cup B_i) - f(A\cup B_{i-1}) \leq \sum_{i=1}^m f(S\cup B_i) - f(S\cup B_{i-1}) = f(S\cup T) - f(S) \end{displaymath} where the first equality is simply a telescopic sum, the inequality directly follows from submodularity since $A\cup B_{i-1}\subseteq S\cup B_{i-1}$, and the last equality is a telescopic sum again. Reordering the terms in the previous equation and using the definition of $A$ we obtain Equation~\ref{eq:sub}. Subadditivity directly follows from Equation~\ref{eq:sub} by observing that by positivity of the function $f$, $f(S\cup T) + f(S\cap T) \geq f(S\cup T)$. \paragraph{(b)} We first show that if $f$ is submodular, then the marginal contribution function is subadditive. If $f$ is submodular, it is easy to see that $f_S$ is also submodular. Indeed, for $T\subseteq T'$ and $u\in N\setminus T'$: \begin{displaymath} f_S(T\cup\{u\}) - f_S(T) = f(S\cup T\cup\{u\}) - f(S\cup T) \geq f(S\cup T'\cup\{u\}) - f(S\cup T') = f_S(T'\cup\{u\}) - f_S(T') \end{displaymath} where the inequality follows from the submodularity of $f$. We saw in \textbf{(a)} that a submodular function is subadditive, hence $f_S$ is subadditive. Conversely, let us assume that $f_S$ is subadditive for all $S$ and let us consider two sets $S\subseteq T$ and $u\in N\setminus T$. Then: \begin{displaymath} f_S(T\cup\{u\})\leq f_S(T) + f_S(\{u\}) \end{displaymath} using the definition of $f_S$, this is equivalent to: \begin{displaymath} f(S\cup T\cup\{u\}) - f(S)\leq f(S\cup T) - f(S) + f(S\cup\{u\}) - f(S) \end{displaymath} observing that $S\cup T = T$ and reordering the terms we obtain: \begin{displaymath} f(T\cup\{u\}) - f(T)\leq f(S\cup\{u\}) - f(S) \end{displaymath} which is exactly the submodularity of $f$. \section*{Problem 2} We adapt the proof of the approximation ratio of the greedy algorithm from Lecture 5. 
Using the same notation, if $o_{max}$ is the element of highest marginal contribution in $O$ at stage $i+1$ and $a_{i+1}$ is the element added by the greedy algorithm (with approximate oracle) at step $i+1$, we have: \begin{displaymath} \tilde{f}_{S_i}(a_{i+1}) \geq \tilde{f}_{S_i}(o_{max}) \end{displaymath} now using that $\tilde{f}$ is an $\alpha$-approximate oracle, we can upper bound $\tilde{f}_{S_i}(a_{i+1})$ by $f_{S_i}(a_{i+1})$ and lower bound $\tilde{f}_{S_i}(o_{max})$ by $\alpha f_{S_i}(o_{max})$ and obtain: \begin{displaymath} f_{S_i}(o_{max})\leq \frac{1}{\alpha}f_{S_i}(a_{i+1}) \end{displaymath} plugging this inequality into equation (3) in the proof of Lemma 8 directly gives the ``modified'' lemma for an $\alpha$-approximate oracle: \begin{displaymath} f(S_{i+1}) - f(S_i) \geq \frac{\alpha}{k} (f(O) - f(S_i)),\; i\in[k] \end{displaymath} From there, applying verbatim the proof of Lemma 9, we obtain by induction: \begin{equation}\label{eq:ind} f(S_i)\geq \left(1-\left(1-\frac{\alpha}{k}\right)^i\right)f(O),\; i\in[k] \end{equation} We then conclude by observing that: \begin{displaymath} \left(1-\frac{\alpha}{k}\right)^k = e^{k\log (1-\frac{\alpha}{k})}\leq e^{-\alpha} \end{displaymath} where we used the inequality $\log(1+x)\leq x$. Plugging this into \eqref{eq:ind} gives: \begin{displaymath} f(S_k)\geq \left(1-\frac{1}{e^{\alpha}}\right)f(O) \end{displaymath} which is exactly what we wanted. \section*{Problem 3} In this exercise we denote by $I(S)$ the influence of a set $S$ in the independent cascade model. \paragraph{(a)} As we saw in class, the influence of a node $v$ in the independent cascade model is exactly the expectation of the number of nodes reachable from $v$ over all possible realizations of subgraphs $G'$ of $G$ where each edge $(u,v)$ realizes independently with probability $p_{u,v}$. Let us write $I(v)$ for the influence of $v$. 
The algorithm SampleInfluence is simply considering $m$ independent realizations of $G'$ and averaging the number of nodes reachable from $v$ over these $m$ realizations. It suffices to find $m$ such that: \begin{displaymath} \Pr\left[\left|\frac{1}{m}\sum_{i=1}^m r_i - I(v)\right| > \eps I(v)\right]\leq \gamma \end{displaymath} indeed, the complement of this event implies that $\frac{1}{m}\sum_i r_i$ is a $(1-\eps)$-approximation of $I(v)$, and it occurs with probability at least $1-\gamma$. Applying the Chernoff bound with $\delta = \eps m I(v)$, we obtain: \begin{displaymath} \Pr\left[\left|\frac{1}{m}\sum_{i=1}^m r_i - I(v)\right| > \eps I(v)\right]\leq 2e^{\frac{-m\eps^2I(v)^2}{n^2}} \end{displaymath} since the variables $r_i$ clearly live in the interval $[0, n]$ (we take $b = n$). Now we assume that $I(v)\geq 1$\footnote{This depends on how you formulate the independent cascade model: do we always consider that a node influences itself? If the assumption doesn't hold, we can lower bound $I(v)$ by $p_{min}$ where $p_{min}$ is the minimum probability that appears on any edge in $G$, and all the following results go through by replacing $1$ by $p_{min}$.}. Taking $m = \lceil\frac{n^2}{\eps^2}\log(2/\gamma)\rceil$, we obtain: \begin{displaymath} \Pr\left[\left|\frac{1}{m}\sum_{i=1}^m r_i - I(v)\right| > \eps I(v)\right]\leq \gamma \end{displaymath} and $\frac{1}{m}\sum_i r_i$ is a $(1-\eps)$-approximation of $I(v)$ with probability at least $1-\gamma$ as we wanted. \paragraph{(b)} We first note that we can adapt question \textbf{(a)} to obtain $\tilde{I}_S(x)$ an approximate oracle of $I_S(x)$ for any $S$ and $x$. Let us denote by $R(S, G)$ the set of nodes reachable from any node in $S$ in graph $G$. We modify the SampleInfluence algorithm as follows: we generate $m$ realizations $G_i$ ($i\in[m]$) of the random graph and for each realization $i\in[m]$, we define $r_i$ to be $|R(S\cup\{x\}, G_i)| - |R(S, G_i)|$, and $\tilde{I}_S(x)$ to be $\frac{1}{m}\sum_i r_i$. 
By applying the same Chernoff bound as in question \textbf{(a)}, we can guarantee that choosing $m = \lceil\frac{n^2}{\eps^2}\log(2/\gamma)\rceil$ implies that: \begin{equation}\label{eq:oracle} \Pr\left[\left|\tilde{I}_S(x) - I_S(x)\right| > \eps I_S(x)\right]\leq \gamma \end{equation} Now we want to use this approximate oracle $\tilde{I}_S(x)$ the same way as in Problem 2. We note that the greedy algorithm is going to make at most $nk$ calls to the approximate oracle. Hence, by choosing $\gamma = \frac{\delta}{nk}$ and applying a union bound on \eqref{eq:oracle}, we get that with probability at least $1-\delta$, each time we call the oracle we will have: \begin{displaymath} (1-\eps)I_S(x)\leq\tilde{I}_S(x) \leq (1+\eps)I_S(x) \end{displaymath} Dividing this previous inequality by $1+\eps$, we obtain \begin{displaymath} \frac{1-\eps}{1+\eps}I_S(x)\leq\frac{1}{1+\eps}\tilde{I}_S(x) \leq I_S(x) \end{displaymath} Since the choices of the greedy algorithm are unchanged when the oracle is scaled by the positive constant $\frac{1}{1+\eps}$, applying the result of Problem 2 with $\alpha = \frac{1-\eps}{1+\eps}$, we obtain that using $\tilde{I}_S(x)$ as an approximate oracle in the greedy algorithm yields an approximation ratio of: \begin{displaymath} \rho = 1-\frac{1}{e^{\frac{1-\eps}{1+\eps}}} \end{displaymath} But a Taylor expansion of $\rho$ shows that: \begin{displaymath} \rho \geq 1-\frac{1}{e} - C\eps \end{displaymath} for some constant $C$. Choosing $\eps' = \eps/C$, we get that by using $m = \lceil\frac{n^2C^2}{\eps^{2}}\log(2nk/\delta)\rceil$ samples each time we invoke the oracle, the greedy algorithm will return a $1-1/e-\eps$ approximation with probability at least $1-\delta$. We are doing $nk$ calls to the oracle, each call can be computed in time $O(mn^3)$ (generating a sample takes time $n^2$ at most and a BFS traversal takes linear time). Given the expression we have for $m$, the overall complexity is $O(\frac{kn^6}{\eps^2}\log\frac{nk}{\delta})$ which is polynomial in the number of nodes. 
\section*{Problem 4} We assume that $x_0\leq \min_i x_i$, otherwise the power-law distribution cannot explain the data and the maximum likelihood problem is ill-defined. Maximizing the likelihood is equivalent to maximizing the log-likelihood, hence, we want to find: \begin{displaymath} \hat{\alpha} \in \arg\max_\alpha \sum_{i=1}^n \log\left(\frac{\alpha-1}{x_0}\left(\frac{x_i}{x_0}\right)^{-\alpha}\right) \end{displaymath} removing constant terms, this is equivalent to finding: \begin{displaymath} \hat{\alpha} \in \arg\max_\alpha n\log(\alpha-1) - \alpha\sum_{i=1}^n\log\frac{x_i}{x_0} \end{displaymath} The function that we are maximizing converges to $-\infty$ both as $\alpha$ tends to $1$ and as $\alpha$ tends to $+\infty$, hence it has a global maximum in $(1,\infty)$. We find this maximum by solving the critical point equation (finding points where the derivative is zero): \begin{displaymath} \frac{n}{\alpha-1} = \sum_{i=1}^n\log\frac{x_i}{x_0} \end{displaymath} This equation has a unique solution which is the maximum likelihood estimator: \begin{displaymath} \boxed{ \hat{\alpha} = 1 +\frac{n}{\sum_{i=1}^n\log\frac{x_i}{x_0}} } \end{displaymath} \section*{Problem 5} In addition to the three provided datasets, I used the largest strongly connected component of the PGP graph. PGP is data encryption software using asymmetric encryption. Asymmetric encryption relies critically on being able to associate a public key to an owner (user or institution) in a tamper-proof way. This is usually done by using certificate authorities (for example in SSL). In contrast, PGP relies on building a \emph{web of trust}: users certify the ownership of the keys of their friends, which in turn certify the keys of their friends, thus propagating the trust and building a web of trust. As predicted by random graph theory, it is assumed that the PGP web of trust has a single large strongly connected component, which is called the \emph{PGP strong-set}. 
It is easy to discover this strong-set by doing a breadth-first traversal of the trust links, starting from famous privacy activists. \paragraph{(a)} The degree distributions for the four networks can be found in Figure~\ref{fig:deg}. Because of the quick decay, these plots are hard to read (all the points are concentrated in the bottom left corner). See the next question for easier-to-read graphs. \begin{figure} \includegraphics[width=\textwidth]{degr.pdf} \caption{Degree distributions.} \label{fig:deg} \end{figure} \paragraph{(b)} The degree distributions in log-log scale can be found in Figure~\ref{fig:deg-log}. We observe that these distributions approximately look like lines. This is characteristic of a power-law degree distribution. Indeed, if $f(d) = kd^{-\alpha}$, then $\log f(d) = \log k - \alpha \log d$. Hence if the degree distribution follows a power law, the distribution is a line in log-log scale. The slope of the line is the negative of the power-law exponent. \begin{figure} \includegraphics[width=\textwidth]{degr_log.pdf} \caption{Degree distributions in log-log scale.} \label{fig:deg-log} \end{figure} \paragraph{(c)} I computed the harmonic centrality (see code) and plotted its distribution across nodes. This can be found in Figure~\ref{fig:centrality}. \begin{figure} \includegraphics[width=\textwidth]{centrality.pdf} \caption{Harmonic centrality distributions.} \label{fig:centrality} \end{figure} \paragraph{(d)} I computed the voter model influence (see code) for $t=100$ and plotted its distribution across nodes in log-log scale. This can be found in Figure~\ref{fig:voter}. \begin{figure} \includegraphics[width=\textwidth]{voter.pdf} \caption{Voter model influence distributions in log-log scale.} \label{fig:voter} \end{figure} \paragraph{(e)} I computed the PageRank for $t=100$ (see code) and plotted its distribution across nodes in log-log scale. This can be found in Figure~\ref{fig:pr}. 
\begin{figure} \includegraphics[width=\textwidth]{pr.pdf} \caption{PageRank distributions in log-log scale.} \label{fig:pr} \end{figure} \end{document}