summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorThibaut Horel <thibaut.horel@gmail.com>2014-10-30 00:00:29 -0400
committerThibaut Horel <thibaut.horel@gmail.com>2014-10-30 00:00:29 -0400
commit59560fca8caa4d76abe2fe8597d806e0a897874a (patch)
treec8441b1200ffee2d6f5ebaecc9ab6077f6ea7772
parentab978841c3da7cf1e6f7e21132a88a13715db773 (diff)
downloadcs284-59560fca8caa4d76abe2fe8597d806e0a897874a.tar.gz
[ps1] Add code for Problem 5
-rw-r--r--ps1/code/main.py219
-rw-r--r--ps1/code/matplotlibrc27
2 files changed, 246 insertions, 0 deletions
diff --git a/ps1/code/main.py b/ps1/code/main.py
new file mode 100644
index 0000000..5ddb979
--- /dev/null
+++ b/ps1/code/main.py
@@ -0,0 +1,219 @@
+import matplotlib.pyplot as plt
+import numpy as np
+import networkx as nx
+import random
+
+DATASETS = ["enron", "gpg", "livejournal", "epinions"]
+#DATASETS = ["enron", "gpg", "epinions"]
+
+
+def build_graph(dataset):
+ d = {}
+ with open(dataset + ".txt") as f:
+ for line in f:
+ u, v = map(int, line.strip().split())
+ if u in d:
+ d[u].append(v)
+ else:
+ d[u] = [v]
+ if dataset == "gpg":
+ if v in d:
+ d[v].append(u)
+ else:
+ d[v] = [u]
+
+ return d
+
+
+def degree_distributions(dataset):
+ graph = build_graph(dataset)
+ with open(dataset + "_distr.txt", "w") as f:
+ for u in graph:
+ f.write(str(len(graph[u])) + "\n")
+
+
+def plot_degrees(log=False):
+ plt.figure(figsize=(8, 6))
+ for i, dataset in enumerate(DATASETS):
+ values = [int(line.strip()) for line in open(dataset + "_distr.txt")]
+ d = {}
+ for j, value in enumerate(values):
+ if value in d:
+ d[value] += 1
+ else:
+ d[value] = 1
+ ax = plt.subplot(2, 2, i + 1)
+ if log:
+ ax.set_yscale("log")
+ ax.set_xscale("log")
+ a, b = zip(*d.iteritems())
+ plt.plot(a, np.array(b) / float(j), ".")
+ plt.xlabel("Degree")
+ plt.ylabel("Fraction of nodes")
+ plt.title(dataset)
+ plt.tight_layout()
+ plt.savefig("degr.pdf")
+
+
+def centrality(dataset):
+ random.seed()
+ graph = build_graph(dataset)
+ G = nx.from_dict_of_lists(graph)
+ pop = random.sample(graph.keys(), 1000)
+ l = []
+ for i, u in enumerate(pop):
+ print i
+ lengths = nx.single_source_shortest_path_length(G, u)
+ lengths = np.array([v for v in lengths.values() if v != 0])
+ s = np.sum(1. / lengths)
+ l.append(s)
+ with open(dataset + "_centrality.txt", "w") as f:
+ for c in l:
+ f.write(str(c) + "\n")
+
+
+def plot_centrality(log=False):
+ plt.figure(figsize=(8, 6))
+ for i, dataset in enumerate(DATASETS):
+ values = [float(line.strip())
+ for line in open(dataset + "_centrality.txt")]
+ plt.subplot(2, 2, i + 1)
+ n = len(values)
+ h, b = np.histogram(values, bins=30)
+ width = 0.7 * (b[1] - b[0])
+ plt.bar(b[:-1], h / float(n), width=width)
+ plt.xlabel("Centrality")
+ plt.ylabel("Fraction of nodes")
+ plt.title(dataset)
+ plt.tight_layout()
+ plt.savefig("centrality.pdf")
+
+
+def pagerank(G, alpha=0.85, max_iter=100):
+ if not G.is_directed():
+ D = G.to_directed()
+ else:
+ D = G
+ W = nx.stochastic_graph(D)
+ N = W.number_of_nodes()
+ x = dict.fromkeys(W, 1.0 / N)
+ p = dict.fromkeys(W, 1.0 / N)
+
+ dangling_weights = p
+ dangling_nodes = [n for n in W if W.out_degree(n) == 0.0]
+
+ for i in range(max_iter):
+ print i
+ xlast = x
+ x = dict.fromkeys(xlast.keys(), 0)
+ danglesum = alpha * sum(xlast[n] for n in dangling_nodes)
+ for n in x:
+ for nbr in W[n]:
+ x[nbr] += alpha * xlast[n] * W[n][nbr]["weight"]
+ x[n] += danglesum * dangling_weights[n] + (1.0 - alpha) * p[n]
+ return x
+
+
+def voter(G, max_iter=100):
+ if not G.is_directed():
+ D = G.to_directed()
+ else:
+ D = G
+ W = nx.stochastic_graph(D)
+ x = dict.fromkeys(W, 1.0)
+
+ for i in range(max_iter):
+ print i
+ xlast = x
+ x = dict.fromkeys(xlast.keys(), 0)
+ for n in x:
+ for nbr in W[n]:
+ x[nbr] += xlast[n] * W[n][nbr]["weight"]
+ return x
+
+
+def page_rank(dataset, n=100):
+ graph = build_graph(dataset)
+ G = nx.from_dict_of_lists(graph)
+ x = pagerank(G, max_iter=n)
+ with open(dataset + "_pr.txt", "w") as f:
+ for v in x.itervalues():
+ f.write(str(v) + "\n")
+
+
+def voter_model(dataset, n=100):
+ graph = build_graph(dataset)
+ G = nx.from_dict_of_lists(graph)
+ x = voter(G, max_iter=n)
+ with open(dataset + "_voter.txt", "w") as f:
+ for v in x.itervalues():
+ f.write(str(v) + "\n")
+
+
+def plot_pr():
+ plt.figure(figsize=(8, 6))
+ for i, dataset in enumerate(DATASETS):
+ values = [float(line.strip()) for line in open(dataset + "_pr.txt")]
+ low, high = min(values), max(values)
+ bins = 500
+ thresholds = [low + l * (high - low) / bins for l in xrange(bins)]
+ d = {}
+ for j, value in enumerate(values):
+ for k, t in enumerate(thresholds):
+ if value > t:
+ u = thresholds[k - 1]
+ if u in d:
+ d[u] += 1
+ else:
+ d[u] = 1
+
+ ax = plt.subplot(2, 2, i + 1)
+ ax.set_yscale("log")
+ ax.set_xscale("log")
+ a, b = zip(*d.iteritems())
+ plt.plot(a, np.array(b) / float(j), ".")
+ plt.xlabel("PageRank")
+ plt.ylabel("Fraction of nodes")
+ plt.title(dataset)
+ plt.tight_layout()
+ plt.savefig("pr.pdf")
+
+
+def plot_voter():
+ plt.figure(figsize=(8, 6))
+ for i, dataset in enumerate(DATASETS):
+ values = [float(line.strip()) for line in open(dataset + "_voter.txt")]
+ low, high = min(values), max(values)
+ bins = 500
+ thresholds = [low + l * (high - low) / float(bins)
+ for l in xrange(bins)]
+ d = {}
+ for j, value in enumerate(values):
+ for k, t in enumerate(thresholds):
+ if value > t:
+ u = thresholds[k - 1]
+ if u in d:
+ d[u] += 1
+ else:
+ d[u] = 1
+
+ ax = plt.subplot(2, 2, i + 1)
+ ax.set_yscale("log")
+ ax.set_xscale("log")
+ a, b = zip(*d.iteritems())
+ plt.plot(a, np.array(b) / float(j), ".")
+ plt.xlabel("Voter Influence")
+ plt.ylabel("Fraction of nodes")
+ plt.title(dataset)
+ plt.tight_layout()
+ plt.savefig("voter.pdf")
+
+if __name__ == "__main__":
+ #degree_distributions("gpg")
+ #plot_degrees(log=False)
+ #for dataset in DATASETS:
+ # voter_model(dataset)
+ #plot_centrality()
+ #plot_pr()
+ #page_rank("epinions")
+ plot_voter()
diff --git a/ps1/code/matplotlibrc b/ps1/code/matplotlibrc
new file mode 100644
index 0000000..3bda572
--- /dev/null
+++ b/ps1/code/matplotlibrc
@@ -0,0 +1,27 @@
+font.size : 8
+font.family : sans-serif
+font.sans-serif : Helvetica
+font.serif : Palatino
+text.usetex : True
+mathtext.fontset : custom
+
+
+lines.markersize : 2
+lines.linewidth : 0.75
+lines.linestyle : -
+lines.marker : .
+patch.linewidth : 0.5
+
+axes.linewidth : 0.5
+#axes.color_cycle: 66C2A5, FC8D62, 8DA0CB, E78AC3, A6D854, FFD92F, E5C494, B3B3B3
+
+legend.fontsize : medium
+legend.numpoints : 1
+legend.markerscale : 0.8
+legend.frameon : True
+legend.borderaxespad: 0.6
+
+savefig.dpi : 100
+savefig.bbox : tight
+savefig.pad_inches : 0.05
+figure.figsize: 3.9, 3.5