Diffstat (limited to 'facebook_analysis/analyze.py')
-rw-r--r--  facebook_analysis/analyze.py  55
1 file changed, 51 insertions, 4 deletions
diff --git a/facebook_analysis/analyze.py b/facebook_analysis/analyze.py
index c5e6feb..9b7f893 100644
--- a/facebook_analysis/analyze.py
+++ b/facebook_analysis/analyze.py
@@ -10,10 +10,14 @@ import pulp
import sys
from random import seed, betavariate, normalvariate
import matplotlib.pyplot as plt
+from scipy.sparse import coo_matrix
+from sklearn.preprocessing import normalize as nm
DATA_DIR = "../facebook_data"
-DATASETS = ["hbo", "nyt", "lp", "google", "lmpt", "gp", "kiva", "coachella",
- "peet", "gap"]
+#DATASETS = ["hbo", "nyt", "lp", "google", "lmpt", "gp", "kiva", "coachella",
+# "peet", "gap"]
+DATASETS = ["hbo", "nyt", "lp", "google", "gp", "kiva", "coachella",
+ "gap"]
SYNTH_DIR = "../apgl"
SYNTH_DATASETS = ["b-a", "kk", "sw"]
@@ -74,6 +78,48 @@ def build_graph(dataset):
return build_graph1(dataset)
+def build_graph3(dataset):
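+    # Read an edge list ("u v" per line) into a symmetric 0/1 adjacency dict
+    # (with a self-loop on the first endpoint) and a directed adjacency-list
+    # dict, then build a sparse COO matrix and L1-normalize each row so the
+    # returned matrix is row-stochastic.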
+ d = {}
+ e = {}
+ with open(dataset + ".txt") as f:
+ for line in f:
+ u, v = map(int, line.strip().split())
+ d[u, v] = 1
+ d[v, u] = 1
+ d[u, u] = 1
+ if u in e:
+ e[u].append(v)
+ else:
+ e[u] = [v]
+    i, j = zip(*d.keys())
+    vals = list(d.values())
+    m = coo_matrix((vals, (i, j)), dtype="float")
+    m = nm(m, norm='l1', axis=1, copy=False)
+ return m, e
+
+
+def voter(mat, node, t):
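+    # Apply the row-stochastic matrix t times to an indicator vector on
+    # `node` (a t-step voter-model style update) and return the sum of the
+    # resulting vector as an influence score for `node`.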
+ n = mat.shape[0]
+ v = np.zeros(n)
+ u = np.ones(n)
+ v[node] = 1
+ for i in xrange(t):
+ v = mat.dot(v)
+ return v.dot(u)
+
+
+def influence_exp(dataset, size):
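+    # Sample `size` users from the adjacency list, restrict the graph to that
+    # sample, then for t = 0..99 score every candidate (sd_users of the sample
+    # plus the sampled users themselves) with voter() and print im()'s result.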
+ mat, graph = build_graph3(dataset)
+ sp = sample(graph.keys(), size)
+ graph = {s: graph[s] for s in sp}
+ sd = list(sd_users(graph))
+ sd += graph.keys()
+ for t in xrange(100):
+ degrees = {s: voter(mat, s, t) for s in sd}
+ #aps(graph, degrees, size)
+ print im(graph, degrees, size)
+
+
def print_graph(dataset):
graph, degrees = build_graph(dataset)
with open(dataset + "_single_graph.txt", "w") as f:
@@ -257,7 +303,7 @@ def lp_time():
def aps_time():
- graph, degrees = build_graph("big")
+ graph, degrees = build_graph("hbo")
sp = sample(graph.keys(), int(sys.argv[2]))
graph = {s: graph[s] for s in sp}
a = int(sys.argv[1])
@@ -326,7 +372,7 @@ def stats():
if __name__ == "__main__":
#for dataset in SYNTH_DATASETS:
# compute_performance(dataset)
- compute_performance_p("coachella", "deg")
+ #compute_performance_p("coachella", "power")
#compute_performance("coachella")
#hbo_likes()
#lp_perf()
@@ -338,3 +384,4 @@ if __name__ == "__main__":
# with open("coachella_degrees.txt", "w") as fh:
# for deg in degrees.itervalues():
# fh.write(str(deg) + "\n")
+ influence_exp("slashdot", 100)