diff options
Diffstat (limited to 'facebook_analysis/analyze.py')
| -rw-r--r-- | facebook_analysis/analyze.py | 55 |
1 files changed, 51 insertions, 4 deletions
diff --git a/facebook_analysis/analyze.py b/facebook_analysis/analyze.py
index c5e6feb..9b7f893 100644
--- a/facebook_analysis/analyze.py
+++ b/facebook_analysis/analyze.py
@@ -10,10 +10,14 @@ import pulp
 import sys
 from random import seed, betavariate, normalvariate
 import matplotlib.pyplot as plt
+from scipy.sparse import coo_matrix
+from sklearn.preprocessing import normalize as nm
 
 DATA_DIR = "../facebook_data"
-DATASETS = ["hbo", "nyt", "lp", "google", "lmpt", "gp", "kiva", "coachella",
-            "peet", "gap"]
+#DATASETS = ["hbo", "nyt", "lp", "google", "lmpt", "gp", "kiva", "coachella",
+#            "peet", "gap"]
+DATASETS = ["hbo", "nyt", "lp", "google", "gp", "kiva", "coachella",
+            "gap"]
 SYNTH_DIR = "../apgl"
 SYNTH_DATASETS = ["b-a", "kk", "sw"]
 
@@ -74,6 +78,48 @@ def build_graph(dataset):
     return build_graph1(dataset)
 
 
+def build_graph3(dataset):
+    d = {}
+    e = {}
+    with open(dataset + ".txt") as f:
+        for line in f:
+            u, v = map(int, line.strip().split())
+            d[u, v] = 1
+            d[v, u] = 1
+            d[u, u] = 1
+            if u in e:
+                e[u].append(v)
+            else:
+                e[u] = [v]
+    i, j = zip(*d.keys())
+    v = d.values()
+    m = coo_matrix((v, (i, j)), dtype="float")
+    m = nm(m, norm='l1', axis=1, copy=False)
+    return m, e
+
+
+def voter(mat, node, t):
+    n = mat.shape[0]
+    v = np.zeros(n)
+    u = np.ones(n)
+    v[node] = 1
+    for i in xrange(t):
+        v = mat.dot(v)
+    return v.dot(u)
+
+
+def influence_exp(dataset, size):
+    mat, graph = build_graph3(dataset)
+    sp = sample(graph.keys(), size)
+    graph = {s: graph[s] for s in sp}
+    sd = list(sd_users(graph))
+    sd += graph.keys()
+    for t in xrange(100):
+        degrees = {s: voter(mat, s, t) for s in sd}
+        #aps(graph, degrees, size)
+        print im(graph, degrees, size)
+
+
 def print_graph(dataset):
     graph, degrees = build_graph(dataset)
     with open(dataset + "_single_graph.txt", "w") as f:
@@ -257,7 +303,7 @@ def lp_time():
 
 
 def aps_time():
-    graph, degrees = build_graph("big")
+    graph, degrees = build_graph("hbo")
     sp = sample(graph.keys(), int(sys.argv[2]))
     graph = {s: graph[s] for s in sp}
     a = int(sys.argv[1])
@@ -326,7 +372,7 @@ def stats():
 if __name__ == "__main__":
     #for dataset in SYNTH_DATASETS:
     #    compute_performance(dataset)
-    compute_performance_p("coachella", "deg")
+    #compute_performance_p("coachella", "power")
     #compute_performance("coachella")
     #hbo_likes()
     #lp_perf()
@@ -338,3 +384,4 @@ if __name__ == "__main__":
     # with open("coachella_degrees.txt", "w") as fh:
     #     for deg in degrees.itervalues():
     #         fh.write(str(deg) + "\n")
+    influence_exp("slashdot", 100)
