"""Render experiment result files into PDF figures with matplotlib.

Every public function reads one or more text files from the current working
directory and saves a figure there. The module is written to run unchanged on
both Python 2 and Python 3 (no reliance on ``map``/``zip`` returning lists).
"""
from analyze import sd_users, build_graph, DATASETS, SYNTH_DATASETS
import matplotlib.pyplot as plt
from matplotlib import rcParams, cm
from matplotlib.pyplot import (plot, legend, savefig, xlabel, ylabel, hist,
                               title, subplot, tight_layout,
                               ticklabel_format, xlim, ylim)
# Not referenced by name: plot_3d() requests projection='3d', which older
# Matplotlib releases only register once mplot3d has been imported.
from mpl_toolkits.mplot3d import Axes3D
import numpy as np

# Human-readable subplot titles for dataset keys; other keys are shown as-is.
_DATASET_TITLES = {
    "sw": "SmallWorld",
    "kk": "Kronecker",
    "b-a": "Barabasi-Albert",
}


def mq(x):
    """Return *x* multiplied by 4 (scaling applied to plotted columns)."""
    return x * 4


def _read_floats(path):
    """Return the floats stored one per line in the file at *path*."""
    with open(path) as f:
        return [float(line) for line in f]


def _read_table(path):
    """Return the rows of a tab-separated file of floats, one list per line.

    Raises ValueError if a field (including an empty line) is not a float.
    """
    with open(path) as f:
        return [[float(field) for field in line.strip().split("\t")]
                for line in f]


def _read_sampling(path):
    """Return ``(sizes, times, cycles)`` parsed from a three-column file.

    Columns are whitespace-separated; commas are stripped before splitting.
    The first column is parsed as int, the other two as float.
    """
    with open(path) as f:
        rows = [line.strip().replace(",", "").split() for line in f]
    sizes, times, cycles = zip(*rows)
    return ([int(v) for v in sizes],
            [float(v) for v in times],
            [float(v) for v in cycles])


def _load_performance_ratios():
    """Return ``(budgets, ratios)`` for every dataset in ``DATASETS``.

    ``ratios`` maps each dataset name to the per-row quotient of the fifth
    column by the second column of ``<dataset>_performance.txt`` (plotted
    elsewhere as "Adapt. Seeding" and "Inf. Max"), skipping the first row.
    ``budgets`` is the first column of the last dataset read, scaled by
    ``mq``; it is empty when ``DATASETS`` is empty.
    """
    budgets = []
    ratios = {}
    for dataset in DATASETS:
        rows = _read_table(dataset + "_performance.txt")
        a, im, rd, rdf, aps = zip(*rows)
        ratios[dataset] = [adaptive / baseline
                           for adaptive, baseline in zip(aps[1:], im[1:])]
        budgets = [mq(v) for v in a]
    return budgets, ratios


def voter():
    """Plot the voter-model results for Epinions and Slashdot to voter.pdf.

    Reads four files of one float per line; each is plotted against
    t = 1..50, so every file must contain exactly 50 values.
    """
    ep = _read_floats("epinions_voter.txt")
    epr = _read_floats("epinions_voter_influence.txt")
    sl = _read_floats("slashdot_voter.txt")
    slr = _read_floats("slashdot_voter_influence.txt")
    steps = list(range(1, 51))

    # (title, adaptive series, influence series, influence label); the two
    # influence labels differ in the trailing period and are kept verbatim.
    panels = [
        ("Epinions", ep, epr, "Inf. Max."),
        ("Slashdot", sl, slr, "Inf. Max"),
    ]
    plt.figure(figsize=(7, 3))
    for position, (name, adaptive, influence, label) in enumerate(panels, 1):
        plt.subplot(1, 2, position)
        plt.plot(steps, adaptive, label="Adapt. Seeding")
        plt.plot(steps, influence, label=label)
        plt.legend()
        plt.title(name)
        plt.xlabel("t")
        plt.ylabel("Performance")
    plt.savefig("voter.pdf")


def sampling():
    """Plot time and cycle counts with and without sampling to sampling2.pdf.

    Reads ``hbo_sampling.txt`` and ``hbo_sans_sampling.txt`` and draws both
    on log-scaled y axes: running time on the left, cycles on the right.
    """
    ks, ts, cs = _read_sampling("hbo_sampling.txt")
    k, t, c = _read_sampling("hbo_sans_sampling.txt")

    plt.figure(figsize=(7, 3))
    # Each series is drawn against the sizes read from its own file. The
    # non-sampling series used to be plotted against the sampling file's
    # sizes, which is only right when both files list the same sizes.
    panels = [(ts, t, "Time (s)"), (cs, c, r"\# Cycles")]
    for position, (with_sampling, without, label) in enumerate(panels, 1):
        plt.subplot(1, 2, position)
        plt.gca().set_yscale("log")
        plt.plot(ks, with_sampling, label="Sampling based")
        plt.plot(k, without, label="Comb. alg.")
        plt.xlabel("Size")
        plt.ylabel(label)
        plt.legend(loc="upper left")
    plt.savefig("sampling2.pdf")


def plot_degree_distributions():
    """Plot cumulative degree histograms for the "kiva" graph to dist.pdf.

    Compares the degrees of the users in the graph ("Core set") with those
    of the users returned by ``sd_users`` ("Friends").
    """
    plt.figure(figsize=(7, 3))
    graph, degrees = build_graph("kiva")
    fd_degrees = [degrees[user] for user in graph]
    sd_degrees = [degrees[user] for user in sd_users(graph)]

    # NOTE(review): `normed` is the legacy keyword; newer Matplotlib
    # releases spell it `density` - confirm the targeted version.
    shared = dict(bins=50, cumulative=True, normed=True, alpha=0.5,
                  histtype="stepfilled")
    plt.hist(fd_degrees, label="Core set", **shared)
    plt.hist(sd_degrees, label="Friends", **shared)
    ylim(ymax=1.1)
    plt.xlabel("Degree")
    plt.ylabel("Probability")
    plt.legend(loc="lower right")
    plt.savefig("dist.pdf")


def plot_all_performances():
    """Plot the four performance curves of every dataset to perf10.pdf.

    One subplot per entry of ``DATASETS`` on a 2x2 grid. The budget column
    of each file is ignored; curves are drawn against the fixed grid
    0, 0.1, ..., 1, so each file must contain exactly 11 rows.
    """
    plt.figure(figsize=(6, 6))
    budgets = np.arange(0, 1.001, 0.1)
    for i, dataset in enumerate(DATASETS):
        rows = _read_table(dataset + "_performance.txt")
        _, im, rd, rdf, aps = zip(*rows)
        im, rd, rdf, aps = [[mq(v) for v in column]
                            for column in (im, rd, rdf, aps)]
        plt.subplot(2, 2, i + 1)
        plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
        plt.plot(budgets, im, label="Inf. Max")
        plt.plot(budgets, rd, label="Rand. Node")
        plt.plot(budgets, rdf, label="Rand. Friend")
        plt.plot(budgets, aps, label="Adapt. Seeding")
        plt.xlabel("Budget")
        plt.ylabel("Performance")
        plt.title(_DATASET_TITLES.get(dataset, dataset))
        xlim(xmax=1.1)
    # Single legend anchored to the whole figure rather than to one subplot.
    plt.legend(loc="upper center", ncol=4, bbox_to_anchor=(0, 0, 1, 1.03),
               bbox_transform=plt.gcf().transFigure)
    plt.tight_layout()
    plt.savefig("perf10.pdf")


def compare_performance(fn):
    """Plot, per dataset, the adaptive/baseline performance ratio to *fn*.

    fn -- path of the figure file to write.
    """
    plt.figure()
    budgets, plots = _load_performance_ratios()
    for dataset in DATASETS:
        plt.plot(budgets[1:], plots[dataset], label=dataset)
    xlim(xmax=550)
    plt.xlabel("Budget")
    plt.ylabel("Performance")
    plt.legend(loc="lower right", ncol=2, fontsize="small")
    plt.savefig(fn)


def compare_performance2(fn):
    """Plot the mean performance ratio per budget as a bar chart to *fn*.

    Bars show the mean over ``DATASETS``; error bars span the minimum to the
    maximum ratio observed at that budget. The tick labels are fixed to
    100..550, so ten budgets are expected.

    fn -- path of the figure file to write.
    """
    plt.figure(figsize=(5, 3))
    _, plots = _load_performance_ratios()

    # Materialised once: the per-budget tuples are traversed several times.
    per_budget = list(zip(*plots.values()))
    means = [np.mean(w) for w in per_budget]
    # errorbar() interprets yerr as distances below/above the data point,
    # so the absolute min/max must be converted to offsets from the mean.
    below = [m - np.min(w) for m, w in zip(means, per_budget)]
    above = [np.max(w) - m for m, w in zip(means, per_budget)]

    ind = list(range(len(means)))
    width = 0.35
    centers = [i + width / 2. for i in ind]
    plt.bar(ind, means, width, linewidth=0.1)
    plt.errorbar(centers, means, [below, above], elinewidth=1.2, fmt="none")
    plt.xticks(centers, [100, 150, 200, 250, 300, 350, 400, 450, 500, 550])
    plt.xlim(-width, len(ind) - 1 + 2 * width)
    plt.xlabel("Budget")
    plt.ylabel("Relative improvement")
    plt.savefig(fn)


def compare_dist():
    """Plot the average degree of core users vs. friends to para.pdf.

    For each entry of ``DATASETS`` the graph is built and the mean degree is
    taken over the graph's users and over the users from ``sd_users``.
    """
    fd, sd = [], []
    plt.figure(figsize=(5, 3))
    # NOTE(review): "axes.color_cycle" is the legacy rcParam name; newer
    # Matplotlib releases expose "axes.prop_cycle" - confirm the version.
    colors = iter(rcParams["axes.color_cycle"])
    for dataset in DATASETS:
        graph, degrees = build_graph(dataset)
        fd.append(np.mean([degrees[user] for user in graph]))
        sd.append(np.mean([degrees[user] for user in sd_users(graph)]))

    ind = list(range(len(DATASETS)))
    width = 0.35
    shifted = [i + width for i in ind]
    plt.bar(ind, fd, width, label="Core users", color=next(colors))
    plt.bar(shifted, sd, width, label="Friends", color=next(colors))
    plt.xlim(-width, len(ind) - 1 + 3 * width)
    plt.xticks(shifted, DATASETS)
    plt.ylabel("Avg. degree")
    plt.legend()
    plt.savefig("para.pdf")


def plot_perf_prob():
    """Plot performance for several probabilities p to prob.pdf.

    Curves for selected columns of ``peet_performance_p.txt`` are labelled
    with their probability; the second column of ``peet_performance.txt`` is
    overlaid as the "Inf. Max." curve.
    """
    plt.figure()
    rcParams["font.size"] = 10
    columns = list(zip(*_read_table("peet_performance_p.txt")))
    probabilities = [0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    # Only a subset of the probabilities is drawn; column 0 holds the x
    # values, so probability i lives in column i + 1.
    for i in [0, 1, 2, 3, 5, 9]:
        plt.plot(columns[0], columns[i + 1],
                 label="$p = " + str(probabilities[i]) + "$")
    plt.legend()

    columns = list(zip(*_read_table("peet_performance.txt")))
    plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
    plt.xlabel("Budget")
    plt.ylabel("Performance")
    plt.plot(columns[0], columns[1], label="Inf. Max.")
    plt.legend(loc="upper left", fontsize="small", ncol=2)
    xlim(xmax=450)
    plt.savefig("prob.pdf")


def plot_hbo_likes():
    """Plot the three curves of hbo_likes_performance.txt to hbo_likes.pdf.

    The file's first column is ignored; curves are drawn against the fixed
    grid 0, 0.1, ..., 1, so the file must contain exactly 11 rows.
    """
    plt.figure()
    rcParams["font.size"] = 10
    _, im, aps, apso = zip(*_read_table("hbo_likes_performance.txt"))
    budgets = np.arange(0, 1.001, 0.1)
    plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
    plt.plot(budgets, [mq(v) for v in im], label="Inf. Max.")
    plt.plot(budgets, [mq(v) for v in aps], label="Adapt. seed. (subgraph)")
    plt.plot(budgets, [mq(v) for v in apso], label="Adapt. seed.")
    plt.xlabel("Budget")
    plt.ylabel("Performance")
    xlim(xmax=1.1)
    plt.legend(loc="upper left")
    plt.savefig("hbo_likes.pdf")


def plot_3d():
    """Write one 3D wireframe PDF per distribution, then show the figures.

    For each of "beta", "gauss", "power" and "deg", reads
    ``coachella_performance_p_<dist>.txt`` and saves ``<dist>.pdf``.
    """
    rcParams["font.size"] = 7
    for dist in ["beta", "gauss", "power", "deg"]:
        fig = plt.figure()
        rows = _read_table("coachella_performance_p_" + dist + ".txt")
        k = np.arange(0, 1.001, 0.1)
        ps = np.arange(0.01, 0.99, 0.1)
        x, y = np.meshgrid(k, ps)
        # Drop the first column of every row, then transpose so that the
        # surface is indexed like the meshgrid (rows follow `ps`).
        perfs = np.array(list(zip(*[row[1:] for row in rows])))
        ax = fig.add_subplot(111, projection='3d')
        ax.plot_wireframe(x, y, perfs, linewidth=0.1)
        ticklabel_format(style='sci', axis='z', scilimits=(0, 0))
        xlabel("Budget (fraction of nodes)")
        ylabel("Mean")
        ax.set_zlabel("Performance")
        ax.invert_xaxis()
        plt.savefig(dist + ".pdf")
    # NOTE(review): show() runs once, after every PDF has been written; move
    # it into the loop if figures should be displayed one at a time.
    plt.show()


def plot_time():
    """Plot running time and cycle counts of both algorithms to time.pdf.

    Top row uses the ``*_100.txt`` files, bottom row the ``*_500.txt`` files;
    the left column plots column 1 (time) and the right column plots
    column 2 (cycles), both against column 0.
    """
    plt.figure()
    rcParams["font.size"] = 6
    a1 = np.loadtxt("time_aps_100.txt")
    a2 = np.loadtxt("time_aps_500.txt")
    lp1 = np.loadtxt("time_lp_100.txt")
    lp2 = np.loadtxt("time_lp_500.txt")

    # (combinatorial data, LP data, column to plot, y label) per subplot.
    panels = [
        (a1, lp1, 1, "time (s)"),
        (a1, lp1, 2, r"\# cycles"),
        (a2, lp2, 1, "time (s)"),
        (a2, lp2, 2, r"\# cycles"),
    ]
    for position, (comb, lp, column, label) in enumerate(panels, 1):
        subplot(2, 2, position)
        plot(comb[:, 0], comb[:, column], "-", label="Comb.")
        plot(lp[:, 0], lp[:, column], "-", label="LP")
        ticklabel_format(style='sci', axis='x', scilimits=(0, 0))
        xlabel("n")
        ylabel(label)
        xlim(0, 100000)
        legend(loc="upper left")
    tight_layout(h_pad=-0.5)
    savefig("time.pdf")


if __name__ == "__main__":
    # Rebinding these module globals overrides the lists imported from
    # `analyze` for every function called below.
    SYNTH_DATASETS = ["b-a", "kk", "sw", "coachella"]
    DATASETS = ["lp", "gp", "google", "coachella"]
    # Uncomment the figure(s) to regenerate.
    #plot_all_performances()
    #plot_3d()
    #plot_hbo_likes()
    #compare_performance()
    #plot_perf_prob()
    #compare_dist()
    #plot_time()
    #plot_degree_distributions()
    #for style in plt.style.available:
    #    plt.style.use(style)
    #    compare_performance2("comp4_" + style + ".pdf")
    sampling()
    #voter()