author     Thibaut Horel <thibaut.horel@gmail.com>    2015-03-30 15:02:23 -0400
committer  Thibaut Horel <thibaut.horel@gmail.com>    2015-03-30 15:02:23 -0400
commit     f0860ef0d66a9b70ac7bc4073716c2ae0f55862a (patch)
tree       c7312af233e0d406ec5018dbb7f50456e2de1d35 /experiments
parent     b84dddecf2eab982941704a43663cf643be027d3 (diff)
download   criminal_cascades-f0860ef0d66a9b70ac7bc4073716c2ae0f55862a.tar.gz
New version of the model
Diffstat (limited to 'experiments')
-rw-r--r--  experiments/Makefile    |   6
-rw-r--r--  experiments/analyze.py  |   4
-rw-r--r--  experiments/ml.pyx      |  59
-rw-r--r--  experiments/process.py  |  50
4 files changed, 119 insertions, 0 deletions
diff --git a/experiments/Makefile b/experiments/Makefile
new file mode 100644
index 0000000..991c178
--- /dev/null
+++ b/experiments/Makefile
@@ -0,0 +1,6 @@
+all:
+	cython -a ml.pyx
+	gcc -pthread -fno-strict-aliasing -march=x86-64 -mtune=generic -O3 -pipe -fstack-protector-strong --param=ssp-buffer-size=4 -DNDEBUG -fPIC -I/usr/include/python2.7 -c ml.c -o ml.o
+	gcc -pthread -shared -Wl,-O3,--sort-common,--as-needed,-z,relro ml.o -L/usr/lib -lpython2.7 -o ml.so
+
+
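Note: the compile and link flags above are pinned to one machine's Python 2.7 layout. A hypothetical setup.py (not part of this commit) could let distutils and Cython choose the flags instead; the sketch below assumes Cython and NumPy are installed for the targeted interpreter.

    # setup.py -- hypothetical alternative to the hand-written Makefile above.
    # Build the extension in place with:  python setup.py build_ext --inplace
    from distutils.core import setup
    from distutils.extension import Extension
    from Cython.Build import cythonize
    import numpy as np

    extensions = [
        # ml.pyx cimports numpy, so the NumPy headers must be on the include path
        Extension("ml", ["ml.pyx"], include_dirs=[np.get_include()]),
    ]
    setup(name="ml", ext_modules=cythonize(extensions))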
diff --git a/experiments/analyze.py b/experiments/analyze.py
new file mode 100644
index 0000000..6a6896b
--- /dev/null
+++ b/experiments/analyze.py
@@ -0,0 +1,4 @@
+import pstats
+
+p = pstats.Stats("run.stats")
+p.print_stats()
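Note: analyze.py only prints a profile that already exists on disk; the run.stats file it expects is the kind written by the standard-library profiler, e.g. `python -m cProfile -o run.stats process.py`. A slightly more readable variant (hypothetical, not part of the commit) sorts the entries by cumulative time and truncates the output:

    # Hypothetical variant of analyze.py: sort by cumulative time and
    # show only the 20 most expensive entries of run.stats.
    import pstats

    p = pstats.Stats("run.stats")
    p.sort_stats("cumulative").print_stats(20)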
diff --git a/experiments/ml.pyx b/experiments/ml.pyx
new file mode 100644
index 0000000..c6291c5
--- /dev/null
+++ b/experiments/ml.pyx
@@ -0,0 +1,59 @@
+# cython: boundscheck=False, cdivision=True
+import numpy as np
+cimport numpy as np
+from libc.math cimport log
+
+DTYPE = np.float64
+ctypedef np.float_t DTYPE_t
+
+cdef DTYPE_t weight_victim(int dist, int dt, DTYPE_t alpha,
+                           DTYPE_t delta, DTYPE_t gamma):
+    cdef DTYPE_t structural, temporal
+    structural = delta ** dist
+    temporal = (gamma - 1. / alpha) * 1. / (1. + dt / alpha) ** gamma
+    return structural * temporal
+
+
+cdef DTYPE_t weight_non_victim(int dist, int t, DTYPE_t alpha,
+                               DTYPE_t delta, DTYPE_t gamma):
+    cdef DTYPE_t structural, temporal
+    structural = delta ** dist
+    temporal = 1. - 1. / (1. + (3012. - t) / alpha) ** gamma
+    return 1. - structural * temporal
+
+
+def ml(dict root_victims, dict victims, dict non_victims,
+       DTYPE_t alpha, DTYPE_t delta, DTYPE_t gamma=1.01):
+    cdef:
+        int n_roots, n_victims, n_nodes, roots, i, dist, dt, t
+        DTYPE_t beta
+        list parents, parents_weights
+    n_roots, n_victims = len(root_victims), len(victims)
+    n_nodes = n_victims + len(non_victims)
+    cdef:
+        np.ndarray[DTYPE_t] probs = np.zeros(n_victims, dtype=DTYPE)
+        np.ndarray[DTYPE_t] probs_nv = np.zeros(len(non_victims), dtype=DTYPE)
+    for i, parents in enumerate(victims.itervalues()):
+        parents_weights = [weight_victim(dist, dt, alpha, delta, gamma)
+                           for (dist, dt) in parents]
+        probs[i] = max(parents_weights)
+    for i, parents in enumerate(non_victims.itervalues()):
+        parents_weights = [weight_non_victim(dist, t, alpha, delta, gamma)
+                           for (dist, t) in parents]
+        probs_nv[i] = max(parents_weights)
+    probs.sort()
+    probs = probs[::-1]
+    cdef:
+        np.ndarray[DTYPE_t] betas = probs / (1. + probs)
+        np.ndarray[DTYPE_t] cums = np.log(probs.cumsum())
+    for i in xrange(n_victims - 1, 0, -1):
+        roots = n_roots + n_victims - 1 - i
+        if betas[i] > roots / float(n_nodes):
+            break
+    else:
+        print "alpha: {0}, delta: {1}. Everyone is a root".format(alpha, delta)
+        roots = n_roots + n_victims
+    beta = roots / float(n_nodes)
+    return (beta, roots,
+            roots * log(beta) + (n_nodes - roots) * log(1 - beta) + cums[i]
+            + np.log(probs_nv).sum())
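Note on the interface: ml() expects three dicts keyed by node id. victims maps each victimized node to a list of (dist, dt) pairs, where dist is the graph distance to a candidate parent and dt the delay since that parent's victimization; non_victims maps each non-victimized node to (dist, t) pairs, where t is the candidate parent's victimization time; root_victims holds victims with no candidate parent, and only its size is used. A toy call could look like the sketch below (the node ids, distances and times are invented purely for illustration, and the extension must have been built first, e.g. with the Makefile above):

    # Hypothetical toy input for ml(); all numbers are made up.
    from ml import ml

    root_victims = {1: []}               # victims with no candidate parent
    victims = {2: [(1, 30)],             # node 2: one parent at distance 1, 30 time units earlier
               3: [(1, 45), (2, 10)]}    # node 3: two candidate parents
    non_victims = {4: [(1, 2500)],       # non-victims: (distance, parent's victimization time)
                   5: [(2, 2700)]}

    beta, roots, loglik = ml(root_victims, victims, non_victims,
                             alpha=1000006., delta=0.5)
    print beta, roots, loglik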
diff --git a/experiments/process.py b/experiments/process.py
new file mode 100644
index 0000000..1ec917e
--- /dev/null
+++ b/experiments/process.py
@@ -0,0 +1,50 @@
+from csv import DictReader
+import sys
+from ml import ml
+import numpy as np
+from cPickle import dump, load
+from itertools import product
+
+
+def build_network(filename):
+    victims = {}
+    non_victims = {}
+    with open(filename) as fh:
+        reader = DictReader(fh)
+        for row in reader:
+            from_, to = int(float(row["from"])), int(float(row["to"]))
+            if row["t2"] != "NA":
+                dt = int(row["t2"]) - int(row["t1"])
+                parent = (int(row["dist"]), dt)
+                if to not in victims:
+                    victims[to] = []
+                victims[to].append(parent)
+                if from_ not in victims:
+                    victims[from_] = []
+            else:
+                from_, to = int(float(row["from"])), int(float(row["to"]))
+                parent = (int(row["dist"]), int(row["t1"]))
+                if to not in non_victims:
+                    non_victims[to] = []
+                non_victims[to].append(parent)
+                if from_ not in victims:
+                    victims[from_] = []
+    root_victims = {}
+    for victim in victims.keys():
+        if not victims[victim]:
+            del victims[victim]
+            root_victims[victim] = []
+    return root_victims, victims, non_victims
+
+
+if __name__ == "__main__":
+    #root_victims, victims, non_victims = build_network(sys.argv[1])
+    #dump((root_victims, victims, non_victims), open("network.pickle", "w"))
+    root_victims, victims, non_victims = load(open("network.pickle"))
+
+    alpha = np.arange(1000006., 1000007., 1.)
+    delta = np.arange(0.1, 1., 0.01)
+    with open("out.log", "a") as fh:
+        for a, d in product(alpha, delta):
+            beta, roots, ll = ml(root_victims, victims, non_victims, a, d)
+            fh.write("\t".join(map(str, [a, d, beta, roots, ll])) + "\n")
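Note on the input: build_network() reads a CSV with columns from, to, dist, t1 and t2, where t2 is the string "NA" when the target node was never victimized (the __main__ block currently skips this step and unpickles a previously built network.pickle instead). A hypothetical smoke test, with invented rows and assuming the ml extension is built so the import at the top of process.py succeeds:

    # Hypothetical smoke test for build_network(); the rows are invented and
    # only mirror the column names the parser expects.
    from process import build_network

    with open("toy.csv", "w") as fh:
        fh.write("from,to,dist,t1,t2\n")
        fh.write("1,2,1,100,130\n")   # victim edge: node 2 hit 30 time units after node 1
        fh.write("1,3,2,100,NA\n")    # non-victim edge: node 3 never victimized

    root_victims, victims, non_victims = build_network("toy.csv")
    print root_victims   # {1: []}
    print victims        # {2: [(1, 30)]}
    print non_victims    # {3: [(2, 100)]}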