summary refs log tree commit diff stats
path: root/hw3/2.py
diff options
context:
space:
mode:
Diffstat (limited to 'hw3/2.py')
-rw-r--r-- hw3/2.py 86
1 files changed, 86 insertions, 0 deletions
diff --git a/hw3/2.py b/hw3/2.py
new file mode 100644
index 0000000..6ccbcfd
--- /dev/null
+++ b/hw3/2.py
@@ -0,0 +1,86 @@
+from numpy.random import normal
+import numpy as np
+from pickle import load
+from random import shuffle
+
+
+def init_vectors(K, sigma=0.01):
+ U = normal(0, sigma, size=(n+1, K))
+ V = normal(0, sigma, size=(m+1, K))
+ return U, V
+
+
+def init_vectors_bis(K, sigma=0.01):
+ U = normal(0, sigma, size=(n+1, K))
+ V = normal(0, sigma, size=(m+1, K))
+ a = np.ones(n+1)
+ b = np.ones(m+1)
+ g = 1
+ return U, V, a, b, g
+
+
+def sgd_step(U, V, lamb=0.05, sigmas=1.0):
+ keys = train.keys()
+ shuffle(keys)
+ for (i, j) in keys:
+ r = train[(i, j)]
+ ui = np.copy(U[i])
+ vj = np.copy(V[j])
+ a = (lamb / sigmas) * (float(r) - np.inner(ui, vj))
+ U[i] += a * vj
+ V[j] += a * ui
+
+
+def sgd_step_bis(U, V, a, b, g, lamb=0.05, sigmas=1.0):
+ keys = train.keys()
+ shuffle(keys)
+ for (i, j) in keys:
+ r = train[(i, j)]
+ ui = np.copy(U[i])
+ vj = np.copy(V[j])
+ e = (lamb / sigmas) * (float(r) - np.inner(ui, vj) - a[i] - b[j] - g)
+ U[i] += e * vj
+ V[j] += e * ui
+ a[i] += e
+ b[j] += e
+ g += e
+ return g
+
+
+def mse(U, V, d):
+ serror = sum((float(r) - np.inner(U[i], V[j])) ** 2
+ for (i, j), r in d.iteritems())
+ return serror / len(d)
+
+
+def mse_bis(U, V, a, b, g, d):
+ serror = sum((float(r) - np.inner(U[i], V[j]) - a[i] - b[j] - g) ** 2
+ for (i, j), r in d.iteritems())
+ return serror / len(d)
+
+
+def part_c():
+ # Sweeps K over 1..10 (xrange(1, 11)).  For each K it draws fresh random
+ # U, V with init_vectors, calls sgd_step repeatedly (xrange(10)), and prints
+ # K followed by the MSE on the global train and test dicts.
+ # NOTE(review): this rendering has collapsed the indentation, so whether the
+ # print runs once per K or once per sgd_step call cannot be confirmed here.
+ for K in xrange(1, 11):
+ U, V = init_vectors(K)
+ for t in xrange(10):
+ sgd_step(U, V)
+ print K, mse(U, V, train), mse(U, V, test)
+
+
+def part_d():
+ # Trains the model with offsets using K = 2 and prints diagnostics.
+ # NOTE(review): this rendering has collapsed the indentation, so which of
+ # the statements after `print t` belong to the loop cannot be confirmed here.
+ U, V, a, b, g = init_vectors_bis(2)
+ for t in xrange(10):
+ print t
+ # g is a scalar returned by sgd_step_bis, so it has to be rebound here;
+ # U, V, a and b are updated in place by that call.
+ g = sgd_step_bis(U, V, a, b, g)
+ print g
+ # Find the indices of the smallest and largest entries of b and print each
+ # index with its value.
+ # NOTE(review): b has m + 1 entries initialised to 1, and sgd_step_bis only
+ # touches b[j] for j seen in train; entries never trained (presumably index
+ # 0 if ids are 1-based -- verify) still hold 1 and take part in min/max.
+ i = range(len(b))
+ mi = min(i, key=lambda x: b[x])
+ ma = max(i, key=lambda x: b[x])
+ print mi, b[mi]
+ print ma, b[ma]
+ # MSE of the model with offsets on the global train and test dicts.
+ print mse_bis(U, V, a, b, g, train), mse_bis(U, V, a, b, g, test)
+
+
+if __name__ == "__main__":
+ n, m, train, test = load(open("data.pickle", "rb"))
+ part_d()