diff options
| author | Thibaut Horel <thibaut.horel@gmail.com> | 2015-10-30 17:16:32 -0400 |
|---|---|---|
| committer | Thibaut Horel <thibaut.horel@gmail.com> | 2015-10-30 17:16:32 -0400 |
| commit | 61f644a6a7d36dc5c15d957c48d10675ab3627ae (patch) | |
| tree | e765c3ac2b1239ea2728a625a7a19196c370adbe /hw3/2.py | |
| parent | 6a969e7afb0b796996f63b8d341f8891f187ca8e (diff) | |
| download | cs281-61f644a6a7d36dc5c15d957c48d10675ab3627ae.tar.gz | |
[hw3]
Diffstat (limited to 'hw3/2.py')
| -rw-r--r-- | hw3/2.py | 86 |
1 files changed, 86 insertions, 0 deletions
# Recovered from the collapsed diff of hw3/2.py
# (diff --git a/hw3/2.py b/hw3/2.py, new file mode 100644, index 0000000..6ccbcfd).
"""Fit low-rank factor models to a dictionary of ratings by SGD.

The data file ``data.pickle`` is unpickled into four values ``n, m, train,
test``.  ``train`` and ``test`` are dictionaries mapping an index pair
``(i, j)`` to a rating ``r``; ``n`` and ``m`` are the largest row indices
used for the factor matrices ``U`` and ``V`` (each gets ``n + 1`` /
``m + 1`` rows).

Two models are implemented:

* plain:   ``r ~ <U[i], V[j]>``
* biased:  ``r ~ <U[i], V[j]> + a[i] + b[j] + g``

The code runs unchanged on Python 2 and Python 3.
"""
from pickle import load
from random import shuffle

import numpy as np
from numpy.random import normal


def init_vectors(K, sigma=0.01, max_i=None, max_j=None):
    """Draw random initial factor matrices.

    Args:
        K: number of latent dimensions (columns of ``U`` and ``V``).
        sigma: standard deviation of the zero-mean normal used for every
            entry.
        max_i: largest first index; ``U`` gets ``max_i + 1`` rows.  Defaults
            to the module-level ``n``.
        max_j: largest second index; ``V`` gets ``max_j + 1`` rows.
            Defaults to the module-level ``m``.

    Returns:
        The pair ``(U, V)``.
    """
    max_i = n if max_i is None else max_i
    max_j = m if max_j is None else max_j
    U = normal(0, sigma, size=(max_i + 1, K))
    V = normal(0, sigma, size=(max_j + 1, K))
    return U, V


def init_vectors_bis(K, sigma=0.01, max_i=None, max_j=None):
    """Draw initial parameters for the model with bias terms.

    Args:
        K, sigma, max_i, max_j: as for :func:`init_vectors`.

    Returns:
        ``(U, V, a, b, g)`` where ``U`` and ``V`` are drawn as in
        :func:`init_vectors`, ``a`` and ``b`` are all-ones vectors with one
        entry per row of ``U`` and ``V`` respectively, and ``g`` is ``1``.
    """
    U, V = init_vectors(K, sigma, max_i, max_j)
    a = np.ones(U.shape[0])
    b = np.ones(V.shape[0])
    g = 1
    return U, V, a, b, g


def sgd_step(U, V, lamb=0.05, sigmas=1.0, data=None):
    """Run one pass of SGD over all ratings, updating ``U`` and ``V`` in place.

    The ratings are visited in a fresh random order.  For each rating the
    residual ``r - <U[i], V[j]>`` is scaled by ``lamb / sigmas`` and used to
    move ``U[i]`` along ``V[j]`` and ``V[j]`` along ``U[i]``.

    Args:
        U, V: factor matrices, modified in place.
        lamb: numerator of the step scale.
        sigmas: denominator of the step scale (presumably a noise variance;
            TODO confirm against the assignment statement).
        data: dictionary ``{(i, j): r}``.  Defaults to the module-level
            ``train``.
    """
    if data is None:
        data = train
    # list(...) is required on Python 3, where dict views cannot be shuffled.
    keys = list(data)
    shuffle(keys)
    scale = lamb / sigmas
    for (i, j) in keys:
        r = data[(i, j)]
        # Copy both rows first so each update uses the other's *old* value.
        ui = np.copy(U[i])
        vj = np.copy(V[j])
        step = scale * (float(r) - np.inner(ui, vj))
        U[i] += step * vj
        V[j] += step * ui


def sgd_step_bis(U, V, a, b, g, lamb=0.05, sigmas=1.0, data=None):
    """Run one pass of SGD for the model with bias terms.

    Same scheme as :func:`sgd_step`, with residual
    ``r - <U[i], V[j]> - a[i] - b[j] - g``.  ``U``, ``V``, ``a`` and ``b``
    are updated in place; ``g`` is a scalar and therefore returned.

    Args:
        U, V: factor matrices, modified in place.
        a, b: bias vectors indexed by ``i`` and ``j``, modified in place.
        g: current value of the global offset.
        lamb, sigmas: step scale is ``lamb / sigmas``.
        data: dictionary ``{(i, j): r}``.  Defaults to the module-level
            ``train``.

    Returns:
        The updated global offset ``g``.
    """
    if data is None:
        data = train
    keys = list(data)
    shuffle(keys)
    scale = lamb / sigmas
    for (i, j) in keys:
        r = data[(i, j)]
        ui = np.copy(U[i])
        vj = np.copy(V[j])
        e = scale * (float(r) - np.inner(ui, vj) - a[i] - b[j] - g)
        U[i] += e * vj
        V[j] += e * ui
        a[i] += e
        b[j] += e
        g += e
    return g


def mse(U, V, d):
    """Return the mean squared error of ``<U[i], V[j]>`` over ratings ``d``.

    Args:
        U, V: factor matrices.
        d: non-empty dictionary ``{(i, j): r}``.

    Raises:
        ZeroDivisionError: if ``d`` is empty.
    """
    serror = sum((float(r) - np.inner(U[i], V[j])) ** 2
                 for (i, j), r in d.items())
    return serror / len(d)


def mse_bis(U, V, a, b, g, d):
    """Return the mean squared error of the biased model over ratings ``d``.

    The prediction for ``(i, j)`` is ``<U[i], V[j]> + a[i] + b[j] + g``.

    Raises:
        ZeroDivisionError: if ``d`` is empty.
    """
    serror = sum((float(r) - np.inner(U[i], V[j]) - a[i] - b[j] - g) ** 2
                 for (i, j), r in d.items())
    return serror / len(d)


def part_c():
    """For K = 1..10, train the plain model for 10 passes and print errors.

    Each output line is ``K train_mse test_mse``.  Reads the module-level
    ``train`` and ``test``.
    """
    for K in range(1, 11):
        U, V = init_vectors(K)
        for _ in range(10):
            sgd_step(U, V)
        # Single formatted argument: prints identically on Python 2 and 3.
        print("%s %s %s" % (K, mse(U, V, train), mse(U, V, test)))


def part_d():
    """Train the biased model with K = 2 for 10 passes and print a summary.

    Prints the pass counter before each pass, then the final ``g``, the
    index and value of the smallest and largest entries of ``b``, and the
    train/test mean squared errors.  Reads the module-level ``train`` and
    ``test``.
    """
    U, V, a, b, g = init_vectors_bis(2)
    for t in range(10):
        print(t)
        g = sgd_step_bis(U, V, a, b, g)
    print(g)
    indices = range(len(b))
    mi = min(indices, key=lambda x: b[x])
    ma = max(indices, key=lambda x: b[x])
    print("%s %s" % (mi, b[mi]))
    print("%s %s" % (ma, b[ma]))
    print("%s %s" % (mse_bis(U, V, a, b, g, train),
                     mse_bis(U, V, a, b, g, test)))


if __name__ == "__main__":
    # NOTE(security): pickle.load can execute arbitrary code; only run this
    # against a data.pickle you produced or otherwise trust.
    with open("data.pickle", "rb") as f:
        n, m, train, test = load(f)
    part_d()
