summaryrefslogtreecommitdiffstats
path: root/hw4/2.tex
diff options
context:
space:
mode:
authorThibaut Horel <thibaut.horel@gmail.com>2015-11-16 12:35:05 -0500
committerThibaut Horel <thibaut.horel@gmail.com>2015-11-16 12:35:05 -0500
commitadc7cb7256c8fcc11e7fd85866d6d3e2dcb319c1 (patch)
tree9b0065b6215919e86fc0ea3f377ea6bf536b4bf2 /hw4/2.tex
parent61f644a6a7d36dc5c15d957c48d10675ab3627ae (diff)
downloadcs281-adc7cb7256c8fcc11e7fd85866d6d3e2dcb319c1.tar.gz
[HW4] Problem 1 and 2HEADmaster
Diffstat (limited to 'hw4/2.tex')
-rw-r--r--hw4/2.tex83
1 files changed, 83 insertions, 0 deletions
diff --git a/hw4/2.tex b/hw4/2.tex
new file mode 100644
index 0000000..6376208
--- /dev/null
+++ b/hw4/2.tex
@@ -0,0 +1,83 @@
+\begin{Verbatim}[commandchars=\\\{\}]
+\PY{k+kn}{import} \PY{n+nn}{sys}
+\PY{k+kn}{from} \PY{n+nn}{itertools} \PY{k+kn}{import} \PY{n}{islice}
+\PY{k+kn}{import} \PY{n+nn}{numpy} \PY{k+kn}{as} \PY{n+nn}{np}
+\PY{k+kn}{from} \PY{n+nn}{scipy.sparse} \PY{k+kn}{import} \PY{n}{coo\PYZus{}matrix}
+\PY{k+kn}{from} \PY{n+nn}{math} \PY{k+kn}{import} \PY{n}{sqrt}
+
+
+\PY{k}{def} \PY{n+nf}{get\PYZus{}ratings}\PY{p}{(}\PY{n}{filename}\PY{p}{)}\PY{p}{:}
+ \PY{k}{with} \PY{n+nb}{open}\PY{p}{(}\PY{n}{filename}\PY{p}{)} \PY{k}{as} \PY{n}{fh}\PY{p}{:}
+ \PY{k}{for} \PY{n}{line} \PY{o+ow}{in} \PY{n}{fh}\PY{p}{:}
+ \PY{k}{yield} \PY{n+nb}{map}\PY{p}{(}\PY{n+nb}{int}\PY{p}{,} \PY{n}{line}\PY{o}{.}\PY{n}{strip}\PY{p}{(}\PY{p}{)}\PY{o}{.}\PY{n}{split}\PY{p}{(}\PY{p}{)}\PY{p}{)}
+
+
+\PY{k}{def} \PY{n+nf}{get\PYZus{}train\PYZus{}test}\PY{p}{(}\PY{n}{filename}\PY{p}{)}\PY{p}{:}
+ \PY{n}{l} \PY{o}{=} \PY{p}{[}\PY{p}{(}\PY{n}{i}\PY{p}{,} \PY{n}{j}\PY{p}{)} \PY{k}{for} \PY{p}{(}\PY{n}{i}\PY{p}{,} \PY{n}{j}\PY{p}{,} \PY{n}{\PYZus{}}\PY{p}{)} \PY{o+ow}{in} \PY{n}{get\PYZus{}ratings}\PY{p}{(}\PY{n}{filename}\PY{p}{)}\PY{p}{]}
+ \PY{n}{n} \PY{o}{=} \PY{n+nb}{max}\PY{p}{(}\PY{n}{i} \PY{k}{for} \PY{p}{(}\PY{n}{i}\PY{p}{,} \PY{n}{\PYZus{}}\PY{p}{)} \PY{o+ow}{in} \PY{n}{l}\PY{p}{)}
+ \PY{n}{m} \PY{o}{=} \PY{n+nb}{max}\PY{p}{(}\PY{n}{j} \PY{k}{for} \PY{p}{(}\PY{n}{\PYZus{}}\PY{p}{,} \PY{n}{j}\PY{p}{)} \PY{o+ow}{in} \PY{n}{l}\PY{p}{)}
+ \PY{n}{g} \PY{o}{=} \PY{n}{get\PYZus{}ratings}\PY{p}{(}\PY{n}{filename}\PY{p}{)}
+ \PY{n}{train} \PY{o}{=} \PY{n}{islice}\PY{p}{(}\PY{n}{g}\PY{p}{,} \PY{l+m+mi}{100000}\PY{p}{)}
+ \PY{n}{test} \PY{o}{=} \PY{n}{islice}\PY{p}{(}\PY{n}{g}\PY{p}{,} \PY{l+m+mi}{100000}\PY{p}{)}
+ \PY{k}{return} \PY{n}{n}\PY{p}{,} \PY{n}{m}\PY{p}{,} \PY{n+nb}{list}\PY{p}{(}\PY{n}{train}\PY{p}{)}\PY{p}{,} \PY{n+nb}{list}\PY{p}{(}\PY{n}{test}\PY{p}{)}
+
+
+\PY{k}{def} \PY{n+nf}{sparse\PYZus{}matrix}\PY{p}{(}\PY{n}{ratings}\PY{p}{)}\PY{p}{:}
+ \PY{n}{i}\PY{p}{,} \PY{n}{j}\PY{p}{,} \PY{n}{data} \PY{o}{=} \PY{n+nb}{zip}\PY{p}{(}\PY{o}{*}\PY{n}{ratings}\PY{p}{)}
+ \PY{n}{S} \PY{o}{=} \PY{n}{coo\PYZus{}matrix}\PY{p}{(}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{p}{(}\PY{n}{i}\PY{p}{,} \PY{n}{j}\PY{p}{)}\PY{p}{)}\PY{p}{)}
+ \PY{k}{return} \PY{n}{S}\PY{o}{.}\PY{n}{tocsc}\PY{p}{(}\PY{p}{)}\PY{p}{,} \PY{n}{S}\PY{o}{.}\PY{n}{tocsr}\PY{p}{(}\PY{p}{)}
+
+
+\PY{k}{def} \PY{n+nf}{get\PYZus{}users}\PY{p}{(}\PY{n}{Rr}\PY{p}{)}\PY{p}{:}
+ \PY{k}{return} \PY{p}{[}\PY{n}{i} \PY{k}{for} \PY{n}{i} \PY{o+ow}{in} \PY{n+nb}{xrange}\PY{p}{(}\PY{n}{Rr}\PY{o}{.}\PY{n}{shape}\PY{p}{[}\PY{l+m+mi}{0}\PY{p}{]}\PY{p}{)} \PY{k}{if} \PY{n+nb}{len}\PY{p}{(}\PY{n}{Rr}\PY{p}{[}\PY{n}{i}\PY{p}{]}\PY{o}{.}\PY{n}{nonzero}\PY{p}{(}\PY{p}{)}\PY{p}{[}\PY{l+m+mi}{1}\PY{p}{]}\PY{p}{)}\PY{p}{]}
+
+
+\PY{k}{def} \PY{n+nf}{get\PYZus{}jokes}\PY{p}{(}\PY{n}{Rc}\PY{p}{)}\PY{p}{:}
+ \PY{k}{return} \PY{p}{[}\PY{n}{j} \PY{k}{for} \PY{n}{j} \PY{o+ow}{in} \PY{n+nb}{xrange}\PY{p}{(}\PY{n}{Rc}\PY{o}{.}\PY{n}{shape}\PY{p}{[}\PY{l+m+mi}{1}\PY{p}{]}\PY{p}{)} \PY{k}{if} \PY{n+nb}{len}\PY{p}{(}\PY{n}{Rc}\PY{p}{[}\PY{p}{:}\PY{p}{,} \PY{n}{j}\PY{p}{]}\PY{o}{.}\PY{n}{nonzero}\PY{p}{(}\PY{p}{)}\PY{p}{[}\PY{l+m+mi}{0}\PY{p}{]}\PY{p}{)}\PY{p}{]}
+
+
+\PY{k}{def} \PY{n+nf}{sample\PYZus{}users}\PY{p}{(}\PY{n}{U}\PY{p}{,} \PY{n}{V}\PY{p}{,} \PY{n}{Rr}\PY{p}{,} \PY{n}{users}\PY{p}{)}\PY{p}{:}
+ \PY{k}{for} \PY{n}{i} \PY{o+ow}{in} \PY{n}{users}\PY{p}{:}
+ \PY{n}{r} \PY{o}{=} \PY{n}{Rr}\PY{p}{[}\PY{n}{i}\PY{p}{]}
+ \PY{n}{ind} \PY{o}{=} \PY{n}{r}\PY{o}{.}\PY{n}{nonzero}\PY{p}{(}\PY{p}{)}\PY{p}{[}\PY{l+m+mi}{1}\PY{p}{]}
+ \PY{n}{v} \PY{o}{=} \PY{n}{V}\PY{p}{[}\PY{n}{ind}\PY{p}{]}
+ \PY{n}{isigma} \PY{o}{=} \PY{n}{np}\PY{o}{.}\PY{n}{identity}\PY{p}{(}\PY{n}{k}\PY{p}{)} \PY{o}{/} \PY{l+m+mf}{5.} \PY{o}{+} \PY{n}{np}\PY{o}{.}\PY{n}{dot}\PY{p}{(}\PY{n}{v}\PY{o}{.}\PY{n}{T}\PY{p}{,} \PY{n}{v}\PY{p}{)}
+ \PY{n}{sigma} \PY{o}{=} \PY{n}{np}\PY{o}{.}\PY{n}{linalg}\PY{o}{.}\PY{n}{inv}\PY{p}{(}\PY{n}{isigma}\PY{p}{)}
+ \PY{n}{U}\PY{p}{[}\PY{n}{i}\PY{p}{]} \PY{o}{=} \PY{n}{np}\PY{o}{.}\PY{n}{random}\PY{o}{.}\PY{n}{multivariate\PYZus{}normal}\PY{p}{(}\PY{n}{np}\PY{o}{.}\PY{n}{dot}\PY{p}{(}\PY{n}{sigma}\PY{p}{,} \PY{n}{r}\PY{o}{.}\PY{n}{dot}\PY{p}{(}\PY{n}{V}\PY{p}{)}\PY{p}{[}\PY{l+m+mi}{0}\PY{p}{]}\PY{p}{)}\PY{p}{,}
+ \PY{n}{sigma}\PY{p}{)}
+
+
+\PY{k}{def} \PY{n+nf}{sample\PYZus{}jokes}\PY{p}{(}\PY{n}{U}\PY{p}{,} \PY{n}{V}\PY{p}{,} \PY{n}{Rc}\PY{p}{,} \PY{n}{jokes}\PY{p}{)}\PY{p}{:}
+ \PY{k}{for} \PY{n}{j} \PY{o+ow}{in} \PY{n}{jokes}\PY{p}{:}
+ \PY{n}{r} \PY{o}{=} \PY{n}{Rc}\PY{p}{[}\PY{p}{:}\PY{p}{,} \PY{n}{j}\PY{p}{]}
+ \PY{n}{u} \PY{o}{=} \PY{n}{U}\PY{p}{[}\PY{n}{r}\PY{o}{.}\PY{n}{nonzero}\PY{p}{(}\PY{p}{)}\PY{p}{[}\PY{l+m+mi}{0}\PY{p}{]}\PY{p}{]}
+ \PY{n}{isigma} \PY{o}{=} \PY{n}{np}\PY{o}{.}\PY{n}{identity}\PY{p}{(}\PY{n}{k}\PY{p}{)} \PY{o}{/} \PY{l+m+mf}{5.} \PY{o}{+} \PY{n}{np}\PY{o}{.}\PY{n}{dot}\PY{p}{(}\PY{n}{u}\PY{o}{.}\PY{n}{T}\PY{p}{,} \PY{n}{u}\PY{p}{)}
+ \PY{n}{sigma} \PY{o}{=} \PY{n}{np}\PY{o}{.}\PY{n}{linalg}\PY{o}{.}\PY{n}{inv}\PY{p}{(}\PY{n}{isigma}\PY{p}{)}
+ \PY{n}{V}\PY{p}{[}\PY{n}{j}\PY{p}{]} \PY{o}{=} \PY{n}{np}\PY{o}{.}\PY{n}{random}\PY{o}{.}\PY{n}{multivariate\PYZus{}normal}\PY{p}{(}\PY{n}{np}\PY{o}{.}\PY{n}{dot}\PY{p}{(}\PY{n}{sigma}\PY{p}{,} \PY{n}{r}\PY{o}{.}\PY{n}{T}\PY{o}{.}\PY{n}{dot}\PY{p}{(}\PY{n}{U}\PY{p}{)}\PY{p}{[}\PY{l+m+mi}{0}\PY{p}{]}\PY{p}{)}\PY{p}{,}
+ \PY{n}{sigma}\PY{p}{)}
+
+
+\PY{k}{def} \PY{n+nf}{sample}\PY{p}{(}\PY{n}{U}\PY{p}{,} \PY{n}{V}\PY{p}{,} \PY{n}{Rr}\PY{p}{,} \PY{n}{Rc}\PY{p}{,} \PY{n}{users}\PY{p}{,} \PY{n}{jokes}\PY{p}{)}\PY{p}{:}
+ \PY{n}{sample\PYZus{}users}\PY{p}{(}\PY{n}{U}\PY{p}{,} \PY{n}{V}\PY{p}{,} \PY{n}{Rr}\PY{p}{,} \PY{n}{users}\PY{p}{)}
+ \PY{n}{sample\PYZus{}jokes}\PY{p}{(}\PY{n}{U}\PY{p}{,} \PY{n}{V}\PY{p}{,} \PY{n}{Rc}\PY{p}{,} \PY{n}{jokes}\PY{p}{)}
+
+
+\PY{k}{def} \PY{n+nf}{likelihood}\PY{p}{(}\PY{n}{ratings}\PY{p}{,} \PY{n}{U}\PY{p}{,} \PY{n}{V}\PY{p}{)}\PY{p}{:}
+ \PY{k}{return} \PY{n+nb}{sum}\PY{p}{(}\PY{p}{(}\PY{n}{r} \PY{o}{\PYZhy{}} \PY{n}{np}\PY{o}{.}\PY{n}{inner}\PY{p}{(}\PY{n}{U}\PY{p}{[}\PY{n}{i}\PY{p}{]}\PY{p}{,} \PY{n}{V}\PY{p}{[}\PY{n}{j}\PY{p}{]}\PY{p}{)}\PY{p}{)} \PY{o}{*}\PY{o}{*} \PY{l+m+mi}{2} \PY{k}{for} \PY{n}{i}\PY{p}{,} \PY{n}{j}\PY{p}{,} \PY{n}{r} \PY{o+ow}{in} \PY{n}{ratings}\PY{p}{)}
+
+
+\PY{k}{if} \PY{n}{\PYZus{}\PYZus{}name\PYZus{}\PYZus{}} \PY{o}{==} \PY{l+s}{\PYZdq{}}\PY{l+s}{\PYZus{}\PYZus{}main\PYZus{}\PYZus{}}\PY{l+s}{\PYZdq{}}\PY{p}{:}
+ \PY{n}{n}\PY{p}{,} \PY{n}{m}\PY{p}{,} \PY{n}{train}\PY{p}{,} \PY{n}{test} \PY{o}{=} \PY{n}{get\PYZus{}train\PYZus{}test}\PY{p}{(}\PY{n}{sys}\PY{o}{.}\PY{n}{argv}\PY{p}{[}\PY{l+m+mi}{1}\PY{p}{]}\PY{p}{)}
+ \PY{n}{Rc}\PY{p}{,} \PY{n}{Rr} \PY{o}{=} \PY{n}{sparse\PYZus{}matrix}\PY{p}{(}\PY{n}{train}\PY{p}{)}
+ \PY{n}{users} \PY{o}{=} \PY{n}{get\PYZus{}users}\PY{p}{(}\PY{n}{Rr}\PY{p}{)} \PY{c}{\PYZsh{} users with at least one rating}
+ \PY{n}{jokes} \PY{o}{=} \PY{n}{get\PYZus{}jokes}\PY{p}{(}\PY{n}{Rc}\PY{p}{)} \PY{c}{\PYZsh{} jokes with at least one rating}
+ \PY{k}{for} \PY{n}{k} \PY{o+ow}{in} \PY{n+nb}{xrange}\PY{p}{(}\PY{l+m+mi}{1}\PY{p}{,} \PY{l+m+mi}{11}\PY{p}{)}\PY{p}{:}
+ \PY{k}{with} \PY{n+nb}{open}\PY{p}{(}\PY{l+s}{\PYZdq{}}\PY{l+s}{gibbs\PYZus{}}\PY{l+s}{\PYZdq{}} \PY{o}{+} \PY{n+nb}{str}\PY{p}{(}\PY{n}{k}\PY{p}{)} \PY{o}{+} \PY{l+s}{\PYZdq{}}\PY{l+s}{.txt}\PY{l+s}{\PYZdq{}}\PY{p}{,} \PY{l+s}{\PYZdq{}}\PY{l+s}{w}\PY{l+s}{\PYZdq{}}\PY{p}{)} \PY{k}{as} \PY{n}{fh}\PY{p}{:}
+ \PY{n}{U} \PY{o}{=} \PY{n}{np}\PY{o}{.}\PY{n}{random}\PY{o}{.}\PY{n}{normal}\PY{p}{(}\PY{l+m+mi}{0}\PY{p}{,} \PY{n}{sqrt}\PY{p}{(}\PY{l+m+mi}{5}\PY{p}{)}\PY{p}{,} \PY{n}{size}\PY{o}{=}\PY{p}{(}\PY{n}{Rc}\PY{o}{.}\PY{n}{shape}\PY{p}{[}\PY{l+m+mi}{0}\PY{p}{]}\PY{p}{,} \PY{n}{k}\PY{p}{)}\PY{p}{)}
+ \PY{n}{V} \PY{o}{=} \PY{n}{np}\PY{o}{.}\PY{n}{random}\PY{o}{.}\PY{n}{normal}\PY{p}{(}\PY{l+m+mi}{0}\PY{p}{,} \PY{n}{sqrt}\PY{p}{(}\PY{l+m+mi}{5}\PY{p}{)}\PY{p}{,} \PY{n}{size}\PY{o}{=}\PY{p}{(}\PY{n}{Rc}\PY{o}{.}\PY{n}{shape}\PY{p}{[}\PY{l+m+mi}{1}\PY{p}{]}\PY{p}{,} \PY{n}{k}\PY{p}{)}\PY{p}{)}
+ \PY{k}{for} \PY{n}{e} \PY{o+ow}{in} \PY{n+nb}{xrange}\PY{p}{(}\PY{l+m+mi}{100}\PY{p}{)}\PY{p}{:}
+ \PY{n}{fh}\PY{o}{.}\PY{n}{write}\PY{p}{(}\PY{l+s}{\PYZdq{}}\PY{l+s+se}{\PYZbs{}t}\PY{l+s}{\PYZdq{}}\PY{o}{.}\PY{n}{join}\PY{p}{(}\PY{n+nb}{map}\PY{p}{(}\PY{n+nb}{str}\PY{p}{,} \PY{p}{[}\PY{n}{e}\PY{p}{,} \PY{n}{likelihood}\PY{p}{(}\PY{n}{train}\PY{p}{,} \PY{n}{U}\PY{p}{,} \PY{n}{V}\PY{p}{)}\PY{p}{,}
+ \PY{n}{likelihood}\PY{p}{(}\PY{n}{test}\PY{p}{,} \PY{n}{U}\PY{p}{,} \PY{n}{V}\PY{p}{)}\PY{p}{]}\PY{p}{)}\PY{p}{)} \PY{o}{+} \PY{l+s}{\PYZdq{}}\PY{l+s+se}{\PYZbs{}n}\PY{l+s}{\PYZdq{}}\PY{p}{)}
+ \PY{n}{fh}\PY{o}{.}\PY{n}{flush}\PY{p}{(}\PY{p}{)}
+ \PY{n}{sample}\PY{p}{(}\PY{n}{U}\PY{p}{,} \PY{n}{V}\PY{p}{,} \PY{n}{Rr}\PY{p}{,} \PY{n}{Rc}\PY{p}{,} \PY{n}{users}\PY{p}{,} \PY{n}{jokes}\PY{p}{)}
+\end{Verbatim}