diff options
| author | Thibaut Horel <thibaut.horel@gmail.com> | 2015-11-16 12:35:05 -0500 |
|---|---|---|
| committer | Thibaut Horel <thibaut.horel@gmail.com> | 2015-11-16 12:35:05 -0500 |
| commit | adc7cb7256c8fcc11e7fd85866d6d3e2dcb319c1 (patch) | |
| tree | 9b0065b6215919e86fc0ea3f377ea6bf536b4bf2 /hw4/2.tex | |
| parent | 61f644a6a7d36dc5c15d957c48d10675ab3627ae (diff) | |
| download | cs281-adc7cb7256c8fcc11e7fd85866d6d3e2dcb319c1.tar.gz | |
Diffstat (limited to 'hw4/2.tex')
| -rw-r--r-- | hw4/2.tex | 83 |
1 files changed, 83 insertions, 0 deletions
diff --git a/hw4/2.tex b/hw4/2.tex new file mode 100644 index 0000000..6376208 --- /dev/null +++ b/hw4/2.tex @@ -0,0 +1,83 @@ +\begin{Verbatim}[commandchars=\\\{\}] +\PY{k+kn}{import} \PY{n+nn}{sys} +\PY{k+kn}{from} \PY{n+nn}{itertools} \PY{k+kn}{import} \PY{n}{islice} +\PY{k+kn}{import} \PY{n+nn}{numpy} \PY{k+kn}{as} \PY{n+nn}{np} +\PY{k+kn}{from} \PY{n+nn}{scipy.sparse} \PY{k+kn}{import} \PY{n}{coo\PYZus{}matrix} +\PY{k+kn}{from} \PY{n+nn}{math} \PY{k+kn}{import} \PY{n}{sqrt} + + +\PY{k}{def} \PY{n+nf}{get\PYZus{}ratings}\PY{p}{(}\PY{n}{filename}\PY{p}{)}\PY{p}{:} + \PY{k}{with} \PY{n+nb}{open}\PY{p}{(}\PY{n}{filename}\PY{p}{)} \PY{k}{as} \PY{n}{fh}\PY{p}{:} + \PY{k}{for} \PY{n}{line} \PY{o+ow}{in} \PY{n}{fh}\PY{p}{:} + \PY{k}{yield} \PY{n+nb}{map}\PY{p}{(}\PY{n+nb}{int}\PY{p}{,} \PY{n}{line}\PY{o}{.}\PY{n}{strip}\PY{p}{(}\PY{p}{)}\PY{o}{.}\PY{n}{split}\PY{p}{(}\PY{p}{)}\PY{p}{)} + + +\PY{k}{def} \PY{n+nf}{get\PYZus{}train\PYZus{}test}\PY{p}{(}\PY{n}{filename}\PY{p}{)}\PY{p}{:} + \PY{n}{l} \PY{o}{=} \PY{p}{[}\PY{p}{(}\PY{n}{i}\PY{p}{,} \PY{n}{j}\PY{p}{)} \PY{k}{for} \PY{p}{(}\PY{n}{i}\PY{p}{,} \PY{n}{j}\PY{p}{,} \PY{n}{\PYZus{}}\PY{p}{)} \PY{o+ow}{in} \PY{n}{get\PYZus{}ratings}\PY{p}{(}\PY{n}{filename}\PY{p}{)}\PY{p}{]} + \PY{n}{n} \PY{o}{=} \PY{n+nb}{max}\PY{p}{(}\PY{n}{i} \PY{k}{for} \PY{p}{(}\PY{n}{i}\PY{p}{,} \PY{n}{\PYZus{}}\PY{p}{)} \PY{o+ow}{in} \PY{n}{l}\PY{p}{)} + \PY{n}{m} \PY{o}{=} \PY{n+nb}{max}\PY{p}{(}\PY{n}{j} \PY{k}{for} \PY{p}{(}\PY{n}{\PYZus{}}\PY{p}{,} \PY{n}{j}\PY{p}{)} \PY{o+ow}{in} \PY{n}{l}\PY{p}{)} + \PY{n}{g} \PY{o}{=} \PY{n}{get\PYZus{}ratings}\PY{p}{(}\PY{n}{filename}\PY{p}{)} + \PY{n}{train} \PY{o}{=} \PY{n}{islice}\PY{p}{(}\PY{n}{g}\PY{p}{,} \PY{l+m+mi}{100000}\PY{p}{)} + \PY{n}{test} \PY{o}{=} \PY{n}{islice}\PY{p}{(}\PY{n}{g}\PY{p}{,} \PY{l+m+mi}{100000}\PY{p}{)} + \PY{k}{return} \PY{n}{n}\PY{p}{,} \PY{n}{m}\PY{p}{,} \PY{n+nb}{list}\PY{p}{(}\PY{n}{train}\PY{p}{)}\PY{p}{,} \PY{n+nb}{list}\PY{p}{(}\PY{n}{test}\PY{p}{)} + + +\PY{k}{def} \PY{n+nf}{sparse\PYZus{}matrix}\PY{p}{(}\PY{n}{ratings}\PY{p}{)}\PY{p}{:} + \PY{n}{i}\PY{p}{,} \PY{n}{j}\PY{p}{,} \PY{n}{data} \PY{o}{=} \PY{n+nb}{zip}\PY{p}{(}\PY{o}{*}\PY{n}{ratings}\PY{p}{)} + \PY{n}{S} \PY{o}{=} \PY{n}{coo\PYZus{}matrix}\PY{p}{(}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{p}{(}\PY{n}{i}\PY{p}{,} \PY{n}{j}\PY{p}{)}\PY{p}{)}\PY{p}{)} + \PY{k}{return} \PY{n}{S}\PY{o}{.}\PY{n}{tocsc}\PY{p}{(}\PY{p}{)}\PY{p}{,} \PY{n}{S}\PY{o}{.}\PY{n}{tocsr}\PY{p}{(}\PY{p}{)} + + +\PY{k}{def} \PY{n+nf}{get\PYZus{}users}\PY{p}{(}\PY{n}{Rr}\PY{p}{)}\PY{p}{:} + \PY{k}{return} \PY{p}{[}\PY{n}{i} \PY{k}{for} \PY{n}{i} \PY{o+ow}{in} \PY{n+nb}{xrange}\PY{p}{(}\PY{n}{Rr}\PY{o}{.}\PY{n}{shape}\PY{p}{[}\PY{l+m+mi}{0}\PY{p}{]}\PY{p}{)} \PY{k}{if} \PY{n+nb}{len}\PY{p}{(}\PY{n}{Rr}\PY{p}{[}\PY{n}{i}\PY{p}{]}\PY{o}{.}\PY{n}{nonzero}\PY{p}{(}\PY{p}{)}\PY{p}{[}\PY{l+m+mi}{1}\PY{p}{]}\PY{p}{)}\PY{p}{]} + + +\PY{k}{def} \PY{n+nf}{get\PYZus{}jokes}\PY{p}{(}\PY{n}{Rc}\PY{p}{)}\PY{p}{:} + \PY{k}{return} \PY{p}{[}\PY{n}{j} \PY{k}{for} \PY{n}{j} \PY{o+ow}{in} \PY{n+nb}{xrange}\PY{p}{(}\PY{n}{Rc}\PY{o}{.}\PY{n}{shape}\PY{p}{[}\PY{l+m+mi}{1}\PY{p}{]}\PY{p}{)} \PY{k}{if} \PY{n+nb}{len}\PY{p}{(}\PY{n}{Rc}\PY{p}{[}\PY{p}{:}\PY{p}{,} \PY{n}{j}\PY{p}{]}\PY{o}{.}\PY{n}{nonzero}\PY{p}{(}\PY{p}{)}\PY{p}{[}\PY{l+m+mi}{0}\PY{p}{]}\PY{p}{)}\PY{p}{]} + + +\PY{k}{def} \PY{n+nf}{sample\PYZus{}users}\PY{p}{(}\PY{n}{U}\PY{p}{,} \PY{n}{V}\PY{p}{,} \PY{n}{Rr}\PY{p}{,} \PY{n}{users}\PY{p}{)}\PY{p}{:} + \PY{k}{for} \PY{n}{i} \PY{o+ow}{in} \PY{n}{users}\PY{p}{:} + \PY{n}{r} \PY{o}{=} \PY{n}{Rr}\PY{p}{[}\PY{n}{i}\PY{p}{]} + \PY{n}{ind} \PY{o}{=} \PY{n}{r}\PY{o}{.}\PY{n}{nonzero}\PY{p}{(}\PY{p}{)}\PY{p}{[}\PY{l+m+mi}{1}\PY{p}{]} + \PY{n}{v} \PY{o}{=} \PY{n}{V}\PY{p}{[}\PY{n}{ind}\PY{p}{]} + \PY{n}{isigma} \PY{o}{=} \PY{n}{np}\PY{o}{.}\PY{n}{identity}\PY{p}{(}\PY{n}{k}\PY{p}{)} \PY{o}{/} \PY{l+m+mf}{5.} \PY{o}{+} \PY{n}{np}\PY{o}{.}\PY{n}{dot}\PY{p}{(}\PY{n}{v}\PY{o}{.}\PY{n}{T}\PY{p}{,} \PY{n}{v}\PY{p}{)} + \PY{n}{sigma} \PY{o}{=} \PY{n}{np}\PY{o}{.}\PY{n}{linalg}\PY{o}{.}\PY{n}{inv}\PY{p}{(}\PY{n}{isigma}\PY{p}{)} + \PY{n}{U}\PY{p}{[}\PY{n}{i}\PY{p}{]} \PY{o}{=} \PY{n}{np}\PY{o}{.}\PY{n}{random}\PY{o}{.}\PY{n}{multivariate\PYZus{}normal}\PY{p}{(}\PY{n}{np}\PY{o}{.}\PY{n}{dot}\PY{p}{(}\PY{n}{sigma}\PY{p}{,} \PY{n}{r}\PY{o}{.}\PY{n}{dot}\PY{p}{(}\PY{n}{V}\PY{p}{)}\PY{p}{[}\PY{l+m+mi}{0}\PY{p}{]}\PY{p}{)}\PY{p}{,} + \PY{n}{sigma}\PY{p}{)} + + +\PY{k}{def} \PY{n+nf}{sample\PYZus{}jokes}\PY{p}{(}\PY{n}{U}\PY{p}{,} \PY{n}{V}\PY{p}{,} \PY{n}{Rc}\PY{p}{,} \PY{n}{jokes}\PY{p}{)}\PY{p}{:} + \PY{k}{for} \PY{n}{j} \PY{o+ow}{in} \PY{n}{jokes}\PY{p}{:} + \PY{n}{r} \PY{o}{=} \PY{n}{Rc}\PY{p}{[}\PY{p}{:}\PY{p}{,} \PY{n}{j}\PY{p}{]} + \PY{n}{u} \PY{o}{=} \PY{n}{U}\PY{p}{[}\PY{n}{r}\PY{o}{.}\PY{n}{nonzero}\PY{p}{(}\PY{p}{)}\PY{p}{[}\PY{l+m+mi}{0}\PY{p}{]}\PY{p}{]} + \PY{n}{isigma} \PY{o}{=} \PY{n}{np}\PY{o}{.}\PY{n}{identity}\PY{p}{(}\PY{n}{k}\PY{p}{)} \PY{o}{/} \PY{l+m+mf}{5.} \PY{o}{+} \PY{n}{np}\PY{o}{.}\PY{n}{dot}\PY{p}{(}\PY{n}{u}\PY{o}{.}\PY{n}{T}\PY{p}{,} \PY{n}{u}\PY{p}{)} + \PY{n}{sigma} \PY{o}{=} \PY{n}{np}\PY{o}{.}\PY{n}{linalg}\PY{o}{.}\PY{n}{inv}\PY{p}{(}\PY{n}{isigma}\PY{p}{)} + \PY{n}{V}\PY{p}{[}\PY{n}{j}\PY{p}{]} \PY{o}{=} \PY{n}{np}\PY{o}{.}\PY{n}{random}\PY{o}{.}\PY{n}{multivariate\PYZus{}normal}\PY{p}{(}\PY{n}{np}\PY{o}{.}\PY{n}{dot}\PY{p}{(}\PY{n}{sigma}\PY{p}{,} \PY{n}{r}\PY{o}{.}\PY{n}{T}\PY{o}{.}\PY{n}{dot}\PY{p}{(}\PY{n}{U}\PY{p}{)}\PY{p}{[}\PY{l+m+mi}{0}\PY{p}{]}\PY{p}{)}\PY{p}{,} + \PY{n}{sigma}\PY{p}{)} + + +\PY{k}{def} \PY{n+nf}{sample}\PY{p}{(}\PY{n}{U}\PY{p}{,} \PY{n}{V}\PY{p}{,} \PY{n}{Rr}\PY{p}{,} \PY{n}{Rc}\PY{p}{,} \PY{n}{users}\PY{p}{,} \PY{n}{jokes}\PY{p}{)}\PY{p}{:} + \PY{n}{sample\PYZus{}users}\PY{p}{(}\PY{n}{U}\PY{p}{,} \PY{n}{V}\PY{p}{,} \PY{n}{Rr}\PY{p}{,} \PY{n}{users}\PY{p}{)} + \PY{n}{sample\PYZus{}jokes}\PY{p}{(}\PY{n}{U}\PY{p}{,} \PY{n}{V}\PY{p}{,} \PY{n}{Rc}\PY{p}{,} \PY{n}{jokes}\PY{p}{)} + + +\PY{k}{def} \PY{n+nf}{likelihood}\PY{p}{(}\PY{n}{ratings}\PY{p}{,} \PY{n}{U}\PY{p}{,} \PY{n}{V}\PY{p}{)}\PY{p}{:} + \PY{k}{return} \PY{n+nb}{sum}\PY{p}{(}\PY{p}{(}\PY{n}{r} \PY{o}{\PYZhy{}} \PY{n}{np}\PY{o}{.}\PY{n}{inner}\PY{p}{(}\PY{n}{U}\PY{p}{[}\PY{n}{i}\PY{p}{]}\PY{p}{,} \PY{n}{V}\PY{p}{[}\PY{n}{j}\PY{p}{]}\PY{p}{)}\PY{p}{)} \PY{o}{*}\PY{o}{*} \PY{l+m+mi}{2} \PY{k}{for} \PY{n}{i}\PY{p}{,} \PY{n}{j}\PY{p}{,} \PY{n}{r} \PY{o+ow}{in} \PY{n}{ratings}\PY{p}{)} + + +\PY{k}{if} \PY{n}{\PYZus{}\PYZus{}name\PYZus{}\PYZus{}} \PY{o}{==} \PY{l+s}{\PYZdq{}}\PY{l+s}{\PYZus{}\PYZus{}main\PYZus{}\PYZus{}}\PY{l+s}{\PYZdq{}}\PY{p}{:} + \PY{n}{n}\PY{p}{,} \PY{n}{m}\PY{p}{,} \PY{n}{train}\PY{p}{,} \PY{n}{test} \PY{o}{=} \PY{n}{get\PYZus{}train\PYZus{}test}\PY{p}{(}\PY{n}{sys}\PY{o}{.}\PY{n}{argv}\PY{p}{[}\PY{l+m+mi}{1}\PY{p}{]}\PY{p}{)} + \PY{n}{Rc}\PY{p}{,} \PY{n}{Rr} \PY{o}{=} \PY{n}{sparse\PYZus{}matrix}\PY{p}{(}\PY{n}{train}\PY{p}{)} + \PY{n}{users} \PY{o}{=} \PY{n}{get\PYZus{}users}\PY{p}{(}\PY{n}{Rr}\PY{p}{)} \PY{c}{\PYZsh{} users with at least one rating} + \PY{n}{jokes} \PY{o}{=} \PY{n}{get\PYZus{}jokes}\PY{p}{(}\PY{n}{Rc}\PY{p}{)} \PY{c}{\PYZsh{} jokes with at least one rating} + \PY{k}{for} \PY{n}{k} \PY{o+ow}{in} \PY{n+nb}{xrange}\PY{p}{(}\PY{l+m+mi}{1}\PY{p}{,} \PY{l+m+mi}{11}\PY{p}{)}\PY{p}{:} + \PY{k}{with} \PY{n+nb}{open}\PY{p}{(}\PY{l+s}{\PYZdq{}}\PY{l+s}{gibbs\PYZus{}}\PY{l+s}{\PYZdq{}} \PY{o}{+} \PY{n+nb}{str}\PY{p}{(}\PY{n}{k}\PY{p}{)} \PY{o}{+} \PY{l+s}{\PYZdq{}}\PY{l+s}{.txt}\PY{l+s}{\PYZdq{}}\PY{p}{,} \PY{l+s}{\PYZdq{}}\PY{l+s}{w}\PY{l+s}{\PYZdq{}}\PY{p}{)} \PY{k}{as} \PY{n}{fh}\PY{p}{:} + \PY{n}{U} \PY{o}{=} \PY{n}{np}\PY{o}{.}\PY{n}{random}\PY{o}{.}\PY{n}{normal}\PY{p}{(}\PY{l+m+mi}{0}\PY{p}{,} \PY{n}{sqrt}\PY{p}{(}\PY{l+m+mi}{5}\PY{p}{)}\PY{p}{,} \PY{n}{size}\PY{o}{=}\PY{p}{(}\PY{n}{Rc}\PY{o}{.}\PY{n}{shape}\PY{p}{[}\PY{l+m+mi}{0}\PY{p}{]}\PY{p}{,} \PY{n}{k}\PY{p}{)}\PY{p}{)} + \PY{n}{V} \PY{o}{=} \PY{n}{np}\PY{o}{.}\PY{n}{random}\PY{o}{.}\PY{n}{normal}\PY{p}{(}\PY{l+m+mi}{0}\PY{p}{,} \PY{n}{sqrt}\PY{p}{(}\PY{l+m+mi}{5}\PY{p}{)}\PY{p}{,} \PY{n}{size}\PY{o}{=}\PY{p}{(}\PY{n}{Rc}\PY{o}{.}\PY{n}{shape}\PY{p}{[}\PY{l+m+mi}{1}\PY{p}{]}\PY{p}{,} \PY{n}{k}\PY{p}{)}\PY{p}{)} + \PY{k}{for} \PY{n}{e} \PY{o+ow}{in} \PY{n+nb}{xrange}\PY{p}{(}\PY{l+m+mi}{100}\PY{p}{)}\PY{p}{:} + \PY{n}{fh}\PY{o}{.}\PY{n}{write}\PY{p}{(}\PY{l+s}{\PYZdq{}}\PY{l+s+se}{\PYZbs{}t}\PY{l+s}{\PYZdq{}}\PY{o}{.}\PY{n}{join}\PY{p}{(}\PY{n+nb}{map}\PY{p}{(}\PY{n+nb}{str}\PY{p}{,} \PY{p}{[}\PY{n}{e}\PY{p}{,} \PY{n}{likelihood}\PY{p}{(}\PY{n}{train}\PY{p}{,} \PY{n}{U}\PY{p}{,} \PY{n}{V}\PY{p}{)}\PY{p}{,} + \PY{n}{likelihood}\PY{p}{(}\PY{n}{test}\PY{p}{,} \PY{n}{U}\PY{p}{,} \PY{n}{V}\PY{p}{)}\PY{p}{]}\PY{p}{)}\PY{p}{)} \PY{o}{+} \PY{l+s}{\PYZdq{}}\PY{l+s+se}{\PYZbs{}n}\PY{l+s}{\PYZdq{}}\PY{p}{)} + \PY{n}{fh}\PY{o}{.}\PY{n}{flush}\PY{p}{(}\PY{p}{)} + \PY{n}{sample}\PY{p}{(}\PY{n}{U}\PY{p}{,} \PY{n}{V}\PY{p}{,} \PY{n}{Rr}\PY{p}{,} \PY{n}{Rc}\PY{p}{,} \PY{n}{users}\PY{p}{,} \PY{n}{jokes}\PY{p}{)} +\end{Verbatim} |
