1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
|
\begin{Verbatim}[commandchars=\\\{\}]
\PY{k+kn}{import} \PY{n+nn}{sys}
\PY{k+kn}{from} \PY{n+nn}{itertools} \PY{k+kn}{import} \PY{n}{islice}
\PY{k+kn}{import} \PY{n+nn}{numpy} \PY{k+kn}{as} \PY{n+nn}{np}
\PY{k+kn}{from} \PY{n+nn}{scipy.sparse} \PY{k+kn}{import} \PY{n}{coo\PYZus{}matrix}
\PY{k+kn}{from} \PY{n+nn}{math} \PY{k+kn}{import} \PY{n}{sqrt}
\PY{k}{def} \PY{n+nf}{get\PYZus{}ratings}\PY{p}{(}\PY{n}{filename}\PY{p}{)}\PY{p}{:}
    \PY{l+s+sd}{\PYZdq{}\PYZdq{}\PYZdq{}Lazily yield one rating record per line of filename.}

\PY{l+s+sd}{    Every line is stripped, split on whitespace, and each field is passed}
\PY{l+s+sd}{    through int; the result of that map call is what gets yielded.}
\PY{l+s+sd}{    \PYZdq{}\PYZdq{}\PYZdq{}}
    \PY{k}{with} \PY{n+nb}{open}\PY{p}{(}\PY{n}{filename}\PY{p}{)} \PY{k}{as} \PY{n}{source}\PY{p}{:}
        \PY{k}{for} \PY{n}{record} \PY{o+ow}{in} \PY{n}{source}\PY{p}{:}
            \PY{n}{fields} \PY{o}{=} \PY{n}{record}\PY{o}{.}\PY{n}{strip}\PY{p}{(}\PY{p}{)}\PY{o}{.}\PY{n}{split}\PY{p}{(}\PY{p}{)}
            \PY{k}{yield} \PY{n+nb}{map}\PY{p}{(}\PY{n+nb}{int}\PY{p}{,} \PY{n}{fields}\PY{p}{)}
\PY{k}{def} \PY{n+nf}{get\PYZus{}train\PYZus{}test}\PY{p}{(}\PY{n}{filename}\PY{p}{,} \PY{n}{train\PYZus{}size}\PY{o}{=}\PY{l+m+mi}{100000}\PY{p}{,} \PY{n}{test\PYZus{}size}\PY{o}{=}\PY{l+m+mi}{100000}\PY{p}{)}\PY{p}{:}
    \PY{l+s+sd}{\PYZdq{}\PYZdq{}\PYZdq{}Read the ratings file once and split it into train and test lists.}

\PY{l+s+sd}{    The first train\PYZus{}size records form the training set and the following}
\PY{l+s+sd}{    test\PYZus{}size records form the test set. Returns (n, m, train, test), where}
\PY{l+s+sd}{    n and m are the largest first and second fields over the whole file.}
\PY{l+s+sd}{    Raises ValueError when the file holds no ratings.}
\PY{l+s+sd}{    \PYZdq{}\PYZdq{}\PYZdq{}}
    \PY{c}{\PYZsh{} Materialise every record once instead of parsing the file twice.}
    \PY{n}{ratings} \PY{o}{=} \PY{p}{[}\PY{n+nb}{list}\PY{p}{(}\PY{n}{rec}\PY{p}{)} \PY{k}{for} \PY{n}{rec} \PY{o+ow}{in} \PY{n}{get\PYZus{}ratings}\PY{p}{(}\PY{n}{filename}\PY{p}{)}\PY{p}{]}
    \PY{k}{if} \PY{o+ow}{not} \PY{n}{ratings}\PY{p}{:}
        \PY{k}{raise} \PY{n+ne}{ValueError}\PY{p}{(}\PY{l+s}{\PYZdq{}}\PY{l+s}{no ratings found in }\PY{l+s}{\PYZdq{}} \PY{o}{+} \PY{n}{filename}\PY{p}{)}
    \PY{n}{n} \PY{o}{=} \PY{n+nb}{max}\PY{p}{(}\PY{n}{i} \PY{k}{for} \PY{p}{(}\PY{n}{i}\PY{p}{,} \PY{n}{\PYZus{}}\PY{p}{,} \PY{n}{\PYZus{}}\PY{p}{)} \PY{o+ow}{in} \PY{n}{ratings}\PY{p}{)}
    \PY{n}{m} \PY{o}{=} \PY{n+nb}{max}\PY{p}{(}\PY{n}{j} \PY{k}{for} \PY{p}{(}\PY{n}{\PYZus{}}\PY{p}{,} \PY{n}{j}\PY{p}{,} \PY{n}{\PYZus{}}\PY{p}{)} \PY{o+ow}{in} \PY{n}{ratings}\PY{p}{)}
    \PY{n}{train} \PY{o}{=} \PY{n}{ratings}\PY{p}{[}\PY{p}{:}\PY{n}{train\PYZus{}size}\PY{p}{]}
    \PY{n}{test} \PY{o}{=} \PY{n}{ratings}\PY{p}{[}\PY{n}{train\PYZus{}size}\PY{p}{:}\PY{n}{train\PYZus{}size} \PY{o}{+} \PY{n}{test\PYZus{}size}\PY{p}{]}
    \PY{k}{return} \PY{n}{n}\PY{p}{,} \PY{n}{m}\PY{p}{,} \PY{n}{train}\PY{p}{,} \PY{n}{test}
\PY{k}{def} \PY{n+nf}{sparse\PYZus{}matrix}\PY{p}{(}\PY{n}{ratings}\PY{p}{,} \PY{n}{shape}\PY{o}{=}\PY{n+nb+bp}{None}\PY{p}{)}\PY{p}{:}
    \PY{l+s+sd}{\PYZdq{}\PYZdq{}\PYZdq{}Build the rating matrix from (i, j, value) triples; return (csc, csr).}

\PY{l+s+sd}{    shape is an optional (rows, cols) tuple forwarded to coo\PYZus{}matrix. When it}
\PY{l+s+sd}{    is None the shape is inferred from the largest indices in ratings, so}
\PY{l+s+sd}{    indices that occur only in other data would fall outside the matrix.}
\PY{l+s+sd}{    Raises ValueError when ratings is empty and no shape is given.}
\PY{l+s+sd}{    \PYZdq{}\PYZdq{}\PYZdq{}}
    \PY{n}{ratings} \PY{o}{=} \PY{n+nb}{list}\PY{p}{(}\PY{n}{ratings}\PY{p}{)}
    \PY{k}{if} \PY{o+ow}{not} \PY{n}{ratings}\PY{p}{:}
        \PY{c}{\PYZsh{} zip(*[]) cannot be unpacked into three names; handle it explicitly.}
        \PY{k}{if} \PY{n}{shape} \PY{o+ow}{is} \PY{n+nb+bp}{None}\PY{p}{:}
            \PY{k}{raise} \PY{n+ne}{ValueError}\PY{p}{(}\PY{l+s}{\PYZdq{}}\PY{l+s}{cannot infer a matrix shape from empty ratings}\PY{l+s}{\PYZdq{}}\PY{p}{)}
        \PY{n}{S} \PY{o}{=} \PY{n}{coo\PYZus{}matrix}\PY{p}{(}\PY{n+nb}{tuple}\PY{p}{(}\PY{n}{shape}\PY{p}{)}\PY{p}{)}
    \PY{k}{else}\PY{p}{:}
        \PY{n}{i}\PY{p}{,} \PY{n}{j}\PY{p}{,} \PY{n}{data} \PY{o}{=} \PY{n+nb}{zip}\PY{p}{(}\PY{o}{*}\PY{n}{ratings}\PY{p}{)}
        \PY{n}{S} \PY{o}{=} \PY{n}{coo\PYZus{}matrix}\PY{p}{(}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{p}{(}\PY{n}{i}\PY{p}{,} \PY{n}{j}\PY{p}{)}\PY{p}{)}\PY{p}{,} \PY{n}{shape}\PY{o}{=}\PY{n}{shape}\PY{p}{)}
    \PY{k}{return} \PY{n}{S}\PY{o}{.}\PY{n}{tocsc}\PY{p}{(}\PY{p}{)}\PY{p}{,} \PY{n}{S}\PY{o}{.}\PY{n}{tocsr}\PY{p}{(}\PY{p}{)}
\PY{k}{def} \PY{n+nf}{get\PYZus{}users}\PY{p}{(}\PY{n}{Rr}\PY{p}{)}\PY{p}{:}
    \PY{l+s+sd}{\PYZdq{}\PYZdq{}\PYZdq{}Return the row indices of Rr that hold at least one nonzero entry.\PYZdq{}\PYZdq{}\PYZdq{}}
    \PY{n}{rated} \PY{o}{=} \PY{p}{[}\PY{p}{]}
    \PY{k}{for} \PY{n}{row} \PY{o+ow}{in} \PY{n+nb}{xrange}\PY{p}{(}\PY{n}{Rr}\PY{o}{.}\PY{n}{shape}\PY{p}{[}\PY{l+m+mi}{0}\PY{p}{]}\PY{p}{)}\PY{p}{:}
        \PY{c}{\PYZsh{} nonzero()[1] lists the column indices of the nonzero entries of this row.}
        \PY{n}{columns} \PY{o}{=} \PY{n}{Rr}\PY{p}{[}\PY{n}{row}\PY{p}{]}\PY{o}{.}\PY{n}{nonzero}\PY{p}{(}\PY{p}{)}\PY{p}{[}\PY{l+m+mi}{1}\PY{p}{]}
        \PY{k}{if} \PY{n+nb}{len}\PY{p}{(}\PY{n}{columns}\PY{p}{)} \PY{o}{\PYZgt{}} \PY{l+m+mi}{0}\PY{p}{:}
            \PY{n}{rated}\PY{o}{.}\PY{n}{append}\PY{p}{(}\PY{n}{row}\PY{p}{)}
    \PY{k}{return} \PY{n}{rated}
\PY{k}{def} \PY{n+nf}{get\PYZus{}jokes}\PY{p}{(}\PY{n}{Rc}\PY{p}{)}\PY{p}{:}
    \PY{l+s+sd}{\PYZdq{}\PYZdq{}\PYZdq{}Return the column indices of Rc that hold at least one nonzero entry.\PYZdq{}\PYZdq{}\PYZdq{}}
    \PY{n}{rated} \PY{o}{=} \PY{p}{[}\PY{p}{]}
    \PY{k}{for} \PY{n}{col} \PY{o+ow}{in} \PY{n+nb}{xrange}\PY{p}{(}\PY{n}{Rc}\PY{o}{.}\PY{n}{shape}\PY{p}{[}\PY{l+m+mi}{1}\PY{p}{]}\PY{p}{)}\PY{p}{:}
        \PY{c}{\PYZsh{} nonzero()[0] lists the row indices of the nonzero entries of this column.}
        \PY{n}{rows} \PY{o}{=} \PY{n}{Rc}\PY{p}{[}\PY{p}{:}\PY{p}{,} \PY{n}{col}\PY{p}{]}\PY{o}{.}\PY{n}{nonzero}\PY{p}{(}\PY{p}{)}\PY{p}{[}\PY{l+m+mi}{0}\PY{p}{]}
        \PY{k}{if} \PY{n+nb}{len}\PY{p}{(}\PY{n}{rows}\PY{p}{)} \PY{o}{\PYZgt{}} \PY{l+m+mi}{0}\PY{p}{:}
            \PY{n}{rated}\PY{o}{.}\PY{n}{append}\PY{p}{(}\PY{n}{col}\PY{p}{)}
    \PY{k}{return} \PY{n}{rated}
\PY{k}{def} \PY{n+nf}{sample\PYZus{}users}\PY{p}{(}\PY{n}{U}\PY{p}{,} \PY{n}{V}\PY{p}{,} \PY{n}{Rr}\PY{p}{,} \PY{n}{users}\PY{p}{)}\PY{p}{:}
    \PY{l+s+sd}{\PYZdq{}\PYZdq{}\PYZdq{}Gibbs step: redraw U[i] in place for every row index i in users.}

\PY{l+s+sd}{    With r = Rr[i] and v = the rows of V at the nonzero columns of r, U[i] is}
\PY{l+s+sd}{    drawn from a normal with covariance sigma = inv(I / 5 + v.T v) and}
\PY{l+s+sd}{    mean sigma (r V). Nothing is returned.}
\PY{l+s+sd}{    \PYZdq{}\PYZdq{}\PYZdq{}}
    \PY{c}{\PYZsh{} Take the latent dimension from V instead of a module\PYZhy{}level k, so the}
    \PY{c}{\PYZsh{} function also works when it is imported rather than run as a script.}
    \PY{n}{k} \PY{o}{=} \PY{n}{V}\PY{o}{.}\PY{n}{shape}\PY{p}{[}\PY{l+m+mi}{1}\PY{p}{]}
    \PY{c}{\PYZsh{} The prior term I / 5 is the same for every user; build it once.}
    \PY{n}{prior} \PY{o}{=} \PY{n}{np}\PY{o}{.}\PY{n}{identity}\PY{p}{(}\PY{n}{k}\PY{p}{)} \PY{o}{/} \PY{l+m+mf}{5.}
    \PY{k}{for} \PY{n}{i} \PY{o+ow}{in} \PY{n}{users}\PY{p}{:}
        \PY{n}{r} \PY{o}{=} \PY{n}{Rr}\PY{p}{[}\PY{n}{i}\PY{p}{]}
        \PY{n}{ind} \PY{o}{=} \PY{n}{r}\PY{o}{.}\PY{n}{nonzero}\PY{p}{(}\PY{p}{)}\PY{p}{[}\PY{l+m+mi}{1}\PY{p}{]}
        \PY{n}{v} \PY{o}{=} \PY{n}{V}\PY{p}{[}\PY{n}{ind}\PY{p}{]}
        \PY{n}{isigma} \PY{o}{=} \PY{n}{prior} \PY{o}{+} \PY{n}{np}\PY{o}{.}\PY{n}{dot}\PY{p}{(}\PY{n}{v}\PY{o}{.}\PY{n}{T}\PY{p}{,} \PY{n}{v}\PY{p}{)}
        \PY{n}{sigma} \PY{o}{=} \PY{n}{np}\PY{o}{.}\PY{n}{linalg}\PY{o}{.}\PY{n}{inv}\PY{p}{(}\PY{n}{isigma}\PY{p}{)}
        \PY{n}{mean} \PY{o}{=} \PY{n}{np}\PY{o}{.}\PY{n}{dot}\PY{p}{(}\PY{n}{sigma}\PY{p}{,} \PY{n}{r}\PY{o}{.}\PY{n}{dot}\PY{p}{(}\PY{n}{V}\PY{p}{)}\PY{p}{[}\PY{l+m+mi}{0}\PY{p}{]}\PY{p}{)}
        \PY{n}{U}\PY{p}{[}\PY{n}{i}\PY{p}{]} \PY{o}{=} \PY{n}{np}\PY{o}{.}\PY{n}{random}\PY{o}{.}\PY{n}{multivariate\PYZus{}normal}\PY{p}{(}\PY{n}{mean}\PY{p}{,} \PY{n}{sigma}\PY{p}{)}
\PY{k}{def} \PY{n+nf}{sample\PYZus{}jokes}\PY{p}{(}\PY{n}{U}\PY{p}{,} \PY{n}{V}\PY{p}{,} \PY{n}{Rc}\PY{p}{,} \PY{n}{jokes}\PY{p}{)}\PY{p}{:}
    \PY{l+s+sd}{\PYZdq{}\PYZdq{}\PYZdq{}Gibbs step: redraw V[j] in place for every column index j in jokes.}

\PY{l+s+sd}{    With r = Rc[:, j] and u = the rows of U at the nonzero rows of r, V[j] is}
\PY{l+s+sd}{    drawn from a normal with covariance sigma = inv(I / 5 + u.T u) and}
\PY{l+s+sd}{    mean sigma (r.T U). Nothing is returned.}
\PY{l+s+sd}{    \PYZdq{}\PYZdq{}\PYZdq{}}
    \PY{c}{\PYZsh{} Take the latent dimension from U instead of a module\PYZhy{}level k, so the}
    \PY{c}{\PYZsh{} function also works when it is imported rather than run as a script.}
    \PY{n}{k} \PY{o}{=} \PY{n}{U}\PY{o}{.}\PY{n}{shape}\PY{p}{[}\PY{l+m+mi}{1}\PY{p}{]}
    \PY{c}{\PYZsh{} The prior term I / 5 is the same for every joke; build it once.}
    \PY{n}{prior} \PY{o}{=} \PY{n}{np}\PY{o}{.}\PY{n}{identity}\PY{p}{(}\PY{n}{k}\PY{p}{)} \PY{o}{/} \PY{l+m+mf}{5.}
    \PY{k}{for} \PY{n}{j} \PY{o+ow}{in} \PY{n}{jokes}\PY{p}{:}
        \PY{n}{r} \PY{o}{=} \PY{n}{Rc}\PY{p}{[}\PY{p}{:}\PY{p}{,} \PY{n}{j}\PY{p}{]}
        \PY{n}{u} \PY{o}{=} \PY{n}{U}\PY{p}{[}\PY{n}{r}\PY{o}{.}\PY{n}{nonzero}\PY{p}{(}\PY{p}{)}\PY{p}{[}\PY{l+m+mi}{0}\PY{p}{]}\PY{p}{]}
        \PY{n}{isigma} \PY{o}{=} \PY{n}{prior} \PY{o}{+} \PY{n}{np}\PY{o}{.}\PY{n}{dot}\PY{p}{(}\PY{n}{u}\PY{o}{.}\PY{n}{T}\PY{p}{,} \PY{n}{u}\PY{p}{)}
        \PY{n}{sigma} \PY{o}{=} \PY{n}{np}\PY{o}{.}\PY{n}{linalg}\PY{o}{.}\PY{n}{inv}\PY{p}{(}\PY{n}{isigma}\PY{p}{)}
        \PY{n}{mean} \PY{o}{=} \PY{n}{np}\PY{o}{.}\PY{n}{dot}\PY{p}{(}\PY{n}{sigma}\PY{p}{,} \PY{n}{r}\PY{o}{.}\PY{n}{T}\PY{o}{.}\PY{n}{dot}\PY{p}{(}\PY{n}{U}\PY{p}{)}\PY{p}{[}\PY{l+m+mi}{0}\PY{p}{]}\PY{p}{)}
        \PY{n}{V}\PY{p}{[}\PY{n}{j}\PY{p}{]} \PY{o}{=} \PY{n}{np}\PY{o}{.}\PY{n}{random}\PY{o}{.}\PY{n}{multivariate\PYZus{}normal}\PY{p}{(}\PY{n}{mean}\PY{p}{,} \PY{n}{sigma}\PY{p}{)}
\PY{k}{def} \PY{n+nf}{sample}\PY{p}{(}\PY{n}{U}\PY{p}{,} \PY{n}{V}\PY{p}{,} \PY{n}{Rr}\PY{p}{,} \PY{n}{Rc}\PY{p}{,} \PY{n}{users}\PY{p}{,} \PY{n}{jokes}\PY{p}{)}\PY{p}{:}
    \PY{l+s+sd}{\PYZdq{}\PYZdq{}\PYZdq{}Run one sweep: sample\PYZus{}users on (Rr, users), then sample\PYZus{}jokes on (Rc, jokes).\PYZdq{}\PYZdq{}\PYZdq{}}
    \PY{c}{\PYZsh{} The order is kept: the joke step runs after the user step has updated U.}
    \PY{n}{steps} \PY{o}{=} \PY{p}{(}\PY{p}{(}\PY{n}{sample\PYZus{}users}\PY{p}{,} \PY{n}{Rr}\PY{p}{,} \PY{n}{users}\PY{p}{)}\PY{p}{,} \PY{p}{(}\PY{n}{sample\PYZus{}jokes}\PY{p}{,} \PY{n}{Rc}\PY{p}{,} \PY{n}{jokes}\PY{p}{)}\PY{p}{)}
    \PY{k}{for} \PY{n}{step}\PY{p}{,} \PY{n}{matrix}\PY{p}{,} \PY{n}{ids} \PY{o+ow}{in} \PY{n}{steps}\PY{p}{:}
        \PY{n}{step}\PY{p}{(}\PY{n}{U}\PY{p}{,} \PY{n}{V}\PY{p}{,} \PY{n}{matrix}\PY{p}{,} \PY{n}{ids}\PY{p}{)}
\PY{k}{def} \PY{n+nf}{likelihood}\PY{p}{(}\PY{n}{ratings}\PY{p}{,} \PY{n}{U}\PY{p}{,} \PY{n}{V}\PY{p}{)}\PY{p}{:}
    \PY{l+s+sd}{\PYZdq{}\PYZdq{}\PYZdq{}Return the sum over (i, j, r) in ratings of (r \PYZhy{} U[i] . V[j]) ** 2.}

\PY{l+s+sd}{    Despite the name, the value is a sum of squared errors, not a probability.}
\PY{l+s+sd}{    \PYZdq{}\PYZdq{}\PYZdq{}}
    \PY{n}{total} \PY{o}{=} \PY{l+m+mi}{0}
    \PY{k}{for} \PY{n}{i}\PY{p}{,} \PY{n}{j}\PY{p}{,} \PY{n}{r} \PY{o+ow}{in} \PY{n}{ratings}\PY{p}{:}
        \PY{n}{residual} \PY{o}{=} \PY{n}{r} \PY{o}{\PYZhy{}} \PY{n}{np}\PY{o}{.}\PY{n}{inner}\PY{p}{(}\PY{n}{U}\PY{p}{[}\PY{n}{i}\PY{p}{]}\PY{p}{,} \PY{n}{V}\PY{p}{[}\PY{n}{j}\PY{p}{]}\PY{p}{)}
        \PY{n}{total} \PY{o}{=} \PY{n}{total} \PY{o}{+} \PY{n}{residual} \PY{o}{*}\PY{o}{*} \PY{l+m+mi}{2}
    \PY{k}{return} \PY{n}{total}
\PY{k}{if} \PY{n}{\PYZus{}\PYZus{}name\PYZus{}\PYZus{}} \PY{o}{==} \PY{l+s}{\PYZdq{}}\PY{l+s}{\PYZus{}\PYZus{}main\PYZus{}\PYZus{}}\PY{l+s}{\PYZdq{}}\PY{p}{:}
    \PY{c}{\PYZsh{} Usage: the ratings file is the first command\PYZhy{}line argument.}
    \PY{n}{n}\PY{p}{,} \PY{n}{m}\PY{p}{,} \PY{n}{train}\PY{p}{,} \PY{n}{test} \PY{o}{=} \PY{n}{get\PYZus{}train\PYZus{}test}\PY{p}{(}\PY{n}{sys}\PY{o}{.}\PY{n}{argv}\PY{p}{[}\PY{l+m+mi}{1}\PY{p}{]}\PY{p}{)}
    \PY{n}{Rc}\PY{p}{,} \PY{n}{Rr} \PY{o}{=} \PY{n}{sparse\PYZus{}matrix}\PY{p}{(}\PY{n}{train}\PY{p}{)}
    \PY{n}{users} \PY{o}{=} \PY{n}{get\PYZus{}users}\PY{p}{(}\PY{n}{Rr}\PY{p}{)} \PY{c}{\PYZsh{} users with at least one rating}
    \PY{n}{jokes} \PY{o}{=} \PY{n}{get\PYZus{}jokes}\PY{p}{(}\PY{n}{Rc}\PY{p}{)} \PY{c}{\PYZsh{} jokes with at least one rating}
    \PY{n}{n\PYZus{}rows}\PY{p}{,} \PY{n}{n\PYZus{}cols} \PY{o}{=} \PY{n}{Rc}\PY{o}{.}\PY{n}{shape}\PY{p}{[}\PY{l+m+mi}{0}\PY{p}{]}\PY{p}{,} \PY{n}{Rc}\PY{o}{.}\PY{n}{shape}\PY{p}{[}\PY{l+m+mi}{1}\PY{p}{]}
    \PY{c}{\PYZsh{} One run per latent dimension 1..10, each logged to its own file.}
    \PY{c}{\PYZsh{} Keep the loop variable named k: the sampling helpers may rely on a}
    \PY{c}{\PYZsh{} module\PYZhy{}level k.}
    \PY{k}{for} \PY{n}{k} \PY{o+ow}{in} \PY{n+nb}{xrange}\PY{p}{(}\PY{l+m+mi}{1}\PY{p}{,} \PY{l+m+mi}{11}\PY{p}{)}\PY{p}{:}
        \PY{n}{log\PYZus{}name} \PY{o}{=} \PY{l+s}{\PYZdq{}}\PY{l+s}{gibbs\PYZus{}}\PY{l+s}{\PYZdq{}} \PY{o}{+} \PY{n+nb}{str}\PY{p}{(}\PY{n}{k}\PY{p}{)} \PY{o}{+} \PY{l+s}{\PYZdq{}}\PY{l+s}{.txt}\PY{l+s}{\PYZdq{}}
        \PY{k}{with} \PY{n+nb}{open}\PY{p}{(}\PY{n}{log\PYZus{}name}\PY{p}{,} \PY{l+s}{\PYZdq{}}\PY{l+s}{w}\PY{l+s}{\PYZdq{}}\PY{p}{)} \PY{k}{as} \PY{n}{fh}\PY{p}{:}
            \PY{n}{U} \PY{o}{=} \PY{n}{np}\PY{o}{.}\PY{n}{random}\PY{o}{.}\PY{n}{normal}\PY{p}{(}\PY{l+m+mi}{0}\PY{p}{,} \PY{n}{sqrt}\PY{p}{(}\PY{l+m+mi}{5}\PY{p}{)}\PY{p}{,} \PY{n}{size}\PY{o}{=}\PY{p}{(}\PY{n}{n\PYZus{}rows}\PY{p}{,} \PY{n}{k}\PY{p}{)}\PY{p}{)}
            \PY{n}{V} \PY{o}{=} \PY{n}{np}\PY{o}{.}\PY{n}{random}\PY{o}{.}\PY{n}{normal}\PY{p}{(}\PY{l+m+mi}{0}\PY{p}{,} \PY{n}{sqrt}\PY{p}{(}\PY{l+m+mi}{5}\PY{p}{)}\PY{p}{,} \PY{n}{size}\PY{o}{=}\PY{p}{(}\PY{n}{n\PYZus{}cols}\PY{p}{,} \PY{n}{k}\PY{p}{)}\PY{p}{)}
            \PY{k}{for} \PY{n}{e} \PY{o+ow}{in} \PY{n+nb}{xrange}\PY{p}{(}\PY{l+m+mi}{100}\PY{p}{)}\PY{p}{:}
                \PY{c}{\PYZsh{} Log the errors of the current state, then advance the sampler.}
                \PY{n}{train\PYZus{}err} \PY{o}{=} \PY{n}{likelihood}\PY{p}{(}\PY{n}{train}\PY{p}{,} \PY{n}{U}\PY{p}{,} \PY{n}{V}\PY{p}{)}
                \PY{n}{test\PYZus{}err} \PY{o}{=} \PY{n}{likelihood}\PY{p}{(}\PY{n}{test}\PY{p}{,} \PY{n}{U}\PY{p}{,} \PY{n}{V}\PY{p}{)}
                \PY{n}{row} \PY{o}{=} \PY{p}{[}\PY{n}{e}\PY{p}{,} \PY{n}{train\PYZus{}err}\PY{p}{,} \PY{n}{test\PYZus{}err}\PY{p}{]}
                \PY{n}{fh}\PY{o}{.}\PY{n}{write}\PY{p}{(}\PY{l+s}{\PYZdq{}}\PY{l+s+se}{\PYZbs{}t}\PY{l+s}{\PYZdq{}}\PY{o}{.}\PY{n}{join}\PY{p}{(}\PY{n+nb}{map}\PY{p}{(}\PY{n+nb}{str}\PY{p}{,} \PY{n}{row}\PY{p}{)}\PY{p}{)} \PY{o}{+} \PY{l+s}{\PYZdq{}}\PY{l+s+se}{\PYZbs{}n}\PY{l+s}{\PYZdq{}}\PY{p}{)}
                \PY{n}{fh}\PY{o}{.}\PY{n}{flush}\PY{p}{(}\PY{p}{)}
                \PY{n}{sample}\PY{p}{(}\PY{n}{U}\PY{p}{,} \PY{n}{V}\PY{p}{,} \PY{n}{Rr}\PY{p}{,} \PY{n}{Rc}\PY{p}{,} \PY{n}{users}\PY{p}{,} \PY{n}{jokes}\PY{p}{)}
\end{Verbatim}
|