1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
|
from tasks import NumFollowers, ListFollowers, normalize, Likes
from bs4 import BeautifulSoup
from celery.result import ResultSet
import os.path as op
from datetime import datetime
import sys
# Task objects imported from the project's `tasks` module.  Only `likes`
# is used further down in this file (via `likes.delay(...)`).
nf = NumFollowers()
lf = ListFollowers()
likes = Likes()

# users: first column of the file named by sys.argv[1] -> integer parsed
# from its second column.
users = {}
try:
    with open(sys.argv[1]) as f:
        for line in f:
            values = line.strip().split()
            if not values:
                # Blank line (e.g. a trailing empty line at end of file):
                # there is nothing to parse, and indexing would raise
                # IndexError and abort the whole run.
                continue
            # Drop "," / "." / " " separators so values such as "1,234"
            # parse as 1234.
            digits = values[1].replace(",", "").replace(".", "").replace(" ", "")
            users[values[0]] = int(digits.encode("ascii", "ignore"))
except IOError:
    # Best effort: if the file cannot be opened, start with no known users.
    pass

# users_likes: names (first column) already present in the file named by
# sys.argv[3].  The main loop skips any name found here.
users_likes = {}
try:
    with open(sys.argv[3]) as f:
        for line in f:
            values = line.strip().split()
            if not values:
                # Same guard as above: ignore empty lines instead of crashing.
                continue
            users_likes[values[0]] = True
except IOError:
    # Best effort: the output file may not exist yet on a first run.
    pass

# Results are appended to the same file that was just read for users_likes;
# inputs that produced no result are appended to bad.txt.
output = open(sys.argv[3], "a")
bad = open("bad.txt", "a")
def add_user(user, degree):
    """Store *degree* for *user* in ``users`` and append the pair to ``output``.

    The record written is ``"<user> <degree>"`` followed by a newline.
    """
    users[user] = degree
    record = " ".join((user, str(degree)))
    output.write(record + "\n")
def add_user2(user, likes):
    """Append one tab-separated ``user<TAB>likes`` record to ``output``.

    Note: the ``likes`` parameter shadows the module-level ``likes`` task
    object inside this function.
    """
    record = "\t".join((user, likes))
    output.write(record + "\n")
def strip2(url):
    """Remove the favourites-page addition from *url*.

    If *url* ends with ``/video_tv_show_favorite`` that suffix is cut off;
    otherwise everything from the first ``&`` onwards is dropped.  These are
    the two forms that ``normalize2`` appends when it builds ``getname``.
    """
    suffix = "/video_tv_show_favorite"
    if not url.endswith(suffix):
        # Query-style URL: keep only the part before the first "&".
        return url.split("&")[0]
    return url[:len(url) - len(suffix)]
def call_back(tid, value):
    """Handle one finished task result (used as the ``join_native`` callback).

    Logs a timestamp and *value*.  When *value* has a ``"likes"`` entry:
    a ``None`` entry means the lookup failed, so ``value["orig"]`` is
    appended to ``bad`` and flushed; otherwise the name derived from
    ``value["for"]`` is written with its likes via ``add_user2``.
    Results without a ``"likes"`` key are only logged.

    tid   -- task id supplied by the result set (unused here).
    value -- mapping with the keys described above.
    """
    # Single parenthesised argument: prints identically whether ``print``
    # is the Python 2 statement or the function.
    print(datetime.now().isoformat() + " " + str(value))

    if "likes" not in value:
        return

    found = value["likes"]
    if found is None:
        bad.write(value["orig"] + "\n")
        bad.flush()
        return

    _base, fname, _get = normalize(strip2(value["for"]))
    add_user2(fname, found)
def normalize2(url):
    """Split a profile URL into ``(basename, fname, getname)``.

    For URLs containing ``profile.php`` the base is everything before the
    first ``&``, the name is the text after the last ``=`` of that base, and
    the favourites URL adds ``&sk=video_tv_show_favorite``.  For every other
    URL the base is everything before the first ``?``, the name is the last
    ``/``-separated segment, and the favourites URL adds
    ``/video_tv_show_favorite``.
    """
    if "profile.php" in url:
        cut_at, name_sep, tail = "&", "=", "&sk=video_tv_show_favorite"
    else:
        cut_at, name_sep, tail = "?", "/", "/video_tv_show_favorite"

    basename = url.partition(cut_at)[0]
    fname = basename.rpartition(name_sep)[2]
    return basename, fname, basename + tail
def _dispatch(batch):
    """Queue one ``likes`` task per URL in *batch* and wait for all of them.

    Each finished result is handed to ``call_back`` by ``join_native``.
    """
    pending = ResultSet([])
    for target in batch:
        pending.add(likes.delay(target))
    pending.join_native(callback=call_back)


# Parse the HTML page named by sys.argv[2]; the handle is closed as soon as
# BeautifulSoup has consumed it.
with open(sys.argv[2]) as page:
    soup = BeautifulSoup(page)

# Every <div class="fsl"> is expected to wrap an <a> whose href is a link.
links = [div.a["href"] for div in soup.findAll("div", class_="fsl")]

chunk = []
try:
    for link in links:
        basename, finame, getname = normalize(link)
        path = "facebook/" + finame
        if not op.isfile(path):
            # No local file for this link: nothing to enqueue.
            continue
        with open(path) as f:
            for line in f:
                basename, fname, getname = normalize2(line.strip())
                # Only enqueue known users with a positive count that do
                # not already appear in the output file.
                if fname in users and users[fname] > 0 and fname not in users_likes:
                    chunk.append(getname)
                # Work in batches of 100 tasks at a time.
                if len(chunk) == 100:
                    _dispatch(chunk)
                    chunk = []
    # Submit whatever is left: without this the final batch of fewer than
    # 100 URLs would never be sent.
    if chunk:
        _dispatch(chunk)
        chunk = []
finally:
    # Make sure buffered results reach disk even if a task or join fails.
    output.close()
    bad.close()
|