1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
|
import requests
import sys
from time import sleep
from collections import deque
from types import *
# Endpoint that every API request in this file is sent to
# (Audioscrobbler / Last.fm web service, API version 2.0).
URL = "http://ws.audioscrobbler.com/2.0/"
def init(api):
    """Remember the API key for subsequent requests.

    Args:
        api: value stored in the module-level ``API_KEY`` global, which
            ``make_request`` reads when it builds its query parameters.
    """
    global API_KEY
    API_KEY = api
def make_request(method, payload, timeout=30):
    """Call one API method and return its decoded JSON reply.

    Args:
        method: API method name, sent as the ``method`` query parameter.
        payload: dict of extra query parameters; merged last, so its keys
            override the defaults built here.
        timeout: seconds to wait for the server before giving up on a
            single HTTP request. Without a timeout ``requests`` may block
            forever on a stalled connection.

    Returns:
        The parsed JSON body, or ``None`` when the body still cannot be
        decoded as JSON after the retries.

    Raises:
        requests.exceptions.RequestException: if the single retry made after
            a connection error or timeout fails as well, or if one of the
            503 retries fails.
    """
    params = {"api_key": API_KEY, "format": "json",
              "method": method}
    user_agent = {'User-agent': 'Mozilla/5.0'}
    params.update(payload)

    def fetch():
        # Single place that performs the HTTP call so every attempt uses
        # the same parameters, headers and timeout.
        return requests.get(URL, params=params, headers=user_agent,
                            timeout=timeout)

    try:
        r = fetch()
    except (requests.exceptions.ConnectionError,
            requests.exceptions.Timeout):
        # Back off for a while, then try exactly once more; a second
        # failure is allowed to propagate to the caller.
        sleep(30)
        r = fetch()

    try:
        return r.json()
    except ValueError:
        # Body was not JSON; fall through to the 503 retry loop.
        pass

    attempts = 0
    while r.status_code == 503 and attempts < 3:
        sleep(0.3)
        r = fetch()
        attempts += 1

    try:
        return r.json()
    except ValueError:
        # Giving up: callers treat None as "no answer".
        return None
def get_user_info(user):
    """Return the ``"user"`` entry of the ``user.getInfo`` reply for *user*.

    Args:
        user: user name passed as the ``user`` request parameter.

    Returns:
        Whatever the reply stores under its ``"user"`` key.

    Raises:
        SystemExit: with the message ``"Could not find user <user>"`` when
            the request yields no usable reply (``make_request`` can return
            ``None``) or the reply has no ``"user"`` key.
    """
    response = make_request("user.getInfo", {"user": user})
    if not isinstance(response, dict) or "user" not in response:
        # sys.exit rather than the site-provided ``exit`` helper, which is
        # meant for the interactive shell and may be absent.
        sys.exit("Could not find user " + user)
    return response["user"]
def _as_user_list(users):
    """Wrap a lone user dict in a list so callers can always iterate."""
    return [users] if isinstance(users, dict) else users


def get_friends(user):
    """Yield the friend entries of *user*, one page of results at a time.

    The first ``user.getFriends`` reply supplies the page count; pages
    2..totalPages are then requested in turn, with a short pause before
    each request.

    Args:
        user: user name passed as the ``user`` request parameter.

    Yields:
        Each element found under ``reply["friends"]["user"]``. Nothing is
        yielded when the first request fails or its reply lacks the
        expected keys; a message is printed instead. Later pages that fail
        or lack the expected keys are skipped.
    """
    r = make_request("user.getFriends", {"user": user, "recenttracks": "0"})
    if not r:
        print("Unable to get user " + user)
        return

    try:
        friends = r["friends"]
        n_pages = int(friends["@attr"]["totalPages"])
        friends = friends["user"]
    except (KeyError, TypeError, ValueError):
        # Missing keys, an unexpected reply shape, or a non-numeric count.
        print("Problem with user " + user)
        return

    for u in _as_user_list(friends):
        yield u

    for page in range(2, n_pages + 1):
        sleep(0.1)
        r = make_request("user.getFriends", {"user": user,
                                             "recenttracks": "0",
                                             "page": page})
        if not r:
            continue
        try:
            f = r["friends"]["user"]
        except KeyError:
            # Show the unexpected reply and move on to the next page.
            print(r)
            continue
        for us in _as_user_list(f):
            yield us
def build_graph(filename):
    """Load an adjacency list from a tab-separated file.

    Each non-blank line is ``node<TAB>neighbour<TAB>neighbour...``; the
    first field becomes the key and the remaining fields its list of
    neighbours. If a node appears on several lines, the last one wins.
    The number of loaded nodes is printed.

    Args:
        filename: path of the file to read.

    Returns:
        dict mapping node name to a list of neighbour names. Empty when
        the file cannot be opened.
    """
    result = {}
    try:
        with open(filename) as f:
            for line in f:
                values = line.strip().split("\t")
                if not values[0]:
                    # Blank line: would otherwise register a bogus "" node.
                    continue
                result[values[0]] = values[1:]
    except IOError:
        # An unreadable or missing file simply means an empty graph.
        pass
    print(len(result))
    return result
def print_set(s):
    """Write the items of *s* to numbered text files, 100 items per file.

    Files are created in the current working directory and named
    ``0.txt``, ``1.txt``, ... in iteration order; each item is written as
    ``str(item)`` followed by a newline. No file is created for an empty
    iterable.

    Args:
        s: iterable of items to write.
    """
    handle = None
    try:
        for index, item in enumerate(s):
            if index % 100 == 0:
                # Start of a new chunk: rotate to the next output file.
                if handle is not None:
                    handle.close()
                # Floor division keeps the name an integer ("1.txt")
                # regardless of the interpreter's division semantics.
                handle = open(str(index // 100) + ".txt", "w")
            handle.write(str(item) + "\n")
    finally:
        # Close the last file even if a write failed part-way through.
        if handle is not None:
            handle.close()
def bfs(graph, seed, process=True):
    """Breadth-first walk of the friend graph starting at *seed*.

    Nodes already present in *graph* use their stored neighbour list.
    For any other node the behaviour depends on *process*:

    * ``process=True``: the neighbours are fetched with ``get_friends``
      and the node is appended to ``<seed>.txt`` as
      ``node<TAB>friend<TAB>friend...``.
    * ``process=False``: the node is only collected, and the collected
      set is handed to ``print_set`` once the walk is finished.

    A progress line is printed every tenth dequeued node.

    Args:
        graph: mapping of node name to a list of neighbour names.
        seed: starting node; also the stem of the output file name.
        process: whether unknown nodes are fetched (see above).
    """
    queue = deque([seed])
    visited = {seed}
    to_do = set()
    with open(seed + ".txt", "a") as out:
        n_dequeued = 0
        while queue:
            n_dequeued += 1
            if n_dequeued % 10 == 0:
                print("Visited: {0}, Queued: {1}".format(n_dequeued,
                                                         len(queue)))
            c_node = queue.popleft()
            fetched = False
            try:
                friends = graph[c_node]
            except KeyError:
                if not process:
                    to_do.add(c_node)
                    continue
                friends = [friend["name"] for friend in get_friends(c_node)]
                fetched = True
                sleep(0.1)
            new = set(friends) - visited
            visited |= new
            queue.extend(new)
            if fetched:
                # Only freshly fetched nodes are appended to the file.
                out.write(c_node + "\t" + "\t".join(friends) + "\n")
    if not process:
        print_set(to_do)
if __name__ == "__main__":
    # Command line: SEED_USER [ANYTHING]
    if len(sys.argv) < 2:
        sys.exit("Usage: {0} SEED_USER [no-fetch]".format(sys.argv[0]))
    seed = sys.argv[1]
    # Any additional argument turns fetching off: unknown nodes are then
    # only collected by bfs() instead of being requested.
    process = len(sys.argv) < 3
    # NOTE(review): init() is not called on this path, so API_KEY looks
    # unset when fetching is enabled -- confirm where the key is supplied.
    graph = build_graph(seed + ".txt")
    bfs(graph, seed, process)
|