"""Breadth-first crawl of a user's friend graph via the audioscrobbler web API.

Adjacency lists are cached in ``<seed>.txt`` as tab-separated lines
(``user<TAB>friend1<TAB>friend2...``) so an interrupted crawl can resume
without re-fetching users that were already recorded.

The code sticks to constructs that behave the same on Python 2 and 3
(single-argument ``print(...)``, ``//`` for integer division).
"""
import sys
from collections import deque
from time import sleep
from types import *  # NOTE(review): nothing below appears to use this wildcard import

import requests

URL = "http://ws.audioscrobbler.com/2.0/"


def init(api):
    """Store the API key sent with every request made by this module."""
    global API_KEY
    API_KEY = api


def _get(params):
    """GET ``URL`` with ``params``, retrying once 30 s after a connection error."""
    headers = {'User-agent': 'Mozilla/5.0'}
    try:
        return requests.get(URL, params=params, headers=headers)
    except requests.exceptions.ConnectionError:
        sleep(30)
        # A second failure is deliberately allowed to propagate.
        return requests.get(URL, params=params, headers=headers)


def make_request(method, payload):
    """Call API ``method`` with the extra query arguments in ``payload``.

    Returns the decoded JSON body, or ``None`` when the body is not valid
    JSON.  A non-JSON answer with HTTP status 503 is retried up to three
    times, 0.3 s apart, before giving up.

    ``init`` must have been called first; otherwise ``API_KEY`` is undefined
    and a ``NameError`` is raised.
    """
    params = {"api_key": API_KEY, "format": "json", "method": method}
    params.update(payload)

    response = _get(params)
    try:
        return response.json()
    except ValueError:
        pass

    # The request failed for some reason; retry only while the status is 503.
    attempts = 0
    while response.status_code == 503 and attempts < 3:
        sleep(0.3)
        response = _get(params)
        attempts += 1

    try:
        return response.json()
    except ValueError:
        # Giving up.
        return None


def get_user_info(user):
    """Return the ``"user"`` entry of ``user.getInfo`` for ``user``.

    Exits the program with an error message when the entry cannot be
    obtained, including the case where the request yielded no JSON at all.
    """
    try:
        return make_request("user.getInfo", {"user": user})["user"]
    except (KeyError, TypeError):
        # TypeError: make_request returned None, which cannot be subscripted.
        sys.exit("Could not find user " + user)


def _as_list(users):
    """Wrap a lone user dict in a list; pass anything else through unchanged."""
    if isinstance(users, dict):
        return [users]
    return users


def get_friends(user):
    """Yield the friend entries of ``user``, following every result page.

    Nothing is yielded (a message is printed instead) when the first page
    cannot be fetched or lacks the expected structure.  Later pages that
    fail are skipped.
    """
    r = make_request("user.getFriends", {"user": user, "recenttracks": "0"})
    if not r:
        print("Unable to get user " + user)
        return

    try:
        container = r["friends"]
        n_pages = int(container["@attr"]["totalPages"])
        first_page = container["user"]
    except (KeyError, TypeError, ValueError):
        print("Problem with user " + user)
        return

    for entry in _as_list(first_page):
        yield entry

    for page in range(2, n_pages + 1):
        sleep(0.1)
        r = make_request("user.getFriends",
                         {"user": user, "recenttracks": "0", "page": page})
        if not r:
            continue
        try:
            page_users = r["friends"]["user"]
        except KeyError:
            # Dump the unexpected response and move on to the next page.
            print(r)
            continue
        for entry in _as_list(page_users):
            yield entry


def build_graph(filename):
    """Load the cached adjacency lists from ``filename``.

    Each non-blank line is ``user<TAB>friend<TAB>friend...``.  Returns a dict
    mapping user -> list of friend names; an unreadable or missing file
    gives an empty dict.  The number of loaded users is printed.
    """
    result = {}
    try:
        with open(filename) as f:
            for line in f:
                line = line.strip()
                if not line:
                    # A blank line would otherwise create a bogus '' node.
                    continue
                values = line.split("\t")
                result[values[0]] = values[1:]
    except IOError:
        pass
    print(len(result))
    return result


def print_set(s, chunk_size=100):
    """Write the items of ``s`` to ``0.txt``, ``1.txt``, ... in the current directory.

    Each file receives at most ``chunk_size`` items, one per line.  Nothing
    is written for an empty ``s``.
    """
    items = list(s)
    for start in range(0, len(items), chunk_size):
        # ``//`` keeps the names integral ("0.txt", not "0.0.txt").
        with open(str(start // chunk_size) + ".txt", "w") as out:
            for item in items[start:start + chunk_size]:
                out.write(str(item) + "\n")


def bfs(graph, seed, process=True):
    """Traverse the friend graph breadth-first starting at ``seed``.

    ``graph`` maps already-known users to their friend lists.  For a user
    missing from ``graph``:

    * ``process`` true  -- friends are fetched with ``get_friends`` and the
      adjacency line is appended to ``<seed>.txt``;
    * ``process`` false -- the user is only collected, and the collected
      users are dumped with ``print_set`` once the traversal ends.

    A progress line is printed every tenth dequeued user.
    """
    queue = deque([seed])
    visited = set([seed])
    to_do = set()
    with open(seed + ".txt", "a") as out:
        i = 0
        while queue:
            i += 1
            if i % 10 == 0:
                print("Visited: {0}, Queued: {1}".format(i, len(queue)))
            c_node = queue.popleft()

            if c_node in graph:
                friends = graph[c_node]
            elif not process:
                to_do.add(c_node)
                continue
            else:
                friends = [friend["name"] for friend in get_friends(c_node)]
                # Throttle only after real API traffic.
                sleep(0.1)
                out.write(c_node + "\t" + "\t".join(friends) + "\n")

            new = set(friends) - visited
            visited |= new
            queue.extend(new)
    if not process:
        print_set(to_do)


if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.exit("usage: " + sys.argv[0] + " SEED_USER [NO_FETCH]")
    seed = sys.argv[1]
    # Any additional argument disables fetching: only the cache is walked.
    process = len(sys.argv) < 3
    # NOTE(review): init() is never called on this path, so API_KEY is
    # undefined and the first live request raises NameError -- confirm how
    # the key is meant to be supplied.
    graph = build_graph(seed + ".txt")
    bfs(graph, seed, process)