summaryrefslogtreecommitdiffstats
path: root/lastfm.py
blob: dca9628a0f251f8bf499d128cff09cd7b99ff6a8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import requests

import sys
from time import sleep
from collections import deque
from types import *

# Base endpoint that make_request() sends every API call to; the API method
# itself is selected through the "method" query parameter.
URL = "http://ws.audioscrobbler.com/2.0/"

def init(api):
    """Store the API key that make_request() attaches to every call.

    The module-level name API_KEY is created here; no other assignment to
    it is visible in this file, so make_request() raises NameError if it
    runs before init() has been called.

    Args:
        api: value sent as the ``api_key`` query parameter.
    """
    global API_KEY
    API_KEY = api

def make_request(method, payload):
    """Call one web-service method and return its decoded JSON answer.

    The query is built from the module API key, ``format=json`` and
    ``method``, then extended (and possibly overridden) by ``payload``.

    Retry policy:
      * a ConnectionError on the first attempt is retried once after a
        30 second pause; a second failure propagates to the caller;
      * if the body is not valid JSON and the status is 503, the request
        is repeated up to three times, 0.3 seconds apart.

    Args:
        method: API method name, e.g. "user.getFriends".
        payload: dict of extra query parameters for that method.

    Returns:
        The decoded JSON object, or None when no attempt produced JSON.
    """
    query = {"api_key": API_KEY, "format": "json",
             "method": method}
    headers = {'User-agent': 'Mozilla/5.0'}
    query.update(payload)

    def fetch():
        # Single place that knows how the HTTP call is issued.
        return requests.get(URL, params=query, headers=headers)

    try:
        response = fetch()
    except requests.exceptions.ConnectionError:
        sleep(30)
        response = fetch()

    try:
        return response.json()
    except ValueError:
        # Body was not JSON; fall through to the 503 retry loop below.
        pass

    attempts = 0
    while response.status_code == 503 and attempts < 3:
        sleep(0.3)
        response = fetch()
        attempts += 1

    try:
        return response.json()
    except ValueError:
        # Giving up: callers treat None as "no answer".
        return None

def get_user_info(user):
    """Return the "user" record from a user.getInfo call.

    Args:
        user: user name to look up.

    Returns:
        The value stored under the "user" key of the API answer.

    Raises:
        SystemExit: with the message "Could not find user <user>" when the
            answer has no "user" key or when make_request() gave up and
            returned None.
    """
    answer = make_request("user.getInfo", {"user": user})
    try:
        return answer["user"]
    except (KeyError, TypeError):
        # TypeError covers answer being None (make_request found no JSON);
        # previously that case escaped as an unhandled traceback.
        sys.exit("Could not find user " + user)

def get_friends(user):
    """Yield every friend record of ``user``, following result pages.

    The first page is fetched immediately; its "@attr"/"totalPages" value
    decides how many further pages are requested (0.1 s apart). Pages that
    cannot be fetched or parsed are skipped so one bad page does not abort
    the whole listing.

    Args:
        user: user name whose friends are listed.

    Yields:
        One friend object per friend, exactly as found under
        ``["friends"]["user"]`` in the API answer.
    """
    def as_list(users):
        # A page holding a single friend may carry a bare dict instead of
        # a list; normalise so callers can always iterate.
        return [users] if isinstance(users, dict) else users

    r = make_request("user.getFriends", {"user": user, "recenttracks": "0"})
    if not r:
        print("Unable to get user " + user)
        return
    try:
        friends = r["friends"]
        n_pages = int(friends["@attr"]["totalPages"])
        first_page = friends["user"]
    except (KeyError, TypeError, ValueError):
        # Missing keys, an unexpected container type or a non-numeric page
        # count all mean the answer is unusable for this user.
        print("Problem with user " + user)
        return

    for u in as_list(first_page):
        yield u

    for page in range(2, n_pages + 1):
        sleep(0.1)
        r = make_request("user.getFriends", {"user": user,
                                             "recenttracks": "0",
                                             "page": page})
        if not r:
            continue
        try:
            f = r["friends"]["user"]
        except (KeyError, TypeError):
            # Dump the unexpected answer for diagnosis and move on.
            print(r)
            continue
        for us in as_list(f):
            yield us

def build_graph(filename):
    """Load a saved adjacency list from a tab-separated text file.

    Each non-blank line has the form ``node<TAB>friend<TAB>friend...``; a
    line with only a node name maps that node to an empty list. The number
    of loaded nodes is printed.

    Args:
        filename: path of the file to read.

    Returns:
        dict mapping each node name to the list of its friends' names.
        Empty when the file cannot be opened.
    """
    result = {}
    try:
        with open(filename) as f:
            for line in f:
                stripped = line.strip()
                if not stripped:
                    # A blank line would otherwise register a bogus ""
                    # node with no friends.
                    continue
                values = stripped.split("\t")
                result[values[0]] = values[1:]
    except IOError:
        # No readable file (e.g. first run): start from an empty graph.
        pass
    print(len(result))
    return result

def print_set(s):
    """Write the items of ``s`` to numbered text files, 100 per file.

    Files are created in the current directory as "0.txt", "1.txt", ...;
    each item is written as ``str(item)`` on its own line. Nothing is
    created when ``s`` is empty.

    Args:
        s: iterable of items to dump.
    """
    out = None
    try:
        for index, item in enumerate(s):
            if index % 100 == 0:
                if out is not None:
                    out.close()
                # Floor division keeps the name an integer ("1.txt") on
                # Python 3 as well; "/" would give "1.0.txt" there.
                out = open(str(index // 100) + ".txt", "w")
            out.write(str(item) + "\n")
    finally:
        # Close the last chunk even if a write failed part-way through.
        if out is not None:
            out.close()

def bfs(graph, seed, process=True):
    """Breadth-first crawl of the friendship graph starting at ``seed``.

    Nodes already present in ``graph`` are expanded from it. A node that is
    missing is either fetched with get_friends() and appended to
    "<seed>.txt" as ``node<TAB>friend<TAB>...`` (``process`` true), or
    only recorded and later handed to print_set() (``process`` false).
    A progress line is printed on every tenth dequeued node.

    Args:
        graph: dict mapping node name to list of friend names; not
            modified by this function.
        seed: name of the start node; also the stem of the output file.
        process: when false, no friends are fetched and the missing nodes
            are dumped via print_set() instead.
    """
    queue = deque([seed])
    visited = set([seed])
    to_do = set()
    with open(seed + ".txt", "a") as out:
        n_dequeued = 0
        while queue:
            n_dequeued += 1
            if n_dequeued % 10 == 0:
                print("Visited: {0}, Queued: {1}".format(n_dequeued,
                                                         len(queue)))
            c_node = queue.popleft()
            known = c_node in graph
            if known:
                friends = graph[c_node]
            elif not process:
                to_do.add(c_node)
                continue
            else:
                # Materialise the generator: the names are needed both for
                # the frontier and for the line written below.
                friends = [friend["name"] for friend in get_friends(c_node)]
                sleep(0.1)
            new = set(friends) - visited
            visited |= new
            queue.extend(new)
            if not known:
                # Persist only freshly fetched nodes; known ones are
                # already in the file the graph was loaded from.
                out.write(c_node + "\t" + "\t".join(friends) + "\n")
    if not process:
        print_set(to_do)

if __name__ == "__main__":
    # Usage: <script> SEED [ANYTHING]
    #   SEED      start user; "<SEED>.txt" is both the cache that is loaded
    #             and the file newly fetched nodes are appended to.
    #   ANYTHING  any second argument disables fetching: unknown nodes are
    #             only listed via print_set().
    if len(sys.argv) < 2:
        sys.exit("Usage: {0} SEED [NO_FETCH]".format(sys.argv[0]))
    seed = sys.argv[1]
    process = len(sys.argv) < 3
    # NOTE(review): init() is never called on this path, so API_KEY looks
    # undefined when fetching is enabled - confirm how the key is supplied.
    graph = build_graph(seed + ".txt")
    bfs(graph, seed, process)