aboutsummaryrefslogtreecommitdiffstats
path: root/python/parse_gs.py
blob: 30e04b048a22d811ca448ef4bb9c49c681d6f2b5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import pandas as pd
import pdb
import re

# Parse the text file "example" into one DataFrame of per-expiry tables.
#
# Layout handled by this script, as far as the code below relies on it:
#   * a line starting with "At" carries the quote timestamp (text after the
#     fourth character),
#   * a line starting with "Expiry" carries the expiry date plus two values
#     stored in the 'forward' and 'spread' columns,
#   * a line starting with "Stk" opens a table of rows, and the first blank
#     line (or end of file) closes it.
#
# Module-level results: ``quotedate`` (if an "At" line was seen) and
# ``masterdf``, the concatenation of all tables keyed by expiry date.

# Compiled once instead of on every "Expiry" line. The pattern is the
# original one, only spelled as a raw string.
# NOTE(review): the last group uses [^t] (not the letter "t") where the
# previous one uses [^\t] (not a tab) -- possibly a typo; left as-is because
# the input format cannot be confirmed from this file.
_EXPIRY_RE = re.compile(r"Expiry (\d{2}\w{3}\d{2}) \(([^\t]+) ([^t]+)\)")

# Column names of a table row once the two discarded tokens are removed.
_TABLE_COLUMNS = ['Stk', 'Sprd', 'Pay', 'Delta', 'Rec', 'Vol',
                  'VolChg', 'VolBpd', 'Tail']


def _numeric_where_possible(column):
    """Return *column* converted to numbers, or unchanged if nothing converts.

    Values that cannot be parsed become NaN. A column in which no value at
    all can be parsed is returned untouched, so purely textual columns are
    not blanked out. This stands in for ``convert_objects(convert_numeric=
    True)``, which no longer exists in current pandas.
    """
    converted = pd.to_numeric(column, errors='coerce')
    if converted.isnull().all():
        return column
    return converted


def _build_table(rows, fwprice, fwspread):
    """Build the DataFrame for one expiry from its tokenised rows.

    ``rows`` is a list of 9-token lists matching ``_TABLE_COLUMNS``;
    ``fwprice`` and ``fwspread`` are the strings captured from the "Expiry"
    line. The 'Pay' and 'Rec' columns ("bid/offer" strings) are split into
    separate bid and offer columns, everything is converted to numbers where
    possible, and the result is indexed by 'Stk'.
    """
    df = pd.DataFrame.from_records(rows, columns=_TABLE_COLUMNS)
    df['forward'] = float(fwprice)
    df['spread'] = float(fwspread)
    df[['PayBid', 'PayOffer']] = df['Pay'].str.split('/', expand=True)
    df[['RecBid', 'RecOffer']] = df['Rec'].str.split('/', expand=True)
    df = df.drop(['Pay', 'Rec'], axis=1)
    df = df.apply(_numeric_where_possible)
    return df.set_index('Stk')


with open("example") as fh:
    flag = False      # True while we are inside a table (after a "Stk" line)
    masterdf = {}     # expiry date -> DataFrame, concatenated at the end
    r = []            # tokenised rows of the table currently being read
    for line in fh:
        # lstrip() also removes the newline of an otherwise empty line, so a
        # blank line becomes "" here; non-blank lines keep their trailing
        # whitespace, which the positional pops below depend on.
        line = line.lstrip()
        if line.startswith("At"):
            quotedate = pd.to_datetime(line[4:])
            continue
        if line.startswith("Expiry"):
            m = _EXPIRY_RE.match(line)
            if m:
                date, fwprice, fwspread = m.groups()
                # pd.datetime is gone from current pandas; to_datetime with
                # an explicit format parses the same "ddMonyy" text.
                date = pd.to_datetime(date, format='%d%b%y')
            continue
        if line.startswith("Stk"):
            flag = True
            r = []
            continue
        if not flag:
            continue
        if line:
            vals = re.sub(" +", " ", line).split(" ")
            # Discard the third token, then the token at index 9 of what
            # remains, leaving the 9 fields named in _TABLE_COLUMNS.
            vals.pop(2)
            vals.pop(9)
            r.append(vals)
        else:
            # A blank line terminates the current table.
            if r:
                masterdf[date] = _build_table(r, fwprice, fwspread)
            flag = False
            r = []

    # The file may end without a blank line after the last table; make sure
    # that table is not lost.
    if flag and r:
        masterdf[date] = _build_table(r, fwprice, fwspread)
        flag = False
        r = []

# Concatenate once, after every table has been collected. Doing this inside
# the loop fails on the first pass (nothing to concatenate yet) and would
# replace the dict that later tables are stored into.
masterdf = pd.concat(masterdf) if masterdf else pd.DataFrame()