aboutsummaryrefslogtreecommitdiffstats
path: root/python/parse_gs.py
blob: 3e25c6cbd8c2ea7520d26e299d50ed3c985c8c50 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import pandas as pd
import pdb
import re
import os

data_dir = "/home/share/guillaume/swaptions"
all_df = {}
fwd_index = []
for f in os.listdir(data_dir):
    with open(os.path.join(data_dir, f), "rb") as fh:
        flag = False
        masterdf = {}
        for line in fh:
            line = line.decode('utf-8', 'ignore')
            line = line.rstrip()
            m = re.search("(IG|HY)(\d{2}) 5y SWAPTION (?:♦GRANULAR♦ )?(?:UPDATE|CLOSES) - Ref\D+(.+)$", line)
            if m:
                indextype = m.groups()[0]
                series = int(m.groups()[1])
                if indextype == 'HY':
                    refprice, refspread = map(float,
                                              re.match("([\S]+)\s+\(([^)]+)\)", m.groups()[2]).groups())
                else:
                    refspread = float(m.groups()[2])
                continue
            if line.startswith("At"):
                quotedate = pd.to_datetime(line[4:])
                continue
            if line.startswith("Expiry"):
                m = re.match("Expiry (\d{2}\w{3}\d{2}) \((?:([\S]+) )?([\S]+)\)", line)
                if m:
                    date, fwprice, fwspread = m.groups()
                    date = pd.to_datetime(date, format='%d%b%y')
                continue
            if line.startswith("Stk"):
                flag = True
                r = []
                continue
            if flag:
                if line:
                    vals = re.sub(" +", " ", line).split(" ")
                    if indextype=='HY':
                        vals.pop(2)
                        vals.pop(9)
                    else:
                        vals.pop(1)
                        vals.pop(8)
                    r.append(vals)
                    continue
                else:
                    if indextype=='HY':
                        cols = ['Strike', 'Sprd', 'Pay', 'DeltaPay', 'Rec', 'Vol',
                                'VolChg', 'VolBpd', 'Tail']
                    else:
                        cols = ['Strike', 'Pay', 'DeltaPay', 'Rec', 'Vol',
                                'VolChg', 'VolBpd', 'Tail']
                    df = pd.DataFrame.from_records(r, columns = cols)

                    df[['PayBid', 'PayOffer']] = df.Pay.str.split('/', expand=True)
                    df[['RecBid', 'RecOffer']] = df.Rec.str.split('/', expand=True)
                    df.drop(['Pay', 'Rec'], axis=1, inplace=True)
                    for col in df:
                        df[col] = pd.to_numeric(df[col], errors = 'coerce')
                    df.set_index('Strike', inplace=True)
                    d = {'quotedate': quotedate,
                         'expiry': date,
                         'index': indextype,
                         'series': series,
                         'ref': refspread if indextype =="IG" else refprice}
                    if indextype == "IG":
                        d['fwdspread'] = float(fwspread)
                    else:
                        d['fwdprice'] = float(fwprice)
                    fwd_index.append(d)

                    masterdf[date] = df
                    flag = False
                    r = []
                    continue
    all_df[(quotedate, indextype, series)] = pd.concat(masterdf, names=['expiry'])
all_df = pd.concat(all_df, names = ['quotedate', 'index', 'series'])
all_df['DeltaPay'] = - all_df['DeltaPay']/100
index_df = pd.DataFrame(fwd_index)

all_df.reset_index(inplace=True)
all_df = all_df.rename(columns={'Strike':'strike',
                                'Vol': 'vol',
                                'PayOffer': 'pay_offer',
                                'PayBid': 'pay_bid',
                                'RecOffer': 'rec_offer',
                                'RecBid': 'rec_bid',
                                'Tail': 'tail',
                                'DeltaPay': 'delta_pay'})
del all_df['VolBpd'], all_df['VolChg'], all_df['Sprd']
all_df['quote_source'] = 'GS'
from db import dbengine
serenitasdb = dbengine('serenitasdb')
all_df.to_sql('swaption_quotes', serenitasdb, if_exists='append', index=False)
index_df.to_sql('swaption_ref_quotes', serenitasdb, if_exists='append', index=False)