aboutsummaryrefslogtreecommitdiffstats
path: root/python/parse_gs.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/parse_gs.py')
-rw-r--r--python/parse_gs.py154
1 files changed, 85 insertions, 69 deletions
diff --git a/python/parse_gs.py b/python/parse_gs.py
index b58065ee..b0aad079 100644
--- a/python/parse_gs.py
+++ b/python/parse_gs.py
@@ -3,81 +3,83 @@ import pdb
import re
import os
-data_dir = "/home/share/guillaume/IG swaptions"
+data_dir = "/home/share/CorpCDOs/data/swaptions/GS swaptions"
all_df = {}
fwd_index = []
-for f in os.listdir(data_dir):
- with open(os.path.join(data_dir, f), "rb") as fh:
- flag = False
- masterdf = {}
- for line in fh:
- line = line.decode('utf-8', 'ignore')
- line = line.rstrip()
- m = re.search("(IG|HY)(\d{2}) 5y SWAPTION (?:♦GRANULAR♦ )?(?:UPDATE|CLOSES) - Ref\D+(.+)$", line)
- if m:
- indextype = m.groups()[0]
- series = int(m.groups()[1])
- if indextype == 'HY':
- refprice, refspread = map(float,
- re.match("([\S]+)\s+\(([^)]+)\)", m.groups()[2]).groups())
- else:
- refspread = float(m.groups()[2])
- continue
- if line.startswith("At"):
- quotedate = pd.to_datetime(line[4:])
- continue
- if line.startswith("Expiry"):
- m = re.match("Expiry (\d{2}\w{3}\d{2}) \((?:([\S]+) )?([\S]+)\)", line)
+for index in ["IG", "HY"]:
+ full_path = os.path.join(data_dir, index + " swaptions")
+ for f in os.listdir(os.path.join(data_dir, index + " swaptions")):
+ with open(os.path.join(full_path, f), "rb") as fh:
+ flag = False
+ masterdf = {}
+ for line in fh:
+ line = line.decode('utf-8', 'ignore')
+ line = line.rstrip()
+ m = re.search("(IG|HY)(\d{2}) 5y SWAPTION (?:♦GRANULAR♦ )?(?:UPDATE|CLOSES) - Ref\D+(.+)$", line)
if m:
- date, fwprice, fwspread = m.groups()
- date = pd.to_datetime(date, format='%d%b%y')
- continue
- if line.startswith("Stk"):
- flag = True
- r = []
- continue
- if flag:
- if line:
- vals = re.sub(" +", " ", line).split(" ")
- if indextype=='HY':
- vals.pop(2)
- vals.pop(9)
+ indextype = m.groups()[0]
+ series = int(m.groups()[1])
+ if indextype == 'HY':
+ refprice, refspread = map(float,
+ re.match("([\S]+)\s+\(([^)]+)\)", m.groups()[2]).groups())
else:
- vals.pop(1)
- vals.pop(8)
- r.append(vals)
+ refspread = float(m.groups()[2])
continue
- else:
- if indextype=='HY':
- cols = ['Strike', 'Sprd', 'Pay', 'DeltaPay', 'Rec', 'Vol',
- 'VolChg', 'VolBpd', 'Tail']
+ if line.startswith("At"):
+ quotedate = pd.to_datetime(line[4:])
+ continue
+ if line.startswith("Expiry"):
+ m = re.match("Expiry (\d{2}\w{3}\d{2}) \((?:([\S]+) )?([\S]+)\)", line)
+ if m:
+ date, fwprice, fwspread = m.groups()
+ date = pd.to_datetime(date, format='%d%b%y')
+ continue
+ if line.startswith("Stk"):
+ flag = True
+ r = []
+ continue
+ if flag:
+ if line:
+ vals = re.sub(" +", " ", line).split(" ")
+ if indextype=='HY':
+ vals.pop(2)
+ vals.pop(9)
+ else:
+ vals.pop(1)
+ vals.pop(8)
+ r.append(vals)
+ continue
else:
- cols = ['Strike', 'Pay', 'DeltaPay', 'Rec', 'Vol',
- 'VolChg', 'VolBpd', 'Tail']
- df = pd.DataFrame.from_records(r, columns = cols)
+ if indextype=='HY':
+ cols = ['Strike', 'Sprd', 'Pay', 'DeltaPay', 'Rec', 'Vol',
+ 'VolChg', 'VolBpd', 'Tail']
+ else:
+ cols = ['Strike', 'Pay', 'DeltaPay', 'Rec', 'Vol',
+ 'VolChg', 'VolBpd', 'Tail']
+ df = pd.DataFrame.from_records(r, columns = cols)
- df[['PayBid', 'PayOffer']] = df.Pay.str.split('/', expand=True)
- df[['RecBid', 'RecOffer']] = df.Rec.str.split('/', expand=True)
- df.drop(['Pay', 'Rec'], axis=1, inplace=True)
- for col in df:
- df[col] = pd.to_numeric(df[col], errors = 'coerce')
- df.set_index('Strike', inplace=True)
- d = {'quotedate': quotedate,
- 'expiry': date,
- 'index': indextype,
- 'series': series,
- 'ref': refspread if indextype =="IG" else refprice}
- if indextype == "IG":
- d['fwdspread'] = float(fwspread)
- else:
- d['fwdprice'] = float(fwprice)
- fwd_index.append(d)
+ df[['PayBid', 'PayOffer']] = df.Pay.str.split('/', expand=True)
+ df[['RecBid', 'RecOffer']] = df.Rec.str.split('/', expand=True)
+ df.drop(['Pay', 'Rec'], axis=1, inplace=True)
+ for col in df:
+ df[col] = pd.to_numeric(df[col], errors = 'coerce')
+ df.set_index('Strike', inplace=True)
+ d = {'quotedate': quotedate,
+ 'expiry': date,
+ 'index': indextype,
+ 'series': series,
+ 'ref': refspread if indextype =="IG" else refprice}
+ if indextype == "IG":
+ d['fwdspread'] = float(fwspread)
+ else:
+ d['fwdprice'] = float(fwprice)
+ fwd_index.append(d)
- masterdf[date] = df
- flag = False
- r = []
- continue
- all_df[(quotedate, indextype, series)] = pd.concat(masterdf, names=['expiry'])
+ masterdf[date] = df
+ flag = False
+ r = []
+ continue
+ all_df[(quotedate, indextype, series)] = pd.concat(masterdf, names=['expiry'])
all_df = pd.concat(all_df, names = ['quotedate', 'index', 'series'])
all_df['DeltaPay'] = - all_df['DeltaPay']/100
all_df['Vol'] /= 100
@@ -96,8 +98,22 @@ del all_df['VolBpd'], all_df['VolChg']
if 'Sprd' in all_df:
del all_df['Sprd']
all_df['quote_source'] = 'GS'
+index_df = index_df.drop_duplicates(['quotedate', 'index', 'series', 'expiry'])
+
+##insert quotes
from db import dbengine
serenitasdb = dbengine('serenitasdb')
+conn = serenitasdb.raw_connection()
+## first delete quotes
+with conn.cursor() as c:
+ c.execute("DELETE FROM serenitas_quotes WHERE quote_source='GS'")
+conn.commit()
all_df.to_sql('swaption_quotes', serenitasdb, if_exists='append', index=False)
-index_df = index_df.drop_duplicates(['quotedate', 'index', 'series', 'expiry'])
-index_df.to_sql('swaption_ref_quotes', serenitasdb, if_exists='append', index=False)
+
+sqlstr = "INSERT INTO swaption_ref_quotes(quotedate, index, series, expiry, ref, fwdprice, fwdspread) "\
+ "VALUES(%(quotedate)s, %(index)s, %(series)s, %(expiry)s, %(ref)s, %(fwdprice)s, %(fwdspread)s) " \
+ "ON CONFLICT DO NOTHING"
+with conn.cursor() as c:
+ c.executemany(sqlstr, index_df.to_dict(orient='records'))
+conn.commit()
+conn.close()