import pandas as pd import pdb import re import os data_dir = "/home/share/CorpCDOs/data/swaptions/GS swaptions" all_df = {} fwd_index = [] for index in ["IG", "HY"]: full_path = os.path.join(data_dir, index + " swaptions") for f in os.listdir(os.path.join(data_dir, index + " swaptions")): with open(os.path.join(full_path, f), "rb") as fh: flag = False masterdf = {} for line in fh: line = line.decode('utf-8', 'ignore') line = line.rstrip() m = re.search("(IG|HY)(\d{2}) 5y SWAPTION (?:♦GRANULAR♦ )?(?:UPDATE|CLOSES) - Ref\D+(.+)$", line) if m: indextype = m.groups()[0] series = int(m.groups()[1]) if indextype == 'HY': refprice, refspread = map(float, re.match("([\S]+)\s+\(([^)]+)\)", m.groups()[2]).groups()) else: refspread = float(m.groups()[2]) continue if line.startswith("At"): quotedate = pd.to_datetime(line[4:]) continue if line.startswith("Expiry"): m = re.match("Expiry (\d{2}\w{3}\d{2}) \((?:([\S]+) )?([\S]+)\)", line) if m: date, fwprice, fwspread = m.groups() date = pd.to_datetime(date, format='%d%b%y') continue if line.startswith("Stk"): flag = True r = [] continue if flag: if line: vals = re.sub(" +", " ", line).split(" ") if indextype=='HY': vals.pop(2) vals.pop(9) else: vals.pop(1) vals.pop(8) r.append(vals) continue else: if indextype=='HY': cols = ['Strike', 'Sprd', 'Pay', 'DeltaPay', 'Rec', 'Vol', 'VolChg', 'VolBpd', 'Tail'] else: cols = ['Strike', 'Pay', 'DeltaPay', 'Rec', 'Vol', 'VolChg', 'VolBpd', 'Tail'] df = pd.DataFrame.from_records(r, columns = cols) df[['PayBid', 'PayOffer']] = df.Pay.str.split('/', expand=True) df[['RecBid', 'RecOffer']] = df.Rec.str.split('/', expand=True) df.drop(['Pay', 'Rec'], axis=1, inplace=True) for col in df: df[col] = pd.to_numeric(df[col], errors = 'coerce') df.set_index('Strike', inplace=True) d = {'quotedate': quotedate, 'expiry': date, 'index': indextype, 'series': series, 'ref': refspread if indextype =="IG" else refprice} if indextype == "IG": d['fwdspread'] = float(fwspread) else: d['fwdprice'] = float(fwprice) fwd_index.append(d) masterdf[date] = df flag = False r = [] continue all_df[(quotedate, indextype, series)] = pd.concat(masterdf, names=['expiry']) all_df = pd.concat(all_df, names = ['quotedate', 'index', 'series']) all_df['DeltaPay'] = - all_df['DeltaPay']/100 all_df['Vol'] /= 100 index_df = pd.DataFrame(fwd_index) all_df.reset_index(inplace=True) all_df = all_df.rename(columns={'Strike':'strike', 'Vol': 'vol', 'PayOffer': 'pay_offer', 'PayBid': 'pay_bid', 'RecOffer': 'rec_offer', 'RecBid': 'rec_bid', 'Tail': 'tail', 'DeltaPay': 'delta_pay'}) del all_df['VolBpd'], all_df['VolChg'] if 'Sprd' in all_df: del all_df['Sprd'] all_df['quote_source'] = 'GS' index_df = index_df.drop_duplicates(['quotedate', 'index', 'series', 'expiry']) ##insert quotes from db import dbengine serenitasdb = dbengine('serenitasdb') conn = serenitasdb.raw_connection() ## first delete quotes with conn.cursor() as c: c.execute("DELETE FROM swaption_quotes WHERE quote_source='GS'") conn.commit() all_df.to_sql('swaption_quotes', serenitasdb, if_exists='append', index=False) sqlstr = "INSERT INTO swaption_ref_quotes(quotedate, index, series, expiry, ref, fwdprice, fwdspread) "\ "VALUES(%(quotedate)s, %(index)s, %(series)s, %(expiry)s, %(ref)s, %(fwdprice)s, %(fwdspread)s) " \ "ON CONFLICT DO NOTHING" with conn.cursor() as c: c.executemany(sqlstr, index_df.to_dict(orient='records')) conn.commit() conn.close()