diff options
Diffstat (limited to 'python/parse_gs.py')
| -rw-r--r-- | python/parse_gs.py | 122 |
1 files changed, 76 insertions, 46 deletions
diff --git a/python/parse_gs.py b/python/parse_gs.py index 14137a06..ffbd038f 100644 --- a/python/parse_gs.py +++ b/python/parse_gs.py @@ -13,15 +13,20 @@ for index in ["IG", "HY"]: flag = False masterdf = {} for line in fh: - line = line.decode('utf-8', 'ignore') + line = line.decode("utf-8", "ignore") line = line.rstrip() - m = re.search("(IG|HY)(\d{2}) 5y (?:.*)SWAPTION (?:UPDATE|CLOSES|CLOSE) - Ref\D+(.+)$", line) + m = re.search( + "(IG|HY)(\d{2}) 5y (?:.*)SWAPTION (?:UPDATE|CLOSES|CLOSE) - Ref\D+(.+)$", + line, + ) if m: indextype = m.groups()[0] series = int(m.groups()[1]) - if indextype == 'HY': - refprice, refspread = map(float, - re.match("([\S]+)\s+\(([^)]+)\)", m.groups()[2]).groups()) + if indextype == "HY": + refprice, refspread = map( + float, + re.match("([\S]+)\s+\(([^)]+)\)", m.groups()[2]).groups(), + ) else: refspread = float(m.groups()[2]) continue @@ -29,10 +34,12 @@ for index in ["IG", "HY"]: quotedate = pd.to_datetime(line[4:]) continue if line.startswith("Expiry"): - m = re.match("Expiry (\d{2}\w{3}\d{2}) \((?:([\S]+) )?([\S]+)\)", line) + m = re.match( + "Expiry (\d{2}\w{3}\d{2}) \((?:([\S]+) )?([\S]+)\)", line + ) if m: date, fwprice, fwspread = m.groups() - date = pd.to_datetime(date, format='%d%b%y') + date = pd.to_datetime(date, format="%d%b%y") continue if line.startswith("Stk"): flag = True @@ -41,7 +48,7 @@ for index in ["IG", "HY"]: if flag: if line: vals = re.sub(" +", " ", line).split(" ") - if indextype=='HY': + if indextype == "HY": vals.pop(2) vals.pop(9) else: @@ -50,62 +57,85 @@ for index in ["IG", "HY"]: r.append(vals) continue else: - if indextype=='HY': - cols = ['Strike', 'Sprd', 'Pay', 'DeltaPay', 'Rec', 'Vol', - 'VolChg', 'VolBpd', 'Tail'] + if indextype == "HY": + cols = [ + "Strike", + "Sprd", + "Pay", + "DeltaPay", + "Rec", + "Vol", + "VolChg", + "VolBpd", + "Tail", + ] else: - cols = ['Strike', 'Pay', 'DeltaPay', 'Rec', 'Vol', - 'VolChg', 'VolBpd', 'Tail'] - df = pd.DataFrame.from_records(r, columns = cols) + cols = [ + "Strike", + "Pay", + "DeltaPay", + "Rec", + "Vol", + "VolChg", + "VolBpd", + "Tail", + ] + df = pd.DataFrame.from_records(r, columns=cols) - df[['PayBid', 'PayOffer']] = df.Pay.str.split('/', expand=True) - df[['RecBid', 'RecOffer']] = df.Rec.str.split('/', expand=True) - df.drop(['Pay', 'Rec'], axis=1, inplace=True) + df[["PayBid", "PayOffer"]] = df.Pay.str.split("/", expand=True) + df[["RecBid", "RecOffer"]] = df.Rec.str.split("/", expand=True) + df.drop(["Pay", "Rec"], axis=1, inplace=True) for col in df: - df[col] = pd.to_numeric(df[col], errors = 'coerce') - df.set_index('Strike', inplace=True) - d = {'quotedate': quotedate, - 'expiry': date, - 'index': indextype, - 'series': series, - 'ref': refspread if indextype =="IG" else refprice} + df[col] = pd.to_numeric(df[col], errors="coerce") + df.set_index("Strike", inplace=True) + d = { + "quotedate": quotedate, + "expiry": date, + "index": indextype, + "series": series, + "ref": refspread if indextype == "IG" else refprice, + } if indextype == "IG": - d['fwdspread'] = float(fwspread) + d["fwdspread"] = float(fwspread) else: - d['fwdprice'] = float(fwprice) + d["fwdprice"] = float(fwprice) fwd_index.append(d) masterdf[date] = df flag = False r = [] continue - all_df[(quotedate, indextype, series)] = pd.concat(masterdf, names=['expiry']) -all_df = pd.concat(all_df, names = ['quotedate', 'index', 'series']) -all_df['DeltaPay'] = - all_df['DeltaPay']/100 -all_df['Vol'] /= 100 + all_df[(quotedate, indextype, series)] = pd.concat(masterdf, names=["expiry"]) +all_df = pd.concat(all_df, names=["quotedate", "index", "series"]) +all_df["DeltaPay"] = -all_df["DeltaPay"] / 100 +all_df["Vol"] /= 100 index_df = pd.DataFrame(fwd_index) all_df.reset_index(inplace=True) -all_df = all_df.rename(columns={'Strike':'strike', - 'Vol': 'vol', - 'PayOffer': 'pay_offer', - 'PayBid': 'pay_bid', - 'RecOffer': 'rec_offer', - 'RecBid': 'rec_bid', - 'Tail': 'tail', - 'DeltaPay': 'delta_pay'}) -del all_df['VolBpd'], all_df['VolChg'] -if 'Sprd' in all_df: - del all_df['Sprd'] -all_df['quote_source'] = 'GS' -index_df = index_df.drop_duplicates(['quotedate', 'index', 'series', 'expiry']) +all_df = all_df.rename( + columns={ + "Strike": "strike", + "Vol": "vol", + "PayOffer": "pay_offer", + "PayBid": "pay_bid", + "RecOffer": "rec_offer", + "RecBid": "rec_bid", + "Tail": "tail", + "DeltaPay": "delta_pay", + } +) +del all_df["VolBpd"], all_df["VolChg"] +if "Sprd" in all_df: + del all_df["Sprd"] +all_df["quote_source"] = "GS" +index_df = index_df.drop_duplicates(["quotedate", "index", "series", "expiry"]) ##insert quotes from utils.db import dbengine -serenitasdb = dbengine('serenitasdb') + +serenitasdb = dbengine("serenitasdb") conn = serenitasdb.raw_connection() -format_str = "INSERT INTO swaption_ref_quotes({}) VALUES({}) " \ - "ON CONFLICT DO NOTHING" +format_str = "INSERT INTO swaption_ref_quotes({}) VALUES({}) " "ON CONFLICT DO NOTHING" cols = index_df.columns sqlstr = format_str.format(",".join(cols), ",".join(["%s"] * len(cols))) with conn.cursor() as c: |
