2 files changed, 0 insertions, 289 deletions
diff --git a/python/parse_gs.py b/python/parse_gs.py
deleted file mode 100644
index ffbd038f..00000000
--- a/python/parse_gs.py
+++ /dev/null
@@ -1,152 +0,0 @@
-import pandas as pd
-import pdb
-import re
-import os
-
-data_dir = "/home/share/guillaume/swaptions"
-all_df = {}
-fwd_index = []
-for index in ["IG", "HY"]:
-    full_path = os.path.join(data_dir, index + " swaptions")
-    for f in os.listdir(os.path.join(data_dir, index + " swaptions")):
-        with open(os.path.join(full_path, f), "rb") as fh:
-            flag = False
-            masterdf = {}
-            for line in fh:
-                line = line.decode("utf-8", "ignore")
-                line = line.rstrip()
-                m = re.search(
-                    "(IG|HY)(\d{2}) 5y (?:.*)SWAPTION (?:UPDATE|CLOSES|CLOSE) - Ref\D+(.+)$",
-                    line,
-                )
-                if m:
-                    indextype = m.groups()[0]
-                    series = int(m.groups()[1])
-                    if indextype == "HY":
-                        refprice, refspread = map(
-                            float,
-                            re.match("([\S]+)\s+\(([^)]+)\)", m.groups()[2]).groups(),
-                        )
-                    else:
-                        refspread = float(m.groups()[2])
-                    continue
-                if line.startswith("At"):
-                    quotedate = pd.to_datetime(line[4:])
-                    continue
-                if line.startswith("Expiry"):
-                    m = re.match(
-                        "Expiry (\d{2}\w{3}\d{2}) \((?:([\S]+) )?([\S]+)\)", line
-                    )
-                    if m:
-                        date, fwprice, fwspread = m.groups()
-                        date = pd.to_datetime(date, format="%d%b%y")
-                    continue
-                if line.startswith("Stk"):
-                    flag = True
-                    r = []
-                    continue
-                if flag:
-                    if line:
-                        vals = re.sub(" +", " ", line).split(" ")
-                        if indextype == "HY":
-                            vals.pop(2)
-                            vals.pop(9)
-                        else:
-                            vals.pop(1)
-                            vals.pop(8)
-                        r.append(vals)
-                        continue
-                    else:
-                        if indextype == "HY":
-                            cols = [
-                                "Strike",
-                                "Sprd",
-                                "Pay",
-                                "DeltaPay",
-                                "Rec",
-                                "Vol",
-                                "VolChg",
-                                "VolBpd",
-                                "Tail",
-                            ]
-                        else:
-                            cols = [
-                                "Strike",
-                                "Pay",
-                                "DeltaPay",
-                                "Rec",
-                                "Vol",
-                                "VolChg",
-                                "VolBpd",
-                                "Tail",
-                            ]
-                        df = pd.DataFrame.from_records(r, columns=cols)
-
-                        df[["PayBid", "PayOffer"]] = df.Pay.str.split("/", expand=True)
-                        df[["RecBid", "RecOffer"]] = df.Rec.str.split("/", expand=True)
-                        df.drop(["Pay", "Rec"], axis=1, inplace=True)
-                        for col in df:
-                            df[col] = pd.to_numeric(df[col], errors="coerce")
-                        df.set_index("Strike", inplace=True)
-                        d = {
-                            "quotedate": quotedate,
-                            "expiry": date,
-                            "index": indextype,
-                            "series": series,
-                            "ref": refspread if indextype == "IG" else refprice,
-                        }
-                        if indextype == "IG":
-                            d["fwdspread"] = float(fwspread)
-                        else:
-                            d["fwdprice"] = float(fwprice)
-                        fwd_index.append(d)
-
-                        masterdf[date] = df
-                        flag = False
-                        r = []
-                        continue
-        all_df[(quotedate, indextype, series)] = pd.concat(masterdf, names=["expiry"])
-all_df = pd.concat(all_df, names=["quotedate", "index", "series"])
-all_df["DeltaPay"] = -all_df["DeltaPay"] / 100
-all_df["Vol"] /= 100
-index_df = pd.DataFrame(fwd_index)
-
-all_df.reset_index(inplace=True)
-all_df = all_df.rename(
-    columns={
-        "Strike": "strike",
-        "Vol": "vol",
-        "PayOffer": "pay_offer",
-        "PayBid": "pay_bid",
-        "RecOffer": "rec_offer",
-        "RecBid": "rec_bid",
-        "Tail": "tail",
-        "DeltaPay": "delta_pay",
-    }
-)
-del all_df["VolBpd"], all_df["VolChg"]
-if "Sprd" in all_df:
-    del all_df["Sprd"]
-all_df["quote_source"] = "GS"
-index_df = index_df.drop_duplicates(["quotedate", "index", "series", "expiry"])
-
-##insert quotes
-from utils.db import dbengine
-
-serenitasdb = dbengine("serenitasdb")
-conn = serenitasdb.raw_connection()
-format_str = "INSERT INTO swaption_ref_quotes({}) VALUES({}) " "ON CONFLICT DO NOTHING"
-cols = index_df.columns
-sqlstr = format_str.format(",".join(cols), ",".join(["%s"] * len(cols)))
-with conn.cursor() as c:
-    c.executemany(sqlstr, index_df.itertuples(index=False))
-conn.commit()
-
-
-format_str = "INSERT INTO swaption_quotes({}) VALUES({}) ON CONFLICT DO NOTHING"
-cols = all_df.columns
-sqlstr = format_str.format(",".join(cols), ",".join(["%s"] * len(cols)))
-with conn.cursor() as c:
-    c.executemany(sqlstr, all_df.itertuples(index=False))
-conn.commit()
-conn.close()
diff --git a/python/parse_gs_exchange.py b/python/parse_gs_exchange.py
deleted file mode 100644
index a77f87c5..00000000
--- a/python/parse_gs_exchange.py
+++ /dev/null
@@ -1,137 +0,0 @@
-from pathlib import Path
-from exchange import get_msgs
-from pytz import timezone
-from parse_emails import write_todb
-import datetime
-import logging
-import os
-import pandas as pd
-import re
-
-
-class ParseError(Exception):
-    pass
-
-
-def parse_email(email, fwd_index):
-    m = re.search("(IG|HY)(\d{2}) 5y (?:.*)SWAPTION (?:UPDATE|CLOSES|CLOSE) - Ref\D+([\d.]+)(?:[^(]+\(([\d.]+)\))?",
-                  email.subject)
-    if m:
-        indextype, series, ref, refspread = m.groups()
-        series = int(series)
-        if indextype == 'IG':
-            refspread = ref
-        try:
-            refspread = float(ref)
-        except ValueError as e:
-            raise ParseError(str(e))
-    else:
-        raise ParseError(f"can't parse subject line: {email.subject}")
-
-    quotedate = datetime.datetime.fromtimestamp(email.datetime_sent.timestamp(),
-                                                timezone('America/New_York'))
-    flag = False
-    masterdf = {}
-    for line in email.body.split("\r\n"):
-        if line.startswith("Expiry"):
-            m = re.match("Expiry (\d{2}\w{3}\d{2}) \((?:([\S]+) )?([\S]+)\)", line)
-            if m:
-                date, fwprice, fwspread = m.groups()
-                date = pd.to_datetime(date, format='%d%b%y')
-            continue
-        if line.startswith("Stk"):
-            flag = True
-            r = []
-            continue
-        if flag:
-            if line:
-                vals = re.sub(" +", " ", line).split(" ")
-                if indextype == 'HY':
-                    vals.pop(2)
-                    vals.pop(9)
-                else:
-                    vals.pop(1)
-                    vals.pop(8)
-                r.append(vals)
-                continue
-            else:
-                if indextype == 'HY':
-                    cols = ['Strike', 'Sprd', 'Pay', 'DeltaPay', 'Rec', 'Vol',
-                            'VolChg', 'VolBpd', 'Tail']
-                else:
-                    cols = ['Strike', 'Pay', 'DeltaPay', 'Rec', 'Vol',
-                            'VolChg', 'VolBpd', 'Tail']
-                df = pd.DataFrame.from_records(r, columns=cols)
-
-                df[['PayBid', 'PayOffer']] = df.Pay.str.split('/', expand=True)
-                df[['RecBid', 'RecOffer']] = df.Rec.str.split('/', expand=True)
-                df.drop(['Pay', 'Rec'], axis=1, inplace=True)
-                for col in df:
-                    df[col] = pd.to_numeric(df[col], errors='coerce')
-                df.set_index('Strike', inplace=True)
-                d = {'quotedate': quotedate,
-                     'expiry': date,
-                     'index': indextype,
-                     'series': series,
-                     'ref': refspread if indextype == "IG" else ref,
-                     'msg_id': int(get_msg_id(email), 16)}
-                if indextype == "IG":
-                    d['fwdspread'] = float(fwspread)
-                else:
-                    d['fwdprice'] = float(fwprice)
-                fwd_index.append(d)
-
-                masterdf[date] = df
-                flag = False
-                r = []
-                continue
-    return quotedate, indextype, series, pd.concat(masterdf, names=['expiry'])
-
-
-def clean_df(all_df):
-    all_df = pd.concat(all_df, names=['quotedate', 'index', 'series'], sort=True)
-    all_df['DeltaPay'] = - all_df['DeltaPay']/100
-    all_df['Vol'] /= 100
-    all_df.reset_index(inplace=True)
-    all_df = all_df.rename(columns={'Strike':'strike',
-                                    'Vol': 'vol',
-                                    'PayOffer': 'pay_offer',
-                                    'PayBid': 'pay_bid',
-                                    'RecOffer': 'rec_offer',
-                                    'RecBid': 'rec_bid',
-                                    'Tail': 'tail',
-                                    'DeltaPay': 'delta_pay'})
-    del all_df['VolBpd'], all_df['VolChg']
-    if 'Sprd' in all_df:
-        del all_df['Sprd']
-    return all_df
-
-def get_msg_id(msg):
-    return msg.message_id[1:17].lower()
-
-def save_email(msg, path):
-    fname = path / ("{:%Y-%m-%d %H-%M-%S}_{}".
-                    format(msg.datetime_sent,
-                           get_msg_id(msg)))
-    with fname.open("w") as fh:
-        fh.write(msg.text_body)
-
-if __name__ == "__main__":
-    fwd_index = []
-    swaption_quotes = {}
-    save_path = Path(os.environ["DATA_DIR"]) / "swaptions" / "exchange"
-    for email in get_msgs(count=20):
-        save_email(email, save_path)
-        try:
-            quotedate, indextype, series, df = parse_email(email, fwd_index)
-        except ParseError as e:
-            logging.exception(e)
-            continue
-
-        swaption_quotes[(quotedate, indextype, series)] = df
-    index_df = pd.DataFrame(fwd_index)
-    index_df = index_df.drop_duplicates(['quotedate', 'index', 'series', 'expiry'])
-    index_df['quote_source'] = 'GS'
-    swaption_quotes = clean_df(swaption_quotes)
-    swaption_quotes = swaption_quotes.set_index(['quotedate', 'index', 'series', 'expiry'])
-    write_todb(swaption_quotes, index_df)