diff options
| -rw-r--r-- | python/dtcc_sdr.py | 104 |
1 file changed, 66 insertions(+), 38 deletions(-)
diff --git a/python/dtcc_sdr.py b/python/dtcc_sdr.py index e5f05bf7..435fb1da 100644 --- a/python/dtcc_sdr.py +++ b/python/dtcc_sdr.py @@ -5,7 +5,7 @@ import requests import zipfile from utils.db import serenitas_engine, dawn_engine -from pathlib import Path +from env import DATA_DIR from typing import Dict, Any from utils.db import dbconn from pickle import dumps, loads @@ -19,7 +19,7 @@ def download_credit_slices(d: datetime.date) -> None: if r.status_code != 200: continue with zipfile.ZipFile(io.BytesIO(r.content)) as z: - z.extractall() + z.extractall(path=DATA_DIR / "DTCC") def download_cumulative_credit(d: datetime.date) -> None: @@ -28,11 +28,11 @@ def download_cumulative_credit(d: datetime.date) -> None: if r.status_code != 200: return with zipfile.ZipFile(io.BytesIO(r.content)) as z: - z.extractall(path="/home/serenitas/CorpCDOs/data/DTCC") + z.extractall(path=DATA_DIR / "DTCC") def load_data(): - base_dir = Path("/home/serenitas/CorpCDOs/data/DTCC/") + base_dir = DATA_DIR / "DTCC" df = pd.concat( [ pd.read_csv( @@ -130,44 +130,58 @@ def process_tranche_data(df): df.sort_values("EXECUTION_TIMESTAMP", inplace=True) return df + def map_tranche(df): idx_ver = pd.read_sql_query( - "select index, series, redindexcode, maturity, tenor " - "from index_desc", - serenitas_engine, - parse_dates='maturity', - index_col=["redindexcode", "maturity"] - ) + "select index, series, redindexcode, maturity, tenor " "from index_desc", + serenitas_engine, + parse_dates="maturity", + index_col=["redindexcode", "maturity"], + ) idx_ver = idx_ver.loc[idx_ver.index.dropna()] markit = pd.read_sql_query( - "select * from markit_tranche_quotes a " - "left join (select series, index, basketid " - "as id from index_version) b on a.basketid=b.id", - serenitas_engine, - parse_dates='quotedate') - df = df.reset_index().set_index(['UNDERLYING_ASSET_1', 'END_DATE']) - df.rename_axis(index={'UNDERLYING_ASSET_1': 'redindexcode', - 'END_DATE': 'maturity'}, inplace=True) - df = 
df.merge(idx_ver, left_index=True, right_index=True, how='left') - df['EXECUTION_TIMESTAMP'] = df.EXECUTION_TIMESTAMP.dt.normalize() - #Normalize the price format - df.PRICE_NOTATION = (df.PRICE_NOTATION / 1e2).where(df.PRICE_NOTATION_TYPE == "Percentage", df.PRICE_NOTATION) - df['price'] = (df.PRICE_NOTATION / 1e2).where(df['index'].isin(['IG', 'EU', 'XO']), 1-df.PRICE_NOTATION/1e2) - #Allow for bigger bid/offers for equity tranche - markit.upfront_bid = (markit.upfront_mid-.01).where(markit.attach == 0, markit.upfront_bid) - markit.upfront_ask = (markit.upfront_mid+.01).where(markit.attach == 0, markit.upfront_ask) - df = df.merge(markit, - left_on=['index', 'series', 'tenor', 'EXECUTION_TIMESTAMP'], - right_on=['index', 'series', 'tenor', 'quotedate'], - how='outer') - df['identified'] = df.apply(lambda df: - (df.price > df.upfront_bid) & (df.price < df.upfront_ask) - if df.upfront_bid > 0 - else (df.price < df.upfront_bid) & (df.price > df.upfront_ask), - axis=1) + "select * from markit_tranche_quotes a " + "left join (select series, index, basketid " + "as id from index_version) b on a.basketid=b.id", + serenitas_engine, + parse_dates="quotedate", + ) + df = df.reset_index().set_index(["UNDERLYING_ASSET_1", "END_DATE"]) + df.rename_axis( + index={"UNDERLYING_ASSET_1": "redindexcode", "END_DATE": "maturity"}, + inplace=True, + ) + df = df.merge(idx_ver, left_index=True, right_index=True, how="left") + df["EXECUTION_TIMESTAMP"] = df.EXECUTION_TIMESTAMP.dt.normalize() + # Normalize the price format + df.PRICE_NOTATION = (df.PRICE_NOTATION / 1e2).where( + df.PRICE_NOTATION_TYPE == "Percentage", df.PRICE_NOTATION + ) + df["price"] = (df.PRICE_NOTATION / 1e2).where( + df["index"].isin(["IG", "EU", "XO"]), 1 - df.PRICE_NOTATION / 1e2 + ) + # Allow for bigger bid/offers for equity tranche + markit.upfront_bid = (markit.upfront_mid - 0.01).where( + markit.attach == 0, markit.upfront_bid + ) + markit.upfront_ask = (markit.upfront_mid + 0.01).where( + markit.attach == 0, 
markit.upfront_ask + ) + df = df.merge( + markit, + left_on=["index", "series", "tenor", "EXECUTION_TIMESTAMP"], + right_on=["index", "series", "tenor", "quotedate"], + how="outer", + ) + df["identified"] = df.apply( + lambda df: (df.price > df.upfront_bid) & (df.price < df.upfront_ask) + if df.upfront_bid > 0 + else (df.price < df.upfront_bid) & (df.price > df.upfront_ask), + axis=1, + ) df = df[df.identified] - df = df[~df.duplicated(['DISSEMINATION_ID'], keep=False)] - return df.sort_values('EXECUTION_TIMESTAMP') + df = df[~df.duplicated(["DISSEMINATION_ID"], keep=False)] + return df.sort_values("EXECUTION_TIMESTAMP") def insert_correction(conn: connection, dissemination_id: int, **kwargs) -> None: @@ -191,6 +205,21 @@ def get_correction(conn: connection, dissemination_id: int) -> Dict[str, Any]: if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument( + "workdate", + nargs="?", + type=lambda s: pd.datetime.strptime(s, "%Y-%m-%d").date(), + default=datetime.date.today(), + ) + parser.add_argument("-s", "--slice", action="store_true", help="download slice") + args = parser.parse_args() + if args.slice: + download_credit_slices(args.workdate) + else: + download_cumulative_credit(args.workdate) # from utils.db import serenitas_pool # serenitasdb = serenitas_pool.getconn() # insert_correction(serenitasdb, 107282774, OPTION_EXPIRATION_DATE="2019-09-18") @@ -201,4 +230,3 @@ if __name__ == "__main__": df = load_data() df = process_tranche_data(df) df = map_tranche(df) - |
