1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
|
import datetime
import io
import pandas as pd
import requests
import zipfile
from pathlib import Path
def download_credit_slices(d: datetime.date) -> None:
for i in range(1, 400):
url = f"https://kgc0418-tdw-data2-0.s3.amazonaws.com/slices/SLICE_CREDITS_{d:%Y_%m_%d}_{i}.zip"
r = requests.get(url)
if r.status_code != 200:
continue
with zipfile.ZipFile(io.BytesIO(r.content)) as z:
z.extractall()
def download_cumulative_credit(d: datetime.date) -> None:
url = f"https://kgc0418-tdw-data2-0.s3.amazonaws.com/slices/CUMULATIVE_CREDITS_{d:%Y_%m_%d}.zip"
r = requests.get(url)
if r.status_code != 200:
return
with zipfile.ZipFile(io.BytesIO(r.content)) as z:
z.extractall(path="/home/serenitas/CorpCDOs/data/DTCC")
def load_option_data():
base_dir = Path("/home/serenitas/CorpCDOs/data/DTCC/")
df = pd.concat([
pd.read_csv(f,
parse_dates=["EXECUTION_TIMESTAMP", "EFFECTIVE_DATE", "END_DATE"])
for f in base_dir.glob("*.csv")])
df = df[df.OPTION_FAMILY.notnull()]
df = df.dropna(axis=1, how='all')
del df["ASSET_CLASS"]
del df["OPTION_FAMILY"]
for col in ["INDICATION_OF_END_USER_EXCEPTION",
"INDICATION_OF_OTHER_PRICE_AFFECTING_TERM",
"BLOCK_TRADES_AND_LARGE_NOTIONAL_OFF-FACILITY_SWAPS"]:
df[col] = df[col].map({"N": False, "Y": True})
for col in ["ACTION", "CLEARED", "PRICE_NOTATION_TYPE", "OPTION_TYPE",
"OPTION_CURRENCY", "INDICATION_OF_COLLATERALIZATION", "EXECUTION_VENUE",
"DAY_COUNT_CONVENTION", "NOTIONAL_CURRENCY_1", "SETTLEMENT_CURRENCY"]:
df[col] = df[col].astype("category")
for col in ["OPTION_PREMIUM", "PRICE_NOTATION", "OPTION_STRIKE_PRICE"]:
df[col] = df[col].str.replace(",", "").astype("float")
df.UNDERLYING_ASSET_1 = df.UNDERLYING_ASSET_1.str.rsplit(":", n=1, expand=True)[1]
for col in ["EFFECTIVE_DATE", "OPTION_EXPIRATION_DATE", "OPTION_LOCK_PERIOD"]:
df[col+"_parsed"] = pd.to_datetime(df[col], errors="coerce")
df.ORIGINAL_DISSEMINATION_ID = df.ORIGINAL_DISSEMINATION_ID.astype("Int64")
return df
if __name__ == "__main__":
pass
# dr = pd.bdate_range("2018-01-01", "2019-02-11")
# for d in dr:
# download_cumulative_credit(d)
|