diff options
| -rw-r--r-- | python/markit/__main__.py | 5 | ||||
| -rw-r--r-- | python/markit/cds.py | 35 | ||||
| -rw-r--r-- | python/markit/import_quotes.py | 259 | ||||
| -rw-r--r-- | python/markit/loans.py | 93 | ||||
| -rw-r--r-- | python/markit/utils.py | 6 |
5 files changed, 260 insertions, 138 deletions
diff --git a/python/markit/__main__.py b/python/markit/__main__.py index 48db1c0c..05ea50a0 100644 --- a/python/markit/__main__.py +++ b/python/markit/__main__.py @@ -9,6 +9,7 @@ from .cds import download_cds_data, download_composite_data from .loans import download_facility, insert_facility, download_marks, update_facility from .rates import downloadMarkitIRData from .import_quotes import insert_cds, insert_index, insert_tranche +from pandas.tseries.offsets import BDay from sqlalchemy import create_engine from .utils import default_date from utils import SerenitasFileHandler @@ -33,9 +34,9 @@ args = parser.parse_args() historical = True if args.workdate is None and not arg.rates: historical = False - workdate = default_date() + workdate = pd.datetime.today() - BDay(1) elif arg.rates: - workdate = datetime.date.today() + workdate = pd.datetime.today() else: workdate = args.workdate diff --git a/python/markit/cds.py b/python/markit/cds.py index e080a015..a56fb5e3 100644 --- a/python/markit/cds.py +++ b/python/markit/cds.py @@ -7,6 +7,7 @@ import zipfile import time
from pandas.tseries.offsets import BDay
import pandas as pd
+
logger = logging.getLogger(__name__)
@@ -15,10 +16,17 @@ def convertToNone(v): def download_cds_data(payload):
- r = requests.post('https://www.markit.com/export.jsp', params=payload)
+ r = requests.post("https://www.markit.com/export.jsp", params=payload)
with zipfile.ZipFile(io.BytesIO(r.content)) as z:
- f2 = open(os.path.join(os.environ['BASE_DIR'], "Tranche_data", "CDS",
- "cds eod {0}.csv".format(payload['date'])), "wb")
+ f2 = open(
+ os.path.join(
+ os.environ["BASE_DIR"],
+ "Tranche_data",
+ "CDS",
+ "cds eod {0}.csv".format(payload["date"]),
+ ),
+ "wb",
+ )
for f in z.namelist():
if "csv" in f:
f1 = z.open(f)
@@ -32,19 +40,24 @@ def download_cds_data(payload): def download_composite_data(payload, historical=False):
# if historical, we want to maintain the invariant mtime(f)== payload['date'] + BDay(1)
if historical:
- ts = (pd.Timestamp(payload['date']) + BDay(1)).timestamp()
- for report in ['COMPOSITES', 'TRANCHE_COMPOSITES']:
- for family in ['CDX', 'ITRAXX-EUROPE']:
- payload.update({'family': family, 'report': report})
+ ts = (pd.Timestamp(payload["date"]) + BDay(1)).timestamp()
+ for report in ["COMPOSITES", "TRANCHE_COMPOSITES"]:
+ for family in ["CDX", "ITRAXX-EUROPE"]:
+ payload.update({"family": family, "report": report})
while True:
- r = requests.post('https://www.markit.com/export.jsp', params=payload)
+ r = requests.post("https://www.markit.com/export.jsp", params=payload)
try:
with zipfile.ZipFile(io.BytesIO(r.content)) as z:
for f in z.namelist():
if "csv" in f:
- path = z.extract(f, path=os.path.join(os.environ['BASE_DIR'],
- "Tranche_data",
- "Composite_reports"))
+ path = z.extract(
+ f,
+ path=os.path.join(
+ os.environ["BASE_DIR"],
+ "Tranche_data",
+ "Composite_reports",
+ ),
+ )
if historical:
os.utime(path, (ts, ts))
except zipfile.BadZipfile:
diff --git a/python/markit/import_quotes.py b/python/markit/import_quotes.py index c0ab7bed..22339bfe 100644 --- a/python/markit/import_quotes.py +++ b/python/markit/import_quotes.py @@ -11,43 +11,60 @@ from pandas.tseries.offsets import BDay logger = logging.getLogger(__name__) + def convert(x): try: return float(x[:-1]) except ValueError: return None + def get_index_list(database, workdate): with database.cursor() as c: - c.execute("SELECT distinct index, series FROM index_maturity " - "WHERE issue_date IS NOT NULL and issue_date <= %s + 10 " - "AND maturity >= %s", - (workdate, workdate)) + c.execute( + "SELECT distinct index, series FROM index_maturity " + "WHERE issue_date IS NOT NULL and issue_date <= %s + 10 " + "AND maturity >= %s", + (workdate, workdate), + ) for index, series in c: yield index + str(series) database.commit() -DOC_CLAUSE_MAPPING14 = {'Full Restructuring': 'MM14', - 'No Restructuring': 'XR14', - 'Modified Modified Restructurin': 'MM14'} -DOC_CLAUSE_MAPPING = {'Full Restructuring': 'MM', - 'No Restructuring': 'XR', - 'Modified Modified Restructurin': 'MM'} +DOC_CLAUSE_MAPPING14 = { + "Full Restructuring": "MM14", + "No Restructuring": "XR14", + "Modified Modified Restructurin": "MM14", +} + +DOC_CLAUSE_MAPPING = { + "Full Restructuring": "MM", + "No Restructuring": "XR", + "Modified Modified Restructurin": "MM", +} + def get_markit_bbg_mapping(database, basketid_list, workdate): markit_bbg_mapping = defaultdict(set) all_tickers = set([]) with database.cursor() as c: - c.execute("SELECT markit_ticker, markit_tier, spread, currency, cds_curve, " - " short_code FROM historical_cds_issuers(%s) " - "JOIN basket_constituents USING (company_id, seniority) " - "WHERE basketid=ANY(%s)", - (workdate, list(basketid_list))) + c.execute( + "SELECT markit_ticker, markit_tier, spread, currency, cds_curve, " + " short_code FROM historical_cds_issuers(%s) " + "JOIN basket_constituents USING (company_id, seniority) " + "WHERE basketid=ANY(%s)", + (workdate, list(basketid_list)), + ) for line in c: all_tickers.add((line.markit_ticker, line.markit_tier)) - key = (line.markit_ticker, line.markit_tier, line.currency, - line.short_code, float(line.spread)/10000) + key = ( + line.markit_ticker, + line.markit_tier, + line.currency, + line.short_code, + float(line.spread) / 10000, + ) ## each markit ticker can be mapped to multiple bbg tickers ## these bbg tickers can have different curves (ok) ## or same curves (not ok since date, curve_ticker needs to be unique) @@ -56,15 +73,19 @@ def get_markit_bbg_mapping(database, basketid_list, workdate): database.commit() return (all_tickers, markit_bbg_mapping) + def get_bbg_tickers(database, basketid_list, workdate): with database.cursor() as c: - c.execute("SELECT distinct cds_curve FROM historical_cds_issuers(%s) " - "JOIN basket_constituents USING(company_id, seniority) " - "WHERE basketid=ANY(%s)", - (workdate, list(basketid_list))) + c.execute( + "SELECT distinct cds_curve FROM historical_cds_issuers(%s) " + "JOIN basket_constituents USING(company_id, seniority) " + "WHERE basketid=ANY(%s)", + (workdate, list(basketid_list)), + ) yield from chain.from_iterable(e[0] for e in c) database.commit() + def get_basketids(database, index_list, workdate): with database.cursor() as c: for index in index_list: @@ -72,11 +93,13 @@ def get_basketids(database, index_list, workdate): yield c.fetchone()[0] database.commit() + def get_current_tickers(database, workdate): index_list = get_index_list(database, workdate) basketid_list = get_basketids(database, index_list, workdate) return get_markit_bbg_mapping(database, basketid_list, workdate) + def insert_cds(database, workdate): """insert Markit index quotes into the database @@ -86,29 +109,52 @@ def insert_cds(database, workdate): all_tickers, markit_bbg_mapping = get_current_tickers(database, workdate) filename = "cds eod {0:%Y%m%d}.csv".format(workdate) - colnames = ['Upfront'+tenor for tenor in ['6m', '1y', '2y', '3y', '4y', '5y', '7y', '10y']] - sqlstr = "INSERT INTO cds_quotes(date, curve_ticker, upfrontbid, upfrontask," \ - "runningbid, runningask, source, recovery) VALUES(%s, %s, %s, %s, %s, %s, %s, %s) " \ - "ON CONFLICT DO NOTHING" + colnames = [ + "Upfront" + tenor for tenor in ["6m", "1y", "2y", "3y", "4y", "5y", "7y", "10y"] + ] + sqlstr = ( + "INSERT INTO cds_quotes(date, curve_ticker, upfrontbid, upfrontask," + "runningbid, runningask, source, recovery) VALUES(%s, %s, %s, %s, %s, %s, %s, %s) " + "ON CONFLICT DO NOTHING" + ) tickers_found = set() - with open(os.path.join(os.environ['BASE_DIR'], "Tranche_data", "CDS", filename)) as fh: + with open( + os.path.join(os.environ["BASE_DIR"], "Tranche_data", "CDS", filename) + ) as fh: csvreader = csv.DictReader(fh) with database.cursor() as c: for line in csvreader: - spread = float(line['RunningCoupon']) - k = (line['Ticker'], line['Tier'], line['Ccy'], - line['DocClause'], spread) + spread = float(line["RunningCoupon"]) + k = ( + line["Ticker"], + line["Tier"], + line["Ccy"], + line["DocClause"], + spread, + ) if k in markit_bbg_mapping: for curves in markit_bbg_mapping[k]: - c.executemany(sqlstr, - [(workdate, t, convert(line[col]), convert(line[col]), - spread * 10000, spread * 10000, - 'MKIT', convert(line['RealRecovery'])/100) - for col, t in zip(colnames, curves)]) - tickers_found.add((line['Ticker'], line['Tier'])) + c.executemany( + sqlstr, + [ + ( + workdate, + t, + convert(line[col]), + convert(line[col]), + spread * 10000, + spread * 10000, + "MKIT", + convert(line["RealRecovery"]) / 100, + ) + for col, t in zip(colnames, curves) + ], + ) + tickers_found.add((line["Ticker"], line["Tier"])) database.commit() - logger.warning('missing_quotes for {0}'.format(all_tickers-tickers_found)) + logger.warning("missing_quotes for {0}".format(all_tickers - tickers_found)) + def get_date(f): with open(f) as fh: @@ -118,6 +164,7 @@ def get_date(f): date = next(fh).split(",", 1)[0][1:-1] return datetime.datetime.strptime(date, "%d-%b-%y").date() + def insert_index(engine, workdate=None): """insert Markit index quotes into the database @@ -125,20 +172,30 @@ def insert_index(engine, workdate=None): :param workdate: date. If None, we will try to reinsert all files """ - basedir = os.path.join(os.environ['BASE_DIR'], 'Tranche_data', 'Composite_reports') - filenames = [os.path.join(basedir, f) for f in os.listdir(basedir) if 'Indices' in f] + basedir = os.path.join(os.environ["BASE_DIR"], "Tranche_data", "Composite_reports") + filenames = [ + os.path.join(basedir, f) for f in os.listdir(basedir) if "Indices" in f + ] - name_mapping = {"CDXNAHY": "HY", - "CDXNAIG": "IG", - 'iTraxx Eur': "EU", - 'iTraxx Eur Xover': "XO"} - cols = ['close_price', 'close_spread', 'model_price', 'model_spread'] - colmapping={'Date': 'date', 'Name': 'index', 'Series': 'series', - 'Version': 'version', 'Term': 'tenor', - 'Composite Price': 'close_price', - 'Composite Spread': 'close_spread', - 'Model Price': 'model_price', 'Model Spread': 'model_spread'} - ext_cols = ['date', 'index', 'series', 'version', 'tenor'] + cols + ['source'] + name_mapping = { + "CDXNAHY": "HY", + "CDXNAIG": "IG", + "iTraxx Eur": "EU", + "iTraxx Eur Xover": "XO", + } + cols = ["close_price", "close_spread", "model_price", "model_spread"] + colmapping = { + "Date": "date", + "Name": "index", + "Series": "series", + "Version": "version", + "Term": "tenor", + "Composite Price": "close_price", + "Composite Spread": "close_spread", + "Model Price": "model_price", + "Model Spread": "model_spread", + } + ext_cols = ["date", "index", "series", "version", "tenor"] + cols + ["source"] dates_to_files = {} for f in filenames: @@ -154,21 +211,26 @@ def insert_index(engine, workdate=None): filenames = dates_to_files[workdate] for f in filenames: - data = pd.read_csv(f, skiprows=2, parse_dates=[0, 7], engine='python') + data = pd.read_csv(f, skiprows=2, parse_dates=[0, 7], engine="python") data = data.rename(columns=colmapping) - data.dropna(subset=['close_price'], inplace=True) + data.dropna(subset=["close_price"], inplace=True) for col in cols: - data[col] = data[col].str.replace('%', '').astype('float') - data['tenor'] = data['tenor'].apply(lambda x: x.lower()+'r') - data['index'] = data['index'].apply(lambda x: name_mapping[x] if x in name_mapping else np.NaN) - data = data.dropna(subset=['index']) - data['close_spread'] *= 100 - data['model_spread'] *= 100 + data[col] = data[col].str.replace("%", "").astype("float") + data["tenor"] = data["tenor"].apply(lambda x: x.lower() + "r") + data["index"] = data["index"].apply( + lambda x: name_mapping[x] if x in name_mapping else np.NaN + ) + data = data.dropna(subset=["index"]) + data["close_spread"] *= 100 + data["model_spread"] *= 100 ## we renumbered the version for HY9, 10 and 11 - data.loc[data.series.isin([9, 10, 11]) & (data.index=='HY'), 'version'] -= 3 - #data = data.groupby(['index', 'series', 'tenor', 'date'], as_index=False).last() - data['source'] = 'MKIT' - data[ext_cols].to_sql('index_quotes_pre', engine, if_exists='append', index=False) + data.loc[data.series.isin([9, 10, 11]) & (data.index == "HY"), "version"] -= 3 + # data = data.groupby(['index', 'series', 'tenor', 'date'], as_index=False).last() + data["source"] = "MKIT" + data[ext_cols].to_sql( + "index_quotes_pre", engine, if_exists="append", index=False + ) + def insert_tranche(engine, workdate=None): """insert Markit index quotes into the database @@ -178,30 +240,53 @@ def insert_tranche(engine, workdate=None): :type workdate: pd.Timestamp """ - basedir = os.path.join(os.environ['BASE_DIR'], 'Tranche_data', 'Composite_reports') - filenames = [os.path.join(basedir, f) for f in os.listdir(basedir) if f.startswith('Tranche Composites')] - index_version = pd.read_sql_table("index_version", engine, index_col='redindexcode') + basedir = os.path.join(os.environ["BASE_DIR"], "Tranche_data", "Composite_reports") + filenames = [ + os.path.join(basedir, f) + for f in os.listdir(basedir) + if f.startswith("Tranche Composites") + ] + index_version = pd.read_sql_table("index_version", engine, index_col="redindexcode") for f in filenames: - if workdate is None or \ - datetime.datetime.fromtimestamp(os.path.getmtime(f)).date()==(workdate+BDay(1)).date(): - df = pd.read_csv(f, skiprows=2, parse_dates=['Date']) - df.rename(columns={'Date':'quotedate', - 'Index Term':'tenor', - 'Attachment':'attach', - 'Detachment':'detach', - 'Tranche Upfront Bid': 'upfront_bid', - 'Tranche Upfront Mid': 'upfront_mid', - 'Tranche Upfront Ask': 'upfront_ask', - 'Index Price Mid': 'index_price', - 'Tranche Spread Mid': 'tranche_spread', - 'Red Code':'redindexcode'}, inplace=True) - df.attach = df.attach *100 - df.detach = df.detach * 100 - df.tranche_spread = df.tranche_spread*10000 - df.tenor = df.tenor.str.lower() + 'r' - df.set_index('redindexcode', inplace=True) - df = df.join(index_version) - df = df.filter(['basketid', 'quotedate', 'tenor', 'attach', 'detach', - 'upfront_bid', 'upfront_ask', 'upfront_mid', - 'tranche_spread', 'index_price']) - df.to_sql('markit_tranche_quotes', engine, if_exists='append', index=False) + if ( + workdate is None + or datetime.datetime.fromtimestamp(os.path.getmtime(f)).date() + == (workdate + BDay(1)).date() + ): + df = pd.read_csv(f, skiprows=2, parse_dates=["Date"]) + df.rename( + columns={ + "Date": "quotedate", + "Index Term": "tenor", + "Attachment": "attach", + "Detachment": "detach", + "Tranche Upfront Bid": "upfront_bid", + "Tranche Upfront Mid": "upfront_mid", + "Tranche Upfront Ask": "upfront_ask", + "Index Price Mid": "index_price", + "Tranche Spread Mid": "tranche_spread", + "Red Code": "redindexcode", + }, + inplace=True, + ) + df.attach = df.attach * 100 + df.detach = df.detach * 100 + df.tranche_spread = df.tranche_spread * 10000 + df.tenor = df.tenor.str.lower() + "r" + df.set_index("redindexcode", inplace=True) + df = df.join(index_version) + df = df.filter( + [ + "basketid", + "quotedate", + "tenor", + "attach", + "detach", + "upfront_bid", + "upfront_ask", + "upfront_mid", + "tranche_spread", + "index_price", + ] + ) + df.to_sql("markit_tranche_quotes", engine, if_exists="append", index=False) diff --git a/python/markit/loans.py b/python/markit/loans.py index 3e6766cd..5eae5334 100644 --- a/python/markit/loans.py +++ b/python/markit/loans.py @@ -8,21 +8,30 @@ from psycopg2 import IntegrityError logger = logging.getLogger(__name__)
+
def download_facility(workdate, payload):
- r = requests.post('https://loans.markit.com/loanx/LoanXFacilityUpdates.csv',
- params=payload)
- facility_filename = os.path.join(os.environ['DATA_DIR'], "Facility files", "facility_{0}.csv".format(workdate))
+ r = requests.post(
+ "https://loans.markit.com/loanx/LoanXFacilityUpdates.csv", params=payload
+ )
+ facility_filename = os.path.join(
+ os.environ["DATA_DIR"], "Facility files", "facility_{0}.csv".format(workdate)
+ )
with open(facility_filename, "wb") as fh:
fh.write(r.content)
-@with_connection('etdb')
+
+@with_connection("etdb")
def insert_facility(conn, workdate):
- facility_filename = os.path.join(os.environ['DATA_DIR'], "Facility files", "facility_{0}.csv".format(workdate))
- sqlstring = "INSERT INTO markit_facility VALUES( {0} )".format( ",".join(["%s"] * 13))
- with open( facility_filename, "r") as fh:
+ facility_filename = os.path.join(
+ os.environ["DATA_DIR"], "Facility files", "facility_{0}.csv".format(workdate)
+ )
+ sqlstring = "INSERT INTO markit_facility VALUES( {0} )".format(
+ ",".join(["%s"] * 13)
+ )
+ with open(facility_filename, "r") as fh:
reader = csv.reader(fh)
header = next(reader)
- if 'Authentication failed' in header:
+ if "Authentication failed" in header:
logger.error("Couldn't authenticate")
raise SystemExit
with conn.cursor() as c:
@@ -36,42 +45,60 @@ def insert_facility(conn, workdate): else:
conn.commit()
-@with_connection('etdb')
+
+@with_connection("etdb")
def download_marks(conn, workdate, payload):
- r = requests.post('https://loans.markit.com/loanx/LoanXMarks.csv',
- params=payload)
- marks_filename = os.path.join(os.environ['DATA_DIR'], "markit", "markit_data_{0}.csv".format(workdate))
+ r = requests.post("https://loans.markit.com/loanx/LoanXMarks.csv", params=payload)
+ marks_filename = os.path.join(
+ os.environ["DATA_DIR"], "markit", "markit_data_{0}.csv".format(workdate)
+ )
with open(marks_filename, "wb") as fh:
fh.write(r.content)
- sqlstring = "INSERT INTO markit_prices VALUES( {0} )".format(",".join([ "%s" ] * 5))
+ sqlstring = "INSERT INTO markit_prices VALUES( {0} )".format(",".join(["%s"] * 5))
with open(marks_filename, "r") as fh:
reader = csv.DictReader(fh)
- if 'Authentication failed' in reader.fieldnames[0]:
+ if "Authentication failed" in reader.fieldnames[0]:
logger.error("Couldn't authenticate")
raise SystemExit
with conn.cursor() as c:
for line in reader:
- if line['Depth'] == 'implied':
- line['Depth'] = 0
- c.execute(sqlstring, (line['LoanX ID'], line['Bid'], line['Offer'],
- line['Depth'], line['Mark Date']))
+ if line["Depth"] == "implied":
+ line["Depth"] = 0
+ c.execute(
+ sqlstring,
+ (
+ line["LoanX ID"],
+ line["Bid"],
+ line["Offer"],
+ line["Depth"],
+ line["Mark Date"],
+ ),
+ )
conn.commit()
-@with_connection('etdb')
+
+@with_connection("etdb")
def update_facility(conn, workdate, payload):
- #we update the missing facility loanxids
- sqlstring = "SELECT loanxid FROM markit_prices EXCEPT SELECT loanxid FROM markit_facility";
- facility_diff_filename = os.path.join(os.environ['DATA_DIR'], "Facility files",
- "facility_diff_{0}.csv".format(workdate))
- with open( facility_diff_filename, "wt") as fh:
+ # we update the missing facility loanxids
+ sqlstring = (
+ "SELECT loanxid FROM markit_prices EXCEPT SELECT loanxid FROM markit_facility"
+ )
+ facility_diff_filename = os.path.join(
+ os.environ["DATA_DIR"],
+ "Facility files",
+ "facility_diff_{0}.csv".format(workdate),
+ )
+ with open(facility_diff_filename, "wt") as fh:
flag = False
with conn.cursor() as c:
c.execute(sqlstring)
for loanxid in c:
- payload.update({'LOANXID': loanxid[0]})
- r = requests.post('https://loans.markit.com/loanx/LoanXOneFacility.csv',
- params=payload)
- header, *rest = r.content.decode().split('\n')
+ payload.update({"LOANXID": loanxid[0]})
+ r = requests.post(
+ "https://loans.markit.com/loanx/LoanXOneFacility.csv",
+ params=payload,
+ )
+ header, *rest = r.content.decode().split("\n")
if flag:
fh.write(rest[0] + "\n")
else:
@@ -79,9 +106,11 @@ def update_facility(conn, workdate, payload): fh.write(rest[0] + "\n")
flag = True
- sqlstring = "INSERT INTO markit_facility(LoanXID, PMDID, IssuerName, dealname, facility_type," \
- "loanx_facility_type, initial_amount, initial_spread, maturity, industry, modified_time)" \
- "VALUES( {0} )".format( ",".join( ["%s"] * 11))
+ sqlstring = (
+ "INSERT INTO markit_facility(LoanXID, PMDID, IssuerName, dealname, facility_type,"
+ "loanx_facility_type, initial_amount, initial_spread, maturity, industry, modified_time)"
+ "VALUES( {0} )".format(",".join(["%s"] * 11))
+ )
try:
with open(facility_diff_filename, "r") as fh:
reader = csv.reader(fh)
@@ -89,7 +118,7 @@ def update_facility(conn, workdate, payload): with conn.cursor() as c:
for line in reader:
newline = [v or None for v in line] + [workdate]
- newline.pop(9) # remove the spread to maturity value
+ newline.pop(9) # remove the spread to maturity value
c.execute(sqlstring, newline)
conn.commit()
except StopIteration:
diff --git a/python/markit/utils.py b/python/markit/utils.py deleted file mode 100644 index 6f6c6770..00000000 --- a/python/markit/utils.py +++ /dev/null @@ -1,6 +0,0 @@ -import pandas as pd -from pandas.tseries.offsets import BDay - -def default_date(): - d = pd.datetime.today() - BDay(1) - return d.date() |
