diff options
Diffstat (limited to 'python/markit')
| -rw-r--r-- | python/markit/__init__.py | 0 | ||||
| -rw-r--r-- | python/markit/__main__.py | 81 | ||||
| -rw-r--r-- | python/markit/cds.py | 46 | ||||
| -rw-r--r-- | python/markit/import_quotes.py | 176 | ||||
| -rw-r--r-- | python/markit/loans.py | 93 |
5 files changed, 396 insertions, 0 deletions
diff --git a/python/markit/__init__.py b/python/markit/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/python/markit/__init__.py diff --git a/python/markit/__main__.py b/python/markit/__main__.py new file mode 100644 index 00000000..a273860f --- /dev/null +++ b/python/markit/__main__.py @@ -0,0 +1,81 @@ +import argparse +import datetime +import logging +import os +import pandas as pd +import sys +import logging +import pdb + +from common import root +from .cds import download_cds_data, download_composite_data +from .import_quotes import insert_cds, insert_index, insert_tranche +from pandas.tseries.offsets import BDay +from sqlalchemy import create_engine +from db import dbconn + +## parse arguments +parser = argparse.ArgumentParser() +group = parser.add_mutually_exclusive_group(required=True) +group.add_argument("-l", "--loans", action="store_true", + help="download markit loan data") +group.add_argument("-c", "--cds", action="store_true", + help="download markit cds data") +parser.add_argument('workdate', nargs='?', type = lambda s: pd.datetime.strptime(sys.argv[1], "%Y-%m-%d")), +# , +# default = pd.datetime.today() - BDay(1)) + +args = parser.parse_args() +historical = True + +if args.loans: + log_file = os.path.join(root, 'logs', 'markit_loans.log') + if args.workdate is None: + historical = False + workdate = datetime.date.today() +else: + log_file = os.path.join(root, 'logs', 'markit_cds.log') + if args.workdate is None: + historical = False + workdate = pd.datetime.today() - BDay(1) + +## set up logging +logger = logging.getLogger('markit') +fh = logging.FileHandler(filename=log_file) +formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') +fh.setFormatter(formatter) +logger.addHandler(fh) +logger.setLevel(logging.INFO) + +if args.loans: + workdate = str(workdate) + payload={'LEGALENTITY': 'lmcg', + 'USERNAME': 'serecapuser', + 'PASSWORD': 'Welcome1'} + download_facility(workdate, payload) + logger.info('facility downloaded') + insert_facility(workdate) + logger.info('facility inserted') + payload.update({'EOD':'Y'}) + download_marks(workdate, payload) + logger.info('marks downloaded') + payload.pop('EOD') + update_facility(workdate, payload) + logger.info('facility updated') +else: + payload = {'user': 'GuillaumeHorel', + 'password': 'password', + 'version': '5', + 'format': 'csv', + 'report': 'FIXED_COUPON', + 'date': workdate.strftime("%Y%m%d"), + 'type': 'CDS'} + download_cds_data(payload) + payload.update({'type':'CredIndex','version':4}) + download_composite_data(payload, historical) + engine = create_engine('postgresql://serenitas_user@debian/serenitasdb') + serenitasdb = dbconn('serenitasdb') + insert_cds(serenitasdb, workdate.date()) + insert_index(engine, workdate.date()) + insert_tranche(engine, workdate.date()) + serenitasdb.close() diff --git a/python/markit/cds.py b/python/markit/cds.py new file mode 100644 index 00000000..7fede498 --- /dev/null +++ b/python/markit/cds.py @@ -0,0 +1,46 @@ +import io
+import logging
+import os
+import requests
+import shutil
+import zipfile
+from common import root
+from pandas.tseries.offsets import BDay
+
+logger = logging.getLogger(__name__)
+
+def convertToNone(v):
+ return v if v else None
+
+def download_cds_data(payload):
+ r = requests.get('https://www.markit.com/export.jsp', params=payload)
+ f2 = open(os.path.join(root, "Tranche_data", "CDS", "cds eod {0}.csv".format(payload['date'])), "wb")
+ with zipfile.ZipFile(io.BytesIO(r.content)) as z:
+ for f in z.namelist():
+ if "csv" in f:
+ f1 = z.open(f)
+ next(f1)
+ next(f1)
+ shutil.copyfileobj(f1, f2)
+ f1.close()
+ f2.close()
+
+def download_composite_data(payload, historical=False):
+ ## if historical, we want to maintain the invariant mtime(f)== payload['date'] + BDay(1)
+ if historical:
+ ts = datetime.datetime.strptime(payload['date'], "%Y%m%d") + BDay(1)
+ ts = ts.timestamp()
+ for report in ['COMPOSITES', 'TRANCHE_COMPOSITES']:
+ for family in ['CDX', 'ITRAXX-EUROPE']:
+ payload.update({'family': family, 'report': report})
+ r = requests.get('https://www.markit.com/export.jsp', params=payload)
+ try:
+ with zipfile.ZipFile(io.BytesIO(r.content)) as z:
+ for f in z.namelist():
+ if "csv" in f:
+ path = z.extract(f, path=os.path.join(root, "Tranche_data", "Composite_reports"))
+ if historical:
+ os.utime(path, (ts, ts))
+ except zipfile.BadZipfile:
+ logger.error(r.content)
+ continue
diff --git a/python/markit/import_quotes.py b/python/markit/import_quotes.py new file mode 100644 index 00000000..7bbb6e89 --- /dev/null +++ b/python/markit/import_quotes.py @@ -0,0 +1,176 @@ +import csv +import datetime +import logging +import numpy as np +import pandas as pd +import os + +from collections import defaultdict +from common import root +from pandas.tseries.offsets import BDay + +logger = logging.getLogger(__name__) + +def convert(x): + try: + return float(x[:-1]) + except ValueError: + return None + +index_list = ['HY9', 'HY10'] + ['HY' + str(s) for s in range(15, 26)] + ['IG9'] + \ + ['IG' + str(s) for s in range(16, 26)] + ['XO22', 'XO23', 'XO24', 'EU9'] + \ + ['EU' +str(s) for s in range(19, 25)] + +DOC_CLAUSE_MAPPING14 = {'Full Restructuring': 'MM14', + 'No Restructuring': 'XR14', + 'Modified Modified Restructurin': 'MM14'} + +DOC_CLAUSE_MAPPING = {'Full Restructuring': 'MM', + 'No Restructuring': 'XR', + 'Modified Modified Restructurin': 'MM'} + +def get_markit_bbg_mapping(database, basketid_list, workdate): + if workdate>=datetime.date(2014, 9, 19): + doc_clause_mapping = DOC_CLAUSE_MAPPING14 + else: + doc_clause_mapping = DOC_CLAUSE_MAPPING + markit_bbg_mapping = defaultdict(set) + all_tickers = set([]) + with database.cursor() as c: + c.execute("SELECT markit_ticker, markit_tier, spread, currency, cds_curve, " \ + " doc_clause FROM historical_cds_issuers(%s) where index_list && %s", + (workdate, basketid_list)) + for line in c: + all_tickers.add((line['markit_ticker'], line['markit_tier'])) + key = (line['markit_ticker'], line['markit_tier'], line['currency'], + doc_clause_mapping[line['doc_clause']], float(line['spread'])/10000) + if key==('CESEOP', 'SNRFOR', 'USD', 'XR14', 0.05): + key=('CESEOP', 'SNRFOR', 'USD', 'XR', 0.05) + ## each markit ticker can be mapped to multiple bbg tickers + ## these bbg tickers can have different curves (ok) + ## or same curves (not ok since date, curve_ticker needs to be unique) + ## therefore we keep them in a set strucutre + markit_bbg_mapping[key].add(tuple(line['cds_curve'])) + database.commit() + return (all_tickers, markit_bbg_mapping) + + +def get_basketids(database, index_list, workdate): + r = [] + with database.cursor() as c: + for index in index_list: + c.execute("SELECT * FROM nameToBasketID(%s, %s)", (index, workdate)) + r.append(c.fetchone()[0]) + database.commit() + return r + +def get_current_tickers(database, workdate): + basketid_list = get_basketids(database, index_list, workdate) + return get_markit_bbg_mapping(database, basketid_list, workdate) + +def insert_cds(database, workdate): + """insert Markit index quotes into the database + + :param database: psycopg2 connection to the database. + :param workdate: + """ + + all_tickers, markit_bbg_mapping = get_current_tickers(database, workdate) + filename = "cds eod {0:%Y%m%d}.csv".format(workdate) + colnames = ['Upfront'+tenor for tenor in ['6m', '1y', '2y', '3y', '4y', '5y', '7y', '10y']] + sqlstr = "INSERT INTO cds_quotes(date, curve_ticker, upfrontbid, upfrontask," \ + "runningbid, runningask, source, recovery) VALUES(%s, %s, %s, %s, %s, %s, %s, %s)" + + tickers_found = set([]) + with database.cursor() as c: + c.execute("DELETE from cds_quotes where date=%s", (workdate,)) + database.commit() + with open(os.path.join(root, "Tranche_data", "CDS", filename)) as fh: + csvreader = csv.DictReader(fh) + with database.cursor() as c: + for line in csvreader: + k = (line['Ticker'], line['Tier'], line['Ccy'], line['DocClause'], float(line['RunningCoupon'])) + if k in markit_bbg_mapping: + for curves in markit_bbg_mapping[k]: + c.executemany(sqlstr, + [(workdate, t, convert(line[colnames[i]]), convert(line[colnames[i]]), + float(line['RunningCoupon'])*10000, float(line['RunningCoupon'])*10000, + 'MKIT', convert(line['RealRecovery'])/100) + for i, t in enumerate(curves)]) + tickers_found.add((line['Ticker'], line['Tier'])) + database.commit() + logger.warning('missing_quotes for {0}'.format(all_tickers-tickers_found)) + +def insert_index(engine, workdate=None): + """insert Markit index quotes into the database + + :param engine: sqlalchemy engine to the database + :param workdate: date. If None, we will try to reinsert all files + """ + + basedir = os.path.join(root, 'Tranche_data', 'Composite_reports') + filenames = [os.path.join(basedir, f) for f in os.listdir(basedir) if 'Indices' in f] + + name_mapping = {"CDXNAHY":"HY", "CDXNAIG":"IG",'iTraxx Eur': "EU", 'iTraxx Eur Xover': "XO"} + cols = ['closeprice', 'closespread', 'modelprice', 'modelspread'] + colmapping={'Date':'date', 'Name': 'index', 'Series': 'series', 'Version': 'version', + 'Term': 'tenor', 'Composite Price': 'closeprice', 'Composite Spread': 'closespread', + 'Model Price': 'modelprice', 'Model Spread': 'modelspread'} + ext_cols = ['date', 'index', 'series', 'version', 'tenor'] + cols + \ + ['adjcloseprice', 'adjmodelprice'] + for f in filenames: + if workdate is None or \ + datetime.datetime.fromtimestamp(os.path.getmtime(f)).date()==(workdate+BDay(1)).date(): + data = pd.read_csv(f, skiprows=2, parse_dates=[0,7], engine='python') + data.rename(columns=colmapping, inplace=True) + data.dropna(subset=['closeprice'], inplace=True) + for col in cols: + data[col] = data[col].str.replace('%', '').astype('float') + data['tenor'] = data['tenor'].apply(lambda x: x.lower()+'r') + data['index'] = data['index'].apply(lambda x: name_mapping[x] if x in name_mapping else np.NaN) + data = data.dropna(subset=['index']) + data.set_index('index', drop=False, inplace=True) + data['closespread'] *= 100 + data['modelspread'] *= 100 + ## we renumbered the version for HY9, 10 and 11 + data.loc[data.series.isin([9, 10, 11]) & (data.index=='HY'),'version'] -= 3 + data['adjcloseprice'] = data['closeprice'] + data['adjmodelprice'] = data['modelprice'] + data = data.groupby(['index', 'series', 'tenor', 'date']).last() + data.reset_index(inplace=True) + data[ext_cols].to_sql('index_quotes', engine, if_exists='append', index=False) + +def insert_tranche(engine, workdate = None): + """insert Markit index quotes into the database + + :param engine: sqlalchemy engine to the database + :param workdate: date. If None, we will try to reinsert all files + """ + + basedir = os.path.join(root, 'Tranche_data', 'Composite_reports') + filenames = [os.path.join(basedir, f) for f in os.listdir(basedir) if f.startswith('Tranche Composites')] + index_version = pd.read_sql_table("index_version", engine, index_col='redindexcode') + for f in filenames: + if workdate is None or \ + datetime.datetime.fromtimestamp(os.path.getmtime(f)).date()==(workdate+BDay(1)).date(): + df = pd.read_csv(f, skiprows=2, parse_dates=['Date']) + df.rename(columns={'Date':'quotedate', + 'Index Term':'tenor', + 'Attachment':'attach', + 'Detachment':'detach', + 'Tranche Upfront Bid': 'upfront_bid', + 'Tranche Upfront Mid': 'upfront_mid', + 'Tranche Upfront Ask': 'upfront_ask', + 'Index Price Mid': 'index_price', + 'Tranche Spread Mid': 'tranche_spread', + 'Red Code':'redindexcode'}, inplace=True) + df.attach = df.attach *100 + df.detach = df.detach * 100 + df.tranche_spread = df.tranche_spread*10000 + df.tenor = df.tenor.str.lower() + 'r' + df.set_index('redindexcode', inplace=True) + df = df.join(index_version) + df = df.filter(['basketid', 'quotedate', 'tenor', 'attach', 'detach', + 'upfront_bid', 'upfront_ask', 'upfront_mid', + 'tranche_spread', 'index_price']) + df.to_sql('markit_tranche_quotes', engine, if_exists='append', index=False) diff --git a/python/markit/loans.py b/python/markit/loans.py new file mode 100644 index 00000000..0b14d60b --- /dev/null +++ b/python/markit/loans.py @@ -0,0 +1,93 @@ +import csv
+import logging
+import os
+import requests
+
+from common import root
+from db import with_connection
+
+logger = logging.getLogger(__name__)
+
+def download_facility(workdate, payload):
+ r = requests.get('https://loans.markit.com/loanx/LoanXFacilityUpdates.csv', params=payload)
+ facility_filename = os.path.join(root, "data", "Facility files", "facility_{0}.csv".format(workdate))
+ with open( facility_filename, "wb") as fh:
+ fh.write(r.content)
+
+@with_connection('etdb')
+def insert_facility(conn, workdate):
+ facility_filename = os.path.join(root, "data", "Facility files", "facility_{0}.csv".format(workdate))
+ sqlstring = "INSERT INTO markit_facility VALUES( {0} )".format( ",".join(["%s"] * 13))
+ with open( facility_filename, "r") as fh:
+ reader = csv.reader(fh)
+ header = next(reader)
+ if 'Authentication failed' in header:
+ logger.error("Couldn't authenticate")
+ raise SystemExit
+ with conn.cursor() as c:
+ for line in reader:
+ newline = tuple([v or None for v in line])
+ try:
+ c.execute(sqlstring, newline)
+ except IntegrityError as e:
+ logger.error(e)
+ conn.rollback()
+ else:
+ conn.commit()
+
+@with_connection('etdb')
+def download_marks(conn, workdate, payload):
+ r = requests.get('https://loans.markit.com/loanx/LoanXMarks.csv', params=payload)
+ marks_filename = os.path.join(root, "data", "markit", "markit_data_{0}.csv".format(workdate))
+ with open(marks_filename, "wb") as fh:
+ fh.write(r.content)
+ sqlstring = "INSERT INTO markit_prices VALUES( {0} )".format( ",".join([ "%s" ] * 5))
+ with open(marks_filename, "r") as fh:
+ reader = csv.DictReader(fh)
+ if 'Authentication failed' in reader.fieldnames[0]:
+ logger.error("Couldn't authenticate")
+ raise SystemExit
+ with conn.cursor() as c:
+ for line in reader:
+ if line['Depth']=='implied':
+ line['Depth']=0
+ c.execute(sqlstring, (line['LoanX ID'], line['Bid'], line['Offer'],
+ line['Depth'], line['Mark Date']))
+ conn.commit()
+
+@with_connection('etdb')
+def update_facility(conn, workdate, payload):
+ #we update the missing facility loanxids
+ sqlstring = "SELECT loanxid FROM markit_prices EXCEPT SELECT loanxid FROM markit_facility";
+ facility_diff_filename = os.path.join(root, "data", "Facility files",
+ "facility_diff_{0}.csv".format(workdate))
+ with open( facility_diff_filename, "wb") as fh:
+ flag = False
+ with conn.cursor() as c:
+ c.execute(sqlstring)
+ for loanxid in c:
+ payload.update({'LOANXID':loanxid[0]})
+ r = requests.get('https://loans.markit.com/loanx/LoanXOneFacility.csv', params=payload)
+ if flag:
+ fh.write(r.content.split('\n')[1] + "\n")
+ else:
+ fh.write(r.content.split('\n')[0] + "\n")
+ fh.write(r.content.split('\n')[1] + "\n")
+ flag = True
+
+ sqlstring = "INSERT INTO markit_facility(LoanXID, PMDID, IssuerName, dealname, facility_type," \
+ "loanx_facility_type, initial_amount, initial_spread, maturity, industry, modified_time)" \
+ "VALUES( {0} )".format( ",".join( ["%s"] * 11))
+ try:
+ with open(facility_diff_filename, "r") as fh:
+ reader = csv.reader(fh)
+ next(reader)
+ with conn.cursor() as c:
+ for line in reader:
+ newline = [v or None for v in line] + [workdate]
+ newline.pop(9) # remove the spread to maturity value
+ c.execute(sqlstring, newline)
+ conn.commit()
+ except StopIteration:
+ pass
+ conn.close()
|
