diff options
Diffstat (limited to 'python/bbg_newids.py')
| -rw-r--r-- | python/bbg_newids.py | 136 |
1 file changed, 97 insertions, 39 deletions
# bbg_newids.py -- nightly loader for newly seen loan cusips.
#
# 1. Builds a cusip -> {loanxid} mapping from every deal in deal_indicative
#    (via the et_latestdealinfo() set-returning SQL function).
# 2. For cusips not yet present in bloomberg_corp_ref, pulls reference and
#    pricing fields from Bloomberg.
# 3. Appends the new rows to bloomberg_corp_ref (static reference data) and
#    bloomberg_corp (pricing observations keyed by id_bb_unique/pricingdate).

engine = create_engine("postgresql://et_user@debian/ET")
meta = MetaData(bind=engine)
meta.reflect(only=["bloomberg_corp_ref", "bloomberg_corp", "deal_indicative"])

deal_indicative = meta.tables["deal_indicative"]
bloomberg_corp_ref = meta.tables["bloomberg_corp_ref"]
bloomberg_corp = meta.tables["bloomberg_corp"]

# Parameterized query over the et_latestdealinfo() table function.
# NOTE: `!= None` (not `is not None`) is deliberate -- SQLAlchemy overloads it
# to emit "cusip IS NOT NULL".
s = (
    select([Column("cusip"), Column("loanxid")])
    .select_from(func.et_latestdealinfo(bindparam("dealname")))
    .where(Column("cusip") != None)  # noqa: E711 -- SQLAlchemy IS NOT NULL
)

# cusip -> set of every loanxid ever mapped to it, across all deals.
result = select([deal_indicative.c.dealname]).execute()
d = defaultdict(set)
for r in result:
    result2 = engine.execute(s, dealname=r.dealname)
    for t in result2:
        d[t.cusip].add(t.loanxid)

# Drop NULL loanxids from each set before formatting.
clean_mapping = ((cusip, loanxids - {None}) for cusip, loanxids in d.items())


def f(s):
    """Format a set of loanxids as a Postgres array literal ("{a,b}").

    Returns None for an empty set so the DB column stays NULL.
    """
    if s:
        return "{%s}" % ",".join(s)
    return None


clean_mapping = {cusip: f(loanxids) for cusip, loanxids in clean_mapping}

mapping = pd.DataFrame.from_dict(clean_mapping, orient="index")
mapping.index.name = "cusip"
mapping.columns = ["loanxid"]

currentdata = pd.read_sql_query(
    "select id_bb_unique, cusip from bloomberg_corp_ref", engine, index_col="cusip"
)
# Keep only cusips we have never stored before.
# BUGFIX: .ix is deprecated (removed in pandas >= 1.0); .loc is safe here
# because index.difference() only yields labels present in `mapping`.
mapping = mapping.loc[mapping.index.difference(currentdata.index)]

with init_bbg_session(BBG_IP) as session:
    all_fields = [
        "ISSUE_DT",
        "LN_ISSUE_STATUS",
        "ID_CUSIP",
        "ID_BB_UNIQUE",
        "SECURITY_TYP",
        "AMT_OUTSTANDING",
        "PX_LAST",
        "LAST_UPDATE_DT",
        "ISSUER",
        "MATURITY",
        "CPN",
        "CPN_TYP",
        "CPN_FREQ",
        "FLT_SPREAD",
        "LIBOR_FLOOR",
        "LN_CURRENT_MARGIN",
        "LN_TRANCHE_SIZE",
        "AMT_ISSUED",
        "LN_COVENANT_LITE",
        "SECOND_LIEN_INDICATOR",
        "DEFAULTED",
        "DEFAULT_DATE",
        "CALLED",
        "CALLED_DT",
        "PRICING_SOURCE",
        "RESET_IDX",
    ]
    # Bloomberg securities are addressed as "<cusip> Corp".
    securities = ["{0} Corp".format(cusip) for cusip in mapping.index]
    df = retrieve_data(session, securities, all_fields)

df = pd.DataFrame.from_dict(df, orient="index")
# Strip the " Corp" suffix; a cusip is 9 characters.
df.index = df.index.str.slice(0, 9)
# BUGFIX: `df.security = ...` sets a plain instance attribute, not a column
# (pandas only allows attribute assignment to *existing* columns). Use item
# assignment so the cusip is actually stored in the frame.
df["security"] = df.index.to_series()
df["loanxid"] = mapping.loanxid  # aligned on the cusip index
df.dropna(subset=["ID_BB_UNIQUE"], inplace=True)
# Fall back to the issued amount when Bloomberg has no tranche size.
df.loc[df.LN_TRANCHE_SIZE.isnull(), "LN_TRANCHE_SIZE"] = df[
    df.LN_TRANCHE_SIZE.isnull()
].AMT_ISSUED.values
df.drop_duplicates(subset="ID_BB_UNIQUE", inplace=True)
df.set_index("ID_BB_UNIQUE", inplace=True, drop=False)
currentdata.set_index("id_bb_unique", inplace=True)
# BUGFIX: .ix -> .loc (same deprecation as above).
df = df.loc[df.index.difference(currentdata.index)]

# --- static reference rows -> bloomberg_corp_ref -------------------------
# NOTE(review): the rename relies on bloomberg_corp_ref's reflected column
# order matching pd_colnames positionally -- confirm against the DB schema.
sql_colnames = [c.name for c in bloomberg_corp_ref.columns]
pd_colnames = [
    "ID_BB_UNIQUE",
    "ID_CUSIP",
    "ISSUER",
    "MATURITY",
    "CPN",
    "CPN_TYP",
    "CPN_FREQ",
    "FLT_SPREAD",
    "LIBOR_FLOOR",
    "LN_TRANCHE_SIZE",
    "LN_COVENANT_LITE",
    "SECOND_LIEN_INDICATOR",
    "SECURITY_TYP",
    "ISSUE_DT",
    "DEFAULTED",
    "DEFAULT_DATE",
    "CALLED",
    "CALLED_DT",
    "LN_ISSUE_STATUS",
    "loanxid",
]
to_insert = df.filter(pd_colnames)
to_insert.rename(columns=dict(zip(pd_colnames, sql_colnames)), inplace=True)
to_insert.to_sql("bloomberg_corp_ref", engine, if_exists="append", index=False)

# --- pricing rows -> bloomberg_corp --------------------------------------
pd_colnames = [
    "ID_BB_UNIQUE",
    "LAST_UPDATE_DT",
    "PX_LAST",
    "LN_CURRENT_MARGIN",
    "AMT_OUTSTANDING",
    "PRICING_SOURCE",
]
sql_colnames = [c.name for c in bloomberg_corp.columns]
to_insert2 = df.filter(pd_colnames)
to_insert2.rename(columns=dict(zip(pd_colnames, sql_colnames)), inplace=True)
# Rows without a pricing date cannot go into the (id, date) primary key.
to_insert2.dropna(subset=["pricingdate"], inplace=True)
to_insert2.set_index(["id_bb_unique", "pricingdate"], inplace=True)
to_insert2.to_sql("bloomberg_corp", engine, if_exists="append", index=True)
