"""ETL job: map deal cusips to loanxids, pull Bloomberg reference/pricing
fields for cusips not yet in Postgres, and append them to
``bloomberg_corp_ref`` (static reference) and ``bloomberg_corp`` (pricing).
"""

from collections import defaultdict

import numpy as np  # NOTE(review): unused here, kept in case of external use
import pandas as pd
from sqlalchemy import (
    Column,
    MetaData,
    bindparam,
    create_engine,
    func,
    select,
)

from bbg_helpers import BBG_IP, init_bbg_session, retrieve_data

engine = create_engine("postgresql://et_user@debian/ET")
meta = MetaData(bind=engine)
meta.reflect(only=["bloomberg_corp_ref", "bloomberg_corp", "deal_indicative"])
deal_indicative = meta.tables["deal_indicative"]
bloomberg_corp_ref = meta.tables["bloomberg_corp_ref"]
bloomberg_corp = meta.tables["bloomberg_corp"]

# Per-deal query against the et_latestdealinfo() set-returning function:
# latest (cusip, loanxid) rows for one deal, skipping rows with no cusip.
# `.isnot(None)` is the explicit SQLAlchemy spelling of IS NOT NULL
# (the original `!= None` works but is the discouraged form).
s = (
    select([Column("cusip"), Column("loanxid")])
    .select_from(func.et_latestdealinfo(bindparam("dealname")))
    .where(Column("cusip").isnot(None))
)

# Build a dictionary keyed by cusip whose value is the set of loanxids
# mapped to that cusip across every deal in deal_indicative.
result = select([deal_indicative.c.dealname]).execute()
d = defaultdict(set)
for r in result:
    for t in engine.execute(s, dealname=r.dealname):
        d[t.cusip].add(t.loanxid)


def f(loanxids):
    """Render a set of loanxids as a Postgres array literal ("{a,b}"),
    or None when the set is empty (stored as SQL NULL)."""
    if loanxids:
        return "{%s}" % ",".join(loanxids)
    return None


# Drop NULL loanxids from each set, then format for the array-text column.
clean_mapping = {cusip: f(loanxids - {None}) for cusip, loanxids in d.items()}

mapping = pd.DataFrame.from_dict(clean_mapping, orient="index")
mapping.index.name = "cusip"
mapping.columns = ["loanxid"]

currentdata = pd.read_sql_query(
    "select id_bb_unique, cusip from bloomberg_corp_ref", engine, index_col="cusip"
)
# Keep only cusips not already present in bloomberg_corp_ref.
# BUG FIX: DataFrame.ix was removed in pandas 1.0; .loc on the index
# difference is the supported equivalent.
mapping = mapping.loc[mapping.index.difference(currentdata.index)]

all_fields = [
    "ISSUE_DT",
    "LN_ISSUE_STATUS",
    "ID_CUSIP",
    "ID_BB_UNIQUE",
    "SECURITY_TYP",
    "AMT_OUTSTANDING",
    "PX_LAST",
    "LAST_UPDATE_DT",
    "ISSUER",
    "MATURITY",
    "CPN",
    "CPN_TYP",
    "CPN_FREQ",
    "FLT_SPREAD",
    "LIBOR_FLOOR",
    "LN_CURRENT_MARGIN",
    "LN_TRANCHE_SIZE",
    "AMT_ISSUED",
    "LN_COVENANT_LITE",
    "SECOND_LIEN_INDICATOR",
    "DEFAULTED",
    "DEFAULT_DATE",
    "CALLED",
    "CALLED_DT",
    "PRICING_SOURCE",
    "RESET_IDX",
]
securities = ["{0} Corp".format(cusip) for cusip in mapping.index]

# The Bloomberg session is only needed for the fetch itself; everything
# after this block is local DataFrame work.
with init_bbg_session(BBG_IP) as session:
    df = retrieve_data(session, securities, all_fields)

df = pd.DataFrame.from_dict(df, orient="index")
# Bloomberg keys are "CUSIP Corp"; keep just the 9-character cusip.
df.index = df.index.str.slice(0, 9)
# BUG FIX: the original `df.security = ...` is attribute assignment and
# does NOT create a "security" column; item assignment does.
df["security"] = df.index.to_series()
df["loanxid"] = mapping.loanxid
df.dropna(subset=["ID_BB_UNIQUE"], inplace=True)
# Fall back to AMT_ISSUED when Bloomberg did not report a tranche size.
missing_size = df.LN_TRANCHE_SIZE.isnull()
df.loc[missing_size, "LN_TRANCHE_SIZE"] = df.loc[missing_size, "AMT_ISSUED"].values
df.drop_duplicates(subset="ID_BB_UNIQUE", inplace=True)
df.set_index("ID_BB_UNIQUE", inplace=True, drop=False)
currentdata.set_index("id_bb_unique", inplace=True)
# Re-filter on id_bb_unique (a cusip can map to an already-known security).
# BUG FIX: .loc replaces the removed .ix here as well.
df = df.loc[df.index.difference(currentdata.index)]

# --- bloomberg_corp_ref insert -------------------------------------------
# NOTE(review): this relies on the reflected column order of the table
# matching pd_colnames positionally — verify against the schema.
sql_colnames = [c.name for c in bloomberg_corp_ref.columns]
pd_colnames = [
    "ID_BB_UNIQUE",
    "ID_CUSIP",
    "ISSUER",
    "MATURITY",
    "CPN",
    "CPN_TYP",
    "CPN_FREQ",
    "FLT_SPREAD",
    "LIBOR_FLOOR",
    "LN_TRANCHE_SIZE",
    "LN_COVENANT_LITE",
    "SECOND_LIEN_INDICATOR",
    "SECURITY_TYP",
    "ISSUE_DT",
    "DEFAULTED",
    "DEFAULT_DATE",
    "CALLED",
    "CALLED_DT",
    "LN_ISSUE_STATUS",
    "loanxid",
]
to_insert = df.filter(pd_colnames)
to_insert.rename(columns=dict(zip(pd_colnames, sql_colnames)), inplace=True)
to_insert.to_sql("bloomberg_corp_ref", engine, if_exists="append", index=False)

# --- bloomberg_corp (pricing) insert -------------------------------------
pd_colnames = [
    "ID_BB_UNIQUE",
    "LAST_UPDATE_DT",
    "PX_LAST",
    "LN_CURRENT_MARGIN",
    "AMT_OUTSTANDING",
    "PRICING_SOURCE",
]
sql_colnames = [c.name for c in bloomberg_corp.columns]
to_insert2 = df.filter(pd_colnames)
to_insert2.rename(columns=dict(zip(pd_colnames, sql_colnames)), inplace=True)
# Rows without a pricing date cannot key the (id_bb_unique, pricingdate) PK.
to_insert2.dropna(subset=["pricingdate"], inplace=True)
to_insert2.set_index(["id_bb_unique", "pricingdate"], inplace=True)
to_insert2.to_sql("bloomberg_corp", engine, if_exists="append", index=True)