library(RPostgreSQL) library(data.table) library(hash) options(stringsAsFactors=FALSE) drv <- dbDriver("PostgreSQL") dbCon <- dbConnect(drv, dbname="ET", user="et_user", password="Serenitas;1", host="192.168.1.108") markitdata <- dbGetQuery(dbCon, "select * from latest_markit_prices") CLOmap <- dbGetQuery(dbCon, "select * from dealcusipmapping") intexdata <- dbGetQuery(dbCon, sprintf("select * from et_historical_collateral('%s')",Sys.Date())) bbgmap <- read.csv(file="//WDSENTINEL/share/CorpCDOs/data/lx-bbgmap.csv", header = TRUE, colClasses = "character") bbgmap <- data.table(bbgmap, key="cusip") RMBSmap <- read.csv(file="//WDSENTINEL/share/CorpCDOs/data/MTGECusips.csv", header = FALSE, colClasses = "character") markitdata$mid <-(markitdata$bid + markitdata$offer)/2 markitdata$spread <- markitdata$spread/100 toscrub <- c(' LLC.?$', ', INC.?$', ' Inc.?$',' GMBH.?$', ' SA$', ' LTD$', ' PLC$', ' CORP$', ' S A.?$', ' S.A.?$', ' BV$', ' AB$', ' LP$', ' N.V.?$', ' AG$', ' CO.?$', ' HLDG.?$', ' HLDGS?$', ' S P A$', ' S.P.A.?$', ' A/S$', ' Holdings$', ' B.V.$',"\\(", "\\)") intexdata$scrubedissuer <- tolower(intexdata$issuername) markitdata$scrubedissuer <- tolower(markitdata$issuer) for (i in 1:length(toscrub)){ markitdata$scrubedissuer <- gsub(toscrub[i], '', markitdata$scrubedissuer, ignore.case = TRUE) intexdata$scrubedissuer <- gsub(toscrub[i], '', intexdata$scrubedissuer, ignore.case = TRUE) } markitdata <- data.table(markitdata, key="scrubedissuer") intexdatatomap <- intexdata[is.na(intexdata$loanxid),] intexdatatomap <- data.frame(intexdatatomap,src=rep(NA,nrow(intexdatatomap))) a <- unique(intexdatatomap$scrubedissuer) b <- unique(markitdata$scrubedissuer) levdist <- adist(a, b, costs=c("ins"=1, "del"=1, "sub"=3)) ratio <- (outer(nchar(a), nchar(b), "+")-levdist)/outer(nchar(a), nchar(b), "+") index <- hash(a,1:length(a)) rownames(ratio) <- a for (i in 1:nrow(intexdatatomap)){ #Match CUSIP vs. LoanxID matched0 <- bbgmap[intexdatatomap$cusip[i]]$loanxid if(!is.na(matched0[1])){ intexdatatomap$loanxid[i] <- matched0[1] intexdatatomap$src[i] <- "CUSIPMapped" next } issuermatch <- b[ratio[index[[intexdatatomap$scrubedissuer[i]]],]>=0.9] if( length(issuermatch) == 0){ next }else{ issuermatch <- markitdata[issuermatch] issuermatch <- issuermatch[!is.na(issuermatch$spread) & !is.na(issuermatch$maturity),] spreadmatch <- abs(issuermatch$spread-intexdatatomap$spread[i])<1 maturitymatch <- abs(issuermatch$maturity-intexdatatomap$maturity[i])<90 issuermatch <- issuermatch[spreadmatch & maturitymatch,] if(nrow(issuermatch)>0){ intexdatatomap$loanxid[i] <- issuermatch$loanxid[1] intexdatatomap$src[i] <- "loanxidmatched" } } }