aboutsummaryrefslogtreecommitdiffstats
path: root/R/mapping_fast.R
diff options
context:
space:
mode:
Diffstat (limited to 'R/mapping_fast.R')
-rw-r--r--R/mapping_fast.R62
1 files changed, 62 insertions, 0 deletions
diff --git a/R/mapping_fast.R b/R/mapping_fast.R
new file mode 100644
index 00000000..1692f3fd
--- /dev/null
+++ b/R/mapping_fast.R
@@ -0,0 +1,62 @@
+library(RPostgreSQL)
+library(data.table)
+library(hash)
+options(stringsAsFactors=FALSE)
+drv <- dbDriver("PostgreSQL")
+dbCon <- dbConnect(drv, dbname="ET", user="et_user", password="Serenitas;1", host="192.168.1.108")
+markitdata <- dbGetQuery(dbCon, "select * from latest_markit_prices")
+CLOmap <- dbGetQuery(dbCon, "select * from dealcusipmapping")
+intexdata <- dbGetQuery(dbCon, sprintf("select * from et_historical_collateral('%s')",Sys.Date()))
+
+bbgmap <- read.csv(file="//WDSENTINEL/share/CorpCDOs/data/lx-bbgmap.csv", header = TRUE, colClasses = "character")
+bbgmap <- data.table(bbgmap, key="cusip")
+RMBSmap <- read.csv(file="//WDSENTINEL/share/CorpCDOs/data/MTGECusips.csv", header = FALSE, colClasses = "character")
+markitdata$mid <-(markitdata$bid + markitdata$offer)/2
+markitdata$spread <- markitdata$spread/100
+
+toscrub <- c(' LLC.?$', ', INC.?$', ' Inc.?$',' GMBH.?$', ' SA$', ' LTD$', ' PLC$',
+ ' CORP$', ' S A.?$', ' S.A.?$', ' BV$', ' AB$', ' LP$', ' N.V.?$',
+ ' AG$', ' CO.?$', ' HLDG.?$', ' HLDGS?$', ' S P A$', ' S.P.A.?$',
+ ' A/S$', ' Holdings$', ' B.V.$',"\\(", "\\)")
+intexdata$scrubedissuer <- tolower(intexdata$issuername)
+markitdata$scrubedissuer <- tolower(markitdata$issuer)
+for (i in 1:length(toscrub)){
+ markitdata$scrubedissuer <- gsub(toscrub[i], '', markitdata$scrubedissuer, ignore.case = TRUE)
+ intexdata$scrubedissuer <- gsub(toscrub[i], '', intexdata$scrubedissuer, ignore.case = TRUE)
+}
+markitdata <- data.table(markitdata, key="scrubedissuer")
+
+intexdatatomap <- intexdata[is.na(intexdata$loanxid),]
+intexdatatomap <- data.frame(intexdatatomap,src=rep(NA,nrow(intexdatatomap)))
+
+a <- unique(intexdatatomap$scrubedissuer)
+b <- unique(markitdata$scrubedissuer)
+levdist <- adist(a, b, costs=c("ins"=1, "del"=1, "sub"=3))
+ratio <- (outer(nchar(a), nchar(b), "+")-levdist)/outer(nchar(a), nchar(b), "+")
+index <- hash(a,1:length(a))
+rownames(ratio) <- a
+
+for (i in 1:nrow(intexdatatomap)){
+ #Match CUSIP vs. LoanxID
+ matched0 <- bbgmap[intexdatatomap$cusip[i]]$loanxid
+ if(!is.na(matched0[1])){
+ intexdatatomap$loanxid[i] <- matched0[1]
+ intexdatatomap$src[i] <- "CUSIPMapped"
+ next
+ }
+
+ issuermatch <- b[ratio[index[[intexdatatomap$scrubedissuer[i]]],]>=0.9]
+ if( length(issuermatch) == 0){
+ next
+ }else{
+ issuermatch <- markitdata[issuermatch]
+ issuermatch <- issuermatch[!is.na(issuermatch$spread) & !is.na(issuermatch$maturity),]
+ spreadmatch <- abs(issuermatch$spread-intexdatatomap$spread[i])<1
+ maturitymatch <- abs(issuermatch$maturity-intexdatatomap$maturity[i])<90
+ issuermatch <- issuermatch[spreadmatch & maturitymatch,]
+ if(nrow(issuermatch)>0){
+ intexdatatomap$loanxid[i] <- issuermatch$loanxid[1]
+ intexdatatomap$src[i] <- "loanxidmatched"
+ }
+ }
+}