aboutsummaryrefslogtreecommitdiffstats
path: root/R/mapping_fast.R
blob: 1692f3fdcf987a91b106068ce0b0526a29d92fcf (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
library(RPostgreSQL)
library(data.table)
library(hash)
options(stringsAsFactors=FALSE)
drv <- dbDriver("PostgreSQL")
dbCon <- dbConnect(drv, dbname="ET", user="et_user", password="Serenitas;1", host="192.168.1.108")
markitdata <- dbGetQuery(dbCon, "select * from latest_markit_prices")
CLOmap <- dbGetQuery(dbCon, "select * from dealcusipmapping")
intexdata <- dbGetQuery(dbCon, sprintf("select * from et_historical_collateral('%s')",Sys.Date()))

bbgmap <- read.csv(file="//WDSENTINEL/share/CorpCDOs/data/lx-bbgmap.csv", header = TRUE, colClasses = "character")
bbgmap <- data.table(bbgmap, key="cusip")
RMBSmap <- read.csv(file="//WDSENTINEL/share/CorpCDOs/data/MTGECusips.csv", header = FALSE, colClasses = "character")
markitdata$mid <-(markitdata$bid + markitdata$offer)/2
markitdata$spread <- markitdata$spread/100

toscrub <- c(' LLC.?$', ', INC.?$', ' Inc.?$',' GMBH.?$', ' SA$', ' LTD$', ' PLC$',
             ' CORP$', ' S A.?$', ' S.A.?$', ' BV$', ' AB$', ' LP$', ' N.V.?$',
             ' AG$', ' CO.?$', ' HLDG.?$', ' HLDGS?$', ' S P A$', ' S.P.A.?$',
             ' A/S$', ' Holdings$', ' B.V.$',"\\(", "\\)")
intexdata$scrubedissuer <- tolower(intexdata$issuername)
markitdata$scrubedissuer <- tolower(markitdata$issuer)
for (i in 1:length(toscrub)){
    markitdata$scrubedissuer <- gsub(toscrub[i], '', markitdata$scrubedissuer, ignore.case = TRUE)
    intexdata$scrubedissuer <- gsub(toscrub[i], '', intexdata$scrubedissuer, ignore.case = TRUE)
}
markitdata <- data.table(markitdata, key="scrubedissuer")

intexdatatomap <- intexdata[is.na(intexdata$loanxid),]
intexdatatomap <- data.frame(intexdatatomap,src=rep(NA,nrow(intexdatatomap)))

a <- unique(intexdatatomap$scrubedissuer)
b <- unique(markitdata$scrubedissuer)
levdist <- adist(a, b, costs=c("ins"=1, "del"=1, "sub"=3))
ratio <- (outer(nchar(a), nchar(b), "+")-levdist)/outer(nchar(a), nchar(b), "+")
index <- hash(a,1:length(a))
rownames(ratio) <- a

for (i in 1:nrow(intexdatatomap)){
    #Match CUSIP vs. LoanxID
    matched0 <- bbgmap[intexdatatomap$cusip[i]]$loanxid
    if(!is.na(matched0[1])){
        intexdatatomap$loanxid[i] <- matched0[1]
        intexdatatomap$src[i] <- "CUSIPMapped"
        next
    }

    issuermatch <- b[ratio[index[[intexdatatomap$scrubedissuer[i]]],]>=0.9]
    if( length(issuermatch) == 0){
        next
    }else{
        issuermatch <- markitdata[issuermatch]
        issuermatch <- issuermatch[!is.na(issuermatch$spread) & !is.na(issuermatch$maturity),]
        spreadmatch <- abs(issuermatch$spread-intexdatatomap$spread[i])<1
        maturitymatch <- abs(issuermatch$maturity-intexdatatomap$maturity[i])<90
        issuermatch <- issuermatch[spreadmatch & maturitymatch,]
        if(nrow(issuermatch)>0){
            intexdatatomap$loanxid[i] <- issuermatch$loanxid[1]
            intexdatatomap$src[i] <- "loanxidmatched"
        }
    }
}