From c78d628182fe56dadb2c25ce08de92908e106e53 Mon Sep 17 00:00:00 2001 From: Guillaume Horel Date: Mon, 23 Jan 2023 16:59:34 -0500 Subject: zero copy buffer layout --- c_layer/survival_curve.hpp | 40 +-- pyisda/credit_index.pxd | 7 +- pyisda/credit_index.pyx | 231 ++++++------ pyisda/curve.pxd | 81 +++-- pyisda/curve.pyx | 865 ++++++++++++++++++++++----------------------- pyisda/date.pxd | 12 +- pyisda/date.pyx | 6 +- 7 files changed, 627 insertions(+), 615 deletions(-) diff --git a/c_layer/survival_curve.hpp b/c_layer/survival_curve.hpp index ce71cc1..b3dcebd 100644 --- a/c_layer/survival_curve.hpp +++ b/c_layer/survival_curve.hpp @@ -1,56 +1,35 @@ -#include +#include struct CurveName { - enum class __attribute__ ((__packed__)) Seniority { + enum class Seniority: std::uint8_t { Senior, Subordinated }; - enum class __attribute__ ((__packed__)) DocClause { + enum class DocClause: std::uint8_t { XR14, MR14, MM14, CR14 }; - void serialize(unsigned char* buf) { - memcpy(buf, &seniority, sizeof(Seniority)); - buf += sizeof(Seniority); - memcpy(buf, &doc_clause, sizeof(DocClause)); - buf += sizeof(DocClause); - strcpy((char*)buf, ticker.c_str()); - }; - std::string full_ticker() const { - std::string r = ticker; + std::string r = {ticker.begin(), ticker.end()}; return r.append("_").append(str_seniority()). append("_").append(str_doc_clause()); } - void serialize(unsigned char* buf, size_t num) { - memcpy(buf, &seniority, sizeof(Seniority)); - buf += sizeof(Seniority); - memcpy(buf, &doc_clause, sizeof(DocClause)); - buf += sizeof(DocClause); - strncpy((char*)buf, ticker.c_str(), num); - }; - - CurveName(std::string& ticker, Seniority seniority, DocClause doc_clause) : - ticker(ticker), - seniority(seniority), - doc_clause(doc_clause) {}; - - CurveName(const unsigned char* buf) { + CurveName(const char* buf) : name(buf) { memcpy(&seniority, buf, sizeof(Seniority)); buf += sizeof(Seniority); memcpy(&doc_clause, buf, sizeof(DocClause)); buf += sizeof(DocClause); - ticker = std::string((char*)buf); + ticker = std::string_view((const char*)buf); } CurveName() {}; - size_t size() { - return sizeof(Seniority) + sizeof(DocClause) + ticker.length() + 1; + size_t size() const { + return sizeof(Seniority) + sizeof(DocClause) + ticker.size() + 1; }; bool operator<(const CurveName &other) const { @@ -82,7 +61,8 @@ struct CurveName { } } - std::string ticker; + const char* name; + std::string_view ticker; Seniority seniority; DocClause doc_clause; }; diff --git a/pyisda/credit_index.pxd b/pyisda/credit_index.pxd index 42a9e85..2be66ff 100644 --- a/pyisda/credit_index.pxd +++ b/pyisda/credit_index.pxd @@ -1,6 +1,7 @@ from .legs cimport TContingentLeg, TFeeLeg from .date cimport TDate from .curve cimport TCurve, TRatePt, shared_ptr, CurveName +from libc.stdint cimport uint16_t from libcpp.vector cimport vector from libcpp.map cimport map from libcpp.string cimport string @@ -8,9 +9,9 @@ from libcpp.string cimport string cdef class CurveList: cdef TDate base_date cdef vector[double] _weights - cdef vector[shared_ptr[TCurve]] _curves + cdef vector[shared_ptr[char]] _curves cdef map[CurveName, size_t] names - cdef vector[shared_ptr[double]] recovery_rates + cdef vector[uint16_t] offset_recovery_rates cdef vector[TDate] defaulted cdef double defaulted_weight @@ -19,4 +20,4 @@ cdef class CreditIndex(CurveList): cdef vector[TDate] _maturities cdef TContingentLeg** contingent_legs cdef TFeeLeg** fee_legs - cdef char* calendar + cdef readonly char* cal_file diff --git a/pyisda/credit_index.pyx b/pyisda/credit_index.pyx index 0096bc5..0f76e6d 100644 --- a/pyisda/credit_index.pyx +++ b/pyisda/credit_index.pyx @@ -1,7 +1,7 @@ #cython: cdivision=True, boundscheck=False, c_string_type=unicode, c_string_encoding=ascii from libc.stdlib cimport malloc, free from libc.math cimport nan, isnan, log -from libc.string cimport memcpy, memset +from libc.string cimport memcpy, memset, strcpy from libcpp.pair cimport pair from libcpp.memory cimport make_shared from cython.operator cimport dereference as deref, preincrement as preinc @@ -13,8 +13,9 @@ from .legs cimport (JpmcdsCdsContingentLegMake, JpmcdsCdsFeeLegMake, JpmcdsContingentLegPV, JpmcdsFeeLegPV, FeeLegAI, JpmcdsFeeLegFree) from .curve cimport (SpreadCurve, JpmcdsCopyCurve, tweak_curve, YieldCurve, JpmcdsFreeTCurve, survival_prob, Hash64WithSeed, - Hash64, uint64_t, TCurve_size, serialize, CurveName, - Seniority, DocClause, get_TCurve, JpmcdsCdsParSpreads) + Hash64, uint64_t, CurveName, + Seniority, DocClause, JpmcdsCdsParSpreads, name_offset_from_buf, + buf_size) from .date cimport (pydate_to_TDate, TDate_to_pydate, JpmcdsDtFwdAny, TDateInterval, JpmcdsMakeDateInterval) from .cdsone cimport JpmcdsStringToStubMethod, TStubMethod @@ -45,6 +46,9 @@ cdef TContingentLeg* copyContingentLeg(TContingentLeg* leg) nogil: memcpy(new_leg, leg, sizeof(TContingentLeg)) return new_leg +cdef inline void char_free(char* ptr) nogil: + free(ptr) + @cython.auto_pickle(False) cdef class CurveList: @@ -56,6 +60,7 @@ cdef class CurveList: map[CurveName, size_t].iterator it size_t n = len(curves) double w + CurveName cn if value_date is not None: self.base_date = pydate_to_TDate(value_date) @@ -70,11 +75,12 @@ cdef class CurveList: self.defaulted_weight = 0.0 for w, sc in curves: if sc is not None: - it = self.names.find(deref(sc.name)) + cn = CurveName(sc.name()) + it = self.names.find(cn) if it == self.names.end(): - self.names[deref(sc.name)] = i - self._curves.push_back(sc._thisptr) - self.recovery_rates.push_back(sc.recovery_rates) + self.names[cn] = i + self._curves.push_back(sc.buf) + self.offset_recovery_rates.push_back(sc.offset_recovery_rates) self._weights.push_back(w) self.defaulted.push_back(sc.defaulted) i += 1 @@ -96,22 +102,29 @@ cdef class CurveList: def __getitem__(self, tuple name not None): if len(name) != 3: raise TypeError("`name` needs to be a string, Seniority, DocClause triplet") + ticker, sen, doc_clause = name + cdef: + char* buf = malloc(len(name[0]) + 3) + char* tmp + + buf[0] = sen + buf[1] = doc_clause + strcpy(&buf[2], ticker) + cdef: - string ticker_cpp = name[0] - CurveName.Seniority seniority = (name[1]) - CurveName.DocClause doc_clause = (name[2]) - shared_ptr[CurveName] name_cpp = make_shared[CurveName](ticker_cpp, seniority, doc_clause) + CurveName cn = CurveName(buf) map[CurveName, size_t].iterator got = \ - self.names.find(deref(name_cpp)) + self.names.find(cn) SpreadCurve sc - if got == self.names.end(): + free(buf) raise KeyError(name) else: sc = SpreadCurve.__new__(SpreadCurve) - sc._thisptr = self._curves[deref(got).second] - sc.recovery_rates = self.recovery_rates[deref(got).second] - sc.name = name_cpp + sc.buf = self._curves[deref(got).second] + sc.offset_name = deref(got).first.name - sc.buf.get() + sc.buf_size = sc.offset_name + deref(got).first.size() + sc.offset_recovery_rates = self.offset_recovery_rates[deref(got).second] sc.defaulted = self.defaulted[deref(got).second] return sc @@ -119,12 +132,15 @@ cdef class CurveList: cdef: SpreadCurve sc pair[CurveName, size_t] p + char* buf for p in self.names: sc = SpreadCurve.__new__(SpreadCurve) - sc._thisptr = self._curves[p.second] - sc.name = make_shared[CurveName](p.first) - sc.recovery_rates = self.recovery_rates[p.second] + sc.buf = self._curves[p.second] + buf = sc.buf.get() + sc.offset_name = p.first.name - buf + sc.buf_size = sc.offset_name + p.first.size() + sc.offset_recovery_rates = self.offset_recovery_rates[p.second] sc.defaulted = self.defaulted[p.second] yield ((sc.ticker, sc.seniority, sc.doc_clause), self._weights[p.second], sc) @@ -159,7 +175,7 @@ cdef class CurveList: int j = 0 for p in self.names: - out[j] = p.first.ticker + out[j] = string(p.first.ticker.begin(), p.first.ticker.end()) j += 1 return out @@ -180,12 +196,15 @@ cdef class CurveList: list r = [] SpreadCurve sc pair[CurveName, size_t] p + char* buf for p in self.names: sc = SpreadCurve.__new__(SpreadCurve) - sc._thisptr = self._curves[p.second] - sc.name = make_shared[CurveName](p.first) - sc.recovery_rates = self.recovery_rates[p.second] + sc.buf = self._curves[p.second] + buf = sc.buf.get() + sc.offset_name = p.first.name - buf + sc.buf_size = sc.offset_name + p.first.size() + sc.offset_recovery_rates = self.offset_recovery_rates[p.second] sc.defaulted = self.defaulted[p.second] r.append((self._weights[p.second], sc)) return r @@ -195,34 +214,38 @@ cdef class CurveList: self._curves.clear() self.names.clear() self._weights.clear() - self.recovery_rates.clear() + self.offset_recovery_rates.clear() self.defaulted.clear() self.defaulted_weight = 0.0 cdef TDate temp = self.base_date CurveList.__init__(self, l) self.base_date = temp - # def __deepcopy__(self, memo): - # cdef: - # shared_ptr[TCurve] sc - # double* temp - # size_t i = 0 - # TCurve* copy_sc - # CurveList copy = CurveList.__new__(CurveList) - # copy._weights = self._weights - # copy.T = self.T - # copy.base_date = self.base_date - - # for sc in self._curves: - # copy_sc = sc.get() - # copy._curves.push_back(make_shared(JpmcdsCopyCurve(copy_sc))) - # temp = malloc(copy_sc.fNumItems * sizeof(double)) - # if temp != NULL: - # memcpy(temp, self.recovery_rates[i].get(), - # copy_sc.fNumItems * sizeof(double)) - # copy.recovery_rates.push_back(shared_ptr[double](temp, double_free)) - # copy.tickers = self.tickers - # return copy + def __deepcopy__(self, memo): + cdef: + size_t i = 0 + CurveList copy = CurveList.__new__(CurveList) + char* buf + char* new_buf + int n + TCurve* curve + int size + CurveName cn + copy.base_date = self.base_date + copy._weights = self._weights + copy.base_date = self.base_date + copy._curves.resize(self._curves.size()) + copy.offset_recovery_rates = self.offset_recovery_rates + for p in self.names: + buf = self._curves[p.second].get() + size = buf_size((buf).fNumItems, p.first.ticker.size()) + new_buf = malloc(size) + memcpy(new_buf, buf, size) + cn = CurveName(new_buf + name_offset_from_buf(new_buf)) + copy._curves[p.second] = shared_ptr[char](new_buf, char_free) + copy.names[cn] = p.second + copy.tickers = self.tickers + return copy def __reduce__(self): return (self.__class__, (self.curves, TDate_to_pydate(self.base_date))) @@ -239,11 +262,11 @@ cdef class CreditIndex(CurveList): sizeof(TContingentLeg)) self.fee_legs = malloc(self._maturities.size() * sizeof(TFeeLeg)) - self.calendar = calendar + self.cal_file = calendar cdef size_t i cdef pair[TContingentLeg_ptr, TFeeLeg_ptr] r for i in range(self._maturities.size()): - r = get_legs(self._maturities[i], self._start_date, self.calendar) + r = get_legs(self._maturities[i], self._start_date, self.cal_file) self.contingent_legs[i] = r.first self.fee_legs[i] = r.second @@ -295,31 +318,20 @@ cdef class CreditIndex(CurveList): def __reduce__(self): return (self.__class__, (self.start_date, self.maturities, self.curves, - TDate_to_pydate(self.base_date))) + TDate_to_pydate(self.base_date), self.cal_file)) def __hash__(self): cdef: - TCurve* curve = self._curves[0].get() - size_t size = TCurve_size(curve) - size_t size_recovery = curve.fNumItems * sizeof(double) - size_t buf_size = size + size_recovery + sizeof(TDate) + \ - sizeof(CurveName.Seniority) + sizeof(DocClause) + 16 - unsigned char* buf = malloc(buf_size) - unsigned char* cursor uint64_t h = 0 pair[CurveName, size_t] p + char* buf + int n for p in self.names: - curve = self._curves[p.second].get() - cursor = serialize(curve, buf) - memcpy(cursor, self.recovery_rates[p.second].get(), size_recovery) - cursor += size_recovery - memcpy(cursor, &self.defaulted[p.second], sizeof(TDate)) - cursor += sizeof(TDate) - p.first.serialize(cursor, 16) - h ^= Hash64(buf, buf_size) - - free(buf) + buf = self._curves[p.second].get() + n = (buf).fNumItems + h ^= Hash64(buf, buf_size(n, p.first.ticker.size())) + h = Hash64WithSeed(&self._start_date, sizeof(TDate), h) h = Hash64WithSeed(&self.base_date, sizeof(TDate), h) h = Hash64WithSeed(self._maturities.data(), sizeof(TDate) * self._maturities.size(), h) @@ -351,11 +363,11 @@ cdef class CreditIndex(CurveList): j = 0 for p in self.names: - sc = self._curves[p.second].get() + sc = self._curves[p.second].get() tickers.append(p.first.full_ticker()) # TODO: pick the actual recovery on the curve # this only works for flat recovery curve - recovery_rate = self.recovery_rates[p.second].get()[0] + recovery_rate = ((sc + self.offset_recovery_rates[p.second]))[0] for j in range(self._maturities.size()): JpmcdsContingentLegPV(self.contingent_legs[j], sc.fBaseDate, @@ -400,18 +412,18 @@ cdef class CreditIndex(CurveList): int i = get_maturity_index(maturity_c, self._maturities) if i == -1: - legs = get_legs(maturity_c, self._start_date, self.calendar) + legs = get_legs(maturity_c, self._start_date, self.cal_file) else: legs.first = self.contingent_legs[i] legs.second = self.fee_legs[i] if epsilon != 0.: - mask = fill_mask(maturity_c, self._maturities, self._curves[0]) + mask = fill_mask(maturity_c, self._maturities, self._curves[0].get()) if mask == 0: raise ValueError("maturity is not correct") with nogil: r = pv(self._curves, self.base_date, step_in_date_c, cash_settle_date_c, - yc.get_TCurve(), legs, self.recovery_rates, fixed_rate, + yc.get_TCurve(), legs, self.offset_recovery_rates, fixed_rate, self._weights, epsilon, mask) if i == -1: free(legs.first) @@ -446,7 +458,7 @@ cdef class CreditIndex(CurveList): i = get_maturity_index(maturity_c, self._maturities) if i == -1: legs = get_legs(maturity_c, - self._start_date, self.calendar) + self._start_date, self.cal_file) else: legs.first = self.contingent_legs[i] legs.second = self.fee_legs[i] @@ -461,7 +473,7 @@ cdef class CreditIndex(CurveList): i = get_maturity_index(maturity_c, self._maturities) if i == -1: legs = get_legs(maturity_c, - self._start_date, self.calendar) + self._start_date, self.cal_file) else: legs.first = self.contingent_legs[i] legs.second = self.fee_legs[i] @@ -494,8 +506,8 @@ cdef class CreditIndex(CurveList): True) cl_pv = 0.0 for i in prange(self._curves.size()): - sc = self._curves[i].get() - recovery_rate = self.recovery_rates[i].get() + sc = self._curves[i].get() + recovery_rate = (sc + self.offset_recovery_rates[i]) # FIXME: do something better if isnan(deref(recovery_rate)): preinc(recovery_rate) @@ -541,7 +553,7 @@ cdef class CreditIndex(CurveList): cdef: double fl_pv, r = 0 - shared_ptr[TCurve] sc + shared_ptr[char] sc i = 0 for sc in self._curves: @@ -579,32 +591,41 @@ cdef class CreditIndex(CurveList): self._maturities[i] = pydate_to_TDate(val[i]) free(self.contingent_legs[i]) JpmcdsFeeLegFree(self.fee_legs[i]) - r = get_legs(self._maturities[i], self._start_date, self.calendar) + r = get_legs(self._maturities[i], self._start_date, self.cal_file) self.contingent_legs[i] = r.first self.fee_legs[i] = r.second def tweak_portfolio(self, double epsilon, maturity, bint inplace=True): cdef: TDate maturity_c = pydate_to_TDate(maturity) - unsigned long mask = fill_mask(maturity_c, self._maturities, self._curves[0]) + unsigned long mask = fill_mask(maturity_c, self._maturities, self._curves[0].get()) TCurve* sc_orig TCurve *sc_copy + char* buf + char* newbuf size_t i + CurveName cn if inplace: for i in range(self._curves.size()): - sc_orig = self._curves[i].get() + sc_orig = (self._curves[i].get()) sc_copy = sc_orig tweak_curve(sc_orig, sc_copy, epsilon, mask) else: for i in range(self._curves.size()): - sc_orig = self._curves[i].get() - sc_copy = JpmcdsCopyCurve(sc_orig) + buf = self._curves[i].get() + cn = CurveName(buf + name_offset_from_buf(buf)) + buf_size = cn.size() + name_offset_from_buf(buf) + new_buf = malloc(buf_size) + memcpy(new_buf, buf, buf_size) + sc_orig = buf + sc_copy = new_buf + sc_copy.fArray = (new_buf + sizeof(TCurve)) tweak_curve(sc_orig, sc_copy, epsilon, mask) - self._curves[i].reset(sc_copy, JpmcdsFreeTCurve) + self._curves[i].reset(new_buf, char_free) def survival_matrix(self, const TDate[::1] schedule=None, double epsilon=0., bint log=False): cdef: - shared_ptr[TCurve] sc + TCurve* sc pair[CurveName, size_t] p size_t i, j np.npy_intp[2] n @@ -625,17 +646,17 @@ cdef class CreditIndex(CurveList): j = 0 for p in self.names: - sc = self._curves[p.second] - tickers[j] = p.first.ticker + sc = (self._curves[p.second].get()) + tickers[j] = string(p.first.ticker.begin(), p.first.ticker.end()) for i in range(n[1]): - sp_view[j, i] = survival_prob(sc.get(), self.base_date, + sp_view[j, i] = survival_prob(sc, self.base_date, schedule_ptr[i], epsilon, log) j += 1 return sp, tickers def spreads(self, YieldCurve yc not None, const TDate[::1] schedule=None): cdef: - shared_ptr[TCurve] sc + TCurve* sc pair[CurveName, size_t] p np.npy_intp[2] n const TDate* schedule_ptr @@ -668,7 +689,7 @@ cdef class CreditIndex(CurveList): spreads_view[j, i] = nan("") j += 1 continue - sc = self._curves[p.second] + sc = (self._curves[p.second].get()) JpmcdsCdsParSpreads( self.base_date, self.base_date + 1, @@ -681,20 +702,21 @@ cdef class CreditIndex(CurveList): ACT_360, MODIFIED, b'NONE', - get_TCurve(yc), - sc.get(), - self.recovery_rates[p.second].get(), + yc.get_TCurve(), + sc, + (sc + self.offset_recovery_rates[p.second]), &spreads_view[j, 0]) j += 1 return spreads def dispersion(self, YieldCurve yc not None, const TDate[::1] schedule=None, const bint use_log=True, const bint exp_loss=False): cdef: - shared_ptr[TCurve] sc + TCurve* sc pair[CurveName, size_t] p np.npy_intp n, orig_size const TDate* schedule_ptr size_t i + double* recovery_rates if schedule is None: schedule_ptr = self._maturities.const_data() @@ -722,15 +744,16 @@ cdef class CreditIndex(CurveList): for j in range(self._curves.size()): if self.defaulted[j] != -1 and not exp_loss: continue - sc = self._curves[j] + sc = self._curves[j].get() + recovery_rates = (sc + self.offset_recovery_rates[j]) w = self._weights[j] if exp_loss: if self.defaulted[j] != -1: for i in range(n): - spreads[i] = (1 - survival_prob(sc.get(), self.base_date, schedule_ptr[i], 0., False)) * (1 - self.recovery_rates[j].get()[i]) + spreads[i] = (1 - survival_prob(sc, self.base_date, schedule_ptr[i], 0., False)) * (1 - recovery_rates[i]) else: for i in range(n): - spreads[i] = 1 - self.recovery_rates[j].get()[i] + spreads[i] = 1 - recovery_rates[i] else: JpmcdsCdsParSpreads( self.base_date, @@ -746,7 +769,7 @@ cdef class CreditIndex(CurveList): b'NONE', yc.get_TCurve(), sc, - self.recovery_rates[j], + recovery_rates, spreads) if first_time: @@ -791,12 +814,12 @@ cdef class CreditIndex(CurveList): return r cdef unsigned long fill_mask(const TDate maturity, const vector[TDate]& maturities, - const shared_ptr[TCurve]& sc) nogil: + const TCurve* sc) nogil: cdef: TDate prev_maturity = 0 TDate next_maturity = maturity size_t i - TRatePt* it = sc.get().fArray + TRatePt* it = sc.fArray unsigned long mask = 0 for i in range(maturities.size()): @@ -849,13 +872,13 @@ cdef pair[TContingentLeg_ptr, TFeeLeg_ptr] get_legs(TDate maturity, True) return r -cdef double pv(const vector[shared_ptr[TCurve]]& curves, +cdef double pv(const vector[shared_ptr[char]]& curves, TDate base_date, TDate step_in_date, TDate cash_settle_date, const TCurve* yc, pair[TContingentLeg_ptr, TFeeLeg_ptr]& legs, - const vector[shared_ptr[double]] &recovery_rates, + const vector[uint16_t] &offset_recovery_rates, double fixed_rate, const vector[double]& weights, double epsilon, @@ -874,8 +897,8 @@ cdef double pv(const vector[shared_ptr[TCurve]]& curves, fl_pv = 0. cl_pv = 0. for i in prange(curves.size()): - tweaked_curve = curves[i].get() - recovery_rate = recovery_rates[i].get() + tweaked_curve = (curves[i].get()) + recovery_rate = (curves[i].get() + offset_recovery_rates[i]) # FIXME: do something better if isnan(deref(recovery_rate)): preinc(recovery_rate) @@ -899,14 +922,14 @@ cdef double pv(const vector[shared_ptr[TCurve]]& curves, r += weights[i] * (cl_pv - fl_pv * fixed_rate) else: with parallel(): - tweaked_curve = JpmcdsCopyCurve(curves[0].get()) + tweaked_curve = JpmcdsCopyCurve((curves[0].get())) cl_pv = 0. fl_pv = 0. for i in prange(curves.size()): - orig_curve = curves[i].get() + orig_curve = (curves[i].get()) tweaked_curve.fBaseDate = orig_curve.fBaseDate tweak_curve(orig_curve, tweaked_curve, epsilon, mask) - recovery_rate = recovery_rates[i].get() + recovery_rate = (orig_curve + offset_recovery_rates[i]) # FIXME: do something better if isnan(deref(recovery_rate)): preinc(recovery_rate) diff --git a/pyisda/curve.pxd b/pyisda/curve.pxd index f372ca7..39e2e8b 100644 --- a/pyisda/curve.pxd +++ b/pyisda/curve.pxd @@ -1,11 +1,12 @@ from .date cimport TStubMethod from .legs cimport TContingentLeg, TFeeLeg +from cython.operator cimport dereference as deref from libcpp.vector cimport vector from libcpp cimport bool from libcpp.string cimport string -from libc.string cimport memcpy -from libc.stdlib cimport malloc, calloc, free -from libc.stdint cimport uint64_t +from libc.string cimport memcpy, memmove +from libc.stdlib cimport malloc, realloc, free +from libc.stdint cimport uint64_t, uint16_t cdef extern from "isda/zerocurve.h" nogil: ctypedef int TBoolean @@ -29,11 +30,11 @@ cdef extern from "isda/zerocurve.h" nogil: char* holidayFile) cdef extern from "isda/bastypes.h": ctypedef struct TCurve: - int fNumItems TRatePt* fArray + int fNumItems + int fDayCountConv TDate fBaseDate double fBasis - long fDayCountConv ctypedef struct TRatePt: TDate fDate @@ -44,8 +45,8 @@ cdef extern from "farmhash.h" namespace 'util' nogil: uint64_t Hash64WithSeed(const char *buff, size_t len, uint64_t len) cdef inline size_t TCurve_size(const TCurve* curve) nogil: - return sizeof(curve.fNumItems) + sizeof(TDate) + sizeof(curve.fBasis) + \ - sizeof(curve.fDayCountConv) + sizeof(TRatePt) * curve.fNumItems + # this relies on TCurve having no padding + return sizeof(TCurve) - sizeof(TRatePt*) + sizeof(TRatePt) * curve.fNumItems cdef inline unsigned char* serialize(const TCurve* curve, unsigned char* buf) nogil: memcpy(buf, &(curve.fNumItems), sizeof(curve.fNumItems)) @@ -65,6 +66,30 @@ cdef inline void serialize_vector(const vector[TDate]& v, unsigned char* cursor) cursor += sizeof(size_t) memcpy(cursor, v.data(), sizeof(TDate) * v.size()) +cdef inline int fixup(char* buf) nogil: + cdef: + TDate base_date + double basis + int dcc + int n + char* cursor + TCurve* curve + cursor = buf + sizeof(TRatePt*) + n = deref(cursor) + cursor += sizeof(int) + sizeof(TRatePt) * n + base_date = deref(cursor) + cursor += sizeof(TDate) + basis = deref(cursor) + cursor += sizeof(double) + dcc = deref(cursor) + memmove(buf + sizeof(TCurve), buf + sizeof(TRatePt*) + sizeof(int), sizeof(TRatePt) * n) + curve = buf + curve.fArray = (buf + sizeof(TCurve)) + curve.fDayCountConv = dcc + curve.fBaseDate = base_date + curve.fBasis = basis + return sizeof(TCurve) + n * sizeof(TRatePt) + cdef inline const unsigned char* deserialize(const unsigned char* buf, TCurve* curve) nogil: memcpy(&curve.fNumItems, buf, sizeof(curve.fNumItems)) @@ -167,13 +192,13 @@ cdef extern from "isda/tcurve.h" nogil: double *rates, int numPts, double basis, - long dayCountConv) + int dayCountConv) TCurve* JpmcdsCopyCurve(const TCurve* curve) TDate* JpmcdsDatesFromCurve(TCurve* curve) TCurve* JpmcdsNewTCurve(TDate baseDate, # (I) Base date int numPts, # (I) Length of dates & rates double basis, # (I) Compounding periods/year - long dayCountConv) + int dayCountConv) cdef extern from "isda/cxzerocurve.h" nogil: double JpmcdsZeroPrice(TCurve* curve, TDate date) @@ -211,6 +236,14 @@ cdef extern from "" namespace "std" nogil: long use_count() cdef shared_ptr[T] make_shared[T](...) +cdef extern from "" namespace "std" nogil: + cdef cppclass string_view: + string_view() + char* data() + size_t size() + string.iterator begin() + string.iterator end() + cdef extern from "survival_curve.hpp" nogil: cdef cppclass CurveName: enum Seniority: @@ -223,13 +256,13 @@ cdef extern from "survival_curve.hpp" nogil: MM14 CR14 - string ticker + string_view ticker Seniority seniority DocClause doc_clause - CurveName(string&, Seniority, DocClause) - CurveName(const unsigned char* buf) - void serialize(unsigned char* buf) - void serialize(unsigned char* buf, size_t) + char* const name + CurveName() + CurveName(const char*, Seniority, DocClause) + CurveName(const char* buf) string full_ticker() size_t size() @@ -245,22 +278,30 @@ cpdef enum Seniority: SLA cdef inline const TCurve* get_TCurve(Curve c) nogil: - return c._thisptr.get() + return c.buf.get() cdef class Curve: - cdef shared_ptr[TCurve] _thisptr - cdef size_t size(self) nogil + cdef shared_ptr[char] buf + cdef readonly size_t buf_size + cdef inline const TCurve* get_TCurve(self) nogil cdef class YieldCurve(Curve): - cdef vector[TDate] dates + pass cdef class SpreadCurve(Curve): - cdef shared_ptr[CurveName] name - cdef shared_ptr[double] recovery_rates cdef TDate defaulted + cdef uint16_t offset_recovery_rates + cdef uint16_t offset_name + cpdef bytes as_buffer(self, bint compressed) + cdef inline double* recovery_rates_ptr(self) nogil + cdef inline char* name(self) nogil cdef fArray_to_list(TRatePt* fArray, int fNumItems) cdef void tweak_curve(const TCurve* sc, TCurve* sc_tweaked, double epsilon, unsigned long mask) nogil + +cdef uint16_t name_offset_from_buf(const char* buf) nogil + +cdef size_t buf_size(int n, int ticker_len) nogil diff --git a/pyisda/curve.pyx b/pyisda/curve.pyx index f82f6b7..5f6a125 100644 --- a/pyisda/curve.pyx +++ b/pyisda/curve.pyx @@ -1,10 +1,11 @@ from cython.operator import dereference as deref, preincrement as preinc from libc.math cimport log1p, log, exp, isnan +from libc.string cimport strcpy, strncpy, strlen from .date cimport (JpmcdsStringToDateInterval, pydate_to_TDate, dcc, JpmcdsDateIntervalToFreq, JpmcdsDateFwdThenAdjust, TDate_to_pydate, JpmcdsDateFromBusDaysOffset, JpmcdsStringToDayCountConv, ACT_360, - BadDay, FOLLOW, MODIFIED, NONE) + ACT_365F, BadDay, FOLLOW, MODIFIED, NONE) from .date import dcc_tostring from .date cimport _previous_twentieth, _roll_date, c_datetime from .cdsone cimport JpmcdsStringToStubMethod @@ -40,6 +41,8 @@ cdef int SUCCESS = 0 cdef inline void double_free(double* ptr) nogil: free(ptr) +cdef inline void char_free(char* ptr) nogil: + free(ptr) cdef double survival_prob(const TCurve* curve, TDate start_date, TDate maturity_date, double eps, bint log) nogil: cdef: @@ -68,47 +71,93 @@ cdef class Curve(object): def __getstate__(self): cdef: - size_t curve_size = self.size() - unsigned char* buf = malloc(curve_size * sizeof(unsigned char)) - serialize(get_TCurve(self), buf) - cdef bytes r = buf[:curve_size] - free(buf) + size_t state_size = self.buf_size - sizeof(TRatePt*) + int dst_capacity, compressed_size + dst_capacity = LZ4_compressBound(state_size) + dst = malloc(dst_capacity) + compressed_size = LZ4_compress_default(self.buf.get() + sizeof(TRatePt*), dst, state_size, dst_capacity) + cdef bytes r = dst[:compressed_size] + free(dst) return r - def __setstate__(self, bytes state): + def __setstate__(self, bytes state not None): cdef: - TCurve* curve = malloc(sizeof(TCurve)) - unsigned char* cursor = state - deserialize(cursor, curve) - self._thisptr.reset(curve, JpmcdsFreeTCurve) + const char* src = state + int decomp_size = 512 + char* curve = NULL + int state_size + size_t size = PyBytes_GET_SIZE(state) + int retry = 0 + bytes r - cdef size_t size(self) nogil: - return TCurve_size(get_TCurve(self)) + with nogil: + while True: + curve = realloc(curve, decomp_size) + state_size = LZ4_decompress_safe(src, curve + sizeof(TRatePt*), size, decomp_size) + if state_size < 0: + retry += 1 + if retry == 2: + free(curve) + raise MemoryError("something went wrong") + else: + decomp_size *= 2 + else: + break + self.buf_size = state_size + sizeof(TRatePt*) + (curve).fArray = (curve + sizeof(TCurve)) + self.buf.reset(curve, char_free) @classmethod - def from_bytes(cls, object state): + def from_bytes(cls, object state, bint compressed=True, int version=0): cdef: - Curve instance = Curve.__new__(Curve) - TCurve* curve = malloc(sizeof(TCurve)) + Curve instance = cls.__new__(cls) Py_buffer* py_buf - unsigned char* cursor + char* src + char* curve = NULL + int size, state_size + int decomp_size = 512 + int retry = 0 + TDate base_date + double basis + if PyMemoryView_Check(state): py_buf = PyMemoryView_GET_BUFFER(state) - cursor = py_buf.buf + src = py_buf.buf + size = py_buf.len else: - cursor = state - deserialize(cursor, curve) - instance._thisptr.reset(curve, JpmcdsFreeTCurve) + src = state + size = PyBytes_GET_SIZE(state) + + with nogil: + if compressed: + while True: + curve = realloc(curve, decomp_size) + state_size = LZ4_decompress_safe(src, curve + sizeof(TRatePt*), size, decomp_size) + if state_size < 0: + retry += 1 + if retry == 2: + free(curve) + raise MemoryError("something went wrong") + else: + decomp_size *= 2 + else: + break + else: + curve = malloc(size + sizeof(TRatePt*)) + memcpy(curve + sizeof(TRatePt*), src, size) + state_size = size + if version == 0: + instance.buf_size = fixup(curve) + elif version == 1: + (curve).fArray = (curve + sizeof(TCurve)) + instance.buf_size = state_size + sizeof(TRatePt*) + instance.buf.reset(curve, char_free) return instance def __hash__(self): cdef: - const TCurve* curve = get_TCurve(self) - size_t curve_size = self.size() - unsigned char* buf = malloc(curve_size * sizeof(unsigned char)) - serialize(curve, buf) - cdef uint64_t r = Hash64(buf, curve_size) - free(buf) + size_t curve_size = self.buf_size - sizeof(TRatePt*) + cdef uint64_t r = Hash64((self.buf.get() + sizeof(TRatePt*)), curve_size) return r def inspect(self): @@ -153,7 +202,7 @@ cdef class Curve(object): if isinstance(self, YieldCurve): name = 'forward_rates' elif isinstance(self, SpreadCurve): - name = (self).name.get().ticker + name = self.name()[2] if name == "": name = "hazard_rates" return pd.Series(h, index=d.view('M8[D]'), name=name) @@ -161,17 +210,20 @@ cdef class Curve(object): def __iter__(self): cdef: size_t i = 0 - TRatePt* it = get_TCurve(self).fArray - for i in range(get_TCurve(self).fNumItems): + TRatePt* it = self.get_TCurve().fArray + for i in range(self.get_TCurve().fNumItems): yield (TDate_to_pydate(it[i].fDate), it[i].fRate) def __len__(self): - return get_TCurve(self).fNumItems + return self.get_TCurve().fNumItems def __deepcopy__(self, dict memo): - cdef Curve sc = Curve.__new__(Curve) - sc._thisptr.reset(JpmcdsCopyCurve(get_TCurve(self)), - JpmcdsFreeTCurve) + cdef Curve sc = type(self).__new__(type(self)) + cdef char* curve = malloc(self.buf_size) + memcpy(curve, self.buf.get(), self.buf_size) + (curve).fArray = (curve + sizeof(TCurve)) + sc.buf_size = self.buf_size + sc.buf.reset(curve, char_free) memo[id(self)] = sc return sc @@ -300,9 +352,8 @@ cdef class YieldCurve(Curve): char* routine = 'zerocurve' TDate value_date = pydate_to_TDate(date) TCurve* curve - - self.dates = vector[TDate](len(periods)) - + TDate* dates + char* buf cdef TDate settle_date if JpmcdsDateFromBusDaysOffset(value_date, 2, "None", &settle_date) != SUCCESS: @@ -314,196 +365,114 @@ cdef class YieldCurve(Curve): size_t i char* period_bytes + dates = malloc(len(periods) * sizeof(TDate)) for i, p in enumerate(periods): period_bytes = p if JpmcdsStringToDateInterval(period_bytes, routine, &tmp) != SUCCESS: + free(dates) raise ValueError # according to Markit docs, Swap maturities should be Unadjusted # but this doesn't match with Bloomberg # period_adjust = MODIFIED if types[i] == 'M' else NONE period_adjust = MODIFIED if JpmcdsDateFwdThenAdjust(settle_date, &tmp, period_adjust, - "None", &self.dates[i]) != SUCCESS: + "None", &dates[i]) != SUCCESS: + free(dates) raise ValueError('Invalid interval') cdef char* fixed_bytes = fixed_swap_period cdef char* float_bytes = float_swap_period cdef char* types_bytes = types if JpmcdsStringToDateInterval(fixed_bytes, routine, &ivl) != SUCCESS: + free(dates) raise ValueError if JpmcdsDateIntervalToFreq(&ivl, &fixed_freq) != SUCCESS: + free(dates) raise ValueError if JpmcdsStringToDateInterval(float_bytes, routine, &ivl) != SUCCESS: + free(dates) raise ValueError if JpmcdsDateIntervalToFreq(&ivl, &float_freq) != SUCCESS: + free(dates) raise ValueError curve = JpmcdsBuildIRZeroCurve( - value_date, types_bytes, self.dates.data(), - &rates[0], self.dates.size(), dcc(mm_dcc), fixed_freq, + value_date, types_bytes, dates, + &rates[0], rates.shape[0], dcc(mm_dcc), fixed_freq, float_freq, dcc(fixed_swap_dcc), dcc(float_swap_dcc), bad_day_conv, b"None" ) if curve is NULL: + free(dates) raise ValueError("Curve didn't build") else: - self._thisptr.reset(curve, JpmcdsFreeTCurve) - - cdef size_t size(self) nogil: - return Curve.size(self) + sizeof(size_t) + sizeof(TDate) * self.dates.size() - - def __getstate__(self): - cdef: - const TCurve* curve = get_TCurve(self) - size_t buf_size = self.size() - char* buf = malloc(buf_size) - unsigned char* cursor = serialize(curve, buf) - int dst_capacity, compressed_size - char* dst - bytes r - - serialize_vector(self.dates, cursor) - - dst_capacity = LZ4_compressBound(buf_size) - dst = malloc(dst_capacity) - compressed_size = LZ4_compress_default(buf, dst, buf_size, dst_capacity) - r = dst[:compressed_size] - free(dst) - free(buf) - return r - - def __setstate__(self, bytes state not None): - cdef: - TCurve* curve = malloc(sizeof(TCurve)) - const char* src = state - const unsigned char* cursor - char* dst - size_t num_instr - size_t size = PyBytes_GET_SIZE(state) - - with nogil: - dst = malloc(500) - if LZ4_decompress_safe(src, dst, size, 500) < 0: - free(dst) - raise MemoryError("something went wrong") - else: - cursor = dst - cursor = deserialize(cursor, curve) - self._thisptr.reset(curve, JpmcdsFreeTCurve) - memcpy(&num_instr, cursor, sizeof(size_t)) - cursor += sizeof(size_t) - self.dates = vector[TDate](num_instr) - memcpy(self.dates.data(), cursor, num_instr * sizeof(TDate)) - free(dst) - - def __deepcopy__(self, dict memo): - cdef YieldCurve yc = YieldCurve.__new__(YieldCurve) - yc._thisptr.reset(JpmcdsCopyCurve(get_TCurve(self)), JpmcdsFreeTCurve) - yc.dates = vector[TDate](self.dates) - memo[id(self)] = yc - return yc - - @classmethod - def from_bytes(cls, object state): - cdef: - YieldCurve instance = YieldCurve.__new__(YieldCurve) - size_t num_instr - const unsigned char* cursor - const char* src - char* dst - TCurve* curve = malloc(sizeof(TCurve)) - int size = 0 - Py_buffer* py_buf - - - if PyMemoryView_Check(state): - py_buf = PyMemoryView_GET_BUFFER(state) - src = py_buf.buf - size = py_buf.len - else: - src = state - size = PyBytes_GET_SIZE(state) - + # we copy curve into a continuous buffer + self.buf_size = sizeof(TCurve) + curve.fNumItems * sizeof(TRatePt) + buf = malloc(self.buf_size) + memcpy(buf, curve, sizeof(TCurve)) + (buf).fArray = (buf + sizeof(TCurve)) + memcpy(buf + sizeof(TCurve), curve.fArray, curve.fNumItems * sizeof(TRatePt)) + self.buf.reset(buf, char_free) + free(dates) - with nogil: - dst = malloc(600) - if LZ4_decompress_safe(src, dst, size, 600) < 0: - free(dst) - raise MemoryError("something went wrong") - else: - cursor = dst - - cursor = deserialize(cursor, curve) - instance._thisptr.reset(curve, JpmcdsFreeTCurve) - memcpy(&num_instr, cursor, sizeof(size_t)) - cursor += sizeof(size_t) - instance.dates = vector[TDate](num_instr) - memcpy(instance.dates.data(), cursor, num_instr * sizeof(TDate)) - free(dst) - - return instance - - def __hash__(self): - cdef: - const TCurve* curve = get_TCurve(self) - size_t buf_size = self.size() - size_t size - unsigned char* buf = malloc(buf_size) - unsigned char* cursor = serialize(curve, buf) - - size = self.dates.size() - memcpy(cursor, &size, sizeof(size_t)) - cursor += sizeof(size_t) - memcpy(cursor, self.dates.data(), sizeof(TDate) * size) - cdef uint64_t r = Hash64(buf, buf_size) - free(buf) - return r @classmethod def from_discount_factors(cls, base_date, list dates, double[:] dfs, str day_count_conv, Basis basis=CONTINUOUS): """ build a yield curve from a list of discount factors """ - cdef TDate base_date_c = pydate_to_TDate(base_date) cdef YieldCurve yc = YieldCurve.__new__(YieldCurve) - yc.dates = vector[TDate](len(dates)) + yc.buf_size = sizeof(TCurve) + len(dates) * sizeof(TRatePt) + cdef char* buf = malloc(yc.buf_size) + cdef TCurve* curve = buf + curve.fBaseDate = pydate_to_TDate(base_date) + curve.fNumItems = len(dates) + curve.fArray = (buf + sizeof(TCurve)) + curve.fBasis = basis + curve.fDayCountConv = dcc(day_count_conv) + cdef size_t i - cdef double* rates = malloc(sizeof(double) * yc.dates.size()) for i, d in enumerate(dates): - yc.dates[i] = pydate_to_TDate(d) - JpmcdsDiscountToRateYearFrac(dfs[i], (yc.dates[i]-base_date_c)/365., - basis, &rates[i]) + curve.fArray[i].fDate = pydate_to_TDate(d) + JpmcdsDiscountToRateYearFrac(dfs[i], (curve.fArray[i].fDate-curve.fBaseDate)/365., + basis, &curve.fArray[i].fRate) - yc._thisptr.reset( - JpmcdsMakeTCurve(base_date_c, yc.dates.data(), rates, dfs.shape[0], - basis, dcc(day_count_conv)), JpmcdsFreeTCurve) + yc.buf.reset(buf, char_free) return yc @classmethod def from_zero_rates(cls, base_date, list dates, double[:] rates, str day_count_conv, Basis basis=CONTINUOUS): """ build a yield curve from a list of discount factors """ - cdef TDate base_date_c = pydate_to_TDate(base_date) cdef YieldCurve yc = YieldCurve.__new__(YieldCurve) - yc.dates = vector[TDate](len(dates)) + yc.buf_size = sizeof(TCurve) + len(dates) * sizeof(TRatePt) + cdef char* buf = malloc(yc.buf_size) + cdef TCurve* curve = buf + curve.fBaseDate = pydate_to_TDate(base_date) + curve.fNumItems = len(dates) + curve.fArray = (buf + sizeof(TCurve)) + curve.fBasis = basis + curve.fDayCountConv = dcc(day_count_conv) + cdef size_t i for i, d in enumerate(dates): - yc.dates[i] = pydate_to_TDate(d) - yc._thisptr.reset( - JpmcdsMakeTCurve(base_date_c, yc.dates.data(), &rates[0], rates.shape[0], - basis, dcc(day_count_conv)), JpmcdsFreeTCurve) + curve.fArray[i].fDate = pydate_to_TDate(d) + curve.fArray[i].fRate = rates[i] + yc.buf.reset(buf, char_free) return yc def bump(self, epsilon): cdef: YieldCurve yc = YieldCurve.__new__(YieldCurve) - const TCurve* curve = get_TCurve(self) - TCurve* new_curve = malloc(sizeof(TCurve)) + char* buf = malloc(self.buf_size) + const TCurve* curve = self.get_TCurve() + TCurve* new_curve = buf TRatePt *ptr1, *ptr2 int N = curve.fNumItems int i + yc.buf_size = self.buf_size new_curve.fNumItems = N new_curve.fBaseDate = curve.fBaseDate new_curve.fBasis = curve.fBasis - new_curve.fArray = malloc(N * sizeof(TRatePt)) + new_curve.fArray = (buf + sizeof(TCurve)) new_curve.fDayCountConv = curve.fDayCountConv ptr1 = curve.fArray ptr2 = new_curve.fArray @@ -513,8 +482,7 @@ cdef class YieldCurve(Curve): preinc(ptr1) preinc(ptr2) - yc.dates = vector[TDate](self.dates) - yc._thisptr.reset(new_curve, JpmcdsFreeTCurve) + yc.buf.reset(buf, char_free) return yc def discount_factor(self, d2, d1=None): @@ -561,29 +529,35 @@ cdef class YieldCurve(Curve): """ returns the list of instrument dates """ - return [TDate_to_pydate(d) for d in self.dates] + return [TDate_to_pydate(self.get_TCurve().fArray[i].fDate) for i in range(self.get_TCurve().fNumItems)] def expected_forward_curve(self, forward_date): """ returns the expected forward curve """ cdef TDate forward_date_c = pydate_to_TDate(forward_date) cdef YieldCurve yc = YieldCurve.__new__(YieldCurve) + cdef const TCurve* curve = self.get_TCurve() cdef size_t i = 0 - while self.dates[i] < forward_date_c: + cdef int n = curve.fNumItems + while curve.fArray[i].fDate < forward_date_c: i += 1 - yc.dates = vector[TDate](self.dates.size() - i) - cdef double* rates = malloc(sizeof(double) * yc.dates.size()) - cdef size_t k + yc.buf_size = sizeof(TCurve) + (n - i) * sizeof(TRatePt) + yc.buf.reset(malloc(yc.buf_size), char_free) + cdef TCurve* forward_curve = yc.buf.get() + forward_curve.fNumItems = n - i + forward_curve.fDayCountConv = curve.fDayCountConv + forward_curve.fBaseDate = forward_date_c + forward_curve.fBasis = 1.0 + forward_curve.fArray = (yc.buf.get() + sizeof(TCurve)) cdef double df - for k in range(yc.dates.size()): - yc.dates[k] = self.dates[i] - df = JpmcdsForwardZeroPrice(self._thisptr.get(), forward_date_c, self.dates[i]) + cdef TRatePt* arr = forward_curve.fArray + while i < n: + arr.fDate = curve.fArray[i].fDate + df = JpmcdsForwardZeroPrice(curve, forward_date_c, arr.fDate) JpmcdsDiscountToRateYearFrac( - df, (self.dates[i] - forward_date_c)/365., - 1, &rates[k]) + df, (arr.fDate - forward_date_c)/365., + 1, &arr.fRate) i += 1 - yc._thisptr.reset(JpmcdsMakeTCurve( - forward_date_c, yc.dates.data(), rates, yc.dates.size(), - 1, self._thisptr.get().fDayCountConv), JpmcdsFreeTCurve) + preinc(arr) return yc @cython.cdivision(True) @@ -606,6 +580,14 @@ cdef void tweak_curve(const TCurve* sc, TCurve* sc_tweaked, double epsilon, h1 = h2 t1 = t2 +cdef size_t buf_size(int n, int ticker_len) nogil: + return sizeof(TCurve) + n * sizeof(TRatePt) + n * sizeof(double) + sizeof(TDate) + sizeof(CurveName.DocClause) + sizeof(CurveName.Seniority) + ticker_len + 1 + +cdef uint16_t name_offset_from_buf(char* buf) nogil: + cdef TCurve* curve = buf + cdef int n = curve.fNumItems + return sizeof(TCurve) + n * (sizeof(TRatePt) + sizeof(double)) + sizeof(TDate) + cdef class SpreadCurve(Curve): """ Initialize a SpreadCurve from a list of spreads and maturity. @@ -623,6 +605,11 @@ cdef class SpreadCurve(Curve): recovery_rates : double[:] pay_accrued_on_default : bool, optional Default to True + ticker : string, optional + seniority: Seniority, optional + doc_clause: DocClause, optional + fill_curve: bool, optional + calendar: bytes, optional default to "NONE" """ @cython.boundscheck(False) @@ -634,17 +621,23 @@ cdef class SpreadCurve(Curve): const double[:] recovery_rates, bint pay_accrued_on_default=True, str ticker="", Seniority seniority=Senior, DocClause doc_clause=XR14, - bint fill_curve=True, defaulted=None): + bint fill_curve=True, defaulted=None, + calendar=b"NONE"): cdef TDate today_c = pydate_to_TDate(today) cdef TDate step_in_date_c cdef TDate cash_settle_date_c cdef TDate start_date_c - cdef string ticker_cpp = ticker + cdef char* ticker_c = ticker cdef TRatePt* ptr = NULL + cdef char* buf + cdef char* cursor + cdef char* c_cal = calendar + cdef int ticker_len = len(ticker) + cdef const char* c_ticker = ticker if start_date is None: - start_date_c = _previous_twentieth(today_c, True, b"NONE") + start_date_c = _previous_twentieth(today_c, True, calendar) if start_date_c == -1: raise ValueError("incorrect today's date: " + today) else: @@ -726,11 +719,32 @@ cdef class SpreadCurve(Curve): ACT_360, &stub_type, MODIFIED, - b'NONE') + c_cal) + if curve is not NULL: + if fill_curve and curve.fNumItems != n_dates: + self.buf_size = buf_size(n_dates, ticker_len) + buf = malloc(self.buf_size) + _fill_curve(curve, end_dates_c, n_dates, buf) + JpmcdsFreeTCurve(curve) + else: + self.buf_size = buf_size(curve.fNumItems, ticker_len) + buf = malloc(buf_size(curve.fNumItems, ticker_len)) + memcpy(buf, curve, sizeof(TCurve)) + memcpy(buf + sizeof(TCurve), curve.fArray, curve.fNumItems * sizeof(TRatePt)) + (buf).fArray = (buf + sizeof(TCurve)) + free(curve.fArray) + free(curve) + curve = buf + else: + if freeup: + free(end_dates_c) + raise ValueError("Didn't init the survival curve properly") else: - curve = malloc(sizeof(TCurve)) + self.buf_size = buf_size(n_dates, ticker_len) + buf = malloc(self.buf_size) + curve = buf curve.fNumItems = n_dates - curve.fArray = malloc(n_dates * sizeof(TRatePt)) + curve.fArray = (buf + sizeof(TCurve)) curve.fBaseDate = today_c curve.fBasis = CONTINUOUS curve.fDayCountConv = ACT_360 @@ -740,29 +754,25 @@ cdef class SpreadCurve(Curve): ptr.fRate = JPMCDS_MAX_RATE preinc(ptr) - if curve is not NULL: - if fill_curve and curve.fNumItems != n_dates: - new_curve = _fill_curve(curve, end_dates_c, n_dates) - JpmcdsFreeTCurve(curve) - curve = new_curve - if freeup: - free(end_dates_c) - self._thisptr.reset(curve, JpmcdsFreeTCurve) - self.recovery_rates = shared_ptr[double]( - malloc(curve.fNumItems * sizeof(double)), - double_free) - memcpy(self.recovery_rates.get(), &recovery_rates[0], - curve.fNumItems * sizeof(double)) - - self.name = make_shared[CurveName](ticker_cpp, - seniority, - doc_clause) - - if curve is NULL: if freeup: free(end_dates_c) - raise ValueError("Didn't init the survival curve properly") + self.buf.reset(buf, char_free) + self.offset_recovery_rates = sizeof(TCurve) + curve.fNumItems * sizeof(TRatePt) + cursor = self.recovery_rates_ptr() + memcpy(cursor, &recovery_rates[0], curve.fNumItems * sizeof(double)) + cursor += curve.fNumItems * sizeof(double) + memcpy(cursor, &self.defaulted, sizeof(TDate)) + cursor += sizeof(TDate) + self.offset_name = cursor - self.buf.get() + cursor[0] =seniority + cursor[1] = doc_clause + strncpy(&cursor[2], c_ticker, ticker_len + 1) + + cdef inline double* recovery_rates_ptr(self) nogil: + return (self.buf.get() + self.offset_recovery_rates) + cdef inline char* name(self) nogil: + return self.buf.get() + self.offset_name def survival_probability(self, d2, d1=None): """ computes the survival probability at a given date. @@ -795,70 +805,51 @@ cdef class SpreadCurve(Curve): return JpmcdsForwardZeroPrice(curve, pydate_to_TDate(d1), pydate_to_TDate(d2)) - cdef size_t size(self) nogil: - cdef const TCurve* curve = get_TCurve(self) - return Curve.size(self) + curve.fNumItems * sizeof(double) + \ - sizeof(TDate) + self.name.get().size() def __getstate__(self): - return self.as_buffer(False) + return self.as_buffer(True) cpdef bytes as_buffer(self, bint compressed): cdef: - const TCurve* curve = get_TCurve(self) - size_t size_recovery = curve.fNumItems * sizeof(double) - size_t buf_size = TCurve_size(curve) + size_recovery + sizeof(TDate) + \ - self.name.get().size() - char* buf = malloc(buf_size) - unsigned char* cursor = serialize(curve, buf) + bytes r int dst_capacity, compressed_size char* dst - bytes r - memcpy(cursor, self.recovery_rates.get(), size_recovery) - cursor += size_recovery - memcpy(cursor, &self.defaulted, sizeof(TDate)) - cursor += sizeof(TDate) - self.name.get().serialize(cursor) + int size = self.buf_size - sizeof(TRatePt*) if compressed: - dst_capacity = LZ4_compressBound(buf_size) + dst_capacity = LZ4_compressBound(size) dst = malloc(dst_capacity) - compressed_size = LZ4_compress_default(buf, dst, buf_size, dst_capacity) + compressed_size = LZ4_compress_default(self.buf.get() + sizeof(TRatePt*), dst, size, dst_capacity) r = dst[:compressed_size] free(dst) else: - r = buf[:buf_size] - - free(buf) + r = self.buf.get()[sizeof(TRatePt*):self.buf_size] return r def __setstate__(self, bytes state): - cdef: - TCurve* curve = malloc(sizeof(TCurve)) - const unsigned char* cursor = state - size_t recovery_size - double* recovery_rates - - cursor = deserialize(cursor, curve) - self._thisptr.reset(curve, JpmcdsFreeTCurve) - recovery_size = curve.fNumItems * sizeof(double) - recovery_rates = malloc(recovery_size) - memcpy(recovery_rates, cursor, recovery_size) - cursor += recovery_size - self.recovery_rates.reset(recovery_rates, double_free) - memcpy(&self.defaulted, cursor, sizeof(TDate)) + super().__setstate__(state) + cdef char* cursor = self.buf.get() + cdef int n = self.get_TCurve().fNumItems + self.offset_recovery_rates = sizeof(TCurve) + n * sizeof(TRatePt) + cursor = self.buf.get() + self.offset_recovery_rates + n * sizeof(double) + self.defaulted = deref(cursor) cursor += sizeof(TDate) - self.name = make_shared[CurveName](cursor) + self.offset_name = cursor - self.buf.get() def __deepcopy__(self, dict memo): cdef SpreadCurve sc = SpreadCurve.__new__(SpreadCurve) - cdef const TCurve* curve = get_TCurve(self) - cdef size_t recovery_size = curve.fNumItems * sizeof(double) - sc._thisptr.reset(JpmcdsCopyCurve(curve), JpmcdsFreeTCurve) - sc.name = make_shared[CurveName](deref(self.name)) - sc.recovery_rates = shared_ptr[double](malloc(recovery_size), double_free) - memcpy(sc.recovery_rates.get(), self.recovery_rates.get(), recovery_size) - sc.defaulted = self.defaulted + cdef char* buf = malloc(self.buf_size) + cdef int n = self.get_TCurve().fNumItems + memcpy(buf, self.buf.get(), self.buf_size) + (buf).fArray = (buf + sizeof(TCurve)) + sc.buf_size = self.buf_size + sc.buf.reset(buf, char_free) + sc.offset_recovery_rates = sizeof(TCurve) + n * sizeof(TRatePt) + buf += sc.offset_recovery_rates + buf += n * sizeof(double) + sc.defaulted = deref(buf) + buf += sizeof(TDate) + sc.offset_name = buf - sc.buf.get() memo[id(self)] = sc return sc @@ -875,240 +866,216 @@ cdef class SpreadCurve(Curve): def default_date(self, d): self.defaulted = pydate_to_TDate(d) cdef: - TCurve* curve = get_TCurve(self) - n_dates = curve.fNumItems + TCurve* curve = self.get_TCurve() + int n_dates = curve.fNumItems int i + char* buf = curve + self.offset_recovery_rates + n_dates * sizeof(double) + memcpy(buf, &self.defaulted, sizeof(TDate)) for i in range(n_dates): curve.fArray[i].fRate = JPMCDS_MAX_RATE @classmethod - def from_bytes(cls, object state, const bint compressed=False): + def from_bytes(cls, object state, const bint compressed=False, version=0): cdef: - SpreadCurve instance = SpreadCurve.__new__(SpreadCurve) - const unsigned char* cursor - const char* src - char* dst - TCurve* curve = malloc(sizeof(TCurve)) - size_t size - Py_buffer* py_buf - - - if PyMemoryView_Check(state): - py_buf = PyMemoryView_GET_BUFFER(state) - src = py_buf.buf - size = py_buf.len - else: - src = state - size = PyBytes_GET_SIZE(state) - with nogil: - if compressed: - dst = malloc(500) - if LZ4_decompress_safe(src, dst, size, 500) < 0: - free(dst) - raise MemoryError("something went wrong") - else: - cursor = dst - else: - cursor = src - cursor = deserialize(cursor, curve) - size = curve.fNumItems * sizeof(double) - instance.recovery_rates = shared_ptr[double](malloc(size), - double_free) - - instance._thisptr.reset(curve, JpmcdsFreeTCurve) - memcpy(instance.recovery_rates.get(), cursor, size) - cursor += size - memcpy(&instance.defaulted, cursor, sizeof(TDate)) + SpreadCurve instance = super().from_bytes(state, compressed, version) + char* cursor = instance.buf.get() + TCurve* curve = cursor + int n = curve.fNumItems + int ticker_len + if version == 1: + instance.offset_recovery_rates = sizeof(TCurve) + n * sizeof(TRatePt) + cursor += sizeof(TCurve) + n * (sizeof(TRatePt) + sizeof(double)) + instance.defaulted = deref(cursor) cursor += sizeof(TDate) - instance.name = make_shared[CurveName](cursor) - if compressed: - free(dst) - + instance.offset_name = cursor - instance.buf.get() + elif version == 0: + cursor += sizeof(TCurve) + n * sizeof(TRatePt) + memmove(cursor, cursor + 4, n * sizeof(double)) + instance.offset_recovery_rates = sizeof(TCurve) + n * sizeof(TRatePt) + cursor += n * sizeof(double) + memmove(cursor, cursor + 4, sizeof(TDate)) + instance.defaulted = deref(cursor) + cursor += sizeof(TDate) + instance.offset_name = cursor - instance.buf.get() + cursor[0] = deref(cursor + 4) + cursor[1] = deref(cursor + 8) + strcpy(cursor + 2, cursor + 12) + ticker_len = strlen(cursor + 2) + instance.buf_size = cursor - instance.buf.get() + 2 + ticker_len + 1 return instance - def __hash__(self): - # same code as __getstate__ - cdef: - const TCurve* curve = get_TCurve(self) - size_t buf_size = self.size() - size_t size_recovery = curve.fNumItems * sizeof(double) - unsigned char* buf = malloc(buf_size) - unsigned char* cursor = serialize(curve, buf) - memcpy(cursor, self.recovery_rates.get(), size_recovery) - cursor += size_recovery - memcpy(cursor, &self.defaulted, sizeof(TDate)) - cursor += sizeof(TDate) - self.name.get().serialize(cursor) - cdef uint64_t r = Hash64(buf, buf_size) - free(buf) - return r - - @classmethod - def from_flat_hazard(cls, base_date, double rate, Basis basis=CONTINUOUS, - str day_count_conv='Actual/365F', double recov=0.4, - str ticker="", Seniority sen=Senior, - DocClause doc=XR14): - """ - Alternative constructor for flat hazard rate Curve. - - Parameters - ---------- - base_date : datetime.date - Starting date of the curve - rate : float - Flat hazard rate. - basis : int, optional - Default to :data:`CONTINUOUS` - day_count_conv : str, optional - Default to 'Actual/365F' - - """ - cdef TDate base_date_c = pydate_to_TDate(base_date) - cdef SpreadCurve sc = SpreadCurve.__new__(SpreadCurve) - cdef TDate max_date = 200000 # can go higher but this should be more than enough - cdef string ticker_cpp = ticker - cdef double* recovery_rates - - sc._thisptr.reset(JpmcdsMakeTCurve(base_date_c, &max_date, &rate, 1, - basis, dcc(day_count_conv)), - JpmcdsFreeTCurve) - recovery_rates = malloc(sizeof(double)) - recovery_rates[0] = recov - sc.recovery_rates.reset(recovery_rates, double_free) - sc.name = make_shared[CurveName](ticker_cpp, (sen), - (doc)) - return sc - @cython.boundscheck(False) - def tweak_curve(self, double epsilon, bint multiplicative=True, - unsigned long mask=-1, bint inplace=False): - """ - Tweak the survival curve in place. - - Parameters - ---------- - epsilon : double - tweaking factor (either additive or multiplicative) - multiplicative : bool, optional - do we scale by 1+epsilon or add epsilon (default multiplicative). - mask : bitmask - Default is tweak everything, otherwise only tweak values - in the mask. - """ - cdef: - const TCurve* curve_orig = get_TCurve(self) - TCurve* curve_tweaked - SpreadCurve sc - int num_items = curve_orig.fNumItems - double* recovery_rates - - if not inplace: - sc = SpreadCurve.__new__(SpreadCurve) - curve_tweaked = JpmcdsCopyCurve(curve_orig) - sc._thisptr.reset(curve_tweaked, JpmcdsFreeTCurve) - sc.name = make_shared[CurveName](deref(self.name)) - recovery_rates = malloc(sizeof(double) * num_items) - sc.recovery_rates.reset(recovery_rates, double_free) - memcpy(sc.recovery_rates.get(), self.recovery_rates.get(), - num_items * sizeof(double)) - else: - sc = self - curve_tweaked = curve_orig - - if mask != 0: - tweak_curve(curve_orig, curve_tweaked, epsilon, mask) - return sc - - @cython.boundscheck(False) - def par_spread(self, today, step_in_date, start_date, end_dates, - const double[:] recovery_rates, YieldCurve yc not None, - bint pay_accrued_on_default=True): - """ - Parameters - ---------- - recovery_rates : should be same length as end_dates - """ - cdef TDate today_c = pydate_to_TDate(today) - cdef TDate step_in_date_c = pydate_to_TDate(step_in_date) - cdef TDate start_date_c = pydate_to_TDate(start_date) - cdef int n_dates = len(end_dates) - cdef TDate* end_dates_c = malloc(n_dates * sizeof(TDate)) - cdef size_t i - for i, d in enumerate(end_dates): - end_dates_c[i] = pydate_to_TDate(d) - cdef double* par_spreads - - cdef TStubMethod stub_type - if JpmcdsStringToStubMethod(b"f/s", &stub_type) != 0: - free(end_dates_c) - raise ValueError("can't convert stub") - - cdef int result - with nogil: - par_spreads = malloc(n_dates * sizeof(double)) - result = JpmcdsCdsParSpreads(today_c, - step_in_date_c, - start_date_c, - n_dates, - end_dates_c, - pay_accrued_on_default, - NULL, - &stub_type, - ACT_360, - MODIFIED, - b'NONE', - get_TCurve(yc), - get_TCurve(self), - &recovery_rates[0], - par_spreads) - free(end_dates_c) - cdef list r = [] - if result != SUCCESS: - free(par_spreads) - raise ValueError("can't compute par spread") - else: - for i in range(n_dates): - r.append(par_spreads[i]) - free(par_spreads) - return r +# @classmethod +# def from_flat_hazard(cls, base_date, double rate, Basis basis=CONTINUOUS, +# str day_count_conv='Actual/365F', double recov=0.4, +# str ticker="", Seniority sen=Senior, +# DocClause doc=XR14): +# """ +# Alternative constructor for flat hazard rate Curve. + +# Parameters +# ---------- +# base_date : datetime.date +# Starting date of the curve +# rate : float +# Flat hazard rate. +# basis : int, optional +# Default to :data:`CONTINUOUS` +# day_count_conv : str, optional +# Default to 'Actual/365F' + +# """ +# cdef TDate base_date_c = pydate_to_TDate(base_date) +# cdef SpreadCurve sc = SpreadCurve.__new__(SpreadCurve) +# cdef TDate max_date = 200000 # can go higher but this should be more than enough +# cdef string ticker_cpp = ticker +# cdef double* recovery_rates + +# sc._thisptr.reset(JpmcdsMakeTCurve(base_date_c, &max_date, &rate, 1, +# basis, dcc(day_count_conv)), +# JpmcdsFreeTCurve) +# recovery_rates = malloc(sizeof(double)) +# recovery_rates[0] = recov +# sc.recovery_rates.reset(recovery_rates, double_free) +# sc.name = make_shared[CurveName](ticker_cpp, (sen), +# (doc)) +# return sc + +# @cython.boundscheck(False) +# def tweak_curve(self, double epsilon, bint multiplicative=True, +# unsigned long mask=-1, bint inplace=False): +# """ +# Tweak the survival curve in place. + +# Parameters +# ---------- +# epsilon : double +# tweaking factor (either additive or multiplicative) +# multiplicative : bool, optional +# do we scale by 1+epsilon or add epsilon (default multiplicative). +# mask : bitmask +# Default is tweak everything, otherwise only tweak values +# in the mask. +# """ +# cdef: +# const TCurve* curve_orig = get_TCurve(self) +# TCurve* curve_tweaked +# SpreadCurve sc +# int num_items = curve_orig.fNumItems +# double* recovery_rates + +# if not inplace: +# sc = SpreadCurve.__new__(SpreadCurve) +# curve_tweaked = JpmcdsCopyCurve(curve_orig) +# sc._thisptr.reset(curve_tweaked, JpmcdsFreeTCurve) +# sc.name = make_shared[CurveName](deref(self.name)) +# recovery_rates = malloc(sizeof(double) * num_items) +# sc.recovery_rates.reset(recovery_rates, double_free) +# memcpy(sc.recovery_rates.get(), self.recovery_rates.get(), +# num_items * sizeof(double)) +# else: +# sc = self +# curve_tweaked = curve_orig + +# if mask != 0: +# tweak_curve(curve_orig, curve_tweaked, epsilon, mask) +# return sc + +# @cython.boundscheck(False) +# def par_spread(self, today, step_in_date, start_date, end_dates, +# const double[:] recovery_rates, YieldCurve yc not None, +# bint pay_accrued_on_default=True): +# """ +# Parameters +# ---------- +# recovery_rates : should be same length as end_dates +# """ +# cdef TDate today_c = pydate_to_TDate(today) +# cdef TDate step_in_date_c = pydate_to_TDate(step_in_date) +# cdef TDate start_date_c = pydate_to_TDate(start_date) +# cdef int n_dates = len(end_dates) +# cdef TDate* end_dates_c = malloc(n_dates * sizeof(TDate)) +# cdef size_t i +# for i, d in enumerate(end_dates): +# end_dates_c[i] = pydate_to_TDate(d) +# cdef double* par_spreads + +# cdef TStubMethod stub_type +# if JpmcdsStringToStubMethod(b"f/s", &stub_type) != 0: +# free(end_dates_c) +# raise ValueError("can't convert stub") + +# cdef int result +# with nogil: +# par_spreads = malloc(n_dates * sizeof(double)) +# result = JpmcdsCdsParSpreads(today_c, +# step_in_date_c, +# start_date_c, +# n_dates, +# end_dates_c, +# pay_accrued_on_default, +# NULL, +# &stub_type, +# ACT_360, +# MODIFIED, +# b'NONE', +# get_TCurve(yc), +# get_TCurve(self), +# &recovery_rates[0], +# par_spreads) +# free(end_dates_c) +# cdef list r = [] +# if result != SUCCESS: +# free(par_spreads) +# raise ValueError("can't compute par spread") +# else: +# for i in range(n_dates): +# r.append(par_spreads[i]) +# free(par_spreads) +# return r @property def recovery_rates(self): - cdef np.npy_intp shape = get_TCurve(self).fNumItems + cdef np.npy_intp shape = self.get_TCurve().fNumItems cdef np.ndarray[np.float64_t] out = \ np.PyArray_SimpleNewFromData(1, &shape, np.NPY_DOUBLE, - self.recovery_rates.get()) + self.recovery_rates_ptr()) return out @property def ticker(self): - return self.name.get().ticker + return &self.name()[2] @property def full_ticker(self): - return self.name.get().full_ticker() + cdef CurveName cn = CurveName(self.name()) + return cn.full_ticker() @property def seniority(self): - return Seniority(self.name.get().seniority) + return Seniority(self.name()[0]) @property def doc_clause(self): - return DocClause(self.name.get().doc_clause) + return DocClause(self.name()[1]) @cython.cdivision(True) @cython.boundscheck(False) -cdef TCurve* _fill_curve(const TCurve* sc, const TDate* end_dates, int n_dates) nogil: +cdef void _fill_curve(const TCurve* sc, const TDate* end_dates, int n_dates, char* buf) nogil: cdef: size_t i TDate base_date = sc.fBaseDate double t - TCurve* curve = JpmcdsNewTCurve(base_date, n_dates, CONTINUOUS, 2) - TRatePt* it = curve.fArray + TCurve* curve = buf + curve.fArray = (buf + sizeof(TCurve)) + curve.fNumItems = n_dates + curve.fBaseDate = base_date + curve.fBasis = CONTINUOUS + curve.fDayCountConv = ACT_365F + cdef TRatePt* it = curve.fArray for i in range(n_dates): t = (end_dates[i] - base_date)/365. it[i].fDate = end_dates[i] it[i].fRate = -JpmcdsLogForwardZeroPrice(sc, base_date, end_dates[i]) / t - return curve diff --git a/pyisda/date.pxd b/pyisda/date.pxd index 0159ad8..5072556 100644 --- a/pyisda/date.pxd +++ b/pyisda/date.pxd @@ -1,10 +1,10 @@ from cpython cimport datetime as c_datetime cdef extern from "isda/yearfrac.h" nogil: - int JpmcdsStringToDayCountConv(char* day_count, long* type) - char* JpmcdsFormatDayCountConv(long dayCountConv) + int JpmcdsStringToDayCountConv(char* day_count, int* type) + char* JpmcdsFormatDayCountConv(int dayCountConv) -cdef long dcc(str day_count) except -1 +cdef int dcc(str day_count) except -1 cdef extern from "isda/cdate.h": ctypedef struct TDateInterval: @@ -45,9 +45,9 @@ cdef extern from "isda/ldate.h" nogil: int JpmcdsDtFwdAny(TDate date, TDateInterval* interval, TDate* sumDate) cdef enum DCC: - ACT_365 = 1L - ACT_365F = 2L - ACT_360 = 3L + ACT_365 = 1 + ACT_365F = 2 + ACT_360 = 3 cdef extern from "isda/busday.h" nogil: int JpmcdsDateFromBusDaysOffset(TDate fromDate, # (I) input date diff --git a/pyisda/date.pyx b/pyisda/date.pyx index 674413f..8b3db83 100644 --- a/pyisda/date.pyx +++ b/pyisda/date.pyx @@ -24,8 +24,8 @@ cpdef c_datetime.date TDate_to_pydate(TDate d): raise ValueError("incorrect date") -cdef long dcc(str day_count) except -1: - cdef long r +cdef int dcc(str day_count) except -1: + cdef int r cdef char* dc = day_count cdef int err = JpmcdsStringToDayCountConv(dc, &r) if err == 0: @@ -34,7 +34,7 @@ cdef long dcc(str day_count) except -1: raise ValueError('{0} is not a valid day count'.format(day_count)) -def dcc_tostring(long day_count): +def dcc_tostring(int day_count): cdef char* c_string = JpmcdsFormatDayCountConv(day_count) return c_string -- cgit v1.2.3-70-g09d2