summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGuillaume Horel <guillaume.horel@gmail.com>2017-06-06 14:52:27 -0400
committerGuillaume Horel <guillaume.horel@gmail.com>2017-06-06 14:52:27 -0400
commit9638e84f90e3661d75d44231295b48cbf5a63d59 (patch)
tree5513d098dd50dcaa2e8770e51a5ba38ce4bbd4e2
parentb0ac54500bc53f5f41bebefaf726548666a705f2 (diff)
downloadpyisda-9638e84f90e3661d75d44231295b48cbf5a63d59.tar.gz
add hand written serialize and deserialize function in C
-rw-r--r--pyisda/curve.pxd40
-rw-r--r--pyisda/curve.pyx244
-rw-r--r--setup.py2
3 files changed, 205 insertions, 81 deletions
diff --git a/pyisda/curve.pxd b/pyisda/curve.pxd
index 14c881c..632d2e1 100644
--- a/pyisda/curve.pxd
+++ b/pyisda/curve.pxd
@@ -3,7 +3,9 @@ from legs cimport TContingentLeg, TFeeLeg
from libcpp.vector cimport vector
from libcpp cimport bool
from libcpp.string cimport string
-
+from libc.string cimport memcpy
+from libc.stdlib cimport malloc, calloc, free
+from libc.stdint cimport uint64_t
cdef extern from "isda/zerocurve.h" nogil:
ctypedef int TBoolean
@@ -36,6 +38,39 @@ cdef extern from "isda/bastypes.h":
TDate fDate
double fRate
+cdef extern from "city.h" nogil:
+ uint64_t c_CityHash64 "CityHash64" (char *buff, size_t len)
+
+cdef inline size_t TCurve_size(int num_items) nogil:
+ return sizeof(int) + sizeof(TDate) + sizeof(double) + \
+ sizeof(long) + sizeof(TRatePt) * num_items
+
+cdef inline void serialize(TCurve* curve, unsigned char* buf) nogil:
+ memcpy(buf, &(curve.fNumItems), sizeof(curve.fNumItems))
+ buf += sizeof(curve.fNumItems)
+ memcpy(buf, curve.fArray, sizeof(TRatePt) * curve.fNumItems)
+ buf += sizeof(TRatePt) * curve.fNumItems
+ memcpy(buf, &(curve.fBaseDate), sizeof(TDate))
+ buf += sizeof(TDate)
+ memcpy(buf, &(curve.fBasis), sizeof(double))
+ buf += sizeof(double)
+ memcpy(buf, &(curve.fDayCountConv), sizeof(long))
+
+cdef inline unsigned char* unserialize(unsigned char* buf, TCurve* curve) nogil:
+ memcpy(&curve.fNumItems, buf, sizeof(curve.fNumItems))
+ buf += sizeof(curve.fNumItems)
+ cdef size_t array_size = sizeof(TRatePt) * curve.fNumItems
+ curve.fArray = <TRatePt*>malloc(array_size)
+ memcpy(curve.fArray, buf, array_size)
+ buf += array_size
+ memcpy(&curve.fBaseDate, buf, sizeof(TDate))
+ buf += sizeof(TDate)
+ memcpy(&curve.fBasis, buf, sizeof(double))
+ buf += sizeof(double)
+ memcpy(&curve.fDayCountConv, buf, sizeof(long))
+ buf += sizeof(long)
+ return buf
+
cdef extern from "isda/cds.h" nogil:
TCurve* JpmcdsCleanSpreadCurve(
# Risk starts at the end of today
@@ -159,8 +194,7 @@ cdef class Curve:
cdef shared_ptr[TCurve] _thisptr
cdef class YieldCurve(Curve):
- cdef TDate* _dates
- cdef size_t _ninstr
+ cdef vector[TDate] dates
cdef class SpreadCurve(Curve):
cdef string ticker
diff --git a/pyisda/curve.pyx b/pyisda/curve.pyx
index 6819def..082d5e0 100644
--- a/pyisda/curve.pyx
+++ b/pyisda/curve.pyx
@@ -1,8 +1,7 @@
from libc.stdlib cimport malloc, free, calloc
-from libc.string cimport memcpy
from libc.math cimport log1p, log
from libcpp.vector cimport vector
-
+from libcpp.string cimport string
from date cimport (JpmcdsStringToDateInterval, pydate_to_TDate, dcc,
JpmcdsDateIntervalToFreq, JpmcdsDateFwdThenAdjust, TDate_to_pydate,
JpmcdsDateFromBusDaysOffset, JpmcdsStringToDayCountConv, ACT_360)
@@ -28,46 +27,45 @@ cpdef public enum BadDay:
NONE = <long>'N'
MODIFIED = <long>'M'
-cdef inline shared_ptr[TCurve] make_shared(TCurve* ptr):
+cdef inline shared_ptr[TCurve] make_shared(TCurve* ptr) nogil:
return shared_ptr[TCurve](ptr, JpmcdsFreeTCurve)
cdef class Curve(object):
def __getstate__(self):
- cdef TCurve* curve = self._thisptr.get()
- cdef int num_items = curve.fNumItems
- return (num_items,
- <bytes>(<char*>curve.fArray)[:sizeof(TRatePt)*num_items],
- curve.fBaseDate,
- curve.fBasis,
- curve.fDayCountConv)
+ cdef:
+ TCurve* curve = self._thisptr.get()
+ size_t curve_size = TCurve_size(curve.fNumItems)
+ unsigned char* buf = <unsigned char*>malloc(curve_size * sizeof(unsigned char))
+ serialize(curve, buf)
+ return <bytes>buf[:curve_size]
- def __setstate__(self, state):
+ def __setstate__(self, bytes state):
cdef:
- int num_items
- char* rates
- TDate base_date
- double basis
- long dcc
- TCurve* new_curve
- num_items, rates, base_date, basis, dcc = state
- new_curve = <TCurve*>malloc(sizeof(TCurve))
- new_curve.fNumItems = num_items
- new_curve.fArray = <TRatePt*>malloc(sizeof(TRatePt) * num_items)
- memcpy(new_curve.fArray, rates, sizeof(TRatePt) * num_items)
- new_curve.fBaseDate = base_date
- new_curve.fBasis = basis
- new_curve.fDayCountConv = dcc
- self._thisptr = make_shared(new_curve)
+ TCurve* curve = <TCurve*>malloc(sizeof(TCurve))
+ unsigned char* cursor = state
+ unserialize(cursor, curve)
+ self._thisptr = make_shared(curve)
+
+ @classmethod
+ def from_bytes(cls, bytes state):
+ cdef:
+ Curve instance = cls.__new__(cls)
+ unsigned char* cursor = state
+ TCurve* curve = <TCurve*>malloc(sizeof(TCurve))
+ unserialize(cursor, curve)
+ instance._thisptr = make_shared(curve)
+ return instance
def __hash__(self):
- cdef TCurve* curve = self._thisptr.get()
- cdef int num_items = curve.fNumItems
- return hash((num_items,
- <bytes>(<char*>curve.fArray)[:sizeof(TRatePt)*num_items],
- curve.fBaseDate,
- curve.fBasis,
- curve.fDayCountConv))
+ cdef:
+ TCurve* curve = self._thisptr.get()
+ size_t curve_size = TCurve_size(curve.fNumItems)
+ unsigned char* buf = <unsigned char*>malloc(curve_size * sizeof(unsigned char))
+ serialize(curve, buf)
+ cdef uint64_t r = c_CityHash64(<char*>buf, curve_size)
+ free(buf)
+ return r
def inspect(self):
""" method to inspect the content of the C struct
@@ -84,6 +82,7 @@ cdef class Curve(object):
'data': fArray_to_list(self._thisptr.get().fArray, self._thisptr.get().fNumItems)}
@cython.boundscheck(False)
+ @cython.cdivision(True)
def to_series(self, bint forward=True):
cdef np.npy_intp n = self._thisptr.get().fNumItems
cdef np.ndarray[np.float64_t,ndim=1] h = np.PyArray_EMPTY(1, &n, np.NPY_DOUBLE, 0)
@@ -93,7 +92,7 @@ cdef class Curve(object):
cdef double t1, h1, t2, h2
t1 = 0
h1 = 0
- cdef base_date = self._thisptr.get().fBaseDate
+ cdef int base_date = self._thisptr.get().fBaseDate
if forward:
for i in range(n):
h2 = it[i].fRate
@@ -230,16 +229,18 @@ cdef class YieldCurve(Curve):
char* routine = 'zerocurve'
TDate value_date = pydate_to_TDate(date)
- self._ninstr = len(periods)
- self._dates = <TDate*>malloc(self._ninstr * sizeof(TDate))
+ self.dates = vector[TDate](len(periods))
cdef TDate settle_date
- if JpmcdsDateFromBusDaysOffset(value_date, 2, "None", &settle_date)!= SUCCESS:
+ if JpmcdsDateFromBusDaysOffset(value_date, 2, "None", &settle_date) != SUCCESS:
raise ValueError
- cdef TDateInterval tmp
- cdef long period_adjust
+ cdef:
+ TDateInterval tmp
+ long period_adjust
+ size_t i
+
for i, p in enumerate(periods):
period_bytes = p.encode('utf-8')
if JpmcdsStringToDateInterval(period_bytes, routine, &tmp) != SUCCESS:
@@ -249,7 +250,7 @@ cdef class YieldCurve(Curve):
else:
period_adjust = NONE
if JpmcdsDateFwdThenAdjust(settle_date, &tmp, period_adjust,
- "None", &self._dates[i]) != SUCCESS:
+ "None", &self.dates[i]) != SUCCESS:
raise ValueError('Invalid interval')
cdef bytes fixed_bytes = fixed_swap_period.encode('utf-8')
@@ -266,45 +267,86 @@ cdef class YieldCurve(Curve):
raise ValueError
self._thisptr = make_shared(JpmcdsBuildIRZeroCurve(
- value_date, types_bytes, self._dates,
- &rates[0], self._ninstr, dcc(mm_dcc), <long> fixed_freq,
+ value_date, types_bytes, self.dates.data(),
+ &rates[0], self.dates.size(), dcc(mm_dcc), <long> fixed_freq,
<long> float_freq, dcc(fixed_swap_dcc), dcc(float_swap_dcc),
bad_day_conv, b"None"
))
- def __dealloc__(self):
- ## __dealloc__ of superclass is called by cython so no need to call here
- if self._dates:
- free(self._dates)
-
def __getstate__(self):
- cdef Py_ssize_t size = sizeof(TRatePt) * self._ninstr
- cdef bytes dates = (<char*>self._dates)[:size]
- return super().__getstate__() + (self._ninstr, dates)
+ cdef:
+ TCurve* curve = self._thisptr.get()
+ size_t size = TCurve_size(curve.fNumItems)
+ size_t buf_size = size + sizeof(size_t) + sizeof(TDate) * self.dates.size()
+ unsigned char* buf = <unsigned char*>malloc(buf_size)
+ unsigned char* cursor = buf + size
+ serialize(curve, buf)
+ size = self.dates.size()
+ memcpy(cursor, &size, sizeof(size_t))
+ cursor += sizeof(size_t)
+ memcpy(cursor, self.dates.data(), sizeof(TDate) * size)
+ return <bytes>buf[:buf_size]
+
+ def __setstate__(self, bytes state):
+ cdef:
+ TCurve* curve = <TCurve*>malloc(sizeof(TCurve))
+ unsigned char* cursor = state
+ size_t num_instr
+
+ cursor = unserialize(cursor, curve)
+ self._thisptr = make_shared(curve)
+ memcpy(&num_instr, cursor, sizeof(size_t))
+ cursor += sizeof(size_t)
+ self.dates = vector[TDate](num_instr)
+ memcpy(self.dates.data(), cursor, num_instr * sizeof(TDate))
- def __setstate__(self, state):
- super().__setstate__(state[:5])
- self._ninstr = <int>state[5]
- cdef size_t size = sizeof(TRatePt) * self._ninstr
- self._dates = <TDate*>malloc(size)
- memcpy(self._dates, <char*> state[6], size)
+ @classmethod
+ def from_bytes(cls, bytes state):
+ cdef:
+ YieldCurve instance = cls.__new__(cls)
+ unsigned char* cursor = state
+ TCurve* curve = <TCurve*>malloc(sizeof(TCurve))
+ size_t num_instr
+
+ cursor = unserialize(cursor, curve)
+ instance._thisptr = make_shared(curve)
+ memcpy(&num_instr, cursor, sizeof(size_t))
+ cursor += sizeof(size_t)
+ instance.dates = vector[TDate](num_instr)
+ memcpy(instance.dates.data(), cursor, num_instr * sizeof(TDate))
+ return instance
+
+ def __hash__(self):
+ cdef:
+ TCurve* curve = self._thisptr.get()
+ size_t size = TCurve_size(curve.fNumItems)
+ size_t buf_size = size + sizeof(size_t) + sizeof(TDate) * self.dates.size()
+ unsigned char* buf = <unsigned char*>malloc(buf_size)
+ unsigned char* cursor = buf + size
+ serialize(curve, buf)
+ size = self.dates.size()
+ memcpy(cursor, &size, sizeof(size_t))
+ cursor += sizeof(size_t)
+ memcpy(cursor, self.dates.data(), sizeof(TDate) * size)
+ cdef uint64_t r = c_CityHash64(<char*>buf, buf_size)
+ free(buf)
+ return r
@classmethod
def from_discount_factors(cls, base_date, list dates, double[:] dfs, str day_count_conv):
""" build a yield curve from a list of discount factors """
cdef TDate base_date_c = pydate_to_TDate(base_date)
cdef YieldCurve yc = cls.__new__(cls)
- yc._ninstr = len(dates)
- yc._dates = <TDate*>malloc(sizeof(TDate) * yc._ninstr)
+ yc.dates = vector[TDate](len(dates))
cdef size_t i
- cdef double* rates = <double*>malloc(sizeof(double) * yc._ninstr)
+ cdef double* rates = <double*>malloc(sizeof(double) * yc.dates.size())
for i, d in enumerate(dates):
- yc._dates[i] = pydate_to_TDate(d)
- JpmcdsDiscountToRateYearFrac(dfs[i], <double>(yc._dates[i]-base_date_c)/365.,
+ yc.dates[i] = pydate_to_TDate(d)
+ JpmcdsDiscountToRateYearFrac(dfs[i], <double>(yc.dates[i]-base_date_c)/365.,
<double>1, &rates[i])
yc._thisptr = make_shared(
- JpmcdsMakeTCurve(base_date_c, yc._dates, rates, dfs.shape[0],
+ JpmcdsMakeTCurve(base_date_c, yc.dates.data(), rates, dfs.shape[0],
<double>1, dcc(day_count_conv)))
return yc
@@ -315,30 +357,28 @@ cdef class YieldCurve(Curve):
""" returns the list of instrument dates
"""
- cdef size_t i
- return [TDate_to_pydate(self._dates[i]) for i in range(self._ninstr)]
+ return [TDate_to_pydate(d) for d in self.dates]
def expected_forward_curve(self, forward_date):
""" returns the expected forward curve """
cdef TDate forward_date_c = pydate_to_TDate(forward_date)
cdef YieldCurve yc = YieldCurve.__new__(YieldCurve)
cdef size_t i = 0
- while self._dates[i] < forward_date_c:
+ while self.dates[i] < forward_date_c:
i += 1
- yc._ninstr = self._ninstr - i
- yc._dates = <TDate*>malloc(sizeof(TDate) * (self._ninstr-i))
- cdef double* rates = <double*>malloc(sizeof(double) * yc._ninstr)
+ yc.dates = vector[TDate](self.dates.size() - i)
+ cdef double* rates = <double*>malloc(sizeof(double) * yc.dates.size())
cdef size_t k
cdef double df
- for k in range(yc._ninstr):
- yc._dates[k] = self._dates[i]
- df = JpmcdsForwardZeroPrice(self._thisptr.get(), forward_date_c, self._dates[i])
+ for k in range(yc.dates.size()):
+ yc.dates[k] = self.dates[i]
+ df = JpmcdsForwardZeroPrice(self._thisptr.get(), forward_date_c, self.dates[i])
JpmcdsDiscountToRateYearFrac(
- df, <double>(self._dates[i] - forward_date_c)/365.,
+ df, <double>(self.dates[i] - forward_date_c)/365.,
<double>1, &rates[k])
i += 1
yc._thisptr = make_shared(JpmcdsMakeTCurve(
- forward_date_c, yc._dates, rates, yc._ninstr,
+ forward_date_c, yc.dates.data(), rates, yc.dates.size(),
<double>1, self._thisptr.get().fDayCountConv))
return yc
@@ -453,11 +493,61 @@ cdef class SpreadCurve(Curve):
survival_probability = Curve.__forward_zero_price
def __getstate__(self):
- return super().__getstate__() + (self.ticker,)
+ cdef:
+ TCurve* curve = self._thisptr.get()
+ size_t size = TCurve_size(curve.fNumItems)
+ size_t buf_size = size + sizeof(size_t) + self.ticker.length()
+ unsigned char* buf = <unsigned char*>malloc(buf_size)
+ unsigned char* cursor = buf + size
+ serialize(curve, buf)
+ size = self.ticker.length()
+ memcpy(cursor, &size, sizeof(size_t))
+ cursor += sizeof(size_t)
+ self.ticker.copy(<char*>cursor, size, 0)
+ return <bytes>buf[:buf_size]
- def __setstate__(self, state):
- super().__setstate__(state[:5])
- self.ticker = state[5]
+ def __setstate__(self, bytes state):
+ cdef:
+ TCurve* curve = <TCurve*>malloc(sizeof(TCurve))
+ unsigned char* cursor = state
+ size_t ticker_length
+
+ cursor = unserialize(cursor, curve)
+ self._thisptr = make_shared(curve)
+ memcpy(&ticker_length, cursor, sizeof(size_t))
+ cursor += sizeof(size_t)
+ self.ticker = string(<char*>cursor, ticker_length)
+
+ @classmethod
+ def from_bytes(cls, bytes state):
+ cdef:
+ SpreadCurve instance = cls.__new__(cls)
+ unsigned char* cursor = state
+ TCurve* curve = <TCurve*>malloc(sizeof(TCurve))
+ size_t ticker_length
+
+ cursor = unserialize(cursor, curve)
+ instance._thisptr = make_shared(curve)
+ memcpy(&ticker_length, cursor, sizeof(size_t))
+ cursor += sizeof(size_t)
+ instance.ticker = string(<char*>cursor, ticker_length)
+ return instance
+
+ def __hash__(self):
+ cdef:
+ TCurve* curve = self._thisptr.get()
+ size_t size = TCurve_size(curve.fNumItems)
+ size_t buf_size = size + sizeof(size_t) + self.ticker.length()
+ unsigned char* buf = <unsigned char*>malloc(buf_size)
+ unsigned char* cursor = buf + size
+ serialize(curve, buf)
+ size = self.ticker.length()
+ memcpy(cursor, &size, sizeof(size_t))
+ cursor += sizeof(size_t)
+ self.ticker.copy(<char*>cursor, size, 0)
+ cdef uint64_t r = c_CityHash64(<char*>buf, buf_size)
+ free(buf)
+ return r
@classmethod
def from_flat_hazard(cls, base_date, double rate, Basis basis=CONTINUOUS,
diff --git a/setup.py b/setup.py
index 6f22f1a..b7cd880 100644
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@ import numpy
all_extensions = Extension("*", ["pyisda/*.pyx"],
include_dirs = ['c_layer', numpy.get_include()],
- libraries = ["cds"],
+ libraries = ["cds", "cityhash"],
language = 'c++')
c_extension = Extension("pyisda.flat_hazard",