diff options
| author | Guillaume Horel <guillaume.horel@gmail.com> | 2019-12-18 14:49:01 -0500 |
|---|---|---|
| committer | Guillaume Horel <guillaume.horel@gmail.com> | 2019-12-18 14:49:01 -0500 |
| commit | 5cd0cd4a573e2ce68b09f332178ee4839caad0f9 (patch) | |
| tree | 4766121f9a8b57187a69a6b0b6a8a664d6b0efef | |
| parent | 2a869b4cd7d56316a52ed14bf80d3054e0f5d1ca (diff) | |
| download | pyisda-5cd0cd4a573e2ce68b09f332178ee4839caad0f9.tar.gz | |
allow to compress buffer representation of SpreadCurve
| -rw-r--r-- | pyisda/curve.pxd | 1 | ||||
| -rw-r--r-- | pyisda/curve.pyx | 77 | ||||
| -rw-r--r-- | setup.py | 2 |
3 files changed, 60 insertions, 20 deletions
diff --git a/pyisda/curve.pxd b/pyisda/curve.pxd index 88fad34..3d57cb1 100644 --- a/pyisda/curve.pxd +++ b/pyisda/curve.pxd @@ -258,6 +258,7 @@ cdef class SpreadCurve(Curve): cdef shared_ptr[CurveName] name cdef shared_ptr[double] recovery_rates cdef TDate defaulted + cdef bytes as_buffer(self, bint compressed) cdef fArray_to_list(TRatePt* fArray, int fNumItems) diff --git a/pyisda/curve.pyx b/pyisda/curve.pyx index b7b94e1..2004528 100644 --- a/pyisda/curve.pyx +++ b/pyisda/curve.pyx @@ -16,6 +16,7 @@ import numpy as np np.import_array() import pandas as pd from cpython cimport Py_buffer +from cpython.bytes cimport PyBytes_GET_SIZE cdef extern from "Python.h": int PyMemoryView_Check(object) @@ -28,6 +29,11 @@ cdef extern from "numpy/arrayobject.h": int PyArray_TYPE(object) np.npy_intp PyArray_Size(object) +cdef extern from "lz4.h" nogil: + int LZ4_compress_default(const char* src, char* dst, int srcSize, int dstCapacity) + int LZ4_decompress_safe(const char* src, char* dst, int compressedSize, int dstCapacity) + int LZ4_compressBound(int inputSize) + cdef int SUCCESS = 0 cdef inline void double_free(double* ptr) nogil: @@ -625,19 +631,34 @@ cdef class SpreadCurve(Curve): sizeof(TDate) + self.name.get().size() def __getstate__(self): + return self.as_buffer(False) + + cdef bytes as_buffer(self, bint compressed): cdef: const TCurve* curve = get_TCurve(self) - size_t buf_size = self.size() size_t size_recovery = curve.fNumItems * sizeof(double) - unsigned char* buf = <unsigned char*>malloc(buf_size) - unsigned char* cursor = serialize(curve, buf) - + size_t buf_size = TCurve_size(curve) + size_recovery + sizeof(TDate) + \ + self.name.get().size() + char* buf = <char*>malloc(buf_size) + unsigned char* cursor = serialize(curve, <unsigned char*>buf) + int dst_capacity, compressed_size + char* dst + bytes r memcpy(cursor, self.recovery_rates.get(), size_recovery) cursor += size_recovery memcpy(cursor, &self.defaulted, sizeof(TDate)) cursor += sizeof(TDate) self.name.get().serialize(cursor) - cdef bytes r = buf[:buf_size] + + if compressed: + dst_capacity = LZ4_compressBound(buf_size) + dst = <char*>malloc(dst_capacity) + compressed_size = LZ4_compress_default(buf, dst, buf_size, dst_capacity) + r = dst[:compressed_size] + free(dst) + else: + r = buf[:buf_size] + free(buf) return r @@ -681,30 +702,48 @@ cdef class SpreadCurve(Curve): return TDate_to_pydate(self.defaulted) @classmethod - def from_bytes(cls, bytes state): + def from_bytes(cls, object state, bint compressed=False): cdef: SpreadCurve instance = SpreadCurve.__new__(SpreadCurve) const unsigned char* cursor + const char* src + char* dst = <char*>malloc(500) TCurve* curve = <TCurve*>malloc(sizeof(TCurve)) - size_t recovery_size + size_t size Py_buffer* py_buf + if PyMemoryView_Check(state): py_buf = PyMemoryView_GET_BUFFER(state) - cursor = <unsigned char*>py_buf.buf + src = <char*>py_buf.buf + size = py_buf.len else: - cursor = <bytes?>state + src = <bytes?>state + size = PyBytes_GET_SIZE(src) + with nogil: + if compressed: + dst = <char*>malloc(500) + if LZ4_decompress_safe(src, dst, size, 500) < 0: + free(dst) + raise MemoryError("something went wrong") + else: + cursor = <unsigned char*>dst + else: + cursor = <unsigned char*>src + cursor = deserialize(cursor, curve) + size = curve.fNumItems * sizeof(double) + instance.recovery_rates = shared_ptr[double](<double*>malloc(size), + double_free) + + instance._thisptr.reset(curve, JpmcdsFreeTCurve) + memcpy(instance.recovery_rates.get(), cursor, size) + cursor += size + memcpy(&instance.defaulted, cursor, sizeof(TDate)) + cursor += sizeof(TDate) + instance.name = make_shared[CurveName](cursor) + if compressed: + free(dst) - cursor = deserialize(cursor, curve) - instance._thisptr.reset(curve, JpmcdsFreeTCurve) - recovery_size = curve.fNumItems * sizeof(double) - instance.recovery_rates = shared_ptr[double](<double*>malloc(recovery_size), - double_free) - memcpy(instance.recovery_rates.get(), cursor, recovery_size) - cursor += recovery_size - memcpy(&instance.defaulted, cursor, sizeof(TDate)) - cursor += sizeof(TDate) - instance.name = make_shared[CurveName](cursor) return instance def __hash__(self): @@ -7,7 +7,7 @@ all_extensions = Extension( "*", ["pyisda/*.pyx"], include_dirs=["c_layer", numpy.get_include()], - libraries=["cds", "farmhash"], + libraries=["cds", "farmhash", "lz4"], language="c++", extra_compile_args=["-fopenmp"], extra_link_args=["-fopenmp", "-Wl,--strip-all"], |
