| # |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| # |
| |
| """TZlibTransport provides a compressed transport and transport factory |
| class, using the python standard library zlib module to implement |
| data compression. |
| """ |
| |
| from __future__ import division |
| import zlib |
| from cStringIO import StringIO |
| |
| from thrift.transport.TTransport import TTransportBase, CReadableTransport |
| |
| |
class TZlibTransportFactory(object):
    """Factory transport that builds zlib compressed transports.

    This factory caches the last single client/transport that it was passed
    and returns the same TZlibTransport object that was created.

    This caching means the TServer class will get the _same_ transport
    object for both input and output transports from this factory.
    (For non-threaded scenarios only, since the cache only holds one object)

    The purpose of this caching is to allocate only one TZlibTransport where
    only one is really needed (since it must have separate read/write buffers),
    and makes the statistics from getCompSavings() and getCompRatio()
    easier to understand.
    """
    # Class scoped defaults for the one-entry cache (last transport given
    # and the TZlibTransport returned for it). getTransport() rebinds both
    # names on the instance, so each factory object keeps its own cache.
    _last_trans = None
    _last_z = None

    def getTransport(self, trans, compresslevel=9):
        """Wrap a transport, trans, with the TZlibTransport
        compressed transport class, returning a new
        transport to the caller.

        @param trans: The transport object to wrap.
        @param compresslevel: The zlib compression level, ranging
        from 0 (no compression) to 9 (best compression). Defaults to 9.
        Only used when a new TZlibTransport is created; it is ignored
        when the cached wrapper is returned.
        @type compresslevel: int

        This method returns a TZlibTransport which wraps the
        passed C{trans} TTransport derived instance. If C{trans} is the
        very same object as on the previous call, the TZlibTransport
        created by that previous call is returned instead of a new one.
        """
        # The cache is keyed on object identity, not equality: two distinct
        # transports that merely compare equal must never share one zlib
        # stream. The "is not None" guard keeps an empty cache from
        # matching trans=None and handing None back to the caller.
        if self._last_z is not None and trans is self._last_trans:
            return self._last_z
        ztrans = TZlibTransport(trans, compresslevel)
        self._last_trans = trans
        self._last_z = ztrans
        return ztrans
| |
| |
class TZlibTransport(TTransportBase, CReadableTransport):
    """Class that wraps a transport with zlib, compressing writes
    and decompresses reads, using the python standard
    library zlib module.

    Writes are buffered in memory and only compressed and handed to the
    wrapped transport when flush() is called. Reads pull compressed data
    from the wrapped transport, decompress it, and serve it from an
    internal read buffer.
    """
    # Read buffer size for the python fastbinary C extension,
    # the TBinaryProtocolAccelerated class.
    DEFAULT_BUFFSIZE = 4096

    # True when the most recent readComp() call got no data at all from the
    # wrapped transport. read() uses it to stop instead of spinning forever.
    _read_eof = False

    def __init__(self, trans, compresslevel=9):
        """Create a new TZlibTransport, wrapping C{trans}, another
        TTransport derived object.

        @param trans: A thrift transport object, i.e. a TSocket() object.
        @type trans: TTransport
        @param compresslevel: The zlib compression level, ranging
        from 0 (no compression) to 9 (best compression). Default is 9.
        @type compresslevel: int
        """
        self.__trans = trans
        self.compresslevel = compresslevel
        self._reinit_buffers()
        self._init_zlib()
        self._init_stats()

    def _reinit_buffers(self):
        """Internal method to initialize/reset the internal StringIO objects
        for read and write buffers.
        """
        self.__rbuf = StringIO()
        self.__wbuf = StringIO()

    def _init_stats(self):
        """Internal method to reset the internal statistics counters
        for compression ratios and bandwidth savings.

        bytes_in / bytes_out count uncompressed payload bytes, while
        bytes_in_comp / bytes_out_comp count the compressed bytes
        exchanged with the wrapped transport.
        """
        self.bytes_in = 0
        self.bytes_out = 0
        self.bytes_in_comp = 0
        self.bytes_out_comp = 0

    def _init_zlib(self):
        """Internal method for setting up the zlib compression and
        decompression objects.
        """
        self._zcomp_read = zlib.decompressobj()
        self._zcomp_write = zlib.compressobj(self.compresslevel)

    def getCompRatio(self):
        """Get the current measured compression ratios (in,out) from
        this transport.

        Returns a tuple of:
        (inbound_compression_ratio, outbound_compression_ratio)

        The compression ratios are computed as:
        compressed / uncompressed

        E.g., data that compresses by 10x will have a ratio of: 0.10
        and data that compresses to half of its original size will
        have a ratio of 0.5

        None is returned if no bytes have yet been processed in
        a particular direction.
        """
        r_percent, w_percent = (None, None)
        # True division is intended here; the module enables it with
        # "from __future__ import division".
        if self.bytes_in > 0:
            r_percent = self.bytes_in_comp / self.bytes_in
        if self.bytes_out > 0:
            w_percent = self.bytes_out_comp / self.bytes_out
        return (r_percent, w_percent)

    def getCompSavings(self):
        """Get the current count of saved bytes due to data
        compression.

        Returns a tuple of:
        (inbound_saved_bytes, outbound_saved_bytes)

        Note: if compression is actually expanding your
        data (only likely with very tiny thrift objects), then
        the values returned will be negative.
        """
        r_saved = self.bytes_in - self.bytes_in_comp
        w_saved = self.bytes_out - self.bytes_out_comp
        return (r_saved, w_saved)

    def isOpen(self):
        """Return the underlying transport's open status"""
        return self.__trans.isOpen()

    def open(self):
        """Open the underlying transport, resetting the statistics
        counters first.
        """
        self._init_stats()
        return self.__trans.open()

    def listen(self):
        """Invoke the underlying transport's listen() method"""
        self.__trans.listen()

    def accept(self):
        """Accept connections on the underlying transport"""
        return self.__trans.accept()

    def close(self):
        """Close the underlying transport, after discarding any buffered
        data and starting fresh zlib compression/decompression objects.
        """
        self._reinit_buffers()
        self._init_zlib()
        return self.__trans.close()

    def read(self, sz):
        """Read up to sz bytes from the decompressed bytes buffer, and
        read from the underlying transport if the decompression
        buffer is empty.

        Returns an empty string if the underlying transport returns no
        data at all while nothing decompressed is available.
        """
        ret = self.__rbuf.read(sz)
        if len(ret) > 0:
            return ret
        # keep reading from transport until something comes back
        while not self.readComp(sz):
            if self._read_eof:
                # The wrapped transport had nothing to give; looping again
                # would spin forever, so report "no data" to the caller.
                return ''
        return self.__rbuf.read(sz)

    def readComp(self, sz):
        """Read compressed data from the underlying transport, then
        decompress it and append it to the internal StringIO read buffer

        @param sz: Maximum number of compressed bytes to request from the
        underlying transport.
        @type sz: int

        Returns True if the read buffer holds any decompressed data
        afterwards, otherwise False.
        """
        raw = self.__trans.read(sz)
        self._read_eof = len(raw) == 0
        zbuf = self._zcomp_read.unconsumed_tail + raw
        buf = self._zcomp_read.decompress(zbuf)
        # Same convention as flush(): bytes_in counts the uncompressed
        # payload and bytes_in_comp the compressed bytes. Only the newly
        # received bytes are counted, so a re-fed tail is never counted twice.
        self.bytes_in += len(buf)
        self.bytes_in_comp += len(raw)
        # Rebuild the read buffer from its unread remainder plus new data.
        old = self.__rbuf.read()
        self.__rbuf = StringIO(old + buf)
        return len(old) + len(buf) > 0

    def write(self, buf):
        """Write some bytes, putting them into the internal write
        buffer for eventual compression.
        """
        self.__wbuf.write(buf)

    def flush(self):
        """Flush any queued up data in the write buffer and ensure the
        compression buffer is flushed out to the underlying transport
        """
        wout = self.__wbuf.getvalue()
        if len(wout) > 0:
            zbuf = self._zcomp_write.compress(wout)
            self.bytes_out += len(wout)
            self.bytes_out_comp += len(zbuf)
        else:
            zbuf = ''
        # Z_SYNC_FLUSH emits all pending compressed output while leaving
        # the compression object usable for later writes.
        ztail = self._zcomp_write.flush(zlib.Z_SYNC_FLUSH)
        self.bytes_out_comp += len(ztail)
        if (len(zbuf) + len(ztail)) > 0:
            self.__wbuf = StringIO()
            self.__trans.write(zbuf + ztail)
        self.__trans.flush()

    @property
    def cstringio_buf(self):
        """Implement the CReadableTransport interface"""
        return self.__rbuf

    def cstringio_refill(self, partialread, reqlen):
        """Implement the CReadableTransport interface for refill

        @param partialread: Data the caller has already taken out of the
        read buffer.
        @param reqlen: Total number of bytes the caller needs, including
        C{partialread}.
        @type reqlen: int

        Returns a new read buffer that starts with C{partialread} and
        holds at least C{reqlen} bytes. Raises EOFError if the transport
        runs out of data before C{reqlen} bytes are available.
        """
        retstring = partialread
        if reqlen < self.DEFAULT_BUFFSIZE:
            # Small request: try to read a whole default-sized buffer.
            retstring += self.read(self.DEFAULT_BUFFSIZE)
        while len(retstring) < reqlen:
            chunk = self.read(reqlen - len(retstring))
            if len(chunk) == 0:
                # No more data is coming; fail instead of looping forever.
                raise EOFError()
            retstring += chunk
        self.__rbuf = StringIO(retstring)
        return self.__rbuf