# disk.py — gzip-compressing diskcache wrapper
import gzip

# Third-party on-disk cache; Disk is the storage-backend base class we extend.
from diskcache import FanoutCache, Disk
# BytesType / MODE_BINARY / BytesIO are re-exports used by the Disk subclass below.
from diskcache.core import BytesType, MODE_BINARY, BytesIO

# Project-local logging configuration wrapper.
from util.logconf import logging

# Module-level logger; INFO is the active level (WARN/DEBUG lines kept for quick toggling).
log = logging.getLogger(__name__)
# log.setLevel(logging.WARN)
log.setLevel(logging.INFO)
# log.setLevel(logging.DEBUG)
  9. class GzipDisk(Disk):
  10. def store(self, value, read, key=None):
  11. """
  12. Override from base class diskcache.Disk.
  13. Chunking is due to needing to work on pythons < 2.7.13:
  14. - Issue #27130: In the "zlib" module, fix handling of large buffers
  15. (typically 2 or 4 GiB). Previously, inputs were limited to 2 GiB, and
  16. compression and decompression operations did not properly handle results of
  17. 2 or 4 GiB.
  18. :param value: value to convert
  19. :param bool read: True when value is file-like object
  20. :return: (size, mode, filename, value) tuple for Cache table
  21. """
  22. # pylint: disable=unidiomatic-typecheck
  23. if type(value) is BytesType:
  24. if read:
  25. value = value.read()
  26. read = False
  27. str_io = BytesIO()
  28. gz_file = gzip.GzipFile(mode='wb', compresslevel=1, fileobj=str_io)
  29. for offset in range(0, len(value), 2**30):
  30. gz_file.write(value[offset:offset+2**30])
  31. gz_file.close()
  32. value = str_io.getvalue()
  33. return super(GzipDisk, self).store(value, read)
  34. def fetch(self, mode, filename, value, read):
  35. """
  36. Override from base class diskcache.Disk.
  37. Chunking is due to needing to work on pythons < 2.7.13:
  38. - Issue #27130: In the "zlib" module, fix handling of large buffers
  39. (typically 2 or 4 GiB). Previously, inputs were limited to 2 GiB, and
  40. compression and decompression operations did not properly handle results of
  41. 2 or 4 GiB.
  42. :param int mode: value mode raw, binary, text, or pickle
  43. :param str filename: filename of corresponding value
  44. :param value: database value
  45. :param bool read: when True, return an open file handle
  46. :return: corresponding Python value
  47. """
  48. value = super(GzipDisk, self).fetch(mode, filename, value, read)
  49. if mode == MODE_BINARY:
  50. str_io = BytesIO(value)
  51. gz_file = gzip.GzipFile(mode='rb', fileobj=str_io)
  52. read_csio = BytesIO()
  53. while True:
  54. uncompressed_data = gz_file.read(2**30)
  55. if uncompressed_data:
  56. read_csio.write(uncompressed_data)
  57. else:
  58. break
  59. value = read_csio.getvalue()
  60. return value
  61. def getCache(scope_str):
  62. return FanoutCache('data-unversioned/cache/' + scope_str,
  63. disk=GzipDisk,
  64. shards=64,
  65. timeout=1,
  66. size_limit=2e11,
  67. # disk_min_file_size=2**20,
  68. )
  69. # def disk_cache(base_path, memsize=2):
  70. # def disk_cache_decorator(f):
  71. # @functools.wraps(f)
  72. # def wrapper(*args, **kwargs):
  73. # args_str = repr(args) + repr(sorted(kwargs.items()))
  74. # file_str = hashlib.md5(args_str.encode('utf8')).hexdigest()
  75. #
  76. # cache_path = os.path.join(base_path, f.__name__, file_str + '.pkl.gz')
  77. #
  78. # if not os.path.exists(os.path.dirname(cache_path)):
  79. # os.makedirs(os.path.dirname(cache_path), exist_ok=True)
  80. #
  81. # if os.path.exists(cache_path):
  82. # return pickle_loadgz(cache_path)
  83. # else:
  84. # ret = f(*args, **kwargs)
  85. # pickle_dumpgz(cache_path, ret)
  86. # return ret
  87. #
  88. # return wrapper
  89. #
  90. # return disk_cache_decorator
  91. #
  92. #
  93. # def pickle_dumpgz(file_path, obj):
  94. # log.debug("Writing {}".format(file_path))
  95. # with open(file_path, 'wb') as file_obj:
  96. # with gzip.GzipFile(mode='wb', compresslevel=1, fileobj=file_obj) as gz_file:
  97. # pickle.dump(obj, gz_file, pickle.HIGHEST_PROTOCOL)
  98. #
  99. #
  100. # def pickle_loadgz(file_path):
  101. # log.debug("Reading {}".format(file_path))
  102. # with open(file_path, 'rb') as file_obj:
  103. # with gzip.GzipFile(mode='rb', fileobj=file_obj) as gz_file:
  104. # return pickle.load(gz_file)
  105. #
  106. #
  107. # def dtpath(dt=None):
  108. # if dt is None:
  109. # dt = datetime.datetime.now()
  110. #
  111. # return str(dt).rsplit('.', 1)[0].replace(' ', '--').replace(':', '.')
  112. #
  113. #
  114. # def safepath(s):
  115. # s = s.replace(' ', '_')
  116. # return re.sub('[^A-Za-z0-9_.-]', '', s)