Spaces:

tomaszki
/

PythonFileCompressor

Sleeping

App Files Files Community

zb12138 commited on Oct 24, 2021

Commit

e389f7b

0 Parent(s):

numpyAc

Browse files

Files changed (6) hide show

README.md +85 -0
numpyAc/__init__.py +1 -0
numpyAc/backend/numpyAc_backend.cpp +347 -0
numpyAc/numpyAc.py +172 -0
test.py +27 -0
testTorchac.py +50 -0

README.md ADDED Viewed

	@@ -0,0 +1,85 @@

+# NumpyAc: Fast Autoregressive Arithmetic Coding
+## About
+This is a modified version of [torchac](https://github.com/fab-jul/torchac). NumpyAc takes NumPy arrays as input and can decode autoregressively, i.e. one symbol per call.
+The backend is written in C++ and the Python API works on NumPy arrays; PyTorch is still required because it builds and loads the C++ extension. The backend is compiled with ninja on first use.
+The implementation is based on [this blog post](https://marknelson.us/posts/2014/10/19/data-compression-with-arithmetic-coding.html),
+meaning that we implement _arithmetic coding_.
+While it could be further optimized, it is already much faster than doing the equivalent thing in pure-Python (because of all the
+ bit-shifts etc.).
+### Set up conda environment
+This library has been tested with
+- PyTorch 1.5, 1.6, 1.7
+- Python 3.8
+And that's all you need. Other versions of Python may also work,
+but on-the-fly ninja compilation only works for PyTorch 1.5+.
+### Example
+```python
+import numpyAc
+import numpy as np
+# Generate random symbols and pdf.
+dim = 128
+symsNum = 2000
+pdf = np.random.rand(symsNum,dim)
+pdf = pdf / (np.sum(pdf,1,keepdims=True))
+sym = np.random.randint(0,dim,symsNum,dtype=np.int16)
+output_pdf = pdf
+# Encode to bytestream.
+codec = numpyAc.arithmeticCoding()
+byte_stream,real_bits = codec.encode(pdf, sym,'out.b')
+# Number of bits taken by the stream.
+print('real_bits',real_bits)
+# Theoretical bits number
+print('shannon entropy',-int(np.log2(pdf[range(0,symsNum),sym]).sum()))
+# Decode from bytestream.
+decodec = numpyAc.arithmeticDeCoding(None,symsNum,dim,'out.b')
+# Autoregressive decoding and output will be equal to the input.
+for i,s in enumerate(sym):
+    assert decodec.decode(output_pdf[i:i+1,:]) == s
+```
+## Important Implementation Details
+### How we represent probability distributions
+The probabilities are specified as [PDFs](https://en.wikipedia.org/wiki/Probability_density_function).
+For each symbol to be coded we need one PDF. If there are `symsNum` symbols and each of them takes a value in `{0, ..., dim-1}`, the PDF array (shape `(symsNum, dim)`) must contain one row of `dim` probabilities for each of the `symsNum` symbols.
+**Example**:
+```
+For symsNum = 1, i.e. one particular symbol, let's say we have dim = 3 possible values.
+The 3 PDF entries give 4 CDF values that specify the symbol's distribution:
+symbol:        0     1     2
+pdf:          P(0)  P(1)  P(2)
+cdf:       C_0   C_1   C_2   C_3
+This corresponds to the 3 probabilities
+P(0) = C_1 - C_0
+P(1) = C_2 - C_1
+P(2) = C_3 - C_2
+where PDF =[[ P(0), P(1) ,P(2) ]]
+NOTE: The arithmetic coder assumes that P(0) + P(1) + P(2) = 1, C_0 = 0, C_3 = 1
+```
+The theoretical number of bits can be estimated with Shannon’s source coding theorem:
+$-\sum_{s}\log_2 P(s)$
+## Citation
+Reference from [torchac](https://github.com/fab-jul/torchac), thanks!

numpyAc/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ from numpyAc.numpyAc import arithmeticCoding,arithmeticDeCoding

numpyAc/backend/numpyAc_backend.cpp ADDED Viewed

	@@ -0,0 +1,347 @@

+/**
+ * COPYRIGHT 2020 ETH Zurich
+ * BASED on
+ *
+ * https://marknelson.us/posts/2014/10/19/data-compression-with-arithmetic-coding.html
+ */
+#include <torch/extension.h>
+#include <iostream>
+#include <vector>
+#include <tuple>
+#include <fstream>
+#include <algorithm>
+#include <string>
+#include <chrono>
+#include <numeric>
+#include <iterator>
+#include <bitset>
+using cdf_t = uint16_t;
+/**
+ * Non-owning view of a CDF table.
+ *
+ * `data` is expected to hold an N_sym x Lp matrix in row-major order.
+ */
+struct cdf_ptr {
+    cdf_t* data;      // first element of the table
+    const int N_sym;  // number of rows, i.e. symbols described by `data`
+    const int Lp;     // row length; == L+1, where L is the number of values a symbol can take
+
+    cdf_ptr(cdf_t* data_, const int n_sym, const int lp)
+        : data(data_), N_sym(n_sym), Lp(lp) {}
+};
+/** Collects single bits, most significant bit first, into a byte string. */
+class OutCacheString {
+public:
+    std::string out = "";  // bytes completed so far
+    uint8_t cache = 0;     // bits of the byte currently being assembled
+    uint8_t count = 0;     // number of bits currently held in `cache`
+
+    /** Shift `bit` into the cache and emit the cache once it holds 8 bits. */
+    void append(const int bit) {
+        cache = static_cast<uint8_t>((cache << 1) | bit);
+        ++count;
+        if (count != 8) {
+            return;
+        }
+        out.push_back(static_cast<char>(cache));
+        count = 0;
+    }
+
+    /** Pad a partially filled byte with zero bits so that it gets emitted. */
+    void flush() {
+        if (count == 0) {
+            return;
+        }
+        while (count != 0) {
+            append(0);
+        }
+        assert(count == 0);
+    }
+
+    /** Emit `bit`, then `pending_bits` copies of its complement; resets `pending_bits` to 0. */
+    void append_bit_and_pending(const int bit, uint64_t &pending_bits) {
+        append(bit);
+        for (; pending_bits > 0; --pending_bits) {
+            append(!bit);
+        }
+    }
+};
+/** Feeds the bits of a byte string, most significant bit first, into a 32-bit window. */
+class InCacheString {
+private:
+    const std::string in_;  // private copy of the encoded stream
+public:
+    explicit InCacheString(const std::string& in) : in_(in) {}
+    uint8_t cache = 0;        // byte currently being consumed
+    uint8_t cached_bits = 0;  // bits of `cache` that have not been handed out yet
+    size_t in_ptr = 0;        // index of the next unread byte of `in_`
+
+    /** Shift `value` left by one and OR in the next stream bit (a 0 once the stream is exhausted). */
+    void get(uint32_t& value) {
+        value <<= 1;
+        if (cached_bits == 0) {
+            if (in_ptr == in_.size()) {
+                return;  // past the end: only the shift happens
+            }
+            // Refill the cache with the next byte.
+            cache = static_cast<uint8_t>(in_[in_ptr++]);
+            cached_bits = 8;
+        }
+        --cached_bits;
+        value |= (cache >> cached_bits) & 1;
+    }
+
+    /** Fill `value` with the first 32 bits of the stream. */
+    void initialize(uint32_t& value) {
+        int remaining = 32;
+        while (remaining-- > 0) {
+            get(value);
+        }
+    }
+};
+//------------------------------------------------------------------------------
+/**
+ * Binary search over one CDF row held in a Python list.
+ *
+ * Probes entries between cdf[offset] and cdf[offset + max_sym]. Returns the
+ * index of an entry equal to `target` if one is probed; otherwise the last
+ * probed index whose entry is below `target` (0 if there was none).
+ */
+cdf_t binsearch(py::list &cdf, cdf_t target, cdf_t max_sym,
+                const int offset)  /* i * Lp */
+{
+    cdf_t lo = 0;
+    cdf_t hi = max_sym + 1;  // len(cdf) == max_sym + 2
+    while (hi - lo > 1) {
+        // lo and hi are 16-bit values, so their sum is computed in int
+        // without overflow and the midpoint fits back into cdf_t.
+        const cdf_t mid = static_cast<cdf_t>((lo + hi) / 2);
+        const cdf_t probe = cdf[offset + mid].cast<cdf_t>();
+        if (probe == target) {
+            return mid;
+        }
+        if (probe < target) {
+            lo = mid;
+        } else {
+            hi = mid;
+        }
+    }
+    return lo;
+}
+/**
+ * Stateful arithmetic decoder that returns one symbol per call, so that the
+ * caller can pass a different CDF for every symbol (autoregressive decoding).
+ */
+class decode
+{
+public:
+    int dataID = 0;        // number of symbols decoded so far
+    const int Lp;          // length of one CDF row, used to derive max_symbol
+    const int N_sym;       // total number of symbols to decode; supplied by the caller
+    const int max_symbol;  // largest symbol value, Lp - 2
+    uint32_t low = 0;
+    uint32_t high = 0xFFFFFFFFU;
+    const uint32_t c_count = 0x10000U;
+    const int precision = 16;
+    cdf_t sym_i = 0;       // most recently decoded symbol
+    uint32_t value = 0;    // 32-bit window onto the encoded stream
+    InCacheString in_cache;
+
+    /**
+     * @param in          encoded byte string (copied into `in_cache`)
+     * @param sysNum_     number of symbols that will be decoded
+     * @param sysNumDim_  length of one CDF row (number of symbol values + 1)
+     */
+    decode(const std::string &in, const int&sysNum_,const int&sysNumDim_)
+        : Lp(sysNumDim_), N_sym(sysNum_), max_symbol(sysNumDim_-2), in_cache(in) {
+        // Initializers are listed in declaration order; `value` is already
+        // zero-initialized when the first 32 bits are loaded here.
+        in_cache.initialize(value);
+    }
+
+    /**
+     * Decode the next symbol.
+     *
+     * @param cdf  one CDF row as a Python list of Lp 16-bit integers
+     * @return     the decoded symbol
+     * @throws py::index_error (IndexError in Python) once all N_sym symbols
+     *         have been decoded
+     */
+    int16_t decodeAsym(py::list cdf) {
+        // Previously the function fell off its end without a return value
+        // in this situation; report it explicitly instead.
+        if (dataID >= N_sym) {
+            throw py::index_error("decodeAsym: all symbols have already been decoded");
+        }
+        const uint64_t span = static_cast<uint64_t>(high) - static_cast<uint64_t>(low) + 1;
+        // always < 0x10000 ???
+        const uint16_t count = ((static_cast<uint64_t>(value) - static_cast<uint64_t>(low) + 1) * c_count - 1) / span;
+        const int offset = 0;  // `cdf` holds a single row
+        sym_i = binsearch(cdf, count, (cdf_t)max_symbol, offset);
+
+        // Advance the counter on every call; the old loop returned before
+        // its increment ran, so the counter never moved.
+        const bool is_last = (dataID == N_sym - 1);
+        ++dataID;
+        if (is_last) {
+            // Nothing follows the last symbol, so the range update is skipped.
+            return (int16_t)sym_i;
+        }
+
+        const uint32_t c_low = cdf[offset + sym_i].cast<cdf_t>();
+        // The upper bound of the largest symbol is hard-coded to 2^16.
+        const uint32_t c_high = sym_i == max_symbol ? 0x10000U : cdf[offset + sym_i + 1].cast<cdf_t>();
+        high = (low - 1) + ((span * static_cast<uint64_t>(c_high)) >> precision);
+        low =  (low)     + ((span * static_cast<uint64_t>(c_low))  >> precision);
+        while (true) {
+            if (low >= 0x80000000U || high < 0x80000000U) {
+                // low and high agree on their MSB: shift it out and read a new bit.
+                low <<= 1;
+                high <<= 1;
+                high |= 1;
+                in_cache.get(value);
+            } else if (low >= 0x40000000U && high < 0xC0000000U) {
+                /**
+                 * 0100 0000 ... <= value <  1100 0000 ...
+                 * <=>
+                 * 0100 0000 ... <= value <= 1011 1111 ...
+                 * <=>
+                 * value starts with 01 or 10.
+                 * 01 - 01 == 00  |  10 - 01 == 01
+                 * i.e., with shifts
+                 * 01A -> 0A  or  10A -> 1A, i.e., discard 2SB as it's all the same while we are in
+                 *    near convergence
+                 */
+                low <<= 1;
+                low &= 0x7FFFFFFFU;  // make MSB 0
+                high <<= 1;
+                high |= 0x80000001U;  // add 1 at the end, retain MSB = 1
+                value -= 0x40000000U;
+                in_cache.get(value);
+            } else {
+                break;
+            }
+        }
+        return (int16_t)sym_i;
+    }
+};
+/** Require `sym` to be a one-dimensional tensor; raises through TORCH_CHECK otherwise. */
+const void check_sym(const torch::Tensor& sym) {
+    const bool is_one_dimensional = (sym.dim() == 1);
+    TORCH_CHECK(is_one_dimensional,
+                "Invalid size for sym. Expected just 1 dim.");
+}
+/**
+ * Get an instance of the `cdf_ptr` struct for a 2-D int16 CPU tensor.
+ *
+ * The returned struct does not own the data; it points into `cdf`'s storage.
+ * Raises through TORCH_CHECK if `cdf` is on CUDA, is not 2-D, or is not
+ * contiguous.
+ */
+const struct cdf_ptr get_cdf_ptr(const torch::Tensor& cdf)
+{
+    TORCH_CHECK(!cdf.is_cuda(), "cdf must be on CPU!");
+    const auto s = cdf.sizes();
+    TORCH_CHECK(s.size() == 2, "Invalid size for cdf! Expected (N, Lp)");
+    // The encoder indexes the raw pointer as `i * Lp + j`, which is only
+    // valid for a contiguous row-major layout.
+    TORCH_CHECK(cdf.is_contiguous(), "cdf must be contiguous (row-major)!");
+    const int N_sym = s[0];
+    const int Lp = s[1];
+    const auto cdf_acc = cdf.accessor<int16_t, 2>();
+    // Reinterpret the int16 storage as the unsigned 16-bit CDF values.
+    cdf_t* cdf_data = (uint16_t*)cdf_acc.data();
+    const struct cdf_ptr res(cdf_data, N_sym, Lp);
+    return res;
+}
+// -----------------------------------------------------------------------------
+/**
+ * Encode symbols `sym` with CDF represented by `cdf_ptr`. NOTE: this is not exposed to python.
+ *
+ * Row i of the CDF table is used for sym[i]. Returns the encoded stream,
+ * zero-padded to a whole number of bytes.
+ */
+py::bytes encode(
+        const cdf_ptr& cdf_ptr,
+        const torch::Tensor& sym){
+#ifdef VERBOSE
+    // Start of the timed region reported at the end of the function.
+    const std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now();
+#endif
+    OutCacheString out_cache;
+    uint32_t low = 0;
+    uint32_t high = 0xFFFFFFFFU;
+    uint64_t pending_bits = 0;
+    const int precision = 16;
+    const cdf_t* cdf = cdf_ptr.data;
+    const int N_sym = cdf_ptr.N_sym;
+    const int Lp = cdf_ptr.Lp;
+    const int max_symbol = Lp - 2;
+    auto sym_ = sym.accessor<int16_t, 1>();
+    for (int i = 0; i < N_sym; ++i) {
+        const int16_t sym_i = sym_[i];
+        const uint64_t span = static_cast<uint64_t>(high) - static_cast<uint64_t>(low) + 1;
+        const int offset = i * Lp;
+        // Left boundary is at offset + sym_i
+        const uint32_t c_low = cdf[offset + sym_i];
+        // Right boundary is at offset + sym_i + 1, except for the `max_symbol`
+        // For which we hardcode the maxvalue. So if e.g.
+        // L == 4, it means that Lp == 5, and the allowed symbols are
+        // {0, 1, 2, 3}. The max symbol is thus Lp - 2 == 3. It's probability
+        // is then given by c_max - cdf[-2].
+        const uint32_t c_high = sym_i == max_symbol ? 0x10000U : cdf[offset + sym_i + 1];
+        high = (low - 1) + ((span * static_cast<uint64_t>(c_high)) >> precision);
+        low =  (low)     + ((span * static_cast<uint64_t>(c_low))  >> precision);
+        while (true) {
+            if (high < 0x80000000U) {
+                // Both bounds in the lower half: the next bit is 0.
+                out_cache.append_bit_and_pending(0, pending_bits);
+                low <<= 1;
+                high <<= 1;
+                high |= 1;
+            } else if (low >= 0x80000000U) {
+                // Both bounds in the upper half: the next bit is 1.
+                out_cache.append_bit_and_pending(1, pending_bits);
+                low <<= 1;
+                high <<= 1;
+                high |= 1;
+            } else if (low >= 0x40000000U && high < 0xC0000000U) {
+                // Bounds straddle the midpoint: defer the bit decision.
+                pending_bits++;
+                low <<= 1;
+                low &= 0x7FFFFFFF;
+                high <<= 1;
+                high |= 0x80000001;
+            } else {
+                break;
+            }
+        }
+    }
+    // Terminate the stream with one more bit plus all pending bits. After
+    // the increment `pending_bits` is at least 1, so no extra test is needed.
+    pending_bits += 1;
+    if (low < 0x40000000U) {
+        out_cache.append_bit_and_pending(0, pending_bits);
+    } else {
+        out_cache.append_bit_and_pending(1, pending_bits);
+    }
+    out_cache.flush();
+#ifdef VERBOSE
+    std::chrono::steady_clock::time_point end= std::chrono::steady_clock::now();
+    std::cout << "Time difference (sec) = " << (std::chrono::duration_cast<std::chrono::microseconds>(end - begin).count()) /1000000.0 <<std::endl;
+#endif
+    return py::bytes(out_cache.out);
+}
+/**
+ * Python entry point for encoding; see numpyAc.py for the calling code.
+ *
+ * Validates both tensors and encodes sym[i] with row i of `cdf`.
+ * Raises through TORCH_CHECK if `sym` is not 1-D, `cdf` is rejected by
+ * `get_cdf_ptr`, or the two do not describe the same number of symbols.
+ */
+py::bytes encode_cdf(
+        const torch::Tensor& cdf, /* (N, Lp), must be on CPU! */
+        const torch::Tensor& sym)
+{
+    check_sym(sym);
+    const auto cdf_ptr = get_cdf_ptr(cdf);
+    // The encoder reads one symbol per CDF row; a shorter `sym` would be
+    // read past its end.
+    TORCH_CHECK(sym.size(0) == cdf_ptr.N_sym,
+                "sym must hold exactly one symbol per cdf row.");
+    return encode(cdf_ptr, sym);
+}
+// Python bindings: `encode_cdf` as a module-level function and the stateful
+// `decode` class with its per-symbol `decodeAsym` method.
+PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
+    m.def("encode_cdf", &encode_cdf, "Encode from CDF");
+    py::class_<decode>(m, "decode")
+        // Constructor arguments: encoded byte string, symbol count, CDF row length.
+        .def(py::init([] (const std::string in, const int&sysNum_,const int&sysNumDim_) {
+            return new decode(in,sysNum_,sysNumDim_);
+        }))
+        .def("decodeAsym", &decode::decodeAsym);
+}

numpyAc/numpyAc.py ADDED Viewed

	@@ -0,0 +1,172 @@

+import os
+import torch
+import numpy as np
+from torch.autograd.grad_mode import F
+from torch.utils.cpp_extension import load
+PRECISION = 16 # DO NOT EDIT!
+# Load on-the-fly with ninja.
+torchac_dir = os.path.dirname(os.path.realpath(__file__))
+backend_dir = os.path.join(torchac_dir, 'backend')
+numpyAc_backend = load(
+  name="numpyAc_backend",
+  sources=[os.path.join(backend_dir, "numpyAc_backend.cpp")],
+  verbose=False)
+def _encode_float_cdf(cdf_float,
+                     sym,
+                     needs_normalization=True,
+                     check_input_bounds=False):
+  """Encode symbols `sym` with potentially unnormalized floating point CDF.
+  Check the README for more details.
+  :param cdf_float: CDF tensor, float32, on CPU. Shape (N1, ..., Nm, Lp).
+  :param sym: The symbols to encode, int16, on CPU. Shape (N1, ..., Nm).
+  :param needs_normalization: if True, assume `cdf_float` is un-normalized and
+    needs normalization. Otherwise only convert it, without normalizing.
+  :param check_input_bounds: if True, ensure inputs have valid values.
+    Important: may take significant time. Only enable to check.
+  :return: byte-string, encoding `sym`.
+  """
+  if check_input_bounds:
+    if cdf_float.min() < 0:
+      raise ValueError(f'cdf_float.min() == {cdf_float.min()}, should be >=0.!')
+    if cdf_float.max() > 1:
+      raise ValueError(f'cdf_float.max() == {cdf_float.max()}, should be <=1.!')
+    Lp = cdf_float.shape[-1]
+    if sym.max() >= Lp - 1:
+      raise ValueError(f'sym.max() == {sym.max()}, should be <=Lp - 1.!')
+  cdf_int = _convert_to_int_and_normalize(cdf_float, needs_normalization)
+  return _encode_int16_normalized_cdf(cdf_int, sym)
+def _encode_int16_normalized_cdf(cdf_int, sym):
+  """Encode symbols `sym` with a normalized integer cdf `cdf_int`.
+  Check the README for more details.
+  :param cdf_int: CDF tensor, int16, on CPU. Shape (N1, ..., Nm, Lp).
+  :param sym: The symbols to encode, int16, on CPU. Shape (N1, ..., Nm).
+  :return: byte-string, encoding `sym`
+  """
+  cdf_int, sym = _check_and_reshape_inputs(cdf_int, sym)
+  return numpyAc_backend.encode_cdf( torch.ShortTensor(cdf_int), torch.ShortTensor(sym))
+def _check_and_reshape_inputs(cdf, sym=None):
+  """Check device, dtype, and shapes."""
+  if sym is not None and sym.dtype != np.int16:
+    raise ValueError('Symbols must be int16!')
+  if sym is not None:
+    if len(cdf.shape) != len(sym.shape) + 1 or cdf.shape[:-1] != sym.shape:
+      raise ValueError(f'Invalid shapes of cdf={cdf.shape}, sym={sym.shape}! '
+                       'The first m elements of cdf.shape must be equal to '
+                       'sym.shape, and cdf should only have one more dimension.')
+  Lp = cdf.shape[-1]
+  cdf = cdf.reshape(-1, Lp)
+  if sym is None:
+    return cdf
+  sym = sym.reshape(-1)
+  return cdf, sym
+# def _reshape_output(cdf_shape, sym):
+#   """Reshape single dimension `sym` back to the correct spatial dimensions."""
+#   spatial_dimensions = cdf_shape[:-1]
+#   if len(sym) != np.prod(spatial_dimensions):
+#     raise ValueError()
+#   return sym.reshape(*spatial_dimensions)
+def _convert_to_int_and_normalize(cdf_float, needs_normalization):
+  """Convert floatingpoint CDF to integers. See README for more info.
+  The idea is the following:
+  When we get the cdf here, it is (assumed to be) between 0 and 1, i.e,
+    cdf \in [0, 1)
+  (note that 1 should not be included.)
+  We now want to convert this to int16 but make sure we do not get
+  the same value twice, as this would break the arithmetic coder
+  (you need a strictly monotonically increasing function).
+  So, if needs_normalization==True, we multiply the input CDF
+  with 2**16 - (Lp - 1). This means that now,
+    cdf \in [0, 2**16 - (Lp - 1)].
+  Then, in a final step, we add an arange(Lp), which is just a line with
+  slope one. This ensure that for sure, we will get unique, strictly
+  monotonically increasing CDFs, which are \in [0, 2**16)
+  """
+  Lp = cdf_float.shape[-1]
+  factor = 2**PRECISION
+  new_max_value = factor
+  if needs_normalization:
+    new_max_value = new_max_value - (Lp - 1)
+  cdf_float = cdf_float*(new_max_value)
+  cdf_float = np.round(cdf_float)
+  cdf = cdf_float.astype(np.int16)
+  if needs_normalization:
+    r = np.arange(Lp)
+    cdf+=r
+  return cdf
+def pdf_convert_to_cdf_and_normalize(pdf):
+    assert pdf.ndim==2
+    pdf = pdf / (np.sum(pdf,1,keepdims=True))/(1+10**(-10))
+    cdfF = np.cumsum( pdf, axis=1)
+    cdfF = np.hstack((np.zeros((pdf.shape[0],1)),cdfF))
+    return cdfF
+class arithmeticCoding():
+  def __init__(self) -> None:
+    self.binfile = None
+    self.sysNum = None
+    self.byte_stream = None
+  def encode(self,pdf,sym,binfile=None):
+    assert pdf.shape[0]==sym.shape[0]
+    assert pdf.ndim==2 and sym.ndim==1
+    self.sysNum = sym.shape[0]
+    cdfF = pdf_convert_to_cdf_and_normalize(pdf)
+    # pdf = np.diff(cdfF)
+    # print( -np.log2(pdf[range(0,self.sysNum),sym]).sum())
+    self.byte_stream = _encode_float_cdf(cdfF, sym, check_input_bounds=True)
+    real_bits = len(self.byte_stream) * 8
+    # # Write to a file.
+    if binfile is not None:
+      with open(binfile, 'wb') as fout:
+          fout.write(self.byte_stream)
+    return self.byte_stream,real_bits
+class arithmeticDeCoding():
+  """
+    Decoding class
+    byte_stream: the bin file stream.
+    sysNum: the Number of symbols that you are going to decode. This value should be
+            saved in other ways.
+    sysDim: the Number of the possible symbols.
+    binfile: bin file path, if it is Not None, 'byte_stream' will read from this file
+            and copy to Cpp backend Class 'InCacheString'
+  """
+  def __init__(self,byte_stream,sysNum,symDim,binfile=None) -> None:
+      if binfile is not None:
+        with open(binfile, 'rb') as fin:
+          byte_stream = fin.read()
+      self.byte_stream = byte_stream
+      self.decoder = numpyAc_backend.decode(self.byte_stream,sysNum,symDim+1)
+  def decode(self,pdf):
+    cdfF = pdf_convert_to_cdf_and_normalize(pdf)
+    pro = _convert_to_int_and_normalize(cdfF,needs_normalization=True)
+    pro = pro.squeeze(0).astype(np.uint16).tolist()
+    sym_out = self.decoder.decodeAsym(pro)
+    return sym_out

test.py ADDED Viewed

	@@ -0,0 +1,27 @@

+import numpyAc
+import numpy as np
+# Generate random symbols and pdf.
+dim = 128
+symsNum = 2000
+pdf = np.random.rand(symsNum,dim)
+pdf = pdf / (np.sum(pdf,1,keepdims=True))
+sym = np.random.randint(0,dim,symsNum,dtype=np.int16)
+output_pdf = pdf
+# Encode to bytestream.
+codec = numpyAc.arithmeticCoding()
+byte_stream,real_bits = codec.encode(pdf, sym,'out.b')
+# Number of bits taken by the stream.
+print('real_bits',real_bits)
+# Theoretical bits number
+print('shannon entropy',-int(np.log2(pdf[range(0,symsNum),sym]).sum()))
+# Decode from bytestream.
+decodec = numpyAc.arithmeticDeCoding(None,symsNum,dim,'out.b')
+# Autoregressive decoding and output will be equal to the input.
+for i,s in enumerate(sym):
+    assert decodec.decode(output_pdf[i:i+1,:]) == s

testTorchac.py ADDED Viewed

	@@ -0,0 +1,50 @@

+'''
+LastEditors: fcy
+'''
+import torchac
+import torch
+import numpy as np
+# Reference round trip through the original torchac package.
+# Fix both RNG seeds so the run is repeatable.
+seed=6
+torch.manual_seed(seed)
+np.random.seed(seed)
+dim = 500
+symsNum = 40000
+# One random distribution (row) and one random symbol per position.
+pdf = np.random.rand(symsNum,dim)
+pdf = pdf / (np.sum(pdf,1,keepdims=True))
+sym = torch.ShortTensor(np.random.randint(0,dim,symsNum,dtype=np.int16))
+# Normalizes each row, takes the cumulative sum and prepends a zero column.
+def pdf_convert_to_cdf_and_normalize(pdf):
+    assert pdf.ndim==2
+    pdf = pdf / (np.sum(pdf,1,keepdims=True))/(1+10**(-10))
+    cdfF = np.cumsum( pdf, axis=1)
+    cdfF = np.hstack((np.zeros((pdf.shape[0],1)),cdfF))
+    return cdfF
+output_cdf = torch.Tensor(pdf_convert_to_cdf_and_normalize(pdf)) # CDF tensor built from the random pdf, shape (symsNum, dim + 1)
+byte_stream = torchac.encode_float_cdf(output_cdf, sym, check_input_bounds=True)
+# pdf = np.diff(cdfF)
+# print( -np.log2(pdf[range(0,oct_len),sym]).sum())
+# Number of bits taken by the stream
+real_bits = len(byte_stream) * 8
+print(real_bits)
+# Write to a file.
+with open('outfile.b', 'wb') as fout:
+    fout.write(byte_stream)
+# Read from a file.
+with open('outfile.b', 'rb') as fin:
+    byte_stream = fin.read()
+# Decode from bytestream.
+sym_out = torchac.decode_float_cdf(output_cdf, byte_stream)
+# Output will be equal to the input.
+assert sym_out.equal(sym)