Faster reading bits?

In my application, 20% of the processor's time is spent reading bits (`skip`) through my bit reader. Does anyone have an idea how to make the following code faster? At any given time, I don't need more than 20 valid bits (which is why I can use `fast_skip` in some situations).

Bits are read in big-endian order, which is why the byte swap is needed.

/// Sequential reader for a big-endian (most-significant-bit-first) bit stream.
///
/// The reader keeps a 64-bit window (`m_block`) holding the eight stream bytes
/// that start at `m_data`, with the first byte in the most significant bits.
/// `m_pos` counts how many bits of that window have already been consumed.
/// The window advances in 32-bit steps, and every refill reads eight bytes
/// starting at the new `m_data`, so the buffer must stay readable that far.
class bit_reader
{
    std::uint32_t* m_data;   // 32-bit word at which the current window starts
    std::size_t    m_pos;    // number of window bits already consumed
    std::uint64_t  m_block;  // eight stream bytes, first byte in the top bits

    /// Loads the eight bytes at `p` as one big-endian 64-bit value.
    static std::uint64_t load_be64(const void* p)
    {
#if defined(_MSC_VER)
        // Same 64-bit load plus byte swap as before (little-endian x64 target).
        return _byteswap_uint64(*static_cast<const std::uint64_t*>(p));
#else
        // Other compilers: assemble the value byte by byte. This needs no
        // intrinsic, has no alignment or aliasing requirements and does not
        // depend on the host byte order.
        const unsigned char* bytes = static_cast<const unsigned char*>(p);
        std::uint64_t word = 0;
        for(int i = 0; i < 8; ++i)
            word = (word << 8) | bytes[i];
        return word;
#endif
    }

public:
    /// Starts reading at `data`; the first eight bytes are loaded immediately.
    bit_reader(void* data)
        : m_data(static_cast<std::uint32_t*>(data))
        , m_pos(0)
        , m_block(load_be64(data))
    {
    }

    /// Returns the next `n_bits` bits (1..64), right-aligned, without
    /// consuming them.
    std::uint64_t value(std::size_t n_bits = 64) const
    {
        // n_bits == 0 would shift by 64 below, which is undefined behaviour.
        assert(n_bits >= 1);
        // All 64 bits of the window are valid, so the bound is inclusive;
        // the previous strict '<' rejected the default n_bits = 64 even at
        // position 0.
        assert(m_pos + n_bits <= 64);

        // Drop the consumed bits on the left, then right-align the result.
        return (m_block << m_pos) >> (64 - n_bits);
    }

    /// Consumes `n_bits` bits and refills the window once 32 or more bits of
    /// it have been used up.
    void skip(std::size_t n_bits) // 20% cpu time
    {
        assert(m_pos + n_bits < 42);

        m_pos += n_bits;

        if(m_pos > 31)
        {
            // Slide the window forward by one 32-bit word and reload it.
            m_block = load_be64(++m_data);
            m_pos  -= 32;
        }
    }

    /// Consumes `n_bits` bits without ever refilling the window; a later
    /// skip() performs the refill.
    void fast_skip(std::size_t n_bits)
    {
        assert(m_pos + n_bits < 42);
        m_pos += n_bits;
    }
};

Target hardware is x64.

+5
source share
7 answers

As you can see from the previous comment, you decompress the Huffman / arithmetic coding streams in JPEG format.

  • skip() value() , . , . restrict , , -, .
  • Huffman/artimetic - ~ 7 8, 64- . .
  • JPEG 32 . ?
  • , skip() , , . , , ? , , 0 1 .
  • , LSB. value()
+2

64 . - . . 8 .

, , . :

    // Tests a single bit in place instead of shifting a block:
    // bit_inx / 64 picks the element of data and bit_inx % 64 picks the entry
    // of mask to AND with it. Neither data nor mask is declared here;
    // presumably data holds 64-bit words and mask[i] is the single-bit mask
    // for position i — confirm against the surrounding code.
    if (data[bit_inx/64] & mask[bit_inx % 64])
    {
        ....
    }
+1

skip:

// Refill for skip(): keep the 32 still-unread bits by moving them into the
// upper half, then append only one freshly swapped 32-bit word instead of
// reloading and swapping all 64 bits.
// After ++m_data the upper half already holds the word m_data points at, so
// the word to append is the one after it (m_data[1]); appending *m_data would
// duplicate the upper half.
m_block  = (m_block << 32) | _byteswap_uint32((++m_data)[1]);
+1

, _byteswap_uint64, , , .

. - , . . .

[EDIT] . : , . 64- 64 , , . , (++m_data)[0], x64 64- , reinterpret_cast a uint32_t* uint64_t*, .

0

, , , , bit_reader!

bit_reader , :

  • : , , , - , , 64- , ,

, , . , , , .

: , assert() .

2

()

3

, :

// Hand trace of the question's bit_reader over four 32-bit words. The "->"
// comments are the values the answer's author expects, not measured output.
// NOTE(review): value(16) returns 16 bits, so the 32-bit values quoted below
// cannot be exact. On a little-endian x64 host the array is stored as bytes
// 33 22 11 00 77 66 55 44 ..., so the byte-swapped 64-bit load in the
// constructor would presumably give m_block = 0x3322110077665544 — verify by
// running the code before relying on this trace.
uint32_t source[] = { 0x00112233, 0x44556677, 0x8899AABB, 0xCCDDEEFF };
bit_reader br(source); // -> m_block = 0x7766554433221100
// reading...
br.value(16); // -> 0x77665544
br.skip(16);
br.value(16); // -> 0x33221100
br.skip(16);  // -> triggers reading more bits
              // -> m_block = 0xBBAA998877665544, m_pos = 0
br.value(16); // -> 0xBBAA9988
br.skip(16);
br.value(16); // -> 0x77665544
// that is not what you expect, right?

EDIT 4

, , EDIT 3 , , . ?

// Hand trace of the question's bit_reader over four 32-bit words. The "->"
// comments are the values the answer's author expects, not measured output.
// NOTE(review): on a little-endian x64 host the array is stored as bytes
// 33 22 11 00 77 66 55 44 BB AA 99 88 ..., so the byte-swapped 64-bit load in
// the constructor would presumably give m_block = 0x3322110077665544, and the
// reload after 32 bits 0x77665544BBAA9988 — verify by running the code before
// relying on this trace.
uint32_t source[] = { 0x00112233, 0x44556677, 0x8899AABB, 0xCCDDEEFF };
bit_reader br(source); // -> m_block = 0x7766554433221100
// reading...
br.value(16); // -> 0x7766
br.skip(16);
br.value(16); // -> 0x5544
br.skip(16);  // -> triggers reading more bits (because m_pos=32, which is: m_pos>31)
              // -> m_block = 0xBBAA998877665544, m_pos = 0
br.value(16); // -> 0xBBAA --> not what you expect, right?
0

, , .

/// Bit reader variant that keeps two consecutive byte-swapped 64-bit blocks,
/// so that up to 64 bits can be extracted at any bit position with a single
/// 128-bit shift.
///
/// The constructor reads the first 16 bytes of the buffer immediately, and
/// every refill in skip() reads 8 more, so the buffer must stay readable one
/// block ahead of the current position.
class bit_reader
{
public:
    const std::uint64_t* m_data64;  // next 64-bit word to fetch
    std::size_t          m_pos64;   // bits of m_block0 already consumed
    std::uint64_t        m_block0;  // current block
    std::uint64_t        m_block1;  // look-ahead block that follows m_block0

    /// Starts reading at `data` and preloads the first two blocks.
    bit_reader(const void* data)
        // Members are initialised in declaration order whatever the order
        // written here; the list now matches it, because the two block
        // initialisers below depend on m_data64 already being set.
        : m_data64(static_cast<const std::uint64_t*>(data))
        , m_pos64(0)
        , m_block0(byte_swap(*m_data64++))
        , m_block1(byte_swap(*m_data64++))
    {
    }

    /// Returns the next `n_bits` bits, right-aligned, without consuming them.
    /// `n_bits` must be in 1..64: a value of 0 would shift by 64, which is
    /// undefined.
    std::uint64_t value(std::size_t n_bits = 64) const
    {
        // __shiftleft128(low, high, shift) yields the upper 64 bits of the
        // 128-bit value high:low shifted left, i.e. the 64 stream bits that
        // start m_pos64 bits into m_block0 and continue into m_block1.
        const std::uint64_t window = __shiftleft128(m_block1, m_block0, m_pos64);

        return window >> (64 - n_bits);
    }

    /// Consumes `n_bits` bits, moving on to the look-ahead block once the
    /// current one is used up. Only one block is advanced per call, so
    /// `n_bits` must not exceed 64 for m_pos64 to stay below 64.
    void skip(std::size_t n_bits)
    {
        m_pos64 += n_bits;

        if(m_pos64 > 63)
        {
            m_block0 = m_block1;
            m_block1 = byte_swap(*m_data64++);
            m_pos64 -= 64;
        }
    }

    /// Same as skip(); this variant has no cheaper path.
    void fast_skip(std::size_t n_bits)
    {
        skip(n_bits);
    }
};
0

, . .

,

// Byte-swaps the whole buffer in place, one 64-bit word per iteration, before
// any bits are read. The pointer must be non-const: the loop writes through
// it, and a `uint64_t*` iterator cannot be initialised from a
// `const uint64_t*`.
// NOTE(review): data and len are not declared here; this assumes data is
// already a uint64_t* (a void* would need a cast) and len is the buffer size
// in bytes — confirm against the caller.
uint64_t * arr = data;

for(uint64_t * i = arr; i != &arr[len/sizeof(uint64_t)] ;i++)
{
     *i = _byteswap_uint64(*i); 
     //no more operations here
}
// another similar for loop

, .

, , .

-2

All Articles