56 typedef uint64_t word_t;
62 active(bv.active), m_vec(bv.m_vec) {};
73 void setBit(
const word_t i,
int val);
74 int getBit(
const word_t i)
const;
76 void erase(word_t i, word_t j);
80 void set(
int val, word_t n);
82 void clear() {nbits = 0; nset = 0; active.reset(); m_vec.
clear();}
117 void read(
const char *fn);
119 void write(
const char *fn)
const;
120 void write(FILE* fptr)
const;
134 if (nset==0) do_cnt();
return (nset+cnt_ones(active.val));
138 word_t
size()
const throw() {
return (nbits+active.nbits);};
148 return (m_vec.
size() + 1 + (active.nbits>0));
155 inline static double randomSize(word_t nb, word_t nc);
160 inline static double markovSize(word_t nb, word_t nc,
double f);
171 std::ostream& print(std::ostream &)
const;
175 inline iterator end();
176 inline iterator begin();
179 class const_iterator;
180 inline const_iterator end()
const;
181 inline const_iterator begin()
const;
185 inline indexSet firstIndexSet()
const;
188 friend class indexSet;
189 friend class iterator;
190 friend class const_iterator;
193 inline bool all0s()
const;
194 inline bool all1s()
const;
198 static const unsigned MAXBITS;
199 static const unsigned SECONDBIT;
200 static const word_t FILLBIT;
201 static const word_t HEADER0;
202 static const word_t HEADER1;
203 static const word_t ALLONES;
204 static const word_t MAXCNT;
216 active_word() : val(0), nbits(0) {};
217 void reset() {val = 0; nbits = 0;};
218 int is_full()
const {
return (nbits >= MAXBITS);};
220 val <<= 1; nbits ++; val += b;
231 run() : isFill(0), fillBit(0), nWords(0), it(0) {};
233 fillBit = (*it > HEADER1);
235 nWords = (*it & MAXCNT);
246 if (nWords > 1) {--nWords;}
247 else {++ it; nWords = 0;}
253 if (nWords == 0) decode();
255 if (nWords > len) {nWords -= len; len = 0;}
256 else if (nWords == len) {nWords = 0; len = 0; ++ it;}
257 else {len -= nWords; ++ it; nWords = 0;}
259 else {-- len; ++ it; nWords = 0;}
264 friend struct active_word;
270 array_t<word_t> m_vec;
303 inline void copy_runs(run& it, word_t& nw);
304 inline void copy_runsn(run& it, word_t& nw);
305 inline void copy_fill(array_t<word_t>::iterator& jt, run& it);
306 inline void copy_runs(array_t<word_t>::iterator& jt, run& it,
308 inline void copy_runsn(array_t<word_t>::iterator& jt, run& it,
311 void copy_comp(array_t<word_t>& tmp)
const;
312 inline void append_active();
313 inline void append_counter(
int val, word_t
cnt);
314 inline unsigned cnt_ones(word_t)
const;
315 inline word_t cnt_bits(word_t)
const;
316 word_t do_cnt()
const;
334 friend indexSet ibis::bitvector64::firstIndexSet()
const;
337 bool isRange()
const {
return (nind>=ibis::bitvector64::MAXBITS);}
338 const word_t* indices()
const {
return ind;};
339 word_t nIndices()
const {
return nind;}
345 const active_word* active;
353 inline bool operator*()
const;
362 ibis::bitvector64::word_t compressed;
363 ibis::bitvector64::word_t ind;
364 ibis::bitvector64::word_t nbits;
365 ibis::bitvector64::word_t literalvalue;
367 const active_word* active;
390 inline bool operator*()
const;
393 inline int operator!=(
const iterator& rhs)
const throw ();
394 inline int operator==(
const iterator& rhs)
const throw ();
402 ibis::bitvector64::word_t compressed;
403 ibis::bitvector64::word_t ind;
404 ibis::bitvector64::word_t nbits;
405 ibis::bitvector64::word_t literalvalue;
414 friend iterator ibis::bitvector64::begin();
415 friend iterator ibis::bitvector64::end();
423 else if (m_vec.
size() == 1) {
424 return (m_vec[0] == 0 || (m_vec[0] >= HEADER0 && m_vec[0] < HEADER1));
433 if (m_vec.size() == 1) {
434 return (m_vec[0] == ALLONES || (m_vec[0] > HEADER1));
442 inline ibis::bitvector64::word_t
443 ibis::bitvector64::cnt_bits(ibis::bitvector64::word_t val)
const {
444 return ((val>ALLONES) ? ((val&MAXCNT)*MAXBITS) : MAXBITS);
449 ibis::bitvector64::cnt_ones(ibis::bitvector64::word_t val)
const {
451 static const unsigned table[256] = {
452 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
453 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
454 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
455 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
456 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
457 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
458 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
459 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
460 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
461 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
462 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
463 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
464 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
465 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
466 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
467 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};
469 return table[val&0xFF] + table[(val>>8)&0xFF] +
470 table[(val>>16)&0xFF] + table[(val>>24)&0xFF] +
471 table[(val>>32)&0xFF] + table[(val>>40)&0xFF] +
472 table[(val>>48)&0xFF] + table[val>>56];
475 inline ibis::bitvector64::word_t
479 while (it != m_vec.end()) {
480 cnt += (*it >> ibis::bitvector64::MAXBITS);
491 nbits = bv.nbits; nset = bv.nset; active = bv.active;
492 m_vec.deepCopy(bv.m_vec);
499 nbits = bv.nbits; nset = bv.nset; active = bv.active;
500 m_vec.deepCopy(bv.m_vec);
507 tmp = bv.nbits; bv.nbits = nbits; nbits = tmp;
508 tmp = bv.nset; bv.nset = nset; nset = tmp;
509 active_word atmp = bv.active;
510 bv.active = active; active = atmp;
511 m_vec.swap(bv.m_vec);
517 inline void ibis::bitvector64::append_active() {
519 m_vec.push_back(active.val);
521 else if (active.val == 0) {
522 if (m_vec.back() == 0) {
523 m_vec.back() = (HEADER0 + 2);
525 else if (m_vec.back() >= HEADER0 && m_vec.back() < HEADER1) {
529 m_vec.push_back(active.val);
532 else if (active.val == ALLONES) {
533 if (m_vec.back() == ALLONES) {
534 m_vec.back() = (HEADER1 | 2);
536 else if (m_vec.back() >= HEADER1) {
540 m_vec.push_back(active.val);
544 m_vec.push_back(active.val);
553 inline void ibis::bitvector64::append_counter(
int val, word_t cnt) {
554 word_t head = 2 + val;
555 word_t w = (head << SECONDBIT) + cnt;
556 nbits += cnt*MAXBITS;
560 else if ((m_vec.back()>>SECONDBIT) == head) {
563 else if ((m_vec.back()==ALLONES) && head==3) {
564 m_vec.back() = w + 1;
566 else if ((m_vec.back() == 0) && head==2) {
567 m_vec.back() = w + 1;
577 if (active.is_full()) append_active();
583 if (active.nbits >= MAXBITS)
586 if (active.nbits+8 < MAXBITS) {
591 else if (active.nbits+8 > MAXBITS) {
592 unsigned na = MAXBITS - nbits;
593 unsigned hi = (c >> (8 - na));
597 active.nbits = 8 - na;
598 active.val = ((hi << active.nbits) ^ c);
610 (
int val, ibis::bitvector64::word_t n) {
611 if (active.nbits > 0) {
612 word_t tmp = (MAXBITS - active.nbits);
613 if (tmp > n) tmp = n;
618 active.val |= (
static_cast<word_t
>(1)<<tmp) - 1;
619 if (active.nbits >= MAXBITS) append_active();
622 word_t cnt = n / MAXBITS;
624 append_counter(val, cnt);
626 active.val = ALLONES;
637 active.val = val*((
static_cast<word_t
>(1)<<n)-1);
643 inline void ibis::bitvector64::copy_runs(run& it, word_t& nw) {
647 append_counter(it.fillBit, it.nWords);
650 else if (it.nWords == 1) {
651 active.val = (it.fillBit != 0 ? ALLONES : 0);
657 active.val = *(it.it);
665 nbits += MAXBITS * nw;
666 while (nw >= it.nWords && nw > 0) {
667 m_vec.push_back(*(it.it));
672 nbits -= MAXBITS * nw;
677 inline void ibis::bitvector64::copy_runsn(run& it, word_t& nw) {
681 append_counter(!it.fillBit, it.nWords);
684 else if (it.nWords == 1) {
685 active.val = (it.fillBit != 0 ? 0 : ALLONES);
691 active.val = ALLONES ^ *(it.it);
699 nbits += MAXBITS * nw;
700 while (nw >= it.nWords && nw > 0) {
701 m_vec.push_back((it.isFill?FILLBIT:ALLONES) ^ *(it.it));
706 nbits -= MAXBITS * nw;
710 inline void ibis::bitvector64::copy_fill
711 (array_t<ibis::bitvector64::word_t>::iterator& jt, run& it) {
712 const array_t<word_t>::iterator iend = jt + it.nWords;
713 if (it.fillBit == 0) {
717 *jt = 0; ++jt;
break;
719 *jt = 0; ++jt; *jt = 0; ++jt;
break;
721 *jt = 0; ++jt; *jt = 0; ++jt; *jt = 0; ++jt;
break;
734 *jt = ALLONES; ++jt;
break;
736 *jt = ALLONES; ++jt; *jt = ALLONES; ++jt;
break;
738 *jt = ALLONES; ++jt; *jt = ALLONES; ++jt; *jt = ALLONES; ++jt;
754 inline void ibis::bitvector64::copy_runs
755 (array_t<ibis::bitvector64::word_t>::iterator& jt, run& it, word_t& nw) {
756 while (nw >= it.nWords && nw > 0) {
758 const array_t<word_t>::iterator iend = jt + it.nWords;
759 if (it.fillBit == 0) {
785 inline void ibis::bitvector64::copy_runsn
786 (array_t<ibis::bitvector64::word_t>::iterator& jt, run& it, word_t& nw) {
787 while (nw >= it.nWords && nw > 0) {
789 const array_t<word_t>::iterator iend = jt + it.nWords;
790 if (it.fillBit == 0) {
805 *jt = *(it.it) ^ ALLONES;
816 it.it = m_vec.begin();
831 it.it = m_vec.end() + 1;
840 it.it = m_vec.begin();
841 it.begin = m_vec.begin();
842 it.end = m_vec.end();
849 inline bool ibis::bitvector64::iterator::operator*()
const {
850 #if defined(DEBUG) && DEBUG + 0 > 1
851 if (vec==0 || it<vec->begin() || it>vec->end())
852 throw "bitvector64::const_iterator not initialized correctly.";
855 return (fillbit != 0);
857 return ((word_t)1 & (literalvalue >> (bitvector64::SECONDBIT - ind)));
861 inline int ibis::bitvector64::iterator::operator!=
863 return (it != rhs.it);
865 inline int ibis::bitvector64::iterator::operator==
867 return (it == rhs.it);
869 inline int ibis::bitvector64::iterator::operator!=
871 return (it != rhs.it);
873 inline int ibis::bitvector64::iterator::operator==
875 return (it == rhs.it);
880 #if defined(DEBUG) && DEBUG + 0 > 1
881 if (vec==0 || it<vec->begin() || it>vec->end())
882 throw "bitvector64::iterator not formed correctly.";
884 if (ind+1<nbits) {++ind;}
885 else {++ it; decodeWord();}
891 #if defined(DEBUG) && DEBUG + 0 > 1
892 if (vec==0 || it<vec->begin() || it>vec->end()+1)
893 throw "bitvector64::iterator not formed correctly.";
897 if (it <= vec->end()) -- it;
898 else if (active->nbits) it = vec->end();
899 else it = vec->end() - 1;
901 if (nbits) ind = nbits - 1;
913 it.it = m_vec.end() + 1;
914 it.begin = m_vec.begin();
915 it.end = m_vec.end();
921 inline bool ibis::bitvector64::const_iterator::operator*()
const {
922 #if defined(DEBUG) && DEBUG + 0 > 1
923 if (it==0 || end==0 || it>end || nbits<=ind)
924 throw "bitvector64::const_iterator not initialized correctly.";
927 return (fillbit != 0);
929 return ((word_t)1 & (literalvalue >> (bitvector64::SECONDBIT - ind)));
933 inline int ibis::bitvector64::const_iterator::operator!=
935 return (it != rhs.it);
937 inline int ibis::bitvector64::const_iterator::operator==
939 return (it == rhs.it);
944 ibis::bitvector64::const_iterator::operator++() {
945 #if defined(DEBUG) && DEBUG + 0 > 1
946 if (it==0 || end==0 || it>end)
947 throw "bitvector64::const_iterator not formed correctly.";
949 if (ind+1<nbits) {++ind;}
950 else {++ it; decodeWord();}
956 ibis::bitvector64::const_iterator::operator--() {
957 #if defined(DEBUG) && DEBUG + 0 > 1
958 if (it==0 || end==0 || it>end)
959 throw "bitvector64::const_iterator not formed correctly.";
963 if (it <= end) -- it;
964 else if (active->nbits) it = end;
967 if (nbits) ind = nbits - 1;
974 if (m_vec.end() > m_vec.begin()) {
975 is.it = m_vec.begin() - 1;
976 is.end = m_vec.end();
983 is.ind[0] =
static_cast<word_t
>(-1);
991 if (nb > 0 && nb >= nc) {
992 const double den =
static_cast<double>(nc) /
993 static_cast<double>(nb);
994 const word_t nw = (nb > SECONDBIT ? nb / SECONDBIT - 1 : 0);
996 (pow(1.0-den, static_cast<int>(2*SECONDBIT)) +
997 pow(den, static_cast<int>(2*SECONDBIT)));
999 return sz*
sizeof(word_t);
1005 if (nb > 0 && nb >= nc) {
1006 const double den =
static_cast<double>(nc) /
1007 static_cast<double>(nb);
1008 const word_t nw = (nb > SECONDBIT ? nb / SECONDBIT - 1 : 0);
1009 if ((den <= 0.5 && f > 1.0) || (den > 0.5 && (1.0-den)*f > den))
1010 sz = ((1.0-den) * pow(1.0-den/((1.0-den)*f),
1011 static_cast<int>(2*MAXBITS-3)) +
1012 den * pow(1.0-1.0/f, static_cast<int>(2*MAXBITS-3)));
1014 sz = (pow(1.0-den, static_cast<int>(2*SECONDBIT)) +
1015 pow(den, static_cast<int>(2*SECONDBIT)));
1016 sz = 3.0 + nw * (1.0 - sz);
1018 return sz*
sizeof(word_t);
1022 #endif // __BITVECTOR64_H
void adjustSize(word_t nv, word_t nt)
Adjust the size of the bit sequence.
Definition: bitvector64.cpp:3619
bitvector64 & swap(bitvector64 &bv)
!<
Definition: bitvector64.h:505
static unsigned bitsPerLiteral()
Return the number of bits in a literal word.
Definition: bitvector64.h:151
bitvector64 * operator^(const bitvector64 &) const
Perform bitwise XOR, return the pointer to the result.
Definition: bitvector64.cpp:1042
static double clusteringFactor(word_t nb, word_t nc, word_t nw)
Estimate clustering factor based on the size.
Definition: bitvector64.cpp:3638
size_t size() const
Definition: array_t.h:69
bitvector64 * operator|(const bitvector64 &) const
Perform bitwise OR, return the pointer to the result.
Definition: bitvector64.cpp:932
word_t bytes() const
Return the number of bytes used by the bitvector object in memory.
Definition: bitvector64.h:142
bitvector64()
!< The basic unit of data storage is 64-bit.
Definition: bitvector64.h:59
bool isCompressed() const
Does this bit vector use less space than the maximum? Return true if yes, otherwise false...
Definition: bitvector64.h:130
void clear()
Reset the size to zero.
Definition: array_t.h:171
void write(const char *fn) const
Write the bit vector to a file.
Definition: bitvector64.cpp:1354
void erase(word_t i, word_t j)
Remove the bits in the range of [i, j).
Definition: bitvector64.cpp:641
void setBit(const word_t i, int val)
Replace a single bit at position i.
Definition: bitvector64.cpp:383
word_t numFillWords() const
Return the number of fill words.
Definition: bitvector64.h:476
int operator==(const bitvector64 &rhs) const
Return 1 if two bit sequences have the same content, 0 otherwise.
Definition: bitvector64.cpp:743
The indexSet stores positions of bits that are one.
Definition: bitvector64.h:328
A data structure to represent a sequence of bits.
Definition: bitvector64.h:54
word_t cnt() const
Return the number of bits that are one.
Definition: bitvector64.h:133
static double markovSize(word_t nb, word_t nc, double f)
Compute the expected size (bytes) of a random sequence generated from a Markov process.
Definition: bitvector64.h:1003
void appendByte(unsigned char)
!< Append a single bit.
Definition: bitvector64.h:582
void decompress()
!< Merge fills into fill words.
Definition: bitvector64.cpp:235
word_t size() const
Return the total number of bits in the bit sequence.
Definition: bitvector64.h:138
bitvector64 & copy(const bitvector64 &bv)
!<
Definition: bitvector64.h:498
void read(const char *fn)
Read a bit vector from the file.
Definition: bitvector64.cpp:1288
bool all0s() const
Are all bits in regular words 0?
Definition: bitvector64.h:419
static double randomSize(word_t nb, word_t nc)
Compute the expected number of bytes required to store a random sequence.
Definition: bitvector64.h:989
void appendWord(word_t w)
Append a WAH compressed word.
Definition: bitvector64.cpp:93
bitvector64 * operator&(const bitvector64 &) const
Perform bitwise AND, return the pointer to the result.
Definition: bitvector64.cpp:812
void operator-=(const bitvector64 &rhs)
Perform bitwise subtraction (a & !b).
Definition: bitvector64.cpp:1096
void set(int val, word_t n)
Create a vector with n bits of value val (cf.
Definition: bitvector64.cpp:70
const word_t * const_iterator
!< Iterator type.
Definition: array_t.h:27
The iterator that allows modification of bits.
Definition: bitvector64.h:388
void operator&=(const bitvector64 &rhs)
Perform bitwise AND between this bitvector64 and rhs.
Definition: bitvector64.cpp:755
void flip()
Complement all bits of a bit sequence.
Definition: bitvector64.cpp:699
void operator^=(const bitvector64 &rhs)
Perform bitwise exclusive or (XOR).
Definition: bitvector64.cpp:994
void appendFill(int val, word_t n)
!< Append a WAH word.
Definition: bitvector64.h:610
The const_iterator class. It iterates on the individual bits.
Definition: bitvector64.h:351
void clear()
Remove the existing content of a bitvector64.
Definition: bitvector64.h:82
word_t compressible() const
!< Turn all fill words into literal words.
Definition: bitvector64.cpp:342
bool all1s() const
Are all bits in regular words 1?
Definition: bitvector64.h:432
int getBit(const word_t i) const
Return the value of a bit.
Definition: bitvector64.cpp:607
void operator|=(const bitvector64 &rhs)
Perform bitwise OR.
Definition: bitvector64.cpp:874
bitvector64 * operator-(const bitvector64 &) const
Perform bitwise subtraction and return the pointer to the result.
Definition: bitvector64.cpp:1161
bitvector64 & operator=(const bitvector64 &bv)
!< Read the content of the named file.
Definition: bitvector64.h:490
word_t getSerialSize() const
Compute the number of words in serialized version of the bitvector object.
Definition: bitvector64.h:147