00001
00002
00003
00004
00005 #ifndef BITVECTOR_H
00006 #define BITVECTOR_H
00009
00010 #include "array_t.h"
00011
00012 #include <stdio.h>
00013 #include <limits.h>
00014 #include <iomanip>
00015
00016 #if defined(_WIN32) && (defined(_MSC_VER) || defined(__MINGW32__)) && defined(CXX_USE_DLL) && !defined(DLL_EXPORTS)
00017 #if defined(_MSC_VER)
00018
00019 #pragma warning (disable : 4231)
00020 #endif
00021 extern template class FASTBIT_CXX_DLLSPEC array_t<uint32_t>;
00022 #endif
00023
00024
00063 class FASTBIT_CXX_DLLSPEC ibis::bitvector {
00064 public:
00065 typedef uint32_t word_t;
00066
00067
00068 bitvector() : nbits(0), nset(0), active(), m_vec() {};
00069 ~bitvector() {clear();};
00071 bitvector(const bitvector& bv) : nbits(bv.nbits), nset(bv.nset),
00072 active(bv.active), m_vec(bv.m_vec) {};
00073 bitvector(const array_t<word_t>& arr);
00074 bitvector(const char* file);
00075
00076 inline const bitvector& operator=(const bitvector& bv);
00078 inline bitvector& copy(const bitvector& bv);
00079 inline bitvector& swap(bitvector& bv);
00080
00081
00082
00085 void setBit(const word_t i, int val);
00087 inline void turnOnRawBit(const word_t i);
00089 void erase(word_t i, word_t j);
00090
00093 void set(int val, word_t n);
00095 void clear() {nbits = 0; nset = 0; active.reset(); m_vec.clear();}
00096
00097 bitvector& operator+=(const bitvector& bv);
00098 inline bitvector& operator+=(int b);
00099 void appendWord(word_t w);
00100
00101 inline void appendFill(int val, word_t n);
00102
00104 int operator==(const bitvector& rhs) const;
00105
00107 void flip();
00109 void operator&=(const bitvector& rhs);
00112 bitvector* operator&(const bitvector&) const;
00114 void operator|=(const bitvector& rhs);
00116 bitvector* operator|(const bitvector&) const;
00118 void operator^=(const bitvector& rhs);
00120 bitvector* operator^(const bitvector&) const;
00122 void operator-=(const bitvector& rhs);
00125 bitvector* operator-(const bitvector&) const;
00126
00127 void subset(const bitvector& mask, bitvector& res) const;
00128 word_t count(const bitvector& mask) const;
00129
00130
00133 void read(const char *fn);
00135 void write(const char *fn) const;
00137 void write(int fdes) const;
00141 void write(array_t<word_t>& arr) const;
00142
00143 void compress();
00144 void decompress();
00145
00146 word_t compressible() const;
00147
00149 inline word_t size() const throw();
00152 inline void setSize(word_t n) const;
00154 inline word_t cnt() const;
00156 inline word_t numFillWords() const;
00158 uint32_t bytes() const throw() {
00159 return (m_vec.size()*sizeof(word_t) + sizeof(bitvector));
00160 };
00164 uint32_t getSerialSize() const throw() {
00165 return (m_vec.size() + 1 + (active.nbits>0));
00166 };
00168 static word_t bitsPerLiteral() {return MAXBITS;}
00172 inline static double randomSize(word_t nb, word_t nc);
00177 inline static double markovSize(word_t nb, word_t nc, double f);
00181 static double clusteringFactor(word_t nb, word_t nc, word_t sz);
00182
00188 void adjustSize(word_t nv, word_t nt);
00189 void reserve(unsigned nb, unsigned nc, double cf=0.0);
00190
00193 bool empty() const {return all0s() && active.val == 0;}
00194
00195 std::ostream& print(std::ostream &) const;
00196
00198 class iterator;
00199 inline iterator end();
00200 inline iterator begin();
00201
00203 class const_iterator;
00204 inline const_iterator end() const;
00205 inline const_iterator begin() const;
00206
00208 class indexSet;
00209 inline indexSet firstIndexSet() const;
00210
00211
00212 friend class indexSet;
00213 friend class iterator;
00214 friend class const_iterator;
00215
00216 protected:
00217 bool isCompressed() const {return (m_vec.size()*MAXBITS < nbits);}
00218 inline bool all0s() const;
00219 inline bool all1s() const;
00220
00221 private:
00222
00223 static const unsigned MAXBITS;
00224 static const unsigned SECONDBIT;
00225 static const word_t FILLBIT;
00226 static const word_t HEADER0;
00227 static const word_t HEADER1;
00228 static const word_t ALLONES;
00229 static const word_t MAXCNT;
00230
00237 struct active_word {
00238 word_t val;
00239 word_t nbits;
00240
00241 active_word() : val(0), nbits(0) {};
00242 void reset() {val = 0; nbits = 0;};
00243 int is_full() const {return (nbits >= MAXBITS);};
00244 void append(int b) {
00245 val <<= 1; nbits ++; val += b;
00246 };
00247 };
00248
00251 struct run {
00252 int isFill;
00253 int fillBit;
00254 word_t nWords;
00255 array_t<word_t>::const_iterator it;
00256 run() : isFill(0), fillBit(0), nWords(0), it(0) {};
00257 void decode() {
00258 fillBit = (*it > HEADER1);
00259 if (*it > ALLONES) {
00260 nWords = (*it & MAXCNT);
00261 isFill = 1;
00262 }
00263 else {
00264 nWords = 1;
00265 isFill = 0;
00266 }
00267 };
00270 void operator--() {
00271 if (nWords > 1) {--nWords;}
00272 else {++ it; nWords = 0;}
00273 };
00276 void operator-=(word_t len) {
00277 while (len > 0) {
00278 if (nWords == 0) decode();
00279 if (isFill != 0) {
00280 if (nWords > len) {nWords -= len; len = 0;}
00281 else if (nWords == len) {nWords = 0; len = 0; ++ it;}
00282 else {len -= nWords; ++ it; nWords = 0;}
00283 }
00284 else {-- len; ++ it; nWords = 0;}
00285 }
00286 };
00287 };
00288 friend struct run;
00289 friend struct active_word;
00290
00291
00292 mutable word_t nbits;
00293 mutable word_t nset;
00294 active_word active;
00295 array_t<word_t> m_vec;
00296
00297
00298 word_t count_c1(const bitvector& mask) const;
00299 word_t count_c2(const bitvector& mask) const;
00300
00301
00302
00303
00304
00305
00306
00307
00308
00309 void or_c2(const bitvector& rhs, bitvector& res) const;
00310 void or_c1(const bitvector& rhs, bitvector& res) const;
00311 void or_c0(const bitvector& rhs);
00312 void or_d1(const bitvector& rhs);
00313 void or_d2(const bitvector& rhs, bitvector& res) const;
00314 void and_c2(const bitvector& rhs, bitvector& res) const;
00315 void and_c1(const bitvector& rhs, bitvector& res) const;
00316 void and_c0(const bitvector& rhs);
00317 void and_d1(const bitvector& rhs);
00318 void and_d2(const bitvector& rhs, bitvector& res) const;
00319 void xor_c2(const bitvector& rhs, bitvector& res) const;
00320 void xor_c1(const bitvector& rhs, bitvector& res) const;
00321 void xor_c0(const bitvector& rhs);
00322 void xor_d1(const bitvector& rhs);
00323 void xor_d2(const bitvector& rhs, bitvector& res) const;
00324 void minus_c2(const bitvector& rhs, bitvector& res) const;
00325 void minus_c1(const bitvector& rhs, bitvector& res) const;
00326 void minus_c1x(const bitvector& rhs, bitvector& res) const;
00327 void minus_c0(const bitvector& rhs);
00328 void minus_d1(const bitvector& rhs);
00329 void minus_d2(const bitvector& rhs, bitvector& res) const;
00330 inline void copy_runs(run& it, word_t& nw);
00331 inline void copy_runsn(run& it, word_t& nw);
00332 inline void copy_fill(array_t<word_t>::iterator& jt, run& it);
00333 inline void copy_runs(array_t<word_t>::iterator& jt, run& it,
00334 word_t& nw);
00335 inline void copy_runsn(array_t<word_t>::iterator& jt, run& it,
00336 word_t& nw);
00337 void decompress(array_t<word_t>& tmp) const;
00338 void copy_comp(array_t<word_t>& tmp) const;
00339 inline void append_active();
00340 inline void append_counter(int val, word_t cnt);
00341 inline word_t cnt_ones(word_t) const;
00342 inline word_t cnt_bits(word_t) const;
00345 word_t do_cnt() const throw ();
00346 };
00347
00357 class ibis::bitvector::indexSet {
00358 public:
00359
00360
00361
00362 friend indexSet ibis::bitvector::firstIndexSet() const;
00363
00364
00365 bool isRange() const {return (nind>=ibis::bitvector::MAXBITS);}
00366 const word_t* indices() const {return ind;};
00367 word_t nIndices() const {return nind;}
00368 const word_t& currentWord() const {return *it;}
00369 indexSet& operator++();
00370
00371 private:
00372 array_t<word_t>::const_iterator it;
00373 array_t<word_t>::const_iterator end;
00374 const active_word* active;
00375 word_t nind;
00376 word_t ind[32];
00377 };
00378
00380 class ibis::bitvector::const_iterator {
00381 public:
00382 const_iterator() : compressed(0), ind(0), nbits(0), literalvalue(0),
00383 fillbit(0), active(0) {}
00384
00385 const_iterator(const const_iterator& r)
00386 : compressed(r.compressed), ind(r.ind), nbits(r.nbits),
00387 literalvalue(r.literalvalue), fillbit(r.fillbit), active(r.active),
00388 end(r.end), begin(r.begin), it(r.it) {};
00389 const_iterator& operator=(const const_iterator& r) {
00390 compressed = r.compressed; ind = r.ind; nbits = r.nbits;
00391 literalvalue = r.literalvalue; fillbit = r.fillbit; active = r.active;
00392 end = r.end; begin = r.begin; it = r.it;
00393 return *this;}
00394
00395 inline bool operator*() const;
00396 inline int operator!=(const const_iterator& rhs) const throw ();
00397 inline int operator==(const const_iterator& rhs) const throw ();
00398
00399 inline const_iterator& operator++();
00400 inline const_iterator& operator--();
00401 const_iterator& operator+=(int incr);
00402
00403 private:
00404 ibis::bitvector::word_t compressed;
00405 ibis::bitvector::word_t ind;
00406 ibis::bitvector::word_t nbits;
00407 ibis::bitvector::word_t literalvalue;
00408 int fillbit;
00409 const active_word* active;
00410 array_t<word_t>::const_iterator end;
00411 array_t<word_t>::const_iterator begin;
00412 array_t<word_t>::const_iterator it;
00413
00414 void decodeWord();
00415
00416
00417 friend void ibis::bitvector::erase(word_t i, word_t j);
00418 friend const_iterator ibis::bitvector::begin() const;
00419 friend const_iterator ibis::bitvector::end() const;
00420 friend class ibis::bitvector::iterator;
00421 };
00422
00430 class ibis::bitvector::iterator {
00431 public:
00432 iterator() : compressed(0), ind(0), nbits(0), literalvalue(0), fillbit(0),
00433 bitv(0), active(0), vec(0) {}
00434 iterator(const iterator& r)
00435 : compressed(r.compressed), ind(r.ind), nbits(r.nbits),
00436 literalvalue(r.literalvalue), fillbit(r.fillbit), bitv(r.bitv),
00437 active(r.active), vec(r.vec), it(r.it) {};
00438 const iterator& operator=(const iterator& r) {
00439 compressed = r.compressed; ind = r.ind; nbits = r.nbits;
00440 literalvalue = r.literalvalue; fillbit = r.fillbit; bitv = r.bitv;
00441 active = r.active; vec = r.vec; it = r.it; return *this;}
00442
00443 inline bool operator*() const;
00444 inline int operator!=(const const_iterator& rhs) const throw ();
00445 inline int operator==(const const_iterator& rhs) const throw ();
00446 inline int operator!=(const iterator& rhs) const throw ();
00447 inline int operator==(const iterator& rhs) const throw ();
00448
00449 inline iterator& operator++();
00450 inline iterator& operator--();
00451 iterator& operator+=(int incr);
00452 const iterator& operator=(int val);
00453
00454 private:
00455 ibis::bitvector::word_t compressed;
00456 ibis::bitvector::word_t ind;
00457 ibis::bitvector::word_t nbits;
00458 ibis::bitvector::word_t literalvalue;
00459 int fillbit;
00460 ibis::bitvector* bitv;
00461 active_word* active;
00462 array_t<word_t>* vec;
00463 array_t<word_t>::iterator it;
00464
00465 void decodeWord();
00466
00467 friend iterator ibis::bitvector::begin();
00468 friend iterator ibis::bitvector::end();
00469 };
00470
00471 inline void ibis::bitvector::setSize(word_t n) const {
00472 nbits = n-active.nbits;
00473 #if defined(WAH_CHECK_SIZE)
00474 word_t nb = do_cnt();
00475 if (nb != nbits) {
00476 const_cast<ibis::bitvector*>(this)->adjustSize(0, nbits);
00477 LOGGER(ibis::gVerbose >= 0)
00478 << "ibis::bitvector::setSize -- adjust the number of bits to"
00479 << n;
00480 }
00481 #endif
00482 }
00483
00485 inline bool ibis::bitvector::all0s() const {
00486 if (m_vec.empty()) {
00487 return true;
00488 }
00489 else if (m_vec.size() == 1) {
00490 return (m_vec[0] == 0 || (m_vec[0]>=HEADER0 && m_vec[0]<HEADER1));
00491 }
00492 else {
00493 return false;
00494 }
00495 }
00496
00498 inline bool ibis::bitvector::all1s() const {
00499 if (m_vec.size() == 1) {
00500 return (m_vec[0] == ALLONES || (m_vec[0] > HEADER1));
00501 }
00502 else {
00503 return false;
00504 }
00505 }
00506
00508 inline ibis::bitvector::word_t
00509 ibis::bitvector::cnt_bits(ibis::bitvector::word_t val) const {
00510 return ((val>ALLONES) ? ((val&MAXCNT)*MAXBITS) : MAXBITS);
00511 }
00512
00514 inline ibis::bitvector::word_t
00515 ibis::bitvector::cnt_ones(ibis::bitvector::word_t val) const {
00516
00517 static const word_t table[256] = {
00518 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
00519 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
00520 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
00521 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00522 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
00523 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00524 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00525 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
00526 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
00527 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00528 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00529 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
00530 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
00531 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
00532 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
00533 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};
00534 word_t cnt;
00535 if (val > 0) {
00536
00537 cnt = table[val&0xFFU] + table[(val>>8)&0xFFU] +
00538 table[(val>>16)&0xFFU] + table[(val>>24)&0xFFU];
00539
00540
00541
00542
00543
00544
00545
00546
00547
00548
00549 }
00550 else {
00551 cnt = 0;
00552 }
00553 return cnt;
00554 }
00555
00556 inline ibis::bitvector::word_t ibis::bitvector::size() const throw() {
00557 return ((nbits?nbits:(nbits=do_cnt()))+active.nbits);
00558 }
00559
00560 inline ibis::bitvector::word_t ibis::bitvector::cnt() const {
00561 if (nset==0 && !m_vec.empty())
00562 nbits = do_cnt();
00563 return (nset+cnt_ones(active.val));
00564 }
00565
00566 inline ibis::bitvector::word_t ibis::bitvector::numFillWords() const {
00567 word_t cnt=0;
00568 array_t<ibis::bitvector::word_t>::const_iterator it = m_vec.begin();
00569 while (it != m_vec.end()) {
00570 cnt += (*it >> ibis::bitvector::MAXBITS);
00571 it++;
00572 }
00573 return cnt;
00574 }
00575
00579 inline const ibis::bitvector&
00580 ibis::bitvector::operator=(const ibis::bitvector& bv) {
00581 nbits = bv.nbits; nset = bv.nset; active = bv.active;
00582 m_vec.deepCopy(bv.m_vec);
00583 return *this;
00584 }
00585
00587 inline ibis::bitvector& ibis::bitvector::copy(const ibis::bitvector& bv) {
00588 nbits = bv.nbits; nset = bv.nset; active = bv.active;
00589 m_vec.deepCopy(bv.m_vec);
00590 return *this;
00591 }
00592
00593 inline ibis::bitvector& ibis::bitvector::swap(bitvector& bv) {
00594 word_t tmp;
00595 tmp = bv.nbits; bv.nbits = nbits; nbits = tmp;
00596 tmp = bv.nset; bv.nset = nset; nset = tmp;
00597 active_word atmp = bv.active;
00598 bv.active = active; active = atmp;
00599 m_vec.swap(bv.m_vec);
00600 return *this;
00601 }
00602
00606 inline void ibis::bitvector::append_active() {
00607
00608
00609
00610
00611 if (m_vec.empty()) {
00612 m_vec.push_back(active.val);
00613 }
00614 else if (active.val == 0) {
00615 if (m_vec.back() == 0) {
00616 m_vec.back() = (HEADER0 + 2);
00617 }
00618 else if (m_vec.back() >= HEADER0 && m_vec.back() < HEADER1) {
00619 ++ m_vec.back();
00620 }
00621 else {
00622 m_vec.push_back(active.val);
00623 }
00624 }
00625 else if (active.val == ALLONES) {
00626 if (m_vec.back() == ALLONES) {
00627 m_vec.back() = (HEADER1 | 2);
00628 }
00629 else if (m_vec.back() >= HEADER1) {
00630 ++m_vec.back();
00631 }
00632 else {
00633 m_vec.push_back(active.val);
00634 }
00635 }
00636 else {
00637 m_vec.push_back(active.val);
00638 }
00639 nbits += MAXBITS;
00640 active.reset();
00641 nset = 0;
00642
00643
00644 }
00645
00649 inline void ibis::bitvector::append_counter(int val, word_t cnt) {
00650 word_t head = 2 + val;
00651 word_t w = (head << SECONDBIT) + cnt;
00652 nbits += cnt*MAXBITS;
00653 if (m_vec.empty()) {
00654 m_vec.push_back(w);
00655 }
00656 else if ((m_vec.back()>>SECONDBIT) == head) {
00657 m_vec.back() += cnt;
00658 }
00659 else if ((m_vec.back()==ALLONES) && head==3) {
00660 m_vec.back() = w + 1;
00661 }
00662 else if ((m_vec.back() == 0) && head==2) {
00663 m_vec.back() = w + 1;
00664 }
00665 else {
00666 m_vec.push_back(w);
00667 }
00668 }
00669
00670
00671 inline ibis::bitvector& ibis::bitvector::operator+=(int b) {
00672 active.append(b);
00673 if (active.is_full())
00674 append_active();
00675 return *this;
00676 }
00677
00680 inline void ibis::bitvector::appendFill(int val, word_t n) {
00681 if (n == 0) return;
00682 if (active.nbits > 0) {
00683 word_t tmp = (MAXBITS - active.nbits);
00684 if (tmp > n) tmp = n;
00685 active.nbits += tmp;
00686 active.val <<= tmp;
00687 n -= tmp;
00688 if (val != 0)
00689 active.val |= (1U<<tmp) - 1;
00690 if (active.nbits >= MAXBITS)
00691 append_active();
00692 }
00693 if (n >= MAXBITS) {
00694 word_t cnt = n / MAXBITS;
00695 if (cnt > 1)
00696 append_counter(val, cnt);
00697 else if (val != 0) {
00698 active.val = ALLONES;
00699 append_active();
00700 }
00701 else {
00702 active.val = 0;
00703 append_active();
00704 }
00705 n -= cnt * MAXBITS;
00706 }
00707 if (n > 0) {
00708 active.nbits = n;
00709 active.val = val*((1U<<n)-1);
00710 }
00711 }
00712
00713
00714
00715 inline void ibis::bitvector::copy_runs(run& it, word_t& nw) {
00716 #if defined(DEBUG) && DEBUG + 0 > 1
00717 LOGGER(ibis::gVerbose >= 0)
00718 << "ibis::bitvector::copy_runs(0x"
00719 << std::hex << std::setw(8) << std::setfill('0')
00720 << *(it.it) << ", " << std::dec << nw
00721 << ") ... it.nWords = " << it.nWords;
00722 #endif
00723
00724 if (it.isFill != 0) {
00725 append_counter(it.fillBit, it.nWords);
00726 nw -= it.nWords;
00727 }
00728 else {
00729 active.val = *(it.it);
00730 append_active();
00731 -- nw;
00732 }
00733 ++ it.it;
00734
00735 it.decode();
00736 nset = 0;
00737 nbits += MAXBITS * nw;
00738 while (nw >= it.nWords && nw > 0) {
00739 m_vec.push_back(*(it.it));
00740 nw -= it.nWords;
00741 ++ it.it;
00742 it.decode();
00743 }
00744 nbits -= MAXBITS * nw;
00745 }
00746
00747
00748
00749 inline void ibis::bitvector::copy_runsn(run& it, word_t& nw) {
00750 #if defined(DEBUG) && DEBUG + 0 > 1
00751 LOGGER(ibis::gVerbose >= 0)
00752 << "ibis::bitvector::copy_runsn(0x"
00753 << std::hex << std::setw(8) << std::setfill('0')
00754 << *(it.it) << ", " << std::dec << nw
00755 << ") ... it.nWords = " << it.nWords;
00756 #endif
00757
00758 if (it.isFill != 0) {
00759 append_counter(!it.fillBit, it.nWords);
00760 nw -= it.nWords;
00761 }
00762 else {
00763 active.val = ALLONES ^ *(it.it);
00764 append_active();
00765 -- nw;
00766 }
00767 ++ it.it;
00768
00769 it.decode();
00770 nset = 0;
00771 nbits += MAXBITS * nw;
00772 while (nw >= it.nWords && nw > 0) {
00773 m_vec.push_back((it.isFill?FILLBIT:ALLONES) ^ *(it.it));
00774 nw -= it.nWords;
00775 ++ it.it;
00776 it.decode();
00777 }
00778 nbits -= MAXBITS * nw;
00779 }
00780
00781
00782
00783
00784 inline void ibis::bitvector::copy_fill
00785 (array_t<ibis::bitvector::word_t>::iterator& jt, run& it) {
00786 if (it.fillBit == 0) {
00787 while (it.nWords > 3) {
00788 *jt = 0;
00789 jt[1] = 0;
00790 jt[2] = 0;
00791 jt[3] = 0;
00792 jt += 4;
00793 it.nWords -= 4;
00794 }
00795 if (it.nWords == 1) {
00796 *jt = 0; ++jt;
00797 }
00798 else if (it.nWords == 2) {
00799 *jt = 0; jt[1] = 0; jt += 2;
00800 }
00801 else if (it.nWords == 3) {
00802 *jt = 0; jt[1] = 0; jt[2] = 0; jt += 3;
00803 }
00804 }
00805 else {
00806 while (it.nWords > 3) {
00807 *jt = ALLONES;
00808 jt[1] = ALLONES;
00809 jt[2] = ALLONES;
00810 jt[3] = ALLONES;
00811 jt += 4;
00812 it.nWords -= 4;
00813 }
00814 if (it.nWords == 1) {
00815 *jt = ALLONES; ++jt;
00816 }
00817 else if (it.nWords == 2) {
00818 *jt = ALLONES; jt[1] = ALLONES; jt += 2;
00819 }
00820 else if (it.nWords == 3) {
00821 *jt = ALLONES; jt[1] = ALLONES; jt[2] = ALLONES; jt += 3;
00822 }
00823 }
00824 it.nWords = 0;
00825 ++ it.it;
00826 }
00827
00828
00829
00830
00831
00832 inline void ibis::bitvector::copy_runs
00833 (array_t<ibis::bitvector::word_t>::iterator& jt, run& it, word_t& nw) {
00834 while (nw >= it.nWords && nw > 0) {
00835 if (it.isFill != 0) {
00836 const array_t<word_t>::iterator iend = jt + it.nWords;
00837 if (it.fillBit == 0) {
00838 while (jt < iend) {
00839 *jt = 0;
00840 ++ jt;
00841 }
00842 }
00843 else {
00844 while (jt < iend) {
00845 *jt = ALLONES;
00846 ++ jt;
00847 }
00848 }
00849 nw -= it.nWords;
00850 }
00851 else {
00852 *jt = *(it.it);
00853 ++ jt;
00854 -- nw;
00855 }
00856 ++ it.it;
00857 it.decode();
00858 }
00859 }
00860
00861
00862
00863 inline void ibis::bitvector::copy_runsn
00864 (array_t<ibis::bitvector::word_t>::iterator& jt, run& it, word_t& nw) {
00865 while (nw >= it.nWords && nw > 0) {
00866 if (it.isFill != 0) {
00867 const array_t<word_t>::iterator iend = jt + it.nWords;
00868 if (it.fillBit == 0) {
00869 while (jt < iend) {
00870 *jt = ALLONES;
00871 ++ jt;
00872 }
00873 }
00874 else {
00875 while (jt < iend) {
00876 *jt = 0;
00877 ++ jt;
00878 }
00879 }
00880 nw -= it.nWords;
00881 }
00882 else {
00883 *jt = *(it.it) ^ ALLONES;
00884 ++ jt;
00885 -- nw;
00886 }
00887 ++ it.it;
00888 it.decode();
00889 }
00890 }
00891
00892 inline ibis::bitvector::iterator ibis::bitvector::begin() {
00893 iterator it;
00894 it.it = m_vec.begin();
00895 it.vec = &m_vec;
00896 it.bitv = this;
00897 it.active = &active;
00898 it.decodeWord();
00899 return it;
00900 }
00901
00902 inline ibis::bitvector::iterator ibis::bitvector::end() {
00903 iterator it;
00904 it.ind = 0;
00905 it.compressed = 0;
00906 it.nbits = 0;
00907 it.literalvalue = 0;
00908 it.fillbit = 0;
00909 it.it = m_vec.end() + 1;
00910 it.vec = &m_vec;
00911 it.bitv = this;
00912 it.active = &active;
00913 return it;
00914 }
00915
00916 inline ibis::bitvector::const_iterator ibis::bitvector::begin() const {
00917 const_iterator it;
00918 it.it = m_vec.begin();
00919 it.begin = m_vec.begin();
00920 it.end = m_vec.end();
00921 it.active = &active;
00922 it.decodeWord();
00923 return it;
00924 }
00925
00926
00927 inline bool ibis::bitvector::iterator::operator*() const {
00928 #if defined(DEBUG) && DEBUG + 0 > 1
00929 if (vec==0 || it<vec->begin() || it>vec->end())
00930 throw "bitvector::const_iterator not initialized correctly.";
00931 #endif
00932 if (compressed != 0)
00933 return (fillbit != 0);
00934 else
00935 return (1 & (literalvalue >> (bitvector::SECONDBIT - ind)));
00936 }
00937
00938
00939 inline int ibis::bitvector::iterator::operator!=
00940 (const ibis::bitvector::const_iterator& rhs) const throw () {
00941 return (it != rhs.it);
00942 }
00943 inline int ibis::bitvector::iterator::operator==
00944 (const ibis::bitvector::const_iterator& rhs) const throw () {
00945 return (it == rhs.it);
00946 }
00947 inline int ibis::bitvector::iterator::operator!=
00948 (const ibis::bitvector::iterator& rhs) const throw () {
00949 return (it != rhs.it);
00950 }
00951 inline int ibis::bitvector::iterator::operator==
00952 (const ibis::bitvector::iterator& rhs) const throw () {
00953 return (it == rhs.it);
00954 }
00955
00956
00957 inline ibis::bitvector::iterator& ibis::bitvector::iterator::operator++() {
00958 #if defined(DEBUG) && DEBUG + 0 > 1
00959 if (vec==0 || it<vec->begin() || it>vec->end())
00960 throw "bitvector::iterator not formed correctly.";
00961 #endif
00962 if (ind+1<nbits) {++ind;}
00963 else {++ it; decodeWord();}
00964 return *this;
00965 }
00966
00967
00968 inline ibis::bitvector::iterator& ibis::bitvector::iterator::operator--() {
00969 #if defined(DEBUG) && DEBUG + 0 > 1
00970 if (vec==0 || it<vec->begin() || it>vec->end()+1)
00971 throw "bitvector::iterator not formed correctly.";
00972 #endif
00973 if (ind) -- ind;
00974 else {
00975 if (it <= vec->end()) -- it;
00976 else if (active->nbits) it = vec->end();
00977 else it = vec->end() - 1;
00978 decodeWord();
00979 if (nbits) ind = nbits - 1;
00980 }
00981 return *this;
00982 }
00983
00984 inline ibis::bitvector::const_iterator ibis::bitvector::end() const {
00985 const_iterator it;
00986 it.ind = 0;
00987 it.compressed = 0;
00988 it.nbits = 0;
00989 it.literalvalue = 0;
00990 it.fillbit = 0;
00991 it.it = m_vec.end() + 1;
00992 it.begin = m_vec.begin();
00993 it.end = m_vec.end();
00994 it.active = &active;
00995 return it;
00996 }
00997
00998
00999 inline bool ibis::bitvector::const_iterator::operator*() const {
01000 #if defined(DEBUG) && DEBUG + 0 > 1
01001 if (it==0 || end==0 || it>end || nbits<=ind)
01002 throw "bitvector::const_iterator not initialized correctly.";
01003 #endif
01004 if (compressed != 0)
01005 return (fillbit != 0);
01006 else
01007 return (1 & (literalvalue >> (bitvector::SECONDBIT - ind)));
01008 }
01009
01010
01011 inline int ibis::bitvector::const_iterator::operator!=
01012 (const ibis::bitvector::const_iterator& rhs) const throw (){
01013 return (it != rhs.it);
01014 }
01015 inline int ibis::bitvector::const_iterator::operator==
01016 (const ibis::bitvector::const_iterator& rhs) const throw () {
01017 return (it == rhs.it);
01018 }
01019
01020
01021 inline ibis::bitvector::const_iterator&
01022 ibis::bitvector::const_iterator::operator++() {
01023 #if defined(DEBUG) && DEBUG + 0 > 1
01024 if (it==0 || end==0 || it>end)
01025 throw "bitvector::const_iterator not formed correctly.";
01026 #endif
01027 if (ind+1<nbits) {++ind;}
01028 else {++ it; decodeWord();}
01029 return *this;
01030 }
01031
01032
01033 inline ibis::bitvector::const_iterator&
01034 ibis::bitvector::const_iterator::operator--() {
01035 #if defined(DEBUG) && DEBUG + 0 > 1
01036 if (it==0 || end==0 || it>end)
01037 throw "bitvector::const_iterator not formed correctly.";
01038 #endif
01039 if (ind) -- ind;
01040 else {
01041 if (it <= end) -- it;
01042 else if (active->nbits) it = end;
01043 else it = end - 1;
01044 decodeWord();
01045 if (nbits) ind = nbits - 1;
01046 }
01047 return *this;
01048 }
01049
01050 inline ibis::bitvector::indexSet ibis::bitvector::firstIndexSet() const {
01051 indexSet is;
01052 if (m_vec.end() > m_vec.begin()) {
01053 is.it = m_vec.begin() - 1;
01054 is.end = m_vec.end();
01055 }
01056 else {
01057 is.it = 0;
01058 is.end = 0;
01059 }
01060 is.active = &active;
01061 is.ind[0] = static_cast<word_t>(-1);
01062 is.nind = 0;
01063 ++is;
01064 return is;
01065 }
01066
01067 inline double ibis::bitvector::randomSize(word_t nb, word_t nc) {
01068 double sz = 0.0;
01069 if (nb > 0 && nb >= nc && nb > MAXBITS) {
01070 const double den = static_cast<double>(nc) /
01071 static_cast<double>(nb);
01072 const word_t nw = nb / MAXBITS;
01073 sz = 3.0 + nw - nw * (pow(1.0-den, static_cast<int>(2*MAXBITS))
01074 + pow(den, static_cast<int>(2*MAXBITS)));
01075 }
01076 return sz*sizeof(word_t);
01077 }
01078
01079 inline double ibis::bitvector::markovSize(word_t nb, word_t nc, double f) {
01080 double sz = 0.0;
01081 if (nb > 0 && nb >= nc && nb > MAXBITS) {
01082 const double den = static_cast<double>(nc) /
01083 static_cast<double>(nb);
01084 const word_t nw = nb / MAXBITS;
01085 if ((den <= 0.5 && f > 1.00001) || (den > 0.5 && (1.0-den)*f > den))
01086 sz = ((1.0-den) * pow(1.0-den/((1.0-den)*f),
01087 static_cast<int>(2*MAXBITS-3)) +
01088 den * pow(1.0-1.0/f, static_cast<int>(2*MAXBITS-3)));
01089 else
01090 sz = (pow(1.0-den, static_cast<int>(2*MAXBITS)) +
01091 pow(den, static_cast<int>(2*MAXBITS)));
01092 sz = 3.0 + nw * (1.0 - sz);
01093 }
01094 return sz*sizeof(word_t);
01095 }
01096
01100 inline ibis::bitvector::bitvector(const array_t<ibis::bitvector::word_t>& arr)
01101 : nbits(0), nset(0), m_vec(arr) {
01102 if (m_vec.size() > 1) {
01103 if (m_vec.back() > 0) {
01104 if (m_vec.back() < MAXBITS) {
01105 active.nbits = m_vec.back();
01106 m_vec.pop_back();
01107 active.val = m_vec.back();
01108 }
01109 else {
01110 ibis::util::logMessage
01111 ("Error", "the serialized version of bitvector contains "
01112 "an unexpected last word (0x%.8lx)",
01113 static_cast<long unsigned>(m_vec.back()));
01114 #if defined(_DEBUG)
01115 {
01116 word_t nb = 0;
01117 ibis::util::logger lg(4);
01118 lg.buffer() << "bitvector constructor received an array["
01119 << arr.size()
01120 << "] with the following values:";
01121 for (word_t i = 0; i < arr.size(); ++ i) {
01122 if (arr[i] < HEADER0)
01123 nb += MAXBITS;
01124 else
01125 nb += (arr[i] & MAXCNT) * MAXBITS;
01126 lg.buffer() << "\n" << i << ",\t0x" << std::hex
01127 << std::setw(8) << std::setfill('0')
01128 << arr[i] << std::dec << "\tnb=" << nb;
01129 }
01130 }
01131
01132
01133 #endif
01134 }
01135 }
01136 else {
01137 active.reset();
01138 }
01139 m_vec.pop_back();
01140
01141 #if defined(WAH_CHECK_SIZE)
01142 nbits = do_cnt();
01143 #endif
01144 }
01145 else {
01146 clear();
01147 }
01148 }
01149
01150 inline void ibis::bitvector::turnOnRawBit(const word_t ind) {
01151 if (ind < nbits) {
01152 m_vec[ind / MAXBITS] |= (1 << (SECONDBIT - (ind % MAXBITS)));
01153 nset = 0;
01154 }
01155 else {
01156 active.val |= (1 << (active.nbits - (ind - nbits) - 1));
01157 #if defined(DEBUG)
01158 if (ind >= nbits + active.nbits ||
01159 active.val >= (1U << active.nbits)) {
01160 LOGGER(ibis::gVerbose >= 0)
01161 << "FastBit::bitvector::turnOnRawBit(" << ind
01162 << ") found a bad active word";
01163 }
01164 #endif
01165 }
01166 }
01167
01168 std::ostream& operator<<(std::ostream&, const ibis::bitvector&);
01169 #endif // __BITVECTOR_H