00001 // $Id$ 00002 // Author: John Wu <John.Wu at ACM.org> 00003 // Lawrence Berkeley National Laboratory 00004 // Copyright 2000-2009 the Regents of the University of California 00005 #ifndef BITVECTOR_H 00006 #define BITVECTOR_H 00009 00010 #include "array_t.h" // alternative to std::vector 00011 00012 #include <stdio.h> // sprintf 00013 #include <limits.h> // INT_MAX 00014 #include <iomanip> // setw() 00015 00016 #if defined(_MSC_VER) && defined(_WIN32) 00017 //disable warnings on extern before template instantiation 00018 #pragma warning (disable : 4231) 00019 #endif 00020 #if defined(_WIN32) && (defined(_MSC_VER) || defined(__MINGW32__)) && defined(CXX_USE_DLL) && !defined(DLL_EXPORT) 00021 extern template class FASTBIT_CXX_DLLSPEC ibis::array_t<uint32_t>; 00022 #endif 00023 00024 00066 class FASTBIT_CXX_DLLSPEC ibis::bitvector { 00067 public: 00068 typedef uint32_t word_t; 00069 00070 // constructors of bitvector class 00072 bitvector(); 00074 ~bitvector() {clear();}; 00076 bitvector(const bitvector& bv); 00077 bitvector(const array_t<word_t>& arr); 00078 bitvector(const char* file); 00079 inline const bitvector& operator=(const bitvector& bv); 00080 inline bitvector& copy(const bitvector& bv); 00081 inline bitvector& swap(bitvector& bv); 00082 00083 // use bv to replace part of the existing value, match the ith bit with 00084 // the first of bv, return reference to self 00085 //bitvector& copy(const word_t i, const bitvector& bv); 00088 void setBit(const word_t i, int val); 00090 inline void turnOnRawBit(const word_t i); 00092 void erase(word_t i, word_t j); 00093 00096 void set(int val, word_t n); 00098 void clear(); 00099 00100 bitvector& operator+=(const bitvector& bv); 00101 inline bitvector& operator+=(int b); 00102 void appendWord(word_t w); 00103 00104 inline void appendFill(int val, word_t n); 00105 00107 int operator==(const bitvector& rhs) const; 00108 00110 void flip(); 00112 void operator&=(const bitvector& rhs); 00115 bitvector* operator&(const bitvector&) const; 00117 void operator|=(const bitvector& rhs); 00119 bitvector* operator|(const bitvector&) const; 00121 void operator^=(const bitvector& rhs); 00123 bitvector* operator^(const bitvector&) const; 00125 void operator-=(const bitvector& rhs); 00128 bitvector* operator-(const bitvector&) const; 00129 00130 void subset(const bitvector& mask, bitvector& res) const; 00131 word_t count(const bitvector& mask) const; 00132 00133 // I/O functions. 00136 void read(const char *fn); 00138 void write(const char *fn) const; 00140 void write(int fdes) const; 00144 void write(array_t<word_t>& arr) const; 00145 00146 void compress(); 00147 void decompress(); 00148 00149 word_t compressible() const; 00150 00152 inline word_t size() const throw(); 00153 inline void sloppySize(word_t n) const; 00155 inline word_t cnt() const; 00157 inline word_t numFillWords() const; 00159 uint32_t bytes() const throw() { 00160 return (m_vec.size()*sizeof(word_t) + sizeof(bitvector)); 00161 }; 00165 uint32_t getSerialSize() const throw() { 00166 return (m_vec.size() + 1 + (active.nbits>0)) * sizeof(word_t); 00167 }; 00169 static word_t bitsPerLiteral() {return MAXBITS;} 00173 inline static double randomSize(word_t nb, word_t nc); 00178 inline static double markovSize(word_t nb, word_t nc, double f); 00182 static double clusteringFactor(word_t nb, word_t nc, word_t sz); 00183 00184 void adjustSize(word_t nv, word_t nt); 00186 void reserve(unsigned nb, unsigned nc, double cf=0.0); 00187 00190 bool empty() const {return all0s() && active.val == 0;} 00191 00192 std::ostream& print(std::ostream &) const; 00193 00195 class iterator; 00196 inline iterator end(); 00197 inline iterator begin(); 00198 00200 class const_iterator; 00201 inline const_iterator end() const; 00202 inline const_iterator begin() const; 00203 00205 class indexSet; 00206 inline indexSet firstIndexSet() const; 00207 00208 // give accesses to some friends 00209 friend class indexSet; 00210 friend class iterator; 00211 friend class const_iterator; 00212 00213 protected: 00214 bool isCompressed() const {return (m_vec.size()*MAXBITS < nbits);} 00215 inline bool all0s() const; 00216 inline bool all1s() const; 00217 00218 private: 00219 // static members, i.e., constants to be used internally 00220 static const unsigned MAXBITS; 00221 static const unsigned SECONDBIT; 00222 static const word_t FILLBIT; 00223 static const word_t HEADER0; 00224 static const word_t HEADER1; 00225 static const word_t ALLONES; 00226 static const word_t MAXCNT; 00227 00234 struct active_word { 00235 word_t val; // the value 00236 word_t nbits; // total number of bits 00237 00238 active_word() : val(0), nbits(0) {}; 00239 void reset() {val = 0; nbits = 0;}; 00240 int is_full() const {return (nbits >= MAXBITS);}; 00241 void append(int b) { // append a single bit 00242 val <<= 1; nbits ++; val += b; 00243 }; 00244 }; // struct active_word 00245 00248 struct run { 00249 int isFill; 00250 int fillBit; 00251 word_t nWords; 00252 array_t<word_t>::const_iterator it; 00253 run() : isFill(0), fillBit(0), nWords(0), it(0) {}; 00254 void decode() { 00255 fillBit = (*it > HEADER1); 00256 if (*it > ALLONES) { 00257 nWords = (*it & MAXCNT); 00258 isFill = 1; 00259 } 00260 else { 00261 nWords = 1; 00262 isFill = 0; 00263 } 00264 }; 00267 void operator--() { 00268 if (nWords > 1) {--nWords;} 00269 else {++ it; nWords = 0;} 00270 }; 00273 void operator-=(word_t len) { 00274 while (len > 0) { 00275 if (nWords == 0) decode(); 00276 if (isFill != 0) { 00277 if (nWords > len) {nWords -= len; len = 0;} 00278 else if (nWords == len) {nWords = 0; len = 0; ++ it;} 00279 else {len -= nWords; ++ it; nWords = 0;} 00280 } 00281 else {-- len; ++ it; nWords = 0;} 00282 } 00283 }; 00284 }; 00285 friend struct run; 00286 friend struct active_word; 00287 00288 // member variables of bitvector class 00289 mutable word_t nbits; 00290 mutable word_t nset; 00291 active_word active; 00292 array_t<word_t> m_vec; 00293 00294 // private functions of bitvector class 00295 word_t count_c1(const bitvector& mask) const; 00296 word_t count_c2(const bitvector& mask) const; 00297 // The following three functions all performs or operation, _c2 and _c1 00298 // generate compressed solutions, but _c0, _d1, and _d2 generate 00299 // uncompressed solutions. 00300 // or_c2 assumes there are compressed word in both operands 00301 // or_c1 *this may contain compressed word, but not rhs 00302 // or_c0 assumes both operands are not compressed 00303 // or_d1 *this contains no compressed word and is overwritten with the 00304 // result 00305 // or_d2 both *this and rhs are compressed, but res is not compressed 00306 void or_c2(const bitvector& rhs, bitvector& res) const; 00307 void or_c1(const bitvector& rhs, bitvector& res) const; 00308 void or_c0(const bitvector& rhs); 00309 void or_d1(const bitvector& rhs); 00310 void or_d2(const bitvector& rhs, bitvector& res) const; 00311 void and_c2(const bitvector& rhs, bitvector& res) const; 00312 void and_c1(const bitvector& rhs, bitvector& res) const; 00313 void and_c0(const bitvector& rhs); 00314 void and_d1(const bitvector& rhs); 00315 void and_d2(const bitvector& rhs, bitvector& res) const; 00316 void xor_c2(const bitvector& rhs, bitvector& res) const; 00317 void xor_c1(const bitvector& rhs, bitvector& res) const; 00318 void xor_c0(const bitvector& rhs); 00319 void xor_d1(const bitvector& rhs); 00320 void xor_d2(const bitvector& rhs, bitvector& res) const; 00321 void minus_c2(const bitvector& rhs, bitvector& res) const; 00322 void minus_c1(const bitvector& rhs, bitvector& res) const; 00323 void minus_c1x(const bitvector& rhs, bitvector& res) const; 00324 void minus_c0(const bitvector& rhs); 00325 void minus_d1(const bitvector& rhs); 00326 void minus_d2(const bitvector& rhs, bitvector& res) const; 00327 inline void copy_runs(run& it, word_t& nw); // copy nw words 00328 inline void copy_runsn(run& it, word_t& nw); // copy nw words and negate 00329 inline void copy_fill(array_t<word_t>::iterator& jt, run& it); 00330 inline void copy_runs(array_t<word_t>::iterator& jt, run& it, 00331 word_t& nw); 00332 inline void copy_runsn(array_t<word_t>::iterator& jt, run& it, 00333 word_t& nw); 00334 void decompress(array_t<word_t>& tmp) const; 00335 void copy_comp(array_t<word_t>& tmp) const; 00336 inline void append_active(); 00337 inline void append_counter(int val, word_t cnt); 00338 inline word_t cnt_ones(word_t) const; // number of ones in a word 00339 inline word_t cnt_bits(word_t) const; // number of bits in a word 00342 word_t do_cnt() const throw (); 00343 }; // end class bitvector 00344 00354 class ibis::bitvector::indexSet { 00355 public: 00356 // let the compiler define most of the canonical functions 00357 00358 // allow bitvector::firstIndexSet() to access private member variables 00359 friend indexSet ibis::bitvector::firstIndexSet() const; 00360 00361 //int operator!=(const indexSet& rhs) const {return (it != rhs.it);}; 00362 bool isRange() const {return (nind>=ibis::bitvector::MAXBITS);} 00363 const word_t* indices() const {return ind;}; 00364 word_t nIndices() const {return nind;} 00365 const word_t& currentWord() const {return *it;} 00366 indexSet& operator++(); 00367 00368 private: 00369 array_t<word_t>::const_iterator it; 00370 array_t<word_t>::const_iterator end; 00371 const active_word* active; // points back to the active word 00372 word_t nind; // number of indices 00373 word_t ind[32]; 00374 }; // class ibis::bitvector::indexSet 00375 00377 class ibis::bitvector::const_iterator { 00378 public: 00379 const_iterator() : compressed(0), ind(0), nbits(0), literalvalue(0), 00380 fillbit(0), active(0) {} 00381 00382 const_iterator(const const_iterator& r) 00383 : compressed(r.compressed), ind(r.ind), nbits(r.nbits), 00384 literalvalue(r.literalvalue), fillbit(r.fillbit), active(r.active), 00385 end(r.end), begin(r.begin), it(r.it) {}; 00386 const_iterator& operator=(const const_iterator& r) { 00387 compressed = r.compressed; ind = r.ind; nbits = r.nbits; 00388 literalvalue = r.literalvalue; fillbit = r.fillbit; active = r.active; 00389 end = r.end; begin = r.begin; it = r.it; 00390 return *this;} 00391 00392 inline bool operator*() const; 00393 inline int operator!=(const const_iterator& rhs) const throw (); 00394 inline int operator==(const const_iterator& rhs) const throw (); 00395 00396 inline const_iterator& operator++(); 00397 inline const_iterator& operator--(); 00398 const_iterator& operator+=(int incr); 00399 00400 private: 00401 ibis::bitvector::word_t compressed; 00402 ibis::bitvector::word_t ind; 00403 ibis::bitvector::word_t nbits; 00404 ibis::bitvector::word_t literalvalue; 00405 int fillbit; 00406 const active_word* active; 00407 array_t<word_t>::const_iterator end; 00408 array_t<word_t>::const_iterator begin; 00409 array_t<word_t>::const_iterator it; 00410 00411 void decodeWord(); 00412 00413 // give three functions of bitvector access to private variables 00414 friend void ibis::bitvector::erase(word_t i, word_t j); 00415 friend const_iterator ibis::bitvector::begin() const; 00416 friend const_iterator ibis::bitvector::end() const; 00417 friend class ibis::bitvector::iterator; 00418 }; // end class ibis::bitvector::const_iterator 00419 00427 class ibis::bitvector::iterator { 00428 public: 00429 iterator() : compressed(0), ind(0), nbits(0), literalvalue(0), fillbit(0), 00430 bitv(0), active(0), vec(0) {} 00431 iterator(const iterator& r) 00432 : compressed(r.compressed), ind(r.ind), nbits(r.nbits), 00433 literalvalue(r.literalvalue), fillbit(r.fillbit), bitv(r.bitv), 00434 active(r.active), vec(r.vec), it(r.it) {}; 00435 const iterator& operator=(const iterator& r) { 00436 compressed = r.compressed; ind = r.ind; nbits = r.nbits; 00437 literalvalue = r.literalvalue; fillbit = r.fillbit; bitv = r.bitv; 00438 active = r.active; vec = r.vec; it = r.it; return *this;} 00439 00440 inline bool operator*() const; // still can not modify this 00441 inline int operator!=(const const_iterator& rhs) const throw (); 00442 inline int operator==(const const_iterator& rhs) const throw (); 00443 inline int operator!=(const iterator& rhs) const throw (); 00444 inline int operator==(const iterator& rhs) const throw (); 00445 00446 inline iterator& operator++(); 00447 inline iterator& operator--(); 00448 iterator& operator+=(int incr); 00449 const iterator& operator=(int val); 00450 00451 private: 00452 ibis::bitvector::word_t compressed; 00453 ibis::bitvector::word_t ind; 00454 ibis::bitvector::word_t nbits; 00455 ibis::bitvector::word_t literalvalue; 00456 int fillbit; 00457 ibis::bitvector* bitv; 00458 active_word* active; 00459 array_t<word_t>* vec; 00460 array_t<word_t>::iterator it; 00461 00462 void decodeWord(); 00463 00464 friend iterator ibis::bitvector::begin(); 00465 friend iterator ibis::bitvector::end(); 00466 }; // end class ibis::bitvector::iterator 00467 00474 inline void ibis::bitvector::sloppySize(word_t n) const { 00475 nbits = n-active.nbits; 00476 #if defined(WAH_CHECK_SIZE) 00477 word_t nb = do_cnt(); 00478 if (nb != nbits) { 00479 const_cast<ibis::bitvector*>(this)->adjustSize(0, nbits); 00480 LOGGER(ibis::gVerbose >= 0) 00481 << "ibis::bitvector::sloppySize -- adjust the number of bits to" 00482 << n; 00483 } 00484 #endif 00485 } // ibis::bitvector::sloppySize 00486 00488 inline bool ibis::bitvector::all0s() const { 00489 if (m_vec.empty()) { 00490 return true; 00491 } 00492 else if (m_vec.size() == 1) { 00493 return (m_vec[0] == 0 || (m_vec[0]>=HEADER0 && m_vec[0]<HEADER1)); 00494 } 00495 else { 00496 return false; 00497 } 00498 } // ibis::bitvector::all0s 00499 00501 inline bool ibis::bitvector::all1s() const { 00502 if (m_vec.size() == 1) { 00503 return (m_vec[0] == ALLONES || (m_vec[0] > HEADER1)); 00504 } 00505 else { 00506 return false; 00507 } 00508 } // ibis::bitvector::all1s 00509 00511 inline ibis::bitvector::word_t 00512 ibis::bitvector::cnt_bits(ibis::bitvector::word_t val) const { 00513 return ((val>ALLONES) ? ((val&MAXCNT)*MAXBITS) : MAXBITS); 00514 } // ibis::bitvector::cnt_bits 00515 00517 inline ibis::bitvector::word_t 00518 ibis::bitvector::cnt_ones(ibis::bitvector::word_t val) const { 00519 // number of 1 bits in a value between 0 and 255 00520 static const word_t table[256] = { 00521 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 00522 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 00523 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 00524 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 00525 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 00526 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 00527 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 00528 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 00529 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 00530 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 00531 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 00532 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 00533 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 00534 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 00535 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 00536 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8}; 00537 word_t cnt; 00538 if (val > 0) { 00539 // 32-bit integers 00540 cnt = table[val&0xFFU] + table[(val>>8)&0xFFU] + 00541 table[(val>>16)&0xFFU] + table[(val>>24)&0xFFU]; 00542 // #if INT_MAX == 0x7FFFFFFFL 00543 // #else 00544 // // assume 64 bit integer 00545 // //assert(8 == sizeof(word_t)); 00546 // cnt = part[val&0xFFU] + 00547 // part[(val>>8)&0xFFU] + part[(val>>16)&0xFFU] + 00548 // part[(val>>24)&0xFFU] + part[(val>>32)&0xFFU] + 00549 // part[(val>>40)&0xFFU] + part[(val>>48)&0xFFU] + 00550 // part[(val>>56)&0xFFU]; 00551 // #endif 00552 } 00553 else { 00554 cnt = 0; 00555 } 00556 return cnt; 00557 } // ibis::bitvector::cnt_ones 00558 00559 inline ibis::bitvector::word_t ibis::bitvector::size() const throw() { 00560 return ((nbits?nbits:(nbits=do_cnt()))+active.nbits); 00561 } // ibis::bitvector::size 00562 00563 inline ibis::bitvector::word_t ibis::bitvector::cnt() const { 00564 if (nset==0 && !m_vec.empty()) 00565 nbits = do_cnt(); 00566 return (nset+cnt_ones(active.val)); 00567 } // ibis::bitvector::cnt 00568 00569 inline ibis::bitvector::word_t ibis::bitvector::numFillWords() const { 00570 word_t cnt=0; 00571 array_t<ibis::bitvector::word_t>::const_iterator it = m_vec.begin(); 00572 while (it != m_vec.end()) { 00573 cnt += (*it >> ibis::bitvector::MAXBITS); 00574 it++; 00575 } 00576 return cnt; 00577 } // inline word_t ibis::bitvector::numFillWords() const { 00578 00582 inline const ibis::bitvector& 00583 ibis::bitvector::operator=(const ibis::bitvector& bv) { 00584 nbits = bv.nbits; nset = bv.nset; active = bv.active; 00585 m_vec.deepCopy(bv.m_vec); 00586 return *this; 00587 } 00588 00590 inline ibis::bitvector& ibis::bitvector::copy(const ibis::bitvector& bv) { 00591 nbits = bv.nbits; nset = bv.nset; active = bv.active; 00592 m_vec.deepCopy(bv.m_vec); 00593 return *this; 00594 } 00595 00596 inline ibis::bitvector& ibis::bitvector::swap(bitvector& bv) { 00597 word_t tmp; 00598 tmp = bv.nbits; bv.nbits = nbits; nbits = tmp; 00599 tmp = bv.nset; bv.nset = nset; nset = tmp; 00600 active_word atmp = bv.active; 00601 bv.active = active; active = atmp; 00602 m_vec.swap(bv.m_vec); 00603 return *this; 00604 } 00605 00609 inline void ibis::bitvector::append_active() { 00610 // std::cout << "before: active.val = " << std::hex << active.val; 00611 // if (m_vec.size()) 00612 // std::cout << ", m_vec.back() = " << m_vec.back(); 00613 // std::cout << std::dec << std::endl; 00614 if (m_vec.empty()) { 00615 m_vec.push_back(active.val); 00616 } 00617 else if (active.val == 0) {// incoming word is zero 00618 if (m_vec.back() == 0) { 00619 m_vec.back() = (HEADER0 + 2); 00620 } 00621 else if (m_vec.back() >= HEADER0 && m_vec.back() < HEADER1) { 00622 ++ m_vec.back(); 00623 } 00624 else { 00625 m_vec.push_back(active.val); 00626 } 00627 } 00628 else if (active.val == ALLONES) {// incoming word is allones 00629 if (m_vec.back() == ALLONES) { 00630 m_vec.back() = (HEADER1 | 2); 00631 } 00632 else if (m_vec.back() >= HEADER1) { 00633 ++m_vec.back(); 00634 } 00635 else { 00636 m_vec.push_back(active.val); 00637 } 00638 } 00639 else { // incoming word contains a mixture of bits 00640 m_vec.push_back(active.val); 00641 } 00642 nbits += MAXBITS; 00643 active.reset(); 00644 nset = 0; 00645 // std::cout << "after: m_vec.back() = " << std::hex << m_vec.back() 00646 // << std::dec << std::endl; 00647 } // ibis::bitvector::append_active 00648 00652 inline void ibis::bitvector::append_counter(int val, word_t cnt) { 00653 word_t head = 2 + val; 00654 word_t w = (head << SECONDBIT) + cnt; 00655 nbits += cnt*MAXBITS; 00656 if (m_vec.empty()) { 00657 m_vec.push_back(w); 00658 } 00659 else if ((m_vec.back()>>SECONDBIT) == head) { 00660 m_vec.back() += cnt; 00661 } 00662 else if ((m_vec.back()==ALLONES) && head==3) { 00663 m_vec.back() = w + 1; 00664 } 00665 else if ((m_vec.back() == 0) && head==2) { 00666 m_vec.back() = w + 1; 00667 } 00668 else { 00669 m_vec.push_back(w); 00670 } 00671 } // ibis::bitvector::append_counter 00672 00673 // append a single bit 00674 inline ibis::bitvector& ibis::bitvector::operator+=(int b) { 00675 active.append(b); 00676 if (active.is_full()) 00677 append_active(); 00678 return *this; 00679 } // ibis::bitvector::operator+= 00680 00683 inline void ibis::bitvector::appendFill(int val, word_t n) { 00684 if (n == 0) return; 00685 if (active.nbits > 0) { 00686 word_t tmp = (MAXBITS - active.nbits); 00687 if (tmp > n) tmp = n; 00688 active.nbits += tmp; 00689 active.val <<= tmp; 00690 n -= tmp; 00691 if (val != 0) 00692 active.val |= (1U<<tmp) - 1; 00693 if (active.nbits >= MAXBITS) 00694 append_active(); 00695 } 00696 if (n >= MAXBITS) { 00697 word_t cnt = n / MAXBITS; 00698 if (cnt > 1) 00699 append_counter(val, cnt); 00700 else if (val != 0) { 00701 active.val = ALLONES; 00702 append_active(); 00703 } 00704 else { 00705 active.val = 0; 00706 append_active(); 00707 } 00708 n -= cnt * MAXBITS; 00709 } 00710 if (n > 0) { 00711 active.nbits = n; 00712 active.val = val*((1U<<n)-1); 00713 } 00714 } // ibis::bitvector::appendFill 00715 00721 inline void ibis::bitvector::copy_runs(run& it, word_t& nw) { 00722 #if defined(DEBUG) && DEBUG + 0 > 1 00723 LOGGER(ibis::gVerbose >= 0) 00724 << "ibis::bitvector::copy_runs(0x" 00725 << std::hex << std::setw(8) << std::setfill('0') 00726 << *(it.it) << ", " << std::dec << nw 00727 << ") ... it.nWords = " << it.nWords; 00728 #endif 00729 // deal with the first word -- attach it to the last word in m_vec 00730 if (it.isFill != 0) { 00731 append_counter(it.fillBit, it.nWords); 00732 nw -= it.nWords; 00733 } 00734 else { 00735 active.val = *(it.it); 00736 append_active(); 00737 -- nw; 00738 } 00739 ++ it.it; 00740 it.nWords = 0; 00741 nset = 0; 00742 nbits += MAXBITS * nw; 00743 00744 while (nw > 0) { // copy the words 00745 it.decode(); 00746 if (nw >= it.nWords) { 00747 m_vec.push_back(*(it.it)); 00748 nw -= it.nWords; 00749 it.nWords = 0; 00750 ++ it.it; 00751 } 00752 else { 00753 break; 00754 } 00755 } 00756 nbits -= MAXBITS * nw; 00757 } // ibis::bitvector::copy_runs 00758 00761 inline void ibis::bitvector::copy_runsn(run& it, word_t& nw) { 00762 #if defined(DEBUG) && DEBUG + 0 > 1 00763 LOGGER(ibis::gVerbose >= 0) 00764 << "ibis::bitvector::copy_runsn(0x" 00765 << std::hex << std::setw(8) << std::setfill('0') 00766 << *(it.it) << ", " << std::dec << nw 00767 << ") ... it.nWords = " << it.nWords; 00768 #endif 00769 // deal with the first word -- need to attach it to the last word in m_vec 00770 if (it.isFill != 0) { 00771 append_counter(!it.fillBit, it.nWords); 00772 nw -= it.nWords; 00773 } 00774 else { 00775 active.val = ALLONES ^ *(it.it); 00776 append_active(); 00777 -- nw; 00778 } 00779 ++ it.it; // advance to the next word 00780 it.nWords = 0; 00781 nset = 0; 00782 nbits += MAXBITS * nw; 00783 00784 while (nw > 0) { // copy the words 00785 it.decode(); 00786 if (nw >= it.nWords) { 00787 m_vec.push_back((it.isFill?FILLBIT:ALLONES) ^ *(it.it)); 00788 nw -= it.nWords; 00789 it.nWords = 0; 00790 ++ it.it; 00791 } 00792 else { 00793 break; 00794 } 00795 } 00796 nbits -= MAXBITS * nw; 00797 } // ibis::bitvector::copy_runsn 00798 00802 inline void ibis::bitvector::copy_fill 00803 (array_t<ibis::bitvector::word_t>::iterator& jt, run& it) { 00804 if (it.fillBit == 0) { 00805 while (it.nWords > 3) { 00806 *jt = 0; 00807 jt[1] = 0; 00808 jt[2] = 0; 00809 jt[3] = 0; 00810 jt += 4; 00811 it.nWords -= 4; 00812 } 00813 if (it.nWords == 1) { 00814 *jt = 0; ++jt; 00815 } 00816 else if (it.nWords == 2) { 00817 *jt = 0; jt[1] = 0; jt += 2; 00818 } 00819 else if (it.nWords == 3) { 00820 *jt = 0; jt[1] = 0; jt[2] = 0; jt += 3; 00821 } 00822 } 00823 else { 00824 while (it.nWords > 3) { 00825 *jt = ALLONES; 00826 jt[1] = ALLONES; 00827 jt[2] = ALLONES; 00828 jt[3] = ALLONES; 00829 jt += 4; 00830 it.nWords -= 4; 00831 } 00832 if (it.nWords == 1) { 00833 *jt = ALLONES; ++jt; 00834 } 00835 else if (it.nWords == 2) { 00836 *jt = ALLONES; jt[1] = ALLONES; jt += 2; 00837 } 00838 else if (it.nWords == 3) { 00839 *jt = ALLONES; jt[1] = ALLONES; jt[2] = ALLONES; jt += 3; 00840 } 00841 } 00842 it.nWords = 0; 00843 ++ it.it; 00844 } // ibis::bitvector::copy_fill 00845 00850 inline void ibis::bitvector::copy_runs 00851 (array_t<ibis::bitvector::word_t>::iterator& jt, run& it, word_t& nw) { 00852 while (nw >= it.nWords && nw > 0) { 00853 if (it.isFill != 0) { // copy a fill 00854 const array_t<word_t>::iterator iend = jt + it.nWords; 00855 if (it.fillBit == 0) { 00856 while (jt < iend) { 00857 *jt = 0; 00858 ++ jt; 00859 } 00860 } 00861 else { 00862 while (jt < iend) { 00863 *jt = ALLONES; 00864 ++ jt; 00865 } 00866 } 00867 nw -= it.nWords; 00868 } 00869 else { // copy a single word 00870 *jt = *(it.it); 00871 ++ jt; 00872 -- nw; 00873 } 00874 ++ it.it; // advance to the next word 00875 if (nw > 0) { 00876 it.decode(); 00877 } 00878 else { 00879 it.nWords = 0; 00880 return; 00881 } 00882 } 00883 } // ibis::bitvector::copy_runs 00884 00887 inline void ibis::bitvector::copy_runsn 00888 (array_t<ibis::bitvector::word_t>::iterator& jt, run& it, word_t& nw) { 00889 while (nw >= it.nWords) { 00890 if (it.isFill != 0) { // a fill 00891 const array_t<word_t>::iterator iend = jt + it.nWords; 00892 if (it.fillBit == 0) { 00893 while (jt < iend) { 00894 *jt = ALLONES; 00895 ++ jt; 00896 } 00897 } 00898 else { 00899 while (jt < iend) { 00900 *jt = 0; 00901 ++ jt; 00902 } 00903 } 00904 nw -= it.nWords; 00905 } 00906 else { // a literal word 00907 *jt = *(it.it) ^ ALLONES; 00908 ++ jt; 00909 -- nw; 00910 } 00911 ++ it.it; // advance to the next word 00912 if (nw > 0) { 00913 it.decode(); 00914 } 00915 else { 00916 it.nWords = 0; 00917 return; 00918 } 00919 } 00920 } // ibis::bitvector::copy_runsn 00921 00922 inline ibis::bitvector::iterator ibis::bitvector::begin() { 00923 iterator it; 00924 it.it = m_vec.begin(); 00925 it.vec = &m_vec; 00926 it.bitv = this; 00927 it.active = &active; 00928 it.decodeWord(); 00929 return it; 00930 } // ibis::bitvector::begin 00931 00932 inline ibis::bitvector::iterator ibis::bitvector::end() { 00933 iterator it; 00934 it.ind = 0; 00935 it.compressed = 0; 00936 it.nbits = 0; 00937 it.literalvalue = 0; 00938 it.fillbit = 0; 00939 it.it = m_vec.end() + 1; 00940 it.vec = &m_vec; 00941 it.bitv = this; 00942 it.active = &active; 00943 return it; 00944 } // ibis::bitvector::end 00945 00946 inline ibis::bitvector::const_iterator ibis::bitvector::begin() const { 00947 const_iterator it; 00948 it.it = m_vec.begin(); 00949 it.begin = m_vec.begin(); 00950 it.end = m_vec.end(); 00951 it.active = &active; 00952 it.decodeWord(); 00953 return it; 00954 } // ibis::bitvector::begin 00955 00956 // dereference -- no error checking 00957 inline bool ibis::bitvector::iterator::operator*() const { 00958 #if defined(DEBUG) && DEBUG + 0 > 1 00959 if (vec==0 || it<vec->begin() || it>vec->end()) 00960 throw "bitvector::const_iterator not initialized correctly."; 00961 #endif 00962 if (compressed != 0) 00963 return (fillbit != 0); 00964 else 00965 return (1 & (literalvalue >> (bitvector::SECONDBIT - ind))); 00966 } // ibis::bitvector::iterator::operator* 00967 00968 // comparison only based on the iterator 00969 inline int ibis::bitvector::iterator::operator!= 00970 (const ibis::bitvector::const_iterator& rhs) const throw () { 00971 return (it != rhs.it); 00972 } 00973 inline int ibis::bitvector::iterator::operator== 00974 (const ibis::bitvector::const_iterator& rhs) const throw () { 00975 return (it == rhs.it); 00976 } 00977 inline int ibis::bitvector::iterator::operator!= 00978 (const ibis::bitvector::iterator& rhs) const throw () { 00979 return (it != rhs.it); 00980 } 00981 inline int ibis::bitvector::iterator::operator== 00982 (const ibis::bitvector::iterator& rhs) const throw () { 00983 return (it == rhs.it); 00984 } 00985 00986 // increment by one 00987 inline ibis::bitvector::iterator& ibis::bitvector::iterator::operator++() { 00988 #if defined(DEBUG) && DEBUG + 0 > 1 00989 if (vec==0 || it<vec->begin() || it>vec->end()) 00990 throw "bitvector::iterator not formed correctly."; 00991 #endif 00992 if (ind+1<nbits) {++ind;} 00993 else {++ it; decodeWord();} 00994 return *this; 00995 } 00996 00997 // decrement by one 00998 inline ibis::bitvector::iterator& ibis::bitvector::iterator::operator--() { 00999 #if defined(DEBUG) && DEBUG + 0 > 1 01000 if (vec==0 || it<vec->begin() || it>vec->end()+1) 01001 throw "bitvector::iterator not formed correctly."; 01002 #endif 01003 if (ind) -- ind; 01004 else { 01005 if (it <= vec->end()) -- it; 01006 else if (active->nbits) it = vec->end(); 01007 else it = vec->end() - 1; 01008 decodeWord(); 01009 if (nbits) ind = nbits - 1; 01010 } 01011 return *this; 01012 } 01013 01014 inline ibis::bitvector::const_iterator ibis::bitvector::end() const { 01015 const_iterator it; 01016 it.ind = 0; 01017 it.compressed = 0; 01018 it.nbits = 0; 01019 it.literalvalue = 0; 01020 it.fillbit = 0; 01021 it.it = m_vec.end() + 1; 01022 it.begin = m_vec.begin(); 01023 it.end = m_vec.end(); 01024 it.active = &active; 01025 return it; 01026 } // ibis::bitvector::end() 01027 01028 // dereference -- no error checking 01029 inline bool ibis::bitvector::const_iterator::operator*() const { 01030 #if defined(DEBUG) && DEBUG + 0 > 1 01031 if (it==0 || end==0 || it>end || nbits<=ind) 01032 throw "bitvector::const_iterator not initialized correctly."; 01033 #endif 01034 if (compressed != 0) 01035 return (fillbit != 0); 01036 else 01037 return (1 & (literalvalue >> (bitvector::SECONDBIT - ind))); 01038 } 01039 01040 // comparison only based on the iterator 01041 inline int ibis::bitvector::const_iterator::operator!= 01042 (const ibis::bitvector::const_iterator& rhs) const throw (){ 01043 return (it != rhs.it); 01044 } 01045 inline int ibis::bitvector::const_iterator::operator== 01046 (const ibis::bitvector::const_iterator& rhs) const throw () { 01047 return (it == rhs.it); 01048 } 01049 01050 // increment by one 01051 inline ibis::bitvector::const_iterator& 01052 ibis::bitvector::const_iterator::operator++() { 01053 #if defined(DEBUG) && DEBUG + 0 > 1 01054 if (it==0 || end==0 || it>end) 01055 throw "bitvector::const_iterator not formed correctly."; 01056 #endif 01057 if (ind+1<nbits) {++ind;} 01058 else {++ it; decodeWord();} 01059 return *this; 01060 } 01061 01062 // decrement by one 01063 inline ibis::bitvector::const_iterator& 01064 ibis::bitvector::const_iterator::operator--() { 01065 #if defined(DEBUG) && DEBUG + 0 > 1 01066 if (it==0 || end==0 || it>end) 01067 throw "bitvector::const_iterator not formed correctly."; 01068 #endif 01069 if (ind) -- ind; 01070 else { 01071 if (it <= end) -- it; 01072 else if (active->nbits) it = end; 01073 else it = end - 1; 01074 decodeWord(); 01075 if (nbits) ind = nbits - 1; 01076 } 01077 return *this; 01078 } 01079 01080 inline ibis::bitvector::indexSet ibis::bitvector::firstIndexSet() const { 01081 indexSet is; 01082 if (m_vec.end() > m_vec.begin()) { 01083 is.it = m_vec.begin() - 1; 01084 is.end = m_vec.end(); 01085 } 01086 else { 01087 is.it = 0; 01088 is.end = 0; 01089 } 01090 is.active = &active; 01091 is.ind[0] = static_cast<word_t>(-1); 01092 is.nind = 0; 01093 ++is; 01094 return is; 01095 } // end ibis::bitvector::firstIndexSet; 01096 01097 inline double ibis::bitvector::randomSize(word_t nb, word_t nc) { 01098 double sz = 0.0; 01099 if (nb > 0 && nb >= nc && nb > MAXBITS) { 01100 const double den = static_cast<double>(nc) / 01101 static_cast<double>(nb); 01102 const word_t nw = nb / MAXBITS; 01103 sz = 3.0 + nw - nw * (pow(1.0-den, static_cast<int>(2*MAXBITS)) 01104 + pow(den, static_cast<int>(2*MAXBITS))); 01105 } 01106 return sz*sizeof(word_t); 01107 } // end ibis::bitvector::randomSize 01108 01109 inline double ibis::bitvector::markovSize(word_t nb, word_t nc, double f) { 01110 double sz = 0.0; 01111 if (nb > 0 && nb >= nc && nb > MAXBITS) { 01112 const double den = static_cast<double>(nc) / 01113 static_cast<double>(nb); 01114 const word_t nw = nb / MAXBITS; 01115 if ((den <= 0.5 && f > 1.00001) || (den > 0.5 && (1.0-den)*f > den)) 01116 sz = ((1.0-den) * pow(1.0-den/((1.0-den)*f), 01117 static_cast<int>(2*MAXBITS-3)) + 01118 den * pow(1.0-1.0/f, static_cast<int>(2*MAXBITS-3))); 01119 else 01120 sz = (pow(1.0-den, static_cast<int>(2*MAXBITS)) + 01121 pow(den, static_cast<int>(2*MAXBITS))); 01122 sz = 3.0 + nw * (1.0 - sz); 01123 } 01124 return sz*sizeof(word_t); 01125 } // end ibis::bitvector::markovSize 01126 01127 inline void ibis::bitvector::turnOnRawBit(const word_t ind) { 01128 if (ind < nbits) { // in regular words 01129 m_vec[ind / MAXBITS] |= (1 << (SECONDBIT - (ind % MAXBITS))); 01130 nset = 0; // don't track nset 01131 } 01132 else { // assume to be in the active word 01133 active.val |= (1 << (active.nbits - (ind - nbits) - 1)); 01134 #if defined(DEBUG) 01135 if (ind >= nbits + active.nbits || 01136 active.val >= (1U << active.nbits)) { 01137 LOGGER(ibis::gVerbose >= 0) 01138 << "FastBit::bitvector::turnOnRawBit(" << ind 01139 << ") found a bad active word"; 01140 } 01141 #endif 01142 } 01143 } // ibis::bitvector::turnOnRawBit 01144 01145 inline void ibis::bitvector::clear() { 01146 nbits = 0; nset = 0; active.reset(); m_vec.clear(); 01147 LOGGER(ibis::gVerbose > 9) 01148 << "bitvector (" << static_cast<void*>(this) 01149 << ") cleared the content of bitvector with m_vec at " 01150 << static_cast<void*>(&m_vec); 01151 } 01152 01153 std::ostream& operator<<(std::ostream&, const ibis::bitvector&); 01154 #endif // __BITVECTOR_H
![]() |