bitvector.h
Go to the documentation of this file.
1 // $Id$
2 // Author: John Wu <John.Wu at ACM.org>
3 // Lawrence Berkeley National Laboratory
4 // Copyright (c) 2000-2016 the Regents of the University of California
5 #ifndef BITVECTOR_H
6 #define BITVECTOR_H
7 
10 #include "array_t.h" // alternative to std::vector
11 
12 #if defined(_MSC_VER) && defined(_WIN32)
13 //disable warnings on extern before template instantiation
14 #pragma warning (disable : 4231)
15 #endif
16 #if defined(_WIN32) && (defined(_MSC_VER) || defined(__MINGW32__)) && defined(CXX_USE_DLL) && !defined(DLL_EXPORT)
17 extern template class FASTBIT_CXX_DLLSPEC ibis::array_t<uint32_t>;
18 #endif
19 
20 
62 class FASTBIT_CXX_DLLSPEC ibis::bitvector {
63 public:
64  typedef uint32_t word_t;
65 
67  ~bitvector() {clear();};
68  // constructors of bitvector class
69  bitvector();
70  bitvector(const bitvector& bv);
71  explicit bitvector(const char* file);
72  explicit bitvector(const array_t<word_t>& arr);
73  explicit bitvector(const array_t<word_t>& arr, const size_t begin,
74  const size_t end);
75  explicit bitvector(word_t*, size_t);
76 
77  inline const bitvector& operator=(const bitvector& bv);
78  inline bitvector& copy(const bitvector& bv);
79  inline bitvector& swap(bitvector& bv);
80 
81  // use bv to replace part of the existing value, match the ith bit with
82  // the first of bv, return reference to self
83  //bitvector& copy(const word_t i, const bitvector& bv);
84  void setBit(const word_t i, int val);
85  int getBit(const word_t i) const;
86  inline void turnOnRawBit(const word_t i);
87  void erase(word_t i, word_t j);
88 
89  void set(int val, word_t n);
90  void clear();
91 
92  bitvector& operator+=(const bitvector& bv);
93  inline bitvector& operator+=(int b);
94  inline void appendByte(unsigned char);
95  void appendWord(word_t w);
96  inline void appendFill(int val, word_t n);
97 
98  int operator==(const bitvector& rhs) const;
99 
100  void flip();
102  void operator&=(const bitvector& rhs);
105  bitvector* operator&(const bitvector&) const;
107  void operator|=(const bitvector& rhs);
109  bitvector* operator|(const bitvector&) const;
111  void operator^=(const bitvector& rhs);
113  bitvector* operator^(const bitvector&) const;
115  void operator-=(const bitvector& rhs);
118  bitvector* operator-(const bitvector&) const;
119  bool operator<(const bitvector&) const;
120 
121  void subset(const bitvector& mask, bitvector& res) const;
122  word_t count(const bitvector& mask) const;
123 
124  // I/O functions.
125  void read(const char *fn);
126  void write(const char *fn) const;
127  void write(int fdes) const;
128  void write(array_t<word_t>& arr) const;
129 
130  void compress();
131  void decompress();
132  word_t compressible() const;
135  bool isCompressed() const {return (m_vec.size()*MAXBITS < nbits);}
136 
137  inline word_t size() const throw();
138  inline void sloppySize(word_t n) const;
139  inline word_t cnt() const;
140  inline word_t count() const {return cnt();}
141  inline word_t sloppyCount() const;
142  inline word_t numFillWords() const;
144  uint32_t bytes() const throw() {
145  return (m_vec.size()*sizeof(word_t) + sizeof(bitvector));
146  };
150  uint32_t getSerialSize() const throw() {
151  return (m_vec.size() + 1 + (active.nbits>0)) * sizeof(word_t);
152  };
154  static word_t bitsPerLiteral() {return MAXBITS;}
155  inline static double randomSize(word_t nb, word_t nc);
156  inline static double markovSize(word_t nb, word_t nc, double f);
157  static double clusteringFactor(word_t nb, word_t nc, word_t sz);
158 
159  void adjustSize(word_t nv, word_t nt);
160  void reserve(unsigned nb, unsigned nc, double cf=0.0);
161 
164  bool empty() const {return all0s() && active.val == 0;}
166  std::ostream& print(std::ostream &) const;
167 
169  class iterator;
170  inline iterator end();
171  inline iterator begin();
172 
174  class const_iterator;
175  inline const_iterator end() const;
176  inline const_iterator begin() const;
177 
179  class indexSet;
180  inline indexSet firstIndexSet() const;
181 
183  class pit;
184 
185  // give accesses to some friends
186  friend class indexSet;
187  friend class iterator;
188  friend class const_iterator;
189 
190 protected:
191  inline bool all0s() const;
192  inline bool all1s() const;
193 
194 private:
195  // static members, i.e., constants to be used internally
196  static const unsigned MAXBITS;
197  static const unsigned SECONDBIT;
198  static const word_t FILLBIT;
199  static const word_t HEADER0;
200  static const word_t HEADER1;
201  static const word_t ALLONES;
202  static const word_t MAXCNT;
203 
210  struct active_word {
211  word_t val; // the value
212  word_t nbits; // total number of bits
213 
214  active_word() : val(0), nbits(0) {};
215  void reset() {val = 0; nbits = 0;};
216  int is_full() const {return (nbits >= MAXBITS);};
218  void append(int b) {
219  val <<= 1; nbits ++; val += b;
220  };
221  }; // struct active_word
222 
225  struct run {
226  int isFill;
227  int fillBit;
228  word_t nWords;
230  run() : isFill(0), fillBit(0), nWords(0), it(0) {};
231  void decode() {
232  fillBit = (*it > HEADER1);
233  if (*it > ALLONES) {
234  nWords = (*it & MAXCNT);
235  isFill = 1;
236  }
237  else {
238  nWords = 1;
239  isFill = 0;
240  }
241  };
244  void operator--() {
245  if (nWords > 1) {--nWords;}
246  else {++ it; nWords = 0;}
247  };
250  void operator-=(word_t len) {
251  while (len > 0) {
252  if (nWords == 0) decode();
253  if (isFill != 0) {
254  if (nWords > len) {nWords -= len; len = 0;}
255  else if (nWords == len) {nWords = 0; len = 0; ++ it;}
256  else {len -= nWords; ++ it; nWords = 0;}
257  }
258  else {-- len; ++ it; nWords = 0;}
259  }
260  };
261  };
262  friend struct run;
263  friend struct active_word;
264 
265  // member variables of bitvector class
266  mutable word_t nbits;
267  mutable word_t nset;
268  active_word active;
269  array_t<word_t> m_vec;
270 
271  // private functions of bitvector class
272  word_t count_c1(const bitvector& mask) const;
273  word_t count_c2(const bitvector& mask) const;
274  // The following three functions all performs or operation, _c2 and _c1
275  // generate compressed solutions, but _c0, _d1, and _d2 generate
276  // uncompressed solutions.
277  // or_c2 assumes there are compressed word in both operands
278  // or_c1 *this may contain compressed word, but not rhs
279  // or_c0 assumes both operands are not compressed
280  // or_d1 *this contains no compressed word and is overwritten with the
281  // result
282  // or_d2 both *this and rhs are compressed, but res is not compressed
283  void or_c2(const bitvector& rhs, bitvector& res) const;
284  void or_c1(const bitvector& rhs, bitvector& res) const;
285  void or_c0(const bitvector& rhs);
286  void or_d1(const bitvector& rhs);
287  void or_d2(const bitvector& rhs, bitvector& res) const;
288  void and_c2(const bitvector& rhs, bitvector& res) const;
289  void and_c1(const bitvector& rhs, bitvector& res) const;
290  void and_c0(const bitvector& rhs);
291  void and_d1(const bitvector& rhs);
292  void and_d2(const bitvector& rhs, bitvector& res) const;
293  void xor_c2(const bitvector& rhs, bitvector& res) const;
294  void xor_c1(const bitvector& rhs, bitvector& res) const;
295  void xor_c0(const bitvector& rhs);
296  void xor_d1(const bitvector& rhs);
297  void xor_d2(const bitvector& rhs, bitvector& res) const;
298  void minus_c2(const bitvector& rhs, bitvector& res) const;
299  void minus_c1(const bitvector& rhs, bitvector& res) const;
300  void minus_c1x(const bitvector& rhs, bitvector& res) const;
301  void minus_c0(const bitvector& rhs);
302  void minus_d1(const bitvector& rhs);
303  void minus_d2(const bitvector& rhs, bitvector& res) const;
304  inline void copy_runs(run& it, word_t& nw); // copy nw words
305  inline void copy_runsn(run& it, word_t& nw); // copy nw words and negate
306  inline void copy_fill(array_t<word_t>::iterator& jt, run& it);
307  inline void copy_runs(array_t<word_t>::iterator& jt, run& it,
308  word_t& nw);
309  inline void copy_runsn(array_t<word_t>::iterator& jt, run& it,
310  word_t& nw);
311  void decompress(array_t<word_t>& tmp) const;
312  void copy_comp(array_t<word_t>& tmp) const;
313  inline void append_active();
314  inline void append_counter(int val, word_t cnt);
315  inline word_t cnt_ones(word_t) const; // number of ones in a word
316  inline word_t cnt_bits(word_t) const; // number of bits in a word
317  word_t do_cnt() const throw ();
318 }; // class bitvector
319 
322 public:
323  const_iterator() : compressed(0), ind(0), nbits(0), literalvalue(0),
324  fillbit(0), active(0) {}
325 
326  const_iterator(const const_iterator& r)
327  : compressed(r.compressed), ind(r.ind), nbits(r.nbits),
328  literalvalue(r.literalvalue), fillbit(r.fillbit), active(r.active),
329  end(r.end), begin(r.begin), it(r.it) {};
330  const_iterator& operator=(const const_iterator& r) {
331  compressed = r.compressed; ind = r.ind; nbits = r.nbits;
332  literalvalue = r.literalvalue; fillbit = r.fillbit; active = r.active;
333  end = r.end; begin = r.begin; it = r.it;
334  return *this;}
335 
336  inline bool operator*() const;
337  inline int operator!=(const const_iterator& rhs) const throw ();
338  inline int operator==(const const_iterator& rhs) const throw ();
339 
340  inline const_iterator& operator++();
341  inline const_iterator& operator--();
342  const_iterator& operator+=(int incr);
343 
344 private:
345  ibis::bitvector::word_t compressed;
346  ibis::bitvector::word_t ind;
347  ibis::bitvector::word_t nbits;
348  ibis::bitvector::word_t literalvalue;
349  int fillbit;
350  const active_word* active;
354 
355  void decodeWord();
356 
357  // give three functions of bitvector access to private variables
358  friend void ibis::bitvector::erase(word_t i, word_t j);
359  friend const_iterator ibis::bitvector::begin() const;
360  friend const_iterator ibis::bitvector::end() const;
361  friend class ibis::bitvector::iterator;
362 }; // class ibis::bitvector::const_iterator
363 
372 public:
373  iterator() : compressed(0), ind(0), nbits(0), literalvalue(0), fillbit(0),
374  bitv(0), active(0), vec(0) {}
375  iterator(const iterator& r)
376  : compressed(r.compressed), ind(r.ind), nbits(r.nbits),
377  literalvalue(r.literalvalue), fillbit(r.fillbit), bitv(r.bitv),
378  active(r.active), vec(r.vec), it(r.it) {};
379  const iterator& operator=(const iterator& r) {
380  compressed = r.compressed; ind = r.ind; nbits = r.nbits;
381  literalvalue = r.literalvalue; fillbit = r.fillbit; bitv = r.bitv;
382  active = r.active; vec = r.vec; it = r.it; return *this;}
383 
384  inline bool operator*() const; // still can not modify this
385  inline int operator!=(const const_iterator& rhs) const throw ();
386  inline int operator==(const const_iterator& rhs) const throw ();
387  inline int operator!=(const iterator& rhs) const throw ();
388  inline int operator==(const iterator& rhs) const throw ();
389 
390  inline iterator& operator++();
391  inline iterator& operator--();
392  iterator& operator+=(int incr);
393  const iterator& operator=(int val);
394 
395 private:
396  ibis::bitvector::word_t compressed;
397  ibis::bitvector::word_t ind;
398  ibis::bitvector::word_t nbits;
399  ibis::bitvector::word_t literalvalue;
400  int fillbit;
401  ibis::bitvector* bitv;
402  active_word* active;
403  array_t<word_t>* vec;
405 
406  void decodeWord();
407 
408  friend iterator ibis::bitvector::begin();
409  friend iterator ibis::bitvector::end();
410 }; // class ibis::bitvector::iterator
411 
421 class FASTBIT_CXX_DLLSPEC ibis::bitvector::indexSet {
422 public:
424  indexSet() : it(0), end(0), active(0), nind(0) {}
426  indexSet(const indexSet& rhs)
427  : it(rhs.it), end(rhs.end), active(rhs.active), nind(rhs.nind) {
428  ind[0] = rhs.ind[0];
429  ind[1] = rhs.ind[1];
430  ind[2] = rhs.ind[2];
431  ind[3] = rhs.ind[3];
432  ind[4] = rhs.ind[4];
433  ind[5] = rhs.ind[5];
434  ind[6] = rhs.ind[6];
435  ind[7] = rhs.ind[7];
436  ind[8] = rhs.ind[8];
437  ind[9] = rhs.ind[9];
438  ind[10] = rhs.ind[10];
439  ind[11] = rhs.ind[11];
440  ind[12] = rhs.ind[12];
441  ind[13] = rhs.ind[13];
442  ind[14] = rhs.ind[14];
443  ind[15] = rhs.ind[15];
444  ind[16] = rhs.ind[16];
445  ind[17] = rhs.ind[17];
446  ind[18] = rhs.ind[18];
447  ind[19] = rhs.ind[19];
448  ind[20] = rhs.ind[20];
449  ind[21] = rhs.ind[21];
450  ind[22] = rhs.ind[22];
451  ind[23] = rhs.ind[23];
452  ind[24] = rhs.ind[24];
453  ind[25] = rhs.ind[25];
454  ind[26] = rhs.ind[26];
455  ind[27] = rhs.ind[27];
456  ind[28] = rhs.ind[28];
457  ind[29] = rhs.ind[29];
458  ind[30] = rhs.ind[30];
459  ind[31] = rhs.ind[31];
460  }
462  indexSet& operator=(const indexSet& rhs) {
463  it = rhs.it;
464  end = rhs.end;
465  active = rhs.active;
466  nind = rhs.nind;
467  ind[0] = rhs.ind[0];
468  ind[1] = rhs.ind[1];
469  ind[2] = rhs.ind[2];
470  ind[3] = rhs.ind[3];
471  ind[4] = rhs.ind[4];
472  ind[5] = rhs.ind[5];
473  ind[6] = rhs.ind[6];
474  ind[7] = rhs.ind[7];
475  ind[8] = rhs.ind[8];
476  ind[9] = rhs.ind[9];
477  ind[10] = rhs.ind[10];
478  ind[11] = rhs.ind[11];
479  ind[12] = rhs.ind[12];
480  ind[13] = rhs.ind[13];
481  ind[14] = rhs.ind[14];
482  ind[15] = rhs.ind[15];
483  ind[16] = rhs.ind[16];
484  ind[17] = rhs.ind[17];
485  ind[18] = rhs.ind[18];
486  ind[19] = rhs.ind[19];
487  ind[20] = rhs.ind[20];
488  ind[21] = rhs.ind[21];
489  ind[22] = rhs.ind[22];
490  ind[23] = rhs.ind[23];
491  ind[24] = rhs.ind[24];
492  ind[25] = rhs.ind[25];
493  ind[26] = rhs.ind[26];
494  ind[27] = rhs.ind[27];
495  ind[28] = rhs.ind[28];
496  ind[29] = rhs.ind[29];
497  ind[30] = rhs.ind[30];
498  ind[31] = rhs.ind[31];
499  return *this;
500  }
501 
502  //int operator!=(const indexSet& rhs) const {return (it != rhs.it);};
504  bool isRange() const {return (nind>=ibis::bitvector::MAXBITS);}
506  const word_t* indices() const {return ind;};
508  word_t nIndices() const {return nind;}
510  const word_t& currentWord() const {return *it;}
511  indexSet& operator++();
512 
513  // allow bitvector::firstIndexSet() to access private member variables
514  friend indexSet ibis::bitvector::firstIndexSet() const;
515 
516 private:
519  const active_word* active; // points back to the active word
520  word_t nind; // number of indices
521  word_t ind[32];
522 }; // class ibis::bitvector::indexSet
523 
530 public:
531  pit(): curr(0xFFFFFFFFU) {}
532  pit(const ibis::bitvector &bv) : curr(0xFFFFFFFFU) {init(bv);}
533 
534  inline ibis::bitvector::word_t operator*() const;
535  inline void next();
536  inline void skip(unsigned);
537  inline void init(const ibis::bitvector&);
538 
539 private:
540  ibis::bitvector::word_t curr;
542 }; // class ibis::bitvector::pit
543 
550 inline void ibis::bitvector::sloppySize(word_t n) const {
551  nbits = n-active.nbits;
552 #if defined(WAH_CHECK_SIZE)
553  word_t nb = do_cnt();
554  if (nb != nbits) {
555  const_cast<ibis::bitvector*>(this)->adjustSize(0, nbits);
556  LOGGER(ibis::gVerbose >= 0)
557  << "bitvector::sloppySize -- adjust the number of bits to "
558  << n;
559  }
560 #endif
561 } // ibis::bitvector::sloppySize
562 
572 inline ibis::bitvector::word_t ibis::bitvector::sloppyCount() const {
573  if (nset > 0) {
574  return nset;
575  }
576  else if (active.nbits == 0 || active.val == 0) {
577  if (m_vec.empty() ||
578  (m_vec.size() == 1 &&
579  (m_vec[0] == 0 ||
580  (m_vec[0]>=HEADER0 && m_vec[0]<HEADER1))))
581  return 0;
582  else
583  return 2;
584  }
585  else {
586  return 1;
587  }
588 } // ibis::bitvector::sloppyCount
589 
592 inline bool ibis::bitvector::all0s() const {
593  if (m_vec.empty()) {
594  return true;
595  }
596  else if (m_vec.size() == 1) {
597  return (m_vec[0] == 0 || (m_vec[0]>=HEADER0 && m_vec[0]<HEADER1));
598  }
599  else {
600  return false;
601  }
602 } // ibis::bitvector::all0s
603 
606 inline bool ibis::bitvector::all1s() const {
607  if (m_vec.size() == 1) {
608  return (m_vec[0] == ALLONES || (m_vec[0] > HEADER1));
609  }
610  else {
611  return false;
612  }
613 } // ibis::bitvector::all1s
614 
616 inline ibis::bitvector::word_t
617 ibis::bitvector::cnt_bits(ibis::bitvector::word_t val) const {
618  return ((val>ALLONES) ? ((val&MAXCNT)*MAXBITS) : MAXBITS);
619 } // ibis::bitvector::cnt_bits
620 
622 inline ibis::bitvector::word_t
623 ibis::bitvector::cnt_ones(ibis::bitvector::word_t val) const {
624  // number of 1 bits in a value between 0 and 255
625  static const word_t table[256] = {
626  0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
627  1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
628  1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
629  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
630  1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
631  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
632  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
633  3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
634  1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
635  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
636  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
637  3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
638  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
639  3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
640  3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
641  4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};
642  return table[val&0xFFU] + table[(val>>8)&0xFFU] +
643  table[(val>>16)&0xFFU] + table[(val>>24)&0xFFU];
644 } // ibis::bitvector::cnt_ones
645 
647 inline ibis::bitvector::word_t ibis::bitvector::size() const throw() {
648  return ((nbits?nbits:(nbits=do_cnt()))+active.nbits);
649 } // ibis::bitvector::size
650 
652 inline ibis::bitvector::word_t ibis::bitvector::cnt() const {
653  if (nset==0 && !m_vec.empty())
654  nbits = do_cnt();
655  return (nset+cnt_ones(active.val));
656 } // ibis::bitvector::cnt
657 
659 inline ibis::bitvector::word_t ibis::bitvector::numFillWords() const {
660  word_t cnt=0;
662  while (it != m_vec.end()) {
663  cnt += (*it >> ibis::bitvector::MAXBITS);
664  it++;
665  }
666  return cnt;
667 } // inline word_t ibis::bitvector::numFillWords() const {
668 
672 inline const ibis::bitvector&
674  nbits = bv.nbits; nset = bv.nset; active = bv.active;
675  m_vec.deepCopy(bv.m_vec);
676  return *this;
677 }
678 
681  nbits = bv.nbits; nset = bv.nset; active = bv.active;
682  m_vec.deepCopy(bv.m_vec);
683  return *this;
684 }
685 
686 inline ibis::bitvector& ibis::bitvector::swap(bitvector& bv) {
687  word_t tmp;
688  tmp = bv.nbits; bv.nbits = nbits; nbits = tmp;
689  tmp = bv.nset; bv.nset = nset; nset = tmp;
690  active_word atmp = bv.active;
691  bv.active = active; active = atmp;
692  m_vec.swap(bv.m_vec);
693  return *this;
694 }
695 
699 inline void ibis::bitvector::append_active() {
700 // std::cout << "before: active.val = " << std::hex << active.val;
701 // if (m_vec.size())
702 // std::cout << ", m_vec.back() = " << m_vec.back();
703 // std::cout << std::dec << std::endl;
704  if (m_vec.empty()) {
705  m_vec.push_back(active.val);
706  }
707  else if (active.val == 0) {// incoming word is zero
708  if (m_vec.back() == 0) {
709  m_vec.back() = (HEADER0 + 2);
710  }
711  else if (m_vec.back() >= HEADER0 && m_vec.back() < HEADER1) {
712  ++ m_vec.back();
713  }
714  else {
715  m_vec.push_back(active.val);
716  }
717  }
718  else if (active.val == ALLONES) {// incoming word is allones
719  if (m_vec.back() == ALLONES) {
720  m_vec.back() = (HEADER1 | 2);
721  }
722  else if (m_vec.back() >= HEADER1) {
723  ++ m_vec.back();
724  }
725  else {
726  m_vec.push_back(active.val);
727  }
728  }
729  else { // incoming word contains a mixture of bits
730  m_vec.push_back(active.val);
731  }
732  nbits += MAXBITS;
733  active.reset();
734  nset = 0;
735 // std::cout << "after: m_vec.back() = " << std::hex << m_vec.back()
736 // << std::dec << std::endl;
737 } // ibis::bitvector::append_active
738 
741 inline void ibis::bitvector::append_counter(int val, word_t cnt) {
742  word_t head = 2 + val;
743  word_t w = (head << SECONDBIT) + cnt;
744  nbits += cnt*MAXBITS;
745  if (m_vec.empty()) {
746  m_vec.push_back(w);
747  }
748  else if ((m_vec.back()>>SECONDBIT) == head) {
749  m_vec.back() += cnt;
750  }
751  else if ((m_vec.back()==ALLONES) && head==3) {
752  m_vec.back() = w + 1;
753  }
754  else if ((m_vec.back() == 0) && head==2) {
755  m_vec.back() = w + 1;
756  }
757  else {
758  m_vec.push_back(w);
759  }
760 } // ibis::bitvector::append_counter
761 
764  active.append(b);
765  if (active.is_full())
766  append_active();
767  return *this;
768 } // ibis::bitvector::operator+=
769 
771 void ibis::bitvector::appendByte(unsigned char c) {
772  if (active.nbits >= MAXBITS)
773  append_active();
774 
775  if (active.nbits+8 < MAXBITS) {
776  active.val <<= 8;
777  active.nbits += 8;
778  active.val += c;
779  }
780  else if (active.nbits+8 > MAXBITS) {
781  unsigned na = MAXBITS - active.nbits;
782  unsigned hi = (c >> (8 - na));
783  active.val <<= na;
784  active.val += hi;
785  append_active();
786  active.nbits = 8 - na;
787  active.val = ((hi << active.nbits) ^ c);
788  }
789  else {
790  active.val <<= 8;
791  active.val += c;
792  append_active();
793  }
794 } // ibis::bitvector::appendByte
795 
800 inline void ibis::bitvector::appendFill(int val, word_t n) {
801  if (n == 0) return;
802  if (active.nbits > 0) {
803  word_t tmp = (MAXBITS - active.nbits);
804  if (tmp > n) tmp = n;
805  active.nbits += tmp;
806  active.val <<= tmp;
807  n -= tmp;
808  if (val != 0)
809  active.val |= (1U<<tmp) - 1;
810  if (active.nbits >= MAXBITS)
811  append_active();
812  }
813  if (n >= MAXBITS) {
814  word_t cnt = n / MAXBITS;
815  if (cnt > 1)
816  append_counter(val, cnt);
817  else if (val != 0) {
818  active.val = ALLONES;
819  append_active();
820  }
821  else {
822  active.val = 0;
823  append_active();
824  }
825  n -= cnt * MAXBITS;
826  }
827  if (n > 0) {
828  active.nbits = n;
829  active.val = val*((1U<<n)-1);
830  }
831 } // ibis::bitvector::appendFill
832 
838 inline void ibis::bitvector::copy_runs(run& it, word_t& nw) {
839  // deal with the first word -- attach it to the last word in m_vec
840  if (it.isFill == 0) {
841  active.val = *(it.it);
842  append_active();
843  -- nw;
844  }
845  else if (it.nWords > 1) {
846  append_counter(it.fillBit, it.nWords);
847  nw -= it.nWords;
848  }
849  else if (it.nWords == 1) {
850  active.val = (it.fillBit != 0 ? ALLONES : 0);
851  append_active();
852  -- nw;
853  }
854  ++ it.it;
855  nset = 0;
856  it.nWords = 0;
857  nbits += MAXBITS * nw;
858 
859  while (nw > 0) { // copy the words
860  it.decode();
861  if (nw >= it.nWords) {
862  m_vec.push_back(*(it.it));
863  nw -= it.nWords;
864  it.nWords = 0;
865  ++ it.it;
866  }
867  else {
868  break;
869  }
870  }
871  nbits -= MAXBITS * nw;
872 } // ibis::bitvector::copy_runs
873 
876 inline void ibis::bitvector::copy_runsn(run& it, word_t& nw) {
877  // deal with the first word -- need to attach it to the last word in m_vec
878  if (it.isFill == 0) {
879  active.val = ALLONES ^ *(it.it);
880  append_active();
881  -- nw;
882  }
883  else if (it.nWords > 1) {
884  append_counter(!it.fillBit, it.nWords);
885  nw -= it.nWords;
886  }
887  else if (it.nWords == 1) {
888  active.val = (it.fillBit != 0 ? 0 : ALLONES);
889  append_active();
890  -- nw;
891  }
892  ++ it.it; // advance to the next word
893  nset = 0;
894  it.nWords = 0;
895  nbits += MAXBITS * nw;
896 
897  while (nw > 0) { // copy the words
898  it.decode();
899  if (nw >= it.nWords) {
900  m_vec.push_back((it.isFill?FILLBIT:ALLONES) ^ *(it.it));
901  nw -= it.nWords;
902  it.nWords = 0;
903  ++ it.it;
904  }
905  else {
906  break;
907  }
908  }
909  nbits -= MAXBITS * nw;
910 } // ibis::bitvector::copy_runsn
911 
915 inline void ibis::bitvector::copy_fill
916 (array_t<ibis::bitvector::word_t>::iterator& jt, run& it) {
917  if (it.fillBit == 0) {
918  while (it.nWords > 3) {
919  *jt = 0;
920  jt[1] = 0;
921  jt[2] = 0;
922  jt[3] = 0;
923  jt += 4;
924  it.nWords -= 4;
925  }
926  if (it.nWords == 1) {
927  *jt = 0; ++jt;
928  }
929  else if (it.nWords == 2) {
930  *jt = 0; jt[1] = 0; jt += 2;
931  }
932  else if (it.nWords == 3) {
933  *jt = 0; jt[1] = 0; jt[2] = 0; jt += 3;
934  }
935  }
936  else {
937  while (it.nWords > 3) {
938  *jt = ALLONES;
939  jt[1] = ALLONES;
940  jt[2] = ALLONES;
941  jt[3] = ALLONES;
942  jt += 4;
943  it.nWords -= 4;
944  }
945  if (it.nWords == 1) {
946  *jt = ALLONES; ++jt;
947  }
948  else if (it.nWords == 2) {
949  *jt = ALLONES; jt[1] = ALLONES; jt += 2;
950  }
951  else if (it.nWords == 3) {
952  *jt = ALLONES; jt[1] = ALLONES; jt[2] = ALLONES; jt += 3;
953  }
954  }
955  it.nWords = 0;
956  ++ it.it;
957 } // ibis::bitvector::copy_fill
958 
963 inline void ibis::bitvector::copy_runs
964 (array_t<ibis::bitvector::word_t>::iterator& jt, run& it, word_t& nw) {
965  while (nw >= it.nWords && nw > 0) {
966  if (it.isFill != 0) { // copy a fill
967  const array_t<word_t>::iterator iend = jt + it.nWords;
968  if (it.fillBit == 0) {
969  while (jt < iend) {
970  *jt = 0;
971  ++ jt;
972  }
973  }
974  else {
975  while (jt < iend) {
976  *jt = ALLONES;
977  ++ jt;
978  }
979  }
980  nw -= it.nWords;
981  }
982  else { // copy a single word
983  *jt = *(it.it);
984  ++ jt;
985  -- nw;
986  }
987  ++ it.it; // advance to the next word
988  if (nw > 0) {
989  it.decode();
990  }
991  else {
992  it.nWords = 0;
993  return;
994  }
995  }
996 } // ibis::bitvector::copy_runs
997 
1000 inline void ibis::bitvector::copy_runsn
1001 (array_t<ibis::bitvector::word_t>::iterator& jt, run& it, word_t& nw) {
1002  while (nw >= it.nWords) {
1003  if (it.isFill != 0) { // a fill
1004  const array_t<word_t>::iterator iend = jt + it.nWords;
1005  if (it.fillBit == 0) {
1006  while (jt < iend) {
1007  *jt = ALLONES;
1008  ++ jt;
1009  }
1010  }
1011  else {
1012  while (jt < iend) {
1013  *jt = 0;
1014  ++ jt;
1015  }
1016  }
1017  nw -= it.nWords;
1018  }
1019  else { // a literal word
1020  *jt = *(it.it) ^ ALLONES;
1021  ++ jt;
1022  -- nw;
1023  }
1024  ++ it.it; // advance to the next word
1025  if (nw > 0) {
1026  it.decode();
1027  }
1028  else {
1029  it.nWords = 0;
1030  return;
1031  }
1032  }
1033 } // ibis::bitvector::copy_runsn
1034 
1035 inline ibis::bitvector::iterator ibis::bitvector::begin() {
1036  iterator it;
1037  it.it = m_vec.begin();
1038  it.vec = &m_vec;
1039  it.bitv = this;
1040  it.active = &active;
1041  it.decodeWord();
1042  return it;
1043 } // ibis::bitvector::begin
1044 
1045 inline ibis::bitvector::iterator ibis::bitvector::end() {
1046  iterator it;
1047  it.ind = 0;
1048  it.compressed = 0;
1049  it.nbits = 0;
1050  it.literalvalue = 0;
1051  it.fillbit = 0;
1052  it.it = m_vec.end() + 1;
1053  it.vec = &m_vec;
1054  it.bitv = this;
1055  it.active = &active;
1056  return it;
1057 } // ibis::bitvector::end
1058 
1059 inline ibis::bitvector::const_iterator ibis::bitvector::begin() const {
1060  const_iterator it;
1061  it.it = m_vec.begin();
1062  it.begin = m_vec.begin();
1063  it.end = m_vec.end();
1064  it.active = &active;
1065  it.decodeWord();
1066  return it;
1067 } // ibis::bitvector::begin
1068 
1071 #if defined(DEBUG) && DEBUG + 0 > 1
1072  if (vec==0 || it<vec->begin() || it>vec->end())
1073  throw "bitvector::const_iterator not initialized correctly.";
1074 #endif
1075  if (compressed != 0)
1076  return (fillbit != 0);
1077  else
1078  return (1 & (literalvalue >> (bitvector::SECONDBIT - ind)));
1079 } // ibis::bitvector::iterator::operator*
1080 
1083 inline int ibis::bitvector::iterator::operator!=
1084 (const ibis::bitvector::const_iterator& rhs) const throw () {
1085  return (it != rhs.it);
1086 }
1087 
1088 inline int ibis::bitvector::iterator::operator==
1089 (const ibis::bitvector::const_iterator& rhs) const throw () {
1090  return (it == rhs.it);
1091 }
1092 
1093 inline int ibis::bitvector::iterator::operator!=
1094 (const ibis::bitvector::iterator& rhs) const throw () {
1095  return (it != rhs.it);
1096 }
1097 
1098 inline int ibis::bitvector::iterator::operator==
1099 (const ibis::bitvector::iterator& rhs) const throw () {
1100  return (it == rhs.it);
1101 }
1102 
1105 #if defined(DEBUG) && DEBUG + 0 > 1
1106  if (vec==0 || it<vec->begin() || it>vec->end())
1107  throw "bitvector::iterator not formed correctly.";
1108 #endif
1109  if (ind+1<nbits) {++ind;}
1110  else {++ it; decodeWord();}
1111  return *this;
1112 }
1113 
1116 #if defined(DEBUG) && DEBUG + 0 > 1
1117  if (vec==0 || it<vec->begin() || it>vec->end()+1)
1118  throw "bitvector::iterator not formed correctly.";
1119 #endif
1120  if (ind) -- ind;
1121  else {
1122  if (it <= vec->end()) -- it;
1123  else if (active->nbits) it = vec->end();
1124  else it = vec->end() - 1;
1125  decodeWord();
1126  if (nbits) ind = nbits - 1;
1127  }
1128  return *this;
1129 }
1130 
1131 inline ibis::bitvector::const_iterator ibis::bitvector::end() const {
1132  const_iterator it;
1133  it.ind = 0;
1134  it.compressed = 0;
1135  it.nbits = 0;
1136  it.literalvalue = 0;
1137  it.fillbit = 0;
1138  it.it = m_vec.end() + 1;
1139  it.begin = m_vec.begin();
1140  it.end = m_vec.end();
1141  it.active = &active;
1142  return it;
1143 } // ibis::bitvector::end
1144 
1147 #if defined(DEBUG) && DEBUG + 0 > 1
1148  if (it==0 || end==0 || it>end || nbits<=ind)
1149  throw "bitvector::const_iterator not initialized correctly.";
1150 #endif
1151  if (compressed != 0)
1152  return (fillbit != 0);
1153  else
1154  return (1 & (literalvalue >> (bitvector::SECONDBIT - ind)));
1155 }
1156 
1159 inline int ibis::bitvector::const_iterator::operator!=
1160 (const ibis::bitvector::const_iterator& rhs) const throw (){
1161  return (it != rhs.it);
1162 }
1163 inline int ibis::bitvector::const_iterator::operator==
1164 (const ibis::bitvector::const_iterator& rhs) const throw () {
1165  return (it == rhs.it);
1166 }
1167 
1171 #if defined(DEBUG) && DEBUG + 0 > 1
1172  if (it==0 || end==0 || it>end)
1173  throw "bitvector::const_iterator not formed correctly.";
1174 #endif
1175  if (ind+1<nbits) {++ind;}
1176  else {++ it; decodeWord();}
1177  return *this;
1178 }
1179 
1183 #if defined(DEBUG) && DEBUG + 0 > 1
1184  if (it==0 || end==0 || it>end)
1185  throw "bitvector::const_iterator not formed correctly.";
1186 #endif
1187  if (ind) -- ind;
1188  else {
1189  if (it <= end) -- it;
1190  else if (active->nbits) it = end;
1191  else it = end - 1;
1192  decodeWord();
1193  if (nbits) ind = nbits - 1;
1194  }
1195  return *this;
1196 }
1197 
1202 inline ibis::bitvector::word_t ibis::bitvector::pit::operator*() const {
1203  if (curr < iset.nIndices()) {
1204  if (iset.isRange()) {
1205  return (*iset.indices())+curr;
1206  }
1207  else {
1208  return iset.indices()[curr];
1209  }
1210  }
1211  else {
1212  return 0xFFFFFFFFU;
1213  }
1214 } // ibis::bitvector::pit::operator*
1215 
1218  iset = bv.firstIndexSet();
1219  curr = 0;
1220 } // ibis::bitvector::pit::init
1221 
1224 void ibis::bitvector::pit::skip(unsigned k) {
1225  while (k > 0) {
1226  if (k+curr < iset.nIndices()) {
1227  // stop in the middle of a run
1228  curr += k;
1229  k = 0;
1230  }
1231  else if (iset.nIndices() > 0) {
1232  // skip over this run
1233  k = iset.nIndices() - curr;
1234  curr = 0;
1235  ++ iset;
1236  }
1237  else {
1238  // reached the end of the bitvector
1239  k = 0;
1240  }
1241  }
1242 } // ibis::bitvector::pit::next
1243 
1246  ++ curr;
1247  if (curr >= iset.nIndices()) {
1248  ++ iset;
1249  curr = 0;
1250  }
1251 } // ibis::bitvector::pit::next
1252 
1253 inline ibis::bitvector::indexSet ibis::bitvector::firstIndexSet() const {
1254  indexSet is;
1255  if (m_vec.end() > m_vec.begin()) {
1256  is.it = m_vec.begin() - 1;
1257  is.end = m_vec.end();
1258  }
1259  else {
1260  is.it = 0;
1261  is.end = 0;
1262  }
1263  is.active = &active;
1264  is.ind[0] = static_cast<word_t>(-1);
1265  is.nind = 0;
1266  ++is;
1267  return is;
1268 } // ibis::bitvector::firstIndexSet;
1269 
1273 inline double ibis::bitvector::randomSize(word_t nb, word_t nc) {
1274  double sz = 0.0;
1275  if (nb > 0 && nb >= nc && nb > MAXBITS) {
1276  const double den = static_cast<double>(nc) /
1277  static_cast<double>(nb);
1278  const word_t nw = nb / MAXBITS;
1279  sz = 3.0 + nw - nw * (pow(1.0-den, static_cast<int>(2*MAXBITS))
1280  + pow(den, static_cast<int>(2*MAXBITS)));
1281  }
1282  return sz*sizeof(word_t);
1283 } // ibis::bitvector::randomSize
1284 
1289 inline double ibis::bitvector::markovSize(word_t nb, word_t nc, double f) {
1290  double sz = 0.0;
1291  if (nb > 0 && nb >= nc && nb > MAXBITS) {
1292  const double den = static_cast<double>(nc) /
1293  static_cast<double>(nb);
1294  const word_t nw = nb / MAXBITS;
1295  if ((den <= 0.5 && f > 1.00001) || (den > 0.5 && (1.0-den)*f > den))
1296  sz = ((1.0-den) * pow(1.0-den/((1.0-den)*f),
1297  static_cast<int>(2*MAXBITS-3)) +
1298  den * pow(1.0-1.0/f, static_cast<int>(2*MAXBITS-3)));
1299  else
1300  sz = (pow(1.0-den, static_cast<int>(2*MAXBITS)) +
1301  pow(den, static_cast<int>(2*MAXBITS)));
1302  sz = 3.0 + nw * (1.0 - sz);
1303  }
1304  return sz*sizeof(word_t);
1305 } // ibis::bitvector::markovSize
1306 
1311 inline void ibis::bitvector::turnOnRawBit(const word_t ind) {
1312  if (ind < nbits) { // in regular words
1313  m_vec[ind / MAXBITS] |= (1 << (SECONDBIT - (ind % MAXBITS)));
1314  nset = 0; // don't track nset
1315  }
1316  else { // assume to be in the active word
1317  active.val |= (1 << (active.nbits - (ind - nbits) - 1));
1318 #if defined(DEBUG)
1319  if (ind >= nbits + active.nbits ||
1320  active.val >= (1U << active.nbits)) {
1321  LOGGER(ibis::gVerbose >= 0)
1322  << "bitvector::turnOnRawBit(" << ind
1323  << ") found a bad active word";
1324  }
1325 #endif
1326  }
1327 } // ibis::bitvector::turnOnRawBit
1328 
1329 std::ostream& operator<<(std::ostream&, const ibis::bitvector&);
1330 #endif // __BITVECTOR_H
int64_t write(int, const void *, int64_t)
A wrapper over POSIX write function.
Definition: util.cpp:999
word_t nIndices() const
Number of indices.
Definition: bitvector.h:508
static word_t bitsPerLiteral()
Return the number of bits in a literal word.
Definition: bitvector.h:154
const_iterator & operator--()
Decrement the iterator. Move back one bit.
Definition: bitvector.h:1182
indexSet(const indexSet &rhs)
Copy constructor.
Definition: bitvector.h:426
bool operator*() const
Dereference the current bit. No error checking.
Definition: bitvector.h:1070
static double markovSize(word_t nb, word_t nc, double f)
Compute the expected size (number of bytes) of a random sequence generated from a Markov process...
Definition: bitvector.h:1289
The iterator that allows modification of bits.
Definition: bitvector.h:371
The const_iterator class. It iterates on the individual bits.
Definition: bitvector.h:321
indexSet & operator++()
Advance to the next code word that is not zero.
Definition: bitvector.cpp:4524
bool all0s() const
Are all bits in regular words 0? It assumes the regular words have been properly compressed and there...
Definition: bitvector.h:592
void init(const ibis::bitvector &)
Initialize the data structure. The.
Definition: bitvector.h:1217
ibis::bitvector::word_t operator*() const
Operator to retrieve the position of the current bit.
Definition: bitvector.h:1202
The current implementation of FastBit is code named IBIS; most data structures and functions are in t...
Definition: bord.h:16
void appendFill(int val, word_t n)
Append n bits of val.
Definition: bitvector.h:800
const word_t * indices() const
Pointer to the indices.
Definition: bitvector.h:506
bool empty() const
Is the bitvector empty? For efficiency reasons, this funciton only works correctly on a properly comp...
Definition: bitvector.h:164
int operator!=(const const_iterator &rhs) const
Comparing two iterators.
Definition: bitvector.h:1084
bitvector & operator+=(const bitvector &bv)
Append a bitvector.
Definition: bitvector.cpp:334
bitvector & copy(const bitvector &bv)
Make a copy. Performs a deep copy.
Definition: bitvector.h:680
const word_t & currentWord() const
The value of the current compressed word.
Definition: bitvector.h:510
void appendByte(unsigned char)
Append all 8 bits of the incoming bytes as literal bits.
Definition: bitvector.h:771
bool operator*() const
Dereference the current bit value. No error checking.
Definition: bitvector.h:1146
iterator & operator--()
Decrement the interator. Move back by one bit.
Definition: bitvector.h:1115
int copy(const char *to, const char *from)
Copy file named "from" to a file named "to".
Definition: util.cpp:894
word_t size() const
Return the total number of bits in the bit sequence.
Definition: bitvector.h:647
word_t sloppyCount() const
Provide a sloppy count of the number of bits that are 1.
Definition: bitvector.h:572
uint32_t getSerialSize() const
Compute the number of bytes in the serialized version of this bitvector object.
Definition: bitvector.h:150
iterator & operator++()
Increment the interator. Move on to the next bit.
Definition: bitvector.h:1104
void next()
Move on to the next bit that is 1.
Definition: bitvector.h:1245
~bitvector()
!< The basic unit of data storage.
Definition: bitvector.h:67
void sloppySize(word_t n) const
Explicitly set the size of the bitvector.
Definition: bitvector.h:550
void erase(word_t i, word_t j)
Remove the bits in the range of [i, j).
Definition: bitvector.cpp:1012
The indexSet stores positions of bits that are one.
Definition: bitvector.h:421
bool isRange() const
Is the index set a consecutive range?
Definition: bitvector.h:504
int64_t read(int, void *, int64_t)
A wrapper over POSIX read function.
Definition: util.cpp:973
const word_t * const_iterator
!< Iterator type.
Definition: array_t.h:27
void clear(ibis::array_t< ibis::bitvector * > &bv)
Clear an array of bit vectors.
Definition: bitvector.cpp:4662
uint32_t bytes() const
Return the number of bytes used by the bitvector object in memory.
Definition: bitvector.h:144
indexSet & operator=(const indexSet &rhs)
Assignment operator.
Definition: bitvector.h:462
A data structure to represent a sequence of bits.
Definition: bitvector.h:62
indexSet()
Default constructor.
Definition: bitvector.h:424
void adjustSize(word_t nv, word_t nt)
Adjust the size of the bit sequence.
Definition: bitvector.cpp:4086
bool all1s() const
Are all bits in regular words 1? It assumes the regular words are properly compressed and therefore o...
Definition: bitvector.h:606
const_iterator & operator++()
Increment the iterator. Move on to the next bit.
Definition: bitvector.h:1170
word_t cnt() const
Return the number of bits that are one.
Definition: bitvector.h:652
const bitvector & operator=(const bitvector &bv)
The assignment operator.
Definition: bitvector.h:673
void turnOnRawBit(const word_t i)
Turn on a single bit in a uncompressed bitvector.
Definition: bitvector.h:1311
static double randomSize(word_t nb, word_t nc)
Compute the expected number of bytes required to store a random sequence.
Definition: bitvector.h:1273
Iterate over the positive positions one at a time.
Definition: bitvector.h:529
word_t numFillWords() const
Return the number of fill words.
Definition: bitvector.h:659
void skip(unsigned)
Skip over next k positive positions.
Definition: bitvector.h:1224
bool isCompressed() const
Does this bit vector use less space than the maximum? Return true if yes, otherwise false...
Definition: bitvector.h:135

Make It A Bit Faster
Contact us
Disclaimers
FastBit source code
FastBit mailing list archive