bitvector64.h
Go to the documentation of this file.
1 // $Id$
2 // Author: John Wu <John.Wu at ACM.org>
3 // Lawrence Berkeley National Laboratory
4 // Copyright (c) 2000-2016 the Regents of the University of California
5 #ifndef BITVECTOR64_H
6 #define BITVECTOR64_H
7 
10 #include "array_t.h" // alternative to std::vector
11 
12 #include <stdio.h> // sprintf
13 
14 
55 public:
56  typedef uint64_t word_t;
57 
58  // constructors of bitvector64 class
59  bitvector64() : nbits(0), nset(0), active(), m_vec() {};
60  ~bitvector64() {clear();};
61  bitvector64(const bitvector64& bv) : nbits(bv.nbits), nset(bv.nset),
62  active(bv.active), m_vec(bv.m_vec) {};
63  bitvector64(const array_t<word_t>& arr);
64  bitvector64(const char* file);
65  inline bitvector64& operator=(const bitvector64& bv);
66  inline bitvector64& copy(const bitvector64& bv);
67  inline bitvector64& swap(bitvector64& bv);
68  // use bv to replace part of the existing value, match the ith bit with
69  // the first of bv, return reference to self
70  //bitvector64& copy(const word_t i, const bitvector64& bv);
73  void setBit(const word_t i, int val);
74  int getBit(const word_t i) const;
76  void erase(word_t i, word_t j);
77 
80  void set(int val, word_t n);
82  void clear() {nbits = 0; nset = 0; active.reset(); m_vec.clear();}
83 
84  bitvector64& operator+=(const bitvector64& bv);
85  inline bitvector64& operator+=(int b);
86  inline void appendByte(unsigned char);
87  void appendWord(word_t w);
88  inline void appendFill(int val, word_t n);
90 
92  int operator==(const bitvector64& rhs) const;
93 
95  void flip();
97  void operator&=(const bitvector64& rhs);
99  bitvector64* operator&(const bitvector64&) const;
101  void operator|=(const bitvector64& rhs);
103  bitvector64* operator|(const bitvector64&) const;
105  void operator^=(const bitvector64& rhs);
107  bitvector64* operator^(const bitvector64&) const;
109  void operator-=(const bitvector64& rhs);
112  bitvector64* operator-(const bitvector64&) const;
113 
114  // I/O functions.
117  void read(const char *fn);
119  void write(const char *fn) const;
120  void write(FILE* fptr) const;
122  void write(array_t<word_t>& arr) const;
123 
124  void compress();
125  void decompress();
126  word_t compressible() const;
130  bool isCompressed() const {return (m_vec.size()*MAXBITS < nbits);}
131 
133  word_t cnt() const {
134  if (nset==0) do_cnt(); return (nset+cnt_ones(active.val));
135  };
136 
138  word_t size() const throw() {return (nbits+active.nbits);};
140  inline word_t numFillWords() const;
142  word_t bytes() const throw() {
143  return (m_vec.size()*sizeof(word_t) + sizeof(bitvector64));
144  };
147  word_t getSerialSize() const throw() {
148  return (m_vec.size() + 1 + (active.nbits>0));
149  };
151  static unsigned bitsPerLiteral() {return MAXBITS;}
155  inline static double randomSize(word_t nb, word_t nc);
160  inline static double markovSize(word_t nb, word_t nc, double f);
163  static double clusteringFactor(word_t nb, word_t nc, word_t nw);
164 
170  void adjustSize(word_t nv, word_t nt);
171  std::ostream& print(std::ostream &) const;
172 
174  class iterator;
175  inline iterator end();
176  inline iterator begin();
177 
179  class const_iterator;
180  inline const_iterator end() const;
181  inline const_iterator begin() const;
182 
184  class indexSet;
185  inline indexSet firstIndexSet() const;
186 
187  // give accesses to some friends
188  friend class indexSet;
189  friend class iterator;
190  friend class const_iterator;
191 
192 protected:
193  inline bool all0s() const;
194  inline bool all1s() const;
195 
196 private:
197  // static members, i.e., constants to be used internally
198  static const unsigned MAXBITS;
199  static const unsigned SECONDBIT;
200  static const word_t FILLBIT;
201  static const word_t HEADER0;
202  static const word_t HEADER1;
203  static const word_t ALLONES;
204  static const word_t MAXCNT;
205 
212  struct active_word {
213  word_t val; // the value
214  word_t nbits; // total number of bits
215 
216  active_word() : val(0), nbits(0) {};
217  void reset() {val = 0; nbits = 0;};
218  int is_full() const {return (nbits >= MAXBITS);};
219  void append(int b) { // append a single bit
220  val <<= 1; nbits ++; val += b;
221  };
222  }; // struct active_word
223 
226  struct run {
227  int isFill;
228  int fillBit;
229  word_t nWords;
231  run() : isFill(0), fillBit(0), nWords(0), it(0) {};
232  void decode() {
233  fillBit = (*it > HEADER1);
234  if (*it > ALLONES) {
235  nWords = (*it & MAXCNT);
236  isFill = 1;
237  }
238  else {
239  nWords = 1;
240  isFill = 0;
241  }
242  };
245  void operator--() {
246  if (nWords > 1) {--nWords;}
247  else {++ it; nWords = 0;}
248  };
251  void operator-=(word_t len) {
252  while (len > 0) {
253  if (nWords == 0) decode();
254  if (isFill) {
255  if (nWords > len) {nWords -= len; len = 0;}
256  else if (nWords == len) {nWords = 0; len = 0; ++ it;}
257  else {len -= nWords; ++ it; nWords = 0;}
258  }
259  else {-- len; ++ it; nWords = 0;}
260  }
261  };
262  };
263  friend struct run;
264  friend struct active_word;
265 
266  // member variables of bitvector64 class
267  word_t nbits;
268  mutable word_t nset;
269  active_word active;
270  array_t<word_t> m_vec;
271 
272  // private functions of bitvector64 class
273  // The following three functions all performs or operation, _c2 and _c1
274  // generate compressed solutions, but _c0, _d1, and _d2 generate
275  // uncompressed solutions.
276  // or_c2 assumes there are compressed word in both operands
277  // or_c1 *this may contain compressed word, but not rhs
278  // or_c0 assumes both operands are not compressed
279  // or_d1 *this contains no compressed word and is overwritten with the
280  // result
281  // or_d2 both *this and rhs are compressed, but res will not be compressed
282  void or_c2(const bitvector64& rhs, bitvector64& res) const;
283  void or_c1(const bitvector64& rhs, bitvector64& res) const;
284  void or_c0(const bitvector64& rhs);
285  void or_d1(const bitvector64& rhs);
286  void or_d2(const bitvector64& rhs, bitvector64& res) const;
287  void and_c2(const bitvector64& rhs, bitvector64& res) const;
288  void and_c1(const bitvector64& rhs, bitvector64& res) const;
289  void and_c0(const bitvector64& rhs);
290  void and_d1(const bitvector64& rhs);
291  void and_d2(const bitvector64& rhs, bitvector64& res) const;
292  void xor_c2(const bitvector64& rhs, bitvector64& res) const;
293  void xor_c1(const bitvector64& rhs, bitvector64& res) const;
294  void xor_c0(const bitvector64& rhs);
295  void xor_d1(const bitvector64& rhs);
296  void xor_d2(const bitvector64& rhs, bitvector64& res) const;
297  void minus_c2(const bitvector64& rhs, bitvector64& res) const;
298  void minus_c1(const bitvector64& rhs, bitvector64& res) const;
299  void minus_c1x(const bitvector64& rhs, bitvector64& res) const;
300  void minus_c0(const bitvector64& rhs);
301  void minus_d1(const bitvector64& rhs);
302  void minus_d2(const bitvector64& rhs, bitvector64& res) const;
303  inline void copy_runs(run& it, word_t& nw); // copy nw words
304  inline void copy_runsn(run& it, word_t& nw); // copy nw words and negate
305  inline void copy_fill(array_t<word_t>::iterator& jt, run& it);
306  inline void copy_runs(array_t<word_t>::iterator& jt, run& it,
307  word_t& nw);
308  inline void copy_runsn(array_t<word_t>::iterator& jt, run& it,
309  word_t& nw);
310  void decompress(array_t<word_t>& tmp) const;
311  void copy_comp(array_t<word_t>& tmp) const;
312  inline void append_active();
313  inline void append_counter(int val, word_t cnt);
314  inline unsigned cnt_ones(word_t) const; // number of 1s in a literal word
315  inline word_t cnt_bits(word_t) const; // number of bits in a word
316  word_t do_cnt() const; // count the number of bits and number of ones
317 }; // end class bitvector64
318 
329 public:
330  // let the compiler define most of the canonical functions
331 
332  // allow bitvector64::firstIndexSet() to access private variables of this
333  // class
334  friend indexSet ibis::bitvector64::firstIndexSet() const;
335 
336  //int operator!=(const indexSet& rhs) const {return (it != rhs.it);};
337  bool isRange() const {return (nind>=ibis::bitvector64::MAXBITS);}
338  const word_t* indices() const {return ind;};
339  word_t nIndices() const {return nind;}
340  indexSet& operator++();
341 
342 private:
345  const active_word* active; // points back to the active word
346  word_t nind; // number of indices
347  word_t ind[64];
348 }; // class ibis::bitvector64::indexSet
349 
352 public:
353  inline bool operator*() const;
354  inline int operator!=(const const_iterator& rhs) const throw ();
355  inline int operator==(const const_iterator& rhs) const throw ();
356 
357  inline const_iterator& operator++();
358  inline const_iterator& operator--();
359  const_iterator& operator+=(int64_t incr);
360 
361 private:
362  ibis::bitvector64::word_t compressed;
363  ibis::bitvector64::word_t ind;
364  ibis::bitvector64::word_t nbits;
365  ibis::bitvector64::word_t literalvalue;
366  int fillbit;
367  const active_word* active;
371 
372  void decodeWord();
373 
374  // give three functions of bitvector64 access to private variables
375  friend void ibis::bitvector64::erase(word_t i, word_t j);
376  friend const_iterator ibis::bitvector64::begin() const;
377  friend const_iterator ibis::bitvector64::end() const;
378  friend class ibis::bitvector64::iterator;
379 }; // end class ibis::bitvector64::const_iterator
380 
389 public:
390  inline bool operator*() const; // still can not modify this
391  inline int operator!=(const const_iterator& rhs) const throw ();
392  inline int operator==(const const_iterator& rhs) const throw ();
393  inline int operator!=(const iterator& rhs) const throw ();
394  inline int operator==(const iterator& rhs) const throw ();
395 
396  inline iterator& operator++();
397  inline iterator& operator--();
398  iterator& operator+=(int64_t incr);
399  iterator& operator=(int val);
400 
401 private:
402  ibis::bitvector64::word_t compressed;
403  ibis::bitvector64::word_t ind;
404  ibis::bitvector64::word_t nbits;
405  ibis::bitvector64::word_t literalvalue;
406  int fillbit;
407  ibis::bitvector64* bitv;
408  active_word* active;
409  array_t<word_t>* vec;
411 
412  void decodeWord();
413 
414  friend iterator ibis::bitvector64::begin();
415  friend iterator ibis::bitvector64::end();
416 }; // end class ibis::bitvector64::iterator
417 
419 inline bool ibis::bitvector64::all0s() const {
420  if (m_vec.empty()) {
421  return true;
422  }
423  else if (m_vec.size() == 1) {
424  return (m_vec[0] == 0 || (m_vec[0] >= HEADER0 && m_vec[0] < HEADER1));
425  }
426  else {
427  return false;
428  }
429 } // ibis::bitvector64::all0s
430 
432 inline bool ibis::bitvector64::all1s() const {
433  if (m_vec.size() == 1) {
434  return (m_vec[0] == ALLONES || (m_vec[0] > HEADER1));
435  }
436  else {
437  return false;
438  }
439 } // ibis::bitvector64::all1s()
440 
442 inline ibis::bitvector64::word_t
443 ibis::bitvector64::cnt_bits(ibis::bitvector64::word_t val) const {
444  return ((val>ALLONES) ? ((val&MAXCNT)*MAXBITS) : MAXBITS);
445 }
446 
448 inline unsigned
449 ibis::bitvector64::cnt_ones(ibis::bitvector64::word_t val) const {
450  // number of 1 bits in a value between 0 and 255
451  static const unsigned table[256] = {
452  0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
453  1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
454  1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
455  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
456  1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
457  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
458  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
459  3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
460  1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
461  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
462  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
463  3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
464  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
465  3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
466  3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
467  4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};
468  //assert(8 == sizeof(word_t));
469  return table[val&0xFF] + table[(val>>8)&0xFF] +
470  table[(val>>16)&0xFF] + table[(val>>24)&0xFF] +
471  table[(val>>32)&0xFF] + table[(val>>40)&0xFF] +
472  table[(val>>48)&0xFF] + table[val>>56];
473 } // inline unsigned ibis::bitvector64::cnt_ones(word_t) const
474 
475 inline ibis::bitvector64::word_t
477  word_t cnt=0;
478  array_t<word_t>::const_iterator it = m_vec.begin();
479  while (it != m_vec.end()) {
480  cnt += (*it >> ibis::bitvector64::MAXBITS);
481  it++;
482  }
483  return cnt;
484 } // ibis::bitvector64::numFillWords
485 
486 // The assignment operator. Wanted to use shallow copy for efficiency
487 // consideration, but SHALLOW copy causes unexpected problem in test
488 // program bitty.cpp change to deep copy
489 inline ibis::bitvector64&
491  nbits = bv.nbits; nset = bv.nset; active = bv.active;
492  m_vec.deepCopy(bv.m_vec);
493  return *this;
494 }
495 
496 // deep copy
497 inline ibis::bitvector64&
499  nbits = bv.nbits; nset = bv.nset; active = bv.active;
500  m_vec.deepCopy(bv.m_vec);
501  return *this;
502 }
503 
504 inline ibis::bitvector64&
506  word_t tmp;
507  tmp = bv.nbits; bv.nbits = nbits; nbits = tmp;
508  tmp = bv.nset; bv.nset = nset; nset = tmp;
509  active_word atmp = bv.active;
510  bv.active = active; active = atmp;
511  m_vec.swap(bv.m_vec);
512  return *this;
513 }
514 
515 // append_active assumes that nbits == MAXBITS and refers to MAXBITS instead
516 // of nbits
517 inline void ibis::bitvector64::append_active() {
518  if (m_vec.empty()) {
519  m_vec.push_back(active.val);
520  }
521  else if (active.val == 0) {// incoming word is zero
522  if (m_vec.back() == 0) {
523  m_vec.back() = (HEADER0 + 2);
524  }
525  else if (m_vec.back() >= HEADER0 && m_vec.back() < HEADER1) {
526  ++ m_vec.back();
527  }
528  else {
529  m_vec.push_back(active.val);
530  }
531  }
532  else if (active.val == ALLONES) {// incoming word is allones
533  if (m_vec.back() == ALLONES) {
534  m_vec.back() = (HEADER1 | 2);
535  }
536  else if (m_vec.back() >= HEADER1) {
537  ++m_vec.back();
538  }
539  else {
540  m_vec.push_back(active.val);
541  }
542  }
543  else { // incoming word contains a mixture of bits
544  m_vec.push_back(active.val);
545  }
546  nbits += MAXBITS;
547  active.reset();
548  nset = 0;
549 } // void ibis::bitvector64::append_active()
550 
553 inline void ibis::bitvector64::append_counter(int val, word_t cnt) {
554  word_t head = 2 + val;
555  word_t w = (head << SECONDBIT) + cnt;
556  nbits += cnt*MAXBITS;
557  if (m_vec.empty()) {
558  m_vec.push_back(w);
559  }
560  else if ((m_vec.back()>>SECONDBIT) == head) {
561  m_vec.back() += cnt;
562  }
563  else if ((m_vec.back()==ALLONES) && head==3) {
564  m_vec.back() = w + 1;
565  }
566  else if ((m_vec.back() == 0) && head==2) {
567  m_vec.back() = w + 1;
568  }
569  else {
570  m_vec.push_back(w);
571  }
572 } // ibis::bitvector64::append_counter
573 
575 inline ibis::bitvector64& ibis::bitvector64::operator+=(int b) {
576  active.append(b);
577  if (active.is_full()) append_active();
578  return *this;
579 } // ibis::bitvector64& ibis::bitvector64::operator+=(int b)
580 
582 void ibis::bitvector64::appendByte(unsigned char c) {
583  if (active.nbits >= MAXBITS)
584  append_active();
585 
586  if (active.nbits+8 < MAXBITS) {
587  active.val <<= 8;
588  active.nbits += 8;
589  active.val += c;
590  }
591  else if (active.nbits+8 > MAXBITS) {
592  unsigned na = MAXBITS - nbits;
593  unsigned hi = (c >> (8 - na));
594  active.val <<= na;
595  active.val += hi;
596  append_active();
597  active.nbits = 8 - na;
598  active.val = ((hi << active.nbits) ^ c);
599  }
600  else {
601  active.val <<= 8;
602  active.val += c;
603  append_active();
604  }
605 } // ibis::bitvector64::appendByte
606 
610 (int val, ibis::bitvector64::word_t n) {
611  if (active.nbits > 0) {
612  word_t tmp = (MAXBITS - active.nbits);
613  if (tmp > n) tmp = n;
614  active.nbits += tmp;
615  active.val <<= tmp;
616  n -= tmp;
617  if (val)
618  active.val |= (static_cast<word_t>(1)<<tmp) - 1;
619  if (active.nbits >= MAXBITS) append_active();
620  }
621  if (n >= MAXBITS) {
622  word_t cnt = n / MAXBITS;
623  if (cnt > 1)
624  append_counter(val, cnt);
625  else if (val) {
626  active.val = ALLONES;
627  append_active();
628  }
629  else {
630  active.val = 0;
631  append_active();
632  }
633  n -= cnt * MAXBITS;
634  }
635  if (n > 0) {
636  active.nbits = n;
637  active.val = val*((static_cast<word_t>(1)<<n)-1);
638  }
639 } // ibis::bitvector64::appendFill
640 
641 // append nw words starting from 'it' to the current bit vector -- assume
642 // active is empty
643 inline void ibis::bitvector64::copy_runs(run& it, word_t& nw) {
644  // deal with the first word -- need to attach it to the last word in m_vec
645  if (it.isFill) {
646  if (it.nWords > 1) {
647  append_counter(it.fillBit, it.nWords);
648  nw -= it.nWords;
649  }
650  else if (it.nWords == 1) {
651  active.val = (it.fillBit != 0 ? ALLONES : 0);
652  append_active();
653  -- nw;
654  }
655  }
656  else {
657  active.val = *(it.it);
658  append_active();
659  -- nw;
660  }
661  ++ it.it; // advance to the next word
662  //if (nw == 0) {it.nWords = 0; return;}
663  it.decode();
664  nset = 0;
665  nbits += MAXBITS * nw;
666  while (nw >= it.nWords && nw > 0) { // only need to copy the word
667  m_vec.push_back(*(it.it));
668  nw -= it.nWords;
669  ++ it.it; // advance to the next word
670  it.decode();
671  }
672  nbits -= MAXBITS * nw;
673 } // ibis::bitvector64::copy_runs
674 
675 // append nw words starting from it to the current bit vector -- assume
676 // active is empty
677 inline void ibis::bitvector64::copy_runsn(run& it, word_t& nw) {
678  // deal with the first word -- need to attach it to the last word in m_vec
679  if (it.isFill) {
680  if (it.nWords > 1) {
681  append_counter(!it.fillBit, it.nWords);
682  nw -= it.nWords;
683  }
684  else if (it.nWords == 1) {
685  active.val = (it.fillBit != 0 ? 0 : ALLONES);
686  append_active();
687  -- nw;
688  }
689  }
690  else {
691  active.val = ALLONES ^ *(it.it);
692  append_active();
693  -- nw;
694  }
695  ++ it.it; // advance to the next word
696  //if (nw == 0) {it.nWords = 0; return;}
697  it.decode();
698  nset = 0;
699  nbits += MAXBITS * nw;
700  while (nw >= it.nWords && nw > 0) { // only need to copy the word
701  m_vec.push_back((it.isFill?FILLBIT:ALLONES) ^ *(it.it));
702  nw -= it.nWords;
703  ++ it.it; // advance to the next word
704  it.decode();
705  }
706  nbits -= MAXBITS * nw;
707 } // ibis::bitvector64::copy_runsn
708 
709 // copy the fill in "run it" as literal words
710 inline void ibis::bitvector64::copy_fill
711 (array_t<ibis::bitvector64::word_t>::iterator& jt, run& it) {
712  const array_t<word_t>::iterator iend = jt + it.nWords;
713  if (it.fillBit == 0) {
714  switch (it.nWords) {
715  case 0: break;
716  case 1:
717  *jt = 0; ++jt; break;
718  case 2:
719  *jt = 0; ++jt; *jt = 0; ++jt; break;
720  case 3:
721  *jt = 0; ++jt; *jt = 0; ++jt; *jt = 0; ++jt; break;
722  default:
723  while (jt < iend) {
724  *jt = 0;
725  ++ jt;
726  }
727  break;
728  }
729  }
730  else {
731  switch (it.nWords) {
732  case 0: break;
733  case 1:
734  *jt = ALLONES; ++jt; break;
735  case 2:
736  *jt = ALLONES; ++jt; *jt = ALLONES; ++jt; break;
737  case 3:
738  *jt = ALLONES; ++jt; *jt = ALLONES; ++jt; *jt = ALLONES; ++jt;
739  break;
740  default:
741  while (jt < iend) {
742  *jt = ALLONES;
743  ++ jt;
744  }
745  break;
746  }
747  }
748  it.nWords = 0;
749  ++ it.it;
750 } // ibis::bitvector64::copy_fill
751 
752 // copy the next nw words (nw X MAXBITS bits) starting with run it
753 // to an array_t as uncompressed words
754 inline void ibis::bitvector64::copy_runs
755 (array_t<ibis::bitvector64::word_t>::iterator& jt, run& it, word_t& nw) {
756  while (nw >= it.nWords && nw > 0) { // only need to copy the word
757  if (it.isFill) {
758  const array_t<word_t>::iterator iend = jt + it.nWords;
759  if (it.fillBit == 0) {
760  while (jt < iend) {
761  *jt = 0;
762  ++ jt;
763  }
764  }
765  else {
766  while (jt < iend) {
767  *jt = ALLONES;
768  ++ jt;
769  }
770  }
771  nw -= it.nWords;
772  }
773  else {
774  *jt = *(it.it);
775  ++ jt;
776  -- nw;
777  }
778  ++ it.it; // advance to the next word
779  it.decode();
780  }
781 } // ibis::bitvector64::copy_runs
782 
783 // copy the complements of the next nw words (nw X MAXBITS bits)
784 // starting with "run it" as uncompressed words
785 inline void ibis::bitvector64::copy_runsn
786 (array_t<ibis::bitvector64::word_t>::iterator& jt, run& it, word_t& nw) {
787  while (nw >= it.nWords && nw > 0) { // only need to copy the word
788  if (it.isFill) {
789  const array_t<word_t>::iterator iend = jt + it.nWords;
790  if (it.fillBit == 0) {
791  while (jt < iend) {
792  *jt = ALLONES;
793  ++ jt;
794  }
795  }
796  else {
797  while (jt < iend) {
798  *jt = 0;
799  ++ jt;
800  }
801  }
802  nw -= it.nWords;
803  }
804  else {
805  *jt = *(it.it) ^ ALLONES;
806  ++ jt;
807  -- nw;
808  }
809  ++ it.it; // advance to the next word
810  it.decode();
811  }
812 } // ibis::bitvector64::copy_runsn
813 
814 inline ibis::bitvector64::iterator ibis::bitvector64::begin() {
815  iterator it;
816  it.it = m_vec.begin();
817  it.vec = &m_vec;
818  it.bitv = this;
819  it.active = &active;
820  it.decodeWord();
821  return it;
822 } // ibis::bitvector64::begin()
823 
824 inline ibis::bitvector64::iterator ibis::bitvector64::end() {
825  iterator it;
826  it.ind = 0;
827  it.compressed = 0;
828  it.nbits = 0;
829  it.literalvalue = 0;
830  it.fillbit = 0;
831  it.it = m_vec.end() + 1;
832  it.vec = &m_vec;
833  it.bitv = this;
834  it.active = &active;
835  return it;
836 } // ibis::bitvector64::end()
837 
838 inline ibis::bitvector64::const_iterator ibis::bitvector64::begin() const {
839  const_iterator it;
840  it.it = m_vec.begin();
841  it.begin = m_vec.begin();
842  it.end = m_vec.end();
843  it.active = &active;
844  it.decodeWord();
845  return it;
846 } // ibis::bitvector64::begin()
847 
848 // dereference -- no error checking
849 inline bool ibis::bitvector64::iterator::operator*() const {
850 #if defined(DEBUG) && DEBUG + 0 > 1
851  if (vec==0 || it<vec->begin() || it>vec->end())
852  throw "bitvector64::const_iterator not initialized correctly.";
853 #endif
854  if (compressed != 0)
855  return (fillbit != 0);
856  else
857  return ((word_t)1 & (literalvalue >> (bitvector64::SECONDBIT - ind)));
858 }
859 
860 // comparison only based on the iterator
861 inline int ibis::bitvector64::iterator::operator!=
862 (const ibis::bitvector64::const_iterator& rhs) const throw () {
863  return (it != rhs.it);
864 }
865 inline int ibis::bitvector64::iterator::operator==
866 (const ibis::bitvector64::const_iterator& rhs) const throw () {
867  return (it == rhs.it);
868 }
869 inline int ibis::bitvector64::iterator::operator!=
870 (const ibis::bitvector64::iterator& rhs) const throw () {
871  return (it != rhs.it);
872 }
873 inline int ibis::bitvector64::iterator::operator==
874 (const ibis::bitvector64::iterator& rhs) const throw () {
875  return (it == rhs.it);
876 }
877 
878 // increment by one
879 inline ibis::bitvector64::iterator& ibis::bitvector64::iterator::operator++() {
880 #if defined(DEBUG) && DEBUG + 0 > 1
881  if (vec==0 || it<vec->begin() || it>vec->end())
882  throw "bitvector64::iterator not formed correctly.";
883 #endif
884  if (ind+1<nbits) {++ind;}
885  else {++ it; decodeWord();}
886  return *this;
887 }
888 
889 // decrement by one
890 inline ibis::bitvector64::iterator& ibis::bitvector64::iterator::operator--() {
891 #if defined(DEBUG) && DEBUG + 0 > 1
892  if (vec==0 || it<vec->begin() || it>vec->end()+1)
893  throw "bitvector64::iterator not formed correctly.";
894 #endif
895  if (ind) -- ind;
896  else {
897  if (it <= vec->end()) -- it;
898  else if (active->nbits) it = vec->end();
899  else it = vec->end() - 1;
900  decodeWord();
901  if (nbits) ind = nbits - 1;
902  }
903  return *this;
904 }
905 
906 inline ibis::bitvector64::const_iterator ibis::bitvector64::end() const {
907  const_iterator it;
908  it.ind = 0;
909  it.compressed = 0;
910  it.nbits = 0;
911  it.literalvalue = 0;
912  it.fillbit = 0;
913  it.it = m_vec.end() + 1;
914  it.begin = m_vec.begin();
915  it.end = m_vec.end();
916  it.active = &active;
917  return it;
918 } // ibis::bitvector64::end()
919 
920 // dereference -- no error checking
921 inline bool ibis::bitvector64::const_iterator::operator*() const {
922 #if defined(DEBUG) && DEBUG + 0 > 1
923  if (it==0 || end==0 || it>end || nbits<=ind)
924  throw "bitvector64::const_iterator not initialized correctly.";
925 #endif
926  if (compressed != 0)
927  return (fillbit != 0);
928  else
929  return ((word_t)1 & (literalvalue >> (bitvector64::SECONDBIT - ind)));
930 }
931 
932 // comparison only based on the iterator
933 inline int ibis::bitvector64::const_iterator::operator!=
934 (const ibis::bitvector64::const_iterator& rhs) const throw (){
935  return (it != rhs.it);
936 }
937 inline int ibis::bitvector64::const_iterator::operator==
938 (const ibis::bitvector64::const_iterator& rhs) const throw () {
939  return (it == rhs.it);
940 }
941 
942 // increment by one
944 ibis::bitvector64::const_iterator::operator++() {
945 #if defined(DEBUG) && DEBUG + 0 > 1
946  if (it==0 || end==0 || it>end)
947  throw "bitvector64::const_iterator not formed correctly.";
948 #endif
949  if (ind+1<nbits) {++ind;}
950  else {++ it; decodeWord();}
951  return *this;
952 }
953 
954 // decrement by one
956 ibis::bitvector64::const_iterator::operator--() {
957 #if defined(DEBUG) && DEBUG + 0 > 1
958  if (it==0 || end==0 || it>end)
959  throw "bitvector64::const_iterator not formed correctly.";
960 #endif
961  if (ind) -- ind;
962  else {
963  if (it <= end) -- it;
964  else if (active->nbits) it = end;
965  else it = end - 1;
966  decodeWord();
967  if (nbits) ind = nbits - 1;
968  }
969  return *this;
970 }
971 
972 inline ibis::bitvector64::indexSet ibis::bitvector64::firstIndexSet() const {
973  indexSet is;
974  if (m_vec.end() > m_vec.begin()) {
975  is.it = m_vec.begin() - 1;
976  is.end = m_vec.end();
977  }
978  else {
979  is.it = 0;
980  is.end = 0;
981  }
982  is.active = &active;
983  is.ind[0] = static_cast<word_t>(-1);
984  is.nind = 0;
985  ++is;
986  return is;
987 } // end ibis::bitvector64::firstIndexSet;
988 
989 inline double ibis::bitvector64::randomSize(word_t nb, word_t nc) {
990  double sz = 0.0;
991  if (nb > 0 && nb >= nc) {
992  const double den = static_cast<double>(nc) /
993  static_cast<double>(nb);
994  const word_t nw = (nb > SECONDBIT ? nb / SECONDBIT - 1 : 0);
995  sz = 3.0 + nw - nw *
996  (pow(1.0-den, static_cast<int>(2*SECONDBIT)) +
997  pow(den, static_cast<int>(2*SECONDBIT)));
998  }
999  return sz*sizeof(word_t);
1000 } // end ibis::bitvector64::randomSize
1001 
1002 inline double
1003 ibis::bitvector64::markovSize(word_t nb, word_t nc, double f) {
1004  double sz = 0.0;
1005  if (nb > 0 && nb >= nc) {
1006  const double den = static_cast<double>(nc) /
1007  static_cast<double>(nb);
1008  const word_t nw = (nb > SECONDBIT ? nb / SECONDBIT - 1 : 0);
1009  if ((den <= 0.5 && f > 1.0) || (den > 0.5 && (1.0-den)*f > den))
1010  sz = ((1.0-den) * pow(1.0-den/((1.0-den)*f),
1011  static_cast<int>(2*MAXBITS-3)) +
1012  den * pow(1.0-1.0/f, static_cast<int>(2*MAXBITS-3)));
1013  else
1014  sz = (pow(1.0-den, static_cast<int>(2*SECONDBIT)) +
1015  pow(den, static_cast<int>(2*SECONDBIT)));
1016  sz = 3.0 + nw * (1.0 - sz);
1017  }
1018  return sz*sizeof(word_t);
1019 } // end ibis::bitvector64::markovSize
1020 
1021 std::ostream& operator<<(std::ostream&, const ibis::bitvector64&);
1022 #endif // __BITVECTOR64_H
void adjustSize(word_t nv, word_t nt)
Adjust the size of the bit sequence.
Definition: bitvector64.cpp:3619
bitvector64 & swap(bitvector64 &bv)
!<
Definition: bitvector64.h:505
static unsigned bitsPerLiteral()
Return the number of bits in a literal word.
Definition: bitvector64.h:151
bitvector64 * operator^(const bitvector64 &) const
Perform bitwise XOR, return the pointer to the result.
Definition: bitvector64.cpp:1042
static double clusteringFactor(word_t nb, word_t nc, word_t nw)
Estimate clustering factor based on the size.
Definition: bitvector64.cpp:3638
size_t size() const
Definition: array_t.h:69
bitvector64 * operator|(const bitvector64 &) const
Perform bitwise OR, return the pointer to the result.
Definition: bitvector64.cpp:932
word_t bytes() const
Return the number of bytes used by the bitvector object in memory.
Definition: bitvector64.h:142
bitvector64()
!< The basic unit of data storage is 64-bit.
Definition: bitvector64.h:59
bool isCompressed() const
Does this bit vector use less space than the maximum? Return true if yes, otherwise false...
Definition: bitvector64.h:130
void clear()
Reset the size to zero.
Definition: array_t.h:171
void write(const char *fn) const
Write the bit vector to a file.
Definition: bitvector64.cpp:1354
void erase(word_t i, word_t j)
Remove the bits in the range of [i, j).
Definition: bitvector64.cpp:641
void setBit(const word_t i, int val)
Replace a single bit at position i.
Definition: bitvector64.cpp:383
word_t numFillWords() const
Return the number of fill words.
Definition: bitvector64.h:476
int operator==(const bitvector64 &rhs) const
Return 1 if two bit sequences have the same content, 0 otherwise.
Definition: bitvector64.cpp:743
The indexSet stores positions of bits that are one.
Definition: bitvector64.h:328
A data structure to represent a sequence of bits.
Definition: bitvector64.h:54
word_t cnt() const
Return the number of bits that are one.
Definition: bitvector64.h:133
static double markovSize(word_t nb, word_t nc, double f)
Compute the expected size (bytes) of a random sequence generated from a Markov process.
Definition: bitvector64.h:1003
void appendByte(unsigned char)
!< Append a single bit.
Definition: bitvector64.h:582
void decompress()
!< Merge fills into fill words.
Definition: bitvector64.cpp:235
word_t size() const
Return the total number of bits in the bit sequence.
Definition: bitvector64.h:138
bitvector64 & copy(const bitvector64 &bv)
!<
Definition: bitvector64.h:498
void read(const char *fn)
Read a bit vector from the file.
Definition: bitvector64.cpp:1288
bool all0s() const
Are all bits in regular words 0?
Definition: bitvector64.h:419
static double randomSize(word_t nb, word_t nc)
Compute the expected number of bytes required to store a random sequence.
Definition: bitvector64.h:989
void appendWord(word_t w)
Append a WAH compressed word.
Definition: bitvector64.cpp:93
bitvector64 * operator&(const bitvector64 &) const
Perform bitwise AND, return the pointer to the result.
Definition: bitvector64.cpp:812
void operator-=(const bitvector64 &rhs)
Perform bitwise subtraction (a & !b).
Definition: bitvector64.cpp:1096
void set(int val, word_t n)
Create a vector with n bits of value val (cf.
Definition: bitvector64.cpp:70
const word_t * const_iterator
!< Iterator type.
Definition: array_t.h:27
The iterator that allows modification of bits.
Definition: bitvector64.h:388
void operator&=(const bitvector64 &rhs)
Perform bitwise AND between this bitvector64 and rhs.
Definition: bitvector64.cpp:755
void flip()
Complement all bits of a bit sequence.
Definition: bitvector64.cpp:699
void operator^=(const bitvector64 &rhs)
Perform bitwise exclusive or (XOR).
Definition: bitvector64.cpp:994
void appendFill(int val, word_t n)
!< Append a WAH word.
Definition: bitvector64.h:610
The const_iterator class. It iterates on the individual bits.
Definition: bitvector64.h:351
void clear()
Remove the existing content of a bitvector64.
Definition: bitvector64.h:82
word_t compressible() const
!< Turn all fill words into literal words.
Definition: bitvector64.cpp:342
bool all1s() const
Are all bits in regular words 1?
Definition: bitvector64.h:432
int getBit(const word_t i) const
Return the value of a bit.
Definition: bitvector64.cpp:607
void operator|=(const bitvector64 &rhs)
Perform bitwise OR.
Definition: bitvector64.cpp:874
bitvector64 * operator-(const bitvector64 &) const
Perform bitwise subtraction and return the pointer to the result.
Definition: bitvector64.cpp:1161
bitvector64 & operator=(const bitvector64 &bv)
!< Read the content of the named file.
Definition: bitvector64.h:490
word_t getSerialSize() const
Compute the number of words in serialized version of the bitvector object.
Definition: bitvector64.h:147

Make It A Bit Faster
Contact us
Disclaimers
FastBit source code
FastBit mailing list archive