ibin.h
Go to the documentation of this file.
1 //File: $Id$
2 // Author: John Wu <John.Wu at ACM.org>
3 // Lawrence Berkeley National Laboratory
4 // Copyright (c) 2000-2016 the Regents of the University of California
5 #ifndef IBIS_IBIN_H
6 #define IBIS_IBIN_H
7 #include "index.h"
14 #if defined(_WIN32) && defined(_MSC_VER)
15 #pragma warning(disable:4786) // some identifier longer than 256 characters
16 #undef min
17 #undef max
18 #endif
19 
27 class ibis::bin : public ibis::index {
28 public:
29 
// Destructor; its body calls clear().
30  virtual ~bin() {clear();};
// Copy constructor.
31  bin(const ibis::bin& rhs);
// Constructor taking a column pointer and a C string; both default to 0.
32  bin(const ibis::column* c=0, const char* f=0);
// NOTE(review): the embedded listing numbers jump from 32 to 34 and the line
// below is only the tail of a parameter list, so the opening line of this
// declaration appears to have been dropped from this copy -- restore it from
// the original header before compiling.
34  size_t offset = 8);
// Constructors that additionally receive a sequence of doubles (bd), either
// as an array_t or as a std::vector.
35  bin(const ibis::column* c, const char* f, const array_t<double>& bd);
36  bin(const ibis::column* c, const char* f, const std::vector<double>& bd);
// Constructors taking a count (nb) plus raw keys/offs arrays.  The longer
// overloads also take a bms argument, either as uint32_t* or as a void*
// paired with a FastBitReadBitmaps argument.
37  bin(const ibis::column* c, uint32_t nb, double *keys, int64_t *offs);
38  bin(const ibis::column* c, uint32_t nb, double *keys, int64_t *offs,
39  uint32_t *bms);
40  bin(const ibis::column* c, uint32_t nb, double *keys, int64_t *offs,
41  void *bms, FastBitReadBitmaps rd);
42 
// Virtual interface for duplicating, printing, sizing, writing, reading and
// appending.  Only declared here; the definitions are not part of this file.
43  virtual index* dup() const;
44  virtual void print(std::ostream& out) const;
45  virtual void serialSizes(uint64_t&, uint64_t&, uint64_t&) const;
46  virtual int write(ibis::array_t<double> &,
// NOTE(review): listing number 47 is absent between these two lines, so one
// parameter of this overload may be missing from this copy -- verify.
48  ibis::array_t<uint32_t> &) const;
49  virtual int write(const char* dt) const;
50  virtual int read(const char* idxfile);
51  virtual int read(ibis::fileManager::storage* st);
52  virtual long append(const char* dt, const char* df, uint32_t nnew);
53 
// Evaluate a continuous range condition, writing the result into hits.
57  virtual long evaluate(const ibis::qContinuousRange& expr,
58  ibis::bitvector& hits) const;
// Discrete range conditions are not specialised here: the call is forwarded
// unchanged to ibis::index::evaluate.
59  virtual long evaluate(const ibis::qDiscreteRange& expr,
60  ibis::bitvector& hits) const {
61  return ibis::index::evaluate(expr, hits);
62  }
63 
// select overloads for a continuous range; the second one takes an extra
// bitvector argument.
64  virtual long select(const ibis::qContinuousRange&, void*) const;
65  virtual long select(const ibis::qContinuousRange&, void*,
66  ibis::bitvector&) const;
67 
// Estimation for a continuous range: one overload fills a lower/upper pair of
// bitvectors, the other returns a single uint32_t.
68  virtual void estimate(const ibis::qContinuousRange& expr,
69  ibis::bitvector& lower,
70  ibis::bitvector& upper) const;
71  virtual uint32_t estimate(const ibis::qContinuousRange& expr) const;
73  virtual float undecidable(const ibis::qContinuousRange& expr,
74  ibis::bitvector& iffy) const;
75 
// estimate overloads for a deprecatedJoin expression.  The variants differ in
// whether they take a mask, a pair of qRange pointers, and whether they fill
// 64-bit bitvectors (lower/upper) or return an int64_t.
77  virtual void estimate(const ibis::deprecatedJoin& expr,
78  ibis::bitvector64& lower,
79  ibis::bitvector64& upper) const;
80  virtual void estimate(const ibis::deprecatedJoin& expr,
81  const ibis::bitvector& mask,
82  ibis::bitvector64& lower,
83  ibis::bitvector64& upper) const;
84  virtual void estimate(const ibis::deprecatedJoin& expr,
85  const ibis::bitvector& mask,
86  const ibis::qRange* const range1,
87  const ibis::qRange* const range2,
88  ibis::bitvector64& lower,
89  ibis::bitvector64& upper) const;
90  virtual int64_t estimate(const ibis::deprecatedJoin& expr,
91  const ibis::bitvector& mask,
92  const ibis::qRange* const range1,
93  const ibis::qRange* const range2) const;
94 
// The same family of estimate overloads, with a second ibis::bin (idx2)
// passed as the first argument.
96  virtual void estimate(const ibis::bin& idx2,
97  const ibis::deprecatedJoin& expr,
98  ibis::bitvector64& lower,
99  ibis::bitvector64& upper) const;
100  virtual void estimate(const ibis::bin& idx2,
101  const ibis::deprecatedJoin& expr,
102  const ibis::bitvector& mask,
103  ibis::bitvector64& lower,
104  ibis::bitvector64& upper) const;
105  virtual void estimate(const ibis::bin& idx2,
106  const ibis::deprecatedJoin& expr,
107  const ibis::bitvector& mask,
108  const ibis::qRange* const range1,
109  const ibis::qRange* const range2,
110  ibis::bitvector64& lower,
111  ibis::bitvector64& upper) const;
112  virtual int64_t estimate(const ibis::bin& idx2,
113  const ibis::deprecatedJoin& expr) const;
114  virtual int64_t estimate(const ibis::bin& idx2,
115  const ibis::deprecatedJoin& expr,
116  const ibis::bitvector& mask) const;
117  virtual int64_t estimate(const ibis::bin& idx2,
118  const ibis::deprecatedJoin& expr,
119  const ibis::bitvector& mask,
120  const ibis::qRange* const range1,
121  const ibis::qRange* const range2) const;
122 
// Identification: type() yields BINNING and name() the literal "bin".
123  virtual INDEX_TYPE type() const {return BINNING;}
124  virtual const char* name() const {return "bin";}
// Reports nobs-2 when nobs exceeds 2, and 0 otherwise (the guard also keeps
// the unsigned subtraction from wrapping).
125  virtual uint32_t numBins() const {return (nobs>2?nobs-2:0);}
126  // bin boundaries and counts of each bin
127  virtual void binBoundaries(std::vector<double>&) const;
128  virtual void binWeights(std::vector<uint32_t>&) const;
129  // expand/contract the boundaries of a range condition
130  virtual int expandRange(ibis::qContinuousRange& rng) const;
131  virtual int contractRange(ibis::qContinuousRange& rng) const;
132  virtual void speedTest(std::ostream& out) const;
133  virtual double estimateCost(const ibis::qContinuousRange& expr) const;
134  virtual double estimateCost(const ibis::qDiscreteRange& expr) const;
135 
// Distribution and summary-statistic queries (declarations only).
136  virtual long getCumulativeDistribution(std::vector<double>& bds,
137  std::vector<uint32_t>& cts) const;
138  virtual long getDistribution(std::vector<double>& bbs,
139  std::vector<uint32_t>& cts) const;
140  virtual double getMin() const;
141  virtual double getMax() const;
142  virtual double getSum() const;
143 
// Static parsing helpers; they take a column (or a C string) rather than an
// index object, so they can be called without an instance.
144  static unsigned parsePrec(const ibis::column&);
145  static uint32_t parseNbins(const ibis::column&);
146  static unsigned parseScale(const ibis::column&);
147  static unsigned parseScale(const char*);
148 
// Non-virtual helpers: reading from a file descriptor at a given offset,
// appending another bin or an array of uint32_t, and checkBin with and
// without a mask.
150  int read(int fdes, size_t offset, const char *fname, const char *header);
152  long append(const ibis::bin& tail);
154  long append(const array_t<uint32_t>& ind);
155  array_t<uint32_t>* indices(const ibis::bitvector& mask) const;
158  long checkBin(const ibis::qRange& cmp, uint32_t jbin,
159  ibis::bitvector& res) const;
163  long checkBin(const ibis::qRange& cmp, uint32_t jbin,
164  const ibis::bitvector& mask, ibis::bitvector& res) const;
165 
// Per-target record stored in granuleMap: value extremes on either side of a
// target value plus three owned bitvectors.  Copying is disabled because the
// destructor deletes the bitvectors.
182  struct granule {
183  double minm, maxm; // min and max of values less than the target
184  double minp, maxp; // min and max of values greater than the target
// Owned bitvectors, released in the destructor.  The constructor initialises
// and the destructor deletes all three, so all three must be declared here;
// locm and locp were missing from this listing (numbers 186-187).
// Presumably loce/locm/locp mark the rows equal to / less than / greater than
// the target -- confirm against the original header.
185  ibis::bitvector* loce;
186  ibis::bitvector* locm;
187  ibis::bitvector* locp;
188 
// Starts with inverted extremes (min = DBL_MAX, max = -DBL_MAX) and null
// bitvector pointers.
190  granule() : minm(DBL_MAX), maxm(-DBL_MAX), minp(DBL_MAX),
191  maxp(-DBL_MAX), loce(0), locm(0), locp(0) {};
// Frees the three bitvectors; deleting a null pointer is a no-op.
193  ~granule() {delete loce; delete locm; delete locp;};
194  private:
195  granule(const granule&); // no copy constructor
196  granule& operator=(const granule&); // no assignment
197  };
198  // key = target value
199  typedef std::map< double, granule* > granuleMap;
200 
// Cursor that steps through an array of values in lock-step with the index
// sets produced by a bitvector.
202  template <typename T>
203  struct valpos {
// Start of the value array; taken from array_t<T>::begin() and never freed
// by this struct.
205  const T *vals;
// Current index set of the bitvector.  This member is used by every method
// below but was missing from the listing (number 207); it has to precede jv
// and ji to match the constructor's initialiser order.
207  bitvector::indexSet ind;
// Offset of the current value inside vals.
209  ibis::bitvector::word_t jv;
// Cursor inside ind: the position itself while ind is a range, otherwise an
// offset into ind.indices() (see position()).
211  ibis::bitvector::word_t ji;
212 
// Empty cursor: null value pointer, both counters at zero.  Spelled with the
// injected class name because C++20 no longer accepts valpos<T>() as a
// constructor declarator.
214  valpos() : vals(0), jv(0), ji(0) {}
// Cursor over the values v located at the set bits of b.  When the first
// index set is a non-empty range, ji starts at the range's first position.
216  valpos(const array_t<T>& v, const bitvector& b)
217  : vals(v.begin()), ind(b.firstIndexSet()), jv(0), ji(0) {
218  if (ind.nIndices() > 0 && ind.isRange())
219  ji = *(ind.indices());
220  }
221 
// Position associated with the current value: ji directly for a range index
// set, otherwise the ji-th entry of the explicit index list.
223  bitvector::word_t position() const {
224  if (ind.isRange())
225  return ji;
226  else
227  return ind.indices()[ji];
228  }
229 
// The value the cursor currently points at.
231  T value() const {
232  return vals[jv];
233  }
234 
// Advance by one value.  Once ji runs past the current index set (indices()[1]
// for a range, nIndices() for a list) the index set itself is advanced and ji
// is re-seeded: first position of a non-empty range, otherwise 0.
236  void next() {
237  ++ jv;
238  ++ ji;
239  if (ind.isRange()) {
240  if (ji >= ind.indices()[1]) {
241  ++ ind;
242  if (ind.nIndices() > 0 && ind.isRange())
243  ji = ind.indices()[0];
244  else
245  ji = 0;
246  }
247  }
248  else if (ji >= ind.nIndices()) {
249  ++ ind;
250  if (ind.nIndices() > 0 && ind.isRange())
251  ji = ind.indices()[0];
252  else
253  ji = 0;
254  }
255  }
256  }; // valpos
257 
// Comparison functor over valpos pointers: true exactly when x's current
// position is strictly greater than y's.
259  template<typename T>
260  struct comparevalpos {
261  bool operator()(const valpos<T>* x, const valpos<T>* y) {
262  return (y->position() < x->position());
263  }
264  }; // comparevalpos
265 
// construct: one overload takes a C string, the templated one an in-memory
// array_t<E>.
266  void construct(const char*);
267  template <typename E>
268  void construct(const array_t<E>& varr);
269 
// locate overloads for a continuous range.  Results come back through the
// reference parameters: cand0/cand1 always, hit0/hit1 in the longer form.
271  virtual void locate(const ibis::qContinuousRange& expr,
272  uint32_t& cand0, uint32_t& cand1) const;
274  virtual void locate(const ibis::qContinuousRange& expr,
275  uint32_t& cand0, uint32_t& cand1,
276  uint32_t& hit0, uint32_t& hit1) const;
277 
278 protected:
279  // member variables shared by all derived classes -- the derived classes
280  // are allowed to interpret the actual content differently.
281  uint32_t nobs;
// NOTE(review): listing numbers 282-284 are absent here.  swap() further down
// uses bounds, maxval and minval, none of which is declared in the visible
// part of this class -- they were possibly dropped from this copy (or are
// inherited); verify against the original header.
285 
// Protected constructor taking a bit count and a storage object; offset
// defaults to 8.
287  bin(const ibis::column* c, const uint32_t nbits,
288  ibis::fileManager::storage* st, size_t offset = 8);
289 
// binning overloads taking a C string, optionally with a sequence of doubles.
291  void binning(const char* f, const std::vector<double>& bd);
292  void binning(const char* f, const array_t<double>& bd);
295  void binning(const char* f);
// Templated helpers parameterised on the element type E.
298  template <typename E>
299  void binningT(const char* fname);
300  template <typename E>
301  long checkBin0(const ibis::qRange& cmp, uint32_t jbin,
302  ibis::bitvector& res) const;
303  template <typename E>
304  long checkBin1(const ibis::qRange& cmp, uint32_t jbin,
305  const ibis::bitvector& mask, ibis::bitvector& res) const;
306  template <typename E>
307  long binOrderT(const char* fname) const;
308  long binOrder(const char* fname) const;
309 
// setBoundaries overloads.  The two const overloads write into the bnds
// argument rather than into this object; note that cnt1/cnt0 are passed by
// value.
311  void setBoundaries(const char* f);
312  void setBoundaries(array_t<double>& bnds,
313  const ibis::bin& bin0) const;
314  void setBoundaries(array_t<double>& bnds,
315  const ibis::bin& idx1,
316  const array_t<uint32_t> cnt1,
317  const array_t<uint32_t> cnt0) const;
318  // functions to deal with in-memory arrays
319  template <typename E>
320  void binning(const array_t<E>& varr);
321  template <typename E>
322  void binning(const array_t<E>& varr, const array_t<double>& bd);
323  template <typename E>
324  void setBoundaries(const array_t<E>& varr);
325  template <typename E>
326  void scanAndPartition(const array_t<E>&, unsigned);
// Helpers operating on a granuleMap (map from double to granule*).
327  template <typename E>
328  void mapGranules(const array_t<E>&, granuleMap& gmap) const;
329  void printGranules(std::ostream& out, const granuleMap& gmap) const;
330  void convertGranules(granuleMap& gmap);
331 
332  void readBinBoundaries(const char* name, uint32_t nb);
334  void scanAndPartition(const char*, unsigned, uint32_t nbins=0);
336  void addBounds(double lbd, double rbd, uint32_t nbins, uint32_t eqw);
337 
// Declared with an empty exception specification (throw()).
338  virtual size_t getSerialSize() const throw();
341  void divideBitmaps(const array_t<bitvector*>& bms,
342  std::vector<unsigned>& parts) const;
343 
345  virtual double computeSum() const;
347  virtual void adjustLength(uint32_t nrows);
// Single-value form of locate; returns a uint32_t.
349  virtual uint32_t locate(const double& val) const;
350 
// Exchange col, nobs, nrows, bits, minval, maxval and bounds with rhs.
// The scalar members go through a saved copy of this object's value; the
// containers use their own swap().
352  void swap(bin& rhs) {
353  const ibis::column* const savedCol = col;
354  col = rhs.col;
355  rhs.col = savedCol;
356  const uint32_t savedNobs = nobs;
357  nobs = rhs.nobs;
358  rhs.nobs = savedNobs;
359  const uint32_t savedNrows = nrows;
360  nrows = rhs.nrows;
361  rhs.nrows = savedNrows;
362  bits.swap(rhs.bits);
363  minval.swap(rhs.minval);
364  maxval.swap(rhs.maxval);
365  bounds.swap(rhs.bounds);
366  } // swap
367 
// clear() is virtual and is what the destructor calls.
368  virtual void clear();
// Writers taking an int argument (fptr), in a 32 and a 64 variant.
369  int write32(int fptr) const;
370  int write64(int fptr) const;
371 
// NOTE(review): listing numbers 373 and 375 are absent, which leaves two
// template declarations without a name or parameter list (only the tail
// "ibis::bitvector&) const;" of the second one survives).  These lines must
// be restored from the original header before this can compile.
372  template <typename T> long
374  template <typename T> long
376  ibis::bitvector&) const;
377 
378 private:
379  // private member functions
// Declared but private: assignment from outside the class is not allowed.
380  bin& operator=(const bin&);
381 
382  void print(std::ostream& out, const uint32_t tot,
383  const double& lbound, const double& rbound) const;
384 
// Join helpers.  Three families (equiJoin, deprecatedJoin taking a delta,
// compJoin taking a math::term) are each declared with and without a second
// index (idx2).  This first group fills a lower/iffy pair of 64-bit
// bitvectors and takes no mask.
386  void equiJoin(ibis::bitvector64& lower,
387  ibis::bitvector64& iffy) const;
388  void equiJoin(const ibis::bin& idx2,
389  ibis::bitvector64& lower,
390  ibis::bitvector64& iffy) const;
391  void deprecatedJoin(const double& delta,
392  ibis::bitvector64& lower,
393  ibis::bitvector64& iffy) const;
394  void deprecatedJoin(const ibis::bin& idx2,
395  const double& delta,
396  ibis::bitvector64& lower,
397  ibis::bitvector64& iffy) const;
398  void compJoin(const ibis::math::term *expr,
399  ibis::bitvector64& lower,
400  ibis::bitvector64& iffy) const;
401  void compJoin(const ibis::bin& idx2,
402  const ibis::math::term *expr,
403  ibis::bitvector64& lower,
404  ibis::bitvector64& iffy) const;
// Same six signatures with an additional mask argument.
405  void equiJoin(const ibis::bitvector& mask,
406  ibis::bitvector64& lower,
407  ibis::bitvector64& iffy) const;
408  void equiJoin(const ibis::bin& idx2,
409  const ibis::bitvector& mask,
410  ibis::bitvector64& lower,
411  ibis::bitvector64& iffy) const;
412  void deprecatedJoin(const double& delta,
413  const ibis::bitvector& mask,
414  ibis::bitvector64& lower,
415  ibis::bitvector64& iffy) const;
416  void deprecatedJoin(const ibis::bin& idx2,
417  const double& delta,
418  const ibis::bitvector& mask,
419  ibis::bitvector64& lower,
420  ibis::bitvector64& iffy) const;
421  void compJoin(const ibis::math::term *expr,
422  const ibis::bitvector& mask,
423  ibis::bitvector64& lower,
424  ibis::bitvector64& iffy) const;
425  void compJoin(const ibis::bin& idx2,
426  const ibis::math::term *expr,
427  const ibis::bitvector& mask,
428  ibis::bitvector64& lower,
429  ibis::bitvector64& iffy) const;
430 
// Mask plus two qRange pointers, filling a sure/iffy pair (no idx2).
431  void equiJoin(const ibis::bitvector& mask,
432  const ibis::qRange* const range1,
433  const ibis::qRange* const range2,
434  ibis::bitvector64& sure,
435  ibis::bitvector64& iffy) const;
436  void deprecatedJoin(const double& delta,
437  const ibis::bitvector& mask,
438  const ibis::qRange* const range1,
439  const ibis::qRange* const range2,
440  ibis::bitvector64& sure,
441  ibis::bitvector64& iffy) const;
442  void compJoin(const ibis::math::term *delta,
443  const ibis::bitvector& mask,
444  const ibis::qRange* const range1,
445  const ibis::qRange* const range2,
446  ibis::bitvector64& sure,
447  ibis::bitvector64& iffy) const;
448 
// Same inputs, but returning an int64_t instead of filling bitvectors.
449  int64_t equiJoin(const ibis::bitvector& mask,
450  const ibis::qRange* const range1,
451  const ibis::qRange* const range2) const;
452  int64_t deprecatedJoin(const double& delta,
453  const ibis::bitvector& mask,
454  const ibis::qRange* const range1,
455  const ibis::qRange* const range2) const;
456  int64_t compJoin(const ibis::math::term *delta,
457  const ibis::bitvector& mask,
458  const ibis::qRange* const range1,
459  const ibis::qRange* const range2) const;
460 
// The two groups above repeated with a second index (idx2) as first argument.
461  void equiJoin(const ibis::bin& idx2,
462  const ibis::bitvector& mask,
463  const ibis::qRange* const range1,
464  const ibis::qRange* const range2,
465  ibis::bitvector64& sure,
466  ibis::bitvector64& iffy) const;
467  void deprecatedJoin(const ibis::bin& idx2,
468  const double& delta,
469  const ibis::bitvector& mask,
470  const ibis::qRange* const range1,
471  const ibis::qRange* const range2,
472  ibis::bitvector64& sure,
473  ibis::bitvector64& iffy) const;
474  void compJoin(const ibis::bin& idx2,
475  const ibis::math::term *delta,
476  const ibis::bitvector& mask,
477  const ibis::qRange* const range1,
478  const ibis::qRange* const range2,
479  ibis::bitvector64& sure,
480  ibis::bitvector64& iffy) const;
481 
482  int64_t equiJoin(const ibis::bin& idx2,
483  const ibis::bitvector& mask,
484  const ibis::qRange* const range1,
485  const ibis::qRange* const range2) const;
486  int64_t deprecatedJoin(const ibis::bin& idx2,
487  const double& delta,
488  const ibis::bitvector& mask,
489  const ibis::qRange* const range1,
490  const ibis::qRange* const range2) const;
491  int64_t compJoin(const ibis::bin& idx2,
492  const ibis::math::term *delta,
493  const ibis::bitvector& mask,
494  const ibis::qRange* const range1,
495  const ibis::qRange* const range2) const;
496 
497  // need these friendships to access the protected member variables
498  friend class ibis::mesa;
499  friend class ibis::range;
500  friend class ibis::ambit;
501  friend class ibis::pack;
502  friend class ibis::pale;
503  friend class ibis::zone;
504  friend class ibis::mesh;
505  friend class ibis::band;
506 }; // ibis::bin
507 
// ibis::range: an index class derived from ibis::bin.  It reports type RANGE
// and name "range", and adds two doubles (max1, min1) to the inherited state.
511 class ibis::range : public ibis::bin {
512 public:
513 
514  virtual ~range() {};
515  range(const ibis::column* c=0, const char* f=0);
// NOTE(review): listing number 516 is absent and the next line is only the
// tail of a parameter list; the opening line of this constructor declaration
// appears to have been dropped from this copy.
517  size_t offset = 8);
518  explicit range(const ibis::bin& rhs); // convert a bin to a range
519 
520  virtual int read(const char* idxfile);
521  virtual int read(ibis::fileManager::storage* st);
522  virtual int write(const char* dt) const; // write to the named file
523  virtual void print(std::ostream& out) const;
524  virtual long append(const char* dt, const char* df, uint32_t nnew);
525 
// Both select overloads are stubs in this class: they do no work and
// return -1.
526  virtual long select(const ibis::qContinuousRange&, void*) const {
527  return -1;}
528  virtual long select(const ibis::qContinuousRange&, void*,
529  ibis::bitvector&) const {
530  return -1;}
531 
532  virtual long evaluate(const ibis::qContinuousRange& expr,
533  ibis::bitvector& hits) const;
// Discrete ranges are forwarded unchanged to ibis::index::evaluate.
534  virtual long evaluate(const ibis::qDiscreteRange& expr,
535  ibis::bitvector& hits) const {
536  return ibis::index::evaluate(expr, hits);
537  }
538 
// Re-exposes every ibis::bin::estimate overload; without this the two
// estimate declarations below would hide them.
539  using ibis::bin::estimate;
541  virtual void estimate(const ibis::qContinuousRange& expr,
542  ibis::bitvector& lower,
543  ibis::bitvector& upper) const;
544  virtual uint32_t estimate(const ibis::qContinuousRange& expr) const;
546  virtual float undecidable(const ibis::qContinuousRange& expr,
547  ibis::bitvector& iffy) const;
548 
549  virtual INDEX_TYPE type() const {return RANGE;}
550  virtual const char* name() const {return "range";}
// Reports nobs-1 when nobs exceeds 1, and 0 otherwise.
551  virtual uint32_t numBins() const {return (nobs>1?nobs-1:0);}
552  // bin boundaries and counts of each bin
553  virtual void binBoundaries(std::vector<double>&) const;
554  virtual void binWeights(std::vector<uint32_t>&) const;
555  // expand/contract the boundaries of a range condition
556  virtual int expandRange(ibis::qContinuousRange& range) const;
557  virtual int contractRange(ibis::qContinuousRange& range) const;
558  virtual double getMax() const;
559  virtual double getSum() const;
560 
561  int read(int fdes, size_t offset, const char *fname, const char *);
562  long append(const ibis::range& tail);
563  virtual void speedTest(std::ostream& out) const;
564 
565 protected:
566  // protected member variables
567  double max1, min1; // the min and max of the bin not explicitly tracked
568 
569  // have to have its own locate functions because a bin is not explicitly
570  // stored
// The single-value form simply delegates to ibis::bin::locate; declaring it
// here keeps it visible next to the two overloads redeclared below.
571  virtual uint32_t locate(const double& val) const {
572  return ibis::bin::locate(val);
573  }
574  virtual void locate(const ibis::qContinuousRange& expr,
575  uint32_t& cand0, uint32_t& cand1) const;
576  virtual void locate(const ibis::qContinuousRange& expr,
577  uint32_t& cand0, uint32_t& cand1,
578  uint32_t& hit0, uint32_t& hit1) const;
579  virtual double computeSum() const;
581  void construct(const char*);
583  void construct(const char* f, const array_t<double>& bd);
584  virtual size_t getSerialSize() const throw();
585 
586 private:
587  // private member functions
588  int write32(int fptr) const; // write to the given stream
589  int write64(int fptr) const; // write to the given stream
590  void print(std::ostream& out, const uint32_t tot, const double& lbound,
591  const double& rbound) const;
592 
593  friend class ibis::pale; // pale uses ibis::range
594 }; // ibis::range
595 
// ibis::mesa: an index class derived from ibis::bin.  It reports type MESA
// and name "interval".  Copy construction and assignment are declared private.
598 class ibis::mesa : public ibis::bin {
599 public:
600  virtual ~mesa() {};
601  mesa(const ibis::column* c=0, const char* f=0);
// NOTE(review): listing number 602 is absent and the next line is only the
// tail of a parameter list; the opening line of this constructor declaration
// appears to have been dropped from this copy.
603  size_t offset = 8);
604  explicit mesa(const ibis::bin& rhs); // convert a bin to a mesa
605 
606  virtual void print(std::ostream& out) const;
607  virtual int write(const char* dt) const; // write to the named file
608  virtual long append(const char* dt, const char* df, uint32_t nnew);
609 
// Both select overloads are stubs in this class: they return -1.
610  virtual long select(const ibis::qContinuousRange&, void*) const {
611  return -1;}
612  virtual long select(const ibis::qContinuousRange&, void*,
613  ibis::bitvector&) const {
614  return -1;}
615 
616  virtual long evaluate(const ibis::qContinuousRange& expr,
617  ibis::bitvector& hits) const;
// Discrete ranges are forwarded unchanged to ibis::index::evaluate.
618  virtual long evaluate(const ibis::qDiscreteRange& expr,
619  ibis::bitvector& hits) const {
620  return ibis::index::evaluate(expr, hits);
621  }
622 
// Keeps the ibis::bin::estimate overloads visible alongside the ones below.
623  using ibis::bin::estimate;
625  virtual void estimate(const ibis::qContinuousRange& expr,
626  ibis::bitvector& lower,
627  ibis::bitvector& upper) const;
628  virtual uint32_t estimate(const ibis::qContinuousRange& expr) const;
630  virtual float undecidable(const ibis::qContinuousRange& expr,
631  ibis::bitvector& iffy) const;
632 
633  virtual INDEX_TYPE type() const {return MESA;}
634  virtual const char* name() const {return "interval";}
// Reports nobs-2 when nobs exceeds 2, and 0 otherwise.
635  virtual uint32_t numBins() const {return (nobs>2?nobs-2:0);}
636  // bin boundaries and counts of each bin
637  virtual void binBoundaries(std::vector<double>&) const;
638  virtual void binWeights(std::vector<uint32_t>&) const;
639  virtual double getSum() const;
640 
641  virtual void speedTest(std::ostream& out) const;
642  long append(const ibis::mesa& tail);
643 
644 protected:
645  virtual double computeSum() const;
647  void construct(const char*);
648  virtual size_t getSerialSize() const throw();
649 
650 private:
651  // private member functions
652 
// Declared private to disable copying.
653  mesa(const mesa&);
654  mesa& operator=(const mesa&);
655 }; // ibis::mesa
656 
// ibis::ambit: an index class derived from ibis::bin.  It reports type AMBIT
// and name "range-range", and holds a vector of pointers to further ambit
// objects (sub).  Copy construction and assignment are declared private.
659 class ibis::ambit : public ibis::bin {
660 public:
// The destructor calls clear().
661  virtual ~ambit() {clear();};
662  ambit(const ibis::column* c=0, const char* f=0);
// NOTE(review): listing number 663 is absent and the next line is only the
// tail of a parameter list; the opening line of this constructor declaration
// appears to have been dropped from this copy.
664  size_t offset = 8);
665  explicit ambit(const ibis::bin& rhs); // convert from a ibis::bin
666 
667  virtual int read(const char* idxfile);
668  virtual int read(ibis::fileManager::storage* st);
669  virtual int write(const char* dt) const;
670  virtual void print(std::ostream& out) const;
671  virtual long append(const char* dt, const char* df, uint32_t nnew);
672 
// Both select overloads are stubs in this class: they return -1.
673  virtual long select(const ibis::qContinuousRange&, void*) const {
674  return -1;}
675  virtual long select(const ibis::qContinuousRange&, void*,
676  ibis::bitvector&) const {
677  return -1;}
678 
679  virtual long evaluate(const ibis::qContinuousRange& expr,
680  ibis::bitvector& hits) const;
// Discrete ranges are forwarded unchanged to ibis::index::evaluate.
681  virtual long evaluate(const ibis::qDiscreteRange& expr,
682  ibis::bitvector& hits) const {
683  return ibis::index::evaluate(expr, hits);
684  }
685 
// Keeps the ibis::bin::estimate overloads visible alongside the one below.
686  using ibis::bin::estimate;
688  virtual void estimate(const ibis::qContinuousRange& expr,
689  ibis::bitvector& lower,
690  ibis::bitvector& upper) const;
692  virtual float undecidable(const ibis::qContinuousRange& expr,
693  ibis::bitvector& iffy) const;
694 
695  virtual INDEX_TYPE type() const {return AMBIT;}
696  virtual const char* name() const {return "range-range";}
// Reports nobs-1 when nobs exceeds 1, and 0 otherwise.
697  virtual uint32_t numBins() const {return (nobs>1?nobs-1:0);}
698  // bin boundaries and counts of each bin
699  virtual void binBoundaries(std::vector<double>&) const;
700  virtual void binWeights(std::vector<uint32_t>&) const;
701  virtual void adjustLength(uint32_t nrows);
702  virtual double getSum() const;
703 
704  virtual void speedTest(std::ostream& out) const;
705  long append(const ibis::ambit& tail);
706 
707 protected:
708  virtual double computeSum() const;
709  virtual void clear();
711  void construct(const char* f, const array_t<double>& bd);
712  virtual size_t getSerialSize() const throw();
713 
714 private:
715  // min and max of range nobs (the one that is not explicitly recorded)
716  double max1, min1;
// Pointers to nested ambit objects.  Ownership is not visible from this
// header -- presumably released by clear(); verify in the implementation.
717  std::vector<ibis::ambit*> sub;
718 
719  // private member functions
720  int write32(int fptr) const;
721  int write64(int fptr) const;
722  int read(int fdes, size_t offset, const char *fn, const char *header);
723  void print(std::ostream& out, const uint32_t tot, const double& lbound,
724  const double& rbound) const;
725 
// Declared private to disable copying.
726  ambit(const ambit&);
727  ambit& operator=(const ambit&);
728 }; // ibis::ambit
729 
// ibis::pale: an index class derived from ibis::bin.  It reports type PALE
// and name "equality-range", and holds a vector of pointers to ibis::range
// objects (sub).  Copy construction and assignment are declared private.
732 class ibis::pale : public ibis::bin {
733 public:
// The destructor calls clear().
734  virtual ~pale() {clear();};
// Construct from a column and a storage object; offset defaults to 8.
735  pale(const ibis::column* c, ibis::fileManager::storage* st,
736  size_t offset = 8);
737  explicit pale(const ibis::bin& rhs); // convert from a ibis::bin
738 
739  virtual int read(const char* idxfile);
740  virtual int read(ibis::fileManager::storage* st);
741  virtual int write(const char* dt) const;
742  virtual void print(std::ostream& out) const;
743  virtual long append(const char* dt, const char* df, uint32_t nnew);
744 
// Both select overloads are stubs in this class: they return -1.
745  virtual long select(const ibis::qContinuousRange&, void*) const {
746  return -1;}
747  virtual long select(const ibis::qContinuousRange&, void*,
748  ibis::bitvector&) const {
749  return -1;}
750 
751  virtual long evaluate(const ibis::qContinuousRange& expr,
752  ibis::bitvector& hits) const;
// Discrete ranges are forwarded unchanged to ibis::index::evaluate.
753  virtual long evaluate(const ibis::qDiscreteRange& expr,
754  ibis::bitvector& hits) const {
755  return ibis::index::evaluate(expr, hits);
756  }
757 
// Keeps the ibis::bin::estimate overloads visible alongside the one below.
758  using ibis::bin::estimate;
759  virtual void estimate(const ibis::qContinuousRange& expr,
760  ibis::bitvector& lower,
761  ibis::bitvector& upper) const;
763  virtual float undecidable(const ibis::qContinuousRange& expr,
764  ibis::bitvector& iffy) const;
765 
766  virtual INDEX_TYPE type() const {return PALE;}
767  virtual const char* name() const {return "equality-range";}
// Reports nobs-1 when nobs exceeds 1, and 0 otherwise.
768  virtual uint32_t numBins() const {return (nobs>1?nobs-1:0);}
769  // bin boundaries and counts of each bin
770  virtual void binBoundaries(std::vector<double>&) const;
771  virtual void binWeights(std::vector<uint32_t>&) const;
772  virtual void adjustLength(uint32_t nrows);
773 
774  virtual void speedTest(std::ostream& out) const;
775  long append(const ibis::pale& tail);
776 
777 protected:
778  virtual void clear();
779  virtual size_t getSerialSize() const throw();
780 
781 private:
782  // private member variables
// Pointers to nested range objects.  Ownership is not visible from this
// header -- presumably released by clear(); verify in the implementation.
783  std::vector<ibis::range*> sub;
784 
785  // private member functions
786  int write32(int fptr) const;
787  int write64(int fptr) const;
788 
// Declared private to disable copying.
789  pale(const pale&);
790  pale& operator=(const pale&);
791 }; // ibis::pale
792 
// ibis::pack: an index class derived from ibis::bin.  It reports type PACK
// and name "range-equality", and holds a vector of pointers to ibis::bin
// objects (sub).  Copy construction and assignment are declared private.
795 class ibis::pack : public ibis::bin {
796 public:
// The destructor calls clear().
797  virtual ~pack() {clear();};
// NOTE(review): listing number 798 is absent and the next line is only the
// tail of a parameter list; the opening line of this constructor declaration
// appears to have been dropped from this copy.
799  size_t offset = 8);
800  explicit pack(const ibis::bin& rhs); // convert from a ibis::bin
801 
802  virtual int read(const char* idxfile);
803  virtual int read(ibis::fileManager::storage* st);
804  virtual int write(const char* dt) const;
805  virtual void print(std::ostream& out) const;
806  virtual long append(const char* dt, const char* df, uint32_t nnew);
807 
// Both select overloads are stubs in this class: they return -1.
808  virtual long select(const ibis::qContinuousRange&, void*) const {
809  return -1;}
810  virtual long select(const ibis::qContinuousRange&, void*,
811  ibis::bitvector&) const {
812  return -1;}
813 
814  virtual long evaluate(const ibis::qContinuousRange& expr,
815  ibis::bitvector& hits) const;
// Discrete ranges are forwarded unchanged to ibis::index::evaluate.
816  virtual long evaluate(const ibis::qDiscreteRange& expr,
817  ibis::bitvector& hits) const {
818  return ibis::index::evaluate(expr, hits);}
819 
// Keeps the ibis::bin::estimate overloads visible alongside the one below.
820  using ibis::bin::estimate;
821  virtual void estimate(const ibis::qContinuousRange& expr,
822  ibis::bitvector& lower,
823  ibis::bitvector& upper) const;
825  virtual float undecidable(const ibis::qContinuousRange& expr,
826  ibis::bitvector& iffy) const;
827 
828  virtual INDEX_TYPE type() const {return PACK;}
829  virtual const char* name() const {return "range-equality";}
// Reports nobs-1 when nobs exceeds 1, and 0 otherwise.
830  virtual uint32_t numBins() const {return (nobs>1?nobs-1:0);}
831  // bin boundaries and counts of each bin
832  virtual void binBoundaries(std::vector<double>&) const;
833  virtual void binWeights(std::vector<uint32_t>&) const;
834  virtual void adjustLength(uint32_t nrows);
835  virtual double getSum() const;
836 
837  virtual void speedTest(std::ostream& out) const;
838  long append(const ibis::pack& tail);
839 
840 protected:
841  virtual double computeSum() const;
842  virtual void clear();
843  virtual size_t getSerialSize() const throw();
844 
845 private:
846  // private member variables
847  // min and max of range nobs (the one that is not explicitly recorded)
848  double max1, min1;
// Pointers to nested bin objects.  Ownership is not visible from this
// header -- presumably released by clear(); verify in the implementation.
849  std::vector<ibis::bin*> sub;
850 
851  // private member functions
852  int write32(int fptr) const;
853  int write64(int fptr) const;
854 
// Declared private to disable copying.
855  pack(const pack&);
856  pack& operator=(const pack&);
857 }; // ibis::pack
858 
// ibis::zone: an index class derived from ibis::bin.  It reports type ZONE
// and name "equality-equality", and holds a vector of pointers to ibis::bin
// objects (sub).  Copy construction and assignment are declared private.
861 class ibis::zone : public ibis::bin {
862 public:
// The destructor calls clear().
863  virtual ~zone() {clear();};
// NOTE(review): listing number 864 is absent and the next line is only the
// tail of a parameter list; the opening line of this constructor declaration
// appears to have been dropped from this copy.
865  size_t offset = 8);
866  explicit zone(const ibis::bin& rhs); // convert from a ibis::bin
867 
868  virtual int read(const char* idxfile);
869  virtual int read(ibis::fileManager::storage* st);
870  virtual int write(const char* dt) const;
871  virtual void print(std::ostream& out) const;
872  virtual long append(const char* dt, const char* df, uint32_t nnew);
873 
874  virtual long evaluate(const ibis::qContinuousRange& expr,
875  ibis::bitvector& hits) const;
// Discrete ranges are forwarded unchanged to ibis::index::evaluate.
876  virtual long evaluate(const ibis::qDiscreteRange& expr,
877  ibis::bitvector& hits) const {
878  return ibis::index::evaluate(expr, hits);
879  }
880 
// Keeps the ibis::bin::estimate overloads visible alongside the one below.
881  using ibis::bin::estimate;
882  virtual void estimate(const ibis::qContinuousRange& expr,
883  ibis::bitvector& lower,
884  ibis::bitvector& upper) const;
886  virtual float undecidable(const ibis::qContinuousRange &,
887  ibis::bitvector &) const;
888 
889  virtual INDEX_TYPE type() const {return ZONE;}
890  virtual const char* name() const {return "equality-equality";}
// Reports nobs-1 when nobs exceeds 1, and 0 otherwise.
891  virtual uint32_t numBins() const {return (nobs>1?nobs-1:0);}
892  // bin boundaries and counts of each bin
893  virtual void binBoundaries(std::vector<double>&) const;
894  virtual void binWeights(std::vector<uint32_t>&) const;
895  virtual void adjustLength(uint32_t nrows);
896 
897  virtual void speedTest(std::ostream& out) const;
898  long append(const ibis::zone& tail);
899 
900 protected:
901  virtual void clear();
902  virtual size_t getSerialSize() const throw();
903 
904 private:
905  // private member variable
// Pointers to nested bin objects.  Ownership is not visible from this
// header -- presumably released by clear(); verify in the implementation.
906  std::vector<ibis::bin*> sub;
907 
908  // private member functions
909  int write32(int fptr) const;
910  int write64(int fptr) const;
911 
// Declared private to disable copying.
912  zone(const zone&);
913  zone& operator=(const zone&);
914 }; // ibis::zone
915 
// ibis::fuge: an index class derived from ibis::bin.  It reports type FUGE
// and name "interval-equality", and keeps a second, "coarse" set of bitmaps
// (cbits) with its own bounds and offset arrays.  Copy construction and
// assignment are declared private.
921 class ibis::fuge : public ibis::bin {
922 public:
// The destructor calls clear(), which in this class also clears the coarse
// level (see clear() below).
923  virtual ~fuge() {clear();};
// NOTE(review): listing number 924 is absent and the next line is only the
// tail of a parameter list; the opening line of this constructor declaration
// appears to have been dropped from this copy.
925  size_t offset = 8);
926  fuge(const ibis::column*, const char*);
927  explicit fuge(const ibis::bin& rhs); // convert from a ibis::bin
928 
929  virtual int read(const char* idxfile);
930  virtual int read(ibis::fileManager::storage* st);
931  virtual void print(std::ostream& out) const;
932  virtual int write(const char* dt) const;
933  virtual long append(const char* dt, const char* df, uint32_t nnew);
934 
935  virtual long evaluate(const ibis::qContinuousRange& expr,
936  ibis::bitvector& hits) const;
// Discrete ranges are forwarded unchanged to ibis::index::evaluate.
937  virtual long evaluate(const ibis::qDiscreteRange& expr,
938  ibis::bitvector& hits) const {
939  return ibis::index::evaluate(expr, hits);
940  }
941 
// Keeps the ibis::bin::estimate overloads visible alongside the one below.
942  using ibis::bin::estimate;
943  virtual void estimate(const ibis::qContinuousRange& expr,
944  ibis::bitvector& lower,
945  ibis::bitvector& upper) const;
946 
947  virtual INDEX_TYPE type() const {return FUGE;}
948  virtual const char* name() const {return "interval-equality";}
949  virtual void adjustLength(uint32_t nrows);
950 
951  long append(const ibis::fuge& tail);
952 
953 protected:
// Clears the coarse-level data first, then the inherited ibis::bin state.
954  virtual void clear() {clearCoarse(); ibis::bin::clear();}
955  virtual size_t getSerialSize() const throw();
956 
957 private:
958  // private member variable
// cbits and the two offset arrays are mutable, so the const activateCoarse
// overloads below are able to modify them.
959  mutable array_t<bitvector*> cbits;
960  array_t<uint32_t> cbounds;
961  mutable array_t<int32_t> coffset32;
962  mutable array_t<int64_t> coffset64;
963 
964  void coarsen(); // given fine level, add coarse level
965  void activateCoarse() const; // activate all coarse level bitmaps
966  void activateCoarse(uint32_t i) const; // activate one bitmap
967  void activateCoarse(uint32_t i, uint32_t j) const;
968 
969  int writeCoarse32(int fdes) const;
970  int writeCoarse64(int fdes) const;
971  int readCoarse(const char *fn);
972  void clearCoarse();
973 
// Coarse-level counterparts of estimate/evaluate over the pair (lo, hi).
975  long coarseEstimate(uint32_t lo, uint32_t hi) const;
977  long coarseEvaluate(uint32_t lo, uint32_t hi, ibis::bitvector& res) const;
978 
// Declared private to disable copying.
979  fuge(const fuge&);
980  fuge& operator=(const fuge&);
981 }; // ibis::fuge
982 
986 class ibis::egale : public ibis::bin {
987 public:
988  virtual ~egale() {clear();};
989  egale(const ibis::column* c = 0, const char* f = 0,
990  const uint32_t nbase = 2);
992  size_t offset = 8);
993  egale(const ibis::bin& rhs, const uint32_t nbase = 2);
994 
995  virtual int read(const char* idxfile);
996  virtual int read(ibis::fileManager::storage* st);
997  virtual int write(const char* dt) const;
998  virtual void print(std::ostream& out) const;
999  virtual long append(const char* dt, const char* df, uint32_t nnew);
1000 
1001  virtual long evaluate(const ibis::qContinuousRange& expr,
1002  ibis::bitvector& hits) const;
1003  virtual long evaluate(const ibis::qDiscreteRange& expr,
1004  ibis::bitvector& hits) const {
1005  return ibis::index::evaluate(expr, hits);
1006  }
1007 
1008  using ibis::bin::estimate;
1009  virtual void estimate(const ibis::qContinuousRange& expr,
1010  ibis::bitvector& lower,
1011  ibis::bitvector& upper) const;
1012  virtual uint32_t estimate(const ibis::qContinuousRange& expr) const;
1013  using ibis::bin::undecidable;
1014  virtual float undecidable(const ibis::qContinuousRange& expr,
1015  ibis::bitvector& iffy) const;
1016 
1017  virtual INDEX_TYPE type() const {return EGALE;}
1018  virtual const char* name() const {return "MCBin";}
1019  // bin boundaries and counts of each bin
1020  virtual void binBoundaries(std::vector<double>& b) const;
1021  virtual void binWeights(std::vector<uint32_t>& b) const;
1022  virtual double getSum() const;
1023 
1024  virtual void speedTest(std::ostream& out) const;
1025  long append(const ibis::egale& tail);
1026  long append(const array_t<uint32_t>& ind);
1027 
1028 protected:
1029  // protected member variables
1030  uint32_t nbits; // number of bitvectors, (size of bits)
1031  uint32_t nbases; // size of array bases
1032  array_t<uint32_t> cnts; // number of records in each bin
1033  array_t<uint32_t> bases; // the size of the bases used
1034 
1035  // protected member functions
1036  egale(const ibis::column* c, const char* f, const array_t<double>& bd,
1037  const array_t<uint32_t> bs);
1038  void addBits_(uint32_t ib, uint32_t ie, ibis::bitvector& res) const;
1039  virtual double computeSum() const;
1040  virtual void clear() {
1041  cnts.clear(); bases.clear();
1042  ibis::bin::clear();
1043  }
1044 
1045  int write32(int fdes) const;
1046  int write64(int fdes) const;
1047  void construct(const char* f);
1048  virtual size_t getSerialSize() const throw();
1049 
1050 private:
1051  // private member functions
1052  void setBit(const uint32_t i, const double val);
1053  void convert();
1054 
1055  void evalEQ(ibis::bitvector& res, uint32_t b) const;
1056  void evalLE(ibis::bitvector& res, uint32_t b) const;
1057  void evalLL(ibis::bitvector& res, uint32_t b0, uint32_t b1) const;
1058 
1059  egale(const egale&);
1060  egale& operator=(const egale&);
1061 }; // ibis::egale
1062 
1066 class ibis::moins : public ibis::egale {
1067 public:
1068  virtual int write(const char* dt) const;
1069  virtual void print(std::ostream& out) const;
1070  virtual long append(const char* dt, const char* df, uint32_t nnew);
1071 
1072  virtual long select(const ibis::qContinuousRange&, void*) const {
1073  return -1;}
1074  virtual long select(const ibis::qContinuousRange&, void*,
1075  ibis::bitvector&) const {
1076  return -1;}
1077 
1078  virtual long evaluate(const ibis::qContinuousRange& expr,
1079  ibis::bitvector& hits) const;
1080  virtual long evaluate(const ibis::qDiscreteRange& expr,
1081  ibis::bitvector& hits) const {
1082  return ibis::index::evaluate(expr, hits);}
1083 
1084  using ibis::egale::estimate;
1085  virtual void estimate(const ibis::qContinuousRange& expr,
1086  ibis::bitvector& lower,
1087  ibis::bitvector& upper) const;
1088  virtual uint32_t estimate(const ibis::qContinuousRange& expr) const;
1089  virtual INDEX_TYPE type() const {return MOINS;}
1090  virtual const char* name() const {return "MCBRange";}
1091 
1092  virtual ~moins() {clear();};
1093  moins(const ibis::column* c = 0, const char* f = 0,
1094  const uint32_t nbase = 2);
1095  moins(const ibis::column* c, ibis::fileManager::storage* st,
1096  size_t offset = 8);
1097  moins(const ibis::bin& rhs, const uint32_t nbase = 2);
1098 
1099  virtual void speedTest(std::ostream& out) const;
1100  virtual double getSum() const;
1101 
1102  long append(const ibis::moins& tail);
1103  long append(const array_t<uint32_t>& ind);
1104 
1105 protected:
1106  virtual double computeSum() const;
1107 
1108 private:
1109  // private member functions
1110  moins(const ibis::column* c, const char* f, const array_t<double>& bd,
1111  const array_t<uint32_t> bs);
1112  void convert();
1113 
1114  void evalEQ(ibis::bitvector& res, uint32_t b) const;
1115  void evalLE(ibis::bitvector& res, uint32_t b) const;
1116  void evalLL(ibis::bitvector& res, uint32_t b0, uint32_t b1) const;
1117 
1118  moins(const moins&);
1119  moins& operator=(const moins&);
1120 }; // ibis::moins
1121 
1125 class ibis::entre : public ibis::egale {
1126 public:
1127  virtual ~entre() {clear();};
1128  entre(const ibis::column* c = 0, const char* f = 0,
1129  const uint32_t nbase = 2);
1131  size_t offset = 8);
1132  entre(const ibis::bin& rhs, const uint32_t nbase = 2);
1133 
1134  virtual int write(const char* dt) const;
1135  virtual void print(std::ostream& out) const;
1136  virtual long append(const char* dt, const char* df, uint32_t nnew);
1137 
1138  virtual long select(const ibis::qContinuousRange&, void*) const {
1139  return -1;}
1140  virtual long select(const ibis::qContinuousRange&, void*,
1141  ibis::bitvector&) const {
1142  return -1;}
1143 
1144  virtual long evaluate(const ibis::qContinuousRange& expr,
1145  ibis::bitvector& hits) const;
1146  virtual long evaluate(const ibis::qDiscreteRange& expr,
1147  ibis::bitvector& hits) const {
1148  return ibis::index::evaluate(expr, hits);}
1149 
1150  using ibis::egale::estimate;
1151  virtual void estimate(const ibis::qContinuousRange& expr,
1152  ibis::bitvector& lower,
1153  ibis::bitvector& upper) const;
1154  virtual uint32_t estimate(const ibis::qContinuousRange& expr) const;
1155  virtual INDEX_TYPE type() const {return ENTRE;}
1156  virtual const char* name() const {return "MCBInterval";}
1157 
1158  virtual void speedTest(std::ostream& out) const;
1159  virtual double getSum() const;
1160 
1161  long append(const ibis::entre& tail);
1162  long append(const array_t<uint32_t>& ind);
1163 
1164 protected:
1165  virtual double computeSum() const;
1166 
1167 private:
1168  // private member functions
1169  entre(const ibis::column* c, const char* f, const array_t<double>& bd,
1170  const array_t<uint32_t> bs);
1171  void convert();
1172 
1173  void evalEQ(ibis::bitvector& res, uint32_t b) const;
1174  void evalLE(ibis::bitvector& res, uint32_t b) const;
1175  void evalLL(ibis::bitvector& res, uint32_t b0, uint32_t b1) const;
1176 
1177  entre(const entre&);
1178  entre& operator=(const entre&);
1179 }; // ibis::entre
1180 
1187 class ibis::bak : public ibis::bin {
1188 public:
1189  virtual ~bak() {clear();};
1190  bak(const ibis::column* c=0, const char* f=0);
1192  size_t offset = 8) : ibis::bin(c, st, offset) {};
1193 
1194  virtual void print(std::ostream& out) const;
1195  virtual int write(const char* dt) const; // write to the named file
1196  using ibis::bin::read;
1197  virtual int read(const char* idxfile);
1198  virtual long append(const char* dt, const char* df, uint32_t nnew);
1199  virtual INDEX_TYPE type() const {return BAK;}
1200  virtual const char* name() const
1201  {return "equality code on mapped values";}
1202  // bin boundaries and counts of each bin
1203  virtual void binBoundaries(std::vector<double>&) const;
1204  virtual void binWeights(std::vector<uint32_t>&) const;
1205  // expand/contract the boundaries of a range condition
1206  virtual int expandRange(ibis::qContinuousRange& rng) const;
1207  virtual int contractRange(ibis::qContinuousRange& rng) const;
1208 
1209  long append(const ibis::bin& tail);
1210 
1211  // a simple structure to record the position of the values mapped to the
1212  // same value. The bitvector marked the locations of the values and the
1213  // min and max record the actual minimum and maximum value encountered.
1214  struct grain {
1215  double min, max;
1216  ibis::bitvector* loc;
1217 
1218  // the default construct, user to explicitly allocated the bitvector
1219  grain() : min(DBL_MAX), max(-DBL_MAX), loc(0) {}
1220  ~grain() {delete loc;}
1221  };
1222 
1223  typedef std::map< double, grain > bakMap;
1224 
1225 
1226 protected:
1227 
1228  // reads all values and records positions in bmap
1229  void mapValues(const char* f, bakMap& bmap) const;
1230  void printMap(std::ostream& out, const bakMap& bmap) const;
1231 
1232  virtual uint32_t locate(const double& val) const;
1233  virtual void locate(const ibis::qContinuousRange& expr,
1234  uint32_t& cand0, uint32_t& cand1) const {
1235  ibis::bin::locate(expr, cand0, cand1);
1236  }
1237  virtual void locate(const ibis::qContinuousRange& expr,
1238  uint32_t& cand0, uint32_t& cand1,
1239  uint32_t& hit0, uint32_t& hit1) const {
1240  ibis::bin::locate(expr, cand0, cand1, hit0, hit1);
1241  }
1242 
1243 private:
1244  // coverts the std::map structure into the structure defined in ibis::bin
1245  void construct(bakMap& bmap);
1246 
1247  bak(const bak&);
1248  const bak& operator&=(const bak&);
1249 }; // ibis::bak
1250 
1256 class ibis::bak2 : public ibis::bin {
1257 public:
1258  virtual ~bak2() {clear();};
1259  bak2(const ibis::column* c=0, const char* f=0);
1261  size_t offset = 8) : ibis::bin(c, st, offset) {};
1262 
1263  virtual void print(std::ostream& out) const;
1264  virtual int write(const char* dt) const; // write to the named file
1265  using ibis::bin::read;
1266  virtual int read(const char* idxfile);
1267  virtual long append(const char* dt, const char* df, uint32_t nnew);
1268  virtual INDEX_TYPE type() const {return BAK;}
1269  virtual const char* name() const
1270  {return "equality code on mapped values";}
1271  // bin boundaries and counts of each bin
1272  virtual void binBoundaries(std::vector<double>&) const;
1273  virtual void binWeights(std::vector<uint32_t>&) const;
1274  // expand/contract the boundaries of a range condition
1275  virtual int expandRange(ibis::qContinuousRange& rng) const;
1276  virtual int contractRange(ibis::qContinuousRange& rng) const;
1277 
1278  long append(const ibis::bin& tail);
1279 
1284  struct grain {
1285  double minm, maxm, minp, maxp;
1286  ibis::bitvector* locm;
1289 
1290  // the default construct, user to explicitly allocated the bitvector
1291  grain() : minm(DBL_MAX), maxm(-DBL_MAX), minp(DBL_MAX), maxp(-DBL_MAX),
1292  locm(0), loce(0), locp(0) {}
1293  ~grain() {delete locm; delete loce; delete locp;}
1294  };
1295 
1296  typedef std::map< double, grain > bakMap;
1297 
1298 
1299 protected:
1300 
1302  void mapValues(const char* f, bakMap& bmap) const;
1303  void printMap(std::ostream& out, const bakMap& bmap) const;
1304 
1305  virtual uint32_t locate(const double& val) const;
1306  virtual void locate(const ibis::qContinuousRange& expr,
1307  uint32_t& cand0, uint32_t& cand1) const {
1308  ibis::bin::locate(expr, cand0, cand1);
1309  }
1310  virtual void locate(const ibis::qContinuousRange& expr,
1311  uint32_t& cand0, uint32_t& cand1,
1312  uint32_t& hit0, uint32_t& hit1) const {
1313  ibis::bin::locate(expr, cand0, cand1, hit0, hit1);
1314  }
1315 
1316 private:
1319  void construct(bakMap& bmap);
1320 
1321  bak2(const bak2&);
1322  bak2& operator=(const bak2&);
1323 }; // ibis::bak2
1324 
1325 #endif // IBIS_IBIN_H
virtual void estimate(const ibis::qContinuousRange &expr, ibis::bitvector &lower, ibis::bitvector &upper) const
Provide an estimation based on the current index.
Definition: irange.cpp:3229
A class to represent simple range conditions.
Definition: qExpr.h:207
Definition: ibin.h:1214
A two-level index.
Definition: ibin.h:795
virtual long select(const ibis::qContinuousRange &, void *) const
Select the rows that satisfy the range condition.
Definition: ibin.h:808
ibis::range.
Definition: index.h:93
word_t nIndices() const
Number of indices.
Definition: bitvector.h:508
void binningT(const char *fname)
Read the data file, partition the values, and write out the bin ordered data with ...
Definition: ibin.cpp:1711
ibis::fuge, binned interval-equality encoding.
Definition: index.h:146
virtual long select(const ibis::qContinuousRange &, void *, ibis::bitvector &) const
Select the rows that satisfy the range condition.
Definition: ibin.h:675
A simple structure to record the position of the values mapped to the same low-precision target value...
Definition: ibin.h:1284
virtual int write(const char *dt) const
Write the existing content to the given directory or file.
Definition: irange.cpp:357
virtual const char * name() const
Returns the name of the index, similar to the function type, but returns a string instead...
Definition: ibin.h:1090
virtual double computeSum() const
Compute the sum of values from the information in the index.
Definition: ibin.cpp:8510
const char * fname
The name of the file containing the index.
Definition: index.h:404
void addBounds(double lbd, double rbd, uint32_t nbins, uint32_t eqw)
The function used by setBoundaries() to actually generate the bounds.
Definition: ibin.cpp:3984
virtual long select(const ibis::qContinuousRange &, void *, ibis::bitvector &) const
Select the rows that satisfy the range condition.
Definition: ibin.h:810
void binning(const char *f, const std::vector< double > &bd)
Generate bins according to the specified boundaries.
Definition: ibin.cpp:745
virtual const char * name() const
Returns the name of the index, similar to the function type, but returns a string instead...
Definition: ibin.h:1269
virtual long select(const ibis::qContinuousRange &, void *) const
Select the rows that satisfy the range condition.
Definition: ibin.h:673
virtual const char * name() const
Returns the name of the index, similar to the function type, but returns a string instead...
Definition: ibin.h:890
virtual double getSum() const
Compute the approximate value of the sum from the binned index.
Definition: ibin.cpp:8485
Definition of the common functions of an index.
virtual long evaluate(const ibis::qDiscreteRange &expr, ibis::bitvector &hits) const
To evaluate the exact hits.
Definition: ibin.h:816
long binOrder(const char *fname) const
Write bin-ordered values.
Definition: ibin.cpp:1942
ibis::zone, equality-equality encoding on bins.
Definition: index.h:103
static uint32_t parseNbins(const ibis::column &)
Parse the index specs to determine eqw and nbins.
Definition: ibin.cpp:3695
void clear()
Reset the size to zero.
Definition: array_t.h:171
uint32_t nrows
The number of rows represented by the index.
Definition: index.h:418
Simple range condition.
Definition: qExpr.h:252
A two-level index.
Definition: ibin.h:921
The storage class treats all memory as char*.
Definition: fileManager.h:237
ibis::bitvector * locp
Values greater than the target.
Definition: ibin.h:1288
ibis::bitvector * locm
Values less than the target.
Definition: ibin.h:186
array_t< ibis::bitvector * > bits
A list of bitvectors.
Definition: index.h:414
virtual const char * name() const
Returns the name of the index, similar to the function type, but returns a string instead...
Definition: ibin.h:696
virtual double getSum() const
Compute the approximate value of the sum from the binned index.
Definition: irange.cpp:3607
virtual double getMax() const
Compute the actual maximum value from the binned index.
Definition: irange.cpp:3597
virtual INDEX_TYPE type() const
Returns an index type identifier.
Definition: ibin.h:828
A data structure to assist the mapping of values to lower precisions.
Definition: ibin.h:182
array_t< double > minval
The minimal values in each bin.
Definition: ibin.h:284
ibis::bak, reduced precision mapping, equality code.
Definition: index.h:123
virtual float undecidable(const ibis::qContinuousRange &expr, ibis::bitvector &iffy) const
Mark the position of the rows that can not be decided with this index.
Definition: ibin.cpp:6895
virtual size_t getSerialSize() const
Compute the size of the serialized version of the index.
Definition: ibin.cpp:11800
long binOrderT(const char *fname) const
Write bin-ordered values.
Definition: ibin.cpp:1986
virtual long select(const ibis::qContinuousRange &, void *) const
Select the rows that satisfy the range condition.
Definition: ibin.h:610
double coarsen(const double in, unsigned prec=2)
Reduce the decimal precision of the incoming floating-point value to specified precision.
Definition: util.h:1633
virtual long select(const ibis::qContinuousRange &, void *, ibis::bitvector &) const
Select the rows that satisfy the range condition.
Definition: ibin.h:1074
STL namespace.
void swap(array_t< T > &rhs)
Swap the content of two array_t objects.
Definition: array_t.h:184
ibis::bin.
Definition: index.h:91
virtual long select(const ibis::qContinuousRange &, void *, ibis::bitvector &) const
Select the rows that satisfy the range condition.
Definition: ibin.h:747
The current implementation of FastBit is code named IBIS; most data structures and functions are in t...
Definition: bord.h:16
virtual void estimate(const ibis::qContinuousRange &, ibis::bitvector &lower, ibis::bitvector &upper) const
Computes an approximation of hits as a pair of lower and upper bounds.
Definition: index.h:191
virtual INDEX_TYPE type() const
Returns an index type identifier.
Definition: ibin.h:1089
The class to represent a column of a data partition.
Definition: column.h:65
ibis::pale, equality-range encoding on bins.
Definition: index.h:99
virtual long evaluate(const ibis::qContinuousRange &expr, ibis::bitvector &hits) const =0
To evaluate the exact hits.
const word_t * indices() const
Pointer to the indices.
Definition: bitvector.h:506
ibis::bitvector::word_t jv
The current index in vals.
Definition: ibin.h:209
virtual long append(const char *dt, const char *df, uint32_t nnew)
Create index for the data in df and append the result to the index in dt.
Definition: irange.cpp:1059
A data structure to represent a sequence of bits.
Definition: bitvector64.h:54
virtual void print(std::ostream &out) const
Prints human readable information.
Definition: ibin.cpp:6034
virtual void speedTest(std::ostream &out) const
Time some logical operations and print out their speed.
Definition: irange.cpp:948
virtual void binBoundaries(std::vector< double > &) const
The function binBoundaries and binWeights return bin boundaries and counts of each bin respectively...
Definition: irange.cpp:925
virtual long evaluate(const ibis::qContinuousRange &expr, ibis::bitvector &hits) const
To evaluate the exact hits.
Definition: irange.cpp:3108
grain()
The default constructor; the user is expected to allocate the bitvectors explicitly.
Definition: ibin.h:1291
A variation on ibis::bak, it splits each bin of ibis::bak in two, one for entries less than the mappe...
Definition: ibin.h:1256
virtual const char * name() const
Returns the name of the index, similar to the function type, but returns a string instead...
Definition: ibin.h:829
A two-level index.
Definition: ibin.h:732
virtual const char * name() const
Returns the name of the index, similar to the function type, but returns a string instead...
Definition: ibin.h:550
virtual float undecidable(const ibis::qContinuousRange &expr, ibis::bitvector &iffy) const
Mark the position of the rows that can not be decided with this index.
Definition: index.h:205
virtual long select(const ibis::qContinuousRange &, void *, ibis::bitvector &) const
Select the rows that satisfy the range condition.
Definition: ibin.h:1140
virtual const char * name() const
Returns the name of the index, similar to the function type, but returns a string instead...
Definition: ibin.h:1018
virtual double computeSum() const
Compute the sum of values from the information in the index.
Definition: irange.cpp:3628
virtual INDEX_TYPE type() const
Returns an index type identifier.
Definition: ibin.h:889
void next()
Move to the next row.
Definition: ibin.h:236
virtual INDEX_TYPE type() const
Returns an index type identifier.
Definition: ibin.h:1155
array_t< double > maxval
The maximal values in each bin.
Definition: ibin.h:283
virtual long evaluate(const ibis::qDiscreteRange &expr, ibis::bitvector &hits) const
To evaluate the exact hits.
Definition: ibin.h:618
virtual const char * name() const
Returns the name of the index, similar to the function type, but returns a string instead...
Definition: ibin.h:124
void readBinBoundaries(const char *name, uint32_t nb)
Read a file containing a list of floating-point numbers.
Definition: ibin.cpp:4636
The base index class.
Definition: index.h:82
virtual void locate(const ibis::qContinuousRange &expr, uint32_t &cand0, uint32_t &cand1) const
Find the outer boundaries of the range expression.
Definition: ibin.h:1233
virtual INDEX_TYPE type() const
Returns an index type identifier.
Definition: ibin.h:549
virtual INDEX_TYPE type() const
Returns an index type identifier.
Definition: ibin.h:1199
bin(const ibis::bin &rhs)
Copy constructor. It performs a deep copy.
Definition: ibin.cpp:225
Maps each value to a lower precision (decimal) value and uses the low precision value as center o...
Definition: ibin.h:1187
virtual INDEX_TYPE type() const
Returns an index type identifier.
Definition: ibin.h:766
long checkBin(const ibis::qRange &cmp, uint32_t jbin, ibis::bitvector &res) const
Candidate check using the binned values.
Definition: ibin.cpp:2220
virtual long evaluate(const ibis::qDiscreteRange &expr, ibis::bitvector &hits) const
To evaluate the exact hits.
Definition: ibin.h:1003
virtual const char * name() const
Returns the name of the index, similar to the function type, but returns a string instead...
Definition: ibin.h:948
virtual long evaluate(const ibis::qDiscreteRange &expr, ibis::bitvector &hits) const
To evaluate the exact hits.
Definition: ibin.h:937
INDEX_TYPE
The integer values of this enum type are used in the index files to differentiate the indexes...
Definition: index.h:86
virtual long evaluate(const ibis::qDiscreteRange &expr, ibis::bitvector &hits) const
To evaluate the exact hits.
Definition: ibin.h:1080
virtual double estimateCost(const ibis::qContinuousRange &) const
Estimate the cost of evaluating a range condition.
Definition: index.h:263
void divideBitmaps(const array_t< bitvector * > &bms, std::vector< unsigned > &parts) const
Partition the bitmaps into groups that take about the same amount of storage.
Definition: ibin.cpp:5472
ibis::entre, multicomponent interval encoding on bins.
Definition: index.h:121
virtual int write(ibis::array_t< double > &, ibis::array_t< int64_t > &, ibis::array_t< uint32_t > &) const
Save index to three arrays. Serialize the index in memory.
Definition: ibin.cpp:5836
virtual void clear()
Clear the existing content.
Definition: ibin.h:1040
virtual float undecidable(const ibis::qContinuousRange &expr, ibis::bitvector &iffy) const
Mark the position of the rows that can not be decided with this index.
Definition: irange.cpp:3378
virtual void binBoundaries(std::vector< double > &) const
The function binBoundaries and binWeights return bin boundaries and counts of each bin respectively...
Definition: ibin.cpp:5889
static unsigned parsePrec(const ibis::column &)
Parse the index spec to extract precision.
Definition: ibin.cpp:3892
granule()
The default constructor.
Definition: ibin.h:190
ibis::pack, range-equality encoding on bins.
Definition: index.h:101
virtual long select(const ibis::qContinuousRange &, void *) const
Select the rows that satisfy the range condition.
Definition: ibin.h:745
virtual size_t getSerialSize() const
Estimate the size of serialized version of the index.
Definition: irange.cpp:3655
virtual const char * name() const
Returns the name of the index, similar to the function type, but returns a string instead...
Definition: ibin.h:1156
The multicomponent range code on bins.
Definition: ibin.h:1066
A list of values and their positions.
Definition: ibin.h:203
This class implements the interval encoding of Chan and Ioannidis.
Definition: ibin.h:598
virtual INDEX_TYPE type() const
Returns an index type identifier.
Definition: ibin.h:1268
virtual int expandRange(ibis::qContinuousRange &rng) const
The functions expandRange and contractRange expand or contract the boundaries of a range condition ...
Definition: ibin.cpp:6941
ibis::bitvector * loce
Values exactly equal to the target.
Definition: ibin.h:1287
virtual long evaluate(const ibis::qDiscreteRange &expr, ibis::bitvector &hits) const
To evaluate the exact hits.
Definition: ibin.h:876
virtual long select(const ibis::qContinuousRange &, void *, ibis::bitvector &) const
Select the rows that satisfy the range condition.
Definition: ibin.h:612
virtual void locate(const ibis::qContinuousRange &expr, uint32_t &cand0, uint32_t &cand1, uint32_t &hit0, uint32_t &hit1) const
Find the bins related to the range expression.
Definition: ibin.h:1237
The equality encoded bitmap index with binning.
Definition: ibin.h:27
void swap(bin &rhs)
Swap the content of the index.
Definition: ibin.h:352
virtual void locate(const ibis::qContinuousRange &expr, uint32_t &cand0, uint32_t &cand1, uint32_t &hit0, uint32_t &hit1) const
Find the bins related to the range expression.
Definition: ibin.h:1310
virtual double estimateCost(const ibis::qContinuousRange &expr) const
Estimate the cost of evaluating a range condition.
Definition: ibin.cpp:6531
virtual uint32_t locate(const double &val) const
Find the bin containing val.
Definition: ibin.h:571
The indexSet stores positions of bits that are one.
Definition: bitvector.h:421
bool isRange() const
Is the index set a consecutive range?
Definition: bitvector.h:504
virtual void print(std::ostream &out) const
Prints human readable information.
Definition: irange.cpp:979
bitvector::word_t position() const
The current position (RID).
Definition: ibin.h:223
virtual long evaluate(const ibis::qContinuousRange &expr, ibis::bitvector &hits) const
To evaluate the exact hits.
Definition: ibin.cpp:6614
virtual int read(const char *idxfile)
Read from a file named f.
Definition: ibin.cpp:468
virtual long append(const char *dt, const char *df, uint32_t nnew)
Create index for the data in df and append the result to the index in dt.
Definition: ibin.cpp:6257
ibis::bitvector::word_t ji
The current index value inside the index set.
Definition: ibin.h:211
void construct(const char *)
Construct a new index.
Definition: irange.cpp:606
int write32(int fptr) const
Write the content to a file already open.
Definition: ibin.cpp:5656
ibis::moins, multicomponent range encoding on bins.
Definition: index.h:119
The abstract base class for arithmetic terms.
Definition: qExpr.h:728
virtual const char * name() const
Returns the name of the index, similar to the function type, but returns a string instead...
Definition: ibin.h:634
virtual double getMax() const
Compute the actual maximum value from the binned index.
Definition: ibin.cpp:8472
virtual void estimate(const ibis::qContinuousRange &expr, ibis::bitvector &lower, ibis::bitvector &upper) const
Provide an estimation based on the current index.
Definition: ibin.cpp:6722
virtual void locate(const ibis::qContinuousRange &expr, uint32_t &cand0, uint32_t &cand1) const
Find the outer boundaries of the range expression.
Definition: ibin.h:1306
virtual void estimate(const ibis::qContinuousRange &expr, ibis::bitvector &lower, ibis::bitvector &upper) const
Provide an estimation based on the current index.
Definition: icegale.cpp:1580
A data structure to represent a sequence of bits.
Definition: bitvector.h:62
virtual long select(const ibis::qContinuousRange &, void *) const
Select the rows that satisfy the range condition.
Definition: ibin.h:1138
void setBoundaries(const char *f)
Set bin boundaries.
Definition: ibin.cpp:4692
virtual INDEX_TYPE type() const
Returns an index type identifier.
Definition: ibin.h:695
ibis::interval.
Definition: index.h:95
virtual long getCumulativeDistribution(std::vector< double > &bds, std::vector< uint32_t > &cts) const
Compute the cumulative distribution from the binned index.
Definition: ibin.cpp:8520
virtual void clear()
Clear the existing content.
Definition: ibin.cpp:5881
virtual void adjustLength(uint32_t nrows)
Fill the bitmaps to the specified size.
Definition: ibin.cpp:661
const ibis::column * col
Pointer to the column this index is for.
Definition: index.h:395
The range encoded bitmap index based.
Definition: ibin.h:511
int write64(int fptr) const
Write the content to a file already open.
Definition: ibin.cpp:5746
virtual long evaluate(const ibis::qDiscreteRange &expr, ibis::bitvector &hits) const
To evaluate the exact hits.
Definition: ibin.h:753
The multi-level range based (cumulative) index.
Definition: ibin.h:659
ibis::ambit, range-range two level encoding on bins.
Definition: index.h:97
virtual long select(const ibis::qContinuousRange &, void *) const
Select the rows that satisfy the range condition.
Definition: ibin.h:1072
array_t< double > bounds
The nominal boundaries.
Definition: ibin.h:282
virtual int expandRange(ibis::qContinuousRange &range) const
The functions expandRange and contractRange expand or contract the boundaries of a range condition ...
Definition: irange.cpp:3413
virtual long evaluate(const ibis::qDiscreteRange &expr, ibis::bitvector &hits) const
To evaluate the exact hits.
Definition: ibin.h:1146
virtual void speedTest(std::ostream &out) const
Time some logical operations and print out their speed.
Definition: ibin.cpp:5922
ibis::bitvector::indexSet ind
The index set representing the positions.
Definition: ibin.h:207
int(* FastBitReadBitmaps)(void *context, uint64_t start, uint64_t count, uint32_t *data)
A function prototype for delayed index reconstruction.
Definition: const.h:341
The multicomponent equality code on bins.
Definition: ibin.h:986
virtual long select(const ibis::qContinuousRange &, void *) const
Select the rows that satisfy the range condition.
Definition: ibin.h:526
virtual INDEX_TYPE type() const
Returns an index type identifier.
Definition: ibin.h:633
A discrete range expression.
Definition: qExpr.h:337
The comparator used to build a min-heap based on positions.
Definition: ibin.h:260
virtual INDEX_TYPE type() const
Returns an index type identifier.
Definition: ibin.h:947
T value() const
The current value.
Definition: ibin.h:231
virtual INDEX_TYPE type() const
Returns an index type identifier.
Definition: ibin.h:123
virtual long select(const ibis::qContinuousRange &, void *) const
Select the rows that satisfy the range condition.
Definition: ibin.cpp:12109
virtual index * dup() const
Duplicate the content of an index object.
Definition: ibin.cpp:463
void construct(const char *)
Construct a binned bitmap index.
Definition: ibin.cpp:2591
long mergeValues(const ibis::qContinuousRange &, ibis::array_t< T > &) const
Extract values only.
Definition: ibin.cpp:11813
virtual void serialSizes(uint64_t &, uint64_t &, uint64_t &) const
Compute the size of arrays that would be generated by the serialization function (write)...
Definition: ibin.cpp:5862
virtual double getMin() const
Compute the actual minimum value from the binned index.
Definition: ibin.cpp:8461
virtual int read(const char *idxfile)
Read the content of the named file.
Definition: irange.cpp:161
virtual long getDistribution(std::vector< double > &bbs, std::vector< uint32_t > &cts) const
Compute a histogram from the binned index.
Definition: ibin.cpp:8580
const T * vals
The list of values.
Definition: ibin.h:205
ibis::bitvector * locp
Values greater than the target.
Definition: ibin.h:187
static unsigned parseScale(const ibis::column &)
Parse the specification about scaling.
Definition: ibin.cpp:3811
A two-level index.
Definition: ibin.h:861
The multicomponent interval code on bins.
Definition: ibin.h:1125
virtual long select(const ibis::qContinuousRange &, void *, ibis::bitvector &) const
Select the rows that satisfy the range condition.
Definition: ibin.h:528
virtual const char * name() const
Returns the name of the index, similar to the function type, but returns a string instead...
Definition: ibin.h:1200
ibis::egale, multicomponent equality encoding on bins.
Definition: index.h:117
virtual long evaluate(const ibis::qDiscreteRange &expr, ibis::bitvector &hits) const
To evaluate the exact hits.
Definition: ibin.h:534
virtual INDEX_TYPE type() const
Returns an index type identifier.
Definition: ibin.h:1017
~granule()
Destructor.
Definition: ibin.h:193
A join is defined by two names and a numerical expression.
Definition: qExpr.h:1240
virtual void locate(const ibis::qContinuousRange &expr, uint32_t &cand0, uint32_t &cand1) const
Find the outer boundaries of the range expression.
Definition: ibin.cpp:7050
virtual void clear()
Clear the existing content.
Definition: ibin.h:954
virtual long evaluate(const ibis::qDiscreteRange &expr, ibis::bitvector &hits) const
To evaluate the exact hits.
Definition: ibin.h:681
void convertGranules(granuleMap &gmap)
Convert the granule map into binned index.
Definition: ibin.cpp:3620
virtual long evaluate(const ibis::qDiscreteRange &expr, ibis::bitvector &hits) const
To evaluate the exact hits.
Definition: ibin.h:59
virtual const char * name() const
Returns the name of the index, similar to the function type, but returns a string instead...
Definition: ibin.h:767

Make It A Bit Faster
Contact us
Disclaimers
FastBit source code
FastBit mailing list archive