column.h
Go to the documentation of this file.
1 //File: $Id$
2 // Author: John Wu <John.Wu at ACM.org>
3 // Copyright (c) 2000-2016 the Regents of the University of California
4 #ifndef IBIS_COLUMN_H
5 #define IBIS_COLUMN_H
6 #include "table.h" // ibis::TYPE_T
14 #include "qExpr.h" // ibis::qContinuousRange
15 #include "bitvector.h"
16 #include <string>
17 
18 namespace ibis { // additional names to the namespace ibis
19  // derived classes of ibis::column, implemented in category.cpp
20  class category; // for categorical values (low-cardinality text fields)
21  class text; // arbitrary cardinality text fields
22  class blob; // text may contain null characters
23  class collis; // data accessed through FastBitReadExtArray
24 
25  // the following are used for storing selected values of different types
26  // of columns (implemented in colValues.cpp)
27  class colBytes;
28  class colUBytes;
29  class colShorts;
30  class colUShorts;
31  class colInts;
32  class colUInts;
33  class colLongs;
34  class colULongs;
35  class colFloats;
36  class colDoubles;
37  class colStrings;
38  class colBlobs;
39 } // namespace
40 
65 class FASTBIT_CXX_DLLSPEC ibis::column {
66 public:
67 
68  virtual ~column();
69  column(const column& rhs);
70  column(const part* tbl, FILE* file);
71  column(const part* tbl, ibis::TYPE_T t, const char* name,
72  const char* desc="", double low=DBL_MAX, double high=-DBL_MAX);
73 
76  ibis::TYPE_T type() const {return m_type;}
78  const char* name() const {return m_name.c_str();}
80  void name(const char* nm) {m_name = nm;}
82  const char* description() const {return m_desc.c_str();}
84  std::string fullname() const;
86  const double& lowerBound() const {return lower;}
88  const double& upperBound() const {return upper;}
89 
90  bool isFloat() const;
91  bool isInteger() const;
92  bool isSignedInteger() const;
93  bool isUnsignedInteger() const;
94  bool isNumeric() const;
95  bool isSorted() const {return m_sorted;}
96  bool hasIndex() const;
97  bool hasRoster() const;
98 
99  void description(const char* d) {m_desc = d;}
100  void lowerBound(double d) {lower = d;}
101  void upperBound(double d) {upper = d;}
102  void isSorted(bool);
103  int elementSize() const;
104  int nRows() const {return mask_.size();}
105 
106  const part* partition() const {return thePart;}
107  const part*& partition() {return thePart;}
108 
109  // function related to index/bin
110  const char* indexSpec() const;
111  uint32_t numBins() const;
112  void indexSpec(const char* spec) {m_bins=spec;}
115  void preferredBounds(std::vector<double>&) const;
117  void binWeights(std::vector<uint32_t>&) const;
118 
119  virtual void computeMinMax();
120  virtual void computeMinMax(const char *dir);
121  virtual void computeMinMax(const char *dir,
122  double& min, double &max, bool &asc) const;
123 
124  virtual int attachIndex(double *, uint64_t, int64_t *, uint64_t,
125  void *, FastBitReadBitmaps) const;
126  virtual int attachIndex(double *, uint64_t, int64_t *, uint64_t,
127  uint32_t *, uint64_t) const;
128  virtual void loadIndex(const char* iopt=0, int ropt=0) const throw ();
129  virtual void unloadIndex() const;
130  virtual long indexSize() const;
131 
132  uint32_t indexedRows() const;
133  void indexSpeedTest() const;
134  void purgeIndexFile(const char *dir=0) const;
135 
136  const char* dataFileName(std::string& fname, const char *dir=0) const;
137  const char* nullMaskName(std::string& fname) const;
138  void getNullMask(bitvector& mask) const;
139  int setNullMask(const bitvector&);
140 
144  virtual const char* findString(const char*) const
145  {return static_cast<const char*>(0);}
150  virtual int getString(uint32_t, std::string&) const {return -1;}
155  virtual int getOpaque(uint32_t, ibis::opaque&) const;
156 
157  array_t<int32_t>* getIntArray() const;
158  array_t<float>* getFloatArray() const;
160  virtual int getValuesArray(void* vals) const;
161  virtual ibis::fileManager::storage* getRawData() const;
162  virtual bool hasRawData() const;
163  int getDataflag() const {return dataflag;}
164  void setDataflag(int df) {dataflag = df;}
165 
166  virtual array_t<signed char>* selectBytes(const bitvector& mask) const;
167  virtual array_t<unsigned char>* selectUBytes(const bitvector& mask) const;
168  virtual array_t<int16_t>* selectShorts(const bitvector& mask) const;
169  virtual array_t<uint16_t>* selectUShorts(const bitvector& mask) const;
170  virtual array_t<int32_t>* selectInts(const bitvector& mask) const;
171  virtual array_t<uint32_t>* selectUInts(const bitvector& mask) const;
172  virtual array_t<int64_t>* selectLongs(const bitvector& mask) const;
173  virtual array_t<uint64_t>* selectULongs(const bitvector& mask) const;
174  virtual array_t<float>* selectFloats(const bitvector& mask) const;
175  virtual array_t<double>* selectDoubles(const bitvector& mask) const;
176  virtual std::vector<std::string>*
177  selectStrings(const bitvector& mask) const;
178  virtual std::vector<ibis::opaque>*
179  selectOpaques(const bitvector& mask) const;
180 
181  long selectValues(const bitvector&, void*) const;
182  long selectValues(const bitvector&, void*, array_t<uint32_t>&) const;
183  long selectValues(const ibis::qContinuousRange&, void*) const;
184 
186  virtual void write(FILE* file) const;
188  virtual void print(std::ostream& out) const;
190  void logMessage(const char* event, const char* fmt, ...) const;
192  void logWarning(const char* event, const char* fmt, ...) const;
193 
196  int expandRange(ibis::qContinuousRange& rng) const;
199  int contractRange(ibis::qContinuousRange& rng) const;
200 
201  virtual long evaluateRange(const ibis::qContinuousRange& cmp,
202  const ibis::bitvector& mask,
203  ibis::bitvector& res) const;
205  virtual long evaluateRange(const ibis::qDiscreteRange& cmp,
206  const ibis::bitvector& mask,
207  ibis::bitvector& res) const;
209  virtual long evaluateRange(const ibis::qIntHod& cmp,
210  const ibis::bitvector& mask,
211  ibis::bitvector& res) const;
213  virtual long evaluateRange(const ibis::qUIntHod& cmp,
214  const ibis::bitvector& mask,
215  ibis::bitvector& res) const;
216 
217  virtual long stringSearch(const char*, ibis::bitvector&) const;
218  virtual long stringSearch(const std::vector<std::string>&,
219  ibis::bitvector&) const;
220  virtual long stringSearch(const char*) const;
221  virtual long stringSearch(const std::vector<std::string>&) const;
222  virtual long keywordSearch(const char*, ibis::bitvector&) const;
223  virtual long keywordSearch(const char*) const;
224  virtual long keywordSearch(const std::vector<std::string>&,
225  ibis::bitvector&) const;
226  virtual long keywordSearch(const std::vector<std::string>&) const;
227  virtual long patternSearch(const char*) const;
228  virtual long patternSearch(const char*, ibis::bitvector &) const;
229 
230  virtual long evaluateAndSelect(const ibis::qContinuousRange&,
231  const ibis::bitvector&, void*,
232  ibis::bitvector&) const;
233 
243  virtual long estimateRange(const ibis::qContinuousRange& cmp,
244  ibis::bitvector& low,
245  ibis::bitvector& high) const;
247  virtual long estimateRange(const ibis::qDiscreteRange& cmp,
248  ibis::bitvector& low,
249  ibis::bitvector& high) const;
251  virtual long estimateRange(const ibis::qIntHod& cmp,
252  ibis::bitvector& low,
253  ibis::bitvector& high) const;
255  virtual long estimateRange(const ibis::qUIntHod& cmp,
256  ibis::bitvector& low,
257  ibis::bitvector& high) const;
258 
259  virtual long estimateRange(const ibis::qContinuousRange& cmp) const;
260  virtual long estimateRange(const ibis::qDiscreteRange& cmp) const;
262  virtual long estimateRange(const ibis::qIntHod& cmp) const;
264  virtual long estimateRange(const ibis::qUIntHod& cmp) const;
265 
267  virtual double estimateCost(const ibis::qContinuousRange& cmp) const;
269  virtual double estimateCost(const ibis::qDiscreteRange& cmp) const;
271  virtual double estimateCost(const ibis::qIntHod& cmp) const;
273  virtual double estimateCost(const ibis::qUIntHod& cmp) const;
275  virtual double estimateCost(const ibis::qString&) const {
276  return 0;}
278  virtual double estimateCost(const ibis::qAnyString&) const {
279  return 0;}
280 
281  virtual float getUndecidable(const ibis::qContinuousRange& cmp,
282  ibis::bitvector& iffy) const;
284  virtual float getUndecidable(const ibis::qDiscreteRange& cmp,
285  ibis::bitvector& iffy) const;
287  virtual float getUndecidable(const ibis::qIntHod& cmp,
288  ibis::bitvector& iffy) const;
290  virtual float getUndecidable(const ibis::qUIntHod& cmp,
291  ibis::bitvector& iffy) const;
292 
296  virtual const ibis::dictionary* getDictionary() const {return 0;}
297 
299  virtual long append(const char* dt, const char* df, const uint32_t nold,
300  const uint32_t nnew, uint32_t nbuf, char* buf);
301 
302  virtual long append(const void* vals, const ibis::bitvector& msk);
303  virtual long writeData(const char* dir, uint32_t nold, uint32_t nnew,
304  ibis::bitvector& mask, const void *va1,
305  void *va2=0);
306  template <typename T>
307  long castAndWrite(const array_t<double>& vals, ibis::bitvector& mask,
308  const T special);
309  virtual long saveSelected(const ibis::bitvector& sel, const char *dest,
310  char *buf, uint32_t nbuf);
311  virtual long truncateData(const char* dir, uint32_t nent,
312  ibis::bitvector& mask) const;
313 
314  virtual int indexWrite(ibis::array_t<double> &,
316  ibis::array_t<uint32_t> &) const;
317  virtual void indexSerialSizes(uint64_t&, uint64_t&, uint64_t&) const;
318 
324  virtual double getActualMin() const;
327  virtual double getActualMax() const;
329  virtual double getSum() const;
336  long getCumulativeDistribution(std::vector<double>& bounds,
337  std::vector<uint32_t>& counts) const;
349  long getDistribution(std::vector<double>& bbs,
350  std::vector<uint32_t>& counts) const;
352  class info;
353  class indexLock;
354  class mutexLock;
355 
358  struct unixTimeScribe {
361  delete format_;
362  delete timezone_;
363  }
365  unixTimeScribe(const char *fmt, const char *tz=0)
366  : format_(ibis::util::strnewdup(fmt)),
367  timezone_(ibis::util::strnewdup(tz)) {}
370  : format_(ibis::util::strnewdup(rhs.format_)),
371  timezone_(ibis::util::strnewdup(rhs.timezone_)) {}
372 
373  unixTimeScribe& operator=(const unixTimeScribe &rhs) {
374  delete format_;
375  delete timezone_;
376  format_ = ibis::util::strnewdup(rhs.format_);
377  timezone_ = ibis::util::strnewdup(rhs.timezone_);
378  return *this;
379  }
380 
381  unixTimeScribe* dup() const {
382  return new unixTimeScribe(*this);
383  }
384 
385  void operator()(std::ostream&, int64_t) const;
386  void operator()(std::ostream&, double) const;
387 
388  const char *format_;
389  const char *timezone_;
390  }; // unixTimeScribe
391  void setTimeFormat(const char*);
392  void setTimeFormat(const unixTimeScribe &);
393  const unixTimeScribe* getTimeFormat() const {return m_utscribe;}
394 
396  template <typename T> static
397  void actualMinMax(const array_t<T>& vals, const ibis::bitvector& mask,
398  double& min, double& max, bool &asc);
400  template <typename T> static
401  T computeMin(const array_t<T>& vals, const ibis::bitvector& mask);
403  template <typename T> static
404  T computeMax(const array_t<T>& vals, const ibis::bitvector& mask);
406  template <typename T> static
407  double computeSum(const array_t<T>& vals, const ibis::bitvector& mask);
408 
409 protected:
410  // protected member variables
411  const part* thePart;
414  std::string m_name;
415  std::string m_desc;
416  std::string m_bins;
417  bool m_sorted;
418  double lower;
419  double upper;
425  mutable int dataflag;
427  mutable ibis::index* idx;
430 
432  void logError(const char* event, const char* fmt, ...) const;
435  long string2int(int fptr, dictionary& dic, uint32_t nbuf, char* buf,
436  array_t<uint32_t>& out) const;
438  double computeMin() const;
440  double computeMax() const;
442  double computeSum() const;
445  void actualMinMax(const char *fname, const ibis::bitvector& mask,
446  double &min, double &max, bool &asc) const;
447 
449  virtual int searchSorted(const ibis::qContinuousRange&,
450  ibis::bitvector&) const;
452  virtual int searchSorted(const ibis::qDiscreteRange&,
453  ibis::bitvector&) const;
455  virtual int searchSorted(const ibis::qIntHod&,
456  ibis::bitvector&) const;
458  virtual int searchSorted(const ibis::qUIntHod&,
459  ibis::bitvector&) const;
461  template <typename T> int
462  searchSortedICC(const array_t<T>& vals,
463  const ibis::qContinuousRange& rng,
464  ibis::bitvector& hits) const;
466  template <typename T> int
467  searchSortedICD(const array_t<T>& vals,
468  const ibis::qDiscreteRange& rng,
469  ibis::bitvector& hits) const;
471  template <typename T> int
472  searchSortedICD(const array_t<T>& vals,
473  const ibis::qIntHod& rng,
474  ibis::bitvector& hits) const;
476  template <typename T> int
477  searchSortedICD(const array_t<T>& vals,
478  const ibis::qUIntHod& rng,
479  ibis::bitvector& hits) const;
481  template <typename T> int
482  searchSortedOOCC(const char* fname,
483  const ibis::qContinuousRange& rng,
484  ibis::bitvector& hits) const;
486  template <typename T> int
487  searchSortedOOCD(const char* fname,
488  const ibis::qDiscreteRange& rng,
489  ibis::bitvector& hits) const;
491  template <typename T> int
492  searchSortedOOCD(const char* fname,
493  const ibis::qIntHod& rng,
494  ibis::bitvector& hits) const;
496  template <typename T> int
497  searchSortedOOCD(const char* fname,
498  const ibis::qUIntHod& rng,
499  ibis::bitvector& hits) const;
500 
502  template <typename T> uint32_t
503  findLower(int fdes, const uint32_t nr, const T tgt) const;
505  template <typename T> uint32_t
506  findUpper(int fdes, const uint32_t nr, const T tgt) const;
507 
508  template <typename T>
509  long selectValuesT(const char*, const bitvector&, array_t<T>&) const;
510  template <typename T>
511  long selectValuesT(const char*, const bitvector& mask,
512  array_t<T>& vals, array_t<uint32_t>& inds) const;
513  template <typename T>
514  long selectToStrings(const char*, const bitvector&,
515  std::vector<std::string>&) const;
516  template <typename T>
517  long selectToOpaques(const char*, const bitvector&,
518  std::vector<ibis::opaque>&) const;
519 
521  template <typename T>
522  long appendValues(const array_t<T>&, const ibis::bitvector&);
524  long appendStrings(const std::vector<std::string>&, const ibis::bitvector&);
525 
526  class readLock;
527  class writeLock;
528  class softWriteLock;
529  friend class readLock;
530  friend class writeLock;
531  friend class indexLock;
532  friend class mutexLock;
533  friend class softWriteLock;
534 
535 private:
538  mutable pthread_rwlock_t rwlock;
540  mutable pthread_mutex_t mutex;
541 
542  column& operator=(const column&); // no assignment
543 }; // ibis::column
544 
547 class FASTBIT_CXX_DLLSPEC ibis::column::info {
548  public:
549  const char* name;
550  const char* description;
551  const double expectedMin;
552  const double expectedMax;
554  info(const ibis::column& col);
555  info(const info& rhs)
556  : name(rhs.name), description(rhs.description),
557  expectedMin(rhs.expectedMin),
558  expectedMax(rhs.expectedMax),
559  type(rhs.type) {};
560 
561  private:
562  info();
563  info& operator=(const info&);
564 }; // ibis::column::info
565 
570 public:
571  ~indexLock();
572  indexLock(const ibis::column* col, const char* m);
573  const ibis::index* getIndex() const {return theColumn->idx;};
574 
575 private:
576  const ibis::column* theColumn;
577  const char* mesg;
578 
579  indexLock();
580  indexLock(const indexLock&);
581  indexLock& operator=(const indexLock&);
582 }; // ibis::column::indexLock
583 
586 public:
589  mutexLock(const ibis::column* col, const char* m)
590  : theColumn(col), mesg(m) {
591  LOGGER(ibis::gVerbose > 9)
592  << "column[" << (theColumn ? theColumn->fullname() : "?.?")
593  << "]::gainExclusiveAccess for " << (mesg && *mesg ? mesg : "???");
594  pthread_mutex_t *mtx = (theColumn ? &theColumn->mutex :
596  int ierr = pthread_mutex_lock(mtx);
597  LOGGER(0 != ierr && ibis::gVerbose > 0)
598  << "Warning -- column["
599  << (theColumn ? theColumn->fullname() : "?.?")
600  << "]::gainExclusiveAccess -- pthread_mutex_lock for "
601  << (mesg && *mesg ? mesg : "???") << "returned " << ierr
602  << " (" << strerror(ierr) << ")";
603  }
604  ~mutexLock() {
605  LOGGER(ibis::gVerbose > 9)
606  << "column[" << (theColumn ? theColumn->fullname() : "?.?")
607  << "]::releaseExclusiveAccess for "
608  << (mesg && *mesg ? mesg : "???");
609  pthread_mutex_t *mtx = (theColumn ? &theColumn->mutex :
611  int ierr = pthread_mutex_unlock(mtx);
612  LOGGER(0 != ierr && ibis::gVerbose > 0)
613  << "Warning -- column["
614  << (theColumn ? theColumn->fullname() : "?.?")
615  << "]::releaseExclusiveAccess -- pthread_mutex_unlock for "
616  << (mesg && *mesg ? mesg : "???") << "returned " << ierr
617  << " (" << strerror(ierr) << ")";
618  }
619 
620 private:
621  const ibis::column* theColumn;
622  const char* mesg;
623 
624  mutexLock() {}; // no default constructor
625  mutexLock(const mutexLock&) {}; // can not copy
626  mutexLock& operator=(const mutexLock&);
627 }; // ibis::column::mutexLock
628 
631 public:
632  writeLock(const ibis::column* col, const char* m);
633  ~writeLock();
634 
635 private:
636  const ibis::column* theColumn;
637  const char* mesg;
638 
639  writeLock();
640  writeLock(const writeLock&);
641  writeLock& operator=(const writeLock&);
642 }; // ibis::column::writeLock
643 
646 public:
647  softWriteLock(const ibis::column* col, const char* m);
648  ~softWriteLock();
649  bool isLocked() const {return(locked==0);}
650 
651 private:
652  const ibis::column* theColumn;
653  const char* mesg;
654  const int locked;
655 
656  softWriteLock();
658  softWriteLock& operator=(const softWriteLock&);
659 }; // ibis::column::softWriteLock
660 
663 public:
664  readLock(const ibis::column* col, const char* m);
665  ~readLock();
666 
667 private:
668  const ibis::column* theColumn;
669  const char* mesg;
670 
671  readLock();
672  readLock(const readLock&);
673  readLock& operator=(const readLock&);
674 }; // ibis::column::readLock
675 
677 inline int ibis::column::elementSize() const {
678  int sz;
679  switch (m_type) {
680  case ibis::OID: sz = sizeof(rid_t); break;
681  case ibis::INT: sz = sizeof(int32_t); break;
682  case ibis::UINT: sz = sizeof(uint32_t); break;
683  case ibis::LONG: sz = sizeof(int64_t); break;
684  case ibis::ULONG: sz = sizeof(uint64_t); break;
685  case ibis::FLOAT: sz = sizeof(float); break;
686  case ibis::DOUBLE: sz = sizeof(double); break;
687  case ibis::BYTE: sz = sizeof(char); break;
688  case ibis::UBYTE: sz = sizeof(unsigned char); break;
689  case ibis::SHORT: sz = sizeof(int16_t); break;
690  case ibis::USHORT: sz = sizeof(uint16_t); break;
691  // case ibis::CATEGORY: sz = 0; break; // no fixed size per element
692  // case ibis::TEXT: sz = 0; break; // no fixed size per element
693  // case ibis::BLOB: sz = 0; break; // no fixed size per element
694  default: sz = 0; break;
695  }
696  return sz;
697 } // ibis::column::elementSize
698 
700 inline bool ibis::column::isFloat() const {
701  return(m_type == ibis::FLOAT || m_type == ibis::DOUBLE);
702 } // ibis::column::isFloat
703 
705 inline bool ibis::column::isInteger() const {
706  return(m_type == ibis::BYTE || m_type == ibis::UBYTE ||
707  m_type == ibis::SHORT || m_type == ibis::USHORT ||
708  m_type == ibis::INT || m_type == ibis::UINT ||
709  m_type == ibis::LONG || m_type == ibis::ULONG);
710 } // ibis::column::isInteger
711 
713 inline bool ibis::column::isSignedInteger() const {
714  return(m_type == ibis::BYTE || m_type == ibis::SHORT ||
715  m_type == ibis::INT || m_type == ibis::LONG);
716 } // ibis::column::isSignedInteger
717 
719 inline bool ibis::column::isUnsignedInteger() const {
720  return(m_type == ibis::UBYTE || m_type == ibis::USHORT ||
721  m_type == ibis::UINT || m_type == ibis::ULONG);
722 } // ibis::column::isUnsignedInteger
723 
725 inline bool ibis::column::isNumeric() const {
726  return(m_type == ibis::BYTE || m_type == ibis::UBYTE ||
727  m_type == ibis::SHORT || m_type == ibis::USHORT ||
728  m_type == ibis::INT || m_type == ibis::UINT ||
729  m_type == ibis::LONG || m_type == ibis::ULONG ||
730  m_type == ibis::FLOAT || m_type == ibis::DOUBLE);
731 } // ibis::column::isNumeric
732 
733 // the operator to print a column to an output stream
734 inline std::ostream& operator<<(std::ostream& out, const ibis::column& prop) {
735  prop.print(out);
736  return out;
737 }
738 
739 namespace ibis { // for template specialization
740  template <> long column::selectToStrings<signed char>
741  (const char*, const bitvector&, std::vector<std::string>&) const;
742  template <> long column::selectToStrings<unsigned char>
743  (const char*, const bitvector&, std::vector<std::string>&) const;
744 
745  namespace util {
747  inline bool isFloatType(ibis::TYPE_T t) {
748  return(t == ibis::FLOAT || t == ibis::DOUBLE);
749  }
750 
752  inline bool isIntegerType(ibis::TYPE_T t) {
753  return(t == ibis::BYTE || t == ibis::UBYTE ||
754  t == ibis::SHORT || t == ibis::USHORT ||
755  t == ibis::INT || t == ibis::UINT ||
756  t == ibis::LONG || t == ibis::ULONG);
757  }
758 
761  return(t == ibis::BYTE || t == ibis::SHORT ||
762  t == ibis::INT || t == ibis::LONG);
763  }
764 
767  return(t == ibis::UBYTE || t == ibis::USHORT ||
768  t == ibis::UINT || t == ibis::ULONG);
769  }
770 
772  inline bool isNumericType(ibis::TYPE_T t) {
773  return(t == ibis::BYTE || t == ibis::UBYTE ||
774  t == ibis::SHORT || t == ibis::USHORT ||
775  t == ibis::INT || t == ibis::UINT ||
776  t == ibis::LONG || t == ibis::ULONG ||
777  t == ibis::FLOAT || t == ibis::DOUBLE);
778  }
779 
781  inline bool isStringType(ibis::TYPE_T t) {
782  return(t == ibis::TEXT || t == ibis::CATEGORY);
783  }
784  }
785 }
786 #endif // IBIS_COLUMN_H
virtual double estimateCost(const ibis::qAnyString &) const
Estimate the cost of looking up a group of strings.
Definition: column.h:278
long getDistribution(std::vector< double > &bbs, std::vector< uint32_t > &counts) const
Count the number of records in each bin.
Definition: column.cpp:9387
static void actualMinMax(const array_t< T > &vals, const ibis::bitvector &mask, double &min, double &max, bool &asc)
Compute the minimum and maximum of the values in the array.
Definition: column.cpp:8824
virtual int indexWrite(ibis::array_t< double > &, ibis::array_t< int64_t > &, ibis::array_t< uint32_t > &) const
Write the index into three arrays.
Definition: column.cpp:398
bool isNumericType(ibis::TYPE_T t)
Is the type for numerical values?
Definition: column.h:772
~indexLock()
Destructor of index lock.
Definition: column.cpp:12098
std::string m_desc
Free-form description of the column.
Definition: column.h:415
virtual double getActualMax() const
Compute the actual maximum value by reading the data or examining the index.
Definition: column.cpp:9346
!< One bit per record, represented by a bit vector.
Definition: table.h:44
mutexLock(const ibis::column *col, const char *m)
Constructor.
Definition: column.h:589
const double expectedMax
The expected upper bound.
Definition: column.h:552
virtual int getString(uint32_t, std::string &) const
Return the string value for the ith row.
Definition: column.h:150
uint32_t findUpper(int fdes, const uint32_t nr, const T tgt) const
Find the smallest value > tgt.
Definition: column.cpp:11626
bool hasRoster() const
Is there a roster list built for this column? Returns true for yes, false for no. ...
Definition: column.cpp:9419
virtual array_t< float > * selectFloats(const bitvector &mask) const
Put selected values of a float column into an array.
Definition: column.cpp:3452
char * strnewdup(const char *s)
Duplicate string content with C++ default new operator.
Definition: util.cpp:1420
uint32_t findLower(int fdes, const uint32_t nr, const T tgt) const
Find the smallest value >= tgt.
Definition: column.cpp:11562
long castAndWrite(const array_t< double > &vals, ibis::bitvector &mask, const T special)
Cast the incoming array into the specified type T before writing the values to the file for this colu...
Definition: column.cpp:8775
bool isInteger() const
Are they integer values?
Definition: column.h:705
const double & upperBound() const
The upper bound of the values.
Definition: column.h:88
virtual bool hasRawData() const
Does the raw data file exist?
Definition: column.cpp:1073
virtual ibis::fileManager::storage * getRawData() const
Return the content of base data file as a storage object.
Definition: column.cpp:1089
unixTimeScribe * m_utscribe
!< The maximum value.
Definition: column.h:420
virtual array_t< signed char > * selectBytes(const bitvector &mask) const
Retrieve selected 1-byte integer values.
Definition: column.cpp:1118
virtual void write(FILE *file) const
Write the metadata entry.
Definition: column.cpp:351
void logMessage(const char *event, const char *fmt,...) const
Log messages using printf syntax.
Definition: column.cpp:5601
virtual array_t< uint16_t > * selectUShorts(const bitvector &mask) const
Return selected rows of the column in an array_t object.
Definition: column.cpp:1593
Simple range condition.
Definition: qExpr.h:252
The storage class treats all memory as char*.
Definition: fileManager.h:237
bool isFloatType(ibis::TYPE_T t)
Is the type for floating-point values?
Definition: column.h:747
uint32_t numBins() const
!< Retrieve the index specification.
Definition: column.cpp:426
int expandRange(ibis::qContinuousRange &rng) const
Expand the range expression so that the new range falls exactly on the bin boundaries.
Definition: column.cpp:6005
!< Four-byte signed integers, internally int32_t.
Definition: table.h:35
std::string m_name
Name of the column.
Definition: column.h:414
column(const column &rhs)
The copy constructor.
Definition: column.cpp:313
This query expression has similar meaning as ibis::qDiscreteRange, however, it stores the values as s...
Definition: qExpr.h:392
The column contains one of the values in a list.
Definition: qExpr.h:560
STL namespace.
pthread_mutex_t envLock
A mutex for serialize operations FastBit wide.
Definition: util.cpp:49
virtual array_t< double > * selectDoubles(const bitvector &mask) const
Put the selected values into an array as doubles.
Definition: column.cpp:3823
The current implementation of FastBit is code named IBIS; most data structures and functions are in t...
Definition: bord.h:16
bool isNumeric() const
Are they numerical values?
Definition: column.h:725
virtual long truncateData(const char *dir, uint32_t nent, ibis::bitvector &mask) const
Truncate the number of records in the named dir to nent.
Definition: column.cpp:8582
!< Eight-byte unsigned integers, internally uint64_t.
Definition: table.h:38
The class to represent a column of a data partition.
Definition: column.h:65
std::string m_bins
!< Free-form description of the column.
Definition: column.h:416
virtual long writeData(const char *dir, uint32_t nold, uint32_t nnew, ibis::bitvector &mask, const void *va1, void *va2=0)
Write the content in array va1 to directory dir.
Definition: column.cpp:7691
array_t< int32_t > * getIntArray() const
Return all rows of the column as an array_t object.
Definition: column.cpp:922
long string2int(int fptr, dictionary &dic, uint32_t nbuf, char *buf, array_t< uint32_t > &out) const
Convert strings in the opened file to a list of integers with the aid of a dictionary.
Definition: column.cpp:7406
const char * description() const
Description of the column. Can be an arbitrary string.
Definition: column.h:82
virtual array_t< uint64_t > * selectULongs(const bitvector &mask) const
Return selected rows of the column in an array_t object.
Definition: column.cpp:3077
array_t< double > * getDoubleArray() const
Return all rows of the column as an array_t object.
Definition: column.cpp:972
!< Two-byte unsigned integers, internally uint16_t.
Definition: table.h:34
double lower
!< Are the column values in ascending order?
Definition: column.h:418
virtual array_t< unsigned char > * selectUBytes(const bitvector &mask) const
Return selected rows of the column in an array_t object.
Definition: column.cpp:1234
The class qString encapsulates information for comparing string values.
Definition: qExpr.h:504
bool isUnsignedIntegerType(ibis::TYPE_T t)
Is the type for unsigned integer values?
Definition: column.h:766
double computeMin() const
Read the data values and compute the minimum value.
Definition: column.cpp:8951
Provide a write lock on an ibis::column object.
Definition: column.h:630
Arbitrary null-terminated strings.
Definition: table.h:51
unixTimeScribe(const char *fmt, const char *tz=0)
Constructor.
Definition: column.h:365
~writeLock()
Destructor.
Definition: column.cpp:12182
virtual int searchSorted(const ibis::qContinuousRange &, ibis::bitvector &) const
Resolve a continuous range condition on a sorted column.
Definition: column.cpp:9447
virtual void indexSerialSizes(uint64_t &, uint64_t &, uint64_t &) const
Compute the sizes (in number of elements) of three arrays that would be produced by writeIndex...
Definition: column.cpp:409
The base index class.
Definition: index.h:82
long selectValues(const bitvector &, void *) const
Return selected rows of the column in an array_t object.
Definition: column.cpp:5050
A functor for formatting unix time using the user supplied format.
Definition: column.h:358
ibis::bitvector mask_
!< Data partition containing this column.
Definition: column.h:412
Define the query expression.
int searchSortedICD(const array_t< T > &vals, const ibis::qDiscreteRange &rng, ibis::bitvector &hits) const
Resolve a discrete range condition on an array of values.
Definition: column.cpp:11688
A class for controlling access of the index object of a column.
Definition: column.h:569
virtual const ibis::dictionary * getDictionary() const
Return a pointer to a dictionary.
Definition: column.h:296
!< Two-byte signed integers, internally int16_t.
Definition: table.h:33
!< Eight-byte signed integers, internally int64_t.
Definition: table.h:37
virtual ~column()
Destructor.
Definition: column.cpp:337
~unixTimeScribe()
Destructor.
Definition: column.h:360
!< One-byte unsigned integers, internally unsigned char.
Definition: table.h:32
virtual double getActualMin() const
A group of functions to compute some basic statistics for the column values.
Definition: column.cpp:9332
bool isIntegerType(ibis::TYPE_T t)
Is the type for integer values?
Definition: column.h:752
void preferredBounds(std::vector< double > &) const
Retrieve the bin boundaries if the index is currently in use.
Definition: column.cpp:5902
virtual int getOpaque(uint32_t, ibis::opaque &) const
Return the raw binary value for the ith row.
Definition: column.cpp:5353
~softWriteLock()
Destructor.
Definition: column.cpp:12223
ibis::TYPE_T type() const
Type of the data.
Definition: column.h:76
Provide a read lock on an ibis::column object.
Definition: column.h:662
virtual double estimateCost(const ibis::qContinuousRange &cmp) const
Estimate the cost of evaluating the query expression.
Definition: column.cpp:6612
!< Four-byte IEEE floating-point numbers, internally float.
Definition: table.h:39
void setTimeFormat(const char *)
Add a custom format for the column to be interpreted as unix time stamps.
Definition: column.cpp:4586
ibis::index * idx
The index for this column. It is not considered as a must-have member.
Definition: column.h:427
virtual double getSum() const
Compute the sum of all values by reading the data.
Definition: column.cpp:9360
bool isFloat() const
Are they floating-point values?
Definition: column.h:700
The class ibis::part represents a partition of a relational table.
Definition: part.h:27
long selectValuesT(const char *, const bitvector &, array_t< T > &) const
Select values marked in the bitvector mask.
Definition: column.cpp:4643
TYPE_T
Supported data types.
Definition: table.h:25
This query expression has similar meaning as ibis::qDiscreteRange, however, it stores the values as u...
Definition: qExpr.h:447
long appendValues(const array_t< T > &, const ibis::bitvector &)
Append the content of incoming array to the current data.
Definition: column.cpp:7536
virtual long append(const char *dt, const char *df, const uint32_t nold, const uint32_t nnew, uint32_t nbuf, char *buf)
Append new data in directory df to the end of existing data in dt.
Definition: column.cpp:7060
int searchSortedICC(const array_t< T > &vals, const ibis::qContinuousRange &rng, ibis::bitvector &hits) const
Resolve a continuous range condition on an array of values.
Definition: column.cpp:10219
Some basic information about a column.
Definition: column.h:547
virtual void computeMinMax()
Compute the actual min/max values.
Definition: column.cpp:459
virtual long evaluateRange(const ibis::qContinuousRange &cmp, const ibis::bitvector &mask, ibis::bitvector &res) const
Compute the exact answer.
Definition: column.cpp:6029
const double expectedMin
!< A description about the column.
Definition: column.h:551
virtual array_t< uint32_t > * selectUInts(const bitvector &mask) const
Return selected rows of the column in an array_t object.
Definition: column.cpp:2235
virtual void loadIndex(const char *iopt=0, int ropt=0) const
Load the index associated with the column.
Definition: column.cpp:5736
virtual array_t< int64_t > * selectLongs(const bitvector &mask) const
Return selected rows of the column in an array_t object.
Definition: column.cpp:2483
const double & lowerBound() const
The lower bound of the values.
Definition: column.h:86
virtual array_t< int32_t > * selectInts(const bitvector &mask) const
Return selected rows of the column in an array_t object.
Definition: column.cpp:1833
virtual std::vector< std::string > * selectStrings(const bitvector &mask) const
Return the selected rows as strings.
Definition: column.cpp:5276
double computeSum() const
Read the base data to compute the total sum.
Definition: column.cpp:9199
bool hasIndex() const
!< Are the values sorted?
Definition: column.cpp:9404
A special eight-byte ID type for internal use.
Definition: table.h:29
int searchSortedOOCC(const char *fname, const ibis::qContinuousRange &rng, ibis::bitvector &hits) const
Resolve a continuous range condition using file operations.
Definition: column.cpp:10827
void logWarning(const char *event, const char *fmt,...) const
Log warning message using printf syntax.
Definition: column.cpp:5572
virtual double estimateCost(const ibis::qString &) const
Estimate the cost of evaluating a string lookup.
Definition: column.h:275
int searchSortedOOCD(const char *fname, const ibis::qDiscreteRange &rng, ibis::bitvector &hits) const
Resolve a discrete range condition using file operations.
Definition: column.cpp:11760
long selectToStrings(const char *, const bitvector &, std::vector< std::string > &) const
Extract the values masked 1 and convert them to strings.
Definition: column.cpp:5173
void name(const char *nm)
Rename the column.
Definition: column.h:80
ibis::TYPE_T m_type
!< The entries marked 1 are valid.
Definition: column.h:413
const char * description
!< Column name.
Definition: column.h:550
long appendStrings(const std::vector< std::string > &, const ibis::bitvector &)
Append the strings to the current data.
Definition: column.cpp:7613
virtual long saveSelected(const ibis::bitvector &sel, const char *dest, char *buf, uint32_t nbuf)
Write the selected records to the specified directory.
Definition: column.cpp:8300
A data structure to represent a sequence of bits.
Definition: bitvector.h:62
void binWeights(std::vector< uint32_t > &) const
Retrieve the number of rows in each bin.
Definition: column.cpp:5914
virtual long estimateRange(const ibis::qContinuousRange &cmp, ibis::bitvector &low, ibis::bitvector &high) const
Compute a lower bound and an upper bound on the number of hits using the bitmap index.
Definition: column.cpp:6494
Definition of Word-Aligned Hybrid code.
bool isSignedInteger() const
Are they signed integer values?
Definition: column.h:713
double upper
!< The minimum value.
Definition: column.h:419
FastBit Table Interface.
virtual int getValuesArray(void *vals) const
Copy all rows of the column into an array_t object.
Definition: column.cpp:1005
~readLock()
Destructor.
Definition: column.cpp:12143
long getCumulativeDistribution(std::vector< double > &bounds, std::vector< uint32_t > &counts) const
Compute the actual data distribution.
Definition: column.cpp:9375
double computeMax() const
Read the base data to compute the maximum value.
Definition: column.cpp:9076
const char * name() const
Name of the column.
Definition: column.h:78
const ibis::TYPE_T type
!< The expected upper bound.
Definition: column.h:553
bool m_sorted
!< Index/binning specification.
Definition: column.h:417
One-byte signed integers, internally char.
Definition: table.h:31
bool isStringType(ibis::TYPE_T t)
Is the type for strings?
Definition: column.h:781
bool isSignedIntegerType(ibis::TYPE_T t)
Is the type for signed integer values?
Definition: column.h:760
int elementSize() const
Size of a data element in bytes.
Definition: column.h:677
ibis::util::sharedInt32 idxcnt
The number of functions using the index.
Definition: column.h:429
std::string fullname() const
Fully qualified name.
Definition: column.cpp:5504
int(* FastBitReadBitmaps)(void *context, uint64_t start, uint64_t count, uint32_t *data)
A function prototype for delayed index reconstruction.
Definition: const.h:341
virtual void print(std::ostream &out) const
Print some basic information about this column.
Definition: column.cpp:5524
int dataflag
Presence of the data file.
Definition: column.h:425
A discrete range expression.
Definition: qExpr.h:337
Four-byte unsigned integers, internally uint32_t.
Definition: table.h:36
array_t< float > * getFloatArray() const
Return all rows of the column as an array_t object.
Definition: column.cpp:947
int contractRange(ibis::qContinuousRange &rng) const
Contract the range expression so that the new range falls exactly on the bin boundaries.
Definition: column.cpp:6015
virtual long evaluateAndSelect(const ibis::qContinuousRange &, const ibis::bitvector &, void *, ibis::bitvector &) const
Evaluate a range condition and retrieve the selected values.
Definition: column.cpp:6223
A shared unsigned 32-bit integer class.
Definition: util.h:945
unixTimeScribe(const unixTimeScribe &rhs)
Copy constructor.
Definition: column.h:369
Provide a dual-directional mapping between strings and integers.
Definition: dict-0.h:19
Provide a write lock on an ibis::column object.
Definition: column.h:645
virtual array_t< int16_t > * selectShorts(const bitvector &mask) const
Return selected rows of the column in an array_t object.
Definition: column.cpp:1353
void logError(const char *event, const char *fmt,...) const
Print messages starting with "Error" and throw a string exception.
Definition: column.cpp:5533
virtual float getUndecidable(const ibis::qContinuousRange &cmp, ibis::bitvector &iffy) const
Compute the locations of the rows that cannot be decided by the index.
Definition: column.cpp:6657
Definition: const.h:299
bool isUnsignedInteger() const
Are they unsigned integer values?
Definition: column.h:719
Provide a mutual exclusion lock on an ibis::column.
Definition: column.h:585

Make It A Bit Faster
Contact us
Disclaimers
FastBit source code
FastBit mailing list archive