72 const char* desc=
"",
double low=DBL_MAX,
double high=-DBL_MAX);
78 const char*
name()
const {
return m_name.c_str();}
80 void name(
const char* nm) {m_name = nm;}
95 bool isSorted()
const {
return m_sorted;}
104 int nRows()
const {
return mask_.size();}
106 const part* partition()
const {
return thePart;}
107 const part*& partition() {
return thePart;}
110 const char* indexSpec()
const;
112 void indexSpec(
const char* spec) {m_bins=spec;}
117 void binWeights(std::vector<uint32_t>&)
const;
122 double& min,
double &max,
bool &asc)
const;
124 virtual int attachIndex(
double *, uint64_t, int64_t *, uint64_t,
126 virtual int attachIndex(
double *, uint64_t, int64_t *, uint64_t,
127 uint32_t *, uint64_t)
const;
128 virtual void loadIndex(
const char* iopt=0,
int ropt=0)
const throw ();
129 virtual
void unloadIndex() const;
130 virtual
long indexSize() const;
132 uint32_t indexedRows() const;
133 void indexSpeedTest() const;
134 void purgeIndexFile(const
char *dir=0) const;
136 const
char* dataFileName(
std::
string& fname, const
char *dir=0) const;
137 const
char* nullMaskName(
std::
string& fname) const;
138 void getNullMask(bitvector& mask) const;
139 int setNullMask(const bitvector&);
144 virtual const
char* findString(const
char*)
const
145 {
return static_cast<const char*
>(0);}
150 virtual int getString(uint32_t, std::string&)
const {
return -1;}
155 virtual int getOpaque(uint32_t, ibis::opaque&)
const;
163 int getDataflag()
const {
return dataflag;}
164 void setDataflag(
int df) {dataflag = df;}
166 virtual array_t<signed char>*
selectBytes(
const bitvector& mask)
const;
167 virtual array_t<unsigned char>*
selectUBytes(
const bitvector& mask)
const;
168 virtual array_t<int16_t>*
selectShorts(
const bitvector& mask)
const;
169 virtual array_t<uint16_t>*
selectUShorts(
const bitvector& mask)
const;
170 virtual array_t<int32_t>*
selectInts(
const bitvector& mask)
const;
171 virtual array_t<uint32_t>*
selectUInts(
const bitvector& mask)
const;
172 virtual array_t<int64_t>*
selectLongs(
const bitvector& mask)
const;
173 virtual array_t<uint64_t>*
selectULongs(
const bitvector& mask)
const;
174 virtual array_t<float>*
selectFloats(
const bitvector& mask)
const;
175 virtual array_t<double>*
selectDoubles(
const bitvector& mask)
const;
176 virtual std::vector<std::string>*
178 virtual std::vector<ibis::opaque>*
179 selectOpaques(
const bitvector& mask)
const;
182 long selectValues(
const bitvector&,
void*, array_t<uint32_t>&)
const;
186 virtual void write(FILE* file)
const;
188 virtual void print(std::ostream& out)
const;
190 void logMessage(
const char* event,
const char* fmt, ...)
const;
192 void logWarning(
const char* event,
const char* fmt, ...)
const;
218 virtual long stringSearch(
const std::vector<std::string>&,
220 virtual long stringSearch(
const char*)
const;
221 virtual long stringSearch(
const std::vector<std::string>&)
const;
223 virtual long keywordSearch(
const char*)
const;
224 virtual long keywordSearch(
const std::vector<std::string>&,
226 virtual long keywordSearch(
const std::vector<std::string>&)
const;
227 virtual long patternSearch(
const char*)
const;
299 virtual long append(
const char* dt,
const char* df,
const uint32_t nold,
300 const uint32_t nnew, uint32_t nbuf,
char* buf);
303 virtual long writeData(
const char* dir, uint32_t nold, uint32_t nnew,
306 template <
typename T>
310 char *buf, uint32_t nbuf);
311 virtual long truncateData(
const char* dir, uint32_t nent,
329 virtual double getSum()
const;
337 std::vector<uint32_t>& counts)
const;
350 std::vector<uint32_t>& counts)
const;
381 unixTimeScribe* dup()
const {
382 return new unixTimeScribe(*
this);
385 void operator()(std::ostream&, int64_t)
const;
386 void operator()(std::ostream&,
double)
const;
389 const char *timezone_;
393 const unixTimeScribe* getTimeFormat()
const {
return m_utscribe;}
396 template <
typename T>
static
398 double& min,
double& max,
bool &asc);
400 template <
typename T>
static
403 template <
typename T>
static
406 template <
typename T>
static
432 void logError(
const char* event,
const char* fmt, ...)
const;
446 double &min,
double &max,
bool &asc)
const;
461 template <
typename T>
int
466 template <
typename T>
int
471 template <
typename T>
int
476 template <
typename T>
int
481 template <
typename T>
int
486 template <
typename T>
int
491 template <
typename T>
int
496 template <
typename T>
int
502 template <
typename T> uint32_t
503 findLower(
int fdes,
const uint32_t nr,
const T tgt)
const;
505 template <
typename T> uint32_t
506 findUpper(
int fdes,
const uint32_t nr,
const T tgt)
const;
508 template <
typename T>
510 template <
typename T>
513 template <
typename T>
515 std::vector<std::string>&)
const;
516 template <
typename T>
517 long selectToOpaques(
const char*,
const bitvector&,
518 std::vector<ibis::opaque>&)
const;
521 template <
typename T>
540 mutable pthread_mutex_t mutex;
556 : name(rhs.name), description(rhs.description),
557 expectedMin(rhs.expectedMin),
558 expectedMax(rhs.expectedMax),
590 : theColumn(col), mesg(m) {
591 LOGGER(ibis::gVerbose > 9)
592 <<
"column[" << (theColumn ? theColumn->
fullname() :
"?.?")
593 <<
"]::gainExclusiveAccess for " << (mesg && *mesg ? mesg :
"???");
594 pthread_mutex_t *mtx = (theColumn ? &theColumn->mutex :
596 int ierr = pthread_mutex_lock(mtx);
597 LOGGER(0 != ierr && ibis::gVerbose > 0)
598 <<
"Warning -- column["
599 << (theColumn ? theColumn->
fullname() :
"?.?")
600 <<
"]::gainExclusiveAccess -- pthread_mutex_lock for "
601 << (mesg && *mesg ? mesg :
"???") <<
"returned " << ierr
602 <<
" (" << strerror(ierr) <<
")";
605 LOGGER(ibis::gVerbose > 9)
606 <<
"column[" << (theColumn ? theColumn->
fullname() :
"?.?")
607 <<
"]::releaseExclusiveAccess for "
608 << (mesg && *mesg ? mesg :
"???");
609 pthread_mutex_t *mtx = (theColumn ? &theColumn->mutex :
611 int ierr = pthread_mutex_unlock(mtx);
612 LOGGER(0 != ierr && ibis::gVerbose > 0)
613 <<
"Warning -- column["
614 << (theColumn ? theColumn->
fullname() :
"?.?")
615 <<
"]::releaseExclusiveAccess -- pthread_mutex_unlock for "
616 << (mesg && *mesg ? mesg :
"???") <<
"returned " << ierr
617 <<
" (" << strerror(ierr) <<
")";
625 mutexLock(
const mutexLock&) {};
649 bool isLocked()
const {
return(locked==0);}
680 case ibis::OID: sz =
sizeof(rid_t);
break;
681 case ibis::INT: sz =
sizeof(int32_t);
break;
682 case ibis::UINT: sz =
sizeof(uint32_t);
break;
687 case ibis::BYTE: sz =
sizeof(char);
break;
688 case ibis::UBYTE: sz =
sizeof(
unsigned char);
break;
694 default: sz = 0;
break;
706 return(m_type == ibis::BYTE || m_type ==
ibis::UBYTE ||
714 return(m_type == ibis::BYTE || m_type ==
ibis::SHORT ||
726 return(m_type == ibis::BYTE || m_type ==
ibis::UBYTE ||
734 inline std::ostream& operator<<(std::ostream& out,
const ibis::column& prop) {
740 template <>
long column::selectToStrings<signed char>
741 (
const char*,
const bitvector&, std::vector<std::string>&)
const;
742 template <>
long column::selectToStrings<unsigned char>
743 (
const char*,
const bitvector&, std::vector<std::string>&)
const;
786 #endif // IBIS_COLUMN_H
virtual double estimateCost(const ibis::qAnyString &) const
Estimate the cost of looking up a group of strings.
Definition: column.h:278
long getDistribution(std::vector< double > &bbs, std::vector< uint32_t > &counts) const
Count the number of records in each bin.
Definition: column.cpp:9387
static void actualMinMax(const array_t< T > &vals, const ibis::bitvector &mask, double &min, double &max, bool &asc)
Compute the minimum and maximum of the values in the array.
Definition: column.cpp:8824
virtual int indexWrite(ibis::array_t< double > &, ibis::array_t< int64_t > &, ibis::array_t< uint32_t > &) const
Write the index into three arrays.
Definition: column.cpp:398
bool isNumericType(ibis::TYPE_T t)
Is the type for numerical values?
Definition: column.h:772
~indexLock()
Destructor of index lock.
Definition: column.cpp:12098
std::string m_desc
Free-form description of the column.
Definition: column.h:415
virtual double getActualMax() const
Compute the actual maximum value by reading the data or examining the index.
Definition: column.cpp:9346
!< One bit per record, represented by a bit vector.
Definition: table.h:44
mutexLock(const ibis::column *col, const char *m)
Constructor.
Definition: column.h:589
const double expectedMax
The expected upper bound.
Definition: column.h:552
virtual int getString(uint32_t, std::string &) const
Return the string value for the ith row.
Definition: column.h:150
uint32_t findUpper(int fdes, const uint32_t nr, const T tgt) const
Find the smallest value > tgt.
Definition: column.cpp:11626
bool hasRoster() const
Is there a roster list built for this column? Returns true for yes, false for no. ...
Definition: column.cpp:9419
virtual array_t< float > * selectFloats(const bitvector &mask) const
Put selected values of a float column into an array.
Definition: column.cpp:3452
char * strnewdup(const char *s)
Duplicate string content with C++ default new operator.
Definition: util.cpp:1420
uint32_t findLower(int fdes, const uint32_t nr, const T tgt) const
Find the smallest value >= tgt.
Definition: column.cpp:11562
long castAndWrite(const array_t< double > &vals, ibis::bitvector &mask, const T special)
Cast the incoming array into the specified type T before writing the values to the file for this colu...
Definition: column.cpp:8775
bool isInteger() const
Are they integer values?
Definition: column.h:705
const double & upperBound() const
The upper bound of the values.
Definition: column.h:88
virtual bool hasRawData() const
Does the raw data file exist?
Definition: column.cpp:1073
virtual ibis::fileManager::storage * getRawData() const
Return the content of base data file as a storage object.
Definition: column.cpp:1089
unixTimeScribe * m_utscribe
!< The maximum value.
Definition: column.h:420
virtual array_t< signed char > * selectBytes(const bitvector &mask) const
Retrieve selected 1-byte integer values.
Definition: column.cpp:1118
virtual void write(FILE *file) const
Write the metadata entry.
Definition: column.cpp:351
void logMessage(const char *event, const char *fmt,...) const
Log messages using printf syntax.
Definition: column.cpp:5601
virtual array_t< uint16_t > * selectUShorts(const bitvector &mask) const
Return selected rows of the column in an array_t object.
Definition: column.cpp:1593
Simple range condition.
Definition: qExpr.h:252
The storage class treats all memory as char*.
Definition: fileManager.h:237
bool isFloatType(ibis::TYPE_T t)
Is the type for floating-point values?
Definition: column.h:747
uint32_t numBins() const
!< Retrieve the index specification.
Definition: column.cpp:426
int expandRange(ibis::qContinuousRange &rng) const
Expand the range expression so that the new range falls exactly on the bin boundaries.
Definition: column.cpp:6005
!< Four-byte signed integers, internally int32_t.
Definition: table.h:35
std::string m_name
Name of the column.
Definition: column.h:414
column(const column &rhs)
The copy constructor.
Definition: column.cpp:313
This query expression has similar meaning as ibis::qDiscreteRange, however, it stores the values as s...
Definition: qExpr.h:392
The column contains one of the values in a list.
Definition: qExpr.h:560
pthread_mutex_t envLock
A mutex for serializing operations FastBit-wide.
Definition: util.cpp:49
virtual array_t< double > * selectDoubles(const bitvector &mask) const
Put the selected values into an array as doubles.
Definition: column.cpp:3823
The current implementation of FastBit is code named IBIS; most data structures and functions are in t...
Definition: bord.h:16
bool isNumeric() const
Are they numerical values?
Definition: column.h:725
virtual long truncateData(const char *dir, uint32_t nent, ibis::bitvector &mask) const
Truncate the number of records in the named dir to nent.
Definition: column.cpp:8582
!< Eight-byte unsigned integers, internally uint64_t.
Definition: table.h:38
The class to represent a column of a data partition.
Definition: column.h:65
std::string m_bins
Index/binning specification.
Definition: column.h:416
virtual long writeData(const char *dir, uint32_t nold, uint32_t nnew, ibis::bitvector &mask, const void *va1, void *va2=0)
Write the content in array va1 to directory dir.
Definition: column.cpp:7691
array_t< int32_t > * getIntArray() const
Return all rows of the column as an array_t object.
Definition: column.cpp:922
long string2int(int fptr, dictionary &dic, uint32_t nbuf, char *buf, array_t< uint32_t > &out) const
Convert strings in the opened file to a list of integers with the aid of a dictionary.
Definition: column.cpp:7406
const char * description() const
Description of the column. Can be an arbitrary string.
Definition: column.h:82
virtual array_t< uint64_t > * selectULongs(const bitvector &mask) const
Return selected rows of the column in an array_t object.
Definition: column.cpp:3077
array_t< double > * getDoubleArray() const
Return all rows of the column as an array_t object.
Definition: column.cpp:972
!< Two-byte unsigned integers, internally uint16_t.
Definition: table.h:34
double lower
The minimum value.
Definition: column.h:418
virtual array_t< unsigned char > * selectUBytes(const bitvector &mask) const
Return selected rows of the column in an array_t object.
Definition: column.cpp:1234
The class qString encapsulates information for comparing string values.
Definition: qExpr.h:504
bool isUnsignedIntegerType(ibis::TYPE_T t)
Is the type for unsigned integer values?
Definition: column.h:766
double computeMin() const
Read the data values and compute the minimum value.
Definition: column.cpp:8951
Provide a write lock on an ibis::column object.
Definition: column.h:630
Arbitrary null-terminated strings.
Definition: table.h:51
unixTimeScribe(const char *fmt, const char *tz=0)
Constructor.
Definition: column.h:365
~writeLock()
Destructor.
Definition: column.cpp:12182
virtual int searchSorted(const ibis::qContinuousRange &, ibis::bitvector &) const
Resolve a continuous range condition on a sorted column.
Definition: column.cpp:9447
virtual void indexSerialSizes(uint64_t &, uint64_t &, uint64_t &) const
Compute the sizes (in number of elements) of three arrays that would be produced by writeIndex...
Definition: column.cpp:409
The base index class.
Definition: index.h:82
long selectValues(const bitvector &, void *) const
Return selected rows of the column in an array_t object.
Definition: column.cpp:5050
A functor for formatting unix time using the user supplied format.
Definition: column.h:358
ibis::bitvector mask_
The entries marked 1 are valid.
Definition: column.h:412
Define the query expression.
int searchSortedICD(const array_t< T > &vals, const ibis::qDiscreteRange &rng, ibis::bitvector &hits) const
Resolve a discrete range condition on an array of values.
Definition: column.cpp:11688
A class for controlling access of the index object of a column.
Definition: column.h:569
virtual const ibis::dictionary * getDictionary() const
Return a pointer to a dictionary.
Definition: column.h:296
!< Two-byte signed integers, internally int16_t.
Definition: table.h:33
!< Eight-byte signed integers, internally int64_t.
Definition: table.h:37
virtual ~column()
Destructor.
Definition: column.cpp:337
~unixTimeScribe()
Destructor.
Definition: column.h:360
!< One-byte unsigned integers, internally unsigned char.
Definition: table.h:32
virtual double getActualMin() const
A group of functions to compute some basic statistics for the column values.
Definition: column.cpp:9332
bool isIntegerType(ibis::TYPE_T t)
Is the type for integer values?
Definition: column.h:752
void preferredBounds(std::vector< double > &) const
Retrieve the bin boundaries if the index is currently in use.
Definition: column.cpp:5902
virtual int getOpaque(uint32_t, ibis::opaque &) const
Return the raw binary value for the ith row.
Definition: column.cpp:5353
~softWriteLock()
Destructor.
Definition: column.cpp:12223
ibis::TYPE_T type() const
Type of the data.
Definition: column.h:76
Provide a write lock on an ibis::column object.
Definition: column.h:662
virtual double estimateCost(const ibis::qContinuousRange &cmp) const
Estimate the cost of evaluating the query expression.
Definition: column.cpp:6612
!< Four-byte IEEE floating-point numbers, internally float.
Definition: table.h:39
void setTimeFormat(const char *)
Add a custom format for the column to be interpreted as unix time stamps.
Definition: column.cpp:4586
ibis::index * idx
The index for this column. It is not considered as a must-have member.
Definition: column.h:427
virtual double getSum() const
Compute the sum of all values by reading the data.
Definition: column.cpp:9360
bool isFloat() const
Are they floating-point values?
Definition: column.h:700
The class ibis::part represents a partition of a relational table.
Definition: part.h:27
long selectValuesT(const char *, const bitvector &, array_t< T > &) const
Select values marked in the bitvector mask.
Definition: column.cpp:4643
TYPE_T
Supported data types.
Definition: table.h:25
This query expression has similar meaning as ibis::qDiscreteRange, however, it stores the values as u...
Definition: qExpr.h:447
long appendValues(const array_t< T > &, const ibis::bitvector &)
Append the content of incoming array to the current data.
Definition: column.cpp:7536
virtual long append(const char *dt, const char *df, const uint32_t nold, const uint32_t nnew, uint32_t nbuf, char *buf)
Append new data in directory df to the end of existing data in dt.
Definition: column.cpp:7060
int searchSortedICC(const array_t< T > &vals, const ibis::qContinuousRange &rng, ibis::bitvector &hits) const
Resolve a continuous range condition on an array of values.
Definition: column.cpp:10219
Some basic information about a column.
Definition: column.h:547
virtual void computeMinMax()
Compute the actual min/max values.
Definition: column.cpp:459
virtual long evaluateRange(const ibis::qContinuousRange &cmp, const ibis::bitvector &mask, ibis::bitvector &res) const
Compute the exact answer.
Definition: column.cpp:6029
const double expectedMin
The expected lower bound.
Definition: column.h:551
virtual array_t< uint32_t > * selectUInts(const bitvector &mask) const
Return selected rows of the column in an array_t object.
Definition: column.cpp:2235
virtual void loadIndex(const char *iopt=0, int ropt=0) const
Load the index associated with the column.
Definition: column.cpp:5736
virtual array_t< int64_t > * selectLongs(const bitvector &mask) const
Return selected rows of the column in an array_t object.
Definition: column.cpp:2483
const double & lowerBound() const
The lower bound of the values.
Definition: column.h:86
virtual array_t< int32_t > * selectInts(const bitvector &mask) const
Return selected rows of the column in an array_t object.
Definition: column.cpp:1833
virtual std::vector< std::string > * selectStrings(const bitvector &mask) const
Return the selected rows as strings.
Definition: column.cpp:5276
double computeSum() const
Read the base data to compute the total sum.
Definition: column.cpp:9199
bool hasIndex() const
!< Are the values sorted?
Definition: column.cpp:9404
A special eight-byte ID type for internal use.
Definition: table.h:29
int searchSortedOOCC(const char *fname, const ibis::qContinuousRange &rng, ibis::bitvector &hits) const
Resolve a continuous range condition using file operations.
Definition: column.cpp:10827
void logWarning(const char *event, const char *fmt,...) const
Log warning message using printf syntax.
Definition: column.cpp:5572
virtual double estimateCost(const ibis::qString &) const
Estimate the cost of evaluating a string lookup.
Definition: column.h:275
int searchSortedOOCD(const char *fname, const ibis::qDiscreteRange &rng, ibis::bitvector &hits) const
Resolve a discrete range condition using file operations.
Definition: column.cpp:11760
long selectToStrings(const char *, const bitvector &, std::vector< std::string > &) const
Extract the values masked 1 and convert them to strings.
Definition: column.cpp:5173
void name(const char *nm)
Rename the column.
Definition: column.h:80
ibis::TYPE_T m_type
Data type.
Definition: column.h:413
const char * description
A description about the column.
Definition: column.h:550
long appendStrings(const std::vector< std::string > &, const ibis::bitvector &)
Append the strings to the current data.
Definition: column.cpp:7613
virtual long saveSelected(const ibis::bitvector &sel, const char *dest, char *buf, uint32_t nbuf)
Write the selected records to the specified directory.
Definition: column.cpp:8300
A data structure to represent a sequence of bits.
Definition: bitvector.h:62
void binWeights(std::vector< uint32_t > &) const
Retrieve the number of rows in each bin.
Definition: column.cpp:5914
virtual long estimateRange(const ibis::qContinuousRange &cmp, ibis::bitvector &low, ibis::bitvector &high) const
Compute a lower bound and an upper bound on the number of hits using the bitmap index.
Definition: column.cpp:6494
Definition of Word-Aligned Hybrid code.
bool isSignedInteger() const
Are they signed integer values?
Definition: column.h:713
double upper
The maximum value.
Definition: column.h:419
virtual int getValuesArray(void *vals) const
Copy all rows of the column into an array_t object.
Definition: column.cpp:1005
~readLock()
Destructor.
Definition: column.cpp:12143
long getCumulativeDistribution(std::vector< double > &bounds, std::vector< uint32_t > &counts) const
Compute the actual data distribution.
Definition: column.cpp:9375
double computeMax() const
Read the base data to compute the maximum value.
Definition: column.cpp:9076
const char * name() const
Name of the column.
Definition: column.h:78
const ibis::TYPE_T type
!< The expected upper bound.
Definition: column.h:553
bool m_sorted
Are the column values in ascending order?
Definition: column.h:417
!< One-byte signed integers, internally char.
Definition: table.h:31
bool isStringType(ibis::TYPE_T t)
Is the type for strings?
Definition: column.h:781
bool isSignedIntegerType(ibis::TYPE_T t)
Is the type for signed integer values?
Definition: column.h:760
int elementSize() const
Size of a data element in bytes.
Definition: column.h:677
ibis::util::sharedInt32 idxcnt
The number of functions using the index.
Definition: column.h:429
std::string fullname() const
Fully qualified name.
Definition: column.cpp:5504
int(* FastBitReadBitmaps)(void *context, uint64_t start, uint64_t count, uint32_t *data)
A function prototype for delayed index reconstruction.
Definition: const.h:341
virtual void print(std::ostream &out) const
Print some basic information about this column.
Definition: column.cpp:5524
int dataflag
Presence of the data file.
Definition: column.h:425
A discrete range expression.
Definition: qExpr.h:337
!< Four-byte unsigned integers, internally uint32_t.
Definition: table.h:36
array_t< float > * getFloatArray() const
Return all rows of the column as an array_t object.
Definition: column.cpp:947
int contractRange(ibis::qContinuousRange &rng) const
Contract the range expression so that the new range falls exactly on the bin boundaries.
Definition: column.cpp:6015
virtual long evaluateAndSelect(const ibis::qContinuousRange &, const ibis::bitvector &, void *, ibis::bitvector &) const
Evaluate a range condition and retrieve the selected values.
Definition: column.cpp:6223
A shared unsigned 32-bit integer class.
Definition: util.h:945
unixTimeScribe(const unixTimeScribe &rhs)
Copy constructor.
Definition: column.h:369
Provide a dual-directional mapping between strings and integers.
Definition: dict-0.h:19
Provide a write lock on an ibis::column object.
Definition: column.h:645
virtual array_t< int16_t > * selectShorts(const bitvector &mask) const
Return selected rows of the column in an array_t object.
Definition: column.cpp:1353
void logError(const char *event, const char *fmt,...) const
Print messages started with "Error" and throw a string exception.
Definition: column.cpp:5533
virtual float getUndecidable(const ibis::qContinuousRange &cmp, ibis::bitvector &iffy) const
Compute the locations of the rows that cannot be decided by the index.
Definition: column.cpp:6657
bool isUnsignedInteger() const
Are they unsigned integer values?
Definition: column.h:719
Provide a mutual exclusion lock on an ibis::column.
Definition: column.h:585