The class to represent a column of a data partition. More...
#include <column.h>
Classes | |
class | indexLock |
A class for controlling access of the index object of a column. More... | |
class | info |
Some basic information about a column. More... | |
class | mutexLock |
Provide a mutual exclusion lock on an ibis::column. More... | |
class | readLock |
Provide a read lock on an ibis::column object. More... | |
class | softWriteLock |
Provide a write lock on an ibis::column object. More... | |
struct | unixTimeScribe |
A functor for formatting unix time using the user supplied format. More... | |
class | writeLock |
Provide a write lock on an ibis::column object. More... | |
Public Member Functions | |
virtual long | append (const char *dt, const char *df, const uint32_t nold, const uint32_t nnew, uint32_t nbuf, char *buf) |
Append new data in directory df to the end of existing data in dt. More... | |
virtual long | append (const void *vals, const ibis::bitvector &msk) |
Append the records in vals to the current working dataset. More... | |
virtual int | attachIndex (double *, uint64_t, int64_t *, uint64_t, void *, FastBitReadBitmaps) const |
virtual int | attachIndex (double *, uint64_t, int64_t *, uint64_t, uint32_t *, uint64_t) const |
void | binWeights (std::vector< uint32_t > &) const |
Retrieve the number of rows in each bin. | |
template<typename T > | |
long | castAndWrite (const array_t< double > &vals, ibis::bitvector &mask, const T special) |
Cast the incoming array into the specified type T before writing the values to the file for this column. More... | |
column (const column &rhs) | |
The copy constructor. More... | |
column (const part *tbl, FILE *file) | |
Reconstitute a column from the content of a file. More... | |
column (const part *tbl, ibis::TYPE_T t, const char *name, const char *desc="", double low=DBL_MAX, double high=-DBL_MAX) | |
Construct a new column object based on type and name. | |
virtual void | computeMinMax () |
Compute the actual min/max values. More... | |
virtual void | computeMinMax (const char *dir) |
Compute the actual min/max values. More... | |
virtual void | computeMinMax (const char *dir, double &min, double &max, bool &asc) const |
Compute the actual min/max of the data in directory dir. More... | |
int | contractRange (ibis::qContinuousRange &rng) const |
Contract the range expression so that the new range falls exactly on the bin boundaries. More... | |
const char * | dataFileName (std::string &fname, const char *dir=0) const |
Name of the data file in the given data directory. More... | |
const char * | description () const |
Description of the column. Can be an arbitrary string. | |
void | description (const char *d) |
int | elementSize () const |
Size of a data element in bytes. | |
virtual double | estimateCost (const ibis::qContinuousRange &cmp) const |
Estimate the cost of evaluating the query expression. | |
virtual double | estimateCost (const ibis::qDiscreteRange &cmp) const |
Estimate the cost of evaluating a discrete range expression. | |
virtual double | estimateCost (const ibis::qIntHod &cmp) const |
Estimate the cost of evaluating a discrete range expression. | |
virtual double | estimateCost (const ibis::qUIntHod &cmp) const |
Estimate the cost of evaluating a discrete range expression. | |
virtual double | estimateCost (const ibis::qString &) const |
Estimate the cost of evaluating a string lookup. | |
virtual double | estimateCost (const ibis::qAnyString &) const |
Estimate the cost of looking up a group of strings. | |
virtual long | estimateRange (const ibis::qContinuousRange &cmp, ibis::bitvector &low, ibis::bitvector &high) const |
Compute a lower bound and an upper bound on the number of hits using the bitmap index. More... | |
virtual long | estimateRange (const ibis::qDiscreteRange &cmp, ibis::bitvector &low, ibis::bitvector &high) const |
Compute a lower bound and an upper bound for hits. More... | |
virtual long | estimateRange (const ibis::qIntHod &cmp, ibis::bitvector &low, ibis::bitvector &high) const |
Compute a lower bound and an upper bound for hits. More... | |
virtual long | estimateRange (const ibis::qUIntHod &cmp, ibis::bitvector &low, ibis::bitvector &high) const |
Compute a lower bound and an upper bound for hits. More... | |
virtual long | estimateRange (const ibis::qContinuousRange &cmp) const |
Use the index of the column to compute an upper bound on the number of hits. More... | |
virtual long | estimateRange (const ibis::qDiscreteRange &cmp) const |
virtual long | estimateRange (const ibis::qIntHod &cmp) const |
Compute an upper bound on the number of hits. More... | |
virtual long | estimateRange (const ibis::qUIntHod &cmp) const |
Compute an upper bound on the number of hits. More... | |
virtual long | evaluateAndSelect (const ibis::qContinuousRange &, const ibis::bitvector &, void *, ibis::bitvector &) const |
Evaluate a range condition and retrieve the selected values. More... | |
virtual long | evaluateRange (const ibis::qContinuousRange &cmp, const ibis::bitvector &mask, ibis::bitvector &res) const |
Compute the exact answer. More... | |
virtual long | evaluateRange (const ibis::qDiscreteRange &cmp, const ibis::bitvector &mask, ibis::bitvector &res) const |
Compute the exact answer to a discrete range expression. | |
virtual long | evaluateRange (const ibis::qIntHod &cmp, const ibis::bitvector &mask, ibis::bitvector &res) const |
Compute the exact answer to a discrete range expression. | |
virtual long | evaluateRange (const ibis::qUIntHod &cmp, const ibis::bitvector &mask, ibis::bitvector &res) const |
Compute the exact answer to a discrete range expression. | |
int | expandRange (ibis::qContinuousRange &rng) const |
Expand the range expression so that the new range falls exactly on the bin boundaries. More... | |
virtual const char * | findString (const char *) const |
Determine if the input string has appeared in this data partition. More... | |
std::string | fullname () const |
Fully qualified name. More... | |
int | getDataflag () const |
virtual const ibis::dictionary * | getDictionary () const |
Return a pointer to a dictionary. More... | |
array_t< double > * | getDoubleArray () const |
Return all rows of the column as an array_t object. | |
array_t< float > * | getFloatArray () const |
Return all rows of the column as an array_t object. | |
array_t< int32_t > * | getIntArray () const |
Return all rows of the column as an array_t object. More... | |
void | getNullMask (bitvector &mask) const |
If there is a null mask stored already, return a shallow copy of it in mask. More... | |
virtual int | getOpaque (uint32_t, ibis::opaque &) const |
Return the raw binary value for the i th row. More... | |
virtual ibis::fileManager::storage * | getRawData () const |
Return the content of base data file as a storage object. | |
virtual int | getString (uint32_t, std::string &) const |
Return the string value for the i th row. More... | |
const unixTimeScribe * | getTimeFormat () const |
virtual float | getUndecidable (const ibis::qContinuousRange &cmp, ibis::bitvector &iffy) const |
Compute the locations of the rows that can not be decided by the index. More... | |
virtual float | getUndecidable (const ibis::qDiscreteRange &cmp, ibis::bitvector &iffy) const |
Find rows that can not be decided with the existing index. | |
virtual float | getUndecidable (const ibis::qIntHod &cmp, ibis::bitvector &iffy) const |
Find rows that can not be decided with the existing index. More... | |
virtual float | getUndecidable (const ibis::qUIntHod &cmp, ibis::bitvector &iffy) const |
Find rows that can not be decided with the existing index. More... | |
virtual int | getValuesArray (void *vals) const |
Copy all rows of the column into an array_t object. More... | |
bool | hasIndex () const |
Is there an index built for this column? More... | |
virtual bool | hasRawData () const |
Does the raw data file exist? | |
bool | hasRoster () const |
Is there a roster list built for this column? Returns true for yes, false for no. More... | |
uint32_t | indexedRows () const |
Compute the number of rows captured by the index of this column. More... | |
virtual void | indexSerialSizes (uint64_t &, uint64_t &, uint64_t &) const |
Compute the sizes (in number of elements) of three arrays that would be produced by writeIndex. More... | |
virtual long | indexSize () const |
Compute the index size (in bytes). More... | |
const char * | indexSpec () const |
Retrieve the index specification. | |
void | indexSpec (const char *spec) |
Set the index specification. More... | |
void | indexSpeedTest () const |
Perform a set of built-in tests to determine the speed of common operations. More... | |
virtual int | indexWrite (ibis::array_t< double > &, ibis::array_t< int64_t > &, ibis::array_t< uint32_t > &) const |
Write the index into three arrays. | |
bool | isFloat () const |
Are they floating-point values? | |
bool | isInteger () const |
Are they integer values? | |
bool | isNumeric () const |
Are they numerical values? | |
bool | isSignedInteger () const |
Are they signed integer values? | |
bool | isSorted () const |
Are the values sorted? | |
void | isSorted (bool) |
Change the flag m_sorted. More... | |
bool | isUnsignedInteger () const |
Are they unsigned integer values? | |
virtual long | keywordSearch (const char *, ibis::bitvector &) const |
virtual long | keywordSearch (const char *) const |
virtual long | keywordSearch (const std::vector< std::string > &, ibis::bitvector &) const |
virtual long | keywordSearch (const std::vector< std::string > &) const |
virtual void | loadIndex (const char *iopt=0, int ropt=0) const throw () |
Load the index associated with the column. More... | |
void | logMessage (const char *event, const char *fmt,...) const |
Log messages using printf syntax. | |
void | logWarning (const char *event, const char *fmt,...) const |
Log warning messages using printf syntax. | |
const double & | lowerBound () const |
The lower bound of the values. | |
void | lowerBound (double d) |
const char * | name () const |
Name of the column. | |
void | name (const char *nm) |
Rename the column. | |
int | nRows () const |
const char * | nullMaskName (std::string &fname) const |
Name of the NULL mask file. More... | |
uint32_t | numBins () const |
Retrieve the number of bins used. | |
const part * | partition () const |
const part *& | partition () |
virtual long | patternSearch (const char *) const |
virtual long | patternSearch (const char *, ibis::bitvector &) const |
void | preferredBounds (std::vector< double > &) const |
Retrieve the bin boundaries of the index currently in use. | |
virtual void | print (std::ostream &out) const |
Print some basic information about this column. | |
void | purgeIndexFile (const char *dir=0) const |
Purge the index files associated with the current column. | |
virtual long | saveSelected (const ibis::bitvector &sel, const char *dest, char *buf, uint32_t nbuf) |
Write the selected records to the specified directory. More... | |
virtual array_t< signed char > * | selectBytes (const bitvector &mask) const |
Retrieve selected 1-byte integer values. More... | |
virtual array_t< double > * | selectDoubles (const bitvector &mask) const |
Put the selected values into an array as doubles. More... | |
virtual array_t< float > * | selectFloats (const bitvector &mask) const |
Put selected values of a float column into an array. More... | |
virtual array_t< int32_t > * | selectInts (const bitvector &mask) const |
Return selected rows of the column in an array_t object. More... | |
virtual array_t< int64_t > * | selectLongs (const bitvector &mask) const |
Return selected rows of the column in an array_t object. More... | |
virtual std::vector< ibis::opaque > * | selectOpaques (const bitvector &mask) const |
virtual array_t< int16_t > * | selectShorts (const bitvector &mask) const |
Return selected rows of the column in an array_t object. More... | |
virtual std::vector< std::string > * | selectStrings (const bitvector &mask) const |
Return the selected rows as strings. More... | |
virtual array_t< unsigned char > * | selectUBytes (const bitvector &mask) const |
Return selected rows of the column in an array_t object. More... | |
virtual array_t< uint32_t > * | selectUInts (const bitvector &mask) const |
Return selected rows of the column in an array_t object. More... | |
virtual array_t< uint64_t > * | selectULongs (const bitvector &mask) const |
Return selected rows of the column in an array_t object. More... | |
virtual array_t< uint16_t > * | selectUShorts (const bitvector &mask) const |
Return selected rows of the column in an array_t object. More... | |
long | selectValues (const bitvector &, void *) const |
Return selected rows of the column in an array_t object. More... | |
long | selectValues (const bitvector &, void *, array_t< uint32_t > &) const |
Return selected rows of the column in an array_t object along with their positions. More... | |
long | selectValues (const ibis::qContinuousRange &, void *) const |
Select the values satisfying the specified range condition. | |
void | setDataflag (int df) |
int | setNullMask (const bitvector &) |
Change the null mask to the user specified one. More... | |
void | setTimeFormat (const char *) |
Add a custom format for the column to be interpreted as unix time stamps. | |
void | setTimeFormat (const unixTimeScribe &) |
virtual long | stringSearch (const char *, ibis::bitvector &) const |
virtual long | stringSearch (const std::vector< std::string > &, ibis::bitvector &) const |
virtual long | stringSearch (const char *) const |
virtual long | stringSearch (const std::vector< std::string > &) const |
virtual long | truncateData (const char *dir, uint32_t nent, ibis::bitvector &mask) const |
Truncate the number of records in the named dir to nent. More... | |
ibis::TYPE_T | type () const |
Type of the data. More... | |
virtual void | unloadIndex () const |
Unload the index associated with the column. More... | |
const double & | upperBound () const |
The upper bound of the values. | |
void | upperBound (double d) |
virtual void | write (FILE *file) const |
Write the metadata entry. More... | |
virtual long | writeData (const char *dir, uint32_t nold, uint32_t nnew, ibis::bitvector &mask, const void *va1, void *va2=0) |
Write the content in array va1 to directory dir. More... | |
virtual | ~column () |
Destructor. More... | |
virtual double | getActualMin () const |
A group of functions to compute some basic statistics for the column values. More... | |
virtual double | getActualMax () const |
Compute the actual maximum value by reading the data or examining the index. More... | |
virtual double | getSum () const |
Compute the sum of all values by reading the data. | |
long | getCumulativeDistribution (std::vector< double > &bounds, std::vector< uint32_t > &counts) const |
Compute the actual data distribution. More... | |
long | getDistribution (std::vector< double > &bbs, std::vector< uint32_t > &counts) const |
Count the number of records in each bin. More... | |
Static Public Member Functions | |
template<typename T > | |
static void | actualMinMax (const array_t< T > &vals, const ibis::bitvector &mask, double &min, double &max, bool &asc) |
Compute the minimum and maximum of the values in the array. | |
template<typename T > | |
static T | computeMax (const array_t< T > &vals, const ibis::bitvector &mask) |
Compute the maximum value in the array. | |
template<typename T > | |
static T | computeMin (const array_t< T > &vals, const ibis::bitvector &mask) |
Compute the minimum value in the array. | |
template<typename T > | |
static double | computeSum (const array_t< T > &vals, const ibis::bitvector &mask) |
Compute the sum of values in the array. | |
Protected Member Functions | |
void | actualMinMax (const char *fname, const ibis::bitvector &mask, double &min, double &max, bool &asc) const |
Given the name of the data file, compute the actual minimum and the maximum value. More... | |
long | appendStrings (const std::vector< std::string > &, const ibis::bitvector &) |
Append the strings to the current data. More... | |
template<typename T > | |
long | appendValues (const array_t< T > &, const ibis::bitvector &) |
Append the content of incoming array to the current data. More... | |
double | computeMax () const |
Read the base data to compute the maximum value. | |
double | computeMin () const |
Read the data values and compute the minimum value. | |
double | computeSum () const |
Read the base data to compute the total sum. | |
template<typename T > | |
uint32_t | findLower (int fdes, const uint32_t nr, const T tgt) const |
Find the smallest value >= tgt. More... | |
template<typename T > | |
uint32_t | findUpper (int fdes, const uint32_t nr, const T tgt) const |
Find the smallest value > tgt. More... | |
void | logError (const char *event, const char *fmt,...) const |
Print messages starting with "Error" and throw a string exception. | |
virtual int | searchSorted (const ibis::qContinuousRange &, ibis::bitvector &) const |
Resolve a continuous range condition on a sorted column. | |
virtual int | searchSorted (const ibis::qDiscreteRange &, ibis::bitvector &) const |
Resolve a discrete range condition on a sorted column. | |
virtual int | searchSorted (const ibis::qIntHod &, ibis::bitvector &) const |
Resolve a discrete range condition on a sorted column. | |
virtual int | searchSorted (const ibis::qUIntHod &, ibis::bitvector &) const |
Resolve a discrete range condition on a sorted column. | |
template<typename T > | |
int | searchSortedICC (const array_t< T > &vals, const ibis::qContinuousRange &rng, ibis::bitvector &hits) const |
Resolve a continuous range condition on an array of values. | |
template<typename T > | |
int | searchSortedICD (const array_t< T > &vals, const ibis::qDiscreteRange &rng, ibis::bitvector &hits) const |
Resolve a discrete range condition on an array of values. | |
template<typename T > | |
int | searchSortedICD (const array_t< T > &vals, const ibis::qIntHod &rng, ibis::bitvector &hits) const |
Resolve a discrete range condition on an array of values. | |
template<typename T > | |
int | searchSortedICD (const array_t< T > &vals, const ibis::qUIntHod &rng, ibis::bitvector &hits) const |
Resolve a discrete range condition on an array of values. | |
template<typename T > | |
int | searchSortedOOCC (const char *fname, const ibis::qContinuousRange &rng, ibis::bitvector &hits) const |
Resolve a continuous range condition using file operations. More... | |
template<typename T > | |
int | searchSortedOOCD (const char *fname, const ibis::qDiscreteRange &rng, ibis::bitvector &hits) const |
Resolve a discrete range condition using file operations. More... | |
template<typename T > | |
int | searchSortedOOCD (const char *fname, const ibis::qIntHod &rng, ibis::bitvector &hits) const |
Resolve a discrete range condition using file operations. More... | |
template<typename T > | |
int | searchSortedOOCD (const char *fname, const ibis::qUIntHod &rng, ibis::bitvector &hits) const |
Resolve a discrete range condition using file operations. More... | |
template<typename T > | |
long | selectToOpaques (const char *, const bitvector &, std::vector< ibis::opaque > &) const |
template<typename T > | |
long | selectToStrings (const char *, const bitvector &, std::vector< std::string > &) const |
Extract the values masked 1 and convert them to strings. | |
template<> | |
long | selectToStrings (const char *, const bitvector &, std::vector< std::string > &) const |
template<> | |
long | selectToStrings (const char *, const bitvector &, std::vector< std::string > &) const |
template<> | |
long | selectToStrings (const char *dfn, const bitvector &mask, std::vector< std::string > &str) const |
template<> | |
long | selectToStrings (const char *dfn, const bitvector &mask, std::vector< std::string > &str) const |
template<typename T > | |
long | selectValuesT (const char *, const bitvector &, array_t< T > &) const |
Select values marked in the bitvector mask. More... | |
template<typename T > | |
long | selectValuesT (const char *, const bitvector &mask, array_t< T > &vals, array_t< uint32_t > &inds) const |
Select the values marked in the bitvector mask. More... | |
long | string2int (int fptr, dictionary &dic, uint32_t nbuf, char *buf, array_t< uint32_t > &out) const |
Convert strings in the opened file to a list of integers with the aid of a dictionary. More... | |
Protected Attributes | |
int | dataflag |
Presence of the data file. More... | |
ibis::index * | idx |
The index for this column. It is not considered as a must-have member. | |
ibis::util::sharedInt32 | idxcnt |
The number of functions using the index. | |
double | lower |
The minimum value. | |
std::string | m_bins |
Index/binning specification. | |
std::string | m_desc |
Free-form description of the column. | |
std::string | m_name |
Name of the column. | |
bool | m_sorted |
Are the column values in ascending order? | |
ibis::TYPE_T | m_type |
Data type. | |
unixTimeScribe * | m_utscribe |
ibis::bitvector | mask_ |
The entries marked 1 are valid. | |
const part * | thePart |
Data partition containing this column. | |
double | upper |
The maximum value. | |
Friends | |
class | indexLock |
class | mutexLock |
class | readLock |
class | softWriteLock |
class | writeLock |
The class to represent a column of a data partition.
FastBit represents user data as tables (each table may be divided into multiple partitions) where each table consists of a number of columns. Internally, the data values for each column are stored separately from the others. In relational algebra terms, this is equivalent to projecting out each attribute of a relation separately. It increases the efficiency of searching on a relatively small number of attributes compared to the horizontal data organization used in typical relational database systems.
Rules about column names.
|
virtual |
Destructor.
It acquires a write lock to make sure all other operations have completed.
Reimplemented in ibis::bord::column.
ibis::column::column | ( | const column & | rhs | ) |
The copy constructor.
References fullname(), ibis::fileManager::instance(), m_name, m_type, and ibis::TYPESTRING.
ibis::column::column | ( | const part * | tbl, |
FILE * | file | ||
) |
Reconstitute a column from the content of a file.
Read the basic information about a column from file.
References ibis::BIT, ibis::BLOB, ibis::CATEGORY, ibis::DOUBLE, ibis::FLOAT, fullname(), ibis::util::getString(), ibis::fileManager::instance(), ibis::INT, ibis::resource::isStringTrue(), ibis::util::logMessage(), ibis::LONG, lower, m_bins, m_desc, m_name, m_sorted, m_type, ibis::SHORT, ibis::TEXT, ibis::TYPESTRING, ibis::UBYTE, ibis::UDT, ibis::UINT, ibis::ULONG, ibis::UNKNOWN_TYPE, upper, and ibis::USHORT.
|
protected |
Given the name of the data file, compute the actual minimum and the maximum value.
Compute the actual minimum and maximum values.
Given a data file name, read its content to compute the actual minimum and the maximum of the data values. Only deal with four types of values, unsigned int, signed int, float and double.
References ibis::DOUBLE, ibis::FLOAT, ibis::fileManager::getFile(), ibis::fileManager::instance(), ibis::INT, ibis::LONG, ibis::SHORT, ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
|
virtual |
Append new data in directory df to the end of existing data in dt.
Append the content of the file in df to the end of the file in dt.
It returns the number of rows appended or a negative number to indicate error.
dt and df can not be the same.
Reimplemented in ibis::bord::column, ibis::category, ibis::blob, and ibis::text.
References ibis::bitvector::adjustSize(), ibis::index::append(), ibis::bitvector::cnt(), ibis::util::copy(), ibis::index::create(), ibis::fileManager::flushFile(), ibis::util::getFileSize(), ibis::index::getNRows(), ibis::fileManager::instance(), ibis::util::logMessage(), ibis::OID, ibis::index::print(), ibis::bitvector::read(), ibis::util::read(), ibis::bitvector::size(), UnixOpen, ibis::bitvector::write(), ibis::index::write(), and ibis::util::write().
Referenced by ibis::bord::column::append(), and ibis::part::appendToBackup().
|
virtual |
Append the records in vals to the current working dataset.
The 'void*' in this function follows the convention of the function getValuesArray (not writeData), i.e., for the ten fixed-size elementary data types, it is array_t<type>* and for string-valued columns it is std::vector<std::string>*.
Return the number of entries actually written to disk or a negative number to indicate error conditions.
Reimplemented in ibis::bord::column, ibis::category, ibis::blob, and ibis::text.
References ibis::CATEGORY, ibis::DOUBLE, ibis::FLOAT, ibis::INT, ibis::LONG, ibis::SHORT, ibis::TEXT, ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
|
protected |
Append the strings to the current data.
This function attempts to fill the existing data file with null values based on the content of the validity mask.
It then writes the strings in vals and extends the validity mask.
References ibis::bitvector::adjustSize(), and UnixOpen.
|
protected |
Append the content of incoming array to the current data.
This function attempts to fill the data file with NULL values if the existing data file is shorter than expected.
It writes the data in vals and extends the existing validity mask.
References ibis::bitvector::adjustSize(), ibis::array_t< T >::size(), UnixOpen, and ibis::util::write().
template long ibis::column::castAndWrite | ( | const array_t< double > & | vals, |
ibis::bitvector & | mask, | ||
const T | special | ||
) |
Cast the incoming array into the specified type T before writing the values to the file for this column.
This function uses assignment statements to perform the casting operations. Warning: this function does not check that the cast values are equal to the incoming values!
References ibis::bitvector::indexSet::nIndices(), and ibis::bitvector::size().
Referenced by ibis::part::addColumn().
|
virtual |
Compute the actual min/max values.
It actually goes through all the values. This function reads the data in the active data directory and modifies the member variables to record the actual min/max.
Reimplemented in ibis::bord::column, and ibis::blob.
Referenced by ibis::column::info::info(), ibis::part::quickTest(), and ibis::part::testRangeOperators().
|
virtual |
Compute the actual min/max values.
It actually goes through all the values. This function reads the data in the given directory and modifies the member variables to record the actual min/max.
Reimplemented in ibis::bord::column, and ibis::blob.
|
virtual |
Compute the actual min/max of the data in directory dir.
Report the actual min/max found back through output arguments min and max. This version does not modify the min/max recorded in this column object.
Reimplemented in ibis::bord::column, and ibis::blob.
int ibis::column::contractRange | ( | ibis::qContinuousRange & | rng | ) | const |
Contract the range expression so that the new range falls exactly on the bin boundaries.
Referenced by ibis::query::doContract().
const char * ibis::column::dataFileName | ( | std::string & | fname, |
const char * | dir = 0 |
||
) | const |
Name of the data file in the given data directory.
If the directory name is not given, the directory is assumed to be the current data directory of the data partition. There is no need for the caller to free the pointer returned by this function. Upon successful completion of this function, it returns fname.c_str(); otherwise, it returns the nil pointer.
Referenced by ibis::index::create(), ibis::part::doScan(), and ibis::part::negativeScan().
|
virtual |
Compute a lower bound and an upper bound on the number of hits using the bitmap index.
If no index is available a new one will be built. If no index can be built, the lower bound will contain nothing and the upper bound will contain everything. The two bounds are returned as bitmaps which mark the qualified rows as one, where the lower bound is stored in 'low' and the upper bound is stored in 'high'. If the bitvector 'high' has fewer bits than 'low', the bitvector 'low' is assumed to have an exact solution. This function always returns zero (0).
References ibis::bitvector::adjustSize(), ibis::bitvector::copy(), ibis::bitvector::set(), and ibis::bitvector::size().
Referenced by ibis::part::doScan(), ibis::part::estimateRange(), and ibis::part::negativeScan().
|
virtual |
Compute a lower bound and an upper bound for hits.
Compute an upper bound on the number of hits.
Estimating hits for a discrete range is actually done with evaluateRange.
References ibis::bitvector::clear().
|
virtual |
Compute a lower bound and an upper bound for hits.
Estimating hits for a discrete range.
Does nothing useful in this implementation.
References ibis::bitvector::set(), and ibis::bitvector::sloppyCount().
|
virtual |
Compute a lower bound and an upper bound for hits.
Estimating hits for a discrete range. Does nothing in this implementation.
References ibis::bitvector::set(), and ibis::bitvector::sloppyCount().
|
virtual |
Use the index of the column to compute an upper bound on the number of hits.
If no index can be computed, it will return the number of rows as the upper bound.
|
virtual |
Compute an upper bound on the number of hits.
A dummy function to estimate the number of possible hits.
It always returns the number of rows in the data partition.
|
virtual |
Compute an upper bound on the number of hits.
A dummy function to estimate the number of possible hits.
It always returns the number of rows in the data partition.
|
virtual |
Evaluate a range condition and retrieve the selected values.
This is a combination of evaluateRange and selectTypes. This combination allows some optimizations to reduce the I/O operations.
Note the third argument vals must be a valid pointer to the correct type. The acceptable types are as follows (same as required by in-memory data partitions):
If vals is a nil pointer, this function simply calls evaluateRange.
References ibis::bitvector::clear(), ibis::bitvector::cnt(), ibis::part::countPages(), ibis::OID, ibis::fileManager::pageSize(), ibis::bitvector::set(), ibis::bitvector::size(), ibis::TEXT, and ibis::TYPESTRING.
|
virtual |
Compute the exact answer.
Attempts to use the index if one is available, otherwise use the base data.
Return a negative value to indicate error, 0 to indicate no hit, and positive value to indicate there are zero or more hits.
Reimplemented in ibis::bord::column.
References ibis::bitvector::adjustSize(), ibis::bitvector::clear(), ibis::bitvector::cnt(), ibis::bitvector::copy(), ibis::part::countPages(), ibis::util::envLock, ibis::bitvector::flip(), ibis::util::getFileSize(), ibis::fileManager::iBeat(), ibis::OID, ibis::fileManager::pageSize(), ibis::bitvector::set(), ibis::bitvector::size(), ibis::bitvector::sloppyCount(), ibis::TEXT, and ibis::TYPESTRING.
Referenced by ibis::part::evaluateRange().
int ibis::column::expandRange | ( | ibis::qContinuousRange & | rng | ) | const |
Expand the range expression so that the new range falls exactly on the bin boundaries.
Referenced by ibis::query::doExpand().
|
protected |
Find the smallest value >= tgt.
An equivalent of array_t<T>::find.
It reads the open file one word at a time and therefore is likely to be very slow.
References ibis::fileManager::instance(), and ibis::fileManager::recordPages().
|
inlinevirtual |
Determine if the input string has appeared in this data partition.
If yes, return the pointer to the incoming string, otherwise return nil.
Reimplemented in ibis::text.
|
protected |
Find the smallest value > tgt.
An equivalent of array_t<T>::find_upper.
It reads the open file one word at a time and therefore is likely to be very slow.
References ibis::fileManager::instance(), and ibis::fileManager::recordPages().
std::string ibis::column::fullname | ( | ) | const |
Fully qualified name.
Generate a SQL style fully qualified name of the form part-name.column-name.
If the part-name is not available, it will simply return the current column name. If the part-name is available, but this column's name is empty, the column name part will be filled with a single question mark.
Referenced by ibis::bin::bin(), column(), ibis::bord::column::column(), ibis::index::create(), ibis::direkte::direkte(), ibis::egale::egale(), ibis::entre::entre(), ibis::index::index(), ibis::column::indexLock::indexLock(), ibis::moins::moins(), ibis::column::mutexLock::mutexLock(), ibis::column::readLock::readLock(), ibis::relic::relic(), ibis::column::softWriteLock::softWriteLock(), and ibis::column::writeLock::writeLock().
|
virtual |
Compute the actual maximum value by reading the data or examining the index.
It returns -DBL_MAX in case of error.
Reimplemented in ibis::blob.
Referenced by ibis::whereClause::amplify(), ibis::part::coarsenBins(), ibis::part::get1DDistribution(), ibis::part::get2DDistributionI(), ibis::part::get2DDistributionU(), ibis::part::getActualMax(), and ibis::mensa::getColumnMax().
|
virtual |
A group of functions to compute some basic statistics for the column values.
Compute the actual minimum value by reading the data or examining the index. It returns DBL_MAX in case of error.
Reimplemented in ibis::blob.
Referenced by ibis::whereClause::amplify(), ibis::part::get1DDistribution(), ibis::part::get2DDistributionI(), ibis::part::get2DDistributionU(), ibis::part::getActualMin(), and ibis::mensa::getColumnMin().
long ibis::column::getCumulativeDistribution | ( | std::vector< double > & | bounds, |
std::vector< uint32_t > & | counts | ||
) | const |
Compute the actual data distribution.
It will generate an index for the column if one is not already available. The value in counts[i] is the number of values less than bounds[i]. If there are no NULL values in the column, the array counts will start with 0 and end with the number of rows in the data. The array bounds will end with a value that is greater than the actual maximum value.
|
inlinevirtual |
Return a pointer to a dictionary.
Used by ibis::category and ibis::bord::column (for UINT type converted from ibis::category). For all other types, this function returns a nil pointer.
Reimplemented in ibis::bord::column, and ibis::category.
Referenced by ibis::bord::bord(), ibis::bord::evaluateTerms(), ibis::bord::groupbya(), and ibis::whereClause::verifyExpr().
long ibis::column::getDistribution | ( | std::vector< double > & | bbs, |
std::vector< uint32_t > & | counts | ||
) | const |
Count the number of records in each bin.
The array bbs contains bin boundaries that define the following bins:
(..., bbs[0]), [bbs[0], bbs[1]), ..., [bbs[N-1], ...)
Because of the two open bins at the two ends, N bin boundaries define N+1 bins. The array counts has one more element than bbs. This function returns the number of bins. If this function was executed successfully, the return value should be the same as the size of array counts, and one larger than the size of array bbs.
Referenced by ibis::part::get1DDistribution().
ibis::array_t< int32_t > * ibis::column::getIntArray | ( | ) | const |
Return all rows of the column as an array_t object.
Caller is responsible for deleting the returned object.
References ibis::fileManager::getFile(), ibis::fileManager::instance(), ibis::INT, and ibis::UINT.
void ibis::column::getNullMask | ( | ibis::bitvector & | mask | ) | const |
If there is a null mask stored already, return a shallow copy of it in mask.
Otherwise, find out the size of the data file first. If the actual content of the null mask file has fewer bits than the data file has rows, assume the mask is for the leading portion of the data file and the remaining portion of the data file is valid (not null).
References ibis::bitvector::adjustSize(), ibis::CATEGORY, ibis::bitvector::cnt(), ibis::bitvector::copy(), ibis::DOUBLE, ibis::FLOAT, ibis::fileManager::flushFile(), ibis::fileManager::getFile(), ibis::fileManager::instance(), ibis::INT, ibis::util::logMessage(), ibis::LONG, ibis::OID, ibis::bitvector::set(), ibis::SHORT, ibis::array_t< T >::size(), ibis::bitvector::size(), ibis::TEXT, ibis::UBYTE, ibis::UINT, ibis::ULONG, ibis::USHORT, and ibis::bitvector::write().
Referenced by ibis::bord::column::column(), ibis::part::doCount(), ibis::part::doScan(), ibis::part::get1DBins(), ibis::part::get1DDistribution(), ibis::part::get2DBins(), ibis::part::get2DDistribution(), ibis::part::get2DDistributionA(), ibis::part::get2DDistributionU(), ibis::part::get3DBins(), ibis::part::get3DDistribution(), ibis::part::getDistribution(), ibis::part::getJointDistribution(), ibis::part::barrel::getNullMask(), ibis::jNatural::jNatural(), ibis::jRange::jRange(), ibis::part::old2DDistribution(), ibis::part::patternSearch(), ibis::query::processJoin(), ibis::part::quickTest(), ibis::part::recursiveQuery(), and ibis::part::stringSearch().
|
virtual |
Return the raw binary value for the i-th row.
This is primarily intended to retrieve values of blobs.
Reimplemented in ibis::text, and ibis::blob.
References ibis::DOUBLE, ibis::FLOAT, ibis::INT, ibis::LONG, ibis::OID, ibis::SHORT, ibis::array_t< T >::size(), ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
Referenced by ibis::mensa::cursor::getColumnAsOpaque(), and ibis::mensa::getColumnAsOpaques().
|
inlinevirtual |
Return the string value for the i-th row.
Only implemented for ibis::text and ibis::category.
Reimplemented in ibis::bord::column, ibis::category, and ibis::text.
Referenced by ibis::mensa::cursor::getColumnAsOpaque(), ibis::mensa::getColumnAsOpaques(), and ibis::bord::cursor::getColumnAsString().
|
virtual |
Compute the locations of the rows that cannot be decided by the index.
Returns the fraction of rows that might satisfy the specified range condition. If there is no index, nothing can be decided.
Referenced by ibis::part::getUndecidable().
|
virtual |
Find rows that can not be decided with the existing index.
A dummy implementation.
It always returns 1.0 to indicate that every row is undecidable.
|
virtual |
Find rows that can not be decided with the existing index.
A dummy implementation.
It always returns 1.0 to indicate that every row is undecidable.
|
virtual |
Copy all rows of the column into an array_t object.
The incoming argument must be array_t<Type>*. This function explicitly casts vals into one of the ten supported numerical data types. If the incoming argument is not of the correct type, this cast operation will have unpredictable consequences.
It returns 0 to indicate success, and a negative number to indicate error. If vals is nil, no values are copied; this function essentially tests whether the values are accessible: >= 0 yes, < 0 no.
Reimplemented in ibis::bord::column, and ibis::blob.
References ibis::DOUBLE, ibis::FLOAT, ibis::INT, ibis::LONG, ibis::OID, ibis::SHORT, ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
Referenced by ibis::bin::bin(), ibis::colStrings::colStrings(), ibis::bord::copyColumn(), ibis::part::doCount(), ibis::part::doScan(), ibis::part::fill2DBins2(), ibis::part::fill2DBinsWeighted2(), ibis::part::fill3DBins2(), ibis::part::fill3DBins3(), ibis::part::fill3DBinsWeighted2(), ibis::part::fill3DBinsWeighted3(), ibis::part::get1DBins(), ibis::part::get1DBins_(), ibis::part::get2DBins(), ibis::part::get3DBins(), ibis::mensa::getColumnAsBytes(), ibis::mensa::getColumnAsDoubles(), ibis::mensa::getColumnAsFloats(), ibis::mensa::getColumnAsInts(), ibis::mensa::getColumnAsLongs(), ibis::mensa::getColumnAsOpaques(), ibis::mensa::getColumnAsShorts(), ibis::mensa::getColumnAsStrings(), ibis::mensa::getColumnAsUBytes(), ibis::mensa::getColumnAsUInts(), ibis::mensa::getColumnAsULongs(), ibis::mensa::getColumnAsUShorts(), ibis::part::negativeScan(), and ibis::relic::relic().
bool ibis::column::hasIndex | ( | ) | const |
!< Are the values sorted?
Has an index been built for this column? Returns true for yes, false for no.
Referenced by fastbit_iapi_build_index().
bool ibis::column::hasRoster | ( | ) | const |
Is there a roster list built for this column? Returns true for yes, false for no.
uint32_t ibis::column::indexedRows | ( | ) | const |
Compute the number of rows captured by the index of this column.
This function loads the metadata about the index into memory through ibis::column::indexLock.
|
virtual |
Compute the sizes (in number of elements) of three arrays that would be produced by writeIndex.
|
virtual |
Compute the index size (in bytes).
Return a negative value if the index is not in memory and the index file does not exist.
Reimplemented in ibis::blob.
References ibis::util::getFileSize().
Referenced by ibis::part::get2DDistribution().
|
inline |
!< Retrieve the number of bins used.
Set the index specification.
void ibis::column::indexSpeedTest | ( | ) | const |
Perform a set of built-in tests to determine the speed of common operations.
void ibis::column::isSorted | ( | bool | iss | ) |
Change the flag m_sorted.
If the flag m_sorted is set to true, the caller should have sorted the data file. An incorrect flag will lead to wrong answers to queries. This operation invokes a write lock on the column object.
|
virtual |
Load the index associated with the column.
iopt | This option is passed to ibis::index::create to be used if a new index is to be created. |
ropt | This option is passed to ibis::index::create to control the reading operations for reconstituting the index object from an index file. |
Reimplemented in ibis::category, ibis::text, and ibis::blob.
References ibis::index::create(), ibis::index::getMax(), ibis::index::getMin(), ibis::index::getNRows(), ibis::gParameters(), ibis::index::name(), and ibis::index::print().
Referenced by ibis::bord::bord(), ibis::mensa::buildIndex(), fastbit_build_index(), fastbit_iapi_build_index(), ibis::column::indexLock::indexLock(), and ibis::text::loadIndex().
const char * ibis::column::nullMaskName | ( | std::string & | fname | ) | const |
Name of the NULL mask file.
On successful completion of this function, the return value is the result of fname.c_str(); otherwise the return value is a nil pointer to indicate error.
|
virtual |
Write the selected records to the specified directory.
Save only the rows marked 1. Replace the data file in dest. Return the number of rows written to the new file or a negative number to indicate error.
Reimplemented in ibis::text.
References ibis::fileManager::buffer< T >::address(), ibis::fileManager::flushFile(), ibis::fileManager::instance(), ibis::util::logMessage(), ibis::bitvector::indexSet::nIndices(), ibis::fileManager::buffer< T >::size(), and ibis::bitvector::subset().
Referenced by ibis::part::purgeInactive().
|
protected |
Resolve a continuous range condition using file operations.
The backup option for searchSortedIC.
This function opens the named file and reads its content one word at a time, which is likely to be very slow. It does assume the content of the file is sorted in ascending order and performs binary searches.
References ibis::bitvector::adjustSize(), ibis::bitvector::appendFill(), ibis::bitvector::clear(), ibis::fileManager::instance(), ibis::qContinuousRange::leftBound(), ibis::util::read(), ibis::fileManager::recordPages(), ibis::qContinuousRange::rightBound(), ibis::util::round_up(), ibis::bitvector::set(), ibis::bitvector::sloppyCount(), and UnixOpen.
|
protected |
Resolve a discrete range condition using file operations.
This version of search function reads the content of data file through explicit read operations.
It sequentially reads the content of the data file. Note the content of the data file is assumed to be sorted in ascending order as elementary data type T.
References ibis::fileManager::buffer< T >::address(), ibis::bitvector::adjustSize(), ibis::bitvector::clear(), ibis::qDiscreteRange::colName(), ibis::qDiscreteRange::getValues(), ibis::fileManager::instance(), ibis::util::read(), ibis::fileManager::recordPages(), ibis::bitvector::reserve(), ibis::bitvector::setBit(), ibis::array_t< T >::size(), ibis::fileManager::buffer< T >::size(), and UnixOpen.
|
protected |
Resolve a discrete range condition using file operations.
This version of search function reads the content of data file through explicit read operations.
It sequentially reads the content of the data file. Note the content of the data file is assumed to be sorted in ascending order as elementary data type T.
References ibis::fileManager::buffer< T >::address(), ibis::bitvector::adjustSize(), ibis::bitvector::clear(), ibis::qIntHod::colName(), ibis::qIntHod::getValues(), ibis::fileManager::instance(), ibis::util::read(), ibis::fileManager::recordPages(), ibis::bitvector::reserve(), ibis::bitvector::setBit(), ibis::array_t< T >::size(), ibis::fileManager::buffer< T >::size(), and UnixOpen.
|
protected |
Resolve a discrete range condition using file operations.
This version of search function reads the content of data file through explicit read operations.
It sequentially reads the content of the data file. Note the content of the data file is assumed to be sorted in ascending order as elementary data type T.
References ibis::fileManager::buffer< T >::address(), ibis::bitvector::adjustSize(), ibis::bitvector::clear(), ibis::qUIntHod::colName(), ibis::qUIntHod::getValues(), ibis::fileManager::instance(), ibis::util::read(), ibis::fileManager::recordPages(), ibis::bitvector::reserve(), ibis::bitvector::setBit(), ibis::array_t< T >::size(), ibis::fileManager::buffer< T >::size(), and UnixOpen.
|
virtual |
Retrieve selected 1-byte integer values.
Note that unsigned integers are simply treated as signed integers.
Reimplemented in ibis::bord::column, and ibis::blob.
References ibis::bitvector::cnt(), ibis::horometer::CPUTime(), ibis::fileManager::getFile(), ibis::bitvector::indexSet::indices(), ibis::fileManager::instance(), ibis::bitvector::indexSet::isRange(), ibis::util::logMessage(), ibis::bitvector::indexSet::nIndices(), ibis::horometer::realTime(), ibis::array_t< T >::size(), ibis::bitvector::size(), ibis::horometer::start(), ibis::horometer::stop(), and ibis::UBYTE.
Referenced by ibis::bord::column::append(), ibis::part::fill2DBins2(), ibis::part::fill2DBinsWeighted2(), ibis::part::fill3DBins2(), ibis::part::fill3DBins3(), ibis::part::fill3DBinsWeighted2(), ibis::part::fill3DBinsWeighted3(), ibis::part::get1DBins(), ibis::part::get1DBins_(), ibis::part::get1DDistribution(), ibis::part::get2DBins(), ibis::part::get3DBins(), ibis::part::get3DDistributionA(), ibis::part::get3DDistributionA1(), ibis::part::get3DDistributionA2(), and ibis::part::selectBytes().
|
virtual |
Put the selected values into an array as doubles.
Reimplemented in ibis::bord::column, and ibis::blob.
References ibis::CATEGORY, ibis::bitvector::cnt(), ibis::horometer::CPUTime(), ibis::DOUBLE, ibis::FLOAT, ibis::fileManager::getFile(), ibis::bitvector::indexSet::indices(), ibis::fileManager::instance(), ibis::INT, ibis::bitvector::indexSet::isRange(), ibis::util::logMessage(), ibis::LONG, ibis::bitvector::indexSet::nIndices(), ibis::horometer::realTime(), ibis::SHORT, ibis::array_t< T >::size(), ibis::bitvector::size(), ibis::horometer::start(), ibis::horometer::stop(), ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
Referenced by ibis::bord::column::append(), ibis::part::fill2DBins2(), ibis::part::fill2DBinsWeighted2(), ibis::part::fill3DBins2(), ibis::part::fill3DBins3(), ibis::part::fill3DBinsWeighted2(), ibis::part::fill3DBinsWeighted3(), ibis::part::get1DBins(), ibis::part::get1DBins_(), ibis::part::get1DDistribution(), ibis::part::get2DBins(), ibis::part::get2DDistribution(), ibis::part::get2DDistributionA(), ibis::part::get2DDistributionU(), ibis::part::get3DBins(), ibis::part::get3DDistribution(), ibis::part::get3DDistributionA(), ibis::part::get3DDistributionA1(), ibis::part::get3DDistributionA2(), ibis::part::getCumulativeDistribution(), ibis::part::getDistribution(), ibis::part::getJointDistribution(), ibis::part::old2DDistribution(), and ibis::part::selectDoubles().
|
virtual |
Put selected values of a float column into an array.
Reimplemented in ibis::bord::column, and ibis::blob.
References ibis::bitvector::cnt(), ibis::horometer::CPUTime(), ibis::FLOAT, ibis::fileManager::getFile(), ibis::bitvector::indexSet::indices(), ibis::fileManager::instance(), ibis::bitvector::indexSet::isRange(), ibis::util::logMessage(), ibis::bitvector::indexSet::nIndices(), ibis::horometer::realTime(), ibis::SHORT, ibis::array_t< T >::size(), ibis::bitvector::size(), ibis::horometer::start(), ibis::horometer::stop(), ibis::UBYTE, and ibis::USHORT.
Referenced by ibis::bord::column::append(), ibis::part::fill2DBins2(), ibis::part::fill2DBinsWeighted2(), ibis::part::fill3DBins2(), ibis::part::fill3DBins3(), ibis::part::fill3DBinsWeighted2(), ibis::part::fill3DBinsWeighted3(), ibis::part::get1DBins(), ibis::part::get1DBins_(), ibis::part::get1DDistribution(), ibis::part::get2DBins(), ibis::part::get2DDistribution(), ibis::part::get2DDistributionA(), ibis::part::get2DDistributionU(), ibis::part::get3DBins(), ibis::part::get3DDistribution(), ibis::part::get3DDistributionA(), ibis::part::get3DDistributionA1(), ibis::part::get3DDistributionA2(), ibis::part::getCumulativeDistribution(), ibis::part::getDistribution(), ibis::part::getJointDistribution(), ibis::part::old2DDistribution(), and ibis::part::selectFloats().
|
virtual |
Return selected rows of the column in an array_t object.
Reimplemented in ibis::bord::column, and ibis::blob.
References ibis::CATEGORY, ibis::bitvector::cnt(), ibis::horometer::CPUTime(), ibis::fileManager::getFile(), ibis::bitvector::indexSet::indices(), ibis::fileManager::instance(), ibis::INT, ibis::bitvector::indexSet::isRange(), ibis::util::logMessage(), ibis::bitvector::indexSet::nIndices(), ibis::horometer::realTime(), ibis::SHORT, ibis::array_t< T >::size(), ibis::bitvector::size(), ibis::horometer::start(), ibis::horometer::stop(), ibis::TEXT, ibis::UBYTE, ibis::UINT, and ibis::USHORT.
Referenced by ibis::bord::column::append(), ibis::part::fill2DBins2(), ibis::part::fill2DBinsWeighted2(), ibis::part::fill3DBins2(), ibis::part::fill3DBins3(), ibis::part::fill3DBinsWeighted2(), ibis::part::fill3DBinsWeighted3(), ibis::part::get1DBins(), ibis::part::get1DBins_(), ibis::part::get1DDistribution(), ibis::part::get2DBins(), ibis::part::get2DDistribution(), ibis::part::get2DDistributionA(), ibis::part::get2DDistributionU(), ibis::part::get3DBins(), ibis::part::get3DDistribution(), ibis::part::get3DDistributionA(), ibis::part::get3DDistributionA1(), ibis::part::get3DDistributionA2(), ibis::part::getCumulativeDistribution(), ibis::part::getDistribution(), ibis::part::getJointDistribution(), ibis::part::old2DDistribution(), and ibis::part::selectInts().
|
virtual |
Return selected rows of the column in an array_t object.
Can be called on all integral types. Note that 64-bit unsigned integers are simply treated as signed integers. This may cause the values to be interpreted incorrectly. Shorter versions of unsigned integers are treated correctly as positive values.
Reimplemented in ibis::bord::column, ibis::text, and ibis::blob.
References ibis::CATEGORY, ibis::bitvector::cnt(), ibis::horometer::CPUTime(), ibis::fileManager::getFile(), ibis::bitvector::indexSet::indices(), ibis::fileManager::instance(), ibis::INT, ibis::bitvector::indexSet::isRange(), ibis::util::logMessage(), ibis::LONG, ibis::bitvector::indexSet::nIndices(), ibis::horometer::realTime(), ibis::SHORT, ibis::array_t< T >::size(), ibis::bitvector::size(), ibis::horometer::start(), ibis::horometer::stop(), ibis::TEXT, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
Referenced by ibis::bord::column::append(), ibis::part::fill2DBins2(), ibis::part::fill2DBinsWeighted2(), ibis::part::fill3DBins2(), ibis::part::fill3DBins3(), ibis::part::fill3DBinsWeighted2(), ibis::part::fill3DBinsWeighted3(), ibis::part::get1DBins(), ibis::part::get1DBins_(), ibis::part::get1DDistribution(), ibis::part::get2DBins(), ibis::part::get2DDistribution(), ibis::part::get2DDistributionA(), ibis::part::get2DDistributionU(), ibis::part::get3DBins(), ibis::part::get3DDistribution(), ibis::part::get3DDistributionA(), ibis::part::get3DDistributionA1(), ibis::part::get3DDistributionA2(), and ibis::part::selectLongs().
|
virtual |
Return selected rows of the column in an array_t object.
Can convert all integers 2-byte or less in length. Note that unsigned integers are simply treated as signed integers. Shorter types of unsigned integers are treated correctly as positive values.
Reimplemented in ibis::bord::column, and ibis::blob.
References ibis::bitvector::cnt(), ibis::horometer::CPUTime(), ibis::fileManager::getFile(), ibis::bitvector::indexSet::indices(), ibis::fileManager::instance(), ibis::bitvector::indexSet::isRange(), ibis::util::logMessage(), ibis::bitvector::indexSet::nIndices(), ibis::horometer::realTime(), ibis::SHORT, ibis::array_t< T >::size(), ibis::bitvector::size(), ibis::horometer::start(), ibis::horometer::stop(), ibis::UBYTE, and ibis::USHORT.
Referenced by ibis::bord::column::append(), ibis::part::fill2DBins2(), ibis::part::fill2DBinsWeighted2(), ibis::part::fill3DBins2(), ibis::part::fill3DBins3(), ibis::part::fill3DBinsWeighted2(), ibis::part::fill3DBinsWeighted3(), ibis::part::get1DBins(), ibis::part::get1DBins_(), ibis::part::get1DDistribution(), ibis::part::get2DBins(), ibis::part::get3DBins(), ibis::part::get3DDistributionA(), ibis::part::get3DDistributionA1(), ibis::part::get3DDistributionA2(), and ibis::part::selectShorts().
|
virtual |
Return the selected rows as strings.
This version returns a std::vector<std::string>, which provides wholly self-contained string values. It may take more memory than necessary, and the memory usage of std::string is not tracked by FastBit. The advantage is that it should work regardless of the actual data type of the column.
Reimplemented in ibis::bord::column, ibis::category, ibis::text, and ibis::blob.
References ibis::bitvector::cnt(), ibis::DOUBLE, ibis::FLOAT, ibis::INT, ibis::LONG, ibis::OID, ibis::SHORT, ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
Referenced by ibis::bord::column::append(), ibis::colStrings::colStrings(), ibis::bord::evaluateTerms(), and ibis::part::selectStrings().
|
virtual |
Return selected rows of the column in an array_t object.
Reimplemented in ibis::bord::column, and ibis::blob.
References ibis::bitvector::cnt(), ibis::horometer::CPUTime(), ibis::fileManager::getFile(), ibis::bitvector::indexSet::indices(), ibis::fileManager::instance(), ibis::bitvector::indexSet::isRange(), ibis::util::logMessage(), ibis::bitvector::indexSet::nIndices(), ibis::horometer::realTime(), ibis::array_t< T >::size(), ibis::bitvector::size(), ibis::horometer::start(), ibis::horometer::stop(), and ibis::UBYTE.
Referenced by ibis::bord::column::append(), ibis::part::fill2DBins2(), ibis::part::fill2DBinsWeighted2(), ibis::part::fill3DBins2(), ibis::part::fill3DBins3(), ibis::part::fill3DBinsWeighted2(), ibis::part::fill3DBinsWeighted3(), ibis::part::get1DBins(), ibis::part::get1DBins_(), ibis::part::get1DDistribution(), ibis::part::get2DBins(), ibis::part::get3DBins(), ibis::part::get3DDistributionA(), ibis::part::get3DDistributionA1(), ibis::part::get3DDistributionA2(), and ibis::part::selectUBytes().
|
virtual |
Return selected rows of the column in an array_t object.
Can be called on columns of unsigned integral types, UINT, CATEGORY, USHORT, and UBYTE.
Reimplemented in ibis::bord::column, ibis::category, ibis::text, and ibis::blob.
References ibis::CATEGORY, ibis::bitvector::cnt(), ibis::horometer::CPUTime(), ibis::fileManager::getFile(), ibis::bitvector::indexSet::indices(), ibis::fileManager::instance(), ibis::bitvector::indexSet::isRange(), ibis::util::logMessage(), ibis::bitvector::indexSet::nIndices(), ibis::horometer::realTime(), ibis::array_t< T >::size(), ibis::bitvector::size(), ibis::horometer::start(), ibis::horometer::stop(), ibis::TEXT, ibis::UBYTE, ibis::UINT, and ibis::USHORT.
Referenced by ibis::bord::column::append(), ibis::part::fill2DBins2(), ibis::part::fill2DBinsWeighted2(), ibis::part::fill3DBins2(), ibis::part::fill3DBins3(), ibis::part::fill3DBinsWeighted2(), ibis::part::fill3DBinsWeighted3(), ibis::part::get1DBins(), ibis::part::get1DBins_(), ibis::part::get1DDistribution(), ibis::part::get2DBins(), ibis::part::get2DDistribution(), ibis::part::get2DDistributionA(), ibis::part::get2DDistributionU(), ibis::part::get3DBins(), ibis::part::get3DDistribution(), ibis::part::get3DDistributionA(), ibis::part::get3DDistributionA1(), ibis::part::get3DDistributionA2(), ibis::part::getCumulativeDistribution(), ibis::part::getDistribution(), ibis::part::getJointDistribution(), ibis::part::old2DDistribution(), and ibis::part::selectUInts().
|
virtual |
Return selected rows of the column in an array_t object.
Can be called on all unsigned integral types.
Reimplemented in ibis::bord::column, and ibis::blob.
References ibis::CATEGORY, ibis::bitvector::cnt(), ibis::horometer::CPUTime(), ibis::fileManager::getFile(), ibis::bitvector::indexSet::indices(), ibis::fileManager::instance(), ibis::bitvector::indexSet::isRange(), ibis::util::logMessage(), ibis::bitvector::indexSet::nIndices(), ibis::horometer::realTime(), ibis::array_t< T >::size(), ibis::bitvector::size(), ibis::horometer::start(), ibis::horometer::stop(), ibis::TEXT, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
Referenced by ibis::bord::column::append(), ibis::part::fill2DBins2(), ibis::part::fill2DBinsWeighted2(), ibis::part::fill3DBins2(), ibis::part::fill3DBins3(), ibis::part::fill3DBinsWeighted2(), ibis::part::fill3DBinsWeighted3(), ibis::part::get1DBins(), ibis::part::get1DBins_(), ibis::part::get1DDistribution(), ibis::part::get2DBins(), ibis::part::get2DDistribution(), ibis::part::get3DBins(), ibis::part::get3DDistributionA(), ibis::part::get3DDistributionA1(), ibis::part::get3DDistributionA2(), and ibis::part::selectULongs().
|
virtual |
Return selected rows of the column in an array_t object.
Reimplemented in ibis::bord::column, and ibis::blob.
References ibis::bitvector::cnt(), ibis::horometer::CPUTime(), ibis::fileManager::getFile(), ibis::bitvector::indexSet::indices(), ibis::fileManager::instance(), ibis::bitvector::indexSet::isRange(), ibis::util::logMessage(), ibis::bitvector::indexSet::nIndices(), ibis::horometer::realTime(), ibis::SHORT, ibis::array_t< T >::size(), ibis::bitvector::size(), ibis::horometer::start(), ibis::horometer::stop(), ibis::UBYTE, and ibis::USHORT.
Referenced by ibis::bord::column::append(), ibis::part::fill2DBins2(), ibis::part::fill2DBinsWeighted2(), ibis::part::fill3DBins2(), ibis::part::fill3DBins3(), ibis::part::fill3DBinsWeighted2(), ibis::part::fill3DBinsWeighted3(), ibis::part::get1DBins(), ibis::part::get1DBins_(), ibis::part::get1DDistribution(), ibis::part::get2DBins(), ibis::part::get3DBins(), ibis::part::get3DDistributionA(), ibis::part::get3DDistributionA1(), ibis::part::get3DDistributionA2(), and ibis::part::selectUShorts().
long ibis::column::selectValues | ( | const bitvector & | mask, |
void * | vals | ||
) | const |
Return selected rows of the column in an array_t object.
The caller must provide the correct array_t<type>* for vals! No type casting is performed in this function. Only elementary numerical types are supported.
References ibis::CATEGORY, ibis::DOUBLE, ibis::FLOAT, ibis::INT, ibis::LONG, ibis::OID, ibis::SHORT, ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
Referenced by ibis::bord::column::append(), ibis::bord::evaluateTerms(), ibis::mensa::cursor::fillBuffer(), ibis::part::selectValues(), ibis::query::sortEquiJoin(), and ibis::query::sortRangeJoin().
long ibis::column::selectValues | ( | const bitvector & | mask, |
void * | vals, | ||
ibis::array_t< uint32_t > & | inds | ||
) | const |
Return selected rows of the column in an array_t object along with their positions.
The caller must provide the correct array_t<type>* for vals! No type casting is performed in this function. Only elementary numerical types are supported.
References ibis::CATEGORY, ibis::DOUBLE, ibis::FLOAT, ibis::INT, ibis::LONG, ibis::OID, ibis::SHORT, ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
|
protected |
Select values marked in the bitvector mask.
Pack them into the output array vals.
Upon a successful execution, it returns the number of values selected. If it returns zero (0), the contents of vals are not modified. If it returns a negative number, the contents of the array vals are not guaranteed to be in any particular state.
References ibis::array_t< T >::clear(), ibis::bitvector::cnt(), ibis::fileManager::getFile(), ibis::util::getFileSize(), ibis::fileManager::instance(), ibis::util::logMessage(), ibis::bitvector::indexSet::nIndices(), ibis::array_t< T >::push_back(), ibis::util::read(), ibis::fileManager::recordPages(), ibis::array_t< T >::reserve(), ibis::array_t< T >::resize(), ibis::array_t< T >::size(), ibis::bitvector::size(), ibis::fileManager::tryGetFile(), and UnixOpen.
|
protected |
Select the values marked in the bitvector mask.
Pack them into the output array vals and fill the array inds with the positions of the values selected.
Upon a successful execution, it returns the number of values selected. If it returns zero (0), the contents of vals and inds are not modified. If it returns a negative number, the contents of the arrays vals and inds are not guaranteed to be in any particular state.
References ibis::array_t< T >::clear(), ibis::bitvector::cnt(), ibis::util::getFileSize(), ibis::fileManager::instance(), ibis::bitvector::indexSet::nIndices(), ibis::array_t< T >::push_back(), ibis::util::read(), ibis::fileManager::recordPages(), ibis::array_t< T >::reserve(), ibis::array_t< T >::resize(), ibis::array_t< T >::size(), ibis::bitvector::size(), ibis::fileManager::tryGetFile(), and UnixOpen.
int ibis::column::setNullMask | ( | const bitvector & | msk | ) |
Change the null mask to the user specified one.
The incoming mask should have as many bits as the number of rows in the data partition. Upon a successful completion of this function, the return value is >= 0, otherwise it is less than 0.
References ibis::bitvector::size().
Referenced by ibis::tafel::toTable().
|
protected |
Convert strings in the opened file to a list of integers with the aid of a dictionary.
Convert string values in the opened file to a list of integers with the aid of a dictionary.
References ibis::array_t< T >::clear(), ibis::dictionary::insert(), ibis::fileManager::instance(), ibis::util::logMessage(), ibis::array_t< T >::push_back(), ibis::util::read(), ibis::fileManager::recordPages(), and ibis::array_t< T >::size().
|
virtual |
Truncate the number of records in the named dir to nent.
It truncates the file if the current file has more entries than nent, and it adds NULL values if the current file is shorter. The null mask is adjusted accordingly.
References ibis::bitvector::adjustSize(), ibis::CATEGORY, ibis::bitvector::cnt(), ibis::fileManager::flushFile(), ibis::fileManager::getFile(), ibis::util::getFileSize(), ibis::fileManager::instance(), ibis::util::logMessage(), ibis::bitvector::size(), ibis::TEXT, and ibis::bitvector::write().
|
inline |
Type of the data.
References m_type.
Referenced by ibis::bord::append(), ibis::bord::backup(), ibis::bin::bin(), ibis::bord::bord(), ibis::colBlobs::colBlobs(), ibis::colStrings::colStrings(), ibis::bord::column::column(), ibis::part::combineNames(), ibis::bord::copyColumn(), ibis::part::countHits(), ibis::colValues::create(), ibis::index::create(), ibis::mensa::cursor::cursor(), ibis::bord::cursor::cursor(), ibis::direkte::direkte(), ibis::part::doScan(), ibis::bord::evaluateTerms(), fastbit_iapi_extend_array(), ibis::part::fill2DBins2(), ibis::part::fill2DBinsWeighted2(), ibis::part::fill3DBins2(), ibis::part::fill3DBins3(), ibis::part::fill3DBinsWeighted2(), ibis::part::fill3DBinsWeighted3(), ibis::part::get1DBins(), ibis::part::get1DBins_(), ibis::part::get1DDistribution(), ibis::part::get2DBins(), ibis::part::get2DDistribution(), ibis::part::get2DDistributionA(), ibis::part::get2DDistributionU(), ibis::part::get3DBins(), ibis::part::get3DDistribution(), ibis::part::get3DDistributionA(), ibis::part::get3DDistributionA1(), ibis::part::get3DDistributionA2(), ibis::bord::getColumnAsBytes(), ibis::bord::getColumnAsDoubles(), ibis::bord::getColumnAsFloats(), ibis::bord::getColumnAsInts(), ibis::bord::getColumnAsLongs(), ibis::bord::getColumnAsOpaques(), ibis::bord::getColumnAsShorts(), ibis::bord::getColumnAsStrings(), ibis::bord::getColumnAsUBytes(), ibis::bord::getColumnAsUInts(), ibis::bord::getColumnAsULongs(), ibis::bord::getColumnAsUShorts(), ibis::part::getJointDistribution(), ibis::jNatural::jNatural(), ibis::keywords::keywords(), ibis::part::keywordSearch(), ibis::bord::merge(), ibis::bord::merge10(), ibis::bord::merge11(), ibis::bord::merge12(), ibis::bord::merge20(), ibis::bord::merge20T1(), ibis::bord::merge21(), ibis::bord::merge21T1(), ibis::bord::merge21T2(), ibis::part::negativeScan(), ibis::part::old2DDistribution(), ibis::part::barrel::open(), ibis::part::vault::open(), ibis::part::quickTest(), ibis::part::readMetaData(), ibis::part::recursiveQuery(), ibis::relic::relic(), 
ibis::jNatural::select(), ibis::jRange::select(), ibis::query::sortEquiJoin(), ibis::query::sortRangeJoin(), ibis::part::stringSearch(), and ibis::whereClause::verifyExpr().
|
virtual |
Unload the index associated with the column.
This function requires a write lock just like loadIndex. However, it will simply return to the caller if it fails to acquire the lock.
Referenced by ibis::mensa::buildIndex(), and fastbit_build_index().
|
virtual |
Write the metadata entry.
Write the current content to the metadata file -part.txt of the data partition.
Reimplemented in ibis::category, ibis::blob, and ibis::text.
References ibis::DOUBLE, ibis::FLOAT, ibis::INT, ibis::LONG, ibis::SHORT, ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
|
virtual |
Write the content in array va1 to directory dir.
Extend the mask. The void* is internally cast into a pointer to the fixed-size elementary data types according to the type of the column. Therefore, there is no way this function can handle string values.
Return the number of entries actually written to file. If writing was completely successful, the return value should match nnew. It also extends the mask. Write out the mask if not all the bits are set.
Reimplemented in ibis::blob.
References ibis::bitvector::adjustSize(), ibis::CATEGORY, ibis::bitvector::cnt(), ibis::DOUBLE, ibis::FLOAT, ibis::fileManager::flushFile(), ibis::util::getFileSize(), ibis::fileManager::instance(), ibis::INT, ibis::OID, ibis::SHORT, ibis::bitvector::size(), ibis::TEXT, ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::USHORT, and ibis::bitvector::write().
Referenced by ibis::part::addColumn().
|
mutableprotected |
Presence of the data file.
0 – don't know. -1 – no data file. 1 – data file is known to be present.
Referenced by ibis::bord::column::column().