The class ibis::part represents a partition of a relational table. More...
#include <part.h>
Classes | |
class | barrel |
To read a list of variables at the same time. More... | |
class | cleaner |
A cleaner to be used by the function fileManager::unload. More... | |
struct | indexBuilderPool |
A struct to pack arguments to the function ibis_part_build_index. More... | |
struct | info |
A simple class to describe an ibis::part object. More... | |
class | mutexLock |
Provide a mutual exclusion lock on an ibis::part object. More... | |
class | readLock |
Provide a read lock on an ibis::part. More... | |
class | softWriteLock |
A non-blocking version of writeLock. More... | |
struct | thrArg |
A struct to pack the arguments to function startTests. More... | |
class | vault |
To read variables in certain order. More... | |
class | writeLock |
Provide a write lock on an ibis::part. More... | |
Public Types | |
enum | TABLE_STATE { UNKNOWN_STATE =0, STABLE_STATE, RECEIVING_STATE, PRETRANSITION_STATE, TRANSITION_STATE, POSTTRANSITION_STATE } |
Public Member Functions | |
virtual int | buildIndexes (const char *iopt, int nthr) |
Make sure indexes for all columns are available. More... | |
virtual int | buildIndexes (const ibis::table::stringArray &, int nthr=1) |
Make sure indexes for all columns are available. More... | |
void | buildSorted (const char *colname) const |
Build a sorted version of the specified column. More... | |
long | calculate (const ibis::math::term &, const ibis::bitvector &, array_t< double > &) const |
Calculate the values of an arithmetic expression as doubles. More... | |
long | calculate (const ibis::math::stringFunction1 &, const ibis::bitvector &, std::vector< std::string > &) const |
Calculate the values of a math expression as strings. More... | |
ibis::table::stringArray | columnNames () const |
Return column names in a list. More... | |
ibis::table::typeArray | columnTypes () const |
Return column types in a list. | |
void | combineNames (ibis::table::namesTypes &metalist) const |
Update the list of columns with information in this data partition. | |
void | computeMinMax () |
Compute the min and max for each column. More... | |
long | countHits (const ibis::qRange &cmp) const |
Count the number of hits for a single range condition. | |
const char * | currentDataDir () const |
Return the name of the active data directory. | |
const char * | description () const |
Return a text description of the partition. | |
virtual long | doScan (const ibis::qRange &cmp, ibis::bitvector &hits) const |
Evaluate the range condition. More... | |
virtual long | doScan (const ibis::qRange &cmp, const ibis::bitvector &mask, ibis::bitvector &hits) const |
Evaluate the range condition on the records that are marked 1 in the mask. More... | |
virtual long | doScan (const ibis::qRange &cmp, const ibis::bitvector &mask, void *res) const |
Evaluate the range condition and record the values satisfying the condition in res. More... | |
virtual long | doScan (const ibis::qRange &cmp, const ibis::bitvector &mask, void *res, ibis::bitvector &hits) const |
Evaluate the range condition and record the values satisfying the condition in res. More... | |
virtual long | doScan (const ibis::math::term &, const ibis::bitvector &, ibis::bitvector &) const |
Treat the arithmetic expression as true or false. More... | |
virtual long | doScan (const ibis::compRange &cmp, ibis::bitvector &hits) const |
Sequential scan without a mask. More... | |
virtual long | doScan (const ibis::compRange &cmp, const ibis::bitvector &mask, ibis::bitvector &hits) const |
Locate the records that have mark value 1 and satisfy the complex range conditions. More... | |
template<typename T > | |
long | doScan (const array_t< T > &vals, const ibis::qContinuousRange &rng, const ibis::bitvector &mask, ibis::bitvector &hits) |
Evaluate the range condition on the in-memory values. More... | |
template<> | |
long | doScan (const array_t< float > &vals, const ibis::qContinuousRange &rng, const ibis::bitvector &mask, ibis::bitvector &hits) |
Examine the range condition with in memory values. More... | |
template<> | |
long | doScan (const array_t< double > &vals, const ibis::qContinuousRange &rng, const ibis::bitvector &mask, ibis::bitvector &hits) |
Examine the range condition with in memory values. More... | |
template<typename T > | |
long | doScan (const array_t< T > &vals, const ibis::qContinuousRange &rng, const ibis::bitvector &mask, array_t< T > &res) |
Evaluate the range condition on the in-memory values. More... | |
template<typename T > | |
long | doScan (const array_t< T > &vals, const ibis::qContinuousRange &rng, const ibis::bitvector &mask, array_t< T > &res, ibis::bitvector &hits) |
Evaluate the range condition on the in-memory values. More... | |
template<> | |
long | doScan (const array_t< float > &vals, const ibis::qContinuousRange &rng, const ibis::bitvector &mask, array_t< float > &res) |
Examine the range condition with in memory values. More... | |
template<> | |
long | doScan (const array_t< double > &vals, const ibis::qContinuousRange &rng, const ibis::bitvector &mask, array_t< double > &res) |
Examine the range condition with in memory values. More... | |
template<> | |
long | doScan (const array_t< float > &vals, const ibis::qContinuousRange &rng, const ibis::bitvector &mask, array_t< float > &res, ibis::bitvector &hits) |
Examine the range condition with in memory values. More... | |
template<> | |
long | doScan (const array_t< double > &vals, const ibis::qContinuousRange &rng, const ibis::bitvector &mask, array_t< double > &res, ibis::bitvector &hits) |
Examine the range condition with in memory values. More... | |
template<> | |
void | equalWeightBins (const array_t< float > &vals, uint32_t nbins, array_t< float > &bounds) |
Explicit specialization for float arrays. More... | |
template<> | |
void | equalWeightBins (const array_t< double > &vals, uint32_t nbins, array_t< double > &bounds) |
Explicit specialization for double arrays. More... | |
virtual double | estimateCost (const ibis::qContinuousRange &cmp) const |
Estimate the cost of evaluating the query expression. | |
virtual double | estimateCost (const ibis::qDiscreteRange &cmp) const |
Estimate the cost of evaluating the query expression. | |
virtual double | estimateCost (const ibis::qIntHod &cmp) const |
Estimate the cost of evaluating the query expression. | |
virtual double | estimateCost (const ibis::qUIntHod &cmp) const |
Estimate the cost of evaluating the query expression. | |
virtual double | estimateCost (const ibis::qString &cmp) const |
Estimate the cost of evaluating the query expression. | |
virtual double | estimateCost (const ibis::qAnyString &cmp) const |
Estimate the cost of evaluating the query expression. | |
virtual long | estimateMatchAny (const ibis::qAnyAny &cmp, ibis::bitvector &low, ibis::bitvector &high) const |
Estimate a lower bound and an upper bound on the records that are hits. More... | |
virtual long | estimateRange (const ibis::qContinuousRange &cmp) const |
Return an upper bound on the number of hits. | |
virtual long | estimateRange (const ibis::qDiscreteRange &cmp) const |
Return an upper bound on the number of hits. | |
virtual long | estimateRange (const ibis::qIntHod &cmp) const |
Return an upper bound on the number of hits. | |
virtual long | estimateRange (const ibis::qUIntHod &cmp) const |
Return an upper bound on the number of hits. | |
virtual long | estimateRange (const ibis::qContinuousRange &cmp, ibis::bitvector &low, ibis::bitvector &high) const |
Estimate a continuous range condition. More... | |
virtual long | estimateRange (const ibis::qDiscreteRange &cmp, ibis::bitvector &low, ibis::bitvector &high) const |
Estimate the discrete range condition. | |
virtual long | estimateRange (const ibis::qIntHod &cmp, ibis::bitvector &low, ibis::bitvector &high) const |
Estimate the discrete range condition. | |
virtual long | estimateRange (const ibis::qUIntHod &cmp, ibis::bitvector &low, ibis::bitvector &high) const |
Estimate the discrete range condition. | |
int64_t | evaluateJoin (const ibis::deprecatedJoin &cmp, const ibis::bitvector &mask, ibis::bitvector64 &pairs) const |
Evaluate a self-join. More... | |
int64_t | evaluateJoin (const ibis::deprecatedJoin &cmp, const ibis::bitvector &mask, const char *pairfile) const |
Return the number of pairs satisfying the join condition. More... | |
int64_t | evaluateJoin (const ibis::deprecatedJoin &cmp, const ibis::bitvector &mask) const |
Return only the number of pairs satisfying the join condition. | |
int64_t | evaluateJoin (const std::vector< const ibis::deprecatedJoin * > &cmp, const ibis::bitvector &mask, ibis::bitvector64 &pairs) const |
Evaluate a join defined with multiple (conjunctive) range join conditions. More... | |
int64_t | evaluateJoin (const std::vector< const ibis::deprecatedJoin * > &cmp, const ibis::bitvector &mask) const |
int64_t | evaluateJoin (const ibis::deprecatedJoin &cmp, const ibis::bitvector64 &trial, ibis::bitvector64 &result) const |
Evaluate all pairs in trial to determine whether they really satisfy the range join defined in cmp . More... | |
int64_t | evaluateJoin (const std::vector< const ibis::deprecatedJoin * > &cmp, const ibis::bitvector64 &trial, ibis::bitvector64 &result) const |
Check a set of pairs defined in trial . More... | |
virtual long | evaluateRange (const ibis::qContinuousRange &cmp, const ibis::bitvector &mask, ibis::bitvector &res) const |
Evaluate a continuous range expression accurately. | |
virtual long | evaluateRange (const ibis::qDiscreteRange &cmp, const ibis::bitvector &mask, ibis::bitvector &res) const |
Evaluate a discrete range expression accurately. | |
virtual long | evaluateRange (const ibis::qIntHod &cmp, const ibis::bitvector &mask, ibis::bitvector &res) const |
Evaluate a discrete range expression accurately. | |
virtual long | evaluateRange (const ibis::qUIntHod &cmp, const ibis::bitvector &mask, ibis::bitvector &res) const |
Evaluate a discrete range expression accurately. | |
long | evaluateRIDSet (const ibis::RIDSet &, ibis::bitvector &) const |
Convert a list of RIDs into a bitvector. More... | |
bool | explicitRIDs () const |
Does this partition have an explicit RID column? Returns true for yes, false for no. More... | |
double | getActualMax (const char *name) const |
The actual maximum value in the named column. | |
double | getActualMin (const char *name) const |
The actual minimum value in the named column. | |
column * | getColumn (const char *name) const |
Given a name, return the associated column. More... | |
column * | getColumn (uint32_t ind) const |
Returns the pointer to the ith column. More... | |
double | getColumnSum (const char *name) const |
Sum of all values in the named column. | |
info * | getInfo () const |
Return an ibis::part::info object that describes the current partition. | |
const std::vector< std::string > & | getMeshDimensions () const |
Return the names of the dimensions corresponding to the vector returned from getMeshShape. More... | |
const std::vector< uint32_t > & | getMeshShape () const |
In many scientific applications, data are defined on meshes. More... | |
const char * | getMetaTag (const char *) const |
Return the value of the meta tag with the specified name. | |
array_t< rid_t > * | getRIDs () const |
array_t< rid_t > * | getRIDs (const ibis::bitvector &mask) const |
Retrieve the RIDs corresponding to mask[i] == 1. More... | |
uint32_t | getRowNumber (const rid_t &rid) const |
Return the row number of the row with specified RID. More... | |
TABLE_STATE | getState () const |
Retrieve the current state of data partition. More... | |
TABLE_STATE | getStateNoLocking () const |
Return the current state of data partition. | |
virtual float | getUndecidable (const ibis::qContinuousRange &cmp, ibis::bitvector &iffy) const |
Discover the records that can not be decided using the index. More... | |
virtual float | getUndecidable (const ibis::qDiscreteRange &cmp, ibis::bitvector &iffy) const |
Discover the records that can not be decided using the index. | |
virtual float | getUndecidable (const ibis::qIntHod &cmp, ibis::bitvector &iffy) const |
Discover the records that can not be decided using the index. | |
virtual float | getUndecidable (const ibis::qUIntHod &cmp, ibis::bitvector &iffy) const |
Discover the records that can not be decided using the index. | |
const char * | indexSpec () const |
Return the current index specification. | |
void | indexSpec (const char *) |
Replace existing index specification with a new one. | |
long | keywordSearch (const ibis::qKeyword &cmp, ibis::bitvector &low) const |
Identify all rows containing the specified keyword. More... | |
long | keywordSearch (const ibis::qAllWords &cmp, ibis::bitvector &low) const |
Determine the records that have all specified keywords. More... | |
long | keywordSearch (const ibis::qKeyword &cmp) const |
Return an upper bound of the number of records that have the keyword. | |
long | keywordSearch (const ibis::qAllWords &cmp) const |
Compute an upper bound on the number of rows with all the specified keywords. More... | |
void | loadIndexes (const char *iopt=0, int ropt=0) const |
Load indexes of all columns. More... | |
void | logMessage (const char *event, const char *fmt,...) const |
void | logWarning (const char *event, const char *fmt,...) const |
virtual long | matchAny (const ibis::qAnyAny &cmp, ibis::bitvector &hits) const |
virtual long | matchAny (const ibis::qAnyAny &cmp, const ibis::bitvector &mask, ibis::bitvector &hits) const |
Perform exact match operation for an AnyAny query. More... | |
bool | matchMetaTags (const std::vector< const char * > &mtags) const |
Match multiple name-value pairs against the internally stored meta tags. More... | |
bool | matchMetaTags (const ibis::resource::vList &mtags) const |
Match multiple name-value pairs. More... | |
bool | matchNameValuePair (const char *name, const char *value) const |
Match a name-value pair in the meta tags. More... | |
std::string | metaTags () const |
Return the list of meta tags as a single string. More... | |
const char * | name () const |
Return the name of the partition. | |
uint32_t | nColumns () const |
Return the number of attributes in the partition. | |
virtual long | negativeScan (const ibis::qRange &cmp, const ibis::bitvector &mask, ibis::bitvector &hits) const |
Compute the records (marked 1 in the mask) that do not satisfy the range condition. More... | |
uint32_t | nRows () const |
Return the number of rows. | |
part (const char *name=0, bool ro=false) | |
Initialize a data partition object. More... | |
part (const char *adir, const char *bdir, bool ro=false) | |
Initialize a table from the named directories. More... | |
part (const std::vector< const char * > &mtags, bool ro=false) | |
Initialize a partition with given meta tags. More... | |
part (const ibis::resource::vList &mtags, bool ro=false) | |
Initialize a partition with given meta tags. More... | |
long | patternSearch (const ibis::qLike &cmp, ibis::bitvector &low) const |
Look for string like the given pattern. | |
long | patternSearch (const ibis::qLike &cmp) const |
Look for string like the given pattern. | |
void | print (std::ostream &out) const |
Output a description of every column in the data partition. More... | |
void | purgeIndexFiles () const |
Remove existing index files! The indexes will be rebuilt next time they are needed. More... | |
array_t< signed char > * | selectBytes (const char *name, const ibis::bitvector &mask) const |
Retrieve values of the named column as 8-bit integers. More... | |
array_t< double > * | selectDoubles (const char *name, const ibis::bitvector &mask) const |
Retrieve values of the named column as 64-bit floating-point values. More... | |
array_t< float > * | selectFloats (const char *name, const ibis::bitvector &mask) const |
Retrieve values of the named column as 32-bit floating-point values. More... | |
array_t< int32_t > * | selectInts (const char *name, const ibis::bitvector &mask) const |
Retrieve values of the named column as 32-bit integers. More... | |
array_t< int64_t > * | selectLongs (const char *name, const ibis::bitvector &mask) const |
Retrieve values of the named column as 64-bit integers. More... | |
array_t< int16_t > * | selectShorts (const char *name, const ibis::bitvector &mask) const |
Retrieve values of the named column as 16-bit integers. More... | |
std::vector< std::string > * | selectStrings (const char *name, const ibis::bitvector &mask) const |
Retrieve values of the named column as strings. More... | |
array_t< unsigned char > * | selectUBytes (const char *name, const ibis::bitvector &mask) const |
Retrieve values of the named column as 8-bit unsigned integers. More... | |
array_t< uint32_t > * | selectUInts (const char *name, const ibis::bitvector &mask) const |
Retrieve values of the named column as 32-bit unsigned integers. More... | |
array_t< uint64_t > * | selectULongs (const char *name, const ibis::bitvector &mask) const |
Retrieve values of the named column as 64-bit unsigned integers. More... | |
array_t< uint16_t > * | selectUShorts (const char *name, const ibis::bitvector &mask) const |
Retrieve values of the named column as 16-bit unsigned integers. More... | |
long | selectValues (const char *cname, const ibis::bitvector &mask, void *vals) const |
Select values of a column based on the given mask. | |
long | selectValues (const ibis::qContinuousRange &cond, void *vals) const |
Select values of the column based on the range condition. More... | |
virtual long | selfTest (int nth=1, const char *pref=0) const |
Perform predefined set of tests and return the number of failures. | |
void | setMeshShape (const ibis::array_t< uint64_t > &) |
Copy the incoming array as the mesh shape of the data partition. | |
void | setMeshShape (const char *shape) |
Digest the mesh shape stored in the string. More... | |
long | stringSearch (const ibis::qString &cmp, ibis::bitvector &low) const |
Find all records that have the exact string value. More... | |
long | stringSearch (const ibis::qAnyString &cmp, ibis::bitvector &low) const |
Determine the records that have the exact string values. More... | |
long | stringSearch (const ibis::qString &cmp) const |
Return an upper bound of the number of records that have the exact string value. More... | |
long | stringSearch (const ibis::qAnyString &cmp) const |
time_t | timestamp () const |
Return the time stamp on the partition. | |
void | unloadIndexes () const |
Unload indexes of all columns. | |
void | updateMetaData () const |
Write the metadata file to record the changes to the partition. More... | |
virtual | ~part () |
Destructor. | |
long | get1DDistribution (const char *constraints, const char *cname, double begin, double end, double stride, std::vector< uint32_t > &counts) const |
Histogram functions. More... | |
long | get2DDistribution (const char *constraints, const char *cname1, double begin1, double end1, double stride1, const char *cname2, double begin2, double end2, double stride2, std::vector< uint32_t > &counts) const |
Compute conditional 2D histogram with regularly spaced bins. More... | |
long | get3DDistribution (const char *constraints, const char *cname1, double begin1, double end1, double stride1, const char *cname2, double begin2, double end2, double stride2, const char *cname3, double begin3, double end3, double stride3, std::vector< uint32_t > &counts) const |
Compute conditional 3D histogram with regularly spaced bins. More... | |
long | get1DDistribution (const char *constraints, const char *cname, double begin, double end, double stride, const char *wtname, std::vector< double > &weights) const |
Compute weighted conditional 1D histogram with regularly spaced bins. More... | |
long | get2DDistribution (const char *constraints, const char *cname1, double begin1, double end1, double stride1, const char *cname2, double begin2, double end2, double stride2, const char *wtname, std::vector< double > &weights) const |
Compute weighted conditional 2D histogram with regularly spaced bins. More... | |
long | get3DDistribution (const char *constraints, const char *cname1, double begin1, double end1, double stride1, const char *cname2, double begin2, double end2, double stride2, const char *cname3, double begin3, double end3, double stride3, const char *wtname, std::vector< double > &weights) const |
Compute weighted conditional 3D histogram with regularly spaced bins. More... | |
long | get1DDistribution (const char *cname, uint32_t nbin, std::vector< double > &bounds, std::vector< uint32_t > &counts) const |
Compute 1D histogram with adaptive bins. More... | |
long | get1DDistribution (const char *constraints, const char *cname, uint32_t nbin, std::vector< double > &bounds, std::vector< uint32_t > &counts) const |
Compute conditional 1D histogram with adaptive bins. More... | |
long | get2DDistribution (const char *cname1, const char *cname2, uint32_t nb1, uint32_t nb2, std::vector< double > &bounds1, std::vector< double > &bounds2, std::vector< uint32_t > &counts, const char *const option=0) const |
Compute 2D histogram with adaptive bins. More... | |
long | get2DDistribution (const char *constraints, const char *name1, const char *name2, uint32_t nb1, uint32_t nb2, std::vector< double > &bounds1, std::vector< double > &bounds2, std::vector< uint32_t > &counts) const |
Compute conditional 2D histogram with adaptive bins. More... | |
long | get3DDistribution (const char *cname1, const char *cname2, const char *cname3, uint32_t nb1, uint32_t nb2, uint32_t nb3, std::vector< double > &bounds1, std::vector< double > &bounds2, std::vector< double > &bounds3, std::vector< uint32_t > &counts, const char *const option=0) const |
Compute 3D histogram with adaptive bins. More... | |
long | get3DDistribution (const char *constraints, const char *cname1, const char *cname2, const char *cname3, uint32_t nb1, uint32_t nb2, uint32_t nb3, std::vector< double > &bounds1, std::vector< double > &bounds2, std::vector< double > &bounds3, std::vector< uint32_t > &counts) const |
Compute conditional 3D histogram with adaptive bins. More... | |
long | get1DBins (const char *constraints, const char *cname, double begin, double end, double stride, std::vector< ibis::bitvector > &bins) const |
Partition values of the named variable into regularly spaced bins. More... | |
long | get1DBins (const char *constraints, const char *cname, double begin, double end, double stride, std::vector< ibis::bitvector * > &bins) const |
Partition values of the named variable into regularly spaced bins. More... | |
long | get1DBins (const char *constraints, const char *cname, double begin, double end, double stride, const char *wtname, std::vector< double > &weights, std::vector< ibis::bitvector * > &bins) const |
Partition values of the named variable into regularly spaced bins. More... | |
long | get2DBins (const char *constraints, const char *cname1, double begin1, double end1, double stride1, const char *cname2, double begin2, double end2, double stride2, std::vector< ibis::bitvector > &bins) const |
Partition values of named variables into regularly spaced 2D bins. More... | |
long | get2DBins (const char *constraints, const char *cname1, double begin1, double end1, double stride1, const char *cname2, double begin2, double end2, double stride2, std::vector< ibis::bitvector * > &bins) const |
Partition values of named variables into regularly spaced 2D bins. More... | |
long | get2DBins (const char *constraints, const char *cname1, double begin1, double end1, double stride1, const char *cname2, double begin2, double end2, double stride2, const char *wtname, std::vector< double > &weights, std::vector< ibis::bitvector * > &bins) const |
Partition values of named variables into regularly spaced 2D bins. More... | |
long | get3DBins (const char *constraints, const char *cname1, double begin1, double end1, double stride1, const char *cname2, double begin2, double end2, double stride2, const char *cname3, double begin3, double end3, double stride3, std::vector< ibis::bitvector > &bins) const |
Partition values of named variables into regularly spaced 3D bins. More... | |
long | get3DBins (const char *constraints, const char *cname1, double begin1, double end1, double stride1, const char *cname2, double begin2, double end2, double stride2, const char *cname3, double begin3, double end3, double stride3, std::vector< ibis::bitvector * > &bins) const |
Partition values of named variables into regularly spaced 3D bins. More... | |
long | get3DBins (const char *constraints, const char *cname1, double begin1, double end1, double stride1, const char *cname2, double begin2, double end2, double stride2, const char *cname3, double begin3, double end3, double stride3, const char *wtname, std::vector< double > &weights, std::vector< ibis::bitvector * > &bins) const |
Partition values of named variables into regularly spaced 3D bins. More... | |
long | get1DBins (const char *constraints, const char *cname1, uint32_t nb1, std::vector< double > &bounds1, std::vector< ibis::bitvector > &bins) const |
Partition records satisfying specified conditions into bins with about the same number of records. More... | |
long | get2DBins (const char *constraints, const char *cname1, const char *cname2, uint32_t nb1, uint32_t nb2, std::vector< double > &bounds1, std::vector< double > &bounds2, std::vector< ibis::bitvector > &bins) const |
Partition records satisfying specified conditions into 2D bins. More... | |
long | get3DBins (const char *constraints, const char *cname1, const char *cname2, const char *cname3, uint32_t nb1, uint32_t nb2, uint32_t nb3, std::vector< double > &bounds1, std::vector< double > &bounds2, std::vector< double > &bounds3, std::vector< ibis::bitvector > &bins) const |
Partition records satisfying specified conditions into 3D bins. More... | |
long | getDistribution (const char *name, std::vector< double > &bounds, std::vector< uint32_t > &counts) const |
Obsolete histogram functions. More... | |
long | getDistribution (const char *constraints, const char *name, std::vector< double > &bounds, std::vector< uint32_t > &counts) const |
Compute the conditional binned data distribution. More... | |
long | getDistribution (const char *name, uint32_t nbc, double *bounds, uint32_t *counts) const |
Compute the binned distribution with the specified maximum number of bins. More... | |
long | getDistribution (const char *name, const char *constraints, uint32_t nbc, double *bounds, uint32_t *counts) const |
Compute the conditional binned data distribution with the specified maximum number of bins. More... | |
long | getJointDistribution (const char *constraints, const char *name1, const char *name2, std::vector< double > &bounds1, std::vector< double > &bounds2, std::vector< uint32_t > &counts) const |
Compute the joint distribution of two variables. More... | |
long | getCumulativeDistribution (const char *name, std::vector< double > &bounds, std::vector< uint32_t > &counts) const |
Compute a cumulative distribution (a cumulative histogram). More... | |
long | getCumulativeDistribution (const char *constraints, const char *name, std::vector< double > &bounds, std::vector< uint32_t > &counts) const |
Compute the cumulative distribution of the variable named name under the specified constraints. More... | |
long | getCumulativeDistribution (const char *name, uint32_t nbc, double *bounds, uint32_t *counts) const |
This version of getCumulativeDistribution uses two user supplied arrays bounds and counts . More... | |
long | getCumulativeDistribution (const char *constraints, const char *name, uint32_t nbc, double *bounds, uint32_t *counts) const |
Compute the conditional distribution and return the distribution in the arrays provided. More... | |
Static Public Member Functions | |
template<typename E > | |
static long | doScan (const array_t< E > &varr, const ibis::qRange &cmp, const ibis::bitvector &mask, ibis::bitvector &hits) |
Locate the records that satisfy the range condition. More... | |
template<typename E > | |
static long | doScan (const array_t< E > &varr, const ibis::qContinuousRange &cmp, const ibis::bitvector &mask, ibis::bitvector &hits) |
template<typename E > | |
static long | doScan (const array_t< E > &varr, const ibis::qContinuousRange &cmp, const ibis::bitvector &mask, array_t< E > &res) |
template<typename E > | |
static long | doScan (const array_t< E > &varr, const ibis::qContinuousRange &cmp, const ibis::bitvector &mask, array_t< E > &res, ibis::bitvector &hits) |
Protected Member Functions | |
long | append1 (const char *dir) |
Perform append operation using only one data directory. More... | |
long | append2 (const char *dir) |
Perform append operation with two data directories. More... | |
long | appendToBackup (const char *dir) |
Append data in dir to the partition in the backup directory. More... | |
void | buildQueryList (ibis::part::thrArg &lst, unsigned nc, unsigned nq) const |
Generate a list of random query conditions. More... | |
void | checkQueryList (const ibis::part::thrArg &lst) const |
Sum up the hits from sub-divisions to verify the hits computed from the whole range. More... | |
int | coarsenBins (const ibis::column &col, uint32_t nbin, std::vector< double > &bnds, std::vector< ibis::bitvector * > &btmp) const |
Produce a set of bitmaps corresponding to a set of coarse bins. More... | |
void | composeQueryString (std::string &str, const ibis::column *col1, const ibis::column *col2, const double &lower1, const double &upper1, const double &lower2, const double &upper2) const |
template<typename T1 , typename T2 > | |
long | count2DBins (array_t< T1 > &vals1, const double &begin1, const double &end1, const double &stride1, array_t< T2 > &vals2, const double &begin2, const double &end2, const double &stride2, std::vector< uint32_t > &counts) const |
Count the number of values in 2D bins. | |
template<typename T1 , typename T2 > | |
long | count2DWeights (array_t< T1 > &vals1, const double &begin1, const double &end1, const double &stride1, array_t< T2 > &vals2, const double &begin2, const double &end2, const double &stride2, array_t< double > &wts, std::vector< double > &weights) const |
Count the weights in 2D bins. | |
template<typename T1 , typename T2 , typename T3 > | |
long | count3DBins (const array_t< T1 > &vals1, const double &begin1, const double &end1, const double &stride1, const array_t< T2 > &vals2, const double &begin2, const double &end2, const double &stride2, const array_t< T3 > &vals3, const double &begin3, const double &end3, const double &stride3, std::vector< uint32_t > &counts) const |
Count the number of values in 3D bins. | |
template<typename T1 , typename T2 , typename T3 > | |
long | count3DWeights (const array_t< T1 > &vals1, const double &begin1, const double &end1, const double &stride1, const array_t< T2 > &vals2, const double &begin2, const double &end2, const double &stride2, const array_t< T3 > &vals3, const double &begin3, const double &end3, const double &stride3, const array_t< double > &wts, std::vector< double > &weights) const |
Count the weights in 3D bins. | |
long | deactivate (const ibis::bitvector &rows) |
Rows marked 1 will become inactive. More... | |
void | deriveBackupDirName () |
void | digestMeshShape (const char *shape) |
Convert the string describing the shape into internal storage format. More... | |
template<typename T > | |
long | doCount (const ibis::qRange &cmp) const |
Count the number of rows satisfying the range expression. | |
template<typename T > | |
long | doCount (const array_t< T > &vals, const ibis::qIntHod &cmp, const ibis::bitvector &mask) const |
Count the number of rows satisfying the range expression. More... | |
template<typename T > | |
long | doCount (const array_t< T > &vals, const ibis::qUIntHod &cmp, const ibis::bitvector &mask) const |
Count the number of rows satisfying the range expression. More... | |
template<typename T > | |
long | doCount (const array_t< T > &vals, const ibis::qRange &cmp, const ibis::bitvector &mask) const |
Count the number of rows satisfying the range expression. More... | |
template<typename T , typename F > | |
long | doCount (const array_t< T > &vals, const ibis::bitvector &mask, F cmp) const |
Count the number of rows satisfying the range expression. | |
template<typename T , typename F1 , typename F2 > | |
long | doCount (const array_t< T > &vals, const ibis::bitvector &mask, F1 cmp1, F2 cmp2) const |
Count the number of rows satisfying the range expression. | |
template<> | |
long | doCount (const ibis::qRange &) const |
template<> | |
long | doCount (const ibis::qRange &) const |
template<> | |
long | doCount (const ibis::qRange &cmp) const |
A specialization of template part::doCount for float values. More... | |
template<> | |
long | doCount (const ibis::qRange &cmp) const |
A specialization of template part::doCount for double values. | |
void | extendMetaTags () |
!< Remove the rids list from memory. | |
template<typename T1 > | |
long | fill1DBins (const ibis::bitvector &mask, const array_t< T1 > &vals1, const double &begin1, const double &end1, const double &stride1, std::vector< ibis::bitvector > &bins) const |
Fill the bitvectors representing the 1D bins. More... | |
template<typename T1 > | |
long | fill1DBins (const ibis::bitvector &mask, const array_t< T1 > &vals1, const double &begin1, const double &end1, const double &stride1, std::vector< ibis::bitvector * > &bins) const |
Fill the bitvectors representing the 1D bins. More... | |
template<typename T1 > | |
long | fill1DBinsWeighted (const ibis::bitvector &mask, const array_t< T1 > &vals1, const double &begin1, const double &end1, const double &stride1, const array_t< double > &wts, std::vector< double > &weights, std::vector< ibis::bitvector * > &bins) const |
Fill the bitvectors representing the 1D bins. More... | |
template<typename T1 , typename T2 > | |
long | fill2DBins (const ibis::bitvector &mask, const array_t< T1 > &vals1, const double &begin1, const double &end1, const double &stride1, const array_t< T2 > &vals2, const double &begin2, const double &end2, const double &stride2, std::vector< ibis::bitvector > &bins) const |
Fill the bitvectors representing the 2D bins. More... | |
template<typename T1 , typename T2 > | |
long | fill2DBins (const ibis::bitvector &mask, const array_t< T1 > &vals1, const double &begin1, const double &end1, const double &stride1, const array_t< T2 > &vals2, const double &begin2, const double &end2, const double &stride2, std::vector< ibis::bitvector * > &bins) const |
Fill the bitvectors representing the 2D bins. More... | |
template<typename T1 > | |
long | fill2DBins2 (const ibis::bitvector &mask, const array_t< T1 > &vals1, const double &begin1, const double &end1, const double &stride1, const ibis::column &col2, const double &begin2, const double &end2, const double &stride2, std::vector< ibis::bitvector > &bins) const |
A template function to resolve the second variable involved in the 2D bins. More... | |
template<typename T1 > | |
long | fill2DBins2 (const ibis::bitvector &mask, const array_t< T1 > &vals1, const double &begin1, const double &end1, const double &stride1, const ibis::column &col2, const double &begin2, const double &end2, const double &stride2, std::vector< ibis::bitvector * > &bins) const |
This version returns a vector of pointers to bitmaps. More... | |
template<typename T1 , typename T2 > | |
long | fill2DBinsWeighted (const ibis::bitvector &mask, const array_t< T1 > &vals1, const double &begin1, const double &end1, const double &stride1, const array_t< T2 > &vals2, const double &begin2, const double &end2, const double &stride2, const array_t< double > &wts, std::vector< double > &weights, std::vector< ibis::bitvector * > &bins) const |
Fill the bitvectors representing the 2D bins. More... | |
template<typename T1 > | |
long | fill2DBinsWeighted2 (const ibis::bitvector &mask, const array_t< T1 > &vals1, const double &begin1, const double &end1, const double &stride1, const ibis::column &col2, const double &begin2, const double &end2, const double &stride2, const array_t< double > &wts, std::vector< double > &weights, std::vector< ibis::bitvector * > &bins) const |
This version returns a vector of pointers to bitmaps. More... | |
template<typename T1 , typename T2 , typename T3 > | |
long | fill3DBins (const ibis::bitvector &mask, const array_t< T1 > &vals1, const double &begin1, const double &end1, const double &stride1, const array_t< T2 > &vals2, const double &begin2, const double &end2, const double &stride2, const array_t< T3 > &vals3, const double &begin3, const double &end3, const double &stride3, std::vector< ibis::bitvector > &bins) const |
Fill the bitvectors representing the 3D bins. More... | |
template<typename T1 , typename T2 , typename T3 > | |
long | fill3DBins (const ibis::bitvector &mask, const array_t< T1 > &vals1, const double &begin1, const double &end1, const double &stride1, const array_t< T2 > &vals2, const double &begin2, const double &end2, const double &stride2, const array_t< T3 > &vals3, const double &begin3, const double &end3, const double &stride3, std::vector< bitvector * > &bins) const |
The three triplets, (begin1, end1, stride1), (begin2, end2, stride2), and (begin3, end3, stride3), define (1 + floor((end1 - begin1) / stride1)) × (1 + floor((end2 - begin2) / stride2)) × (1 + floor((end3 - begin3) / stride3)) 3D bins. More... | |
template<typename T1 > | |
long | fill3DBins2 (const ibis::bitvector &mask, const array_t< T1 > &vals1, const double &begin1, const double &end1, const double &stride1, const ibis::column &col2, const double &begin2, const double &end2, const double &stride2, const ibis::column &col3, const double &begin3, const double &end3, const double &stride3, std::vector< bitvector > &bins) const |
Resolve the 2nd column of the 3D bins. More... | |
template<typename T1 > | |
long | fill3DBins2 (const ibis::bitvector &mask, const array_t< T1 > &vals1, const double &begin1, const double &end1, const double &stride1, const ibis::column &col2, const double &begin2, const double &end2, const double &stride2, const ibis::column &col3, const double &begin3, const double &end3, const double &stride3, std::vector< ibis::bitvector * > &bins) const |
Resolve the 2nd column of the 3D bins. More... | |
template<typename T1 , typename T2 > | |
long | fill3DBins3 (const ibis::bitvector &mask, const array_t< T1 > &vals1, const double &begin1, const double &end1, const double &stride1, const array_t< T2 > &vals2, const double &begin2, const double &end2, const double &stride2, const ibis::column &col3, const double &begin3, const double &end3, const double &stride3, std::vector< bitvector > &bins) const |
Resolve the 3rd column involved in the 3D bins. More... | |
template<typename T1 , typename T2 > | |
long | fill3DBins3 (const ibis::bitvector &mask, const array_t< T1 > &vals1, const double &begin1, const double &end1, const double &stride1, const array_t< T2 > &vals2, const double &begin2, const double &end2, const double &stride2, const ibis::column &col3, const double &begin3, const double &end3, const double &stride3, std::vector< ibis::bitvector * > &bins) const |
Resolve the 3rd column involved in the 3D bins. More... | |
template<typename T1 , typename T2 , typename T3 > | |
long | fill3DBinsWeighted (const ibis::bitvector &mask, const array_t< T1 > &vals1, const double &begin1, const double &end1, const double &stride1, const array_t< T2 > &vals2, const double &begin2, const double &end2, const double &stride2, const array_t< T3 > &vals3, const double &begin3, const double &end3, const double &stride3, const array_t< double > &wts, std::vector< double > &weights, std::vector< bitvector * > &bins) const |
The three triplets, (begin1, end1, stride1), (begin2, end2, stride2), and (begin3, end3, stride3), define (1 + floor((end1 - begin1) / stride1)) × (1 + floor((end2 - begin2) / stride2)) × (1 + floor((end3 - begin3) / stride3)) 3D bins. More... | |
template<typename T1 > | |
long | fill3DBinsWeighted2 (const ibis::bitvector &mask, const array_t< T1 > &vals1, const double &begin1, const double &end1, const double &stride1, const ibis::column &col2, const double &begin2, const double &end2, const double &stride2, const ibis::column &col3, const double &begin3, const double &end3, const double &stride3, const array_t< double > &wts, std::vector< double > &weights, std::vector< ibis::bitvector * > &bins) const |
Resolve the 2nd column of the 3D bins. More... | |
template<typename T1 , typename T2 > | |
long | fill3DBinsWeighted3 (const ibis::bitvector &mask, const array_t< T1 > &vals1, const double &begin1, const double &end1, const double &stride1, const array_t< T2 > &vals2, const double &begin2, const double &end2, const double &stride2, const ibis::column &col3, const double &begin3, const double &end3, const double &stride3, const array_t< double > &wts, std::vector< double > &weights, std::vector< ibis::bitvector * > &bins) const |
Resolve the 3rd column involved in the 3D bins. More... | |
void | freeRIDs () const |
!< Read RIDs from file 'rids'. More... | |
void | gatherSortKeys (ibis::table::stringArray &names) |
Collect a list of column names that might be used as keys for sorting the rows. More... | |
long | get1DBins_ (const ibis::bitvector &mask, const ibis::column &col, uint32_t nbin, std::vector< double > &bounds, std::vector< ibis::bitvector > &bins, const char *mesg) const |
Compute 1D histogram from raw data. More... | |
long | get1DDistribution (const ibis::column &col, uint32_t nbin, std::vector< double > &bounds, std::vector< uint32_t > &counts) const |
Compute 1D histogram from index. More... | |
long | get2DDistributionA (const ibis::column &col1, const ibis::column &col2, uint32_t nb1, uint32_t nb2, std::vector< double > &bounds1, std::vector< double > &bounds2, std::vector< uint32_t > &counts) const |
Compute 2D histogram with adaptive bins from base data. More... | |
long | get2DDistributionI (const ibis::column &col1, const ibis::column &col2, uint32_t nb1, uint32_t nb2, std::vector< double > &bounds1, std::vector< double > &bounds2, std::vector< uint32_t > &counts) const |
Compute 2D histogram from indexes. | |
long | get2DDistributionU (const ibis::column &col1, const ibis::column &col2, uint32_t nb1, uint32_t nb2, std::vector< double > &bounds1, std::vector< double > &bounds2, std::vector< uint32_t > &counts) const |
Compute 2D histogram with uniform bins from base data. More... | |
long | get3DDistributionA (const ibis::bitvector &mask, const ibis::column &col1, const ibis::column &col2, const ibis::column &col3, uint32_t nb1, uint32_t nb2, uint32_t nb3, std::vector< double > &bounds1, std::vector< double > &bounds2, std::vector< double > &bounds3, std::vector< uint32_t > &counts) const |
Compute 3D histogram with adaptive bins from base data. More... | |
template<typename E1 > | |
long | get3DDistributionA1 (const ibis::bitvector &mask, const array_t< E1 > &vals1, const ibis::column &col2, const ibis::column &col3, uint32_t nb1, uint32_t nb2, uint32_t nb3, std::vector< double > &bounds1, std::vector< double > &bounds2, std::vector< double > &bounds3, std::vector< uint32_t > &counts) const |
Read the value of the second column. More... | |
template<typename E1 , typename E2 > | |
long | get3DDistributionA2 (const ibis::bitvector &mask, const array_t< E1 > &vals1, const array_t< E2 > &vals2, const ibis::column &col3, uint32_t nb1, uint32_t nb2, uint32_t nb3, std::vector< double > &bounds1, std::vector< double > &bounds2, std::vector< double > &bounds3, std::vector< uint32_t > &counts) const |
Read the values of the third column. More... | |
void | logError (const char *event, const char *fmt,...) const |
Write out a message with indication of severe error. | |
void | makeBackupCopy () |
Spawn another thread to copy the content of activeDir to backupDir . | |
void | numbersToBitvector (const std::vector< uint32_t > &, ibis::bitvector &) const |
Turn a list of numbers into a bitvector. More... | |
long | old2DDistribution (const char *constraints, const char *name1, const char *name2, uint32_t nb1, uint32_t nb2, std::vector< double > &bounds1, std::vector< double > &bounds2, std::vector< uint32_t > &counts) const |
The old implementation that uses binary lookup. More... | |
long | packCumulativeDistribution (const std::vector< double > &bounds, const std::vector< uint32_t > &counts, uint32_t nbc, double *bptr, uint32_t *cptr) const |
Pack a cumulative distribution stored in two std::vectors into two arrays provided by the caller. More... | |
long | packDistribution (const std::vector< double > &bounds, const std::vector< uint32_t > &counts, uint32_t nbc, double *bptr, uint32_t *cptr) const |
Pack a binned distribution. | |
long | reactivate (const ibis::bitvector &rows) |
Change all rows marked 1 to be active. More... | |
void | readMeshShape (const char *const dir) |
Read shape of the mesh from the metadata file. | |
int | readMetaData (uint32_t &nrows, columnList &plist, const char *dir) |
!< Don't change the data. More... | |
void | readRIDs () const |
A function to retrieve RIDs stored in file. | |
uint32_t | recursiveQuery (const char *pref, const column *att, double low, double high, long *nerr) const |
Issues a query and then subdivides the range into three to check that the total hits of the three subqueries match the hits of the single query. More... | |
template<typename T > | |
long | reorderValues (const char *fname, array_t< uint32_t > &starts, array_t< uint32_t > &indout, const array_t< uint32_t > &indin, bool ascending) |
Write the named data file in a segmented sorted order. More... | |
void | setMetaTags (const ibis::resource::vList &mts) |
Make a deep copy of the incoming name-value pairs. | |
void | setMetaTags (const std::vector< const char * > &mts) |
Make a deep copy of the incoming name-value pairs. More... | |
void | stringToBitvector (const char *, ibis::bitvector &) const |
Convert a set of range conditions to an ibis::bitvector. More... | |
long | verifyBackupDir () |
void | writeMetaData (const uint32_t nrows, const columnList &plist, const char *dir) const |
Write metadata file -part.txt. More... | |
template<typename T > | |
long | writeValues (const char *fname, const array_t< uint32_t > &ind) |
Write the named data file with values in the given order. More... | |
Static Protected Member Functions | |
template<typename T1 , typename T2 > | |
static long | adaptive2DBins (const array_t< T1 > &vals1, const array_t< T2 > &vals2, uint32_t nb1, uint32_t nb2, std::vector< double > &bounds1, std::vector< double > &bounds2, std::vector< uint32_t > &counts) |
Adaptive binning through regularly spaced bins. More... | |
template<typename T1 , typename T2 , typename T3 > | |
static long | adaptive3DBins (const array_t< T1 > &vals1, const array_t< T2 > &vals2, const array_t< T3 > &vals3, uint32_t nb1, uint32_t nb2, uint32_t nb3, std::vector< double > &bounds1, std::vector< double > &bounds2, std::vector< double > &bounds3, std::vector< uint32_t > &counts) |
Adaptive binning through regularly spaced bins. More... | |
template<typename T > | |
static long | adaptiveFloats (const array_t< T > &vals, const T vmin, const T vmax, uint32_t nbins, std::vector< double > &bounds, std::vector< uint32_t > &counts) |
The adaptive binning function for floats and integers in wide ranges. More... | |
template<typename T > | |
static long | adaptiveFloatsDetailed (const ibis::bitvector &mask, const array_t< T > &vals, const T vmin, const T vmax, uint32_t nbins, std::vector< double > &bounds, std::vector< ibis::bitvector > &detail) |
Bins the given values so that each bin has nearly equal weight. More... | |
template<typename T > | |
static long | adaptiveInts (const array_t< T > &vals, const T vmin, const T vmax, uint32_t nbins, std::vector< double > &bounds, std::vector< uint32_t > &counts) |
The adaptive binning function for integer values. More... | |
template<typename T > | |
static long | adaptiveIntsDetailed (const ibis::bitvector &mask, const array_t< T > &vals, const T vmin, const T vmax, uint32_t nbins, std::vector< double > &bounds, std::vector< ibis::bitvector > &detail) |
Bins the given values so that each bin has nearly equal weight. More... | |
template<typename T , typename F > | |
static long | doComp (const array_t< T > &vals, F cmp, const ibis::bitvector &mask, array_t< T > &res, ibis::bitvector &hits) |
Evaluate the range condition. More... | |
template<typename T , typename F > | |
static long | doComp (const array_t< T > &vals, F cmp, const ibis::bitvector &mask, array_t< T > &res) |
Evaluate the range condition. More... | |
template<typename T , typename F > | |
static long | doComp (const array_t< T > &vals, F cmp, const ibis::bitvector &mask, ibis::bitvector &hits) |
Evaluate the range condition. More... | |
template<typename T , typename F1 , typename F2 > | |
static long | doComp (const array_t< T > &vals, F1 cmp1, F2 cmp2, const ibis::bitvector &mask, array_t< T > &res, ibis::bitvector &hits) |
Evaluate the range condition. More... | |
template<typename T , typename F1 , typename F2 > | |
static long | doComp (const array_t< T > &vals, F1 cmp1, F2 cmp2, const ibis::bitvector &mask, array_t< T > &res) |
Evaluate the range condition. More... | |
template<typename T , typename F1 , typename F2 > | |
static long | doComp (const array_t< T > &vals, F1 cmp1, F2 cmp2, const ibis::bitvector &mask, ibis::bitvector &hits) |
Evaluate the range condition. More... | |
template<typename T , typename F > | |
static long | doComp0 (const array_t< T > &vals, F cmp, const ibis::bitvector &mask, ibis::bitvector &hits) |
Evaluate the range condition. More... | |
template<typename T , typename F1 , typename F2 > | |
static long | doComp0 (const array_t< T > &vals, F1 cmp1, F2 cmp2, const ibis::bitvector &mask, ibis::bitvector &hits) |
This version uses uncompressed bitvector to store the scan results internally. More... | |
template<typename T > | |
static long | doCompare (const array_t< T > &array, const ibis::qRange &cmp, const ibis::bitvector &mask, ibis::array_t< T > &res, ibis::bitvector &hits) |
The function that performs the actual comparison for range queries. More... | |
template<typename T > | |
static long | doCompare (const char *file, const ibis::qRange &cmp, const ibis::bitvector &mask, ibis::array_t< T > &res, ibis::bitvector &hits) |
Perform comparisons with data in the named file. More... | |
template<typename T > | |
static long | doCompare (const array_t< T > &array, const ibis::qRange &cmp, const ibis::bitvector &mask, ibis::array_t< T > &res) |
The function that performs the actual comparison for range queries. More... | |
template<typename T > | |
static long | doCompare (const char *file, const ibis::qRange &cmp, const ibis::bitvector &mask, ibis::array_t< T > &res) |
Perform comparisons with data in the named file. More... | |
template<typename T > | |
static long | doCompare (const array_t< T > &array, const ibis::qRange &cmp, const ibis::bitvector &mask, ibis::bitvector &hits) |
The function that performs the actual comparison for range queries. More... | |
template<typename T > | |
static long | doCompare (const char *file, const ibis::qRange &cmp, const ibis::bitvector &mask, ibis::bitvector &hits) |
Perform comparisons with data in the named file. More... | |
template<typename T > | |
static long | doCompare (const array_t< T > &array, const ibis::qIntHod &cmp, const ibis::bitvector &mask, ibis::bitvector &hits) |
The function that performs the actual comparison for range queries. More... | |
template<typename T > | |
static long | doCompare (const char *file, const ibis::qIntHod &cmp, const ibis::bitvector &mask, ibis::bitvector &hits) |
Evaluate the range condition. More... | |
template<typename T > | |
static long | doCompare (const array_t< T > &array, const ibis::qUIntHod &cmp, const ibis::bitvector &mask, ibis::bitvector &hits) |
The function that performs the actual comparison for range queries. More... | |
template<typename T > | |
static long | doCompare (const char *file, const ibis::qUIntHod &cmp, const ibis::bitvector &mask, ibis::bitvector &hits) |
template<typename T > | |
static void | equalWeightBins (const array_t< T > &vals, uint32_t nbins, array_t< T > &bounds) |
template<typename E1 , typename E2 > | |
static void | mapValues (array_t< E1 > &val1, array_t< E2 > &val2, uint32_t nb1, uint32_t nb2, array_t< E1 > &bnd1, array_t< E2 > &bnd2, std::vector< uint32_t > &cnts) |
The templated function to decide the bin boundaries and count the number of values that fall in each bin. More... | |
template<typename T > | |
static void | mapValues (const array_t< T > &vals, std::map< T, uint32_t > &hist) |
template<typename T > | |
static long | negativeCompare (const array_t< T > &array, const ibis::qRange &cmp, const ibis::bitvector &mask, ibis::bitvector &hits) |
template<typename T > | |
static long | negativeCompare (const char *file, const ibis::qRange &cmp, const ibis::bitvector &mask, ibis::bitvector &hits) |
template<typename T > | |
static long | negativeCompare (const array_t< T > &array, const ibis::qIntHod &cmp, const ibis::bitvector &mask, ibis::bitvector &hits) |
Perform the negative comparison. More... | |
template<typename T > | |
static long | negativeCompare (const char *file, const ibis::qIntHod &cmp, const ibis::bitvector &mask, ibis::bitvector &hits) |
Perform the negative comparison. More... | |
template<typename T > | |
static long | negativeCompare (const array_t< T > &array, const ibis::qUIntHod &cmp, const ibis::bitvector &mask, ibis::bitvector &hits) |
template<typename T > | |
static long | negativeCompare (const char *file, const ibis::qUIntHod &cmp, const ibis::bitvector &mask, ibis::bitvector &hits) |
static int | reorderBitmap (ibis::bitvector &, const ibis::bitvector &, const ibis::array_t< uint32_t > &) |
Produce a reordered bit vector through the inverse order array. | |
Protected Attributes | |
char * | activeDir |
!< Number of events (rows) in the partition. | |
ibis::bitvector | amask |
!< Index specification. | |
char * | backupDir |
!< The active data directory. | |
std::vector< const column * > | colorder |
!< Active rows are marked 1. | |
columnList | columns |
!< The object IDs (row id). | |
char * | idxstr |
std::string | m_desc |
!< Name of the data partition. | |
char * | m_name |
ibis::resource::vList | metaList |
!< Free form description of the partition. | |
ibis::part::cleaner * | myCleaner |
!< Sizes of the dimensions. | |
uint32_t | nEvents |
!< List of the columns. | |
bool | readonly |
!< The cleaner for the file manager. | |
array_t< rid_t > * | rids |
!< Meta tags as name-value pairs. | |
std::vector< std::string > | shapeName |
!< An ordering of columns. | |
std::vector< uint32_t > | shapeSize |
!< Names of the dimensions. | |
TABLE_STATE | state |
!< Time of last switch operation. | |
time_t | switchTime |
!< The backup data directory. | |
Friends | |
class | cleaner |
struct | info |
class | mutexLock |
class | readLock |
class | softWriteLock |
class | writeLock |
typedef std::map< const char *, column *, lessi > | columnList |
!< A function to start backing up the active dir. More... | |
int | clear () |
A group of functions to manipulate the data partition. More... | |
int | updateData () |
Check the time stamp on the metadata files to decide if the in-memory metadata information requires updating. More... | |
long | append (const char *dir) |
Append data from dir . More... | |
long | commit (const char *dir) |
Commit the append operation involving data from dir . More... | |
long | rollback () |
Rollback the append operation. More... | |
long | addColumn (const char *aexpr, const char *cname, ibis::TYPE_T ctype=ibis::DOUBLE) |
Add a column computed with the given arithmetic expression. More... | |
long | addColumn (const ibis::math::term *xpr, ibis::bitvector &mask, const char *cname, ibis::TYPE_T ctype=ibis::DOUBLE) |
Add a column computed with the given arithmetic expression. | |
virtual long | reorder () |
Sort rows with the lowest cardinality column first. More... | |
virtual long | reorder (const ibis::table::stringArray &names) |
Reorder the rows using the given column list. | |
virtual long | reorder (const ibis::table::stringArray &names, const std::vector< bool > &directions) |
Sort rows according to the values of the columns specified in names. More... | |
long | deactivate (const std::vector< uint32_t > &rows) |
Mark the specified rows as inactive. More... | |
long | deactivate (const char *conds) |
Mark all rows satisfying the specified conditions as inactive. More... | |
long | reactivate (const std::vector< uint32_t > &rows) |
Make sure the specified rows are active. More... | |
long | reactivate (const char *conds) |
Make sure the rows satisfying the specified conditions are active. | |
long | purgeInactive () |
Purge all inactive rows from the partition. More... | |
void | emptyCache () const |
Empty all unused resources in cache. More... | |
void | getNullMask (ibis::bitvector &m) const |
Copy the mask of active rows. | |
const ibis::bitvector & | getMaskRef () const |
Return a reference to the mask of active rows. | |
void | rename (const ibis::partAssoc &known) |
Rename the partition to avoid conflicts with an existing list of data partitions. More... | |
void | rename (const char *) |
Change the name of the data partition to the given name. More... | |
ibis::fileManager::ACCESS_PREFERENCE | accessHint (const ibis::bitvector &mask, unsigned elemsize=4) const |
Evaluate the strategy for accessing a data file. | |
void | queryTest (const char *pref, long *nerrors) const |
Generate and run random queries for selfTest. More... | |
void | quickTest (const char *pref, long *nerrors) const |
Generate and run random queries for selfTest. More... | |
void | testRangeOperators (const ibis::column *col, long *nerrors) const |
Try a set of range conditions with different combinations of operators. More... | |
void | doBackup () |
The routine to perform the actual copying for making a backup copy. | |
int | gainReadAccess () const |
Attempt to gain a read access to this part object. More... | |
int | releaseAccess () const |
Attempt to release a read or write access to this part object. More... | |
int | gainWriteAccess () const |
Attempt to gain a write access to this part object. More... | |
int | tryWriteAccess () const |
A soft attempt to gain a write access to this part object. More... | |
static const char * | skipPrefix (const char *) |
Skip past all the dots in the given string. More... | |
static char * | readMetaTags (const char *const dir) |
A class function to read the meta tags in the tdc file. More... | |
static void | genName (const std::vector< const char * > &mtags, std::string &name) |
Generate name for a partition based on the meta tags. | |
static void | genName (const ibis::resource::vList &mtags, std::string &name) |
Generate name for a partition based on the meta tags. | |
static uint32_t | countPages (const ibis::bitvector &mask, unsigned elemsize=4) |
Estimate the number of pages to be accessed. More... | |
template<typename T > | |
static int | writeColumn (int fdes, ibis::bitvector::word_t nold, ibis::bitvector::word_t nnew, ibis::bitvector::word_t voffset, const array_t< T > &vals, const T &fill, ibis::bitvector &totmask, const ibis::bitvector &newmask) |
Write the content of vals to an open file. More... | |
static int | writeStrings (const char *fnm, ibis::bitvector::word_t nold, ibis::bitvector::word_t nnew, ibis::bitvector::word_t voffset, const std::vector< std::string > &vals, ibis::bitvector &totmask, const ibis::bitvector &newmask) |
Write strings to an open file. More... | |
static int | writeRaw (int bdes, int sdes, ibis::bitvector::word_t nold, ibis::bitvector::word_t nnew, ibis::bitvector::word_t voffset, const ibis::array_t< unsigned char > &bytes, const ibis::array_t< int64_t > &starts, ibis::bitvector &totmask, const ibis::bitvector &newmask) |
Write raw bytes to an open file. More... | |
static int | writeOpaques (int bdes, int sdes, ibis::bitvector::word_t nold, ibis::bitvector::word_t nnew, ibis::bitvector::word_t voffset, const std::vector< ibis::opaque > &opq, ibis::bitvector &totmask, const ibis::bitvector &newmask) |
Write raw bytes to an open file. More... | |
The class ibis::part represents a partition of a relational table.
The current implementation is designed to work with vertically partitioned data files. This class contains common information and operations on a data partition. It must have a name. Following SQL convention, the name must start with an underscore or an ASCII letter and may be followed by any number of ASCII alphanumeric characters or underscores.
typedef std::map< const char*, column*, lessi > ibis::part::columnList |
!< A function to start backing up the active dir.
An associative array for columns of data.
|
explicit |
Initialize a data partition object.
The incoming argument can be a directory name or a data partition name.
If it contains any forward or backward slash, it is assumed to be a directory name. If it names an existing directory, it is used as the primary directory for storing the data. Otherwise, it is assumed to be the name of a data partition. In which case, this function looks for data directory names in the global parameter list under the parameters 'name.activeDir' and 'name.backupDir' or 'name.dataDir1' and 'name.dataDir2'. If the name is a directory name, then no attempt shall be made to produce a second directory name.
The default value for name is a nil pointer. In this case, it will find 'dataDir1' and 'dataDir2' from the global parameter list.
The default argument for ro is false, which allows a new directory to be created and new data records to be appended. If the argument ro is true, then the specified data directory must already exist, otherwise, an exception is thrown. A data partition constructed with ro set to true will be called a read-only data partition and its content shall not be changed in the relational algebra view.
References ibis::fileManager::instance().
ibis::part::part | ( | const char * | adir, |
const char * | bdir, | ||
bool | ro = false |
||
) |
Initialize a table from the named directories.
Construct a partition from the named directories.
Originally, FastBit was designed to work with a pair of directories, adir
and bdir
. Therefore, the constructor takes a pair of directory names. In many cases, data is stored only in one directory, in which case simply give the data directory as adir
and leave bdir
as null. Prefer to have full and complete path.
The default argument for ro is false, which allows a new directory to be created and new data records to be appended. If the argument ro is true, then the specified data directory must already exist, otherwise, an exception is thrown. A data partition constructed with ro set to true will be called a read-only data partition and its content shall not be changed in the relational algebra view.
References activeDir, ibis::fileManager::addCleaner(), ibis::bitvector::adjustSize(), amask, backupDir, ibis::bitvector::cnt(), columns, ibis::util::envLock, ibis::fileManager::flushFile(), ibis::gParameters(), ibis::fileManager::instance(), ibis::resource::isStringTrue(), ibis::util::logMessage(), m_desc, makeBackupCopy(), ibis::util::makeDir(), myCleaner, name(), nEvents, print(), ibis::bitvector::read(), readMetaData(), readonly, readRIDs(), ibis::util::removeDir(), ibis::util::removeTail(), rids, ibis::bitvector::set(), ibis::array_t< T >::size(), ibis::bitvector::size(), state, ibis::util::strnewdup(), switchTime, and ibis::bitvector::write().
ibis::part::part | ( | const std::vector< const char * > & | mtags, |
bool | ro = false |
||
) |
Initialize a partition with given meta tags.
The meta tags are specified as a list of name-value strings, where each string is one name-value pair.
References genName(), ibis::fileManager::instance(), and setMetaTags().
ibis::part::part | ( | const ibis::resource::vList & | mtags, |
bool | ro = false |
||
) |
Initialize a partition with given meta tags.
The name-value pairs are specified in a structured form.
References genName(), ibis::fileManager::instance(), and setMetaTags().
|
staticprotected |
Adaptive binning through regularly spaced bins.
It goes through the arrays twice, once to compute the actual minimum and maximum values and once to count the entries in each bin. It produces three sets of bins: the 1-D bins for vals1 and vals2, and a 2-D bin at a high resolution. It then combines the 1-D bins to form nearly equal-weight bins and uses that grouping to decide how to combine the 2-D bins to form the final output.
References ibis::index::divideCounts(), ibis::util::incrDouble(), and ibis::array_t< T >::size().
|
staticprotected |
Adaptive binning through regularly spaced bins.
References ibis::index::divideCounts(), ibis::util::incrDouble(), ibis::array_t< T >::resize(), and ibis::array_t< T >::size().
|
staticprotected |
The adaptive binning function for floats and integers in wide ranges.
This function first constructs a number of fine uniform bins and then merges the fine bins to generate nearly equal-weight bins. This is likely to produce final bins that are not as equal in their weights as those produced from ibis::part::adaptiveInts, but it usually does less work and takes less time.
The number of fine bins used is the larger one of 8 times the number of desired bins and the geometric mean of the number of desired bins and the number of records in vals.
References ibis::index::divideCounts(), ibis::util::incrDouble(), and ibis::array_t< T >::size().
|
staticprotected |
Bins the given values so that each bin has nearly equal weight.
Instead of counting the number of entries in each bin, it returns bitvectors that mark the positions of the records. This version is for floating-point values and integer values with wide ranges. This function first bins the values into a relatively large number of fine equal-width bins and then coalesces nearby fine bins to form nearly equal-weight bins. The final bins produced this way are less likely to be very uniform in their weights, but it requires less internal work space and therefore may be faster than ibis::part::adaptiveIntsDetailed.
References ibis::bitvector::cnt(), ibis::index::divideCounts(), ibis::util::incrDouble(), ibis::bitvector::indexSet::nIndices(), ibis::array_t< T >::size(), ibis::bitvector::size(), and ibis::index::sumBits().
|
staticprotected |
The adaptive binning function for integer values.
It is intended for values within a relatively narrow range. The input arguments vmin and vmax must be the correct minimum and maximum values – it uses the minimum and maximum values to decide whether an exact histogram can be used internally; incorrect values for vmin or vmax may cause this function to misbehave!
It counts the frequency of each distinct value before deciding how to produce the equal-weight bins for output. Because it has the most detailed information possible, the output bins are most likely to be about equal. This comes with a cost of a detailed frequency count, which takes time and memory space to compute.
References ibis::index::divideCounts(), and ibis::array_t< T >::size().
|
staticprotected |
Bins the given values so that each bin has nearly equal weight.
Instead of counting the number of entries in each bin, it returns bitvectors that mark the positions of the records. This version is for integer values in relatively narrow ranges. It will count each distinct value separately, which gives the most accurate information for deciding how to produce equal-weight bins. If there are many distinct values, this function will require a considerable amount of internal memory to count each distinct value.
On successful completion of this function, the return value is the number of bins used. If the input array is empty, it returns 0 without modifying the content of the output arrays, bounds and detail. Either mask and vals have the same number of elements, or vals has as many elements as the number of ones (1) in mask, otherwise this function will return -51.
References ibis::bitvector::cnt(), ibis::index::divideCounts(), ibis::bitvector::indexSet::nIndices(), ibis::array_t< T >::size(), ibis::bitvector::size(), and ibis::index::sumBits().
long ibis::part::addColumn | ( | const char * | aexpr, |
const char * | cname, | ||
ibis::TYPE_T | ctype = ibis::DOUBLE |
||
) |
Add a column computed with the given arithmetic expression.
The arithmetic expression is evaluated in double and cast to the specified type.
References ibis::selectClause::aggExpr(), and ibis::selectClause::aggSize().
long ibis::part::append | ( | const char * | dir | ) |
Append data from dir
.
Append data in dir to the current data partition.
Return the number of rows actually added.
|
protected |
Perform append operation using only one data directory.
Must wait for all queries on the partition to finish before proceeding.
References ibis::fileManager::flushDir(), ibis::fileManager::flushFile(), ibis::gParameters(), ibis::fileManager::instance(), and ibis::util::logMessage().
|
protected |
Perform append operation with two data directories.
It appends the data to the backup directory first, then swaps the roles of the two directories.
References ibis::fileManager::flushDir(), ibis::fileManager::flushFile(), ibis::util::getFileSize(), ibis::gParameters(), ibis::fileManager::instance(), ibis::util::logMessage(), and ibis::util::removeDir().
|
protected |
Append data in dir
to the partition in the backup directory.
Return the number of rows actually appended.
References ibis::fileManager::buffer< T >::address(), ibis::column::append(), ibis::CATEGORY, ibis::horometer::CPUTime(), ibis::fileManager::flushDir(), ibis::util::getFileSize(), ibis::fileManager::instance(), ibis::util::logMessage(), ibis::column::name(), ibis::OID, ibis::horometer::realTime(), ibis::fileManager::buffer< T >::size(), ibis::horometer::start(), ibis::horometer::stop(), ibis::TEXT, ibis::TYPESTRING, and ibis::column::upperBound().
|
virtual |
Make sure indexes for all columns are available.
May use nthr
threads to build indexes. The argument iopt is used to build new indexes if the corresponding columns do not already have indexes.
References ibis::horometer::start().
Referenced by fastbit_build_indexes().
|
virtual |
Make sure indexes for all columns are available.
The sequence of strings are used as follows.
Here is an example sequence ("a%", "<binning none/>", "_b*", "<binning precision=2>", "bit-slice"). A column name that matches "a%", i.e., columns starting with 'a', will be indexed with option "<binning none/>". All column names that do not match the pattern "a%" will then be compared with "_b*". If they match, then the column will be indexed with option "<binning precision=2>". All other columns will be indexed with option "bit-slice".
May use nthr
threads to build indexes. The argument iopt is used to build new indexes if the corresponding columns do not already have indexes.
References ibis::horometer::start().
|
protected |
Generate a list of random query conditions.
It selects nc columns from the list of all columns and fills the array lst.conds and lst.super. The array lst.hits is resized to the correct size, but left to be filled with other functions. It generates at least nc-1 queries. When nq > nc, it may generate nq+1 queries because it always adds two subranges as queries together. This is to ensure that two sub-ranges of any given range are present together for checkQueryList.
References ibis::part::thrArg::hits, ibis::util::rand(), and ibis::part::thrArg::super.
void ibis::part::buildSorted | ( | const char * | cname | ) | const |
Build a sorted version of the specified column.
Will sort the base data of the named column if needed.
long ibis::part::calculate | ( | const ibis::math::term & | trm, |
const ibis::bitvector & | msk, | ||
array_t< double > & | res | ||
) | const |
Calculate the values of an arithmetic expression as doubles.
The arithmetic expression is applied to each row that is marked 1 in the mask, msk, with names in the arithmetic expression interpreted as column names. The resulting values are packed into the array res as doubles. Upon the successful completion of this function, the return value should be the number of records examined, which should be the same as msk.cnt() and res.size().
References ibis::array_t< T >::clear(), ibis::bitvector::cnt(), ibis::math::term::eval(), ibis::bitvector::indexSet::indices(), ibis::bitvector::indexSet::isRange(), ibis::bitvector::indexSet::nIndices(), ibis::part::barrel::open(), ibis::array_t< T >::push_back(), ibis::part::barrel::read(), ibis::horometer::realTime(), ibis::math::barrel::recordVariable(), ibis::array_t< T >::reserve(), ibis::array_t< T >::resize(), ibis::part::barrel::seek(), ibis::array_t< T >::size(), ibis::bitvector::size(), ibis::horometer::start(), ibis::horometer::stop(), and ibis::array_t< T >::swap().
Referenced by ibis::bord::append(), ibis::bord::groupbyc(), and ibis::bord::xgroupby().
long ibis::part::calculate | ( | const ibis::math::stringFunction1 & | trm, |
const ibis::bitvector & | msk, | ||
std::vector< std::string > & | res | ||
) | const |
Calculate the values of a math expression as strings.
The expression is applied to each row that is marked 1 in the mask, msk, with names in the arithmetic expression interpreted as column names. The resulting values are packed into the array res as strings. Upon the successful completion of this function, the return value should be the number of records examined, which should be the same as msk.cnt() and res.size().
References ibis::bitvector::cnt(), ibis::bitvector::indexSet::indices(), ibis::bitvector::indexSet::isRange(), ibis::bitvector::indexSet::nIndices(), ibis::part::barrel::open(), ibis::part::barrel::read(), ibis::horometer::realTime(), ibis::math::barrel::recordVariable(), ibis::part::barrel::seek(), ibis::bitvector::size(), ibis::horometer::start(), and ibis::horometer::stop().
|
protected |
Sum up the hits from sub-divisions to verify the hits computed from the whole range.
Based on the construction of buildQueryList, each query condition knows which conditions contain it.
References ibis::part::thrArg::hits, and ibis::part::thrArg::super.
int ibis::part::clear | ( | ) |
A group of functions to manipulate the data partition.
Clear the content of data in this object if it is not in use.
This is a soft request to clear everything, a hard request to clear the content is performed in the destructor of this class. This function is used in cases where one may remove the partition object if it is not in use, otherwise leave it alone. Currently, this is used in C API function fastbit_cleanup.
References ibis::util::emptyCache(), ibis::fileManager::instance(), ibis::part::softWriteLock::isLocked(), and ibis::fileManager::removeCleaner().
Referenced by ibis::util::gatherParts().
|
protected |
Produce a set of bitmaps corresponding to a set of coarse bins.
This function makes use of an existing index to produce bitmaps representing a set of bins defined by bnds.
Following the private convention used in FastBit, there are two open bins at the two ends.
References ibis::index::binBoundaries(), ibis::util::copy(), ibis::index::divideCounts(), ibis::index::evaluate(), ibis::column::getActualMax(), ibis::column::name(), ibis::array_t< T >::pop_back(), ibis::array_t< T >::resize(), and ibis::array_t< T >::size().
ibis::table::stringArray ibis::part::columnNames | ( | ) | const |
Return column names in a list.
The list contains raw pointers. These pointers are valid as long as the part objects are present in memory. The names are in the same order as specified during the construction of the data partition. If no order was specified at that time, the columns are in alphabetical order.
References ibis::array_t< T >::clear().
Referenced by ibis::bord::columnNames(), and ibis::query::setSelectClause().
long ibis::part::commit | ( | const char * | dir | ) |
Commit the append operation involving data from dir
.
Commit the active database.
dir
must match the content of directory passed to append
. Clearly, the easiest way to achieve this is to use the same directory. No longer able to roll back after this. Return the number of records committed.
References ibis::fileManager::flushDir(), ibis::fileManager::flushFile(), and ibis::fileManager::instance().
void ibis::part::computeMinMax | ( | ) |
Compute the min and max for each column.
Actually compute the min and max of each attribute and write out a new metadata file for the data partition.
Referenced by ibis::bord::column::computeMinMax().
|
static |
Estimate the number of pages to be accessed.
Assuming the pages are packed with values of wordsize bytes, this function examines the given bitvector to determine the number of pages that would be accessed in order to read all the positions marked 1.
The page size is determined by the function ibis::fileManager::pageSize.
References ibis::bitvector::cnt(), ibis::bitvector::indexSet::indices(), ibis::bitvector::indexSet::isRange(), ibis::bitvector::indexSet::nIndices(), and ibis::fileManager::pageSize().
Referenced by ibis::column::evaluateAndSelect(), and ibis::column::evaluateRange().
long ibis::part::deactivate | ( | const std::vector< uint32_t > & | rows | ) |
Mark the specified rows as inactive.
The integers in array rows are simply the row numbers. Note rows are numbered starting from 0. Return the number of rows inactive or error code.
References ibis::bitvector::cnt().
long ibis::part::deactivate | ( | const char * | conds | ) |
Mark all rows satisfying the specified conditions as inactive.
All rows satisfying the specified conditions will be made inactive. Return the number of rows inactive or error code.
References ibis::bitvector::cnt().
|
protected |
Rows marked 1 will become inactive.
Mark the rows identified in rows
as inactive.
Return the number of rows inactive or error code.
References ibis::bitvector::cnt(), ibis::fileManager::flushFile(), ibis::fileManager::instance(), ibis::bitvector::size(), and ibis::bitvector::write().
|
protected |
Convert the string describing the shape into internal storage format.
Digest the column shape string read from metadata file.
References ibis::util::logMessage().
|
staticprotected |
Evaluate the range condition.
Accepts an externally passed comparison operator. It chooses whether the bitvector hits
will be compressed internally based on the number of set bits in the mask
.
References ibis::array_t< T >::capacity(), ibis::array_t< T >::clear(), ibis::bitvector::clear(), ibis::bitvector::cnt(), ibis::bitvector::copy(), ibis::bitvector::indexSet::nIndices(), ibis::array_t< T >::nosharing(), ibis::array_t< T >::push_back(), ibis::array_t< T >::reserve(), ibis::bitvector::setBit(), ibis::array_t< T >::size(), and ibis::bitvector::size().
|
staticprotected |
Evaluate the range condition.
Accepts an externally passed comparison operator. It chooses whether the bitvector hits
will be compressed internally based on the number of set bits in the mask
.
References ibis::array_t< T >::capacity(), ibis::array_t< T >::clear(), ibis::bitvector::cnt(), ibis::bitvector::indexSet::nIndices(), ibis::array_t< T >::nosharing(), ibis::array_t< T >::push_back(), ibis::array_t< T >::reserve(), ibis::array_t< T >::size(), and ibis::bitvector::size().
|
staticprotected |
Evaluate the range condition.
Accepts an externally passed comparison operator. It chooses whether the bitvector hits
will be compressed internally based on the number of set bits in the mask
.
References ibis::bitvector::adjustSize(), ibis::bitvector::clear(), ibis::bitvector::cnt(), ibis::bitvector::compress(), ibis::bitvector::decompress(), ibis::bitvector::indexSet::nIndices(), ibis::bitvector::reserve(), ibis::bitvector::set(), ibis::bitvector::setBit(), ibis::array_t< T >::size(), ibis::bitvector::size(), and ibis::bitvector::sloppyCount().
|
staticprotected |
Evaluate the range condition.
The actual comparison functions are only applied on rows with mask == 1. The values satisfying the comparison operators are stored in res. This function reserves enough space in res for about half of the set bits in mask to avoid repeat reallocation of space for res. This space reservation will likely increase memory usage.
References ibis::bitvector::adjustSize(), ibis::array_t< T >::capacity(), ibis::array_t< T >::clear(), ibis::bitvector::clear(), ibis::bitvector::cnt(), ibis::bitvector::copy(), ibis::bitvector::indexSet::nIndices(), ibis::array_t< T >::nosharing(), ibis::array_t< T >::push_back(), ibis::array_t< T >::reserve(), ibis::bitvector::setBit(), ibis::array_t< T >::size(), and ibis::bitvector::size().
|
staticprotected |
Evaluate the range condition.
The actual comparison functions are only applied on rows with mask == 1. The values satisfying the comparison operators are stored in res. This function reserves enough space in res for about half of the set bits in mask to avoid repeat reallocation of space for res. This space reservation will likely increase memory usage.
References ibis::array_t< T >::capacity(), ibis::array_t< T >::clear(), ibis::bitvector::cnt(), ibis::bitvector::indexSet::nIndices(), ibis::array_t< T >::nosharing(), ibis::array_t< T >::push_back(), ibis::array_t< T >::reserve(), ibis::array_t< T >::size(), and ibis::bitvector::size().
|
staticprotected |
Evaluate the range condition.
The actual comparison functions are only applied on rows with mask == 1. The actual scan function. This one chooses whether the internal bitvector for storing the scan results will be compressed or not. It always returns a compressed bitvector.
References ibis::bitvector::adjustSize(), ibis::bitvector::clear(), ibis::bitvector::cnt(), ibis::bitvector::compress(), ibis::bitvector::decompress(), ibis::bitvector::indexSet::nIndices(), ibis::bitvector::reserve(), ibis::bitvector::set(), ibis::bitvector::setBit(), ibis::array_t< T >::size(), ibis::bitvector::size(), and ibis::bitvector::sloppyCount().
|
staticprotected |
Evaluate the range condition.
The actual comparison function is only applied on rows with mask == 1. This version uses an uncompressed bitvector to store the scan results internally.
References ibis::bitvector::cnt(), ibis::bitvector::compress(), ibis::bitvector::decompress(), ibis::bitvector::indexSet::nIndices(), ibis::bitvector::set(), ibis::array_t< T >::size(), ibis::bitvector::size(), ibis::bitvector::sloppyCount(), and ibis::bitvector::turnOnRawBit().
|
staticprotected |
This version uses uncompressed bitvector to store the scan results internally.
References ibis::bitvector::cnt(), ibis::bitvector::compress(), ibis::bitvector::decompress(), ibis::bitvector::indexSet::nIndices(), ibis::bitvector::set(), ibis::array_t< T >::size(), ibis::bitvector::size(), ibis::bitvector::sloppyCount(), and ibis::bitvector::turnOnRawBit().
|
staticprotected |
The function that performs the actual comparison for range queries.
The size of array may either match the number of bits in mask
or the number of set bits in mask
. This allows one to either use the whole array or only the elements needed for this operation. The values satisfying the query condition are stored in res.
The return value is the number of elements in res or a negative number to indicate error.
References ibis::array_t< T >::capacity(), ibis::array_t< T >::clear(), ibis::bitvector::cnt(), ibis::qRange::colName(), ibis::bitvector::indexSet::indices(), ibis::qRange::inRange(), ibis::bitvector::indexSet::isRange(), ibis::bitvector::indexSet::nIndices(), ibis::array_t< T >::nosharing(), ibis::array_t< T >::push_back(), ibis::horometer::realTime(), ibis::array_t< T >::reserve(), ibis::array_t< T >::size(), ibis::bitvector::size(), ibis::horometer::start(), and ibis::horometer::stop().
|
staticprotected |
Perform comparisons with data in the named file.
Place the values satisfying the specified condition into res. This function attempts to allocate a buffer so the reading can be done in relatively large-size chunks. If it is unable to allocate a useful buffer, it will read one value at a time.
References ibis::fileManager::buffer< T >::address(), ibis::bitvector::adjustSize(), ibis::array_t< T >::capacity(), ibis::array_t< T >::clear(), ibis::bitvector::clear(), ibis::bitvector::cnt(), ibis::qRange::colName(), ibis::bitvector::compress(), ibis::bitvector::decompress(), ibis::bitvector::indexSet::indices(), ibis::qRange::inRange(), ibis::fileManager::instance(), ibis::bitvector::indexSet::isRange(), ibis::bitvector::indexSet::nIndices(), ibis::array_t< T >::nosharing(), ibis::array_t< T >::push_back(), ibis::horometer::realTime(), ibis::fileManager::recordPages(), ibis::array_t< T >::reserve(), ibis::bitvector::set(), ibis::bitvector::setBit(), ibis::array_t< T >::size(), ibis::bitvector::size(), ibis::fileManager::buffer< T >::size(), ibis::horometer::start(), ibis::horometer::stop(), and UnixOpen.
|
staticprotected |
The function that performs the actual comparison for range queries.
The size of array may either match the number of bits in mask
or the number of set bits in mask
. This allows one to either use the whole array or only the elements needed for this operation. The values satisfying the query condition are stored in res.
The return value is the number of elements in res or a negative number to indicate error.
References ibis::array_t< T >::capacity(), ibis::array_t< T >::clear(), ibis::bitvector::cnt(), ibis::qRange::colName(), ibis::bitvector::indexSet::indices(), ibis::qRange::inRange(), ibis::bitvector::indexSet::isRange(), ibis::bitvector::indexSet::nIndices(), ibis::array_t< T >::nosharing(), ibis::array_t< T >::push_back(), ibis::horometer::realTime(), ibis::array_t< T >::reserve(), ibis::array_t< T >::size(), ibis::bitvector::size(), ibis::horometer::start(), and ibis::horometer::stop().
|
staticprotected |
Perform comparisons with data in the named file.
Place the values satisfying the specified condition into res. This function attempts to allocate a buffer so the reading can be done in relatively large-size chunks. If it is unable to allocate a useful buffer, it will read one value at a time.
References ibis::fileManager::buffer< T >::address(), ibis::array_t< T >::capacity(), ibis::array_t< T >::clear(), ibis::bitvector::cnt(), ibis::qRange::colName(), ibis::bitvector::indexSet::indices(), ibis::qRange::inRange(), ibis::fileManager::instance(), ibis::bitvector::indexSet::isRange(), ibis::bitvector::indexSet::nIndices(), ibis::array_t< T >::nosharing(), ibis::array_t< T >::push_back(), ibis::horometer::realTime(), ibis::fileManager::recordPages(), ibis::array_t< T >::reserve(), ibis::array_t< T >::size(), ibis::fileManager::buffer< T >::size(), ibis::horometer::start(), ibis::horometer::stop(), and UnixOpen.
|
staticprotected |
The function that performs the actual comparison for range queries.
The size of array may either match the number of bits in mask
or the number of set bits in mask
. This allows one to either use the whole array or only the elements needed for this operation. In either case, only mask.cnt() elements of array are checked but the positions of the bits that need to be set in the output bitvector hits
have to be handled differently.
References ibis::bitvector::adjustSize(), ibis::bitvector::clear(), ibis::bitvector::cnt(), ibis::qRange::colName(), ibis::bitvector::compress(), ibis::bitvector::decompress(), ibis::bitvector::indexSet::indices(), ibis::qRange::inRange(), ibis::bitvector::indexSet::isRange(), ibis::bitvector::indexSet::nIndices(), ibis::horometer::realTime(), ibis::bitvector::reserve(), ibis::bitvector::set(), ibis::bitvector::setBit(), ibis::array_t< T >::size(), ibis::bitvector::size(), ibis::horometer::start(), and ibis::horometer::stop().
|
staticprotected |
Perform comparisons with data in the named file.
It attempts to allocate a buffer so the reading can be done in relatively large-size chunks. If it is unable to allocate a useful buffer, it will read one value at a time.
References ibis::fileManager::buffer< T >::address(), ibis::bitvector::adjustSize(), ibis::bitvector::clear(), ibis::bitvector::cnt(), ibis::qRange::colName(), ibis::bitvector::compress(), ibis::bitvector::decompress(), ibis::bitvector::indexSet::indices(), ibis::qRange::inRange(), ibis::fileManager::instance(), ibis::bitvector::indexSet::isRange(), ibis::bitvector::indexSet::nIndices(), ibis::horometer::realTime(), ibis::fileManager::recordPages(), ibis::bitvector::reserve(), ibis::bitvector::set(), ibis::bitvector::setBit(), ibis::bitvector::size(), ibis::fileManager::buffer< T >::size(), ibis::bitvector::sloppyCount(), ibis::horometer::start(), ibis::horometer::stop(), and UnixOpen.
|
staticprotected |
The function that performs the actual comparison for range queries.
The size of array may either match the number of bits in mask
or the number of set bits in mask
. This allows one to either use the whole array or only the elements needed for this operation. In either case, only mask.cnt() elements of array are checked but the positions of the bits that need to be set in the output bitvector hits
have to be handled differently.
References ibis::bitvector::adjustSize(), ibis::bitvector::clear(), ibis::bitvector::cnt(), ibis::qIntHod::colName(), ibis::bitvector::compress(), ibis::bitvector::decompress(), ibis::bitvector::indexSet::indices(), ibis::qIntHod::inRange(), ibis::bitvector::indexSet::isRange(), ibis::bitvector::indexSet::nIndices(), ibis::horometer::realTime(), ibis::bitvector::reserve(), ibis::bitvector::set(), ibis::bitvector::setBit(), ibis::array_t< T >::size(), ibis::bitvector::size(), ibis::horometer::start(), and ibis::horometer::stop().
|
staticprotected |
Evaluate the range condition.
The actual comparison function is only applied on rows with mask == 1.
References ibis::fileManager::buffer< T >::address(), ibis::bitvector::adjustSize(), ibis::bitvector::clear(), ibis::bitvector::cnt(), ibis::qIntHod::colName(), ibis::bitvector::compress(), ibis::bitvector::decompress(), ibis::bitvector::indexSet::indices(), ibis::qIntHod::inRange(), ibis::fileManager::instance(), ibis::bitvector::indexSet::isRange(), ibis::bitvector::indexSet::nIndices(), ibis::horometer::realTime(), ibis::fileManager::recordPages(), ibis::bitvector::reserve(), ibis::bitvector::set(), ibis::bitvector::setBit(), ibis::bitvector::size(), ibis::fileManager::buffer< T >::size(), ibis::bitvector::sloppyCount(), ibis::horometer::start(), ibis::horometer::stop(), and UnixOpen.
|
staticprotected |
The function that performs the actual comparison for range queries.
The size of array may either match the number of bits in mask
or the number of set bits in mask
. This allows one to either use the whole array or only the elements needed for this operation. In either case, only mask.cnt() elements of array are checked but the positions of the bits that need to be set in the output bitvector hits
have to be handled differently.
References ibis::bitvector::adjustSize(), ibis::bitvector::clear(), ibis::bitvector::cnt(), ibis::qUIntHod::colName(), ibis::bitvector::compress(), ibis::bitvector::decompress(), ibis::bitvector::indexSet::indices(), ibis::qUIntHod::inRange(), ibis::bitvector::indexSet::isRange(), ibis::bitvector::indexSet::nIndices(), ibis::horometer::realTime(), ibis::bitvector::reserve(), ibis::bitvector::set(), ibis::bitvector::setBit(), ibis::array_t< T >::size(), ibis::bitvector::size(), ibis::horometer::start(), and ibis::horometer::stop().
|
protected |
Count the number of rows satisfying the range expression.
T is an elementary type.
References ibis::qIntHod::inRange(), and ibis::bitvector::indexSet::nIndices().
|
protected |
Count the number of rows satisfying the range expression.
T is an elementary type.
References ibis::qUIntHod::inRange(), and ibis::bitvector::indexSet::nIndices().
|
protected |
Count the number of rows satisfying the range expression.
T is an elementary type.
References ibis::qRange::inRange(), and ibis::bitvector::indexSet::nIndices().
|
protected |
A specialization of template part::doCount for float values.
Note that the comparisons are performed as doubles.
References ibis::bitvector::adjustSize(), ibis::bitvector::cnt(), ibis::qUIntHod::colName(), ibis::column::getNullMask(), ibis::qExpr::getType(), ibis::column::getValuesArray(), ibis::qContinuousRange::leftBound(), ibis::qContinuousRange::rightBound(), and ibis::array_t< T >::size().
|
virtual |
Evaluate the range condition.
Scan the base data to resolve the range condition. Without a user specified mask, all non-NULL values are examined.
References ibis::bitvector::clear(), ibis::qRange::colName(), ibis::column::getNullMask(), and ibis::bitvector::size().
Referenced by ibis::bord::column::evaluateRange().
|
virtual |
Evaluate the range condition on the records that are marked 1 in the mask.
The i'th element of the column is examined if mask[i] is set (mask[i] == 1).
References ibis::bitvector::adjustSize(), ibis::CATEGORY, ibis::bitvector::cnt(), ibis::qRange::colName(), ibis::column::dataFileName(), ibis::DOUBLE, ibis::column::estimateRange(), ibis::FLOAT, ibis::qExpr::getType(), ibis::column::getValuesArray(), ibis::INT, ibis::LONG, ibis::bitvector::set(), ibis::bitvector::setBit(), ibis::SHORT, ibis::array_t< T >::size(), ibis::bitvector::size(), ibis::TEXT, ibis::column::type(), ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
|
virtual |
Evaluate the range condition and record the values satisfying the condition in res.
The tests are only performed on the records that are marked 1 in the mask (mask[i] == 1). This function only works for integers and floating-point numbers.
References ibis::bitvector::cnt(), ibis::qRange::colName(), ibis::column::dataFileName(), ibis::DOUBLE, ibis::FLOAT, ibis::qExpr::getType(), ibis::column::getValuesArray(), ibis::INT, ibis::LONG, ibis::SHORT, ibis::bitvector::size(), ibis::column::type(), ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
|
virtual |
Evaluate the range condition and record the values satisfying the condition in res.
The tests are only performed on the records that are marked 1 in the mask (mask[i] == 1). This function only works for integers and floating-point numbers.
References ibis::bitvector::cnt(), ibis::qRange::colName(), ibis::column::dataFileName(), ibis::DOUBLE, ibis::util::emptyCache(), ibis::FLOAT, ibis::qExpr::getType(), ibis::column::getValuesArray(), ibis::INT, ibis::LONG, ibis::bitvector::set(), ibis::SHORT, ibis::bitvector::size(), ibis::column::type(), ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
|
virtual |
Treat the arithmetic expression as true or false.
The arithmetic expression is evaluated, nonzero values are treated as true and others are treated as false. This function only uses the test 'eval() != 0', which will treat all NaN as false.
References ibis::bitvector::adjustSize(), ibis::bitvector::clear(), ibis::bitvector::cnt(), ibis::bitvector::copy(), ibis::math::term::eval(), ibis::bitvector::indexSet::indices(), ibis::bitvector::indexSet::isRange(), ibis::bitvector::indexSet::nIndices(), ibis::part::barrel::open(), ibis::part::barrel::read(), ibis::horometer::realTime(), ibis::math::barrel::recordVariable(), ibis::part::barrel::seek(), ibis::bitvector::set(), ibis::bitvector::setBit(), ibis::bitvector::size(), ibis::bitvector::sloppyCount(), ibis::horometer::start(), and ibis::horometer::stop().
|
virtual |
Sequential scan without a mask.
It assumes that every valid row is to be examined.
References ibis::bitvector::set().
|
virtual |
Locate the records that have mark value 1 and satisfy the complex range conditions.
This implementation uses ibis::part::barrel for handling actual values needed.
References ibis::bitvector::clear(), ibis::bitvector::cnt(), ibis::bitvector::compress(), ibis::bitvector::copy(), ibis::bitvector::decompress(), ibis::qExpr::getLeft(), ibis::qExpr::getRight(), ibis::bitvector::indexSet::indices(), ibis::compRange::inRange(), ibis::bitvector::indexSet::isRange(), ibis::bitvector::indexSet::nIndices(), ibis::part::barrel::open(), ibis::part::barrel::read(), ibis::horometer::realTime(), ibis::math::barrel::recordVariable(), ibis::bitvector::reserve(), ibis::part::barrel::seek(), ibis::bitvector::set(), ibis::bitvector::setBit(), ibis::bitvector::size(), ibis::bitvector::sloppyCount(), ibis::horometer::start(), and ibis::horometer::stop().
|
static |
Locate the records that satisfy the range condition.
A generic scan function that relies on the virtual function ibis::qRange::inRange. This static member function works on an array provided by the caller. Since the values are provided, this function does not check the name of the variable involved in the range condition.
References ibis::bitvector::cnt(), ibis::bitvector::compress(), ibis::bitvector::decompress(), ibis::qRange::inRange(), ibis::bitvector::indexSet::nIndices(), ibis::horometer::realTime(), ibis::bitvector::set(), ibis::bitvector::setBit(), ibis::array_t< T >::size(), ibis::bitvector::size(), ibis::horometer::start(), and ibis::horometer::stop().
long ibis::part::doScan | ( | const array_t< T > & | vals, |
const ibis::qContinuousRange & | rng, | ||
const ibis::bitvector & | mask, | ||
ibis::bitvector & | hits | ||
) |
Evaluate the range condition on the in-memory values.
This static member function works on an array that is provided by the caller. Since the values are provided, this function does not check the name of the variable involved in the range condition.
References ibis::bitvector::cnt(), ibis::bitvector::copy(), ibis::qContinuousRange::leftBound(), ibis::horometer::realTime(), ibis::qContinuousRange::rightBound(), ibis::util::round_down(), ibis::bitvector::set(), ibis::bitvector::size(), ibis::bitvector::sloppyCount(), ibis::horometer::start(), and ibis::horometer::stop().
long ibis::part::doScan | ( | const array_t< float > & | vals, |
const ibis::qContinuousRange & | rng, | ||
const ibis::bitvector & | mask, | ||
ibis::bitvector & | hits | ||
) |
Examine the range condition with in memory values.
A specialization of the template for float arrays. All comparisons are performed as doubles.
References ibis::bitvector::cnt(), ibis::bitvector::copy(), ibis::qContinuousRange::leftBound(), ibis::horometer::realTime(), ibis::qContinuousRange::rightBound(), ibis::bitvector::set(), ibis::bitvector::size(), ibis::bitvector::sloppyCount(), ibis::horometer::start(), and ibis::horometer::stop().
long ibis::part::doScan | ( | const array_t< double > & | vals, |
const ibis::qContinuousRange & | rng, | ||
const ibis::bitvector & | mask, | ||
ibis::bitvector & | hits | ||
) |
Examine the range condition with in memory values.
A specialization of the template for double values.
References ibis::bitvector::cnt(), ibis::bitvector::copy(), ibis::qContinuousRange::leftBound(), ibis::horometer::realTime(), ibis::qContinuousRange::rightBound(), ibis::bitvector::set(), ibis::bitvector::size(), ibis::bitvector::sloppyCount(), ibis::horometer::start(), and ibis::horometer::stop().
long ibis::part::doScan | ( | const array_t< T > & | vals, |
const ibis::qContinuousRange & | rng, | ||
const ibis::bitvector & | mask, | ||
array_t< T > & | res | ||
) |
Evaluate the range condition on the in-memory values.
This static member function works on integer data provided by the caller. Since the values are provided, this function does not check the name of the variable involved in the range condition.
References ibis::array_t< T >::clear(), ibis::bitvector::cnt(), ibis::qContinuousRange::leftBound(), ibis::bitvector::indexSet::nIndices(), ibis::array_t< T >::push_back(), ibis::horometer::realTime(), ibis::array_t< T >::reserve(), ibis::qContinuousRange::rightBound(), ibis::util::round_down(), ibis::array_t< T >::size(), ibis::bitvector::size(), ibis::horometer::start(), and ibis::horometer::stop().
long ibis::part::doScan | ( | const array_t< T > & | vals, |
const ibis::qContinuousRange & | rng, | ||
const ibis::bitvector & | mask, | ||
array_t< T > & | res, | ||
ibis::bitvector & | hits | ||
) |
Evaluate the range condition on the in-memory values.
This static member function works on integer data provided by the caller. Since the values are provided, this function does not check the name of the variable involved in the range condition.
References ibis::bitvector::adjustSize(), ibis::array_t< T >::clear(), ibis::bitvector::cnt(), ibis::qContinuousRange::leftBound(), ibis::bitvector::indexSet::nIndices(), ibis::array_t< T >::push_back(), ibis::horometer::realTime(), ibis::array_t< T >::reserve(), ibis::qContinuousRange::rightBound(), ibis::util::round_down(), ibis::array_t< T >::size(), ibis::bitvector::size(), ibis::horometer::start(), and ibis::horometer::stop().
long ibis::part::doScan | ( | const array_t< float > & | vals, |
const ibis::qContinuousRange & | rng, | ||
const ibis::bitvector & | mask, | ||
array_t< float > & | res | ||
) |
Examine the range condition with in memory values.
A specialization of the template for float arrays. All comparisons are performed as doubles.
References ibis::array_t< T >::clear(), ibis::bitvector::cnt(), ibis::qContinuousRange::leftBound(), ibis::bitvector::indexSet::nIndices(), ibis::array_t< T >::push_back(), ibis::horometer::realTime(), ibis::array_t< T >::reserve(), ibis::qContinuousRange::rightBound(), ibis::array_t< T >::size(), ibis::bitvector::size(), ibis::horometer::start(), and ibis::horometer::stop().
long ibis::part::doScan | ( | const array_t< double > & | vals, |
const ibis::qContinuousRange & | rng, | ||
const ibis::bitvector & | mask, | ||
array_t< double > & | res | ||
) |
Examine the range condition with in memory values.
A specialization of the template for double values.
References ibis::array_t< T >::clear(), ibis::bitvector::cnt(), ibis::qContinuousRange::leftBound(), ibis::bitvector::indexSet::nIndices(), ibis::array_t< T >::push_back(), ibis::horometer::realTime(), ibis::array_t< T >::reserve(), ibis::qContinuousRange::rightBound(), ibis::array_t< T >::size(), ibis::bitvector::size(), ibis::horometer::start(), and ibis::horometer::stop().
long ibis::part::doScan | ( | const array_t< float > & | vals, |
const ibis::qContinuousRange & | rng, | ||
const ibis::bitvector & | mask, | ||
array_t< float > & | res, | ||
ibis::bitvector & | hits | ||
) |
Examine the range condition with in memory values.
A specialization of the template for float arrays. All comparisons are performed as doubles.
References ibis::bitvector::adjustSize(), ibis::array_t< T >::clear(), ibis::bitvector::cnt(), ibis::bitvector::copy(), ibis::qContinuousRange::leftBound(), ibis::horometer::realTime(), ibis::qContinuousRange::rightBound(), ibis::array_t< T >::size(), ibis::bitvector::size(), ibis::bitvector::sloppyCount(), ibis::horometer::start(), and ibis::horometer::stop().
long ibis::part::doScan | ( | const array_t< double > & | vals, |
const ibis::qContinuousRange & | rng, | ||
const ibis::bitvector & | mask, | ||
array_t< double > & | res, | ||
ibis::bitvector & | hits | ||
) |
Examine the range condition with in memory values.
A specialization of the template for double values.
References ibis::bitvector::adjustSize(), ibis::array_t< T >::clear(), ibis::bitvector::cnt(), ibis::bitvector::copy(), ibis::qContinuousRange::leftBound(), ibis::horometer::realTime(), ibis::qContinuousRange::rightBound(), ibis::array_t< T >::size(), ibis::bitvector::size(), ibis::bitvector::sloppyCount(), ibis::horometer::start(), and ibis::horometer::stop().
void ibis::part::emptyCache | ( | ) | const |
Empty all unused resources in cache.
This function attempts to unload all the indexes and then remove all unused files from the file manager. The caller should hold a write lock on this data partition to prevent concurrent accesses to this part object.
References ibis::fileManager::flushDir(), and ibis::fileManager::instance().
Referenced by ibis::bord::backup().
void ibis::part::equalWeightBins | ( | const array_t< float > & | vals, |
uint32_t | nbins, | ||
array_t< float > & | bounds | ||
) |
Explicit specialization for float arrays.
Goes through the data twice, once to find the actual min and max values, and once to place the values in ten times as many bins as desired. It then coalesces the finer bins into the desired number of bins.
References ibis::array_t< T >::clear(), ibis::util::compactValue(), ibis::util::compactValue2(), ibis::index::divideCounts(), ibis::array_t< T >::push_back(), ibis::array_t< T >::reserve(), ibis::array_t< T >::resize(), and ibis::array_t< T >::size().
void ibis::part::equalWeightBins | ( | const array_t< double > & | vals, |
uint32_t | nbins, | ||
array_t< double > & | bounds | ||
) |
Explicit specialization for double arrays.
Goes through the data twice, once to find the actual min and max values, and once to place the values in ten times as many bins as desired. It then coalesces the finer bins into the desired number of bins.
References ibis::array_t< T >::clear(), ibis::util::compactValue(), ibis::util::compactValue2(), ibis::index::divideCounts(), ibis::array_t< T >::push_back(), ibis::array_t< T >::reserve(), ibis::array_t< T >::resize(), and ibis::array_t< T >::size().
|
virtual |
Estimate a lower bound and an upper bound on the records that are hits.
The bitvector low
contains records that are hits (for sure) and the bitvector high
contains records that are possible hits.
References ibis::bitvector::set(), ibis::array_t< T >::size(), and ibis::bitvector::size().
|
virtual |
Estimate a continuous range condition.
Return sure hits in bitvector low, and sure hits plus candidates in bitvector high.
An alternative view is that low and high represent a lower bound and an upper bound of the actual hits.
References ibis::bitvector::clear(), ibis::bitvector::cnt(), ibis::qContinuousRange::colName(), ibis::column::estimateRange(), ibis::bitvector::set(), and ibis::bitvector::size().
|
inline |
Evaluate a self-join.
Return the number of pairs satisfying join condition. Only records marked with mask=1 are considered. The result pairs are stored in the bitvector pairs. A pair <i, j> would be marked at position i*nRows() + j in pairs.
|
inline |
Return the number of pairs satisfying the join condition.
In addition, write the pairs into the file named pairfile.
|
inline |
Evaluate a join defined with multiple (conjunctive) range join conditions.
int64_t ibis::part::evaluateJoin | ( | const ibis::deprecatedJoin & | cmp, |
const ibis::bitvector64 & | trial, | ||
ibis::bitvector64 & | result | ||
) | const |
Evaluate all pairs in trial to determine whether they really satisfy the range join defined in cmp.
The result is stored in the argument result. This function returns the number of hits found.
References ibis::bitvector64::cnt(), ibis::math::term::eval(), ibis::bitvector64::set(), and ibis::bitvector64::size().
int64_t ibis::part::evaluateJoin | ( | const std::vector< const ibis::deprecatedJoin * > & | cmp, |
const ibis::bitvector64 & | trial, | ||
ibis::bitvector64 & | result | ||
) | const |
Check a set of pairs defined in trial.
This version works on multiple (conjunctive) join conditions.
References ibis::bitvector64::adjustSize(), ibis::bitvector64::clear(), ibis::bitvector64::cnt(), ibis::horometer::CPUTime(), ibis::util::logMessage(), ibis::part::barrel::open(), ibis::part::barrel::read(), ibis::horometer::realTime(), ibis::math::barrel::recordVariable(), ibis::part::barrel::seek(), ibis::bitvector64::set(), ibis::bitvector64::setBit(), ibis::bitvector64::size(), ibis::horometer::start(), and ibis::horometer::stop().
long ibis::part::evaluateRIDSet | ( | const ibis::RIDSet & | in, |
ibis::bitvector & | hits | ||
) | const |
Convert a list of RIDs into a bitvector.
If a list of external RIDs is available, sort those RIDs and search through them; otherwise, assume the incoming numbers are row numbers and mark the corresponding positions of hits.
Return a negative value to indicate error, 0 to indicate no hit, and positive value to indicate there are zero or more hits.
References ibis::bitvector::adjustSize(), ibis::bitvector::cnt(), ibis::bitvector::setBit(), ibis::array_t< T >::size(), ibis::bitvector::sloppyCount(), and ibis::util::sortRIDs().
Referenced by ibis::query::limit().
|
inline |
Does this partition have an explicit RID column? Returns true for yes, false for no.
References nEvents.
Referenced by ibis::query::getRIDs().
|
protected |
Fill the bitvectors representing the 1D bins.
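The number of bins defined by the given (begin, end, stride)-triplet is 1 + floor((end-begin)/stride), with the following bin boundaries: [begin, begin+stride), [begin+stride, begin+2*stride), ..., [begin+stride*floor((end-begin)/stride), end].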
This function detects two error conditions. It returns -11 to indicate that mask and the number of values do not match. Normally, the number of elements in vals is either mask.size() or mask.cnt(). It returns -10 if the triplet (begin, end, stride) does not define a valid set of bins or defines more than 1 billion bins. Upon successful completion of this function, the return value is the number of bins, i.e. bins.size().
References ibis::bitvector::cnt(), ibis::bitvector::indexSet::nIndices(), ibis::array_t< T >::size(), and ibis::bitvector::size().
|
protected |
Fill the bitvectors representing the 1D bins.
This version returns a vector of pointers to bitvectors.
It can reduce the memory usage and reduce execution time if the majority of the bins are empty.
References ibis::bitvector::cnt(), ibis::bitvector::indexSet::nIndices(), ibis::array_t< T >::size(), and ibis::bitvector::size().
|
protected |
Fill the bitvectors representing the 1D bins.
Mark the positions of records falling in each bin and compute the total weight in each bin.
This version returns a vector of pointers to bitvectors. It can reduce the memory usage and reduce execution time if the majority of the bins are empty.
References ibis::bitvector::cnt(), ibis::bitvector::indexSet::nIndices(), ibis::array_t< T >::size(), and ibis::bitvector::size().
|
protected |
Fill the bitvectors representing the 2D bins.
The pair of triplets, (begin1, end1, stride1) and (begin2, end2, stride2), define (1 + floor((end1-begin1)/stride1)) * (1 + floor((end2-begin2)/stride2)) 2D bins.
The 2D bins are packed into the 1D array bins in raster scan order, with the second dimension as the faster varying dimension.
References ibis::bitvector::cnt(), ibis::bitvector::indexSet::nIndices(), ibis::array_t< T >::size(), and ibis::bitvector::size().
|
protected |
Fill the bitvectors representing the 2D bins.
This version returns a vector of pointers to bitmaps.
Because the empty bitmaps are left as null pointers, it can reduce the memory usage and the execution time if the majority of the bins are empty.
References ibis::bitvector::cnt(), ibis::bitvector::indexSet::nIndices(), ibis::array_t< T >::size(), and ibis::bitvector::size().
|
protected |
A template function to resolve the second variable involved in the 2D bins.
The actual binning work is done in ibis::part::fill2DBins.
References ibis::CATEGORY, ibis::bitvector::cnt(), ibis::DOUBLE, ibis::FLOAT, ibis::column::getValuesArray(), ibis::INT, ibis::LONG, ibis::column::name(), ibis::column::selectBytes(), ibis::column::selectDoubles(), ibis::column::selectFloats(), ibis::column::selectInts(), ibis::column::selectLongs(), ibis::column::selectShorts(), ibis::column::selectUBytes(), ibis::column::selectUInts(), ibis::column::selectULongs(), ibis::column::selectUShorts(), ibis::SHORT, ibis::column::type(), ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
|
protected |
This version returns a vector of pointers to bitmaps.
Because the empty bitmaps are left as null pointers, it can reduce the memory usage and the execution time if the majority of the bins are empty.
References ibis::CATEGORY, ibis::bitvector::cnt(), ibis::DOUBLE, ibis::FLOAT, ibis::column::getValuesArray(), ibis::INT, ibis::LONG, ibis::column::name(), ibis::column::selectBytes(), ibis::column::selectDoubles(), ibis::column::selectFloats(), ibis::column::selectInts(), ibis::column::selectLongs(), ibis::column::selectShorts(), ibis::column::selectUBytes(), ibis::column::selectUInts(), ibis::column::selectULongs(), ibis::column::selectUShorts(), ibis::SHORT, ibis::column::type(), ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
|
protected |
Fill the bitvectors representing the 2D bins.
This version returns a vector of pointers to bitmaps.
Because the empty bitmaps are left as null pointers, it can reduce the memory usage and the execution time if the majority of the bins are empty.
References ibis::bitvector::cnt(), ibis::bitvector::indexSet::nIndices(), ibis::array_t< T >::size(), and ibis::bitvector::size().
|
protected |
This version returns a vector of pointers to bitmaps.
Because the empty bitmaps are left as null pointers, it can reduce the memory usage and the execution time if the majority of the bins are empty.
References ibis::CATEGORY, ibis::bitvector::cnt(), ibis::DOUBLE, ibis::FLOAT, ibis::column::getValuesArray(), ibis::INT, ibis::LONG, ibis::column::name(), ibis::column::selectBytes(), ibis::column::selectDoubles(), ibis::column::selectFloats(), ibis::column::selectInts(), ibis::column::selectLongs(), ibis::column::selectShorts(), ibis::column::selectUBytes(), ibis::column::selectUInts(), ibis::column::selectULongs(), ibis::column::selectUShorts(), ibis::SHORT, ibis::column::type(), ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
|
protected |
Fill the bitvectors representing the 3D bins.
The three triplets, (begin1, end1, stride1), (begin2, end2, stride2), and (begin3, end3, stride3), define (1 + floor((end1 - begin1) / stride1)) * (1 + floor((end2 - begin2) / stride2)) * (1 + floor((end3 - begin3) / stride3)) 3D bins.
The 3D bins are packed into the 1D array bins in raster scan order, with the 3rd dimension as the fastest varying dimension and the 1st dimension as the slowest varying dimension.
References ibis::bitvector::cnt(), name(), ibis::bitvector::indexSet::nIndices(), ibis::array_t< T >::size(), and ibis::bitvector::size().
|
protected |
The three triplets, (begin1, end1, stride1), (begin2, end2, stride2), and (begin3, end3, stride3), define (1 + floor((end1 - begin1) / stride1)) * (1 + floor((end2 - begin2) / stride2)) * (1 + floor((end3 - begin3) / stride3)) 3D bins.
The 3D bins are packed into the 1D array bins in raster scan order, with the 3rd dimension as the fastest varying dimension and the 1st dimension as the slowest varying dimension.
References ibis::bitvector::cnt(), name(), ibis::bitvector::indexSet::nIndices(), ibis::array_t< T >::size(), and ibis::bitvector::size().
|
protected |
Resolve the 2nd column of the 3D bins.
It invokes ibis::part::fill3DBins3 to resolve the 3rd dimension and finally ibis::part::fill3DBins to perform the actual binning.
References ibis::CATEGORY, ibis::bitvector::cnt(), ibis::DOUBLE, ibis::FLOAT, ibis::column::getValuesArray(), ibis::INT, ibis::LONG, ibis::column::name(), ibis::column::selectBytes(), ibis::column::selectDoubles(), ibis::column::selectFloats(), ibis::column::selectInts(), ibis::column::selectLongs(), ibis::column::selectShorts(), ibis::column::selectUBytes(), ibis::column::selectUInts(), ibis::column::selectULongs(), ibis::column::selectUShorts(), ibis::SHORT, ibis::column::type(), ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
|
protected |
Resolve the 2nd column of the 3D bins.
It invokes ibis::part::fill3DBins3 to resolve the 3rd dimension and finally ibis::part::fill3DBins to perform the actual binning.
References ibis::CATEGORY, ibis::bitvector::cnt(), ibis::DOUBLE, ibis::FLOAT, ibis::column::getValuesArray(), ibis::INT, ibis::LONG, ibis::column::name(), ibis::column::selectBytes(), ibis::column::selectDoubles(), ibis::column::selectFloats(), ibis::column::selectInts(), ibis::column::selectLongs(), ibis::column::selectShorts(), ibis::column::selectUBytes(), ibis::column::selectUInts(), ibis::column::selectULongs(), ibis::column::selectUShorts(), ibis::SHORT, ibis::column::type(), ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
|
protected |
Resolve the 3rd column involved in the 3D bins.
The final binning work is performed by ibis::part::fill3DBins.
References ibis::CATEGORY, ibis::bitvector::cnt(), ibis::DOUBLE, ibis::FLOAT, ibis::column::getValuesArray(), ibis::INT, ibis::LONG, ibis::column::name(), ibis::column::selectBytes(), ibis::column::selectDoubles(), ibis::column::selectFloats(), ibis::column::selectInts(), ibis::column::selectLongs(), ibis::column::selectShorts(), ibis::column::selectUBytes(), ibis::column::selectUInts(), ibis::column::selectULongs(), ibis::column::selectUShorts(), ibis::SHORT, ibis::column::type(), ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
|
protected |
Resolve the 3rd column involved in the 3D bins.
The final binning work is performed by ibis::part::fill3DBins.
References ibis::CATEGORY, ibis::bitvector::cnt(), ibis::DOUBLE, ibis::FLOAT, ibis::column::getValuesArray(), ibis::INT, ibis::LONG, ibis::column::name(), ibis::column::selectBytes(), ibis::column::selectDoubles(), ibis::column::selectFloats(), ibis::column::selectInts(), ibis::column::selectLongs(), ibis::column::selectShorts(), ibis::column::selectUBytes(), ibis::column::selectUInts(), ibis::column::selectULongs(), ibis::column::selectUShorts(), ibis::SHORT, ibis::column::type(), ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
|
protected |
The three triplets, (begin1, end1, stride1), (begin2, end2, stride2), and (begin3, end3, stride3), define (1 + floor((end1 - begin1) / stride1)) * (1 + floor((end2 - begin2) / stride2)) * (1 + floor((end3 - begin3) / stride3)) 3D bins.
The 3D bins are packed into the 1D array bins in raster scan order, with the 3rd dimension as the fastest varying dimension and the 1st dimension as the slowest varying dimension.
References ibis::bitvector::cnt(), name(), ibis::bitvector::indexSet::nIndices(), ibis::array_t< T >::size(), and ibis::bitvector::size().
|
protected |
Resolve the 2nd column of the 3D bins.
It invokes ibis::part::fill3DBins3 to resolve the 3rd dimension and finally ibis::part::fill3DBins to perform the actual binning.
References ibis::CATEGORY, ibis::bitvector::cnt(), ibis::DOUBLE, ibis::FLOAT, ibis::column::getValuesArray(), ibis::INT, ibis::LONG, ibis::column::name(), ibis::column::selectBytes(), ibis::column::selectDoubles(), ibis::column::selectFloats(), ibis::column::selectInts(), ibis::column::selectLongs(), ibis::column::selectShorts(), ibis::column::selectUBytes(), ibis::column::selectUInts(), ibis::column::selectULongs(), ibis::column::selectUShorts(), ibis::SHORT, ibis::column::type(), ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
|
protected |
Resolve the 3rd column involved in the 3D bins.
The final binning work is performed by ibis::part::fill3DBins.
References ibis::CATEGORY, ibis::bitvector::cnt(), ibis::DOUBLE, ibis::FLOAT, ibis::column::getValuesArray(), ibis::INT, ibis::LONG, ibis::column::name(), ibis::column::selectBytes(), ibis::column::selectDoubles(), ibis::column::selectFloats(), ibis::column::selectInts(), ibis::column::selectLongs(), ibis::column::selectShorts(), ibis::column::selectUBytes(), ibis::column::selectUInts(), ibis::column::selectULongs(), ibis::column::selectUShorts(), ibis::SHORT, ibis::column::type(), ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
|
protected |
!< Read RIDs from file 'rids'.
Attempt to free the RID column.
Complete the task if it can acquire a write lock on the ibis::part object. Otherwise, the rids will be left unchanged.
References ibis::part::softWriteLock::isLocked().
|
inline |
Attempt to gain a read access to this part object.
A simple wrapper over pthread_rwlock_rdlock. Returns the return value of pthread_rwlock_rdlock.
|
inline |
Attempt to gain a write access to this part object.
A simple wrapper over pthread_rwlock_wrlock. Returns the return value of pthread_rwlock_wrlock.
|
protected |
Collect a list of column names that might be used as keys for sorting the rows.
The columns used have integer values and are ordered from the narrowest range of values to the widest range of values. It limits the number of sort keys so that the number of distinct combinations is not much larger than the number of rows in the data partition.
References ibis::array_t< T >::push_back(), ibis::array_t< T >::reserve(), ibis::array_t< T >::size(), and ibis::array_t< T >::stableSort().
Referenced by reorder().
long ibis::part::get1DBins | ( | const char * | constraints, |
const char * | cname, | ||
double | begin, | ||
double | end, | ||
double | stride, | ||
std::vector< ibis::bitvector > & | bins | ||
) | const |
Partition values of the named variable into regularly spaced bins.
The actual binning operations are performed in function template ibis::part::fill1DBins.
The normal return value is the number of bitmaps stored in bins. Note that the empty bitmaps in bins all share the same underlying storage. The caller should avoid mixing these empty bitmaps with others.
References ibis::CATEGORY, ibis::bitvector::cnt(), ibis::bitvector::copy(), ibis::horometer::CPUTime(), ibis::DOUBLE, ibis::countQuery::evaluate(), ibis::FLOAT, ibis::countQuery::getHitVector(), ibis::countQuery::getNumHits(), ibis::column::getValuesArray(), ibis::INT, ibis::util::logMessage(), ibis::LONG, ibis::horometer::realTime(), ibis::column::selectBytes(), ibis::column::selectDoubles(), ibis::column::selectFloats(), ibis::column::selectInts(), ibis::column::selectLongs(), ibis::column::selectShorts(), ibis::column::selectUBytes(), ibis::column::selectUInts(), ibis::column::selectULongs(), ibis::column::selectUShorts(), ibis::countQuery::setWhereClause(), ibis::SHORT, ibis::horometer::start(), ibis::horometer::stop(), ibis::column::type(), ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
long ibis::part::get1DBins | ( | const char * | constraints, |
const char * | cname, | ||
double | begin, | ||
double | end, | ||
double | stride, | ||
std::vector< ibis::bitvector * > & | bins | ||
) | const |
Partition values of the named variable into regularly spaced bins.
This version returns a vector of pointers to bit vectors.
It can reduce memory usage and reduce execution time if the majority of the bins are empty.
References ibis::CATEGORY, ibis::bitvector::cnt(), ibis::bitvector::copy(), ibis::horometer::CPUTime(), ibis::DOUBLE, ibis::countQuery::evaluate(), ibis::FLOAT, ibis::countQuery::getHitVector(), ibis::countQuery::getNumHits(), ibis::column::getValuesArray(), ibis::INT, ibis::util::logMessage(), ibis::LONG, ibis::horometer::realTime(), ibis::column::selectBytes(), ibis::column::selectDoubles(), ibis::column::selectFloats(), ibis::column::selectInts(), ibis::column::selectLongs(), ibis::column::selectShorts(), ibis::column::selectUBytes(), ibis::column::selectUInts(), ibis::column::selectULongs(), ibis::column::selectUShorts(), ibis::countQuery::setWhereClause(), ibis::SHORT, ibis::horometer::start(), ibis::horometer::stop(), ibis::column::type(), ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
long ibis::part::get1DBins | ( | const char * | constraints, |
const char * | cname, | ||
double | begin, | ||
double | end, | ||
double | stride, | ||
const char * | wtname, | ||
std::vector< double > & | weights, | ||
std::vector< ibis::bitvector * > & | bins | ||
) | const |
Partition values of the named variable into regularly spaced bins.
This version returns a vector of pointers to bit vectors.
It can reduce memory usage and reduce execution time if the majority of the bins are empty.
References ibis::CATEGORY, ibis::bitvector::cnt(), ibis::horometer::CPUTime(), ibis::DOUBLE, ibis::countQuery::evaluate(), ibis::FLOAT, ibis::countQuery::getHitVector(), ibis::column::getNullMask(), ibis::countQuery::getNumHits(), ibis::column::getValuesArray(), ibis::INT, ibis::util::logMessage(), ibis::LONG, ibis::column::name(), ibis::horometer::realTime(), ibis::column::selectBytes(), ibis::column::selectDoubles(), ibis::column::selectFloats(), ibis::column::selectInts(), ibis::column::selectLongs(), ibis::column::selectShorts(), ibis::column::selectUBytes(), ibis::column::selectUInts(), ibis::column::selectULongs(), ibis::column::selectUShorts(), ibis::bitvector::set(), ibis::countQuery::setWhereClause(), ibis::SHORT, ibis::horometer::start(), ibis::horometer::stop(), ibis::column::type(), ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
long ibis::part::get1DBins | ( | const char * | constraints, |
const char * | cname1, | ||
uint32_t | nb1, | ||
std::vector< double > & | bounds1, | ||
std::vector< ibis::bitvector > & | bins | ||
) | const |
Partition records satisfying specified conditions into bins with about the same number of records.
If the string constraints is nil, is an empty string, or starts with an asterisk (*), it is assumed that every valid record of the named column is used.
Arrays bounds1 and bins are both for output only. Upon successful completion of this function, the return value shall be the number of bins actually used. A return value of 0 indicates that no record satisfies the constraints. A negative return value indicates an error.
References ibis::countQuery::evaluate(), ibis::countQuery::getHitVector(), ibis::column::getNullMask(), ibis::countQuery::getNumHits(), and ibis::countQuery::setWhereClause().
|
protected |
Compute 1D histogram from raw data.
Based on the column type, decide how to retrieve the values and invoke the lower level support functions.
References ibis::fileManager::bytesInUse(), ibis::CATEGORY, ibis::bitvector::cnt(), ibis::DOUBLE, ibis::FLOAT, ibis::column::getValuesArray(), ibis::fileManager::instance(), ibis::INT, ibis::LONG, ibis::column::name(), ibis::bitvector::indexSet::nIndices(), ibis::fileManager::printStatus(), ibis::column::selectBytes(), ibis::column::selectDoubles(), ibis::column::selectFloats(), ibis::column::selectInts(), ibis::column::selectLongs(), ibis::column::selectShorts(), ibis::column::selectUBytes(), ibis::column::selectUInts(), ibis::column::selectULongs(), ibis::column::selectUShorts(), ibis::SHORT, ibis::array_t< T >::size(), ibis::bitvector::size(), ibis::column::type(), ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, ibis::util::userName(), and ibis::USHORT.
long ibis::part::get1DDistribution | ( | const char * | constraints, |
const char * | cname, | ||
double | begin, | ||
double | end, | ||
double | stride, | ||
std::vector< uint32_t > & | counts | ||
) | const |
Histogram functions.
Count the number of records falling in the regular bins defined by the begin:end:stride
triplet.
Compute conditional 1D histogram with regularly spaced bins.
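The triplet defines 1 + floor((end-begin)/stride) bins: [begin, begin+stride), [begin+stride, begin+2*stride), ..., [begin+stride*floor((end-begin)/stride), end].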
Note that the bins all have closed ends on the left, and open ends on the right, except the last bin where both ends are closed.
When this function completes successfully, the array counts
shall have 1+floor((end-begin)/stride)
elements, one for each bin. The return value shall be the number of bins. Any other value indicates an error. If array counts
has the same size as the number of bins on input, the count values will be added to the array. This is intended to be used to accumulate counts from different data partitions. If the array counts
does not have the correct size, it will be resized to the correct size and initialized to zero before counting the current data partition.
This function proceeds by first evaluating the constraints, then retrieving the selected values, and finally counting the number of records in each bin.
The argument constraints can be nil (which is interpreted as "no constraint"), but cname must be the name of a valid column in the data partition.
References ibis::CATEGORY, ibis::bitvector::copy(), ibis::horometer::CPUTime(), ibis::DOUBLE, ibis::countQuery::evaluate(), ibis::FLOAT, getColumn(), ibis::countQuery::getHitVector(), ibis::countQuery::getNumHits(), ibis::INT, ibis::LONG, ibis::horometer::realTime(), ibis::column::selectDoubles(), ibis::column::selectFloats(), ibis::column::selectInts(), ibis::column::selectLongs(), ibis::column::selectUInts(), ibis::countQuery::setWhereClause(), ibis::SHORT, ibis::horometer::start(), ibis::horometer::stop(), ibis::column::type(), ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
long ibis::part::get1DDistribution | ( | const char * | constraints, |
const char * | bname, | ||
double | begin, | ||
double | end, | ||
double | stride, | ||
const char * | wtname, | ||
std::vector< double > & | weights | ||
) | const |
Compute weighted conditional 1D histogram with regularly spaced bins.
Compute the weight in each regularly-spaced bin.
The bins are defined by the begin:end:stride
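triplet, which defines 1 + floor((end-begin)/stride)
bins:
[begin, begin+stride)
[begin+stride, begin+stride*2)
...
[begin+stride*floor((end-begin)/stride), end]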
Note that the bins all have closed ends on the left, and open ends on the right, except the last bin where both ends are closed.
When this function completes successfully, the array weights
shall have 1+floor((end-begin)/stride)
elements, one for each bin. The return value shall be the number of bins. Any other value indicates an error. If array weights
has the same size as the number of bins on input, the weight values will be added to the array. This is intended to be used to accumulate weights from different data partitions. If the array weights
does not have the correct size, it will be resized to the correct size and initialized to zero before counting the current data partition.
This function proceeds by first evaluating the constraints, then retrieving the selected values, and finally computing the weights in each bin.
The constraints can be nil, which is interpreted as "no constraint"; however, both bname and wtname must be valid column names of this data partition. Furthermore, both columns must have numerical values, not string values.
References ibis::CATEGORY, ibis::horometer::CPUTime(), ibis::DOUBLE, ibis::countQuery::evaluate(), ibis::FLOAT, ibis::countQuery::getHitVector(), ibis::column::getNullMask(), ibis::countQuery::getNumHits(), ibis::INT, ibis::util::logMessage(), ibis::LONG, ibis::column::name(), ibis::horometer::realTime(), ibis::column::selectDoubles(), ibis::column::selectFloats(), ibis::column::selectInts(), ibis::column::selectLongs(), ibis::column::selectUInts(), ibis::countQuery::setWhereClause(), ibis::SHORT, ibis::horometer::start(), ibis::horometer::stop(), ibis::column::type(), ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
long ibis::part::get1DDistribution | ( | const char * | cname, |
uint32_t | nbin, | ||
std::vector< double > & | bounds, | ||
std::vector< uint32_t > & | counts | ||
) | const |
Compute 1D histogram with adaptive bins.
The caller specifies the number of bins, but not where to place the bins.
The bounds array contains one more element than the counts array and all the bins defined by the bounds are closed ranges. More specifically, the number of elements with values between [bounds[i], bounds[i+1]) is stored in counts[i]. Note that the lower bound of a range is included in the bin, but the upper bound of a bin is excluded from the bin.
long ibis::part::get1DDistribution | ( | const char * | constraints, |
const char * | cname, | ||
uint32_t | nbins, | ||
std::vector< double > & | bounds, | ||
std::vector< uint32_t > & | counts | ||
) | const |
Compute conditional 1D histogram with adaptive bins.
References ibis::CATEGORY, ibis::bitvector::cnt(), ibis::horometer::CPUTime(), ibis::DOUBLE, ibis::countQuery::evaluate(), ibis::FLOAT, ibis::countQuery::getHitVector(), ibis::column::getNullMask(), ibis::countQuery::getNumHits(), ibis::INT, ibis::LONG, ibis::horometer::realTime(), ibis::column::selectBytes(), ibis::column::selectDoubles(), ibis::column::selectFloats(), ibis::column::selectInts(), ibis::column::selectLongs(), ibis::column::selectShorts(), ibis::column::selectUBytes(), ibis::column::selectUInts(), ibis::column::selectULongs(), ibis::column::selectUShorts(), ibis::countQuery::setWhereClause(), ibis::SHORT, ibis::array_t< T >::size(), ibis::horometer::start(), ibis::horometer::stop(), ibis::column::type(), ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
|
protected |
Compute 1D histogram from index.
Calls function ibis::column::getDistribution to create the internal histogram first, then packs it into a smaller number of bins if necessary.
References ibis::fileManager::buffer< T >::address(), ibis::util::compactValue(), ibis::column::getActualMax(), ibis::column::getActualMin(), ibis::column::getDistribution(), ibis::util::incrDouble(), and ibis::column::isFloat().
long ibis::part::get2DBins | ( | const char * | constraints, |
const char * | cname1, | ||
double | begin1, | ||
double | end1, | ||
double | stride1, | ||
const char * | cname2, | ||
double | begin2, | ||
double | end2, | ||
double | stride2, | ||
std::vector< ibis::bitvector > & | bins | ||
) | const |
Partition values of named variables into regularly spaced 2D bins.
This function only checks the validity of the column names and resolves the first column involved.
The second column is resolved in function ibis::part::fill2DBins2, and the final binning work is performed in ibis::part::fill2DBins. Please refer to the documentation for ibis::part::fill2DBins for more information about the return variable bins. The return value of this function is the number of elements in array bins upon successful completion of this function, which should be exactly (1 + floor((end1-begin1)/stride1)) * (1 + floor((end2-begin2)/stride2)).
This function returns a negative value to indicate errors.
References ibis::CATEGORY, ibis::bitvector::cnt(), ibis::bitvector::copy(), ibis::horometer::CPUTime(), ibis::DOUBLE, ibis::countQuery::evaluate(), ibis::FLOAT, ibis::countQuery::getHitVector(), ibis::countQuery::getNumHits(), ibis::column::getValuesArray(), ibis::INT, ibis::util::logMessage(), ibis::LONG, ibis::horometer::realTime(), ibis::column::selectBytes(), ibis::column::selectDoubles(), ibis::column::selectFloats(), ibis::column::selectInts(), ibis::column::selectLongs(), ibis::column::selectShorts(), ibis::column::selectUBytes(), ibis::column::selectUInts(), ibis::column::selectULongs(), ibis::column::selectUShorts(), ibis::countQuery::setWhereClause(), ibis::SHORT, ibis::horometer::start(), ibis::horometer::stop(), ibis::column::type(), ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
long ibis::part::get2DBins | ( | const char * | constraints, |
const char * | cname1, | ||
double | begin1, | ||
double | end1, | ||
double | stride1, | ||
const char * | cname2, | ||
double | begin2, | ||
double | end2, | ||
double | stride2, | ||
std::vector< ibis::bitvector * > & | bins | ||
) | const |
Partition values of named variables into regularly spaced 2D bins.
This version returns a vector of pointers to bitmaps.
Because the empty bitmaps are left as null pointers, it can reduce the memory usage and the execution time if the majority of the bins are empty.
References ibis::CATEGORY, ibis::bitvector::cnt(), ibis::bitvector::copy(), ibis::horometer::CPUTime(), ibis::DOUBLE, ibis::countQuery::evaluate(), ibis::FLOAT, ibis::countQuery::getHitVector(), ibis::countQuery::getNumHits(), ibis::column::getValuesArray(), ibis::INT, ibis::util::logMessage(), ibis::LONG, ibis::horometer::realTime(), ibis::column::selectBytes(), ibis::column::selectDoubles(), ibis::column::selectFloats(), ibis::column::selectInts(), ibis::column::selectLongs(), ibis::column::selectShorts(), ibis::column::selectUBytes(), ibis::column::selectUInts(), ibis::column::selectULongs(), ibis::column::selectUShorts(), ibis::countQuery::setWhereClause(), ibis::SHORT, ibis::horometer::start(), ibis::horometer::stop(), ibis::column::type(), ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
long ibis::part::get2DBins | ( | const char * | constraints, |
const char * | cname1, | ||
double | begin1, | ||
double | end1, | ||
double | stride1, | ||
const char * | cname2, | ||
double | begin2, | ||
double | end2, | ||
double | stride2, | ||
const char * | wtname, | ||
std::vector< double > & | weights, | ||
std::vector< ibis::bitvector * > & | bins | ||
) | const |
Partition values of named variables into regularly spaced 2D bins.
This version returns a vector of pointers to bitmaps.
Because the empty bitmaps are left as null pointers, it can reduce the memory usage and the execution time if the majority of the bins are empty.
References ibis::CATEGORY, ibis::bitvector::cnt(), ibis::horometer::CPUTime(), ibis::DOUBLE, ibis::countQuery::evaluate(), ibis::FLOAT, ibis::countQuery::getHitVector(), ibis::column::getNullMask(), ibis::countQuery::getNumHits(), ibis::column::getValuesArray(), ibis::INT, ibis::util::logMessage(), ibis::LONG, ibis::column::name(), ibis::horometer::realTime(), ibis::column::selectBytes(), ibis::column::selectDoubles(), ibis::column::selectFloats(), ibis::column::selectInts(), ibis::column::selectLongs(), ibis::column::selectShorts(), ibis::column::selectUBytes(), ibis::column::selectUInts(), ibis::column::selectULongs(), ibis::column::selectUShorts(), ibis::bitvector::set(), ibis::countQuery::setWhereClause(), ibis::SHORT, ibis::horometer::start(), ibis::horometer::stop(), ibis::column::type(), ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
long ibis::part::get2DBins | ( | const char * | constraints, |
const char * | cname1, | ||
const char * | cname2, | ||
uint32_t | nb1, | ||
uint32_t | nb2, | ||
std::vector< double > & | bounds1, | ||
std::vector< double > & | bounds2, | ||
std::vector< ibis::bitvector > & | bins | ||
) | const |
Partition records satisfying specified conditions into 2D bins.
If the string constraints is nil, is an empty string, or starts with an asterisk (*), it is assumed every valid record of the named column is used.
Arrays bounds1 and bins are both for output only. Upon successful completion of this function, the return value shall be the number of bins actually used. A return value of 0 indicates no record satisfies the constraints. A negative return value indicates an error.
References ibis::countQuery::evaluate(), ibis::countQuery::getHitVector(), ibis::column::getNullMask(), ibis::countQuery::getNumHits(), ibis::util::intersect(), and ibis::countQuery::setWhereClause().
long ibis::part::get2DDistribution | ( | const char * | constraints, |
const char * | cname1, | ||
double | begin1, | ||
double | end1, | ||
double | stride1, | ||
const char * | cname2, | ||
double | begin2, | ||
double | end2, | ||
double | stride2, | ||
std::vector< uint32_t > & | counts | ||
) | const |
Compute conditional 2D histogram with regularly spaced bins.
Count the number of values in 2D regular bins.
References ibis::CATEGORY, ibis::bitvector::copy(), ibis::horometer::CPUTime(), ibis::DOUBLE, ibis::countQuery::evaluate(), ibis::FLOAT, ibis::countQuery::getHitVector(), ibis::countQuery::getNumHits(), ibis::INT, ibis::util::logMessage(), ibis::LONG, ibis::horometer::realTime(), ibis::column::selectDoubles(), ibis::column::selectFloats(), ibis::column::selectInts(), ibis::column::selectLongs(), ibis::column::selectUInts(), ibis::countQuery::setWhereClause(), ibis::SHORT, ibis::horometer::start(), ibis::horometer::stop(), ibis::column::type(), ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
long ibis::part::get2DDistribution | ( | const char * | constraints, |
const char * | cname1, | ||
double | begin1, | ||
double | end1, | ||
double | stride1, | ||
const char * | cname2, | ||
double | begin2, | ||
double | end2, | ||
double | stride2, | ||
const char * | wtname, | ||
std::vector< double > & | weights | ||
) | const |
Compute weighted conditional 2D histogram with regularly spaced bins.
Count the weights of 2D regular bins.
References ibis::CATEGORY, ibis::horometer::CPUTime(), ibis::DOUBLE, ibis::countQuery::evaluate(), ibis::FLOAT, ibis::countQuery::getHitVector(), ibis::column::getNullMask(), ibis::countQuery::getNumHits(), ibis::INT, ibis::util::logMessage(), ibis::LONG, ibis::column::name(), ibis::horometer::realTime(), ibis::column::selectDoubles(), ibis::column::selectFloats(), ibis::column::selectInts(), ibis::column::selectLongs(), ibis::column::selectUInts(), ibis::countQuery::setWhereClause(), ibis::SHORT, ibis::horometer::start(), ibis::horometer::stop(), ibis::column::type(), ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
long ibis::part::get2DDistribution | ( | const char * | cname1, |
const char * | cname2, | ||
uint32_t | nb1, | ||
uint32_t | nb2, | ||
std::vector< double > & | bounds1, | ||
std::vector< double > & | bounds2, | ||
std::vector< uint32_t > & | counts, | ||
const char *const | option = 0 |
||
) | const |
Compute 2D histogram with adaptive bins.
The user only specifies the names of the variables/columns and the number of bins for each variable.
This function is free to decide where to place the bin boundaries to count the bins as fast as possible. If the indexes are available and are smaller than the raw data files, then the indexes are used to compute the histogram; otherwise, it reads the raw data files into memory and counts the number of records in each bin.
Bin i1 in the first dimension is defined as bounds1[i1] <= cname1 < bounds1[i1+1], and bin i2 in the second dimension is defined as bounds2[i2] <= cname2 < bounds2[i2+1]. The 2D bins are linearized in counts with the second dimension as the faster varying dimension.
The return value is the number of bins, i.e., the size of array counts. Normally, the number of bins should be nb1 * nb2, but it can differ. For example, if the indexes are used, but there are fewer bins in the indexes than nb1 or nb2, then the number of bins in the indexes will be used.
The last three arguments bounds1, bounds2, and counts are for output only. Their input values are ignored.
The argument option can be one of "index", "data" or "uniform". The option "index" indicates the user prefers to use indexes to compute histograms. The indexes will be used in this case if they exist already. If either "data" or "uniform" is specified, it will attempt to use the base data to compute a histogram, with "uniform" indicating equally spaced (uniform) bins and the other indicating adaptive bins. If the option is none of the above choices, this function will choose one based on their relative sizes.
References ibis::column::elementSize(), and ibis::column::indexSize().
long ibis::part::get2DDistribution | ( | const char * | constraints, |
const char * | name1, | ||
const char * | name2, | ||
uint32_t | nb1, | ||
uint32_t | nb2, | ||
std::vector< double > & | bounds1, | ||
std::vector< double > & | bounds2, | ||
std::vector< uint32_t > & | counts | ||
) | const |
Compute conditional 2D histogram with adaptive bins.
The caller specifies only the number of bins, but lets this function decide where to place the bin boundaries.
This function attempts to make sure the 1D bins for each dimension are equal-weight bins, which is likely to produce evenly distributed 2D bins but does not guarantee the uniformity. It uses the templated function adaptive2DBins, which starts with a set of regularly spaced bins and coalesces the regular bins to produce the desired number of bins.
References ibis::CATEGORY, ibis::bitvector::cnt(), ibis::horometer::CPUTime(), ibis::DOUBLE, ibis::countQuery::evaluate(), ibis::FLOAT, ibis::countQuery::getHitVector(), ibis::column::getNullMask(), ibis::INT, ibis::util::logMessage(), ibis::LONG, ibis::horometer::realTime(), ibis::column::selectDoubles(), ibis::column::selectFloats(), ibis::column::selectInts(), ibis::column::selectLongs(), ibis::column::selectUInts(), ibis::column::selectULongs(), ibis::countQuery::setWhereClause(), ibis::SHORT, ibis::horometer::start(), ibis::horometer::stop(), ibis::column::type(), ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
|
protected |
Compute 2D histogram with adaptive bins from base data.
Compute a set of adaptive bins based on fine-level uniform bins.
References ibis::CATEGORY, ibis::bitvector::cnt(), ibis::horometer::CPUTime(), ibis::DOUBLE, ibis::FLOAT, ibis::column::getNullMask(), ibis::INT, ibis::LONG, ibis::column::name(), ibis::horometer::realTime(), ibis::column::selectDoubles(), ibis::column::selectFloats(), ibis::column::selectInts(), ibis::column::selectLongs(), ibis::column::selectUInts(), ibis::SHORT, ibis::bitvector::size(), ibis::horometer::start(), ibis::horometer::stop(), ibis::column::type(), ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
|
protected |
Compute 2D histogram with uniform bins from base data.
Read the base data, then count how many values fall in each bin.
The bins are defined with regular spacing.
References ibis::CATEGORY, ibis::bitvector::cnt(), ibis::util::compactValue2(), ibis::horometer::CPUTime(), ibis::DOUBLE, ibis::FLOAT, ibis::column::getActualMax(), ibis::column::getActualMin(), ibis::column::getNullMask(), ibis::util::incrDouble(), ibis::INT, ibis::column::isFloat(), ibis::LONG, ibis::column::name(), ibis::horometer::realTime(), ibis::column::selectDoubles(), ibis::column::selectFloats(), ibis::column::selectInts(), ibis::column::selectLongs(), ibis::column::selectUInts(), ibis::SHORT, ibis::bitvector::size(), ibis::horometer::start(), ibis::horometer::stop(), ibis::column::type(), ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
long ibis::part::get3DBins | ( | const char * | constraints, |
const char * | cname1, | ||
double | begin1, | ||
double | end1, | ||
double | stride1, | ||
const char * | cname2, | ||
double | begin2, | ||
double | end2, | ||
double | stride2, | ||
const char * | cname3, | ||
double | begin3, | ||
double | end3, | ||
double | stride3, | ||
std::vector< ibis::bitvector > & | bins | ||
) | const |
Partition values of named variables into regularly spaced 3D bins.
This function calls ibis::part::fill3DBins and other helper functions to compute the 3D bins.
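On successful completion, it returns the number of elements in variable bins. In other words, it returns the number of bins generated, which should be exactly (1 + floor((end1-begin1)/stride1)) * (1 + floor((end2-begin2)/stride2)) * (1 + floor((end3-begin3)/stride3)).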
It returns a negative value to indicate error. Please refer to the documentation of ibis::part::fill3DBins for additional information about the objects returned in bins.
References ibis::CATEGORY, ibis::bitvector::cnt(), ibis::bitvector::copy(), ibis::horometer::CPUTime(), ibis::DOUBLE, ibis::countQuery::evaluate(), ibis::FLOAT, ibis::countQuery::getHitVector(), ibis::countQuery::getNumHits(), ibis::column::getValuesArray(), ibis::INT, ibis::util::logMessage(), ibis::LONG, ibis::horometer::realTime(), ibis::column::selectBytes(), ibis::column::selectDoubles(), ibis::column::selectFloats(), ibis::column::selectInts(), ibis::column::selectLongs(), ibis::column::selectShorts(), ibis::column::selectUBytes(), ibis::column::selectUInts(), ibis::column::selectULongs(), ibis::column::selectUShorts(), ibis::countQuery::setWhereClause(), ibis::SHORT, ibis::horometer::start(), ibis::horometer::stop(), ibis::column::type(), ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
long ibis::part::get3DBins | ( | const char * | constraints, |
const char * | cname1, | ||
double | begin1, | ||
double | end1, | ||
double | stride1, | ||
const char * | cname2, | ||
double | begin2, | ||
double | end2, | ||
double | stride2, | ||
const char * | cname3, | ||
double | begin3, | ||
double | end3, | ||
double | stride3, | ||
std::vector< ibis::bitvector * > & | bins | ||
) | const |
Partition values of named variables into regularly spaced 3D bins.
This function calls ibis::part::fill3DBins and other helper functions to compute the 3D bins.
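On successful completion, it returns the number of elements in variable bins. In other words, it returns the number of bins generated, which should be exactly (1 + floor((end1-begin1)/stride1)) * (1 + floor((end2-begin2)/stride2)) * (1 + floor((end3-begin3)/stride3)).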
It returns a negative value to indicate error. Please refer to the documentation of ibis::part::fill3DBins for additional information about the objects returned in bins.
References ibis::CATEGORY, ibis::bitvector::cnt(), ibis::bitvector::copy(), ibis::horometer::CPUTime(), ibis::DOUBLE, ibis::countQuery::evaluate(), ibis::FLOAT, ibis::countQuery::getHitVector(), ibis::countQuery::getNumHits(), ibis::column::getValuesArray(), ibis::INT, ibis::util::logMessage(), ibis::LONG, ibis::horometer::realTime(), ibis::column::selectBytes(), ibis::column::selectDoubles(), ibis::column::selectFloats(), ibis::column::selectInts(), ibis::column::selectLongs(), ibis::column::selectShorts(), ibis::column::selectUBytes(), ibis::column::selectUInts(), ibis::column::selectULongs(), ibis::column::selectUShorts(), ibis::countQuery::setWhereClause(), ibis::SHORT, ibis::horometer::start(), ibis::horometer::stop(), ibis::column::type(), ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
long ibis::part::get3DBins | ( | const char * | constraints, |
const char * | cname1, | ||
double | begin1, | ||
double | end1, | ||
double | stride1, | ||
const char * | cname2, | ||
double | begin2, | ||
double | end2, | ||
double | stride2, | ||
const char * | cname3, | ||
double | begin3, | ||
double | end3, | ||
double | stride3, | ||
const char * | wtname, | ||
std::vector< double > & | weights, | ||
std::vector< ibis::bitvector * > & | bins | ||
) | const |
Partition values of named variables into regularly spaced 3D bins.
This function calls ibis::part::fill3DBins and other helper functions to compute the 3D bins.
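On successful completion, it returns the number of elements in variable bins. In other words, it returns the number of bins generated, which should be exactly (1 + floor((end1-begin1)/stride1)) * (1 + floor((end2-begin2)/stride2)) * (1 + floor((end3-begin3)/stride3)).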
It returns a negative value to indicate error. Please refer to the documentation of ibis::part::fill3DBins for additional information about the objects returned in bins.
References ibis::CATEGORY, ibis::bitvector::cnt(), ibis::bitvector::copy(), ibis::horometer::CPUTime(), ibis::DOUBLE, ibis::countQuery::evaluate(), ibis::FLOAT, ibis::countQuery::getHitVector(), ibis::countQuery::getNumHits(), ibis::column::getValuesArray(), ibis::INT, ibis::util::logMessage(), ibis::LONG, ibis::column::name(), ibis::horometer::realTime(), ibis::column::selectBytes(), ibis::column::selectDoubles(), ibis::column::selectFloats(), ibis::column::selectInts(), ibis::column::selectLongs(), ibis::column::selectShorts(), ibis::column::selectUBytes(), ibis::column::selectUInts(), ibis::column::selectULongs(), ibis::column::selectUShorts(), ibis::bitvector::set(), ibis::countQuery::setWhereClause(), ibis::SHORT, ibis::horometer::start(), ibis::horometer::stop(), ibis::column::type(), ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
long ibis::part::get3DBins | ( | const char * | constraints, |
const char * | cname1, | ||
const char * | cname2, | ||
const char * | cname3, | ||
uint32_t | nb1, | ||
uint32_t | nb2, | ||
uint32_t | nb3, | ||
std::vector< double > & | bounds1, | ||
std::vector< double > & | bounds2, | ||
std::vector< double > & | bounds3, | ||
std::vector< ibis::bitvector > & | bins | ||
) | const |
Partition records satisfying specified conditions into 3D bins.
If the string constraints is nil, is an empty string, or starts with an asterisk (*), it is assumed every valid record of the named column is used.
Arrays bounds1 and bins are both for output only. Upon successful completion of this function, the return value shall be the number of bins actually used. A return value of 0 indicates no record satisfies the constraints. A negative return value indicates an error.
References ibis::countQuery::evaluate(), ibis::countQuery::getHitVector(), ibis::column::getNullMask(), ibis::countQuery::getNumHits(), ibis::util::intersect(), and ibis::countQuery::setWhereClause().
long ibis::part::get3DDistribution | ( | const char * | constraints, |
const char * | cname1, | ||
double | begin1, | ||
double | end1, | ||
double | stride1, | ||
const char * | cname2, | ||
double | begin2, | ||
double | end2, | ||
double | stride2, | ||
const char * | cname3, | ||
double | begin3, | ||
double | end3, | ||
double | stride3, | ||
std::vector< uint32_t > & | counts | ||
) | const |
Compute conditional 3D histogram with regularly spaced bins.
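This function defines exactly (1 + floor((end1-begin1)/stride1)) * (1 + floor((end2-begin2)/stride2)) * (1 + floor((end3-begin3)/stride3)) regularly spaced bins. On successful completion of this function, the return value shall be the number of bins. Any other value indicates an error.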
References ibis::CATEGORY, ibis::bitvector::copy(), ibis::horometer::CPUTime(), ibis::DOUBLE, ibis::countQuery::evaluate(), ibis::FLOAT, ibis::countQuery::getHitVector(), ibis::countQuery::getNumHits(), ibis::INT, ibis::util::logMessage(), ibis::LONG, ibis::horometer::realTime(), ibis::column::selectDoubles(), ibis::column::selectFloats(), ibis::column::selectInts(), ibis::column::selectLongs(), ibis::column::selectUInts(), ibis::countQuery::setWhereClause(), ibis::SHORT, ibis::horometer::start(), ibis::horometer::stop(), ibis::column::type(), ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
long ibis::part::get3DDistribution | ( | const char * | constraints, |
const char * | cname1, | ||
double | begin1, | ||
double | end1, | ||
double | stride1, | ||
const char * | cname2, | ||
double | begin2, | ||
double | end2, | ||
double | stride2, | ||
const char * | cname3, | ||
double | begin3, | ||
double | end3, | ||
double | stride3, | ||
const char * | wtname, | ||
std::vector< double > & | weights | ||
) | const |
Compute weighted conditional 3D histogram with regularly spaced bins.
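This function defines exactly (1 + floor((end1-begin1)/stride1)) * (1 + floor((end2-begin2)/stride2)) * (1 + floor((end3-begin3)/stride3)) regularly spaced bins. On successful completion of this function, the return value shall be the number of bins. Any other value indicates an error.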
References ibis::CATEGORY, ibis::horometer::CPUTime(), ibis::DOUBLE, ibis::countQuery::evaluate(), ibis::FLOAT, ibis::countQuery::getHitVector(), ibis::column::getNullMask(), ibis::countQuery::getNumHits(), ibis::INT, ibis::util::logMessage(), ibis::LONG, ibis::column::name(), ibis::horometer::realTime(), ibis::column::selectDoubles(), ibis::column::selectFloats(), ibis::column::selectInts(), ibis::column::selectLongs(), ibis::column::selectUInts(), ibis::countQuery::setWhereClause(), ibis::SHORT, ibis::horometer::start(), ibis::horometer::stop(), ibis::column::type(), ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
long ibis::part::get3DDistribution | ( | const char * | cname1, |
const char * | cname2, | ||
const char * | cname3, | ||
uint32_t | nb1, | ||
uint32_t | nb2, | ||
uint32_t | nb3, | ||
std::vector< double > & | bounds1, | ||
std::vector< double > & | bounds2, | ||
std::vector< double > & | bounds3, | ||
std::vector< uint32_t > & | counts, | ||
const char *const | option = 0 |
||
) | const |
Compute 3D histogram with adaptive bins.
Upon successful completion of this function, the return value shall be the number of bins produced, which is equal to the number of elements in array counts.
Error codes:
References ibis::bitvector::cnt(), ibis::column::getNullMask(), ibis::column::name(), and ibis::bitvector::size().
long ibis::part::get3DDistribution | ( | const char * | constraints, |
const char * | cname1, | ||
const char * | cname2, | ||
const char * | cname3, | ||
uint32_t | nb1, | ||
uint32_t | nb2, | ||
uint32_t | nb3, | ||
std::vector< double > & | bounds1, | ||
std::vector< double > & | bounds2, | ||
std::vector< double > & | bounds3, | ||
std::vector< uint32_t > & | counts | ||
) | const |
Compute conditional 3D histogram with adaptive bins.
Upon successful completion of this function, the return value shall be the number of bins produced, which is equal to the number of elements in array counts.
Error codes:
References ibis::bitvector::cnt(), ibis::countQuery::evaluate(), ibis::countQuery::getHitVector(), ibis::column::getNullMask(), ibis::countQuery::getNumHits(), and ibis::countQuery::setWhereClause().
|
protected |
Compute 3D histogram with adaptive bins from base data.
Compute 3D distribution with adaptive bins.
It is layered on top of three templated functions, get3DDistributionA1, get3DDistributionA2, and adaptive3DBins. The last function, which is a class function of ibis::part, performs the actual counting; the others are mainly responsible for retrieving values from disk.
This function either returns a negative value between -1 and -11 to indicate an error detected here, or a value returned by get3DDistributionA1. On successful completion of this function, it should return the number of bins in array counts, which should be exactly
References ibis::CATEGORY, ibis::DOUBLE, ibis::FLOAT, ibis::INT, ibis::LONG, ibis::column::name(), ibis::column::selectBytes(), ibis::column::selectDoubles(), ibis::column::selectFloats(), ibis::column::selectInts(), ibis::column::selectLongs(), ibis::column::selectShorts(), ibis::column::selectUBytes(), ibis::column::selectUInts(), ibis::column::selectULongs(), ibis::column::selectUShorts(), ibis::SHORT, ibis::column::type(), ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
|
protected |
Read the value of the second column.
Call get3DDistributionA2 to process the next column and eventually compute the histogram. This function may return a value between -20 and -30 to indicate an error, or a value returned by get3DDistributionA2.
References ibis::CATEGORY, ibis::DOUBLE, ibis::FLOAT, ibis::INT, ibis::LONG, ibis::column::name(), ibis::column::selectBytes(), ibis::column::selectDoubles(), ibis::column::selectFloats(), ibis::column::selectInts(), ibis::column::selectLongs(), ibis::column::selectShorts(), ibis::column::selectUBytes(), ibis::column::selectUInts(), ibis::column::selectULongs(), ibis::column::selectUShorts(), ibis::SHORT, ibis::column::type(), ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
|
protected |
Read the values of the third column.
Call the actual adaptive binning function adaptive3DBins to compute the histogram. Return the number of bins in the histogram or a negative value in the range of -40 to -60 to indicate errors.
References ibis::CATEGORY, ibis::DOUBLE, ibis::FLOAT, ibis::INT, ibis::LONG, ibis::column::name(), ibis::column::selectBytes(), ibis::column::selectDoubles(), ibis::column::selectFloats(), ibis::column::selectInts(), ibis::column::selectLongs(), ibis::column::selectShorts(), ibis::column::selectUBytes(), ibis::column::selectUInts(), ibis::column::selectULongs(), ibis::column::selectUShorts(), ibis::SHORT, ibis::column::type(), ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
ibis::column * ibis::part::getColumn | ( | const char * | prop | ) | const |
Given a name, return the associated column.
Return nil pointer if the name is not found. If the name contains a period, it skips the characters up to the first period.
Referenced by ibis::whereClause::amplify(), ibis::bord::append(), ibis::bord::backup(), ibis::bord::bord(), ibis::bundle1::bundle1(), ibis::bundles::bundles(), ibis::quaere::create(), ibis::mensa::cursor::cursor(), ibis::bord::cursor::cursor(), fastbit_build_index(), fastbit_purge_index(), ibis::mensa::cursor::fillBuffer(), get1DDistribution(), ibis::mensa::getColumnAsBytes(), ibis::mensa::getColumnAsDoubles(), ibis::mensa::getColumnAsFloats(), ibis::mensa::getColumnAsInts(), ibis::mensa::getColumnAsLongs(), ibis::mensa::getColumnAsOpaques(), ibis::mensa::getColumnAsShorts(), ibis::mensa::getColumnAsStrings(), ibis::mensa::getColumnAsUBytes(), ibis::mensa::getColumnAsUInts(), ibis::mensa::getColumnAsULongs(), ibis::mensa::getColumnAsUShorts(), ibis::bord::merge(), ibis::selectClause::needsEval(), ibis::part::barrel::open(), ibis::part::vault::open(), ibis::bord::restoreCategoriesAsStrings(), ibis::tafel::toTable(), ibis::whereClause::verifyExpr(), and ibis::selectClause::verifyTerm().
|
inline |
Returns the pointer to the ith column.
If an external order has been established, that order is used; otherwise, the alphabetical order is used. Index starts with 0. An out of range index will result in a nil pointer.
long ibis::part::getCumulativeDistribution | ( | const char * | name, |
std::vector< double > & | bounds, | ||
std::vector< uint32_t > & | counts | ||
) | const |
Compute a cumulative distribution (a cumulative histogram).
It returns the number of entries in arrays bounds and counts.
The content of counts[i] will be the number of records in the named column that are less than bounds[i]. The last element in array bounds is larger than the value returned by function getColumnMax.
long ibis::part::getCumulativeDistribution | ( | const char * | constraints, |
const char * | name, | ||
std::vector< double > & | bounds, | ||
std::vector< uint32_t > & | counts | ||
) | const |
Compute the cumulative distribution of the variable named name
under the specified constraints.
References ibis::CATEGORY, ibis::array_t< T >::clear(), ibis::bitvector::cnt(), ibis::util::compactValue(), ibis::bitvector::copy(), ibis::horometer::CPUTime(), ibis::index::divideCounts(), ibis::DOUBLE, ibis::countQuery::evaluate(), ibis::FLOAT, ibis::countQuery::getHitVector(), ibis::INT, ibis::util::logMessage(), ibis::horometer::realTime(), ibis::column::selectDoubles(), ibis::column::selectFloats(), ibis::column::selectInts(), ibis::column::selectUInts(), ibis::countQuery::setWhereClause(), ibis::SHORT, ibis::array_t< T >::size(), ibis::horometer::start(), ibis::horometer::stop(), ibis::UBYTE, ibis::UINT, and ibis::USHORT.
long ibis::part::getCumulativeDistribution | ( | const char * | name, |
uint32_t | nbc, | ||
double * | bounds, | ||
uint32_t * | counts | ||
) | const |
This version of getCumulativeDistribution
uses two user supplied arrays bounds
and counts
.
The actual number of elements filled by this function is the return value, which is guaranteed to be no larger than the input value of nbc
.
long ibis::part::getCumulativeDistribution | ( | const char * | constraints, |
const char * | name, | ||
uint32_t | nbc, | ||
double * | bounds, | ||
uint32_t * | counts | ||
) | const |
Compute the conditional distribution and return the distribution in the arrays provided.
Most of the binning schemes leave two bins for overflow: one for values less than the expected minimum and one for values greater than the expected maximum.
The minimum number of bins expected is therefore four (4). This function will return error code -1 if the value of nbc is less than 4.
long ibis::part::getDistribution | ( | const char * | name, |
std::vector< double > & | bounds, | ||
std::vector< uint32_t > & | counts | ||
) | const |
Obsolete histogram functions.
The array bounds
defines the following bins:
Avoid using these functions. They might be removed without notice.
Compute the binned distribution of the named variable.
or alternatively,
bin 0: (..., bounds[0]) -> counts[0]
bin 1: [bounds[0], bounds[1]) -> counts[1]
bin 2: [bounds[1], bounds[2]) -> counts[2]
bin 3: [bounds[2], bounds[3]) -> counts[3]
...
In other words, bounds
[n] defines (n+1) bins, with two open bins at the two ends. The array counts
contains the number of rows that fall into each bin. On a successful return from this function, the return value of this function is the number of bins defined, which is the same as the size of array counts
but one larger than the size of array bounds
.
Return the number of bins (i.e., counts.size()) on success.
long ibis::part::getDistribution | ( | const char * | constraints, |
const char * | name, | ||
std::vector< double > & | bounds, | ||
std::vector< uint32_t > & | counts | ||
) | const |
Compute the conditional binned data distribution.
Compute the distribution of the named variable under the specified constraints.
If the input array bounds
contains distinct values in ascending order, the array will be used as bin boundaries. Otherwise, the bin boundaries are automatically determined by this function. The basic rule for determining the number of bins is that if there are fewer than 10,000 distinct values, then every value is counted separately; otherwise 1000 bins will be used and each bin will contain roughly the same number of records.
References ibis::CATEGORY, ibis::array_t< T >::clear(), ibis::bitvector::cnt(), ibis::util::compactValue(), ibis::horometer::CPUTime(), ibis::index::divideCounts(), ibis::DOUBLE, ibis::countQuery::evaluate(), ibis::FLOAT, ibis::countQuery::getHitVector(), ibis::column::getNullMask(), ibis::INT, ibis::util::logMessage(), ibis::horometer::realTime(), ibis::column::selectDoubles(), ibis::column::selectFloats(), ibis::column::selectInts(), ibis::column::selectUInts(), ibis::countQuery::setWhereClause(), ibis::SHORT, ibis::array_t< T >::size(), ibis::horometer::start(), ibis::horometer::stop(), ibis::UBYTE, ibis::UINT, and ibis::USHORT.
long ibis::part::getDistribution | ( | const char * | name, |
uint32_t | nbc, | ||
double * | bounds, | ||
uint32_t * | counts | ||
) | const |
Compute the binned distribution with the specified maximum number of bins.
Most of the binning schemes leave two bins for overflow: one for values less than the expected minimum and one for values greater than the expected maximum.
The minimum number of bins expected is therefore four (4). This function will return error code -1 if the value of nbc is less than 4.
long ibis::part::getDistribution | ( | const char * | constraints, |
const char * | name, | ||
uint32_t | nbc, | ||
double * | bounds, | ||
uint32_t * | counts | ||
) | const |
Compute the conditional binned data distribution with the specified maximum number of bins.
Most of the binning schemes leave two bins for overflow: one for values less than the expected minimum and one for values greater than the expected maximum.
The minimum number of bins expected is therefore four (4). This function will return error code -1 if the value of nbc is less than 4.
long ibis::part::getJointDistribution | ( | const char * | constraints, |
const char * | name1, | ||
const char * | name2, | ||
std::vector< double > & | bounds1, | ||
std::vector< double > & | bounds2, | ||
std::vector< uint32_t > & | counts | ||
) | const |
Compute the joint distribution of two variables.
It returns three arrays, bounds1
, bounds2
, and counts
.
The arrays bounds1
and bounds2
define two sets of bins, one for each variable. Together they define
(bounds1.size()+1) × (bounds2.size()+1) bins for the 2-D joint distribution.
On successful completion of this function, it returns the number of bins.
bounds1
and bounds2
are used if they contain values in ascending order. If they are empty or their values are not in ascending order, then a simple linear binning will be used. By default, no more than 256 bins are used for each variable.
References ibis::CATEGORY, ibis::bitvector::cnt(), ibis::horometer::CPUTime(), ibis::DOUBLE, ibis::countQuery::evaluate(), ibis::FLOAT, ibis::countQuery::getHitVector(), ibis::column::getNullMask(), ibis::INT, ibis::util::logMessage(), ibis::horometer::realTime(), ibis::array_t< T >::resize(), ibis::column::selectDoubles(), ibis::column::selectFloats(), ibis::column::selectInts(), ibis::column::selectUInts(), ibis::countQuery::setWhereClause(), ibis::SHORT, ibis::array_t< T >::size(), ibis::horometer::start(), ibis::horometer::stop(), ibis::column::type(), ibis::UBYTE, ibis::UINT, and ibis::USHORT.
|
inline |
Return the name of the dimensions corresponding to the vector returned from getMeshShape.
References shapeName.
|
inline |
In many scientific applications, data are defined on meshes.
The following functions assume the meshes are regular. Under this assumption, each column can be viewed as a multi-dimensional array, such as A[nz][ny][nx]. Following the convention in C/C++, the dimensions of the array are ordered from left to right, with the leftmost being the slowest varying dimension and the rightmost being the fastest varying dimension. This assumption about the dimensions is explicitly used in ibis::meshQuery functions toRanges, range2d, range3d and rangend. The function getMeshShape returns the sizes of the dimensions in a std::vector.
References shapeSize.
Referenced by ibis::bord::bord(), and ibis::meshQuery::meshQuery().
ibis::array_t< ibis::rid_t > * ibis::part::getRIDs | ( | const ibis::bitvector & | mask | ) | const |
Retrieve the RIDs corresponding to mask[i] == 1.
If no external row identifiers are provided, this function will use the implicit RIDs which are simply the positions of the rows numbered from 0 to nRows()-1.
References ibis::bitvector::cnt(), ibis::bitvector::indexSet::indices(), ibis::bitvector::indexSet::isRange(), ibis::bitvector::indexSet::nIndices(), ibis::array_t< T >::push_back(), ibis::array_t< T >::reserve(), ibis::array_t< T >::size(), and ibis::bitvector::size().
uint32_t ibis::part::getRowNumber | ( | const rid_t & | rid | ) | const |
Return the row number of the row with specified RID.
It tries the sorted RID list first.
If that fails, it uses the brute force searching algorithm.
ibis::part::TABLE_STATE ibis::part::getState | ( | ) | const |
Retrieve the current state of data partition.
It holds a read lock to ensure the sanity of the state. The function getStateNoLocking may return a transient value unless the caller holds a lock.
|
virtual |
Discover the records that can not be decided using the index.
Logically, iffy = high - low, where high and low are computed from estimateRange.
The return value is the estimated fraction of records that might satisfy the range condition.
References ibis::bitvector::cnt(), ibis::qContinuousRange::colName(), and ibis::column::getUndecidable().
long ibis::part::keywordSearch | ( | const ibis::qKeyword & | cmp, |
ibis::bitvector & | low | ||
) | const |
Identify all rows containing the specified keyword.
The keyword search is only applicable to a text column with full-text index (keyword index).
References ibis::qKeyword::colName(), ibis::qKeyword::keyword(), ibis::bitvector::set(), ibis::TEXT, and ibis::column::type().
long ibis::part::keywordSearch | ( | const ibis::qAllWords & | cmp, |
ibis::bitvector & | low | ||
) | const |
Determine the records that have all specified keywords.
Return a negative value to indicate error, 0 to indicate no hit, and positive value to indicate there are zero or more hits. To determine the exact number of hits, call low.count().
References ibis::qAllWords::colName(), ibis::bitvector::set(), ibis::TEXT, ibis::column::type(), and ibis::qAllWords::valueList().
long ibis::part::keywordSearch | ( | const ibis::qAllWords & | cmp | ) | const |
Compute an upper bound on the number of rows with all the specified keywords.
Returns 0 if the column is not a text column.
References ibis::qAllWords::colName(), ibis::TEXT, ibis::column::type(), and ibis::qAllWords::valueList().
void ibis::part::loadIndexes | ( | const char * | iopt = 0 , |
int | ropt = 0 |
||
) | const |
Load indexes of all columns.
This function iterates through all columns and loads the index associated with each one of them by calling ibis::column::loadIndex. If an index for a column does not exist, the index is built in memory and written to disk. The argument iopt is used as the index specification if a new index is to be built. If iopt is nil, the index specifications for the individual columns or the data partition are used. The argument ropt is passed to ibis::index::create to regenerate an index object from the index file. The default value of ropt is 0.
References ibis::bitvector::cnt(), ibis::index::create(), ibis::gParameters(), ibis::bitvector::indexSet::indices(), ibis::bitvector::indexSet::isRange(), ibis::bitvector::indexSet::nIndices(), ibis::array_t< T >::push_back(), ibis::array_t< T >::reserve(), and ibis::array_t< T >::size().
|
staticprotected |
The templated function to decide the bin boundaries and count the number of values that fall in each bin.
This function differs from the one used by getJointDistribution in that the bounds are defined with only closed bins.
References ibis::horometer::CPUTime(), ibis::array_t< T >::find(), ibis::horometer::realTime(), ibis::array_t< T >::size(), ibis::horometer::start(), and ibis::horometer::stop().
Referenced by old2DDistribution().
|
virtual |
Perform exact match operation for an AnyAny query.
The bulk of the work is performed as range queries.
References ibis::bitvector::bytes(), ibis::bitvector::cnt(), ibis::bitvector::compress(), ibis::bitvector::set(), ibis::bitvector::size(), and ibis::bitvector::sloppyCount().
bool ibis::part::matchMetaTags | ( | const std::vector< const char * > & | mtags | ) | const |
Match multiple name-value pairs against the internally stored meta tags.
bool ibis::part::matchMetaTags | ( | const ibis::resource::vList & | mtags | ) | const |
Match multiple name-value pairs.
Return true if and only if the two vLists match exactly.
bool ibis::part::matchNameValuePair | ( | const char * | name, |
const char * | value | ||
) | const |
Match a name-value pair in the meta tags.
Return true if the list of meta tags contains a name-value pair that matches the input arguments.
References ibis::util::nameMatch().
std::string ibis::part::metaTags | ( | ) | const |
Return the list of meta tags as a single string.
Output meta tags as a string.
The meta tags appears as 'name=value' pairs separated by comma (,).
Referenced by ibis::bord::backup().
|
staticprotected |
Perform the negative comparison.
Hits are those that don't satisfy the range conditions; however, the comparisons are only performed on those rows with mask == 1.
References ibis::bitvector::adjustSize(), ibis::bitvector::clear(), ibis::bitvector::cnt(), ibis::qIntHod::colName(), ibis::bitvector::compress(), ibis::bitvector::decompress(), ibis::bitvector::indexSet::indices(), ibis::qIntHod::inRange(), ibis::bitvector::indexSet::isRange(), ibis::bitvector::indexSet::nIndices(), ibis::horometer::realTime(), ibis::bitvector::reserve(), ibis::bitvector::set(), ibis::bitvector::setBit(), ibis::array_t< T >::size(), ibis::bitvector::size(), ibis::horometer::start(), and ibis::horometer::stop().
|
staticprotected |
Perform the negative comparison.
Hits are those that don't satisfy the range conditions; however, the comparisons are only performed on those rows with mask == 1.
References ibis::fileManager::buffer< T >::address(), ibis::bitvector::adjustSize(), ibis::bitvector::clear(), ibis::bitvector::cnt(), ibis::qIntHod::colName(), ibis::bitvector::compress(), ibis::bitvector::decompress(), ibis::bitvector::indexSet::indices(), ibis::qIntHod::inRange(), ibis::fileManager::instance(), ibis::bitvector::indexSet::isRange(), ibis::bitvector::indexSet::nIndices(), ibis::horometer::realTime(), ibis::fileManager::recordPages(), ibis::bitvector::reserve(), ibis::bitvector::set(), ibis::bitvector::setBit(), ibis::bitvector::size(), ibis::fileManager::buffer< T >::size(), ibis::bitvector::sloppyCount(), ibis::horometer::start(), ibis::horometer::stop(), and UnixOpen.
|
virtual |
Compute the records (marked 1 in the mask) that do not satisfy the range condition.
Only the entries with mask[i] == 1 are examined, those with mask[i] == 0 will never be hits.
References ibis::bitvector::adjustSize(), ibis::CATEGORY, ibis::bitvector::cnt(), ibis::qRange::colName(), ibis::column::dataFileName(), ibis::DOUBLE, ibis::column::estimateRange(), ibis::FLOAT, ibis::qExpr::getType(), ibis::column::getValuesArray(), ibis::INT, ibis::LONG, ibis::bitvector::set(), ibis::SHORT, ibis::bitvector::size(), ibis::bitvector::sloppyCount(), ibis::TEXT, ibis::column::type(), ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
|
protected |
Turn a list of numbers into a bitvector.
Convert a set of numbers to an ibis::bitvector.
References ibis::bitvector::adjustSize(), ibis::bitvector::appendFill(), ibis::util::copy(), and ibis::bitvector::setBit().
|
protected |
The old implementation that uses binary lookup.
For floating-point values, this function will go through the intermediate arrays three times, once to compute the actual minimum and maximum values, once to count the 1D distributions, and finally to count the number of values in the 2D bins. The last step is more expensive than the first two because it involves two binary searches, one on each set of the boundaries.
References ibis::CATEGORY, ibis::bitvector::cnt(), ibis::horometer::CPUTime(), ibis::DOUBLE, ibis::countQuery::evaluate(), ibis::FLOAT, ibis::countQuery::getHitVector(), ibis::column::getNullMask(), ibis::INT, ibis::util::logMessage(), mapValues(), ibis::horometer::realTime(), ibis::column::selectDoubles(), ibis::column::selectFloats(), ibis::column::selectInts(), ibis::column::selectUInts(), ibis::countQuery::setWhereClause(), ibis::SHORT, ibis::array_t< T >::size(), ibis::horometer::start(), ibis::horometer::stop(), ibis::column::type(), ibis::UBYTE, ibis::UINT, and ibis::USHORT.
|
protected |
Pack a cumulative distribution stored in two std::vectors into two arrays provided by the caller.
References ibis::util::logMessage().
void ibis::part::print | ( | std::ostream & | out | ) | const |
Output a description of every column in the data partition.
Print the basic information to the specified output stream.
Referenced by part().
long ibis::part::purgeInactive | ( | ) |
Purge all inactive rows from the partition.
Return the number of rows left or an error code.
References ibis::fileManager::buffer< T >::address(), ibis::fileManager::flushDir(), ibis::fileManager::instance(), ibis::util::logMessage(), ibis::column::name(), ibis::OID, ibis::column::saveSelected(), and ibis::fileManager::buffer< T >::size().
void ibis::part::purgeIndexFiles | ( | ) | const |
Remove existing index files! The indexes will be rebuilt next time they are needed.
This function is useful after changing the index specification before rebuilding a set of new indices.
Referenced by fastbit_purge_indexes().
void ibis::part::queryTest | ( | const char * | pref, |
long * | nerrors | ||
) | const |
Generate and run random queries for selfTest.
Randomly select a column and perform a set of tests recursively.
References ibis::CATEGORY, ibis::util::compactValue(), ibis::horometer::CPUTime(), ibis::DOUBLE, ibis::FLOAT, ibis::gParameters(), ibis::util::logMessage(), ibis::util::rand(), ibis::horometer::realTime(), ibis::util::serialNumber(), ibis::horometer::start(), ibis::horometer::stop(), and ibis::TEXT.
void ibis::part::quickTest | ( | const char * | pref, |
long * | nerrors | ||
) | const |
Generate and run random queries for selfTest.
Randomly select a column from the current list and perform a dozen tests on the column.
References ibis::CATEGORY, ibis::query::clearErrorMessage(), ibis::bitvector::cnt(), ibis::util::compactValue(), ibis::column::computeMinMax(), ibis::horometer::CPUTime(), ibis::array_t< T >::deepCopy(), ibis::DOUBLE, ibis::query::evaluate(), ibis::FLOAT, ibis::query::getHitVector(), ibis::query::getLastError(), ibis::column::getNullMask(), ibis::query::getNumHits(), ibis::query::getRIDs(), ibis::query::getWhereClause(), ibis::gParameters(), ibis::query::id(), ibis::bitvector::indexSet::indices(), ibis::bitvector::indexSet::isRange(), ibis::util::logMessage(), ibis::column::lowerBound(), ibis::column::name(), ibis::bitvector::indexSet::nIndices(), ibis::util::rand(), ibis::horometer::realTime(), ibis::array_t< T >::resize(), ibis::query::sequentialScan(), ibis::util::serialNumber(), ibis::query::setRIDs(), ibis::query::setSelectClause(), ibis::query::setWhereClause(), ibis::array_t< T >::size(), ibis::horometer::start(), ibis::horometer::stop(), ibis::TEXT, ibis::column::type(), and ibis::column::upperBound().
long ibis::part::reactivate | ( | const std::vector< uint32_t > & | rows | ) |
Make sure the specified rows are active.
Return the total number of active rows or error code.
References ibis::bitvector::cnt().
|
protected |
Change all rows marked 1 to be active.
Mark the rows identified in rows
as active.
References ibis::bitvector::cnt(), ibis::fileManager::flushFile(), ibis::fileManager::instance(), ibis::bitvector::size(), and ibis::bitvector::write().
|
protected |
Don't change the data.
Read the metadata file from the named dir.
Read metadata file -part.txt.
If dir is the activeDir, it will also update the content of *this, otherwise it will only modify arguments nrows
and plist
. If this function completes successfully, it returns the maximum length of the column names. Otherwise, it returns a value of zero or less to indicate errors.
References ibis::BLOB, ibis::CATEGORY, ibis::util::getString(), ibis::util::logMessage(), ibis::column::name(), ibis::resource::parseNameValuePairs(), ibis::util::strnewdup(), ibis::TEXT, ibis::column::type(), and ibis::TYPESTRING.
Referenced by part().
|
static |
A class function to read the meta tags in the tdc file.
Read the meta tag entry in the header section of the metadata file in directory dir.
The meta tags are name-value pairs associated with a data partition. They record information about a data partition that one might want to search through matchNameValuePair or matchMetaTags or simply part of the regular query expressions.
References ibis::util::getString().
|
protected |
Issues a query and then subdivides the range into three to check that the total hits of the three sub queries match the hits of the single query.
It allows a maximum of 6 levels of recursion, no more than 80 queries.
References ibis::bitvector::cnt(), ibis::util::compactValue(), ibis::bitvector::compress(), ibis::DOUBLE, ibis::FLOAT, ibis::column::getNullMask(), ibis::util::logMessage(), ibis::column::lowerBound(), ibis::column::name(), ibis::column::type(), and ibis::column::upperBound().
|
inline |
Attempt to release a read or write access to this part object.
A simple wrap over pthread_rwlock_unlock. Returns the return value of pthread_rwlock_unlock.
Referenced by fastbit_columns_in_partition(), fastbit_flush_buffer(), fastbit_purge_index(), fastbit_purge_indexes(), fastbit_reorder_partition(), and fastbit_rows_in_partition().
void ibis::part::rename | ( | const ibis::partAssoc & | known | ) |
Rename the partition to avoid conflicts with an existing list of data partitions.
If the incoming name is empty, this function assigns the data partition a random name. If it already has a name, it will append a random number at the end. It will try as many random numbers as necessary to produce a name that is not already in the list of known data partitions.
References ibis::fileManager::iBeat(), ibis::util::int2string(), ibis::util::shortName(), and ibis::util::strnewdup().
Referenced by ibis::util::gatherParts().
void ibis::part::rename | ( | const char * | newname | ) |
Change the name of the data partition to the given name.
Nothing is done if the incoming argument is a nil pointer or points to a null string.
References ibis::util::strnewdup().
|
virtual |
Sort rows with the lowest cardinality column first.
Only integer-valued columns are used in sorting. Returns the number of rows reordered when successful, otherwise return a negative number and the base data is corrupt!
Reimplemented in ibis::bord.
References activeDir, gatherSortKeys(), nColumns(), and nRows().
Referenced by fastbit_reorder_partition(), and ibis::bord::reorder().
|
virtual |
Sort rows according to the values of the columns specified in names
.
It orders the rows according to the values of names[0] first, names[1] second, and so on. For each column, if the corresponding value of directions is present, the value of directions is interpreted as whether or not the column is to be ordered in ascending order. The direction defaults to the ascending order if the value is not present.
Reimplemented in ibis::bord.
References ibis::CATEGORY, ibis::bitvector::cnt(), ibis::DOUBLE, ibis::FLOAT, ibis::fileManager::flushDir(), ibis::fileManager::getFile(), ibis::util::getLocalTime(), ibis::fileManager::instance(), ibis::INT, ibis::LONG, ibis::array_t< T >::resize(), ibis::SHORT, ibis::array_t< T >::size(), ibis::bitvector::size(), ibis::util::logger::str(), ibis::array_t< T >::swap(), ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, ibis::USHORT, and ibis::bitvector::write().
|
protected |
Write the named data file in a segmented sorted order.
Reorders elementary data types.
Cannot handle string-valued data! This function opens the data file in read-write mode and modifies the content of the underlying data file.
References ibis::array_t< T >::clear(), ibis::horometer::CPUTime(), ibis::util::guardBase::dismiss(), PREFERRED_BLOCK_SIZE, ibis::array_t< T >::push_back(), ibis::array_t< T >::read(), ibis::horometer::realTime(), ibis::array_t< T >::resize(), ibis::array_t< T >::size(), ibis::array_t< T >::sort(), ibis::horometer::start(), ibis::horometer::stop(), ibis::array_t< T >::swap(), and UnixOpen.
long ibis::part::rollback | ( | ) |
Rollback the append operation.
Rollback(revert) to previous data set.
This function can only be called before calling commit
.
Can only undo the last append operation on the data partition.
References ibis::fileManager::clear(), ibis::fileManager::flushFile(), ibis::fileManager::getFile(), ibis::fileManager::instance(), ibis::util::logMessage(), and ibis::util::removeDir().
ibis::array_t< signed char > * ibis::part::selectBytes | ( | const char * | pname, |
const ibis::bitvector & | mask | ||
) | const |
Retrieve values of the named column as 8-bit integers.
The selected values are packed into the resulting array.
Only those rows marked 1 are retrieved. The caller is responsible for deleting the returned value.
References ibis::bitvector::cnt(), ibis::util::emptyCache(), ibis::column::selectBytes(), and ibis::bitvector::size().
Referenced by ibis::query::getQualifiedBytes().
ibis::array_t< double > * ibis::part::selectDoubles | ( | const char * | pname, |
const ibis::bitvector & | mask | ||
) | const |
Retrieve values of the named column as 64-bit floating-point values.
The selected values are packed into the resulting array.
Only those rows marked 1 are retrieved. The caller is responsible for deleting the returned value.
References ibis::bitvector::cnt(), ibis::util::emptyCache(), ibis::column::selectDoubles(), and ibis::bitvector::size().
Referenced by ibis::query::getQualifiedDoubles().
ibis::array_t< float > * ibis::part::selectFloats | ( | const char * | pname, |
const ibis::bitvector & | mask | ||
) | const |
Retrieve values of the named column as 32-bit floating-point values.
The selected values are packed into the resulting array.
Only those rows marked 1 are retrieved. The caller is responsible for deleting the returned value.
References ibis::bitvector::cnt(), ibis::util::emptyCache(), ibis::column::selectFloats(), and ibis::bitvector::size().
Referenced by ibis::query::getQualifiedFloats().
ibis::array_t< int32_t > * ibis::part::selectInts | ( | const char * | pname, |
const ibis::bitvector & | mask | ||
) | const |
Retrieve values of the named column as 32-bit integers.
The selected values are packed into the resulting array.
Only those rows marked 1 are retrieved. The caller is responsible for deleting the returned value.
References ibis::bitvector::cnt(), ibis::util::emptyCache(), ibis::column::selectInts(), and ibis::bitvector::size().
Referenced by ibis::query::getQualifiedInts().
ibis::array_t< int64_t > * ibis::part::selectLongs | ( | const char * | pname, |
const ibis::bitvector & | mask | ||
) | const |
Retrieve values of the named column as 64-bit integers.
The selected values are packed into the resulting array.
Only those rows marked 1 are retrieved. The caller is responsible for deleting the returned value.
References ibis::bitvector::cnt(), ibis::util::emptyCache(), ibis::column::selectLongs(), and ibis::bitvector::size().
Referenced by ibis::query::getQualifiedLongs().
ibis::array_t< int16_t > * ibis::part::selectShorts | ( | const char * | pname, |
const ibis::bitvector & | mask | ||
) | const |
Retrieve values of the named column as 16-bit integers.
The selected values are packed into the resulting array.
Only those rows marked 1 are retrieved. The caller is responsible for deleting the returned value.
References ibis::bitvector::cnt(), ibis::util::emptyCache(), ibis::column::selectShorts(), and ibis::bitvector::size().
Referenced by ibis::query::getQualifiedShorts().
std::vector< std::string > * ibis::part::selectStrings | ( | const char * | pname, |
const ibis::bitvector & | mask | ||
) | const |
Retrieve values of the named column as strings.
The selected values are packed into the resulting array.
Only those rows marked 1 are retrieved. The caller is responsible for deleting the returned value.
References ibis::bitvector::cnt(), ibis::util::emptyCache(), ibis::column::selectStrings(), and ibis::bitvector::size().
Referenced by ibis::query::getQualifiedStrings().
ibis::array_t< unsigned char > * ibis::part::selectUBytes | ( | const char * | pname, |
const ibis::bitvector & | mask | ||
) | const |
Retrieve values of the named column as 8-bit unsigned integers.
The selected values are packed into the resulting array.
Only those rows marked 1 are retrieved. The caller is responsible for deleting the returned value.
References ibis::bitvector::cnt(), ibis::util::emptyCache(), ibis::column::selectUBytes(), and ibis::bitvector::size().
Referenced by ibis::query::getQualifiedUBytes().
ibis::array_t< uint32_t > * ibis::part::selectUInts | ( | const char * | pname, |
const ibis::bitvector & | mask | ||
) | const |
Retrieve values of the named column as 32-bit unsigned integers.
The selected values are packed into the resulting array.
Only those rows marked 1 are retrieved. The caller is responsible for deleting the returned value.
References ibis::bitvector::cnt(), ibis::util::emptyCache(), ibis::column::selectUInts(), and ibis::bitvector::size().
Referenced by ibis::query::getQualifiedUInts().
ibis::array_t< uint64_t > * ibis::part::selectULongs | ( | const char * | pname, |
const ibis::bitvector & | mask | ||
) | const |
Retrieve values of the named column as 64-bit unsigned integers.
The selected values are packed into the resulting array.
Only those rows marked 1 are retrieved. The caller is responsible for deleting the returned value.
References ibis::bitvector::cnt(), ibis::util::emptyCache(), ibis::column::selectULongs(), and ibis::bitvector::size().
Referenced by ibis::query::getQualifiedULongs().
ibis::array_t< uint16_t > * ibis::part::selectUShorts | ( | const char * | pname, |
const ibis::bitvector & | mask | ||
) | const |
Retrieve values of the named column as 16-bit unsigned integers.
The selected values are packed into the resulting array.
Only those rows marked 1 are retrieved. The caller is responsible for deleting the returned value.
References ibis::bitvector::cnt(), ibis::util::emptyCache(), ibis::column::selectUShorts(), and ibis::bitvector::size().
Referenced by ibis::query::getQualifiedUShorts().
long ibis::part::selectValues | ( | const ibis::qContinuousRange & | cond, |
void * | vals | ||
) | const |
Select values of the column based on the range condition.
The column to be selected is also the column subject to the range condition.
References ibis::qContinuousRange::colName(), and ibis::column::selectValues().
void ibis::part::setMeshShape | ( | const char * | shape | ) |
Digest the mesh shape stored in the string.
The shape can be just numbers, e.g., "(10, 12, 14)", or 'name=value' pairs, e.g., "(nz=10, ny=12, nx=14)".
References ibis::part::softWriteLock::isLocked().
|
protected |
Make a deep copy of the incoming name-value pairs.
The even element is assumed to be the name and the odd element is assumed to be the value. If the last name is not followed by a value it is assumed to have the value of '*', which indicates do-not-care.
References ibis::util::strnewdup().
|
static |
Skip past all the dots in the given string.
The pointer returned from this function points to the first character after the last dot (.). If the incoming string ends with a dot, the returned pointer points to the null terminator. If there is no dot at all, the return value is the same as the input value.
Referenced by ibis::bord::bord().
long ibis::part::stringSearch | ( | const ibis::qString & | cmp, |
ibis::bitvector & | low | ||
) | const |
Find all records that have the exact string value.
The object qString contains only two string values without any indication as to what they represent. It first tries to match the left string against known column names of this partition. If the name matches one that is of type STRING or KEY, the search is performed on this column. Otherwise the right string is compared against the column names; if a match is found, the search is performed on that column. If both fail, the search returns no hit.
Return a negative value to indicate error, 0 to indicate no hit, and positive value to indicate there are zero or more hits.
References ibis::bitvector::set().
Referenced by ibis::query::weight::operator()().
long ibis::part::stringSearch | ( | const ibis::qAnyString & | cmp, |
ibis::bitvector & | low | ||
) | const |
Determine the records that have the exact string values.
Actual work done in the function search of the string-valued column. It produces no hit if the name is not a string-valued column.
Return a negative value to indicate error, 0 to indicate no hit, and positive value to indicate there are zero or more hits. To determine the exact number of hits, call low.count().
References ibis::CATEGORY, ibis::qAnyString::colName(), ibis::column::getNullMask(), ibis::bitvector::set(), ibis::TEXT, ibis::column::type(), and ibis::qAnyString::valueList().
long ibis::part::stringSearch | ( | const ibis::qString & | cmp | ) | const |
Return an upper bound of the number of records that have the exact string value.
|
protected |
Convert a set of range conditions to an ibis::bitvector.
References ibis::bitvector::clear(), ibis::query::getExpandedHits(), ibis::query::setWhereClause(), and ibis::util::userName().
void ibis::part::testRangeOperators | ( | const ibis::column * | col, |
long * | nerrors | ||
) | const |
Try a set of range conditions with different combinations of operators.
Loop through all operators for a continuous range expression, check to see if the number of hits computed from evaluating a count query matches that returned from ibis::part::countHits.
References ibis::column::computeMinMax(), ibis::column::lowerBound(), ibis::column::name(), ibis::util::rand(), and ibis::column::upperBound().
|
inline |
A soft attempt to gain write access to this part object.
A simple wrapper over pthread_rwlock_trywrlock. Returns the return value of pthread_rwlock_trywrlock.
int ibis::part::updateData | ( | ) |
Check the time stamp on the metadata files to decide if the in-memory metadata information requires updating.
References ibis::util::emptyCache(), and ibis::part::softWriteLock::isLocked().
Referenced by fastbit_flush_buffer(), and ibis::util::updateDatasets().
void ibis::part::updateMetaData | ( | ) | const |
Write the metadata file to record the changes to the partition.
References ibis::part::softWriteLock::isLocked().
Referenced by fastbit_build_index().
|
static |
Write the content of vals to an open file.
This template function works with fixed size elements stored in array_t.
Return the number of elements written or an error code. The error code is always less than 0.
References ibis::bitvector::adjustSize(), ibis::array_t< T >::size(), and ibis::bitvector::size().
Referenced by ibis::bord::backup().
|
protected |
Write metadata file -part.txt.
Write the metadata about the data partition into the ASCII file named "-part.txt".
The caller is expected to hold a write lock on the data partition to prevent simultaneous writes.
References ibis::util::checksum(), ibis::util::getLocalTime(), and ibis::util::int2string().
|
static |
Write raw bytes to an open file.
It also requires a second file to store starting positions of the raw binary objects.
Return the number of raw objects written to the open file or an error code. Note that the error code is always less than 0.
References ibis::bitvector::adjustSize().
Referenced by ibis::bord::backup().
|
static |
Write raw bytes to an open file.
It also requires a second file to store starting positions of the raw binary objects.
Return the number of raw objects written to the open file or an error code. Note that the error code is always less than 0.
References ibis::bitvector::adjustSize(), ibis::array_t< T >::size(), and ibis::bitvector::size().
|
static |
Write strings to an open file.
The strings are stored in a std::vector<std::string>. The strings are null-terminated and therefore can not contain null characters in them.
Return the number of strings written to the open file or an error code.
References ibis::bitvector::adjustSize().
Referenced by ibis::bord::backup().
|
protected |
Write the named data file with values in the given order.
Writes elementary data types. Cannot handle string values correctly.
References PREFERRED_BLOCK_SIZE, ibis::array_t< T >::read(), ibis::horometer::realTime(), ibis::array_t< T >::size(), ibis::horometer::start(), ibis::horometer::stop(), and UnixOpen.