A version of precise index that directly uses the integer values. More...
#include <idirekte.h>
Public Member Functions | |
virtual long | append (const char *dt, const char *df, uint32_t nnew) |
Append the index in df to the one in dt. More... | |
long | append (const ibis::direkte &tail) |
Append tail to this index. More... | |
long | append (const array_t< uint32_t > &ind) |
Append a list of integers. More... | |
virtual void | binBoundaries (std::vector< double > &) const |
The function binBoundaries and binWeights return bin boundaries and counts of each bin respectively. More... | |
virtual void | binWeights (std::vector< uint32_t > &) const |
direkte (const direkte &rhs) | |
direkte (const ibis::column *c, const char *f=0) | |
Constructing a new ibis::direkte object from base data in a file. More... | |
direkte (const ibis::column *c, ibis::fileManager::storage *st) | |
direkte (const ibis::column *c, uint32_t popu, uint32_t ntpl=0) | |
Construct a dummy index. More... | |
direkte (const ibis::column *c, uint32_t card, array_t< uint32_t > &ints) | |
Construct an index from an integer array. More... | |
virtual index * | dup () const |
Duplicate the content of an index object. | |
virtual void | estimate (const ibis::qContinuousRange &expr, ibis::bitvector &lower, ibis::bitvector &upper) const |
Computes an approximation of hits as a pair of lower and upper bounds. More... | |
virtual uint32_t | estimate (const ibis::qContinuousRange &expr) const |
Returns an upper bound on the number of hits. | |
virtual void | estimate (const ibis::qDiscreteRange &expr, ibis::bitvector &lower, ibis::bitvector &upper) const |
Estimate the hits for discrete ranges, i.e., those translated from 'a IN (x, y, ..)'. More... | |
virtual uint32_t | estimate (const ibis::qDiscreteRange &expr) const |
virtual double | estimateCost (const ibis::qContinuousRange &expr) const |
Estimate the cost of evaluating a range condition. | |
virtual double | estimateCost (const ibis::qDiscreteRange &expr) const |
Estimate the cost of evaluating a range condition. | |
virtual long | evaluate (const ibis::qContinuousRange &expr, ibis::bitvector &hits) const |
To evaluate the exact hits. More... | |
virtual long | evaluate (const ibis::qDiscreteRange &expr, ibis::bitvector &hits) const |
To evaluate the exact hits. More... | |
virtual long | getCumulativeDistribution (std::vector< double > &bds, std::vector< uint32_t > &cts) const |
Cumulative distribution of the data. More... | |
virtual long | getDistribution (std::vector< double > &bbs, std::vector< uint32_t > &cts) const |
Binned distribution of the data. More... | |
virtual double | getMax () const |
The maximum value recorded in the index. | |
virtual double | getMin () const |
The minimum value recorded in the index. | |
virtual double | getSum () const |
Compute the approximate sum of all the values indexed. More... | |
void | ints (array_t< uint32_t > &) const |
Convert the bitvector into integer values. | |
array_t< uint32_t > * | keys (const ibis::bitvector &mask) const |
Convert the bitvector mask into key values. | |
virtual const char * | name () const |
Returns the name of the index, similar to the function type, but returns a string instead. More... | |
virtual void | print (std::ostream &out) const |
The printing function. | |
virtual int | read (const char *name) |
Read index from the specified location. | |
virtual int | read (ibis::fileManager::storage *st) |
Reconstruct an index from a piece of consecutive memory. | |
int | remapKeys (const ibis::array_t< uint32_t > &) |
Change the key values to a new set of numbers. More... | |
virtual long | select (const ibis::qContinuousRange &, void *) const |
Evaluate the range condition and select values. | |
virtual long | select (const ibis::qContinuousRange &, void *, ibis::bitvector &) const |
Evaluate the range condition, select values, and record the positions. | |
virtual void | serialSizes (uint64_t &, uint64_t &, uint64_t &) const |
Compute the size of arrays that would be generated by the serialization function (write). More... | |
virtual void | speedTest (std::ostream &out) const |
Time some logical operations and print out their speed. More... | |
virtual INDEX_TYPE | type () const |
Returns an index type identifier. | |
virtual float | undecidable (const ibis::qContinuousRange &expr, ibis::bitvector &iffy) const |
Mark the position of the rows that can not be decided with this index. More... | |
virtual float | undecidable (const ibis::qDiscreteRange &expr, ibis::bitvector &iffy) const |
virtual int | write (ibis::array_t< double > &, ibis::array_t< int64_t > &, ibis::array_t< uint32_t > &) const |
Save index to three arrays. Serialize the index in memory. | |
virtual int | write (const char *name) const |
Write the direct bitmap index to a file. | |
Public Member Functions inherited from ibis::index | |
void | addBins (uint32_t ib, uint32_t ie, ibis::bitvector &res) const |
Add the sum of bits [ib] through bits [ie-1] to res . More... | |
void | addBins (uint32_t ib, uint32_t ie, ibis::bitvector &res, const ibis::bitvector &tot) const |
Compute the sum of bit vectors [ib , ie ). More... | |
virtual int | contractRange (ibis::qContinuousRange &) const |
bool | empty () const |
The index object is considered empty if there is no bitmap or getNRows returns 0. More... | |
virtual void | estimate (const ibis::index &idx2, const ibis::deprecatedJoin &expr, ibis::bitvector64 &lower, ibis::bitvector64 &upper) const |
Estimate the pairs for the range join operator. | |
virtual void | estimate (const ibis::index &idx2, const ibis::deprecatedJoin &expr, const ibis::bitvector &mask, ibis::bitvector64 &lower, ibis::bitvector64 &upper) const |
Estimate the pairs for the range join operator. More... | |
virtual void | estimate (const ibis::index &idx2, const ibis::deprecatedJoin &expr, const ibis::bitvector &mask, const ibis::qRange *const range1, const ibis::qRange *const range2, ibis::bitvector64 &lower, ibis::bitvector64 &upper) const |
virtual int64_t | estimate (const ibis::index &idx2, const ibis::deprecatedJoin &expr) const |
Estimate an upper bound for the number of pairs. | |
virtual int64_t | estimate (const ibis::index &idx2, const ibis::deprecatedJoin &expr, const ibis::bitvector &mask) const |
Estimate an upper bound for the number of pairs produced from marked records. More... | |
virtual int64_t | estimate (const ibis::index &idx2, const ibis::deprecatedJoin &expr, const ibis::bitvector &mask, const ibis::qRange *const range1, const ibis::qRange *const range2) const |
virtual void | estimate (const ibis::deprecatedJoin &expr, const ibis::bitvector &mask, const ibis::qRange *const range1, const ibis::qRange *const range2, ibis::bitvector64 &lower, ibis::bitvector64 &upper) const |
Evaluating a join condition with one (likely composite) index. | |
virtual int64_t | estimate (const ibis::deprecatedJoin &expr, const ibis::bitvector &mask, const ibis::qRange *const range1, const ibis::qRange *const range2) const |
virtual int | expandRange (ibis::qContinuousRange &) const |
The functions expandRange and contractRange expands or contracts the boundaries of a range condition so that the new range will have exact answers using the function estimate. More... | |
virtual const ibis::bitvector * | getBitvector (uint32_t i) const |
Return a pointer to the ith bitvector used in the index (may be 0). | |
uint32_t | getNRows () const |
Return the number of rows represented by this object. | |
virtual uint32_t | numBitvectors () const |
Returns the number of bit vectors used by the index. | |
float | sizeInBytes () const |
Estimate the size of this index object measured in bytes. More... | |
void | sumBins (uint32_t ib, uint32_t ie, ibis::bitvector &res) const |
Sum up bits[ib:ie-1] and place the result in res. More... | |
void | sumBins (uint32_t ib, uint32_t ie, ibis::bitvector &res, uint32_t ib0, uint32_t ie0) const |
Compute a new sum for bit vectors [ib, ie) by taking advantage of the old sum for bitvectors [ib0, ie0). More... | |
void | sumBins (uint32_t ib, uint32_t ie, ibis::bitvector &res, uint32_t *buf) const |
Sum up bits[ib:ie-1] and place the result in res. More... | |
void | sumBins (const ibis::array_t< uint32_t > &, ibis::bitvector &) const |
Sum up the bits in the specified bins. | |
virtual | ~index () |
The destructor. | |
Protected Member Functions | |
template<typename T > | |
int | construct (const char *f) |
template<typename T > | |
int | construct0 (const char *f) |
virtual size_t | getSerialSize () const throw () |
Estimate the size of the index file. More... | |
void | locate (const ibis::qContinuousRange &expr, uint32_t &hit0, uint32_t &hit1) const |
direkte & | operator= (const direkte &) |
Protected Member Functions inherited from ibis::index | |
virtual void | activate () const |
Regenerate all bitvectors from the underlying storage. More... | |
virtual void | activate (uint32_t i) const |
Regenerate the ith bitvector from the underlying storage. | |
virtual void | activate (uint32_t i, uint32_t j) const |
Regenerate bitvectors i (inclusive) through j (exclusive) from the underlying storage. More... | |
virtual void | clear () |
Clear the existing content. More... | |
void | computeMinMax (const char *f, double &min, double &max) const |
void | dataFileName (std::string &name, const char *f=0) const |
Generate data file name from "f". More... | |
index (const ibis::column *c=0) | |
Default constructor. More... | |
index (const ibis::column *c, ibis::fileManager::storage *s) | |
Constructor with a storage object. More... | |
index (const index &) | |
Copy constructor. | |
void | indexFileName (std::string &name, const char *f=0) const |
Generates index file name from "f". More... | |
void | initBitmaps (int fdes) |
Prepare the bitmaps using the given file descriptor. More... | |
void | initBitmaps (ibis::fileManager::storage *st) |
Prepare bitmaps from the given storage object. More... | |
void | initBitmaps (uint32_t *st) |
Prepare bitmaps from the given raw pointer. More... | |
void | initBitmaps (void *ctx, FastBitReadBitmaps rd) |
Prepare bitmaps from the user provided function pointer and context. More... | |
int | initOffsets (int64_t *, size_t) |
Initialize the offsets from the given data array. More... | |
int | initOffsets (int fdes, const char offsize, size_t start, uint32_t nobs) |
Read in the offset array. More... | |
int | initOffsets (ibis::fileManager::storage *st, size_t start, uint32_t nobs) |
Regenerate the offsets array from the given storage object. More... | |
void | mapValues (const char *f, VMap &bmap) const |
Map the positions of each individual value. More... | |
void | mapValues (const char *f, histogram &hist, uint32_t count=0) const |
Generate a histogram. More... | |
index & | operator= (const index &) |
Assignment operator. | |
void | optionalUnpack (array_t< ibis::bitvector * > &bits, const char *opt) |
A function to decide whether to uncompress the bitvectors. More... | |
Additional Inherited Members | |
Public Types inherited from ibis::index | |
typedef std::map< double, uint32_t > | histogram |
enum | INDEX_TYPE { BINNING =0, RANGE, MESA, AMBIT, PALE, PACK, ZONE, RELIC, ROSTER, SKIVE, FADE, SBIAD, SAPID, EGALE, MOINS, ENTRE, BAK, BAK2, KEYWORDS, MESH, BAND, DIREKTE, GENERIC, BYLT, FUZZ, ZONA, FUGE, SLICE, EXTERN } |
The integer values of this enum type are used in the index files to differentiate the indexes. More... | |
typedef std::map< double, ibis::bitvector * > | VMap |
Static Public Member Functions inherited from ibis::index | |
static void | addBits (const array_t< bitvector * > &bits, uint32_t ib, uint32_t ie, ibis::bitvector &res) |
Add the pile [ib:ie-1] to res . More... | |
static index * | create (const column *c, const char *name=0, const char *spec=0, int inEntirety=0) |
Index factory. More... | |
static void | divideCounts (array_t< uint32_t > &bounds, const array_t< uint32_t > &cnt) |
Determine how to split the array cnt , so that each group has roughly the same total value. More... | |
static bool | isIndex (const char *f, INDEX_TYPE t) |
Is the named file an index file? Read the header of the named file to determine if it contains an index of the specified type. More... | |
template<typename E > | |
static void | mapValues (const array_t< E > &val, VMap &bmap) |
template<typename E > | |
static void | mapValues (const array_t< E > &val, histogram &hist, uint32_t count=0) |
template<typename E > | |
static void | mapValues (const array_t< E > &val, array_t< E > &bounds, std::vector< uint32_t > &cnts) |
template<typename E1 , typename E2 > | |
static void | mapValues (const array_t< E1 > &val1, const array_t< E2 > &val2, array_t< E1 > &bnd1, array_t< E2 > &bnd2, std::vector< uint32_t > &cnts) |
Compute a two-dimensional histogram. More... | |
static void | printHeader (std::ostream &, const char *) |
static void | setBases (array_t< uint32_t > &bases, uint32_t card, uint32_t nbase=2) |
Fill the array bases with the values that cover the range [0, card). More... | |
static void | sumBits (const array_t< bitvector * > &bits, uint32_t ib, uint32_t ie, ibis::bitvector &res) |
Sum up pile [ib:ie-1] and place the result in res . More... | |
static void | sumBits (const array_t< bitvector * > &bits, const ibis::bitvector &tot, uint32_t ib, uint32_t ie, ibis::bitvector &res) |
Sum up pile [ib:ie-1] and add the result to res . More... | |
Static Protected Member Functions inherited from ibis::index | |
static void | indexFileName (std::string &name, const ibis::column *col1, const ibis::column *col2, const char *f=0) |
Generate the index file name for the composite index formed on two columns. More... | |
Protected Attributes inherited from ibis::index | |
array_t< ibis::bitvector * > | bits |
A list of bitvectors. | |
bitmapReader * | breader |
The functor to read serialized bitmaps from a more complex source. | |
const ibis::column * | col |
Pointer to the column this index is for. | |
const char * | fname |
The name of the file containing the index. | |
uint32_t | nrows |
The number of rows represented by the index. More... | |
array_t< int32_t > | offset32 |
Starting positions of the bitvectors. | |
array_t< int64_t > | offset64 |
Starting positions of the bitvectors. More... | |
ibis::fileManager::storage * | str |
The underlying storage. More... | |
A version of precise index that directly uses the integer values.
It can avoid some intermediate steps during index building and query answering. However, this class can only be used with integer columns with nonnegative values. Ideally, the values should start with 0, and only use small positive integers.
ibis::direkte::direkte | ( | const ibis::column * | c, |
const char * | f = 0 |
||
) |
Constructing a new ibis::direkte object from base data in a file.
Both arguments are expected to be valid pointers.
References ibis::index::bits, ibis::CATEGORY, ibis::index::col, ibis::index::dataFileName(), ibis::DOUBLE, ibis::FLOAT, ibis::column::fullname(), ibis::INT, ibis::LONG, ibis::column::lowerBound(), ibis::column::name(), print(), read(), ibis::SHORT, ibis::array_t< T >::size(), ibis::TEXT, ibis::column::type(), ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, ibis::column::upperBound(), and ibis::USHORT.
ibis::direkte::direkte | ( | const ibis::column * | c, |
uint32_t | popu, | ||
uint32_t | ntpl = 0 |
||
) |
Construct a dummy index.
All rows are marked as having the same value with position popu. This creates an index with (popu+1) bit vectors, with the last one set to all 1s and the rest to be empty.
References ibis::index::bits, ibis::index::clear(), ibis::index::col, ibis::column::fullname(), ibis::part::nRows(), ibis::index::nrows, print(), and ibis::array_t< T >::resize().
ibis::direkte::direkte | ( | const ibis::column * | c, |
uint32_t | card, | ||
array_t< uint32_t > & | ind | ||
) |
Construct an index from an integer array.
The values in the array ind are assumed to be between 0 and card-1. All values outside of this range are ignored.
References ibis::index::bits, ibis::index::clear(), ibis::index::col, ibis::column::fullname(), ibis::index::nrows, print(), ibis::array_t< T >::resize(), and ibis::array_t< T >::size().
|
virtual |
Append the index in df to the one in dt.
If the index in df exists, then it will be used, otherwise it simply creates a new index using the data in dt.
Reimplemented from ibis::index.
References ibis::fileManager::storage::begin(), ibis::index::bits, ibis::CATEGORY, ibis::util::clear(), ibis::index::DIREKTE, ibis::fileManager::getFile(), ibis::fileManager::instance(), ibis::INT, ibis::LONG, ibis::index::nrows, ibis::index::offset32, ibis::index::offset64, ibis::SHORT, ibis::array_t< T >::size(), ibis::index::str, ibis::fileManager::storage::swap(), ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
Referenced by ibis::category::append(), and ibis::category::fillIndex().
long ibis::direkte::append | ( | const ibis::direkte & | tail | ) |
Append tail to this index.
The incoming index must be for the same column as this one.
References ibis::index::activate(), ibis::index::bits, ibis::index::col, ibis::index::nrows, ibis::bitvector::set(), and ibis::array_t< T >::size().
long ibis::direkte::append | ( | const array_t< uint32_t > & | ind | ) |
Append a list of integers.
The integers are treated as bin numbers. This function is primarily used by ibis::category::append.
References ibis::array_t< T >::push_back(), and ibis::array_t< T >::size().
|
virtual |
The function binBoundaries and binWeights return bin boundaries and counts of each bin respectively.
Reimplemented from ibis::index.
|
virtual |
Computes an approximation of hits as a pair of lower and upper bounds.
expr | the query expression to be evaluated. |
lower | a bitvector marking a subset of the hits. All rows marked with one (1) are definitely hits. |
upper | a bitvector marking a superset of the hits. All hits are marked with one, but some of the rows marked one may not be hits. If the variable upper is empty, the variable lower is assumed to contain the exact answer. |
Reimplemented from ibis::index.
References ibis::bitvector::clear().
|
virtual |
Estimate the hits for discrete ranges, i.e., those translated from 'a IN (x, y, ..)'.
A trivial implementation to indicate the index can not determine any row.
Reimplemented from ibis::index.
References ibis::bitvector::clear(), ibis::qDiscreteRange::getValues(), ibis::bitvector::set(), and ibis::array_t< T >::size().
|
virtual |
To evaluate the exact hits.
On success, return the number of hits, otherwise a negative value is returned.
Implements ibis::index.
References ibis::bitvector::cnt().
|
virtual |
To evaluate the exact hits.
On success, return the number of hits, otherwise a negative value is returned.
Reimplemented from ibis::index.
References ibis::bitvector::cnt(), ibis::qDiscreteRange::getValues(), ibis::bitvector::set(), and ibis::array_t< T >::size().
|
virtual |
Cumulative distribution of the data.
A brute-force approach to get an accurate cumulative distribution.
Reimplemented from ibis::index.
|
virtual |
Binned distribution of the data.
A brute-force approach to get an accurate distribution.
Reimplemented from ibis::index.
|
protectedvirtual |
Estimate the size of the index file.
The index file contains primarily the bitmaps.
Reimplemented from ibis::index.
|
virtual |
Compute the approximate sum of all the values indexed.
If it decides that computing the sum directly from the vertical partition is more efficient, it will return NaN immediately.
Reimplemented from ibis::index.
|
inlinevirtual |
Returns the name of the index, similar to the function type, but returns a string instead.
Implements ibis::index.
int ibis::direkte::remapKeys | ( | const ibis::array_t< uint32_t > & | o2n | ) |
Change the key values to a new set of numbers.
This is used after a categorical value column changes its dictionary and we need to reshuffle the bitmaps but not the actual content in any bitmap. The incoming argument is expected to be an array with exactly the same number of elements as the number of bitmaps in this index.
Return the number of bit vectors after successfully remapping the keys. Otherwise return a negative number.
References ibis::array_t< T >::clear(), ibis::util::clear(), ibis::array_t< T >::copy(), ibis::fileManager::flushFile(), ibis::fileManager::instance(), ibis::array_t< T >::size(), ibis::array_t< T >::swap(), and ibis::util::write().
Referenced by ibis::category::setDictionary().
|
virtual |
Compute the size of arrays that would be generated by the serialization function (write).
Implements ibis::index.
|
inlinevirtual |
Time some logical operations and print out their speed.
This version does nothing.
Reimplemented from ibis::index.
|
inlinevirtual |
Mark the position of the rows that can not be decided with this index.
expr | the range conditions to be evaluated. |
iffy | the bitvector marking the positions of rows that can not be decided using the index. Return value is the expected fraction of undecided rows that might satisfy the range conditions. |
Reimplemented from ibis::index.
References ibis::bitvector::clear().