The base index class. More...
#include <index.h>
Classes | |
class | barrel |
A specialization that adds function setValue . More... | |
class | bitmapReader |
A simple container to hold the function pointer given by user for reading the serialized bitmaps. More... | |
Public Types | |
typedef std::map< double, uint32_t > | histogram |
enum | INDEX_TYPE { BINNING =0, RANGE, MESA, AMBIT, PALE, PACK, ZONE, RELIC, ROSTER, SKIVE, FADE, SBIAD, SAPID, EGALE, MOINS, ENTRE, BAK, BAK2, KEYWORDS, MESH, BAND, DIREKTE, GENERIC, BYLT, FUZZ, ZONA, FUGE, SLICE, EXTERN } |
The integer values of this enum type are used in the index files to differentiate the indexes. More... | |
typedef std::map< double, ibis::bitvector * > | VMap |
Public Member Functions | |
void | addBins (uint32_t ib, uint32_t ie, ibis::bitvector &res) const |
Add the sum of bits [ib] through bits [ie-1] to res . More... | |
void | addBins (uint32_t ib, uint32_t ie, ibis::bitvector &res, const ibis::bitvector &tot) const |
Compute the sum of bit vectors [ib , ie ). More... | |
virtual long | append (const char *, const char *, uint32_t) |
Extend the index. | |
virtual void | binBoundaries (std::vector< double > &) const |
The function binBoundaries and binWeights return bin boundaries and counts of each bin respectively. More... | |
virtual void | binWeights (std::vector< uint32_t > &) const |
virtual int | contractRange (ibis::qContinuousRange &) const |
virtual index * | dup () const =0 |
Duplicate the content of an index object. | |
bool | empty () const |
The index object is considered empty if there is no bitmap or getNRows returns 0. More... | |
virtual void | estimate (const ibis::qContinuousRange &, ibis::bitvector &lower, ibis::bitvector &upper) const |
Computes an approximation of hits as a pair of lower and upper bounds. More... | |
virtual uint32_t | estimate (const ibis::qContinuousRange &) const |
Returns an upper bound on the number of hits. | |
virtual void | estimate (const ibis::qDiscreteRange &expr, ibis::bitvector &lower, ibis::bitvector &upper) const |
Estimate the hits for discrete ranges, i.e., those translated from 'a IN (x, y, ..)'. More... | |
virtual uint32_t | estimate (const ibis::qDiscreteRange &expr) const |
virtual void | estimate (const ibis::index &idx2, const ibis::deprecatedJoin &expr, ibis::bitvector64 &lower, ibis::bitvector64 &upper) const |
Estimate the pairs for the range join operator. | |
virtual void | estimate (const ibis::index &idx2, const ibis::deprecatedJoin &expr, const ibis::bitvector &mask, ibis::bitvector64 &lower, ibis::bitvector64 &upper) const |
Estimate the pairs for the range join operator. More... | |
virtual void | estimate (const ibis::index &idx2, const ibis::deprecatedJoin &expr, const ibis::bitvector &mask, const ibis::qRange *const range1, const ibis::qRange *const range2, ibis::bitvector64 &lower, ibis::bitvector64 &upper) const |
virtual int64_t | estimate (const ibis::index &idx2, const ibis::deprecatedJoin &expr) const |
Estimate an upper bound for the number of pairs. | |
virtual int64_t | estimate (const ibis::index &idx2, const ibis::deprecatedJoin &expr, const ibis::bitvector &mask) const |
Estimate an upper bound for the number of pairs produced from marked records. More... | |
virtual int64_t | estimate (const ibis::index &idx2, const ibis::deprecatedJoin &expr, const ibis::bitvector &mask, const ibis::qRange *const range1, const ibis::qRange *const range2) const |
virtual void | estimate (const ibis::deprecatedJoin &expr, const ibis::bitvector &mask, const ibis::qRange *const range1, const ibis::qRange *const range2, ibis::bitvector64 &lower, ibis::bitvector64 &upper) const |
Evaluating a join condition with one (likely composite) index. | |
virtual int64_t | estimate (const ibis::deprecatedJoin &expr, const ibis::bitvector &mask, const ibis::qRange *const range1, const ibis::qRange *const range2) const |
virtual double | estimateCost (const ibis::qContinuousRange &) const |
Estimate the cost of evaluating a range condition. | |
virtual double | estimateCost (const ibis::qDiscreteRange &) const |
Estimate the cost of evaluating a range condition. | |
virtual long | evaluate (const ibis::qContinuousRange &expr, ibis::bitvector &hits) const =0 |
To evaluate the exact hits. More... | |
virtual long | evaluate (const ibis::qDiscreteRange &, ibis::bitvector &) const |
To evaluate the exact hits. More... | |
virtual int | expandRange (ibis::qContinuousRange &) const |
The functions expandRange and contractRange expand or contract the boundaries of a range condition so that the new range will have exact answers using the function estimate. More... | |
virtual const ibis::bitvector * | getBitvector (uint32_t i) const |
Return a pointer to the ith bitvector used in the index (may be 0). | |
virtual long | getCumulativeDistribution (std::vector< double > &bds, std::vector< uint32_t > &cts) const |
Cumulative distribution of the data. More... | |
virtual long | getDistribution (std::vector< double > &bbs, std::vector< uint32_t > &cts) const |
Binned distribution of the data. More... | |
virtual double | getMax () const |
The maximum value recorded in the index. | |
virtual double | getMin () const |
The minimum value recorded in the index. | |
uint32_t | getNRows () const |
Return the number of rows represented by this object. | |
virtual double | getSum () const |
Compute the approximate sum of all the values indexed. More... | |
virtual const char * | name () const =0 |
Returns the name of the index, similar to the function type , but returns a string instead. More... | |
virtual uint32_t | numBitvectors () const |
Returns the number of bit vectors used by the index. | |
virtual void | print (std::ostream &out) const =0 |
Prints human readable information. More... | |
virtual int | read (const char *name)=0 |
Reconstructs an index from the named file. More... | |
virtual int | read (ibis::fileManager::storage *st)=0 |
Reconstructs an index from an array of bytes. More... | |
virtual long | select (const ibis::qContinuousRange &, void *) const =0 |
Evaluate the range condition and select values. | |
virtual long | select (const ibis::qContinuousRange &, void *, ibis::bitvector &) const =0 |
Evaluate the range condition, select values, and record the positions. | |
virtual void | serialSizes (uint64_t &, uint64_t &, uint64_t &) const =0 |
Compute the size of arrays that would be generated by the serialization function (write). More... | |
float | sizeInBytes () const |
Estimate the size of this index object measured in bytes. More... | |
virtual void | speedTest (std::ostream &) const |
Time some logical operations and print out their speed. | |
void | sumBins (uint32_t ib, uint32_t ie, ibis::bitvector &res) const |
Sum up bits[ib:ie-1] and place the result in res. More... | |
void | sumBins (uint32_t ib, uint32_t ie, ibis::bitvector &res, uint32_t ib0, uint32_t ie0) const |
Compute a new sum for bit vectors [ib, ie) by taking advantage of the old sum for bitvectors [ib0, ie0). More... | |
void | sumBins (uint32_t ib, uint32_t ie, ibis::bitvector &res, uint32_t *buf) const |
Sum up bits[ib:ie-1] and place the result in res. More... | |
void | sumBins (const ibis::array_t< uint32_t > &, ibis::bitvector &) const |
Sum up the bits in the specified bins. | |
virtual INDEX_TYPE | type () const =0 |
Returns an index type identifier. | |
virtual float | undecidable (const ibis::qContinuousRange &expr, ibis::bitvector &iffy) const |
Mark the position of the rows that can not be decided with this index. More... | |
virtual float | undecidable (const ibis::qDiscreteRange &expr, ibis::bitvector &iffy) const |
virtual int | write (const char *name) const =0 |
Save index to a file. More... | |
virtual int | write (ibis::array_t< double > &, ibis::array_t< int64_t > &, ibis::array_t< uint32_t > &) const =0 |
Save index to three arrays. Serialize the index in memory. | |
virtual | ~index () |
The destructor. | |
Static Public Member Functions | |
static void | addBits (const array_t< bitvector * > &bits, uint32_t ib, uint32_t ie, ibis::bitvector &res) |
Add the pile [ib:ie-1] to res . More... | |
static index * | create (const column *c, const char *name=0, const char *spec=0, int inEntirety=0) |
Index factory. More... | |
static void | divideCounts (array_t< uint32_t > &bounds, const array_t< uint32_t > &cnt) |
Determine how to split the array cnt , so that each group has roughly the same total value. More... | |
static bool | isIndex (const char *f, INDEX_TYPE t) |
Is the named file an index file? Read the header of the named file to determine if it contains an index of the specified type. More... | |
template<typename E > | |
static void | mapValues (const array_t< E > &val, VMap &bmap) |
template<typename E > | |
static void | mapValues (const array_t< E > &val, histogram &hist, uint32_t count=0) |
template<typename E > | |
static void | mapValues (const array_t< E > &val, array_t< E > &bounds, std::vector< uint32_t > &cnts) |
template<typename E1 , typename E2 > | |
static void | mapValues (const array_t< E1 > &val1, const array_t< E2 > &val2, array_t< E1 > &bnd1, array_t< E2 > &bnd2, std::vector< uint32_t > &cnts) |
Compute a two-dimensional histogram. More... | |
static void | printHeader (std::ostream &, const char *) |
static void | setBases (array_t< uint32_t > &bases, uint32_t card, uint32_t nbase=2) |
Fill the array bases with the values that cover the range [0, card). More... | |
static void | sumBits (const array_t< bitvector * > &bits, uint32_t ib, uint32_t ie, ibis::bitvector &res) |
Sum up pile [ib:ie-1] and place the result in res . More... | |
static void | sumBits (const array_t< bitvector * > &bits, const ibis::bitvector &tot, uint32_t ib, uint32_t ie, ibis::bitvector &res) |
Sum up pile [ib:ie-1] and add the result to res . More... | |
Protected Member Functions | |
virtual void | activate () const |
Regenerate all bitvectors from the underlying storage. More... | |
virtual void | activate (uint32_t i) const |
Regenerate the ith bitvector from the underlying storage. | |
virtual void | activate (uint32_t i, uint32_t j) const |
Regenerate bitvectors i (inclusive) through j (exclusive) from the underlying storage. More... | |
virtual void | clear () |
Clear the existing content. More... | |
void | computeMinMax (const char *f, double &min, double &max) const |
void | dataFileName (std::string &name, const char *f=0) const |
Generate data file name from "f". More... | |
virtual size_t | getSerialSize () const throw () |
Compute the size of the serialized version of the index. More... | |
index (const ibis::column *c=0) | |
Default constructor. More... | |
index (const ibis::column *c, ibis::fileManager::storage *s) | |
Constructor with a storage object. More... | |
index (const index &) | |
Copy constructor. | |
void | indexFileName (std::string &name, const char *f=0) const |
Generates index file name from "f". More... | |
void | initBitmaps (int fdes) |
Prepare the bitmaps using the given file descriptor. More... | |
void | initBitmaps (ibis::fileManager::storage *st) |
Prepare bitmaps from the given storage object. More... | |
void | initBitmaps (uint32_t *st) |
Prepare bitmaps from the given raw pointer. More... | |
void | initBitmaps (void *ctx, FastBitReadBitmaps rd) |
Prepare bitmaps from the user provided function pointer and context. More... | |
int | initOffsets (int64_t *, size_t) |
Initialize the offsets from the given data array. More... | |
int | initOffsets (int fdes, const char offsize, size_t start, uint32_t nobs) |
Read in the offset array. More... | |
int | initOffsets (ibis::fileManager::storage *st, size_t start, uint32_t nobs) |
Regenerate the offsets array from the given storage object. More... | |
void | mapValues (const char *f, VMap &bmap) const |
Map the positions of each individual value. More... | |
void | mapValues (const char *f, histogram &hist, uint32_t count=0) const |
Generate a histogram. More... | |
index & | operator= (const index &) |
Assignment operator. | |
void | optionalUnpack (array_t< ibis::bitvector * > &bits, const char *opt) |
A function to decide whether to uncompress the bitvectors. More... | |
Static Protected Member Functions | |
static void | indexFileName (std::string &name, const ibis::column *col1, const ibis::column *col2, const char *f=0) |
Generate the index file name for the composite index formed on two columns. More... | |
Protected Attributes | |
array_t< ibis::bitvector * > | bits |
A list of bitvectors. | |
bitmapReader * | breader |
The functor to read serialized bitmaps from a more complex source. | |
const ibis::column * | col |
Pointer to the column this index is for. | |
const char * | fname |
The name of the file containing the index. | |
uint32_t | nrows |
The number of rows represented by the index. More... | |
array_t< int32_t > | offset32 |
Starting positions of the bitvectors. | |
array_t< int64_t > | offset64 |
Starting positions of the bitvectors. More... | |
ibis::fileManager::storage * | str |
The underlying storage. More... | |
The base index class.
Class ibis::index contains the common definitions and virtual functions of the class hierarchy. It is assumed that an ibis::index is for only one column. The user is to create a new index through the function ibis::index::create and only use the functions defined in this class.
The integer values of this enum type are used in the index files to differentiate the indexes.
Enumerator | |
---|---|
BINNING |
Fix this as 0 so that the index type indicator will be known to all versions of the program. This should ensure the index files from different versions of FastBit code are recognized correctly. |
RANGE | |
MESA |
ibis::interval. |
AMBIT |
ibis::ambit, range-range two level encoding on bins. |
PALE |
ibis::pale, equality-range encoding on bins. |
PACK |
ibis::pack, range-equality encoding on bins. |
ZONE |
ibis::zone, equality-equality encoding on bins. |
RELIC |
ibis::relic, the basic bitmap index. |
ROSTER |
ibis::roster, RID list. |
SKIVE |
ibis::skive, binary encoding with recoding of key values. |
FADE |
ibis::fade, multicomponent range encoding (unbinned). |
SBIAD |
ibis::sbiad, multicomponent interval encoding (unbinned). |
SAPID |
ibis::sapid, multicomponent equality encoding (unbinned). |
EGALE |
ibis::egale, multicomponent equality encoding on bins. |
MOINS |
ibis::moins, multicomponent range encoding on bins. |
ENTRE |
ibis::entre, multicomponent interval encoding on bins. |
BAK |
ibis::bak, reduced precision mapping, equality code. |
BAK2 |
ibis::bak2, splits each BAK bin in three, one less than the mapped value, one greater than the mapped value, and one equal to the mapped value. This is used to implement the low-precision binning scheme. |
KEYWORDS |
ibis::keywords, boolean term-document matrix. |
MESH |
not used. |
BAND |
not used. |
DIREKTE |
ibis::direkte, hash value to bitmaps. |
GENERIC |
not used. |
BYLT |
ibis::bylt, unbinned range-equality encoding. |
FUZZ |
ibis::fuzz, unbinned interval-equality encoding. |
ZONA |
ibis::zona, unbinned equality-equality encoding. |
FUGE |
ibis::fuge, binned interval-equality encoding. |
SLICE |
ibis::slice, bit-sliced index. |
EXTERN |
externally defined index. |
|
inlineprotected |
Default constructor.
Protect the constructor so that ibis::index can not be instantiated directly. Protecting it also reduces the size of public interface.
|
protected |
Constructor with a storage object.
Both the column object and the storage object are expected to be valid. However, this function only makes use of the storage object.
References ibis::fileManager::storage::begin(), col, ibis::column::fullname(), and nrows.
|
protectedvirtual |
Regenerate all bitvectors from the underlying storage.
Activate all bitvectors.
References ibis::array_t< T >::getStorage(), ibis::array_t< T >::read(), ibis::array_t< T >::size(), and UnixOpen.
Referenced by ibis::bin::append(), ibis::direkte::append(), ibis::relic::append(), ibis::bin::estimate(), getBitvector(), and ibis::skive::skive().
|
protectedvirtual |
Regenerate bitvectors i (inclusive) through j (exclusive) from the underlying storage.
References ibis::array_t< T >::read(), and UnixOpen.
void ibis::index::addBins | ( | uint32_t | ib, |
uint32_t | ie, | ||
ibis::bitvector & | res | ||
) | const |
Add the sum of bits
[ib] through bits
[ie-1] to res
.
Always explicitly use bits
[ib] through bits
[ie-1]. The most important difference between this function and sumBins
is that this function always uses bits
[ib] through bits
[ie-1]. This is similar to the function addBits
.
References ibis::bitvector::adjustSize(), ibis::bitvector::bitsPerLiteral(), ibis::bitvector::bytes(), ibis::bitvector::cnt(), ibis::bitvector::copy(), ibis::horometer::CPUTime(), ibis::bitvector::decompress(), ibis::horometer::realTime(), ibis::bitvector::set(), ibis::bitvector::size(), ibis::horometer::start(), and ibis::horometer::stop().
void ibis::index::addBins | ( | uint32_t | ib, |
uint32_t | ie, | ||
ibis::bitvector & | res, | ||
const ibis::bitvector & | tot | ||
) | const |
Compute the sum of bit vectors [ib
, ie
).
If computing a complement is faster, assume all bit vectors add up to tot
. This is basically a copy of the function sumBins
(without the 4th argument). There are two changes: (1) if res
has the same number of bits as tot
, the new sum is added to the existing bitvector, and (2) when it computes the sum through complements, it performs a subtraction from tot
.
References ibis::bitvector::adjustSize(), ibis::bitvector::bitsPerLiteral(), ibis::bitvector::bytes(), ibis::bitvector::cnt(), ibis::bitvector::copy(), ibis::horometer::CPUTime(), ibis::bitvector::decompress(), ibis::horometer::realTime(), ibis::bitvector::set(), ibis::bitvector::size(), ibis::horometer::start(), and ibis::horometer::stop().
|
static |
Add the pile
[ib:ie-1] to res
.
This function always uses bitvectors pile
[ib] through pile
[ie-1] and expects the caller to have filled these bitvectors already.
References ibis::bitvector::bytes(), ibis::bitvector::cnt(), ibis::bitvector::compress(), ibis::bitvector::copy(), ibis::horometer::CPUTime(), ibis::bitvector::decompress(), ibis::util::logMessage(), ibis::horometer::realTime(), ibis::bitvector::set(), ibis::array_t< T >::size(), ibis::bitvector::size(), ibis::horometer::start(), and ibis::horometer::stop().
|
inlinevirtual |
The function binBoundaries and binWeights return bin boundaries and counts of each bin respectively.
Reimplemented in ibis::bak2, ibis::bak, ibis::egale, ibis::zone, ibis::pack, ibis::pale, ibis::ambit, ibis::mesa, ibis::range, ibis::bin, ibis::relic, ibis::keywords, and ibis::direkte.
Referenced by ibis::part::coarsenBins(), and ibis::part::get2DDistributionI().
|
protectedvirtual |
Clear the existing content.
Free the bitmap objects common to all index objects.
Reimplemented in ibis::egale, ibis::fuge, ibis::zone, ibis::pack, ibis::pale, ibis::ambit, ibis::zona, ibis::bylt, ibis::fuzz, ibis::fade, ibis::bin, ibis::skive, ibis::relic, and ibis::keywords.
Referenced by ibis::bin::binning(), ibis::bin::binningT(), ibis::keywords::clear(), ibis::relic::clear(), ibis::bin::clear(), ibis::direkte::direkte(), ibis::keywords::keywords(), ibis::relic::read(), ibis::direkte::read(), ibis::keywords::read(), and ~index().
|
static |
Index factory.
It creates a specific concrete index object. It attempts to read the existing index if a location is specified. If it fails to read an index or no explicit location is given, it attempts to create a new index based on the current data file and index specification. Any newly created index will be written to a file.
c | a pointer to an ibis::column object. This argument must be present. |
dfname | data file name, may also be the name of the index file, or the directory containing the data file. If the name ends with '.idx', it is treated as an index file, and the content of the file is read. If the name does not end with '.idx', it is assumed to be the data file name, unless it is determined to be a directory name. If it is a directory name, the data file is assumed to be in the specified directory with the same name as the column. Once a data file is found, the content of the data file is read to construct a new index according to the return value of function indexSpec. The argument dfname can be nil, in which case, the data file name is constructed by concatenating the return from partition()->currentDataDir() and the column name. |
spec | the index specification. This string contains the parameters for how to create an index. The most general form is: <binning .../> <encoding .../> <compression .../>. Here is one example (it is the default for some integer columns): <binning none /> <encoding equality />. FastBit always compresses every bitmap it ever generates. The compression option is to instruct it to uncompress some bitmaps or not compress indexes at all. The compression option is usually not used. |
If the argument spec
is not specified, this function checks the specification in the following order.
– use the index specification for the column being indexed; – use the index specification for the table containing the column being indexed; – use the most specific index specification related to the column being indexed in the global resources (gParameters).
It stops looking as soon as it finds the first non-empty string, which follows the principle of a more specific index specification overriding a general specification.
readopt | Depending on whether this value is positive, zero or negative, the index is read from the index file in three different ways. |
(1) If this value is positive, the content of the index file is read into memory and there is no need for further I/O operation to use the index.
(2) If this value is zero, the content of a large index file is loaded into memory through memory map and the content of a small index file will be read into memory. This is the default option.
(3) If this value is negative, then only the metadata is read into memory. This option requires the least amount of memory, but requires more I/O operations later when bitmaps are needed to answer queries. To use options (1) and (2), there must be enough memory to hold the index file in memory. Furthermore, to use the memory map option, FastBit must be able to hold the index file open indefinitely and the operating system must support memory map function mmap.
These three options have different start up costs and different query processing costs. Typically, reading the whole file in this function will take the longest time, but since it requires no further I/O operations, its query processing cost is likely the lowest. The memory map option only needs to load the page table into memory and read part of the metadata, so it is likely to be relatively inexpensive to reconstruct the index object this way. Since the memory map option can read the necessary portion of the index file into memory pretty efficiently, the query processing should have reasonable performance. The third option of reading metadata only in this function requires the least amount of memory, but it might actually read more bytes in this function than the memory map option because this option actually needs to read all the bytes representing the metadata while the memory map option only needs to create a memory map for the index file. Later when the index is used, the needed bitmaps are read into memory, which is likely to take more time than accessing the memory mapped bitmaps. Additionally, the third option also causes the bitmaps to be placed in unpredictable memory locations, while the first two options place all bitmaps of an index consecutively in memory. This difference in memory layout could cause the memory accesses to take different amounts of time; typically, accessing consecutive memory locations is more efficient.
The default value of readopt
is 0, which prefers the memory map option.
Note: set dfname
to null to build a brand new index and discard the existing index. References ibis::fileManager::storage::begin(), ibis::BIT, ibis::BLOB, ibis::horometer::CPUTime(), ibis::part::currentDataDir(), ibis::column::dataFileName(), ibis::fileManager::flushFile(), ibis::column::fullname(), getNRows(), ibis::gParameters(), indexFileName(), ibis::part::indexSpec(), ibis::fileManager::instance(), ibis::resource::isTrue(), ibis::part::name(), ibis::column::name(), name(), ibis::part::nRows(), print(), ibis::column::purgeIndexFile(), ibis::horometer::realTime(), ibis::horometer::start(), ibis::horometer::stop(), ibis::fileManager::tryGetFile(), ibis::column::type(), UnixOpen, ibis::UNKNOWN_TYPE, and write().
Referenced by ibis::column::append(), ibis::column::loadIndex(), and ibis::part::loadIndexes().
|
protected |
Generate data file name from "f".
Invokes ibis::column::dataFileName to do the actual work.
Referenced by ibis::direkte::direkte(), and ibis::keywords::keywords().
|
static |
Determine how to split the array cnt
, so that each group has roughly the same total value.
bdry
stores the dividers. The first group is [0, bdry[0]), the second group is [bdry[0], bdry[1]), and so on. This function uses the size of array bdry
to determine the number of groups to use. References ibis::array_t< T >::clear(), ibis::array_t< T >::push_back(), ibis::array_t< T >::resize(), and ibis::array_t< T >::size().
Referenced by ibis::part::adaptive2DBins(), ibis::part::adaptive3DBins(), ibis::part::adaptiveFloats(), ibis::part::adaptiveFloatsDetailed(), ibis::part::adaptiveInts(), ibis::part::adaptiveIntsDetailed(), ibis::part::coarsenBins(), ibis::part::equalWeightBins(), ibis::part::get2DDistributionI(), ibis::part::getCumulativeDistribution(), and ibis::part::getDistribution().
|
inline |
The index object is considered empty if there is no bitmap or getNRows returns 0.
Referenced by ibis::column::indexLock::indexLock().
|
inlinevirtual |
Computes an approximation of hits as a pair of lower and upper bounds.
expr | the query expression to be evaluated. |
lower | a bitvector marking a subset of the hits. All rows marked with one (1) are definitely hits. |
upper | a bitvector marking a superset of the hits. All hits are marked with one, but some of the rows marked one may not be hits. If the variable upper is empty, the variable lower is assumed to contain the exact answer. |
Reimplemented in ibis::entre, ibis::moins, ibis::egale, ibis::fuge, ibis::zone, ibis::pack, ibis::pale, ibis::ambit, ibis::mesa, ibis::range, ibis::skive, ibis::keywords, ibis::bin, ibis::relic, and ibis::direkte.
References nrows, and ibis::bitvector::set().
|
virtual |
Estimate the hits for discrete ranges, i.e., those translated from 'a IN (x, y, ..)'.
A trivial implementation to indicate the index can not determine any row.
Reimplemented in ibis::relic, and ibis::direkte.
References ibis::qDiscreteRange::colName(), and ibis::bitvector::set().
|
virtual |
Estimate the pairs for the range join operator.
Only records that are masked are evaluated.
References ibis::bitvector64::clear(), ibis::util::outerProduct(), and ibis::bitvector64::set().
|
virtual |
Estimate an upper bound for the number of pairs produced from marked records.
References ibis::bitvector::cnt().
|
pure virtual |
To evaluate the exact hits.
On success, return the number of hits, otherwise a negative value is returned.
Implemented in ibis::entre, ibis::moins, ibis::egale, ibis::fuge, ibis::zone, ibis::pack, ibis::pale, ibis::ambit, ibis::zona, ibis::bylt, ibis::mesa, ibis::fuzz, ibis::range, ibis::sapid, ibis::sbiad, ibis::fade, ibis::skive, ibis::keywords, ibis::bin, ibis::relic, and ibis::direkte.
Referenced by ibis::part::coarsenBins(), ibis::bin::evaluate(), ibis::range::evaluate(), ibis::mesa::evaluate(), ibis::ambit::evaluate(), ibis::pale::evaluate(), ibis::pack::evaluate(), ibis::zone::evaluate(), ibis::fuge::evaluate(), ibis::egale::evaluate(), ibis::moins::evaluate(), ibis::entre::evaluate(), and ibis::part::get2DDistributionI().
|
inlinevirtual |
To evaluate the exact hits.
On success, return the number of hits, otherwise a negative value is returned.
Reimplemented in ibis::entre, ibis::moins, ibis::egale, ibis::fuge, ibis::zone, ibis::pack, ibis::pale, ibis::ambit, ibis::mesa, ibis::range, ibis::sapid, ibis::sbiad, ibis::fade, ibis::skive, ibis::bin, ibis::relic, and ibis::direkte.
|
inlinevirtual |
The functions expandRange and contractRange expand or contract the boundaries of a range condition so that the new range will have exact answers using the function estimate.
The default implementation provided does nothing since this is only meaningful for indices based on bins.
Reimplemented in ibis::bak2, ibis::bak, ibis::range, and ibis::bin.
|
virtual |
Cumulative distribution of the data.
A brute-force approach to get an accurate cumulative distribution.
Reimplemented in ibis::bin, ibis::relic, and ibis::direkte.
References ibis::util::compactValue().
|
virtual |
Binned distribution of the data.
A brute-force approach to get an accurate distribution.
Reimplemented in ibis::bin, ibis::relic, and ibis::direkte.
|
protectedvirtual |
Compute the size of the serialized version of the index.
This is the fallback implementation which always returns 0.
Reimplemented in ibis::egale, ibis::fuge, ibis::zone, ibis::pack, ibis::pale, ibis::ambit, ibis::zona, ibis::mesa, ibis::bylt, ibis::range, ibis::fuzz, ibis::fade, ibis::skive, ibis::bin, ibis::relic, ibis::keywords, and ibis::direkte.
|
inlinevirtual |
Compute the approximate sum of all the values indexed.
If it decides that computing the sum directly from the vertical partition is more efficient, it will return NaN immediately.
Reimplemented in ibis::entre, ibis::moins, ibis::egale, ibis::pack, ibis::ambit, ibis::mesa, ibis::range, ibis::fade, ibis::skive, ibis::bin, ibis::relic, ibis::keywords, and ibis::direkte.
|
protected |
Generates index file name from "f".
Invokes ibis::column::dataFileName to do most of the work.
Referenced by create().
|
staticprotected |
Generate the index file name for the composite index formed on two columns.
May use argument "dir" if it is not null.
References ibis::part::currentDataDir(), and ibis::column::name().
|
protected |
Prepare the bitmaps using the given file descriptor.
It clears the existing content of the array bits and resizes the array to have nobs elements. It reconstructs all the bitmaps if the file name (fname) is not a valid pointer. It reads the first bitmap if the compiler macro FASTBIT_READ_BITVECTOR0 is defined. It is to be used by the constructors of concrete index classes after initOffsets has been called.
References ibis::bitvector::set(), ibis::bitvector::size(), and ibis::bitvector::sloppySize().
Referenced by ibis::bin::bin(), and ibis::relic::relic().
|
protected |
Prepare bitmaps from the given storage object.
Used by constructors to initialize the array bits based on the content of offset32 and offset64. The member variable nrows is expected to be set to the correct value as well.
References ibis::fileManager::storage::isFileMap(), ibis::bitvector::set(), ibis::bitvector::size(), and ibis::bitvector::sloppySize().
|
protected |
Prepare bitmaps from the given raw pointer.
Used by constructors to initialize the array bits after the content of offset32 and offset64 have been initialized correctly. It expects all bitmaps are serialized and packed into this single array.
The member variable nrows is expected to be set to the correct value as well.
References ibis::bitvector::size(), and ibis::bitvector::sloppySize().
|
protected |
Prepare bitmaps from the user provided function pointer and context.
This is intended for reading serialized bitmaps placed in a more complex setting, however, we still view the content as if it is written as 1-D array.
|
protected |
Initialize the offsets from the given data array.
References ibis::array_t< T >::print().
Referenced by ibis::bin::bin(), and ibis::relic::relic().
|
protected |
Read in the offset array.
The offset size has been read by the caller and so has the number of bitmaps to expect.
|
protected |
Regenerate the offsets array from the given storage object.
It determines the size of offsets based on the 7th byte in the storage object, and the offset size is expected to be either 4 bytes or 8 bytes. An unexpected offset size will cause an exception to be raised. It is to be used by the constructors of concrete index classes.
References ibis::fileManager::storage::begin().
|
static |
Is the named file an index file? Read the header of the named file to determine if it contains an index of the specified type.
Returns true if the correct header is found, else return false.
References ibis::util::logMessage(), and UnixOpen.
Referenced by ibis::bak::read(), and ibis::bak2::read().
|
static |
Compute a two-dimensional histogram.
Given two arrays of the same size, count the number of appearances of each combination defined by bnd1
and bnd2
. If the arrays bnd1
or bnd2
contain values in ascending order, their values are directly used in this function. The empty one will be replaced by a linear division of actual range into 256 bins. The array counts
stores the 2-D bins in raster-scan order with the second variable, val2
, as the faster varying variable. More specifically, the bins for variable 1 are defined as:
(..., bnd1[0]) [bnd1[0], bnd1[1]) [bnd1[1], bnd1[2]) ... [bnd1.back(), ...)
Note that '[' denotes that the left boundary is inclusive and ')' denotes that the right boundary is exclusive. Similarly, the bins for variable 2 are
(..., bnd2[0]) [bnd2[0], bnd2[1]) [bnd2[1], bnd2[2]) ... [bnd2.back(), ...)
The counts
are for the following bins
(..., bnd1[0]) (..., bnd2[0])
(..., bnd1[0]) [bnd2[0], bnd2[1])
(..., bnd1[0]) [bnd2[1], bnd2[2])
...
(..., bnd1[0]) [bnd2.back(), ...)
[bnd1[0], bnd1[1]) (..., bnd2[0])
[bnd1[0], bnd1[1]) [bnd2[0], bnd2[1])
[bnd1[0], bnd1[1]) [bnd2[1], bnd2[2])
...
[bnd1[0], bnd1[1]) [bnd2.back(), ...)
...
References ibis::array_t< T >::find(), ibis::array_t< T >::push_back(), ibis::array_t< T >::reserve(), and ibis::array_t< T >::size().
|
protected |
Map the positions of each individual value.
Map the locations of the values of one column.
Given a file containing the values of a column, this function maps the positions of each individual value and stores the result in a set of bitmaps.
References ibis::bitvector::adjustSize(), ibis::bitvector::bitsPerLiteral(), ibis::CATEGORY, ibis::DOUBLE, ibis::FLOAT, ibis::fileManager::getFile(), ibis::util::getFileSize(), ibis::bitvector::indexSet::indices(), ibis::fileManager::instance(), ibis::INT, ibis::bitvector::indexSet::isRange(), ibis::LONG, ibis::bitvector::indexSet::nIndices(), ibis::horometer::realTime(), ibis::bitvector::set(), ibis::bitvector::setBit(), ibis::SHORT, ibis::array_t< T >::size(), ibis::bitvector::size(), ibis::horometer::start(), ibis::horometer::stop(), ibis::TEXT, ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
|
protected |
Generate a histogram.
Compute a histogram of a column.
Given a property file containing the values of a column, this function counts the occurrences of each distinct value. Argument count
is the number of samples to be used for building the histogram. If it is zero or greater than half of the number of values in the data files, all values are used, otherwise, approximately count
values will be sampled with nearly uniform distances from each other.
References ibis::bitvector::adjustSize(), ibis::bitvector::appendFill(), ibis::bitvector::bitsPerLiteral(), ibis::CATEGORY, ibis::DOUBLE, ibis::FLOAT, ibis::fileManager::getFile(), ibis::bitvector::indexSet::indices(), ibis::fileManager::instance(), ibis::INT, ibis::bitvector::indexSet::isRange(), ibis::LONG, ibis::bitvector::indexSet::nIndices(), ibis::util::rand(), ibis::horometer::realTime(), ibis::SHORT, ibis::array_t< T >::size(), ibis::bitvector::size(), ibis::horometer::start(), ibis::horometer::stop(), ibis::TEXT, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
|
pure virtual |
Returns the name of the index, similar to the function type
, but returns a string instead.
Implemented in ibis::bak2, ibis::bak, ibis::entre, ibis::moins, ibis::egale, ibis::fuge, ibis::zone, ibis::pack, ibis::pale, ibis::ambit, ibis::zona, ibis::mesa, ibis::bylt, ibis::fuzz, ibis::range, ibis::sapid, ibis::sbiad, ibis::fade, ibis::slice, ibis::skive, ibis::bin, ibis::relic, ibis::keywords, and ibis::direkte.
Referenced by create(), and ibis::column::loadIndex().
|
protected |
A function to decide whether to uncompress the bitvectors.
Decide whether to uncompress the bitmaps.
References ibis::gParameters(), and ibis::array_t< T >::size().
Referenced by ibis::bin::bin(), ibis::keywords::keywords(), ibis::mesa::mesa(), and ibis::range::range().
|
pure virtual |
Prints human readable information.
Outputs information about the index as text to the specified output stream.
Implemented in ibis::bak2, ibis::bak, ibis::entre, ibis::moins, ibis::egale, ibis::fuge, ibis::zone, ibis::pack, ibis::pale, ibis::ambit, ibis::zona, ibis::bylt, ibis::mesa, ibis::fuzz, ibis::range, ibis::sapid, ibis::sbiad, ibis::fade, ibis::slice, ibis::skive, ibis::keywords, ibis::direkte, ibis::bin, and ibis::relic.
Referenced by ibis::column::append(), create(), and ibis::column::loadIndex().
|
pure virtual |
Reconstructs an index from the named file.
The name can be the directory containing an index file. In this case, the name of the index file must be the name of the column followed by ".idx" suffix.
Implemented in ibis::bak2, ibis::bak, ibis::egale, ibis::fuge, ibis::zone, ibis::pack, ibis::pale, ibis::ambit, ibis::zona, ibis::bylt, ibis::fuzz, ibis::range, ibis::fade, ibis::skive, ibis::keywords, ibis::direkte, ibis::bin, and ibis::relic.
|
pure virtual |
Reconstructs an index from an array of bytes.
Intended for internal use only!
Implemented in ibis::egale, ibis::fuge, ibis::zone, ibis::pack, ibis::pale, ibis::ambit, ibis::zona, ibis::bylt, ibis::fuzz, ibis::range, ibis::fade, ibis::skive, ibis::keywords, ibis::direkte, ibis::bin, and ibis::relic.
|
pure virtual |
Compute the size of arrays that would be generated by the serialization function (write).
Implemented in ibis::keywords, ibis::direkte, ibis::bin, and ibis::relic.
|
static |
Fill the array bases with the values that cover the range [0, card).
Assumes at least two components. Since the base size of each component cannot be less than two, the maximum number of components is reached when every component uses base size two. If the input argument ncomp is larger than ceiling(log_2(card)), the returned array bases shall have ceiling(log_2(card)) elements.
References ibis::array_t< T >::push_back(), ibis::array_t< T >::reserve(), ibis::array_t< T >::resize(), and ibis::array_t< T >::size().
Referenced by ibis::egale::egale().
float ibis::index::sizeInBytes | ( | ) | const |
Estimate the size of this index object measured in bytes.
It is not intended to be precise, but should be good enough for operations such as comparing index size against base data size to determine which operation to use for answering a query.
References ibis::util::getFileSize().
void ibis::index::sumBins | ( | uint32_t | ib, |
uint32_t | ie, | ||
ibis::bitvector & | res | ||
) | const |
Sum up bits[ib:ie-1]
and place the result in res.
The bitmaps (bits) are held by this index object and may be regenerated as needed. It uses the combined strategy that was determined in a series of earlier tests. The basic rule is as follows.
References ibis::bitvector::bitsPerLiteral(), ibis::bitvector::bytes(), ibis::bitvector::cnt(), ibis::bitvector::copy(), ibis::horometer::CPUTime(), ibis::bitvector::decompress(), ibis::bitvector::flip(), ibis::util::logMessage(), ibis::horometer::realTime(), ibis::bitvector::set(), ibis::bitvector::size(), ibis::horometer::start(), and ibis::horometer::stop().
Referenced by ibis::category::patternSearch().
void ibis::index::sumBins | ( | uint32_t | ib, |
uint32_t | ie, | ||
ibis::bitvector & | res, | ||
uint32_t | ib0, | ||
uint32_t | ie0 | ||
) | const |
Compute a new sum for bit vectors [ib, ie) by taking advantage of the old sum for bitvectors [ib0, ie0).
This function attempts to take advantage of existing results of a previously computed sum.
References ibis::bitvector::bytes(), ibis::bitvector::cnt(), and ibis::bitvector::size().
void ibis::index::sumBins | ( | uint32_t | ib, |
uint32_t | ie, | ||
ibis::bitvector & | res, | ||
uint32_t * | buf | ||
) | const |
Sum up bits[ib:ie-1]
and place the result in res.
The bitmaps (bits) are stored in the argument buf and have to be regenerated based on the information in offset64. The basic rule is as follows.
References ibis::bitvector::bitsPerLiteral(), ibis::bitvector::bytes(), ibis::bitvector::clear(), ibis::bitvector::cnt(), ibis::bitvector::copy(), ibis::horometer::CPUTime(), ibis::bitvector::decompress(), ibis::horometer::realTime(), ibis::bitvector::size(), ibis::horometer::start(), and ibis::horometer::stop().
|
static |
Sum up pile
[ib:ie-1] and place the result in res
.
setBit
is always slower. References ibis::bitvector::bitsPerLiteral(), ibis::bitvector::bytes(), ibis::bitvector::cnt(), ibis::bitvector::compress(), ibis::bitvector::copy(), ibis::horometer::CPUTime(), ibis::bitvector::decompress(), ibis::bitvector::flip(), ibis::util::logMessage(), ibis::array_t< T >::push_back(), ibis::horometer::realTime(), ibis::array_t< T >::reserve(), ibis::bitvector::set(), ibis::array_t< T >::size(), ibis::bitvector::size(), ibis::horometer::start(), and ibis::horometer::stop().
Referenced by ibis::part::adaptiveFloatsDetailed(), ibis::part::adaptiveIntsDetailed(), and ibis::mesa::mesa().
|
static |
Sum up pile
[ib:ie-1] and add the result to res
.
It is assumed that all bitmaps in pile add up to tot
. In the other version of sumBits without this argument tot
, it was assumed that all bitmaps add up to a bit vector of all ones. The decision of whether to use pile[ib:ie-1] directly or use the subtractive version (with pile[0:ib-1] and pile[ie:nobs-1]) is based on the number of bit vectors. The caller is responsible for ensuring the necessary bitmaps are already in memory before calling this function.
References ibis::bitvector::bitsPerLiteral(), ibis::bitvector::bytes(), ibis::bitvector::cnt(), ibis::bitvector::copy(), ibis::horometer::CPUTime(), ibis::bitvector::decompress(), ibis::horometer::realTime(), ibis::bitvector::set(), ibis::array_t< T >::size(), ibis::bitvector::size(), ibis::horometer::start(), and ibis::horometer::stop().
|
inline virtual |
Mark the position of the rows that can not be decided with this index.
expr | the range conditions to be evaluated. |
iffy | the bitvector marking the positions of rows that can not be decided using the index. Return value is the expected fraction of undecided rows that might satisfy the range conditions. |
Reimplemented in ibis::egale, ibis::zone, ibis::pack, ibis::pale, ibis::ambit, ibis::mesa, ibis::range, ibis::keywords, ibis::bin, ibis::relic, and ibis::direkte.
|
pure virtual |
Save index to a file.
Outputs the index in a compact binary format to the named file or directory. The index file contains a header that can be identified by the function isIndex.
Implemented in ibis::bak2, ibis::bak, ibis::entre, ibis::moins, ibis::egale, ibis::fuge, ibis::zone, ibis::pack, ibis::pale, ibis::ambit, ibis::zona, ibis::bylt, ibis::mesa, ibis::fuzz, ibis::range, ibis::sapid, ibis::sbiad, ibis::fade, ibis::slice, ibis::skive, ibis::keywords, ibis::direkte, ibis::bin, and ibis::relic.
Referenced by ibis::column::append(), and create().
|
protected |
The number of rows represented by the index.
It cannot take more than 2^32 rows because the bitvector class cannot hold more than 2^32 bits.
Referenced by ibis::direkte::append(), ibis::relic::append(), ibis::bin::append(), ibis::bin::bin(), ibis::bylt::bylt(), ibis::direkte::direkte(), ibis::egale::egale(), empty(), ibis::relic::estimate(), estimate(), estimateCost(), ibis::fade::fade(), ibis::fuge::fuge(), ibis::fuzz::fuzz(), getNRows(), index(), ibis::mesa::mesa(), operator=(), ibis::pack::pack(), ibis::pale::pale(), ibis::range::range(), ibis::relic::relic(), ibis::sapid::sapid(), ibis::sbiad::sbiad(), ibis::skive::skive(), ibis::slice::slice(), ibis::bin::swap(), and ibis::zone::zone().
|
mutable protected |
Starting positions of the bitvectors.
This is the 64-bit version of offset32 to deal with large indexes. All functions that require these offsets will attempt to use the 64-bit version first.
Referenced by ibis::direkte::append(), ibis::bylt::bylt(), ibis::skive::estimateCost(), ibis::fuge::fuge(), ibis::fuzz::fuzz(), operator=(), ibis::relic::relic(), and ibis::zona::zona().
|
mutable protected |
The underlying storage.
It may be nil if bitvectors are not from a storage object managed by the file manager.
Referenced by ibis::direkte::append(), and operator=().