A data structure for storing null-terminated text. More...
#include <category.h>
Classes | |
struct | tokenizer |
A tokenizer class to turn a string buffer into tokens. More... | |
Public Member Functions | |
virtual long | append (const char *dt, const char *df, const uint32_t nold, const uint32_t nnew, uint32_t nbuf, char *buf) |
Append the data file stored in directory df to the corresponding data file in directory dt. More... | |
virtual long | append (const void *, const ibis::bitvector &) |
Append the records in vals to the current working dataset. More... | |
void | delimitersForKeywordIndex (std::string &) const |
virtual double | estimateCost (const ibis::qString &cmp) const |
Estimate the cost of evaluating a string lookup. | |
virtual double | estimateCost (const ibis::qAnyString &cmp) const |
Estimate the cost of looking up a group of strings. | |
virtual const char * | findString (const char *str) const |
If the input string is found in the data file, it is returned, else this function returns 0. More... | |
virtual int | getOpaque (uint32_t, ibis::opaque &) const |
Return the raw binary value for the i-th row. More... | |
virtual int | getString (uint32_t i, std::string &val) const |
Return the string value for the i-th row. More... | |
const column * | IDColumnForKeywordIndex () const |
!< Print header info. More... | |
virtual long | keywordSearch (const char *str, ibis::bitvector &hits) const |
virtual long | keywordSearch (const std::vector< std::string > &strs, ibis::bitvector &hits) const |
virtual long | keywordSearch (const char *) const |
virtual long | keywordSearch (const std::vector< std::string > &) const |
virtual void | loadIndex (const char *iopt=0, int ropt=0) const throw () |
Load the index associated with the column. More... | |
virtual long | patternSearch (const char *, ibis::bitvector &) const |
virtual long | patternSearch (const char *) const |
virtual void | print (std::ostream &out) const |
!< Write the metadata entry. | |
virtual long | saveSelected (const ibis::bitvector &sel, const char *dest, char *buf, uint32_t nbuf) |
Write the selected values to the specified directory. More... | |
virtual array_t< int64_t > * | selectLongs (const bitvector &mask) const |
Return the starting positions of strings marked 1 in the mask. More... | |
virtual std::vector< std::string > * | selectStrings (const bitvector &mask) const |
Retrieve the string values from the rows marked 1 in mask. More... | |
virtual array_t< uint32_t > * | selectUInts (const bitvector &mask) const |
Return the positions of records marked 1 in the mask. More... | |
virtual long | stringSearch (const char *str, ibis::bitvector &hits) const |
Given a string literal, return a bitvector that marks the strings that match it. More... | |
virtual long | stringSearch (const std::vector< std::string > &strs, ibis::bitvector &hits) const |
Locate the rows that match any of the given strings. More... | |
virtual long | stringSearch (const char *str) const |
virtual long | stringSearch (const std::vector< std::string > &strs) const |
void | TDListForKeywordIndex (std::string &) const |
text (const part *tbl, FILE *file) | |
text (const part *tbl, const char *name, ibis::TYPE_T t=ibis::TEXT) | |
Construct a text object for a data partition with the given name. | |
text (const ibis::column &col) | |
Copy constructor. Copy from a column of the type TEXT. | |
virtual void | write (FILE *file) const |
Write the current metadata to -part.txt of the data partition. | |
Public Member Functions inherited from ibis::column | |
virtual int | attachIndex (double *, uint64_t, int64_t *, uint64_t, void *, FastBitReadBitmaps) const |
virtual int | attachIndex (double *, uint64_t, int64_t *, uint64_t, uint32_t *, uint64_t) const |
void | binWeights (std::vector< uint32_t > &) const |
Retrieve the number of rows in each bin. | |
template<typename T > | |
long | castAndWrite (const array_t< double > &vals, ibis::bitvector &mask, const T special) |
Cast the incoming array into the specified type T before writing the values to the file for this column. More... | |
column (const column &rhs) | |
The copy constructor. More... | |
column (const part *tbl, FILE *file) | |
Reconstitute a column from the content of a file. More... | |
column (const part *tbl, ibis::TYPE_T t, const char *name, const char *desc="", double low=DBL_MAX, double high=-DBL_MAX) | |
Construct a new column object based on type and name. | |
virtual void | computeMinMax () |
Compute the actual min/max values. More... | |
virtual void | computeMinMax (const char *dir) |
Compute the actual min/max values. More... | |
virtual void | computeMinMax (const char *dir, double &min, double &max, bool &asc) const |
Compute the actual min/max of the data in directory dir. More... | |
int | contractRange (ibis::qContinuousRange &rng) const |
Contract the range expression so that the new range falls exactly on the bin boundaries. More... | |
const char * | dataFileName (std::string &fname, const char *dir=0) const |
Name of the data file in the given data directory. More... | |
const char * | description () const |
Description of the column. Can be an arbitrary string. | |
void | description (const char *d) |
int | elementSize () const |
Size of a data element in bytes. | |
virtual double | estimateCost (const ibis::qContinuousRange &cmp) const |
Estimate the cost of evaluating the query expression. | |
virtual double | estimateCost (const ibis::qDiscreteRange &cmp) const |
Estimate the cost of evaluating a discrete range expression. | |
virtual double | estimateCost (const ibis::qIntHod &cmp) const |
Estimate the cost of evaluating a discrete range expression. | |
virtual double | estimateCost (const ibis::qUIntHod &cmp) const |
Estimate the cost of evaluating a discrete range expression. | |
virtual long | estimateRange (const ibis::qContinuousRange &cmp, ibis::bitvector &low, ibis::bitvector &high) const |
Compute a lower bound and an upper bound on the number of hits using the bitmap index. More... | |
virtual long | estimateRange (const ibis::qDiscreteRange &cmp, ibis::bitvector &low, ibis::bitvector &high) const |
Compute a lower bound and an upper bound for hits. More... | |
virtual long | estimateRange (const ibis::qIntHod &cmp, ibis::bitvector &low, ibis::bitvector &high) const |
Compute a lower bound and an upper bound for hits. More... | |
virtual long | estimateRange (const ibis::qUIntHod &cmp, ibis::bitvector &low, ibis::bitvector &high) const |
Compute a lower bound and an upper bound for hits. More... | |
virtual long | estimateRange (const ibis::qContinuousRange &cmp) const |
Use the index of the column to compute an upper bound on the number of hits. More... | |
virtual long | estimateRange (const ibis::qDiscreteRange &cmp) const |
virtual long | estimateRange (const ibis::qIntHod &cmp) const |
Compute an upper bound on the number of hits. More... | |
virtual long | estimateRange (const ibis::qUIntHod &cmp) const |
Compute an upper bound on the number of hits. More... | |
virtual long | evaluateAndSelect (const ibis::qContinuousRange &, const ibis::bitvector &, void *, ibis::bitvector &) const |
Evaluate a range condition and retrieve the selected values. More... | |
virtual long | evaluateRange (const ibis::qContinuousRange &cmp, const ibis::bitvector &mask, ibis::bitvector &res) const |
Compute the exact answer. More... | |
virtual long | evaluateRange (const ibis::qDiscreteRange &cmp, const ibis::bitvector &mask, ibis::bitvector &res) const |
Compute the exact answer to a discrete range expression. | |
virtual long | evaluateRange (const ibis::qIntHod &cmp, const ibis::bitvector &mask, ibis::bitvector &res) const |
Compute the exact answer to a discrete range expression. | |
virtual long | evaluateRange (const ibis::qUIntHod &cmp, const ibis::bitvector &mask, ibis::bitvector &res) const |
Compute the exact answer to a discrete range expression. | |
int | expandRange (ibis::qContinuousRange &rng) const |
Expand the range expression so that the new range falls exactly on the bin boundaries. More... | |
std::string | fullname () const |
Fully qualified name. More... | |
int | getDataflag () const |
virtual const ibis::dictionary * | getDictionary () const |
Return a pointer to a dictionary. More... | |
array_t< double > * | getDoubleArray () const |
Return all rows of the column as an array_t object. | |
array_t< float > * | getFloatArray () const |
Return all rows of the column as an array_t object. | |
array_t< int32_t > * | getIntArray () const |
Return all rows of the column as an array_t object. More... | |
void | getNullMask (bitvector &mask) const |
If there is a null mask stored already, return a shallow copy of it in mask. More... | |
virtual ibis::fileManager::storage * | getRawData () const |
Return the content of base data file as a storage object. | |
const unixTimeScribe * | getTimeFormat () const |
virtual float | getUndecidable (const ibis::qContinuousRange &cmp, ibis::bitvector &iffy) const |
Compute the locations of the rows that cannot be decided by the index. More... | |
virtual float | getUndecidable (const ibis::qDiscreteRange &cmp, ibis::bitvector &iffy) const |
Find rows that can not be decided with the existing index. | |
virtual float | getUndecidable (const ibis::qIntHod &cmp, ibis::bitvector &iffy) const |
Find rows that can not be decided with the existing index. More... | |
virtual float | getUndecidable (const ibis::qUIntHod &cmp, ibis::bitvector &iffy) const |
Find rows that can not be decided with the existing index. More... | |
virtual int | getValuesArray (void *vals) const |
Copy all rows of the column into an array_t object. More... | |
bool | hasIndex () const |
!< Are the values sorted? More... | |
virtual bool | hasRawData () const |
Does the raw data file exist? | |
bool | hasRoster () const |
Is there a roster list built for this column? Returns true for yes, false for no. More... | |
uint32_t | indexedRows () const |
Compute the number of rows captured by the index of this column. More... | |
virtual void | indexSerialSizes (uint64_t &, uint64_t &, uint64_t &) const |
Compute the sizes (in number of elements) of three arrays that would be produced by writeIndex. More... | |
virtual long | indexSize () const |
Compute the index size (in bytes). More... | |
const char * | indexSpec () const |
void | indexSpec (const char *spec) |
!< Retrieve the number of bins used. More... | |
void | indexSpeedTest () const |
Perform a set of built-in tests to determine the speed of common operations. More... | |
virtual int | indexWrite (ibis::array_t< double > &, ibis::array_t< int64_t > &, ibis::array_t< uint32_t > &) const |
Write the index into three arrays. | |
bool | isFloat () const |
Are they floating-point values? | |
bool | isInteger () const |
Are they integer values? | |
bool | isNumeric () const |
Are they numerical values? | |
bool | isSignedInteger () const |
Are they signed integer values? | |
bool | isSorted () const |
void | isSorted (bool) |
Change the flag m_sorted. More... | |
bool | isUnsignedInteger () const |
Are they unsigned integer values? | |
void | logMessage (const char *event, const char *fmt,...) const |
Log messages using printf syntax. | |
void | logWarning (const char *event, const char *fmt,...) const |
Log warning messages using printf syntax. | |
const double & | lowerBound () const |
The lower bound of the values. | |
void | lowerBound (double d) |
const char * | name () const |
Name of the column. | |
void | name (const char *nm) |
Rename the column. | |
int | nRows () const |
const char * | nullMaskName (std::string &fname) const |
Name of the NULL mask file. More... | |
uint32_t | numBins () const |
!< Retrieve the index specification. | |
const part * | partition () const |
const part *& | partition () |
void | preferredBounds (std::vector< double > &) const |
Retrieve the bin boundaries if the index is currently in use. | |
void | purgeIndexFile (const char *dir=0) const |
Purge the index files associated with the current column. | |
virtual array_t< signed char > * | selectBytes (const bitvector &mask) const |
Retrieve selected 1-byte integer values. More... | |
virtual array_t< double > * | selectDoubles (const bitvector &mask) const |
Put the selected values into an array as doubles. More... | |
virtual array_t< float > * | selectFloats (const bitvector &mask) const |
Put selected values of a float column into an array. More... | |
virtual array_t< int32_t > * | selectInts (const bitvector &mask) const |
Return selected rows of the column in an array_t object. More... | |
virtual std::vector< ibis::opaque > * | selectOpaques (const bitvector &mask) const |
virtual array_t< int16_t > * | selectShorts (const bitvector &mask) const |
Return selected rows of the column in an array_t object. More... | |
virtual array_t< unsigned char > * | selectUBytes (const bitvector &mask) const |
Return selected rows of the column in an array_t object. More... | |
virtual array_t< uint64_t > * | selectULongs (const bitvector &mask) const |
Return selected rows of the column in an array_t object. More... | |
virtual array_t< uint16_t > * | selectUShorts (const bitvector &mask) const |
Return selected rows of the column in an array_t object. More... | |
long | selectValues (const bitvector &, void *) const |
Return selected rows of the column in an array_t object. More... | |
long | selectValues (const bitvector &, void *, array_t< uint32_t > &) const |
Return selected rows of the column in an array_t object along with their positions. More... | |
long | selectValues (const ibis::qContinuousRange &, void *) const |
Select the values satisfying the specified range condition. | |
void | setDataflag (int df) |
int | setNullMask (const bitvector &) |
Change the null mask to the user specified one. More... | |
void | setTimeFormat (const char *) |
Add a custom format for the column to be interpreted as unix time stamps. | |
void | setTimeFormat (const unixTimeScribe &) |
virtual long | truncateData (const char *dir, uint32_t nent, ibis::bitvector &mask) const |
Truncate the number of records in the named dir to nent. More... | |
ibis::TYPE_T | type () const |
Type of the data. More... | |
virtual void | unloadIndex () const |
Unload the index associated with the column. More... | |
const double & | upperBound () const |
The upper bound of the values. | |
void | upperBound (double d) |
virtual long | writeData (const char *dir, uint32_t nold, uint32_t nnew, ibis::bitvector &mask, const void *va1, void *va2=0) |
Write the content in array va1 to directory dir. More... | |
virtual | ~column () |
Destructor. More... | |
virtual double | getActualMin () const |
A group of functions to compute some basic statistics for the column values. More... | |
virtual double | getActualMax () const |
Compute the actual maximum value by reading the data or examining the index. More... | |
virtual double | getSum () const |
Compute the sum of all values by reading the data. | |
long | getCumulativeDistribution (std::vector< double > &bounds, std::vector< uint32_t > &counts) const |
Compute the actual data distribution. More... | |
long | getDistribution (std::vector< double > &bbs, std::vector< uint32_t > &counts) const |
Count the number of records in each bin. More... | |
Protected Member Functions | |
int | readString (uint32_t i, std::string &val) const |
Read the string value of the i-th row. More... | |
int | readString (std::string &, int, long, long, char *, uint32_t, uint32_t &, off_t &) const |
Read one string from an open file. More... | |
int | readStrings1 (const ibis::bitvector &, std::vector< std::string > &) const |
Read the strings marked 1 in the mask. More... | |
int | readStrings2 (const ibis::bitvector &, std::vector< std::string > &) const |
Read the strings marked 1 in the mask. More... | |
void | startPositions (const char *dir, char *buf, uint32_t nbuf) const |
Locate the starting position of each string. More... | |
int | writeStrings (const char *to, const char *from, const char *spto, const char *spfrom, ibis::bitvector &msk, const ibis::bitvector &sel, char *buf, uint32_t nbuf) const |
Write the selected strings. More... | |
Protected Member Functions inherited from ibis::column | |
void | actualMinMax (const char *fname, const ibis::bitvector &mask, double &min, double &max, bool &asc) const |
Given the name of the data file, compute the actual minimum and the maximum value. More... | |
long | appendStrings (const std::vector< std::string > &, const ibis::bitvector &) |
Append the strings to the current data. More... | |
template<typename T > | |
long | appendValues (const array_t< T > &, const ibis::bitvector &) |
Append the content of incoming array to the current data. More... | |
double | computeMax () const |
Read the base data to compute the maximum value. | |
double | computeMin () const |
Read the data values and compute the minimum value. | |
double | computeSum () const |
Read the base data to compute the total sum. | |
template<typename T > | |
uint32_t | findLower (int fdes, const uint32_t nr, const T tgt) const |
Find the smallest value >= tgt. More... | |
template<typename T > | |
uint32_t | findUpper (int fdes, const uint32_t nr, const T tgt) const |
Find the smallest value > tgt. More... | |
void | logError (const char *event, const char *fmt,...) const |
Print messages started with "Error" and throw a string exception. | |
virtual int | searchSorted (const ibis::qContinuousRange &, ibis::bitvector &) const |
Resolve a continuous range condition on a sorted column. | |
virtual int | searchSorted (const ibis::qDiscreteRange &, ibis::bitvector &) const |
Resolve a discrete range condition on a sorted column. | |
virtual int | searchSorted (const ibis::qIntHod &, ibis::bitvector &) const |
Resolve a discrete range condition on a sorted column. | |
virtual int | searchSorted (const ibis::qUIntHod &, ibis::bitvector &) const |
Resolve a discrete range condition on a sorted column. | |
template<typename T > | |
int | searchSortedICC (const array_t< T > &vals, const ibis::qContinuousRange &rng, ibis::bitvector &hits) const |
Resolve a continuous range condition on an array of values. | |
template<typename T > | |
int | searchSortedICD (const array_t< T > &vals, const ibis::qDiscreteRange &rng, ibis::bitvector &hits) const |
Resolve a discrete range condition on an array of values. | |
template<typename T > | |
int | searchSortedICD (const array_t< T > &vals, const ibis::qIntHod &rng, ibis::bitvector &hits) const |
Resolve a discrete range condition on an array of values. | |
template<typename T > | |
int | searchSortedICD (const array_t< T > &vals, const ibis::qUIntHod &rng, ibis::bitvector &hits) const |
Resolve a discrete range condition on an array of values. | |
template<typename T > | |
int | searchSortedOOCC (const char *fname, const ibis::qContinuousRange &rng, ibis::bitvector &hits) const |
Resolve a continuous range condition using file operations. More... | |
template<typename T > | |
int | searchSortedOOCD (const char *fname, const ibis::qDiscreteRange &rng, ibis::bitvector &hits) const |
Resolve a discrete range condition using file operations. More... | |
template<typename T > | |
int | searchSortedOOCD (const char *fname, const ibis::qIntHod &rng, ibis::bitvector &hits) const |
Resolve a discrete range condition using file operations. More... | |
template<typename T > | |
int | searchSortedOOCD (const char *fname, const ibis::qUIntHod &rng, ibis::bitvector &hits) const |
Resolve a discrete range condition using file operations. More... | |
template<typename T > | |
long | selectToOpaques (const char *, const bitvector &, std::vector< ibis::opaque > &) const |
template<typename T > | |
long | selectToStrings (const char *, const bitvector &, std::vector< std::string > &) const |
Extract the values masked 1 and convert them to strings. | |
template<> | |
long | selectToStrings (const char *, const bitvector &, std::vector< std::string > &) const |
template<> | |
long | selectToStrings (const char *, const bitvector &, std::vector< std::string > &) const |
template<> | |
long | selectToStrings (const char *dfn, const bitvector &mask, std::vector< std::string > &str) const |
template<> | |
long | selectToStrings (const char *dfn, const bitvector &mask, std::vector< std::string > &str) const |
template<typename T > | |
long | selectValuesT (const char *, const bitvector &, array_t< T > &) const |
Select values marked in the bitvector mask . More... | |
template<typename T > | |
long | selectValuesT (const char *, const bitvector &mask, array_t< T > &vals, array_t< uint32_t > &inds) const |
Select the values marked in the bitvector mask . More... | |
long | string2int (int fptr, dictionary &dic, uint32_t nbuf, char *buf, array_t< uint32_t > &out) const |
Convert strings in the opened file to a list of integers with the aid of a dictionary. More... | |
Additional Inherited Members | |
Static Public Member Functions inherited from ibis::column | |
template<typename T > | |
static void | actualMinMax (const array_t< T > &vals, const ibis::bitvector &mask, double &min, double &max, bool &asc) |
Compute the minimum and maximum of the values in the array. | |
template<typename T > | |
static T | computeMax (const array_t< T > &vals, const ibis::bitvector &mask) |
Compute the maximum value in the array. | |
template<typename T > | |
static T | computeMin (const array_t< T > &vals, const ibis::bitvector &mask) |
Compute the minimum value in the array. | |
template<typename T > | |
static double | computeSum (const array_t< T > &vals, const ibis::bitvector &mask) |
Compute the sum of values in the array. | |
Protected Attributes inherited from ibis::column | |
int | dataflag |
Presence of the data file. More... | |
ibis::index * | idx |
The index for this column. It is not considered as a must-have member. | |
ibis::util::sharedInt32 | idxcnt |
The number of functions using the index. | |
double | lower |
!< Are the column values in ascending order? | |
std::string | m_bins |
!< Free-form description of the column. | |
std::string | m_desc |
!< Name of the column. | |
std::string | m_name |
!< Data type. | |
bool | m_sorted |
!< Index/binning specification. | |
ibis::TYPE_T | m_type |
!< The entries marked 1 are valid. | |
unixTimeScribe * | m_utscribe |
!< The maximum value. | |
ibis::bitvector | mask_ |
!< Data partition containing this column. | |
const part * | thePart |
double | upper |
!< The minimum value. | |
A data structure for storing null-terminated text.
This is meant for string values that are relatively long and may have an internal structure. The most useful search operation supported on this type of data is the keyword search, also known as full-text search. The keyword search operation is implemented through a boolean term-document matrix (implemented as ibis::keywords).
|
virtual |
Append the data file stored in directory df to the corresponding data file in directory dt.
Use the buffer buf to copy data in large chunks.
Reimplemented from ibis::column.
Reimplemented in ibis::category.
References UnixOpen.
|
inlinevirtual |
Append the records in vals to the current working dataset.
The 'void*' in this function follows the convention of the function getValuesArray (not writeData), i.e., for the ten fixed-size elementary data types, it is array_t<type>* and for string-valued columns it is std::vector<std::string>*.
Return the number of entries actually written to disk or a negative number to indicate error conditions.
Reimplemented from ibis::column.
Reimplemented in ibis::category.
|
virtual |
If the input string is found in the data file, it is returned, else this function returns 0.
It needs to keep both the data file and the starting position file open at the same time.
Reimplemented from ibis::column.
References ibis::fileManager::buffer< T >::address(), ibis::fileManager::instance(), ibis::fileManager::recordPages(), and ibis::fileManager::buffer< T >::size().
|
virtual |
Return the raw binary value for the i-th row.
This is primarily intended to retrieve values of blobs.
Reimplemented from ibis::column.
References ibis::util::getString().
|
inlinevirtual |
Return the string value for the i-th row.
Only implemented for ibis::text and ibis::category.
Reimplemented from ibis::column.
Reimplemented in ibis::category.
References readString().
Referenced by ibis::mensa::cursor::dumpIJ().
const ibis::column * ibis::text::IDColumnForKeywordIndex | ( | ) | const |
!< Print header info.
Locate the ID column for processing term-document list provided by the user.
This function checks indexSpec first for docIDName=xx for the name of the ID column, then checks the global parameter <table-name>.<column-name>.docIDName.
References ibis::util::getString(), ibis::gParameters(), and ibis::column::name().
|
virtual |
Load the index associated with the column.
iopt | This option is passed to ibis::index::create to be used if a new index is to be created. |
ropt | This option is passed to ibis::index::create to control the reading operations for reconstituting the index object from an index file. |
Reimplemented from ibis::column.
Reimplemented in ibis::category.
References ibis::column::loadIndex().
|
protected |
Read the string value of the i-th row.
It goes through a two-stage process by reading from two files: first the .sp file, to obtain the position of the string in the second file, and then the second file, which contains the actual string values (with nil terminators). This can be quite slow!
References ibis::fileManager::instance(), ibis::fileManager::recordPages(), and UnixOpen.
Referenced by getString(), and ibis::category::getString().
|
protected |
Read one string from an open file.
The string starts at position be and ends at en. The content may be in the array buf.
Returns 0 if successful, otherwise returns a negative number to indicate error.
|
protected |
Read the strings marked 1 in the mask.
It goes through a two-stage process by reading from two files: first from the .sp file to read the position of the string in the second file containing the actual string values (with nil terminators), and then from the second file to read the string value. This can be quite slow if you are attempting to read a lot of strings; however, it could be faster than readStrings2 if only a few strings are expected.
References ibis::bitvector::cnt(), ibis::bitvector::empty(), ibis::fileManager::instance(), ibis::bitvector::indexSet::nIndices(), ibis::fileManager::recordPages(), and UnixOpen.
|
protected |
Read the strings marked 1 in the mask.
It creates a memory map of the .sp file before reading the actual string values. Because the process of creating the memory map can take more time than reading a few values, this function is much more appropriate for reading a relatively large number of string values.
References ibis::fileManager::buffer< T >::address(), ibis::bitvector::cnt(), ibis::fileManager::instance(), ibis::bitvector::indexSet::nIndices(), ibis::util::readString(), ibis::fileManager::recordPages(), ibis::array_t< T >::size(), ibis::bitvector::size(), ibis::fileManager::buffer< T >::size(), and UnixOpen.
|
virtual |
Write the selected values to the specified directory.
If the destination directory is the current data directory, the file containing existing string values will be renamed to be column-name.old, otherwise, the file in the destination directory is simply overwritten. In case of error, a negative number is returned, otherwise, the number of rows saved to the new file is returned.
Reimplemented from ibis::column.
References ibis::fileManager::flushFile(), and ibis::fileManager::instance().
|
virtual |
Return the starting positions of strings marked 1 in the mask.
The starting positions of the selected string values are stored in the returned array.
Reimplemented from ibis::column.
References ibis::fileManager::getFile(), ibis::util::getFileSize(), ibis::fileManager::instance(), ibis::bitvector::indexSet::nIndices(), ibis::array_t< T >::push_back(), ibis::array_t< T >::size(), and ibis::bitvector::size().
|
virtual |
Retrieve the string values from the rows marked 1 in mask.
Reimplemented from ibis::column.
Reimplemented in ibis::category.
References ibis::fileManager::buffer< T >::address(), ibis::bitvector::cnt(), ibis::util::getFileSize(), ibis::bitvector::indexSet::nIndices(), ibis::util::readString(), ibis::bitvector::size(), ibis::fileManager::buffer< T >::size(), and UnixOpen.
Referenced by ibis::category::selectStrings().
|
virtual |
Return the positions of records marked 1 in the mask.
This indicates to ibis::bundle that every string value is distinct.
It also forces the sorting procedure to produce an order following the order of the entries in the table. This makes the printout of an ibis::text field considerably less useful than that of other column types!
Reimplemented from ibis::column.
Reimplemented in ibis::category.
References ibis::bitvector::indexSet::nIndices(), and ibis::array_t< T >::push_back().
|
protected |
Locate the starting position of each string.
It uses the data file located in the named directory dir. If dir is a nil pointer, the directory defaults to the current working directory of the data partition.
It writes the starting positions as int64_t integers to a file with .sp as extension.
Argument buf (with nbuf bytes) is used as temporary work space. If nbuf = 0, this function allocates its own working space.
References ibis::fileManager::buffer< T >::address(), and ibis::fileManager::buffer< T >::size().
Referenced by text().
|
virtual |
Given a string literal, return a bitvector that marks the strings that match it.
This is a relatively slow process since this function actually reads the string values from disk.
Reimplemented from ibis::column.
Reimplemented in ibis::category.
References ibis::fileManager::buffer< T >::address(), ibis::bitvector::adjustSize(), ibis::bitvector::clear(), ibis::bitvector::cnt(), ibis::fileManager::instance(), ibis::fileManager::recordPages(), ibis::bitvector::setBit(), ibis::bitvector::size(), and ibis::fileManager::buffer< T >::size().
Referenced by ibis::category::stringSearch().
|
virtual |
Locate the rows that match any of the given strings.
Return the number of hits upon successful completion of this function, otherwise return a negative number to indicate error.
Reimplemented from ibis::column.
Reimplemented in ibis::category.
References ibis::fileManager::buffer< T >::address(), ibis::bitvector::adjustSize(), ibis::bitvector::clear(), ibis::bitvector::cnt(), ibis::fileManager::instance(), ibis::fileManager::recordPages(), ibis::bitvector::set(), ibis::bitvector::setBit(), ibis::bitvector::size(), and ibis::fileManager::buffer< T >::size().
|
protected |
Write the selected strings.
The caller manages the necessary locks for accessing this function.
References ibis::fileManager::buffer< T >::address(), ibis::bitvector::adjustSize(), ibis::bitvector::cnt(), ibis::bitvector::indexSet::nIndices(), ibis::fileManager::buffer< T >::size(), ibis::bitvector::subset(), and UnixOpen.