4 #ifndef IBIS_KEYWORDS_H 
    5 #define IBIS_KEYWORDS_H 
  105     virtual const char* 
name()
 const {
return "keywords";}
 
  107     virtual void binWeights(std::vector<uint32_t>& b) 
const;
 
  108     virtual double getMin()
 const {
return DBL_MAX;}
 
  109     virtual double getMax()
 const {
return -DBL_MAX;}
 
  110     virtual double getSum()
 const {
return -DBL_MAX;}
 
  112     long search(
const char*) 
const;
 
  114     long search(
const std::vector<std::string>&) 
const;
 
  117     virtual void print(std::ostream& out) 
const;
 
  118     virtual void serialSizes(uint64_t&, uint64_t&, uint64_t&) 
const;
 
  122     virtual int write(
const char* dt) 
const;
 
  123     virtual int read(
const char* idxfile);
 
  125     virtual long append(
const char* dt, 
const char* df, uint32_t nnew);
 
  160     inline 
char readTerm(const 
char*& buf, 
std::
string &key) const;
 
  161     inline uint32_t 
readUInt(const 
char*& buf) const;
 
  163                    std::vector<uint32_t>& idlist,
 
  164                    char* buf, uint32_t nbuf) const;
 
  178                                      std::
string &keyword)
 const {
 
  179     while (isspace(*buf)) 
 
  181     while (isprint(*buf)) { 
 
  185         else if (isspace(*buf)) {
 
  186             for (++ buf; isspace(*buf); ++ buf);
 
  207     while (*buf && ! isdigit(*buf)) 
 
  210     while (isdigit(*buf)) {
 
  211         res = res * 10 + (*buf - 
'0');
 
  226     virtual int operator()(std::vector<const char*>& tkns, 
char *buf);
 
virtual double getMin() const 
The minimum value recorded in the index. 
Definition: ikeywords.h:108
A simple tokenizer used to extract keywords. 
Definition: ikeywords.h:220
void clear()
Clear the current content. 
Definition: ikeywords.cpp:844
Class ibis::keywords defines a boolean term-document matrix. 
Definition: ikeywords.h:96
Definition of the common functions of an index. 
uint32_t readUInt(const char *&buf) const 
Extract the next integer in an inputline. 
Definition: ikeywords.h:205
virtual size_t getSerialSize() const 
Estimate the size of the .idx file. 
Definition: ikeywords.cpp:1030
Defines three specializations of the column class. 
virtual int read(const char *idxfile)
Reconstructs an index from the named file. 
Definition: ikeywords.cpp:714
virtual ~tokenizer()
Destructor. 
Definition: ikeywords.h:224
std::string delim_
The list of delimiters. May be empty. 
Definition: ikeywords.h:230
Simple range condition. 
Definition: qExpr.h:252
virtual double getMax() const 
The maximum value recorded in the index. 
Definition: ikeywords.h:109
The storage class treats all memory as char*. 
Definition: fileManager.h:237
virtual void print(std::ostream &out) const 
Prints human readable information. 
Definition: ikeywords.cpp:501
virtual long append(const char *dt, const char *df, uint32_t nnew)
Extend the index. 
Definition: ikeywords.cpp:860
long search(const char *, ibis::bitvector &) const 
Match a particular keyword. 
Definition: ikeywords.cpp:886
virtual void estimate(const ibis::qContinuousRange &expr, ibis::bitvector &lower, ibis::bitvector &upper) const 
Computes an approximation of hits as a pair of lower and upper bounds. 
Definition: ikeywords.cpp:874
The current implementation of FastBit is code named IBIS; most data structures and functions are in t...
Definition: bord.h:16
virtual void estimate(const ibis::qContinuousRange &, ibis::bitvector &lower, ibis::bitvector &upper) const 
Computes an approximation of hits as a pair of lower and upper bounds. 
Definition: index.h:191
The class to represent a column of a data partition. 
Definition: column.h:65
virtual long evaluate(const ibis::qContinuousRange &expr, ibis::bitvector &hits) const  =0
To evaluate the exact hits. 
virtual void serialSizes(uint64_t &, uint64_t &, uint64_t &) const 
Compute the size of arrays that would be generated by the serialization function (write)...
Definition: ikeywords.cpp:695
ibis::dictionary terms
A dictionary for the terms. 
Definition: ikeywords.h:152
void reorderTerms()
Reorder the terms in the dictionary. 
Definition: ikeywords.cpp:851
tokenizer(const char *d=ibis::util::delimiters)
Constructor. 
Definition: ikeywords.cpp:1043
virtual float undecidable(const ibis::qContinuousRange &expr, ibis::bitvector &iffy) const 
Mark the position of the rows that cannot be decided with this index. 
Definition: index.h:205
The base index class. 
Definition: index.h:82
virtual void binBoundaries(std::vector< double > &b) const 
The function binBoundaries and binWeights return bin boundaries and counts of each bin respectively...
Definition: ikeywords.h:106
virtual int operator()(std::vector< const char * > &tkns, char *buf)
Tokenizer. 
Definition: ikeywords.cpp:1058
virtual index * dup() const 
Duplicate the content of an index object. 
Definition: ikeywords.cpp:166
INDEX_TYPE
The integer values of this enum type are used in the index files to differentiate the indexes...
Definition: index.h:86
virtual long select(const ibis::qContinuousRange &, void *, ibis::bitvector &) const 
Evaluate the range condition, select values, and record the positions. 
Definition: ikeywords.h:148
A data structure for storing null-terminated text. 
Definition: category.h:27
virtual long select(const ibis::qContinuousRange &, void *) const 
Evaluate the range condition and select values. 
Definition: ikeywords.h:146
virtual double getSum() const 
Compute the approximate sum of all the values indexed. 
Definition: ikeywords.h:110
int readTDLine(std::istream &in, std::string &key, std::vector< uint32_t > &idlist, char *buf, uint32_t nbuf) const 
Read one line from the term-document file. 
Definition: ikeywords.cpp:289
A tokenizer class to turn a string buffer into tokens. 
Definition: category.h:81
virtual int write(ibis::array_t< double > &, ibis::array_t< int64_t > &, ibis::array_t< uint32_t > &) const 
Save index to three arrays. Serialize the index in memory. 
Definition: ikeywords.cpp:668
A data structure to represent a sequence of bits. 
Definition: bitvector.h:62
int parseTextFile(ibis::text::tokenizer &tkn, const char *f)
Parse the text file to build a keyword index. 
Definition: ikeywords.cpp:366
virtual float undecidable(const ibis::qContinuousRange &, ibis::bitvector &iffy) const 
This class and its derived classes should produce exact answers, therefore no undecidable rows...
Definition: ikeywords.h:138
const char * delimiters
Delimiters used to separate a string of names. 
Definition: util.cpp:71
virtual const char * name() const 
Returns the name of the index, similar to the function type, but returns a string instead...
Definition: ikeywords.h:105
keywords(const ibis::column *c, const char *f=0)
Constructor. 
Definition: ikeywords.cpp:18
virtual INDEX_TYPE type() const 
Returns an index type identifier. 
Definition: ikeywords.h:104
void setBits(std::vector< uint32_t > &pos, ibis::bitvector &bvec) const 
Turn on the specified positions in a bitvector. 
Definition: ikeywords.cpp:354
char readTerm(const char *&buf, std::string &key) const 
Extract the term from a line of input term-document file. 
Definition: ikeywords.h:177
virtual double estimateCost(const ibis::qContinuousRange &expr) const 
Estimate the cost of evaluating a range condition. 
Definition: ikeywords.cpp:977
void clear()
Remove the existing content of a bitvector. 
Definition: bitvector.cpp:243
A discrete range expression. 
Definition: qExpr.h:337
Provide a dual-directional mapping between strings and integers. 
Definition: dict-0.h:19
int readTermDocFile(const ibis::column *idcol, const char *f)
Reads a term-document list from an external file. 
Definition: ikeywords.cpp:173
ibis::keywords, boolean term-document matrix. 
Definition: index.h:130
virtual long evaluate(const ibis::qContinuousRange &expr, ibis::bitvector &hits) const 
To evaluate the exact hits. 
Definition: ikeywords.cpp:866