4 #ifndef IBIS_KEYWORDS_H
5 #define IBIS_KEYWORDS_H
/// Returns the name of this index type as a C string.
/// The value is always the string literal "keywords".
105 virtual const char*
name()
const {
return "keywords";}
/// Reports the count associated with each bin through the output vector b.
/// NOTE(review): what constitutes a "bin" for a keyword index is decided in
/// the implementation file, which is not visible here -- confirm.
107 virtual void binWeights(std::vector<uint32_t>& b)
const;
/// The minimum value recorded in the index.
/// This class always returns DBL_MAX.
/// NOTE(review): presumably a sentinel meaning that no numeric minimum is
/// tracked for keywords -- confirm against callers.
108 virtual double getMin()
const {
return DBL_MAX;}
/// The maximum value recorded in the index.
/// This class always returns -DBL_MAX.
/// NOTE(review): presumably a sentinel meaning that no numeric maximum is
/// tracked for keywords -- confirm against callers.
109 virtual double getMax()
const {
return -DBL_MAX;}
/// The approximate sum of all the values indexed.
/// This class always returns -DBL_MAX.
/// NOTE(review): presumably a sentinel meaning that a sum is not meaningful
/// for keywords -- confirm against callers.
110 virtual double getSum()
const {
return -DBL_MAX;}
/// Searches the index for a single keyword given as a C string.
/// NOTE(review): the meaning of the returned long (presumably a hit count
/// or an estimate of it) is defined in the implementation file -- confirm.
112 long search(
const char*)
const;
/// Searches the index for a list of keywords.
/// NOTE(review): how the keywords are combined (all vs. any) and the meaning
/// of the returned long are defined in the implementation file -- confirm.
114 long search(
const std::vector<std::string>&)
const;
/// Prints human readable information about the index to the stream out.
117 virtual void print(std::ostream& out)
const;
/// Computes the sizes of the arrays that the serialization function (write)
/// would generate; the three sizes are returned through the reference
/// arguments.
/// NOTE(review): which argument corresponds to which array is not visible
/// here -- confirm against the three-array overload of write.
118 virtual void serialSizes(uint64_t&, uint64_t&, uint64_t&)
const;
/// Writes the index using the location named by dt.
/// NOTE(review): dt is presumably a directory or file name, and the int
/// return presumably an error code -- confirm in the implementation file.
122 virtual int write(
const char* dt)
const;
/// Reconstructs an index from the file named by idxfile.
/// NOTE(review): the int return is presumably an error code -- confirm.
123 virtual int read(
const char* idxfile);
/// Extends the index.
/// NOTE(review): dt and df are presumably the destination and source data
/// directories and nnew the number of new rows -- confirm in the
/// implementation file.
125 virtual long append(
const char* dt,
const char* df, uint32_t nnew);
/// Extracts the term from a line of the input term-document file and stores
/// it in key.  buf is passed by reference so the read position can be
/// advanced for the caller.
/// NOTE(review): the returned char is presumably the delimiter that ended
/// the term -- confirm against the inline definition.
160 inline
char readTerm(const
char*& buf,
std::
string &key) const;
/// Extracts the next unsigned integer from an input line.  buf is passed by
/// reference so the read position can be advanced for the caller.
161 inline uint32_t
readUInt(const
char*& buf) const;
163 std::vector<uint32_t>& idlist,
164 char* buf, uint32_t nbuf) const;
178 std::
string &keyword)
const {
179 while (isspace(*buf))
181 while (isprint(*buf)) {
185 else if (isspace(*buf)) {
186 for (++ buf; isspace(*buf); ++ buf);
207 while (*buf && ! isdigit(*buf))
210 while (isdigit(*buf)) {
211 res = res * 10 + (*buf -
'0');
/// Tokenizer entry point: extracts keywords from buf and records them in
/// tkns as C-string pointers.
/// NOTE(review): buf is non-const, so it is presumably modified in place
/// with tkns pointing into it -- confirm in the implementation file.
226 virtual int operator()(std::vector<const char*>& tkns,
char *buf);
virtual double getMin() const
The minimum value recorded in the index.
Definition: ikeywords.h:108
A simple tokenizer used to extract keywords.
Definition: ikeywords.h:220
void clear()
Clear the current content.
Definition: ikeywords.cpp:844
Class ibis::keywords defines a boolean term-document matrix.
Definition: ikeywords.h:96
Definition of the common functions of an index.
uint32_t readUInt(const char *&buf) const
Extract the next integer in an input line.
Definition: ikeywords.h:205
virtual size_t getSerialSize() const
Estimate the size of the .idx file.
Definition: ikeywords.cpp:1030
Defines three specializations of the column class.
virtual int read(const char *idxfile)
Reconstructs an index from the named file.
Definition: ikeywords.cpp:714
virtual ~tokenizer()
Destructor.
Definition: ikeywords.h:224
std::string delim_
The list of delimiters. May be empty.
Definition: ikeywords.h:230
Simple range condition.
Definition: qExpr.h:252
virtual double getMax() const
The maximum value recorded in the index.
Definition: ikeywords.h:109
The storage class treats all memory as char*.
Definition: fileManager.h:237
virtual void print(std::ostream &out) const
Prints human readable information.
Definition: ikeywords.cpp:501
virtual long append(const char *dt, const char *df, uint32_t nnew)
Extend the index.
Definition: ikeywords.cpp:860
long search(const char *, ibis::bitvector &) const
Match a particular keyword.
Definition: ikeywords.cpp:886
virtual void estimate(const ibis::qContinuousRange &expr, ibis::bitvector &lower, ibis::bitvector &upper) const
Computes an approximation of hits as a pair of lower and upper bounds.
Definition: ikeywords.cpp:874
The current implementation of FastBit is code named IBIS; most data structures and functions are in t...
Definition: bord.h:16
virtual void estimate(const ibis::qContinuousRange &, ibis::bitvector &lower, ibis::bitvector &upper) const
Computes an approximation of hits as a pair of lower and upper bounds.
Definition: index.h:191
The class to represent a column of a data partition.
Definition: column.h:65
virtual long evaluate(const ibis::qContinuousRange &expr, ibis::bitvector &hits) const =0
To evaluate the exact hits.
virtual void serialSizes(uint64_t &, uint64_t &, uint64_t &) const
Compute the size of arrays that would be generated by the serialization function (write)...
Definition: ikeywords.cpp:695
ibis::dictionary terms
A dictionary for the terms.
Definition: ikeywords.h:152
void reorderTerms()
Reorder the terms in the dictionary.
Definition: ikeywords.cpp:851
tokenizer(const char *d=ibis::util::delimiters)
Constructor.
Definition: ikeywords.cpp:1043
virtual float undecidable(const ibis::qContinuousRange &expr, ibis::bitvector &iffy) const
Mark the position of the rows that cannot be decided with this index.
Definition: index.h:205
The base index class.
Definition: index.h:82
virtual void binBoundaries(std::vector< double > &b) const
The function binBoundaries and binWeights return bin boundaries and counts of each bin respectively...
Definition: ikeywords.h:106
virtual int operator()(std::vector< const char * > &tkns, char *buf)
Tokenizer.
Definition: ikeywords.cpp:1058
virtual index * dup() const
Duplicate the content of an index object.
Definition: ikeywords.cpp:166
INDEX_TYPE
The integer values of this enum type are used in the index files to differentiate the indexes...
Definition: index.h:86
virtual long select(const ibis::qContinuousRange &, void *, ibis::bitvector &) const
Evaluate the range condition, select values, and record the positions.
Definition: ikeywords.h:148
A data structure for storing null-terminated text.
Definition: category.h:27
virtual long select(const ibis::qContinuousRange &, void *) const
Evaluate the range condition and select values.
Definition: ikeywords.h:146
virtual double getSum() const
Compute the approximate sum of all the values indexed.
Definition: ikeywords.h:110
int readTDLine(std::istream &in, std::string &key, std::vector< uint32_t > &idlist, char *buf, uint32_t nbuf) const
Read one line from the term-document file.
Definition: ikeywords.cpp:289
A tokenizer class to turn a string buffer into tokens.
Definition: category.h:81
virtual int write(ibis::array_t< double > &, ibis::array_t< int64_t > &, ibis::array_t< uint32_t > &) const
Save index to three arrays. Serialize the index in memory.
Definition: ikeywords.cpp:668
A data structure to represent a sequence of bits.
Definition: bitvector.h:62
int parseTextFile(ibis::text::tokenizer &tkn, const char *f)
Parse the text file to build a keyword index.
Definition: ikeywords.cpp:366
virtual float undecidable(const ibis::qContinuousRange &, ibis::bitvector &iffy) const
This class and its derived classes should produce exact answers, therefore no undecidable rows...
Definition: ikeywords.h:138
const char * delimiters
Delimiters used to separate a string of names.
Definition: util.cpp:71
virtual const char * name() const
Returns the name of the index, similar to the function type, but returns a string instead...
Definition: ikeywords.h:105
keywords(const ibis::column *c, const char *f=0)
Constructor.
Definition: ikeywords.cpp:18
virtual INDEX_TYPE type() const
Returns an index type identifier.
Definition: ikeywords.h:104
void setBits(std::vector< uint32_t > &pos, ibis::bitvector &bvec) const
Turn on the specified positions in a bitvector.
Definition: ikeywords.cpp:354
char readTerm(const char *&buf, std::string &key) const
Extract the term from a line of the input term-document file.
Definition: ikeywords.h:177
virtual double estimateCost(const ibis::qContinuousRange &expr) const
Estimate the cost of evaluating a range condition.
Definition: ikeywords.cpp:977
void clear()
Remove the existing content of a bitvector.
Definition: bitvector.cpp:243
A discrete range expression.
Definition: qExpr.h:337
Provide a dual-directional mapping between strings and integers.
Definition: dict-0.h:19
int readTermDocFile(const ibis::column *idcol, const char *f)
Reads a term-document list from an external file.
Definition: ikeywords.cpp:173
ibis::keywords, boolean term-document matrix.
Definition: index.h:130
virtual long evaluate(const ibis::qContinuousRange &expr, ibis::bitvector &hits) const
To evaluate the exact hits.
Definition: ikeywords.cpp:866