4 #ifndef IBIS_CATEGORY_H
5 #define IBIS_CATEGORY_H
34 virtual long keywordSearch(
const char* str,
ibis::bitvector& hits)
const;
35 virtual long keywordSearch(
const std::vector<std::string>& strs,
37 virtual long keywordSearch(
const char*)
const;
38 virtual long keywordSearch(
const std::vector<std::string>&)
const;
41 virtual long stringSearch(
const std::vector<std::string>& strs,
44 virtual long stringSearch(
const std::vector<std::string>& strs)
const;
47 virtual long patternSearch(
const char*)
const;
53 virtual void loadIndex(
const char* iopt=0,
int ropt=0)
const throw ();
54 virtual long append(
const char* dt,
const char* df,
const uint32_t nold,
55 const uint32_t nnew, uint32_t nbuf,
char* buf);
58 char *buf, uint32_t nbuf);
63 virtual std::vector<std::string>*
65 virtual const char*
findString(
const char* str)
const;
66 virtual int getString(uint32_t i, std::string &val)
const {
68 virtual int getOpaque(uint32_t, ibis::opaque&)
const;
72 virtual void write(FILE* file)
const;
73 virtual void print(std::ostream& out)
const;
76 void TDListForKeywordIndex(std::string&)
const;
77 void delimitersForKeywordIndex(std::string&)
const;
92 virtual int operator()(std::vector<const char*>& tkns,
char *buf) = 0;
99 void startPositions(
const char *dir,
char *buf, uint32_t nbuf)
const;
100 int readString(uint32_t i, std::string &val)
const;
101 int readString(std::string&,
int,
long,
long,
char*, uint32_t,
102 uint32_t&, off_t&)
const;
106 const char *spto,
const char *spfrom,
108 char *buf, uint32_t nbuf)
const;
127 category(
const part* tbl,
const char* name,
const char* value,
128 const char* dir=0, uint32_t nevt=0);
130 virtual long keywordSearch(
const char* str,
ibis::bitvector& hits)
const {
132 virtual long keywordSearch(
const std::vector<std::string>& vals,
135 virtual long keywordSearch(
const char* str)
const {
137 virtual long keywordSearch(
const std::vector<std::string>& vals)
const {
141 virtual long stringSearch(
const std::vector<std::string>& vals,
144 virtual long stringSearch(
const std::vector<std::string>& vals)
const;
154 virtual void loadIndex(
const char* =0,
int =0)
const throw ();
156 virtual long append(
const char* dt,
const char* df,
const uint32_t nold,
157 const uint32_t nnew, uint32_t nbuf,
char* buf);
161 virtual std::vector<std::string>*
163 virtual int getString(uint32_t i, std::string &val)
const;
168 virtual const char*
getKey(uint32_t i)
const;
169 virtual const char*
isKey(
const char* str)
const;
171 virtual void write(FILE* file)
const;
172 virtual void print(std::ostream& out)
const;
186 void prepareMembers()
const;
187 void readDictionary(
const char *dir=0)
const;
191 #endif // IBIS_CATEGORY_H
virtual long stringSearch(const char *str, ibis::bitvector &hits) const
Given a string literal, return a bitvector that marks the strings that match it. ...
Definition: category-hc.cpp:1841
virtual void print(std::ostream &out) const
!< Write the metadata entry.
Definition: category-hc.cpp:2689
virtual array_t< uint32_t > * selectUInts(const bitvector &mask) const
Return the positions of records marked 1 in the mask.
Definition: category-hc.cpp:2698
virtual std::vector< std::string > * selectStrings(const bitvector &mask) const
Retrieve the string values from the rows marked 1 in mask.
Definition: category-hc.cpp:130
ibis::direkte * fillIndex(const char *dir=0) const
Build an ibis::direkte index using the existing primary data.
Definition: category-hc.cpp:320
virtual void write(FILE *file) const
Write the current metadata to -part.txt of the data partition.
Definition: category-hc.cpp:2668
int readStrings1(const ibis::bitvector &, std::vector< std::string > &) const
Read the strings marked 1 in the mask.
Definition: category.cpp:3156
virtual double estimateCost(const ibis::qLike &cmp) const
Estimate the cost of evaluating a Like expression.
Definition: category-hc.cpp:789
virtual void unloadIndex() const
Unload the index associated with the column.
Definition: column.cpp:5880
virtual int operator()(std::vector< const char * > &tkns, char *buf)=0
A tokenizer must implement a two-argument operator().
virtual uint32_t getNumKeys() const
Return the number of key values.
Definition: category-hc.cpp:1425
virtual std::vector< std::string > * selectStrings(const bitvector &mask) const
Retrieve the string values from the rows marked 1 in mask.
Definition: category-hc.cpp:2757
void startPositions(const char *dir, char *buf, uint32_t nbuf) const
Locate the starting position of each string.
Definition: category-hc.cpp:1487
virtual long append(const void *, const ibis::bitvector &)
Append the records in vals to the current working dataset.
Definition: category.h:56
virtual long saveSelected(const ibis::bitvector &sel, const char *dest, char *buf, uint32_t nbuf)
Write the selected values to the specified directory.
Definition: category-hc.cpp:3658
The column contains one of the values in a list.
Definition: qExpr.h:560
int readString(uint32_t i, std::string &val) const
Read the string value of ith row.
Definition: category-hc.cpp:2992
virtual const char * isKey(const char *str) const
Is the given string one of the keys in the dictionary? Return a null pointer if not.
Definition: category-hc.cpp:1442
virtual array_t< int64_t > * selectLongs(const bitvector &mask) const
Return the starting positions of strings marked 1 in the mask.
Definition: category-hc.cpp:2718
The class to represent a column of a data partition.
Definition: column.h:65
virtual int getString(uint32_t i, std::string &val) const
Return the string value for the ith row.
Definition: category.h:66
virtual void print(std::ostream &out) const
Print header info.
Definition: category-hc.cpp:1409
int writeStrings(const char *to, const char *from, const char *spto, const char *spfrom, ibis::bitvector &msk, const ibis::bitvector &sel, char *buf, uint32_t nbuf) const
Write the selected strings.
Definition: category-hc.cpp:3739
virtual void loadIndex(const char *iopt=0, int ropt=0) const
Load the index associated with the column.
Definition: category-hc.cpp:1731
virtual int getString(uint32_t i, std::string &val) const
Return the string at the ith row.
Definition: category-hc.cpp:971
The class qString encapsulates information for comparing string values.
Definition: qExpr.h:504
virtual array_t< uint32_t > * selectUInts(const bitvector &mask) const
Return the integers corresponding to the selected strings.
Definition: category-hc.cpp:91
virtual ~tokenizer()
Destructor.
Definition: category.h:94
Arbitrary null-terminated strings.
Definition: table.h:51
virtual void write(FILE *file) const
Write the current content to the metadata file for the data partition.
Definition: category-hc.cpp:1356
virtual double estimateCost(const ibis::qContinuousRange &cmp) const
Estimate the cost of evaluating the query expression.
Definition: column.cpp:6612
A data structure for storing null-terminated text.
Definition: category.h:27
The class ibis::part represents a partition of a relational table.
Definition: part.h:27
TYPE_T
Supported data types.
Definition: table.h:25
virtual int getOpaque(uint32_t, ibis::opaque &) const
Return the raw binary value for the ith row.
Definition: category-hc.cpp:3406
virtual const char * findString(const char *str) const
If the input string is found in the data file, it is returned, else this function returns 0...
Definition: category-hc.cpp:3251
virtual double estimateCost(const ibis::qString &cmp) const
Estimate the cost of evaluating a string lookup.
Definition: category-hc.cpp:3640
A specialized low-cardinality text field.
Definition: category.h:120
virtual long append(const void *, const ibis::bitvector &)
Append the records in vals to the current working dataset.
Definition: category.h:158
virtual const char * getKey(uint32_t i) const
Return the ith value in the dictionary.
Definition: category-hc.cpp:1432
A tokenizer class to turn a string buffer into tokens.
Definition: category.h:81
This is an implementation of the simple bitmap index without the first binning step...
virtual long stringSearch(const char *str, ibis::bitvector &hits) const
Find rows with the exact string as the argument.
Definition: category-hc.cpp:672
int setDictionary(const dictionary &)
Replace the dictionary with the incoming one.
Definition: category-hc.cpp:523
A data structure to represent a sequence of bits.
Definition: bitvector.h:62
virtual const ibis::dictionary * getDictionary() const
Return a pointer to the dictionary used for the categorical values.
Definition: category-hc.cpp:512
virtual long patternSearch(const char *pat) const
Estimate the number of hits for a string pattern.
Definition: category-hc.cpp:893
const char * name() const
Name of the column.
Definition: column.h:78
int readStrings2(const ibis::bitvector &, std::vector< std::string > &) const
Read the strings marked 1 in the mask.
Definition: category.cpp:3297
const column * IDColumnForKeywordIndex() const
!< Print header info.
Definition: category-hc.cpp:3418
virtual void loadIndex(const char *=0, int=0) const
This function makes sure the index is ready.
Definition: category-hc.cpp:997
Provide a dual-directional mapping between strings and integers.
Definition: dict-0.h:19
Representing the operator 'LIKE'.
Definition: qExpr.h:585
A version of precise index that directly uses the integer values.
Definition: idirekte.h:17
Define the class ibis::column.
virtual ~category()
Destructor.
Definition: category-hc.cpp:77
Define a dictionary data structure used by ibis::category.
virtual long append(const char *dt, const char *df, const uint32_t nold, const uint32_t nnew, uint32_t nbuf, char *buf)
Append the data file stored in directory df to the corresponding data file in directory dt...
Definition: category-hc.cpp:1744
virtual long append(const char *dt, const char *df, const uint32_t nold, const uint32_t nnew, uint32_t nbuf, char *buf)
Append the content in df to the directory dt.
Definition: category-hc.cpp:1003