category.h
Go to the documentation of this file.
1 //File: $Id$
2 // Author: John Wu <John.Wu at ACM.org>
3 // Copyright (c) 2000-2016 the Regents of the University of California
4 #ifndef IBIS_CATEGORY_H
5 #define IBIS_CATEGORY_H
6 #include "column.h" // ibis::column
16 #include "dictionary.h" // ibis::dictionary
17 #include "idirekte.h" // ibis::direkte
18 
/// A data structure for storing null-terminated text.
/// Derived from ibis::column; adds keyword/string/pattern search
/// functions and helpers for reading and writing string values.
27 class ibis::text : public ibis::column {
28 public:
    /// Destructor.  Calls unloadIndex() to unload the index associated
    /// with the column.
29  virtual ~text() {unloadIndex();};
30  text(const part* tbl, FILE* file);
31  text(const part* tbl, const char* name, ibis::TYPE_T t=ibis::TEXT);
32  text(const ibis::column& col); // copy from column
33 
34  virtual long keywordSearch(const char* str, ibis::bitvector& hits) const;
35  virtual long keywordSearch(const std::vector<std::string>& strs,
36  ibis::bitvector& hits) const;
37  virtual long keywordSearch(const char*) const;
38  virtual long keywordSearch(const std::vector<std::string>&) const;
39 
    /// Given a string literal, return a bitvector that marks the strings
    /// that match it.
40  virtual long stringSearch(const char* str, ibis::bitvector& hits) const;
41  virtual long stringSearch(const std::vector<std::string>& strs,
42  ibis::bitvector& hits) const;
43  virtual long stringSearch(const char* str) const;
44  virtual long stringSearch(const std::vector<std::string>& strs) const;
45 
46  virtual long patternSearch(const char*, ibis::bitvector&) const;
47  virtual long patternSearch(const char*) const;
48 
    /// Estimate the cost of evaluating a string lookup.
50  virtual double estimateCost(const ibis::qString& cmp) const;
51  virtual double estimateCost(const ibis::qAnyString& cmp) const;
52 
    /// Load the index associated with the column.
    /// NOTE(review): `throw ()` is a dynamic exception specification,
    /// deprecated since C++11; `noexcept` is the modern spelling.
53  virtual void loadIndex(const char* iopt=0, int ropt=0) const throw ();
    /// Append the data file stored in directory df to the corresponding
    /// data file in directory dt.
54  virtual long append(const char* dt, const char* df, const uint32_t nold,
55  const uint32_t nnew, uint32_t nbuf, char* buf);
    /// Append the records in vals to the current working dataset.
    /// This overload does nothing and always returns -1.
56  virtual long append(const void*, const ibis::bitvector&) {return -1;}
    /// Write the selected values to the specified directory.
57  virtual long saveSelected(const ibis::bitvector& sel, const char *dest,
58  char *buf, uint32_t nbuf);
    /// Return the positions of records marked 1 in the mask.
60  virtual array_t<uint32_t>* selectUInts(const bitvector& mask) const;
    /// Return the starting positions of strings marked 1 in the mask.
62  virtual array_t<int64_t>* selectLongs(const bitvector& mask) const;
    /// Retrieve the string values from the rows marked 1 in mask.
63  virtual std::vector<std::string>*
64  selectStrings(const bitvector& mask) const;
    /// Return the input string if it is found in the data file;
    /// otherwise return 0.
65  virtual const char* findString(const char* str) const;
    /// Return the string value for the ith row.  Forwards to
    /// readString(i, val) and returns its result.
66  virtual int getString(uint32_t i, std::string &val) const {
67  return readString(i, val);}
    /// Return the raw binary value for the ith row.
68  virtual int getOpaque(uint32_t, ibis::opaque&) const;
69  // virtual std::vector<ibis::opaque>*
70  // selectOpaques(const bitvector& mask) const;
71 
    /// Write the current metadata to -part.txt of the data partition.
72  virtual void write(FILE* file) const;
73  virtual void print(std::ostream& out) const;
74 
75  const column* IDColumnForKeywordIndex() const;
76  void TDListForKeywordIndex(std::string&) const;
77  void delimitersForKeywordIndex(std::string&) const;
78 
    /// A tokenizer class to turn a string buffer into tokens.
81  struct tokenizer {
    /// A tokenizer must implement a two-argument operator().
92  virtual int operator()(std::vector<const char*>& tkns, char *buf) = 0;
    /// Destructor.
94  virtual ~tokenizer() {}
95  }; // struct tokenizer
96 
97 protected:
98 
    /// Locate the starting position of each string.
99  void startPositions(const char *dir, char *buf, uint32_t nbuf) const;
    /// Read the string value of ith row.
100  int readString(uint32_t i, std::string &val) const;
101  int readString(std::string&, int, long, long, char*, uint32_t,
102  uint32_t&, off_t&) const;
    /// Read the strings marked 1 in the mask.
103  int readStrings1(const ibis::bitvector&, std::vector<std::string>&) const;
    /// Read the strings marked 1 in the mask.
104  int readStrings2(const ibis::bitvector&, std::vector<std::string>&) const;
    /// Write the selected strings.
105  int writeStrings(const char *to, const char *from,
106  const char *spto, const char *spfrom,
107  ibis::bitvector &msk, const ibis::bitvector &sel,
108  char *buf, uint32_t nbuf) const;
109 
110 private:
    // Assignment operator is declared private with no definition visible
    // here -- presumably to disallow assignment; confirm in the .cpp file.
111  text& operator=(const text&);
112 }; // ibis::text
113 
/// A specialized low-cardinality text field.
/// Derived from ibis::text; keeps an ibis::dictionary of its values.
120 class ibis::category : public ibis::text {
121 public:
    /// Destructor.
122  virtual ~category();
123  category(const part* tbl, FILE* file);
124  category(const part* tbl, const char* name);
125  category(const ibis::column& col); // copy from column
126  // a special construct for meta-tag attributes
127  category(const part* tbl, const char* name, const char* value,
128  const char* dir=0, uint32_t nevt=0);
129 
    // For this class every keywordSearch overload simply forwards to the
    // stringSearch overload with the same arguments.
130  virtual long keywordSearch(const char* str, ibis::bitvector& hits) const {
131  return stringSearch(str, hits);}
132  virtual long keywordSearch(const std::vector<std::string>& vals,
133  ibis::bitvector& hits) const {
134  return stringSearch(vals, hits);}
135  virtual long keywordSearch(const char* str) const {
136  return stringSearch(str);}
137  virtual long keywordSearch(const std::vector<std::string>& vals) const {
138  return stringSearch(vals);}
139 
    /// Find rows with the exact string as the argument.
140  virtual long stringSearch(const char* str, ibis::bitvector& hits) const;
141  virtual long stringSearch(const std::vector<std::string>& vals,
142  ibis::bitvector& hits) const;
143  virtual long stringSearch(const char* str) const;
144  virtual long stringSearch(const std::vector<std::string>& vals) const;
145 
    /// Estimate the number of hits for a string pattern.
146  virtual long patternSearch(const char* pat) const;
147  virtual long patternSearch(const char* pat, ibis::bitvector &hits) const;
148 
    /// Estimate the cost of evaluating a Like expression.
150  virtual double estimateCost(const ibis::qLike& cmp) const;
151  virtual double estimateCost(const ibis::qString& cmp) const;
152  virtual double estimateCost(const ibis::qAnyString& cmp) const;
153 
    /// This function makes sure the index is ready.
    /// NOTE(review): `throw ()` is a dynamic exception specification,
    /// deprecated since C++11; `noexcept` is the modern spelling.
154  virtual void loadIndex(const char* =0, int =0) const throw ();
    /// Append the content in df to the directory dt.
156  virtual long append(const char* dt, const char* df, const uint32_t nold,
157  const uint32_t nnew, uint32_t nbuf, char* buf);
    /// Append the records in vals to the current working dataset.
    /// This overload does nothing and always returns -1.
158  virtual long append(const void*, const ibis::bitvector&) {return -1;}
    /// Return the integers corresponding to the selected strings.
160  virtual array_t<uint32_t>* selectUInts(const bitvector& mask) const;
    /// Retrieve the string values from the rows marked 1 in mask.
161  virtual std::vector<std::string>*
162  selectStrings(const bitvector& mask) const;
    /// Return the string at the ith row.
163  virtual int getString(uint32_t i, std::string &val) const;
164  // virtual std::vector<ibis::opaque>*
165  // selectOpaques(const bitvector& mask) const;
166 
    /// Return the number of key values.
167  virtual uint32_t getNumKeys() const;
    /// Return the ith value in the dictionary.
168  virtual const char* getKey(uint32_t i) const;
    /// Is the given string one of the keys in the dictionary?
    /// Return a null pointer if not.
169  virtual const char* isKey(const char* str) const;
170 
    /// Write the current content to the metadata file for the data
    /// partition.
171  virtual void write(FILE* file) const;
    /// Print header info.
172  virtual void print(std::ostream& out) const;
173 
    /// Build an ibis::direkte index using the existing primary data.
174  ibis::direkte* fillIndex(const char *dir=0) const;
    /// Return a pointer to the dictionary used for the categorical values.
175  virtual const ibis::dictionary* getDictionary() const;
    /// Replace the dictionary with the incoming one.
176  int setDictionary(const dictionary&);
177 
178 private:
179  // private member variables
180 
181  // dictionary is mutable in order to delay the reading of dictionary
182  // from disk as late as possible
183  mutable ibis::dictionary dic;
184 
185  // private member functions
186  void prepareMembers() const;
187  void readDictionary(const char *dir=0) const;
188 
    // Assignment operator is declared private with no definition visible
    // here -- presumably to disallow assignment; confirm in the .cpp file.
189  category& operator=(const category&);
190 }; // ibis::category
191 #endif // IBIS_CATEGORY_H
virtual long stringSearch(const char *str, ibis::bitvector &hits) const
Given a string literal, return a bitvector that marks the strings that match it. ...
Definition: category-hc.cpp:1841
virtual void print(std::ostream &out) const
!< Write the metadata entry.
Definition: category-hc.cpp:2689
virtual array_t< uint32_t > * selectUInts(const bitvector &mask) const
Return the positions of records marked 1 in the mask.
Definition: category-hc.cpp:2698
virtual std::vector< std::string > * selectStrings(const bitvector &mask) const
Retrieve the string values from the rows marked 1 in mask.
Definition: category-hc.cpp:130
ibis::direkte * fillIndex(const char *dir=0) const
Build an ibis::direkte index using the existing primary data.
Definition: category-hc.cpp:320
virtual void write(FILE *file) const
Write the current metadata to -part.txt of the data partition.
Definition: category-hc.cpp:2668
int readStrings1(const ibis::bitvector &, std::vector< std::string > &) const
Read the strings marked 1 in the mask.
Definition: category.cpp:3156
virtual double estimateCost(const ibis::qLike &cmp) const
Estimate the cost of evaluating a Like expression.
Definition: category-hc.cpp:789
virtual void unloadIndex() const
Unload the index associated with the column.
Definition: column.cpp:5880
virtual int operator()(std::vector< const char * > &tkns, char *buf)=0
A tokenizer must implement a two-argument operator().
virtual uint32_t getNumKeys() const
Return the number of key values.
Definition: category-hc.cpp:1425
virtual std::vector< std::string > * selectStrings(const bitvector &mask) const
Retrieve the string values from the rows marked 1 in mask.
Definition: category-hc.cpp:2757
void startPositions(const char *dir, char *buf, uint32_t nbuf) const
Locate the starting position of each string.
Definition: category-hc.cpp:1487
virtual long append(const void *, const ibis::bitvector &)
Append the records in vals to the current working dataset.
Definition: category.h:56
virtual long saveSelected(const ibis::bitvector &sel, const char *dest, char *buf, uint32_t nbuf)
Write the selected values to the specified directory.
Definition: category-hc.cpp:3658
The column contains one of the values in a list.
Definition: qExpr.h:560
int readString(uint32_t i, std::string &val) const
Read the string value of ith row.
Definition: category-hc.cpp:2992
virtual const char * isKey(const char *str) const
Is the given string one of the keys in the dictionary? Return a null pointer if not.
Definition: category-hc.cpp:1442
virtual array_t< int64_t > * selectLongs(const bitvector &mask) const
Return the starting positions of strings marked 1 in the mask.
Definition: category-hc.cpp:2718
The class to represent a column of a data partition.
Definition: column.h:65
virtual int getString(uint32_t i, std::string &val) const
Return the string value for the ith row.
Definition: category.h:66
virtual void print(std::ostream &out) const
Print header info.
Definition: category-hc.cpp:1409
int writeStrings(const char *to, const char *from, const char *spto, const char *spfrom, ibis::bitvector &msk, const ibis::bitvector &sel, char *buf, uint32_t nbuf) const
Write the selected strings.
Definition: category-hc.cpp:3739
virtual void loadIndex(const char *iopt=0, int ropt=0) const
Load the index associated with the column.
Definition: category-hc.cpp:1731
virtual int getString(uint32_t i, std::string &val) const
Return the string at the ith row.
Definition: category-hc.cpp:971
The class qString encapsulates information for comparing string values.
Definition: qExpr.h:504
virtual array_t< uint32_t > * selectUInts(const bitvector &mask) const
Return the integers corresponding to the selected strings.
Definition: category-hc.cpp:91
virtual ~tokenizer()
Destructor.
Definition: category.h:94
Arbitrary null-terminated strings.
Definition: table.h:51
virtual void write(FILE *file) const
Write the current content to the metadata file for the data partition.
Definition: category-hc.cpp:1356
virtual double estimateCost(const ibis::qContinuousRange &cmp) const
Estimate the cost of evaluating the query expression.
Definition: column.cpp:6612
A data structure for storing null-terminated text.
Definition: category.h:27
The class ibis::part represents a partition of a relational table.
Definition: part.h:27
TYPE_T
Supported data types.
Definition: table.h:25
virtual int getOpaque(uint32_t, ibis::opaque &) const
Return the raw binary value for the ith row.
Definition: category-hc.cpp:3406
virtual const char * findString(const char *str) const
If the input string is found in the data file, it is returned, else this function returns 0...
Definition: category-hc.cpp:3251
virtual double estimateCost(const ibis::qString &cmp) const
Estimate the cost of evaluating a string lookup.
Definition: category-hc.cpp:3640
A specialized low-cardinality text field.
Definition: category.h:120
virtual long append(const void *, const ibis::bitvector &)
Append the records in vals to the current working dataset.
Definition: category.h:158
virtual const char * getKey(uint32_t i) const
Return the ith value in the dictionary.
Definition: category-hc.cpp:1432
A tokenizer class to turn a string buffer into tokens.
Definition: category.h:81
This is an implementation of the simple bitmap index without the first binning step...
virtual long stringSearch(const char *str, ibis::bitvector &hits) const
Find rows with the exact string as the argument.
Definition: category-hc.cpp:672
int setDictionary(const dictionary &)
Replace the dictionary with the incoming one.
Definition: category-hc.cpp:523
A data structure to represent a sequence of bits.
Definition: bitvector.h:62
virtual const ibis::dictionary * getDictionary() const
Return a pointer to the dictionary used for the categorical values.
Definition: category-hc.cpp:512
virtual long patternSearch(const char *pat) const
Estimate the number of hits for a string pattern.
Definition: category-hc.cpp:893
const char * name() const
Name of the column.
Definition: column.h:78
int readStrings2(const ibis::bitvector &, std::vector< std::string > &) const
Read the strings marked 1 in the mask.
Definition: category.cpp:3297
const column * IDColumnForKeywordIndex() const
!< Print header info.
Definition: category-hc.cpp:3418
virtual void loadIndex(const char *=0, int=0) const
This function makes sure the index is ready.
Definition: category-hc.cpp:997
Provide a dual-directional mapping between strings and integers.
Definition: dict-0.h:19
Representing the operator 'LIKE'.
Definition: qExpr.h:585
A version of precise index that directly uses the integer values.
Definition: idirekte.h:17
Define the class ibis::column.
virtual ~category()
Destructor.
Definition: category-hc.cpp:77
Define a dictionary data structure used by ibis::category.
virtual long append(const char *dt, const char *df, const uint32_t nold, const uint32_t nnew, uint32_t nbuf, char *buf)
Append the data file stored in directory df to the corresponding data file in directory dt...
Definition: category-hc.cpp:1744
virtual long append(const char *dt, const char *df, const uint32_t nold, const uint32_t nnew, uint32_t nbuf, char *buf)
Append the content in df to the directory dt.
Definition: category-hc.cpp:1003

Make It A Bit Faster
Contact us
Disclaimers
FastBit source code
FastBit mailing list archive