00001
00002
00003
00004 #ifndef IBIS_CATEGORY_H
00005 #define IBIS_CATEGORY_H
00015 #include "irelic.h"
00016 #include "column.h"
00017
00023 class ibis::text : public ibis::column {
00024 public:
00025 virtual ~text() {unloadIndex();};
00026 text(const part* tbl, FILE* file);
00027 text(const part* tbl, const char* name, ibis::TYPE_T t=ibis::TEXT);
00028 text(const ibis::column& col);
00029
00030 virtual long keywordSearch(const char* str, ibis::bitvector& hits) const;
00031 virtual long keywordSearch(const char* str) const;
00032
00033
00034
00035
00036 virtual long search(const char* str, ibis::bitvector& hits) const;
00037 virtual long search(const std::vector<std::string>& strs,
00038 ibis::bitvector& hits) const;
00039 virtual long search(const char* str) const {
00040 ibis::bitvector tmp;
00041 long ierr = search(str, tmp);
00042 return (ierr < 0 ? ierr : tmp.cnt());
00043 }
00044 virtual long search(const std::vector<std::string>& strs) const {
00045 ibis::bitvector tmp;
00046 long ierr = search(strs, tmp);
00047 return (ierr < 0 ? ierr : tmp.cnt());
00048 }
00049
00050 virtual double estimateCost(const ibis::qString& cmp) const;
00051 virtual double estimateCost(const ibis::qMultiString& cmp) const;
00052 virtual double estimateCost(const ibis::qContinuousRange& cmp) const {
00053 return ibis::column::estimateCost(cmp);}
00054 virtual double estimateCost(const ibis::qDiscreteRange& cmp) const {
00055 return ibis::column::estimateCost(cmp);}
00056
00057 virtual long append(const char* dt, const char* df, const uint32_t nold,
00058 const uint32_t nnew, uint32_t nbuf, char* buf);
00059 virtual long saveSelected(const ibis::bitvector& sel, const char *dest,
00060 char *buf, uint32_t nbuf);
00062 virtual array_t<uint32_t>* selectUInts(const bitvector& mask) const;
00064 virtual array_t<int64_t>* selectLongs(const bitvector& mask) const;
00065 virtual
00066 std::vector<std::string>* selectStrings(const bitvector& mask) const;
00067 virtual const char* findString(const char* str) const;
00068 virtual void getString(uint32_t i, std::string &val) const {
00069 readString(i, val);}
00070
00071 virtual void write(FILE* file) const;
00072 virtual void print(std::ostream& out) const;
00073
00074 const column* IDColumnForKeywordIndex() const;
00075
00076 protected:
00078 void startPositions(const char *dir, char *buf, uint32_t nbuf) const;
00080 void readString(uint32_t i, std::string &val) const;
00082 int readString(std::string&, int, long, long, char*, uint32_t,
00083 uint32_t&, off_t&) const;
00084 int writeStrings(const char *to, const char *from,
00085 const char *spto, const char *spfrom,
00086 ibis::bitvector &msk, const ibis::bitvector &sel,
00087 char *buf, uint32_t nbuf) const;
00088
00089 private:
00090 text& operator=(const text&);
00091 };
00092
00096 class ibis::dictionary {
00097 public:
00098 typedef std::map< const char*, uint32_t, ibis::lessi > wordList;
00099
00100 ~dictionary() {clear();}
00101 dictionary(const dictionary& dic);
00103 dictionary() : svec(), s2i(), ncontig(1) {
00104 svec.push_back(const_cast<char*>(nullstring.c_str()));
00105 }
00106
00108 uint32_t size() const {return s2i.size();}
00109 inline const char* operator[](uint32_t i) const;
00110 inline uint32_t operator[](const char* str) const;
00111 inline const char* find(const char* str) const;
00112 inline uint32_t insert(const char* str);
00113 inline uint32_t insertRaw(char* str);
00114
00115 void clear();
00116 void read(const char* name);
00117 void write(const char* name) const;
00118
00119 protected:
00120
00128 std::vector<char*> svec;
00130 wordList s2i;
00133 uint32_t ncontig;
00134
00144 static std::string nullstring;
00145 void copy(const dictionary& rhs);
00146
00147 private:
00148 dictionary& operator=(const dictionary&);
00149 };
00150
00157 class ibis::category : public ibis::text {
00158 public:
00159 virtual ~category();
00160 category(const part* tbl, FILE* file);
00161 category(const part* tbl, const char* name);
00162 category(const ibis::column& col);
00163
00164 category(const part* tbl, const char* name, const char* value,
00165 const char* dir=0, uint32_t nevt=0);
00166
00168 virtual long search(const char* str, ibis::bitvector& hits) const;
00170 virtual long search(const std::vector<std::string>& vals,
00171 ibis::bitvector& hits) const;
00173 virtual long search(const char* str) const;
00175 virtual long search(const std::vector<std::string>& vals) const;
00176
00177 virtual long likeSearch(const char* pat) const;
00178 virtual long likeSearch(const char* pat, ibis::bitvector &hits) const;
00179 virtual double estimateCost(const ibis::qLike& cmp) const;
00180 virtual double estimateCost(const ibis::qString& cmp) const;
00181 virtual double estimateCost(const ibis::qMultiString& cmp) const;
00182 virtual double estimateCost(const ibis::qContinuousRange& cmp) const {
00183 return ibis::column::estimateCost(cmp);}
00184 virtual double estimateCost(const ibis::qDiscreteRange& cmp) const {
00185 return ibis::column::estimateCost(cmp);}
00186
00188 virtual long append(const char* dt, const char* df, const uint32_t nold,
00189 const uint32_t nnew, uint32_t nbuf, char* buf);
00191 virtual array_t<uint32_t>* selectUInts(const bitvector& mask) const;
00192
00193
00194 virtual void getString(uint32_t i, std::string &val) const;
00195
00197 virtual uint32_t getNumKeys() const {return dic.size();}
00199 virtual const char* getKey(uint32_t i) const {return dic[i];}
00202 virtual const char* isKey(const char* str) const {
00203 return dic.find(str);}
00204
00205 virtual void write(FILE* file) const;
00206 virtual void print(std::ostream& out) const;
00207
00208 ibis::relic* fillIndex(const char *dir=0) const;
00209
00210 private:
00211
00212
00213
00214
00215 mutable ibis::dictionary dic;
00216
00217
00218 void prepareMembers() const;
00219 void readDictionary(const char *dir=0) const;
00220
00221 category& operator=(const category&);
00222 };
00223
00228 class ibis::blob : public ibis::column {
00229 public:
00230 virtual ~blob() {};
00231 blob(const part*, FILE*);
00232 blob(const part*, const char*);
00233 blob(const ibis::column&);
00234
00235 virtual long search(const char*, ibis::bitvector&) const {return -1;}
00236
00237 virtual void computeMinMax() {}
00238 virtual void computeMinMax(const char*) {}
00239 virtual void computeMinMax(const char*, double&, double&) const {}
00240 virtual void loadIndex(const char*, int) const throw () {}
00241 virtual long indexSize() const {return -1;}
00242 virtual int getValuesArray(void*) const {return -1;}
00243
00244 virtual array_t<char>* selectBytes(const bitvector&) const {return 0;}
00245 virtual array_t<unsigned char>* selectUBytes(const bitvector&) const {return 0;}
00246 virtual array_t<int16_t>* selectShorts(const bitvector&) const {return 0;}
00247 virtual array_t<uint16_t>* selectUShorts(const bitvector&) const {return 0;}
00248 virtual array_t<int32_t>* selectInts(const bitvector&) const {return 0;}
00249 virtual array_t<uint32_t>* selectUInts(const bitvector&) const {return 0;}
00250 virtual array_t<int64_t>* selectLongs(const bitvector&) const {return 0;}
00251 virtual array_t<uint64_t>* selectULongs(const bitvector&) const {return 0;}
00252 virtual array_t<float>* selectFloats(const bitvector&) const {return 0;}
00253 virtual array_t<double>* selectDoubles(const bitvector&) const {return 0;}
00254 virtual std::vector<std::string>* selectStrings(const bitvector&) const {return 0;}
00255
00256 virtual long estimateRange(const ibis::qContinuousRange&,
00257 ibis::bitvector&,
00258 ibis::bitvector&) const {return -1;}
00259 virtual long estimateRange(const ibis::qDiscreteRange&,
00260 ibis::bitvector&,
00261 ibis::bitvector&) const {return -1;}
00262 virtual long evaluateRange(const ibis::qContinuousRange&,
00263 const ibis::bitvector&,
00264 ibis::bitvector&) const {return -1;}
00265 virtual long evaluateRange(const ibis::qDiscreteRange&,
00266 const ibis::bitvector&,
00267 ibis::bitvector&) const {return -1;}
00268 virtual long estimateRange(const ibis::qContinuousRange&) const {return -1;}
00269 virtual long estimateRange(const ibis::qDiscreteRange&) const {return -1;}
00270 virtual double estimateCost(const ibis::qContinuousRange&) const {return 0;}
00271 virtual double estimateCost(const ibis::qDiscreteRange& cmp) const {return 0;}
00272 virtual double estimateCost(const ibis::qString&) const {return 0;}
00273 virtual double estimateCost(const ibis::qMultiString&) const {return 0;}
00274
00275 virtual float getUndecidable(const ibis::qContinuousRange&,
00276 ibis::bitvector&) const {return 1;}
00277 virtual float getUndecidable(const ibis::qDiscreteRange&,
00278 ibis::bitvector&) const {return 1;}
00279
00280 virtual double getActualMin() const {return DBL_MAX;}
00281 virtual double getActualMax() const {return -DBL_MAX;}
00282 virtual double getSum() const {return 0;}
00283 virtual long append(const void*, const ibis::bitvector&) {return -1;}
00284
00285 virtual long append(const char* dt, const char* df, const uint32_t nold,
00286 const uint32_t nnew, uint32_t nbuf, char* buf);
00287 virtual long writeData(const char* dir, uint32_t nold, uint32_t nnew,
00288 ibis::bitvector& mask, const void *va1,
00289 void *va2);
00290
00291 virtual void write(FILE*) const;
00292 virtual void print(std::ostream&) const;
00293
00294 long countRawBytes(const bitvector&) const;
00295 int selectRawBytes(const bitvector&,
00296 array_t<unsigned char>&, array_t<uint32_t>&) const;
00297 int getBlob(uint32_t ind, unsigned char *&buf, uint32_t &size) const;
00298
00299 protected:
00300 int extractAll(const bitvector&,
00301 array_t<unsigned char>&, array_t<uint32_t>&,
00302 const array_t<unsigned char>&,
00303 const array_t<int64_t>&) const;
00304 int extractSome(const bitvector&,
00305 array_t<unsigned char>&, array_t<uint32_t>&,
00306 const array_t<unsigned char>&, const array_t<int64_t>&,
00307 const uint32_t) const;
00308 int extractAll(const bitvector&,
00309 array_t<unsigned char>&, array_t<uint32_t>&,
00310 const char*, const array_t<int64_t>&) const;
00311 int extractSome(const bitvector&,
00312 array_t<unsigned char>&, array_t<uint32_t>&,
00313 const char*, const array_t<int64_t>&, const uint32_t) const;
00314 int extractSome(const bitvector&,
00315 array_t<unsigned char>&, array_t<uint32_t>&,
00316 const char*, const char*, const uint32_t) const;
00317 int readBlob(uint32_t ind, unsigned char *&buf, uint32_t &size,
00318 const array_t<int64_t> &starts, const char *datafile) const;
00319 int readBlob(uint32_t ind, unsigned char *&buf, uint32_t &size,
00320 const char *spfile, const char *datafile) const;
00321 };
00322
00326 inline const char* ibis::dictionary::operator[](uint32_t i) const {
00327 if (i < svec.size()) {
00328 return svec[i];
00329 }
00330 else {
00331 return static_cast<const char*>(0);
00332 }
00333 }
00334
00338 inline uint32_t ibis::dictionary::operator[](const char* str) const {
00339 if (str == 0) return 0;
00340 if (*str == 0) return 0;
00341 wordList::const_iterator it = s2i.find(str);
00342 if (it != s2i.end()) return (*it).second;
00343 else return svec.size();
00344 }
00345
00349 inline const char* ibis::dictionary::find(const char* str) const {
00350 const char* ret = 0;
00351 if (s2i.find(str) != s2i.end())
00352 ret = str;
00353 return ret;
00354 }
00355
00358 inline uint32_t ibis::dictionary::insert(const char* str) {
00359 if (str == 0) return 0;
00360 if (*str == 0) return 0;
00361 wordList::const_iterator it = s2i.find(str);
00362 if (it != s2i.end()) {
00363 return (*it).second;
00364 }
00365 else {
00366 uint32_t ret = svec.size();
00367 char* tmp = ibis::util::strnewdup(str);
00368 svec.push_back(tmp);
00369 s2i[tmp] = ret;
00370 return ret;
00371 }
00372 }
00373
00379 inline uint32_t ibis::dictionary::insertRaw(char* str) {
00380 if (str == 0) return 0;
00381 if (*str == 0) return 0;
00382 wordList::const_iterator it = s2i.find(str);
00383 if (it != s2i.end()) {
00384 return (*it).second;
00385 }
00386 else {
00387 uint32_t ret = svec.size();
00388 svec.push_back(str);
00389 s2i[str] = ret;
00390 return ret;
00391 }
00392 }
00393 #endif // IBIS_CATEGORY_H