00001
00002
00003
00004 #ifndef IBIS_KEYWORDS_H
00005 #define IBIS_KEYWORDS_H
00010
00011 #include "index.h"
00012 #include "category.h"
00013
00032 class ibis::keywords : public ibis::index {
00033 public:
00034 virtual ~keywords() {clear();}
00035 keywords(const ibis::column* c,
00036 const ibis::column* idcol=0, const char* f=0);
00037 keywords(const ibis::column* c, ibis::fileManager::storage* st);
00038
00039 virtual INDEX_TYPE type() const {return KEYWORDS;}
00040 virtual const char* name() const {return "keywords";}
00041 virtual void binBoundaries(std::vector<double>& b) const {b.clear();}
00042 virtual void binWeights(std::vector<uint32_t>& b) const;
00043 virtual double getMin() const {return DBL_MAX;}
00044 virtual double getMax() const {return -DBL_MAX;}
00045 virtual double getSum() const {return -DBL_MAX;}
00047 long search(const char* kw, ibis::bitvector& hits) const;
00049 long search(const char* kw) const;
00050
00051 virtual void print(std::ostream& out) const;
00052 virtual int write(const char* dt) const;
00053 virtual int read(const char* idxfile);
00054 virtual int read(ibis::fileManager::storage* st);
00055 virtual long append(const char* dt, const char* df, uint32_t nnew);
00056
00057 virtual long evaluate(const ibis::qContinuousRange& expr,
00058 ibis::bitvector& hits) const;
00059 virtual void estimate(const ibis::qContinuousRange& expr,
00060 ibis::bitvector& lower,
00061 ibis::bitvector& upper) const;
00062 virtual uint32_t estimate(const ibis::qContinuousRange& expr) const;
00065 virtual float undecidable(const ibis::qContinuousRange& expr,
00066 ibis::bitvector& iffy) const {
00067 iffy.clear();
00068 return 0.0;
00069 }
00070 virtual double estimateCost(const ibis::qContinuousRange& expr) const;
00071 virtual double estimateCost(const ibis::qDiscreteRange& expr) const;
00072
00073 protected:
00074 virtual size_t getSerialSize() const throw();
00078 int readTermDocFile(const ibis::column* idcol, const char* f);
00080 inline char readKeyword(const char*& buf, std::string &key) const;
00082 inline uint32_t readUInt(const char*& buf) const;
00084 int readLine(std::istream& in, std::string& key,
00085 std::vector<uint32_t>& idlist,
00086 char* buf, uint32_t nbuf) const;
00087 void setBits(std::vector<uint32_t>& pos, ibis::bitvector& bvec) const;
00088
00090 void clear();
00091
00092 private:
00093 ibis::dictionary terms;
00094 };
00095
00098 inline char ibis::keywords::readKeyword(const char*& buf,
00099 std::string &keyword) const {
00100 while (isspace(*buf))
00101 ++ buf;
00102 while (isprint(*buf) && !(isspace(*buf) || *buf == ':')) {
00103 keyword += *buf;
00104 ++ buf;
00105 }
00106 while (isspace(*buf))
00107 ++ buf;
00108 return *buf;
00109 }
00110
00111 inline uint32_t ibis::keywords::readUInt(const char*& buf) const {
00112 uint32_t res = 0;
00113 while (*buf && ! isdigit(*buf))
00114 ++ buf;
00115
00116 while (isdigit(*buf)) {
00117 res = res * 10 + (*buf - '0');
00118 ++ buf;
00119 }
00120 return res;
00121 }
00122 #endif