00001
00002
00003
00004 #ifndef IBIS_COLUMN_H
00005 #define IBIS_COLUMN_H
00013 #include "table.h" // ibis::TYPE_T
00014 #include "qExpr.h"
00015 #include "bitvector.h"
00016 #include <string>
00017
// Forward declarations of the ibis types this header refers to by name
// only.  Their definitions are not part of this file.
namespace ibis {
    // String-valued column types and the dictionary they refer to.
    class dictionary;
    class category;
    class text;

    // Typed in-memory column containers.
    class colInts;
    class colUInts;
    class colLongs;
    class colULongs;
    class colFloats;
    class colDoubles;
} // namespace ibis
00033
00043 class FASTBIT_CXX_DLLSPEC ibis::column {
00044 public:
00045
00046 virtual ~column();
00048 column(const part* tbl, FILE* file);
00050 column(const part* tbl, ibis::TYPE_T t, const char* name,
00051 const char* desc="", double low=DBL_MAX, double high=-DBL_MAX);
00052 column(const column& rhs);
00053
00054
00056 ibis::TYPE_T type() const {return m_type;}
00057 const char* name() const {return m_name.c_str();}
00058 const char* description() const {return m_desc.c_str();}
00059 const double& lowerBound() const {return lower;}
00060 const double& upperBound() const {return upper;}
00061 inline int elementSize() const;
00062 inline bool isFloat() const;
00063 inline bool isInteger() const;
00064 inline bool isNumeric() const;
00065 void description(const char* d) {m_desc = d;}
00066 void lowerBound(double d) {lower = d;}
00067 void upperBound(double d) {upper = d;}
00068 const part* partition() const {return thePart;}
00069
00070
00071 const char* indexSpec() const;
00072 uint32_t numBins() const;
00073
00074 void indexSpec(const char* spec) {m_bins=spec;}
00076 void preferredBounds(std::vector<double>&) const;
00078 void binWeights(std::vector<uint32_t>&) const;
00079
00083 virtual void computeMinMax();
00084 virtual void computeMinMax(const char *dir);
00088 virtual void computeMinMax(const char *dir,
00089 double& min, double &max) const;
00090
00092 virtual void loadIndex(const char* opt=0) const throw ();
00094 void unloadIndex() const;
00096 virtual long indexSize() const;
00099 void indexSpeedTest() const;
00101 void purgeIndexFile(const char *dir=0) const;
00102
00106 const char* dataFileName(std::string& fname, const char *dir=0) const;
00108 const char* nullMaskName(std::string& fname) const;
00109 void getNullMask(bitvector& mask) const;
00110
00114 virtual void getString(uint32_t i, std::string &val) const {};
00117 virtual const char* findString(const char* str) const
00118 {return static_cast<const char*>(0);}
00119
00122 array_t<int32_t>* getIntArray() const;
00124 array_t<float>* getFloatArray() const;
00126 array_t<double>* getDoubleArray() const;
00127 ibis::fileManager::storage* getRawData() const;
00128 template <typename T> int getRawData(array_t<T>& vals) const;
00129
00132 virtual array_t<char>* selectBytes(const bitvector& mask) const;
00133 virtual array_t<unsigned char>* selectUBytes(const bitvector& mask) const;
00134 virtual array_t<int16_t>* selectShorts(const bitvector& mask) const;
00135 virtual array_t<uint16_t>* selectUShorts(const bitvector& mask) const;
00136 virtual array_t<int32_t>* selectInts(const bitvector& mask) const;
00137 virtual array_t<int64_t>* selectLongs(const bitvector& mask) const;
00138 virtual array_t<uint64_t>* selectULongs(const bitvector& mask) const;
00139 virtual array_t<float>* selectFloats(const bitvector& mask) const;
00140 virtual array_t<double>* selectDoubles(const bitvector& mask) const;
00141 virtual std::vector<std::string>*
00142 selectStrings(const bitvector& mask) const {return 0;}
00143 virtual array_t<uint32_t>* selectUInts(const bitvector& mask) const;
00144 template <typename T>
00145 long selectValues(const bitvector& mask, array_t<T>& vals) const;
00146 template <typename T>
00147 long selectValues(const bitvector& mask,
00148 array_t<T>& vals, array_t<uint32_t>& inds) const;
00149
00150 virtual void write(FILE* file) const;
00151 virtual void print(std::ostream& out) const;
00152 void logMessage(const char* event, const char* fmt, ...) const;
00153 void logWarning(const char* event, const char* fmt, ...) const;
00154
00157 int expandRange(ibis::qContinuousRange& rng) const;
00158 int contractRange(ibis::qContinuousRange& rng) const;
00159
00169 virtual long estimateRange(const ibis::qContinuousRange& cmp,
00170 ibis::bitvector& low,
00171 ibis::bitvector& high) const;
00172 virtual long estimateRange(const ibis::qDiscreteRange& cmp,
00173 ibis::bitvector& low,
00174 ibis::bitvector& high) const;
00175
00178 virtual long evaluateRange(const ibis::qContinuousRange& cmp,
00179 const ibis::bitvector& mask,
00180 ibis::bitvector& res) const;
00181 virtual long evaluateRange(const ibis::qDiscreteRange& cmp,
00182 const ibis::bitvector& mask,
00183 ibis::bitvector& res) const;
00184
00188 virtual long estimateRange(const ibis::qContinuousRange& cmp) const;
00189 virtual long estimateRange(const ibis::qDiscreteRange& cmp) const;
00190
00192 virtual double estimateCost(const ibis::qContinuousRange& cmp) const;
00193 virtual double estimateCost(const ibis::qDiscreteRange& cmp) const;
00194 virtual double estimateCost(const ibis::qString& cmp) const {
00195 return 0;}
00196 virtual double estimateCost(const ibis::qMultiString& cmp) const {
00197 return 0;}
00198
00202 virtual float getUndecidable(const ibis::qContinuousRange& cmp,
00203 ibis::bitvector& iffy) const;
00204 virtual float getUndecidable(const ibis::qDiscreteRange& cmp,
00205 ibis::bitvector& iffy) const;
00206
00208 virtual long append(const char* dt, const char* df, const uint32_t nold,
00209 const uint32_t nnew, const uint32_t nbuf, char* buf);
00210
00212 virtual long writeData(const char* dir, uint32_t nold, uint32_t nnew,
00213 ibis::bitvector& mask, const void *va1,
00214 const void *va2=0);
00215
00216 virtual long saveSelected(const ibis::bitvector& sel, const char *dest,
00217 char *buf, uint32_t nbuf);
00218
00221 long truncateData(const char* dir, uint32_t nent,
00222 ibis::bitvector& mask) const;
00223
00224
00225
00226
00229 virtual double getActualMin() const;
00232 virtual double getActualMax() const;
00234 virtual double getSum() const;
00241 long getCumulativeDistribution(std::vector<double>& bounds,
00242 std::vector<uint32_t>& counts) const;
00254 long getDistribution(std::vector<double>& bbs,
00255 std::vector<uint32_t>& counts) const;
00256
00257 class info;
00258 class indexLock;
00259 class mutexLock;
00260
00261 protected:
00262
00263 const part* thePart;
00264 ibis::bitvector mask_;
00265 ibis::TYPE_T m_type;
00266 std::string m_name;
00267 std::string m_desc;
00268 std::string m_bins;
00269 double lower;
00270 double upper;
00272 mutable ibis::index* idx;
00273
00275 void logError(const char* event, const char* fmt, ...) const;
00278 long string2int(int fptr, dictionary& dic, uint32_t nbuf, char* buf,
00279 array_t<uint32_t>& out) const;
00281 double computeMin() const;
00283 double computeMax() const;
00285 double computeSum() const;
00288 void actualMinMax(const char *fname, const ibis::bitvector& mask,
00289 double &min, double &max) const;
00290 template <typename T>
00291 void actualMinMax(const array_t<T>& vals, const ibis::bitvector& mask,
00292 double& min, double& max) const;
00293 template <typename T>
00294 T computeMin(const array_t<T>& vals,
00295 const ibis::bitvector& mask) const;
00296 template <typename T>
00297 T computeMax(const array_t<T>& vals,
00298 const ibis::bitvector& mask) const;
00299 template <typename T>
00300 double computeSum(const array_t<T>& vals,
00301 const ibis::bitvector& mask) const;
00302
00303 class readLock;
00304 class writeLock;
00305 class softWriteLock;
00306 friend class readLock;
00307 friend class writeLock;
00308 friend class indexLock;
00309 friend class mutexLock;
00310 friend class softWriteLock;
00311
00312 private:
00313
00314
00315 mutable pthread_rwlock_t rwlock;
00316 mutable pthread_mutex_t mutex;
00317 mutable uint32_t idxcnt;
00318
00319 const column& operator=(const column&);
00320 };
00321
00324 class FASTBIT_CXX_DLLSPEC ibis::column::info {
00325 public:
00326 const char* name;
00327 const char* description;
00328 const double expectedMin;
00329 const double expectedMax;
00330 const ibis::TYPE_T type;
00331 info(const ibis::column& col)
00332 : name(col.name()), description(col.description()),
00333 expectedMin(col.lowerBound()),
00334 expectedMax(col.upperBound()), type(col.type()) {};
00335 };
00336
00340 class ibis::column::indexLock {
00341 public:
00342 indexLock(const ibis::column* col, const char* m)
00343 : theColumn(col), mesg(m) {
00344 #if defined(DEBUG) && DEBUG > 0
00345 ibis::util::logMessage("ibis::column::indexLock",
00346 "locking column %s for %s", col->name(),
00347 (m ? m : "?"));
00348 #endif
00349
00350 if (theColumn->idxcnt == 0 && theColumn->idx == 0)
00351 theColumn->loadIndex();
00352 if (theColumn->idx != 0) {
00353 ++ theColumn->idxcnt;
00354
00355 int ierr = pthread_rwlock_rdlock(&(col->rwlock));
00356 if (ierr)
00357 col->logWarning("gainReadAccess", "pthread_rwlock_rdlock() "
00358 "for %s returned %d", m, ierr);
00359 else if (ibis::gVerbose > 9)
00360 col->logMessage("gainReadAccess",
00361 "pthread_rwlock_rdlock for %s", m);
00362 }
00363 }
00364 ~indexLock() {
00365 #if defined(DEBUG) && DEBUG > 0
00366 ibis::util::logMessage("ibis::column::indexLock",
00367 "unlocking column %s (%s)", theColumn->name(),
00368 (mesg ? mesg : "?"));
00369 #endif
00370 if (theColumn->idx != 0) {
00371 int ierr = pthread_rwlock_unlock(&(theColumn->rwlock));
00372 if (ierr)
00373 theColumn->logWarning("releaseReadAccess",
00374 "pthread_rwlock_unlock() for %s "
00375 "returned %d", mesg, ierr);
00376 else if (ibis::gVerbose > 9)
00377 theColumn->logMessage("releaseReadAccess",
00378 "pthread_rwlock_unlock for %s", mesg);
00379
00380 -- (theColumn->idxcnt);
00381 }
00382 }
00383
00384 const ibis::index* getIndex() const {return theColumn->idx;};
00385
00386 private:
00387 const ibis::column* theColumn;
00388 const char* mesg;
00389
00390 indexLock();
00391 indexLock(const indexLock&);
00392 const indexLock& operator=(const indexLock&);
00393 };
00394
00396 class ibis::column::mutexLock {
00397 public:
00398 mutexLock(const ibis::column* col, const char* m)
00399 : theColumn(col), mesg(m) {
00400 if (ibis::gVerbose > 9)
00401 col->logMessage("gainExclusiveAccess",
00402 "pthread_mutex_lock for %s", m);
00403 int ierr = pthread_mutex_lock(&(col->mutex));
00404 if (ierr)
00405 col->logWarning("gainExclusiveAccess", "pthread_mutex_lock() "
00406 "for %s returned %d", m, ierr);
00407 }
00408 ~mutexLock() {
00409 if (ibis::gVerbose > 9)
00410 theColumn->logMessage("releaseExclusiveAccess",
00411 "pthread_mutex_unlock for %s", mesg);
00412 int ierr = pthread_mutex_unlock(&(theColumn->mutex));
00413 if (ierr)
00414 theColumn->logWarning("releaseExclusiveAccess",
00415 "pthread_mutex_unlock() for %s "
00416 "returned %d", mesg, ierr);
00417 }
00418
00419 private:
00420 const ibis::column* theColumn;
00421 const char* mesg;
00422
00423 mutexLock() {};
00424 mutexLock(const mutexLock&) {};
00425 const mutexLock& operator=(const mutexLock&);
00426 };
00427
00429 class ibis::column::writeLock {
00430 public:
00431 writeLock(const ibis::column* col, const char* m)
00432 : theColumn(col), mesg(m) {
00433 #if defined(DEBUG) && DEBUG > 0
00434 ibis::util::logMessage("ibis::column::writeLock",
00435 "locking column %s for %s", col->name(),
00436 (m ? m : "?"));
00437 #endif
00438 int ierr = pthread_rwlock_wrlock(&(col->rwlock));
00439 if (ierr)
00440 col->logWarning("gainWriteAccess", "pthread_rwlock_wrlock() "
00441 "for %s returned %d", m, ierr);
00442 else if (ibis::gVerbose > 9)
00443 col->logMessage("gainWriteAccess",
00444 "pthread_rwlock_wrlock for %s", m);
00445 }
00446 ~writeLock() {
00447 #if defined(DEBUG) && DEBUG > 0
00448 ibis::util::logMessage("ibis::column::writeLock",
00449 "unlocking column %s (%s)", theColumn->name(),
00450 (mesg ? mesg : "?"));
00451 #endif
00452 int ierr = pthread_rwlock_unlock(&(theColumn->rwlock));
00453 if (ierr)
00454 theColumn->logWarning("releaseWriteAccess",
00455 "pthread_rwlock_unlock() for %s "
00456 "returned %d", mesg, ierr);
00457 else if (ibis::gVerbose > 9)
00458 theColumn->logMessage("releaseWriteAccess",
00459 "pthread_rwlock_unlock for %s", mesg);
00460 }
00461
00462 private:
00463 const ibis::column* theColumn;
00464 const char* mesg;
00465
00466 writeLock();
00467 writeLock(const writeLock&);
00468 const writeLock& operator=(const writeLock&);
00469 };
00470
00472 class ibis::column::softWriteLock {
00473 public:
00474 softWriteLock(const ibis::column* col, const char* m)
00475 : theColumn(col), mesg(m),
00476 locked(0 == pthread_rwlock_trywrlock(&(col->rwlock))) {
00477 #if defined(DEBUG) && DEBUG > 0
00478 ibis::util::logMessage("ibis::column::softWriteLock",
00479 "locking column %s for %s", col->name(),
00480 (m ? m : "?"));
00481 #endif
00482 if (ibis::gVerbose > 9 && locked)
00483 col->logMessage("gainWriteAccess",
00484 "pthread_rwlock_wrlock for %s", m);
00485 }
00486 ~softWriteLock() {
00487 #if defined(DEBUG) && DEBUG > 0
00488 ibis::util::logMessage("ibis::column::softWriteLock",
00489 "unlocking column %s (%s)", theColumn->name(),
00490 (mesg ? mesg : "?"));
00491 #endif
00492 if (locked) {
00493 int ierr = pthread_rwlock_unlock(&(theColumn->rwlock));
00494 if (ierr)
00495 theColumn->logWarning("releaseWriteAccess",
00496 "pthread_rwlock_unlock() for %s "
00497 "returned %d", mesg, ierr);
00498 else if (ibis::gVerbose > 9)
00499 theColumn->logMessage("releaseWriteAccess",
00500 "pthread_rwlock_unlock for %s", mesg);
00501 }
00502 }
00503 bool isLocked() const {return locked;}
00504
00505 private:
00506 const ibis::column* theColumn;
00507 const char* mesg;
00508 const bool locked;
00509
00510 softWriteLock();
00511 softWriteLock(const softWriteLock&);
00512 const softWriteLock& operator=(const softWriteLock&);
00513 };
00514
00516 class ibis::column::readLock {
00517 public:
00518 readLock(const ibis::column* col, const char* m)
00519 : theColumn(col), mesg(m) {
00520 #if defined(DEBUG) && DEBUG > 0
00521 ibis::util::logMessage("ibis::column::readLock",
00522 "locking column %s for %s", col->name(),
00523 (m ? m : "?"));
00524 #endif
00525 int ierr = pthread_rwlock_rdlock(&(col->rwlock));
00526 if (ierr)
00527 col->logWarning("gainReadAccess", "pthread_rwlock_rdlock() "
00528 "for %s returned %d", m, ierr);
00529 else if (ibis::gVerbose > 9)
00530 col->logMessage("gainReadAccess",
00531 "pthread_rwlock_rdlock for %s", m);
00532 }
00533 ~readLock() {
00534 #if defined(DEBUG) && DEBUG > 0
00535 ibis::util::logMessage("ibis::column::readLock",
00536 "unlocking column %s (%s)", theColumn->name(),
00537 (mesg ? mesg : "?"));
00538 #endif
00539 int ierr = pthread_rwlock_unlock(&(theColumn->rwlock));
00540 if (ierr)
00541 theColumn->logWarning("releaseReadAccess",
00542 "pthread_rwlock_unlock() for %s "
00543 "returned %d", mesg, ierr);
00544 else if (ibis::gVerbose > 9)
00545 theColumn->logMessage("releaseReadAccess",
00546 "pthread_rwlock_unlock for %s", mesg);
00547 }
00548
00549 private:
00550 const ibis::column* theColumn;
00551 const char* mesg;
00552
00553 readLock();
00554 readLock(const readLock&);
00555 const readLock& operator=(const readLock&);
00556 };
00557
00558 inline int ibis::column::elementSize() const {
00559 int sz = -1;
00560 switch (m_type) {
00561 case ibis::OID: sz = sizeof(rid_t); break;
00562 case ibis::INT: sz = sizeof(int32_t); break;
00563 case ibis::UINT: sz = sizeof(uint32_t); break;
00564 case ibis::LONG: sz = sizeof(int64_t); break;
00565 case ibis::ULONG: sz = sizeof(uint64_t); break;
00566 case ibis::FLOAT: sz = sizeof(float); break;
00567 case ibis::DOUBLE: sz = sizeof(double); break;
00568 case ibis::BYTE: sz = sizeof(char); break;
00569 case ibis::UBYTE: sz = sizeof(unsigned char); break;
00570 case ibis::SHORT: sz = sizeof(int16_t); break;
00571 case ibis::USHORT: sz = sizeof(uint16_t); break;
00572 case ibis::CATEGORY: sz = 0; break;
00573 case ibis::TEXT: sz = 0; break;
00574 default: sz = -1; break;
00575 }
00576 return sz;
00577 }
00578
00579 inline bool ibis::column::isFloat() const {
00580 return(m_type == ibis::FLOAT || m_type == ibis::DOUBLE);
00581 }
00582
00583 inline bool ibis::column::isInteger() const {
00584 return(m_type == ibis::BYTE || m_type == ibis::UBYTE ||
00585 m_type == ibis::SHORT || m_type == ibis::USHORT ||
00586 m_type == ibis::INT || m_type == ibis::UINT ||
00587 m_type == ibis::LONG || m_type == ibis::ULONG);
00588 }
00589
00590 inline bool ibis::column::isNumeric() const {
00591 return(m_type == ibis::BYTE || m_type == ibis::UBYTE ||
00592 m_type == ibis::SHORT || m_type == ibis::USHORT ||
00593 m_type == ibis::INT || m_type == ibis::UINT ||
00594 m_type == ibis::LONG || m_type == ibis::ULONG ||
00595 m_type == ibis::FLOAT || m_type == ibis::DOUBLE);
00596 }
00597
00598
00599 inline std::ostream& operator<<(std::ostream& out, const ibis::column& prop) {
00600 prop.print(out);
00601 return out;
00602 }
00603 #endif // IBIS_COLUMN_H