query.h
Go to the documentation of this file.
1 // File: $Id$
2 // Author: John Wu <John.Wu at ACM.org>
3 // Lawrence Berkeley National Laboratory
4 // Copyright (c) 2000-2016 the Regents of the University of California
5 #ifndef IBIS_QUERY_H
6 #define IBIS_QUERY_H
7 #include "part.h" // ibis::part
11 #include "whereClause.h" // ibis::whereClause
12 #include "selectClause.h" // ibis::selectClause
13 
48 class FASTBIT_CXX_DLLSPEC ibis::query {
49 public:
50  enum QUERY_STATE {
59  HITS_TRUNCATED
60  };
61 
62  virtual ~query();
63  query(const char* dir, const ibis::partList& tl);
64  query(const char* uid=0, const part* et=0, const char* pref=0);
65 
// Return the identifier of this query: the stored myID pointer itself,
// not a copy of the string.
67  const char* id() const {return myID;}
// Return the directory used for persistent data of this query: the stored
// myDir pointer itself, not a copy of the string.
71  const char* dir() const {return myDir;}
// Return the user name recorded for this query (the member `user`); the
// stored pointer is returned as is.
73  const char* userName() const {return user;}
// Return the time stamp stored in the member dstime.
// NOTE(review): the member is commented as "when query evaluation started"
// while the generated docs call it the time stamp on the data -- confirm
// which meaning callers rely on.
75  time_t timestamp() const {return dstime;}
// Return the pointer to the data partition used to process the query
// (the member mypart).
77  const part* partition() const {return mypart;}
// Return a const reference to the select clause object (the member comps).
// The ';' after the body is a redundant empty declaration.
79  const selectClause& components() const {return comps;};
80 
81  int setRIDs(const RIDSet& set);
82  int setWhereClause(const char *str);
83  int setWhereClause(const std::vector<const char*>& names,
84  const std::vector<double>& lbounds,
85  const std::vector<double>& rbounds);
86  int setWhereClause(const ibis::qExpr* qexp);
87  int addConditions(const ibis::qExpr* qexp);
88  int addConditions(const char*);
89  virtual int setSelectClause(const char *str);
91  int setPartition(const ibis::part* tbl);
// Deprecated alias: forwards tbl to setPartition and returns its result.
93  int setTable(const ibis::part* tbl) {return setPartition(tbl);}
// Return the where clause in string form, as produced by conds.getString().
95  virtual const char* getWhereClause() const {return conds.getString();}
// Return the select clause in string form. The value is obtained by
// applying unary operator* to the selectClause member comps, whose result
// is returned as a const char*.
97  virtual const char* getSelectClause() const {return *comps;}
98 
99  void expandQuery();
100  void contractQuery();
101  std::string removeComplexConditions();
102 
// Return a const pointer to the RID set specified for an RID query
// (the member rids_in).
104  const RIDSet* getUserRIDs() const {return rids_in;}
105 
106  // Functions to perform estimation.
107 
108  int estimate();
109  long getMinNumHits() const;
110  long getMaxNumHits() const;
113  {return (sup!=0?sup:hits);}
114  long getCandidateRows(std::vector<uint32_t>&) const;
115 
116  // Functions related to full evaluation.
117 
118  int evaluate(const bool evalSelect=false);
// Return the pointer to the internal hit vector (the member hits). No copy
// is made; the caller gets read-only access to the object held by the query.
123  const ibis::bitvector* getHitVector() const {return hits;}
124  long getNumHits() const;
125  long getHitRows(std::vector<uint32_t> &rids) const;
126  long countHits() const;
127 
128  int orderby(const char *names) const;
129  long limit(const char *names, uint32_t keep,
130  bool updateHits = true);
131 
142  array_t<signed char>* getQualifiedBytes(const char* column_name);
144  array_t<unsigned char>* getQualifiedUBytes(const char* column_name);
146  array_t<int16_t>* getQualifiedShorts(const char* column_name);
148  array_t<uint16_t>* getQualifiedUShorts(const char* column_name);
150  array_t<int32_t>* getQualifiedInts(const char* column_name);
153  array_t<uint32_t>* getQualifiedUInts(const char* column_name);
155  array_t<int64_t>* getQualifiedLongs(const char* column_name);
157  array_t<uint64_t>* getQualifiedULongs(const char* column_name);
160  array_t<float>* getQualifiedFloats(const char* column_name);
163  array_t<double>* getQualifiedDoubles(const char* column_name);
165  std::vector<std::string>* getQualifiedStrings(const char* column_name);
167  RIDSet* getRIDs() const;
169  RIDSet* getRIDs(const ibis::bitvector& mask) const;
171  const RIDSet* getRIDsInBundle(const uint32_t bid) const;
173 
180  void printSelected(std::ostream& out) const;
184  void printSelectedWithRID(std::ostream& out) const;
185 
186  long sequentialScan(ibis::bitvector& bv) const;
187  long getExpandedHits(ibis::bitvector&) const;
188 
189  // used by ibis::bundle
190  RIDSet* readRIDs() const;
191  void writeRIDs(const RIDSet* rids) const;
192 
193  void logMessage(const char* event, const char* fmt, ...) const;
194 
195  // Functions for cleaning up, retrieving query states
196  // and error messages.
197 
199  void clear();
201  QUERY_STATE getState() const;
// Return the last error message recorded internally. The pointer refers to
// the member buffer lastError, so later writes to that buffer change what
// the caller sees.
203  const char* getLastError() const {return lastError;}
// Reset the last error message to an empty string by writing a null
// character into the first byte of lastError. This is allowed in a const
// member function because lastError is declared mutable.
205  void clearErrorMessage() const {*lastError=0;}
206 
209  static bool isValidToken(const char* tok);
// Length of the query token; always returns 16.
211  // *** the value 16 is hard coded in functions newToken and ***
212  // *** isValidToken ***
// (i.e. changing the constant here also requires updating those two functions)
213  static unsigned tokenLength() {return 16;}
214 
// Tell the destructor to remove all stored information about queries.
// Implemented by adding the name-value pair "query.purgeTempFiles"="true"
// to the global parameter list returned by ibis::gParameters().
216  static void removeQueryRecords()
217  {ibis::gParameters().add("query.purgeTempFiles", "true");}
// Tell the destructor to leave stored information on disk.
// Implemented by adding the name-value pair "query.purgeTempFiles"="false"
// to the global parameter list returned by ibis::gParameters().
219  static void keepQueryRecords()
220  {ibis::gParameters().add("query.purgeTempFiles", "false");}
221 
222  class result; // Forward declaration, defined in bundles.h
223  class weight;
224  class readLock;
225  class writeLock;
226  friend class readLock;
227  friend class writeLock;
228 
229 protected:
230  char* user;
237  mutable char lastError[MAX_LINE+PATH_MAX];
238 
239  void logError(const char* event, const char* fmt, ...) const;
240  void logWarning(const char* event, const char* fmt, ...) const;
241  void storeErrorMesg(const char*) const;
242 
243  void reorderExpr(); // reorder query expression
244 
245  bool hasBundles() const;
246  void getBounds();
247  void doEstimate(const qExpr* term, ibis::bitvector& low,
248  ibis::bitvector& high) const;
249 
250  int computeHits();
251  int doEvaluate(const qExpr* term, ibis::bitvector& hits) const;
252  int doEvaluate(const qExpr* term, const ibis::bitvector& mask,
253  ibis::bitvector& hits) const;
254  int doScan(const qExpr* term, const ibis::bitvector& mask,
255  ibis::bitvector& hits) const;
256  int doScan(const qExpr* term, ibis::bitvector& hits) const;
257 
258  int64_t processJoin();
259 
261  virtual void writeQuery();
263  void readQuery(const ibis::partList& tl);
265  void removeFiles();
266 
268  void readHits();
270  void writeHits() const;
272  void printRIDs(const RIDSet& ridset) const;
275  uint32_t countPages(unsigned wordsize) const;
276 
278  int doExpand(ibis::qExpr* exp0) const;
280  int doContract(ibis::qExpr* exp0) const;
281 
282  // A group of functions to count the number of pairs
283  // satisfying the join conditions.
284  int64_t sortJoin(const std::vector<const ibis::deprecatedJoin*>& terms,
285  const ibis::bitvector& mask) const;
286  int64_t sortJoin(const ibis::deprecatedJoin& cmp,
287  const ibis::bitvector& mask) const;
288  int64_t sortEquiJoin(const ibis::deprecatedJoin& cmp,
289  const ibis::bitvector& mask) const;
290  int64_t sortRangeJoin(const ibis::deprecatedJoin& cmp,
291  const ibis::bitvector& mask) const;
292  int64_t sortEquiJoin(const ibis::deprecatedJoin& cmp,
293  const ibis::bitvector& mask,
294  const char* pairfile) const;
295  int64_t sortRangeJoin(const ibis::deprecatedJoin& cmp,
296  const ibis::bitvector& mask,
297  const char* pairfile) const;
298  void orderPairs(const char* pairfile) const;
299  int64_t mergePairs(const char* pairfile) const;
300 
301  template <typename T1, typename T2>
302  int64_t countEqualPairs(const array_t<T1>& val1,
303  const array_t<T2>& val2) const;
304  template <typename T1, typename T2>
305  int64_t countDeltaPairs(const array_t<T1>& val1,
306  const array_t<T2>& val2, const T1& delta) const;
307  template <typename T1, typename T2>
308  int64_t recordEqualPairs(const array_t<T1>& val1,
309  const array_t<T2>& val2,
310  const array_t<uint32_t>& ind1,
311  const array_t<uint32_t>& ind2,
312  const char* pairfile) const;
313  template <typename T1, typename T2>
314  int64_t recordDeltaPairs(const array_t<T1>& val1,
315  const array_t<T2>& val2,
316  const array_t<uint32_t>& ind1,
317  const array_t<uint32_t>& ind2,
318  const T1& delta, const char* pairfile) const;
319 
320  // functions for access control
// Acquire a read lock on this query by calling pthread_rwlock_rdlock on the
// member `lock` (declared mutable, hence usable from a const function).
// mesg: label inserted into the log messages through a "%s" conversion.
// A failed lock attempt is only reported through logMessage; the function
// returns void either way, so the caller cannot tell whether the lock is
// actually held.
321  void gainReadAccess(const char* mesg) const {
// Trace the attempt only when the global verbosity level exceeds 10.
322  if (ibis::gVerbose > 10)
323  logMessage("gainReadAccess", "acquiring a read lock for %s",
324  mesg);
// A non-zero return value from pthread_rwlock_rdlock is treated as failure.
325  if (0 != pthread_rwlock_rdlock(&lock))
326  logMessage("gainReadAccess",
327  "unable to gain read access to rwlock for %s", mesg);
328  }
// Acquire a write lock on this query by calling pthread_rwlock_wrlock on the
// member `lock` (declared mutable, hence usable from a const function).
// mesg: label inserted into the log messages through a "%s" conversion.
// A failed lock attempt is only reported through logMessage; the function
// returns void either way, so the caller cannot tell whether the lock is
// actually held.
329  void gainWriteAccess(const char* mesg) const {
// Trace the attempt only when the global verbosity level exceeds 10.
330  if (ibis::gVerbose > 10)
331  logMessage("gainWriteAccess", "acquiring a write lock for %s",
332  mesg);
// A non-zero return value from pthread_rwlock_wrlock is treated as failure.
333  if (0 != pthread_rwlock_wrlock(&lock))
334  logMessage("gainWriteAccess",
335  "unable to gain write access to rwlock for %s", mesg);
336  }
// Release the rwlock by calling pthread_rwlock_unlock on the member `lock`.
// The same function is used to undo both gainReadAccess and gainWriteAccess
// (the readLock and writeLock destructors both call it).
// mesg: label inserted into the log messages through a "%s" conversion.
// A failed unlock is only reported through logMessage.
337  void releaseAccess(const char* mesg) const {
// Trace the release only when the global verbosity level exceeds 10.
338  if (ibis::gVerbose > 10)
339  logMessage("releaseAccess", "releasing rwlock for %s", mesg);
// A non-zero return value from pthread_rwlock_unlock is treated as failure.
340  if (0 != pthread_rwlock_unlock(&lock))
341  logMessage("releaseAccess", "unable to unlock the rwlock for %s",
342  mesg);
343  }
344 
345 private:
346  char* myID; // The unique ID of this query object
347  char* myDir; // Name of the directory containing the query record
348  RIDSet* rids_in; // Rid list specified in an RID query
349  const part* mypart; // Data partition used to process the query
350  time_t dstime; // When query evaluation started
351  mutable pthread_rwlock_t lock; // Rwlock for access control
352 
353  // private functions
354  static char* newToken(const char*);
355  void setMyDir(const char *pref);
357 
358  query(const query&);
359  query& operator=(const query&);
360 }; // class ibis::query
361 
// Declarations of explicit specializations of the pair-counting and
// pair-recording member templates of ibis::query for the two mixed
// combinations of 32-bit signed and unsigned arrays (int32_t/uint32_t and
// uint32_t/int32_t). Only declarations appear here; the definitions are
// presumably in query.cpp -- confirm.
// NOTE(review): the specializations presumably exist so that signed and
// unsigned values are compared correctly -- verify against the definitions.
362 namespace ibis {
// countEqualPairs: signed val1 against unsigned val2.
370  template <>
371  int64_t query::countEqualPairs(const array_t<int32_t>& val1,
372  const array_t<uint32_t>& val2) const;
// countEqualPairs: unsigned val1 against signed val2.
373  template <>
374  int64_t query::countEqualPairs(const array_t<uint32_t>& val1,
375  const array_t<int32_t>& val2) const;
// countDeltaPairs: delta has the element type of val1 (int32_t here).
376  template <>
377  int64_t query::countDeltaPairs(const array_t<int32_t>& val1,
378  const array_t<uint32_t>& val2,
379  const int32_t& delta) const;
// countDeltaPairs: delta has the element type of val1 (uint32_t here).
380  template <>
381  int64_t query::countDeltaPairs(const array_t<uint32_t>& val1,
382  const array_t<int32_t>& val2,
383  const uint32_t& delta) const;
// recordEqualPairs: signed val1 against unsigned val2; takes the index
// arrays ind1/ind2 and the name of a pair file in addition to the values.
384  template <>
385  int64_t query::recordEqualPairs(const array_t<int32_t>& val1,
386  const array_t<uint32_t>& val2,
387  const array_t<uint32_t>& ind1,
388  const array_t<uint32_t>& ind2,
389  const char *pairfile) const;
// recordEqualPairs: unsigned val1 against signed val2.
390  template <>
391  int64_t query::recordEqualPairs(const array_t<uint32_t>& val1,
392  const array_t<int32_t>& val2,
393  const array_t<uint32_t>& ind1,
394  const array_t<uint32_t>& ind2,
395  const char *pairfile) const;
// recordDeltaPairs: signed val1 against unsigned val2, int32_t delta.
396  template <>
397  int64_t query::recordDeltaPairs(const array_t<int32_t>& val1,
398  const array_t<uint32_t>& val2,
399  const array_t<uint32_t>& ind1,
400  const array_t<uint32_t>& ind2,
401  const int32_t& delta,
402  const char *pairfile) const;
// recordDeltaPairs: unsigned val1 against signed val2, uint32_t delta.
403  template <>
404  int64_t query::recordDeltaPairs(const array_t<uint32_t>& val1,
405  const array_t<int32_t>& val2,
406  const array_t<uint32_t>& ind1,
407  const array_t<uint32_t>& ind2,
408  const uint32_t& delta,
409  const char *pairfile) const;
411 }
412 
415 public:
416  virtual double operator()(const ibis::qExpr* ex) const;
// Construct a weight functor for the given data partition. Only the pointer
// ds is stored (in the member dataset); the partition is not copied.
417  weight(const ibis::part* ds) : dataset(ds) {}
418 
419 private:
420  const ibis::part* dataset;
421 };
422 
428 public:
// Acquire a read lock on the query q; the lock is released by the destructor.
// q: query to lock; it is dereferenced unconditionally, so it must not be null.
// m: label forwarded to gainReadAccess for logging. Only the pointer is
//    stored and it is used again by the destructor, so the string must
//    outlive this object.
429  readLock(const query* q, const char* m) : theQuery(q), mesg(m) {
430  theQuery->gainReadAccess(m);
431  };
// Release the lock taken by the constructor by calling releaseAccess on the
// stored query with the stored label.
432  ~readLock() {theQuery->releaseAccess(mesg);}
433 private:
434  const query* theQuery;
435  const char* mesg;
436 
// Private so that code outside the class cannot default-construct a lock.
// The body is empty, so theQuery and mesg would be left uninitialized if
// this constructor were ever called from inside the class.
437  readLock() {}; // no default constructor
// Private so that code outside the class cannot copy a lock. The body is
// empty and does not copy theQuery or mesg, so a copy made from inside the
// class would hold uninitialized pointers that its destructor dereferences.
438  readLock(const readLock&) {}; // can not copy
439 }; // class ibis::query::readLock
440 
446 public:
// Acquire a write lock on the query q; the lock is released by the destructor.
// q: query to lock; it is dereferenced unconditionally, so it must not be null.
// m: label forwarded to gainWriteAccess for logging. Only the pointer is
//    stored and it is used again by the destructor, so the string must
//    outlive this object.
447  writeLock(const query* q, const char* m) : theQuery(q), mesg(m) {
448  theQuery->gainWriteAccess(m);
449  };
// Release the lock taken by the constructor by calling releaseAccess on the
// stored query with the stored label.
450  ~writeLock() {theQuery->releaseAccess(mesg);}
451 private:
452  const query* theQuery;
453  const char* mesg;
454 
// Private so that code outside the class cannot default-construct a lock.
// The body is empty, so theQuery and mesg would be left uninitialized if
// this constructor were ever called from inside the class.
455  writeLock() {}; // no default constructor
// Private so that code outside the class cannot copy a lock. The body is
// empty and does not copy theQuery or mesg, so a copy made from inside the
// class would hold uninitialized pointers that its destructor dereferences.
456  writeLock(const writeLock&) {}; // can not copy
457 }; // ibis::query::writeLock
458 #endif // IBIS_QUERY_H
void add(const char *name, const char *value)
Add a name-value pair to the resource list.
Definition: resource.cpp:169
void logMessage(const char *event, const char *fmt,...) const
Used to print information about the progress or state of query processing.
Definition: query.cpp:2615
void readHits()
Read the results of the query.
Definition: query.cpp:4659
The query object contains a list of RIDs.
Definition: query.h:53
static void keepQueryRecords()
Tell the destructor to leave stored information on disk.
Definition: query.h:219
void printRIDs(const RIDSet &ridset) const
Export the Row IDs of the hits to log file.
Definition: query.cpp:2473
void writeHits() const
Write the results of the query.
Definition: query.cpp:4672
array_t< signed char > * getQualifiedBytes(const char *column_name)
The functions getQualifiedTTT return the values of selected columns in the records that satisfy the...
Definition: query.cpp:1603
A representation of the where clause.
Definition: whereClause.h:161
array_t< unsigned char > * getQualifiedUBytes(const char *column_name)
Retrieve the values of column_name as 8-bit unsigned integers.
Definition: query.cpp:1621
array_t< uint32_t > * getQualifiedUInts(const char *column_name)
Retrieve unsigned integer values from records satisfying the query conditions.
Definition: query.cpp:1693
int setTable(const ibis::part *tbl)
This is deprecated, will be removed soon.
Definition: query.h:93
ibis::bitvector * hits
Solution in bitvector form (or lower bound).
Definition: query.h:234
long getNumHits() const
Compute the number of records in the exact solution.
Definition: query.cpp:1249
ibis::part::readLock * dslock
A read lock on mypart.
Definition: query.h:236
QUERY_STATE state
Status of the query.
Definition: query.h:233
RIDSet * getRIDs() const
Return the list of row IDs of the hits.
Definition: query.cpp:1435
void contractQuery()
Contracts where clause to preferred bounds.
Definition: query.cpp:1888
const char * dir() const
Return the directory for any persistent data.
Definition: query.h:71
std::string removeComplexConditions()
Separate out the sub-expressions that are not simple.
Definition: query.cpp:1931
void writeRIDs(const RIDSet *rids) const
Write the list of RIDs to a file named "-rids".
Definition: query.cpp:4716
The top level query expression object.
Definition: qExpr.h:36
The exact hits are computed.
Definition: query.h:57
const char * getLastError() const
Return the last error message recorded internally.
Definition: query.h:203
void clear()
Releases the resources held by the query object.
Definition: query.cpp:4727
array_t< int64_t > * getQualifiedLongs(const char *column_name)
Retrieve values of column_name as 64-bit integers.
Definition: query.cpp:1711
int evaluate(const bool evalSelect=false)
Computes the exact hits.
Definition: query.cpp:1005
query(const char *dir, const ibis::partList &tl)
Constructor.
Definition: query.cpp:2043
const RIDSet * getUserRIDs() const
Return a const pointer to the copy of the user supplied RID set.
Definition: query.h:104
A functor to be used by the function reorder.
Definition: qExpr.h:122
A class to represent the select clause.
Definition: selectClause.h:112
An upper and a lower bound are computed.
Definition: query.h:56
long sequentialScan(ibis::bitvector &bv) const
Perform a simple sequential scan.
Definition: query.cpp:3131
The current implementation of FastBit is code named IBIS; most data structures and functions are in t...
Definition: bord.h:16
const RIDSet * getRIDsInBundle(const uint32_t bid) const
Return the list of row IDs of the hits within the specified bundle.
Definition: query.cpp:1512
long getHitRows(std::vector< uint32_t > &rids) const
Extract the positions of the bits that are 1s in the solution.
Definition: query.cpp:1273
const ibis::bitvector * getHitVector() const
Return the pointer to the internal hit vector.
Definition: query.h:123
The query object is currently empty.
Definition: query.h:51
long getCandidateRows(std::vector< uint32_t > &) const
Extract the positions of candidates.
Definition: query.cpp:956
const part * partition() const
Return the pointer to the data partition used to process the query.
Definition: query.h:77
void readQuery(const ibis::partList &tl)
Read the status information from disk.
Definition: query.cpp:4484
long getMaxNumHits() const
Return the number of records in the upper bound.
Definition: query.cpp:941
QUERY_STATE getState() const
Return the current state of query.
Definition: query.cpp:1825
const selectClause & components() const
Return a list of names specified in the select clause.
Definition: query.h:79
array_t< uint64_t > * getQualifiedULongs(const char *column_name)
Retrieve values of column_name as 64-bit unsigned integers.
Definition: query.cpp:1728
whereClause conds
Query conditions.
Definition: query.h:231
void doEstimate(const qExpr *term, ibis::bitvector &low, ibis::bitvector &high) const
Use the index only to come up with an upper bound and a lower bound.
Definition: query.cpp:2726
void logError(const char *event, const char *fmt,...) const
!< The warning/error message
Definition: query.cpp:2511
ibis::resource & gParameters()
List of in-memory data.
Definition: resource.cpp:545
virtual void writeQuery()
Write the basic information about the query to disk.
Definition: query.cpp:4615
Provide a read lock on an ibis::part.
Definition: part.h:1478
QUERY_STATE
Definition: query.h:50
A data structure for representing user queries.
Definition: query.h:48
void orderPairs(const char *pairfile) const
Sort the content of the file as ibis::rid_t.
Definition: query.cpp:7781
virtual int setSelectClause(const char *str)
Specifies the select clause for the query.
Definition: query.cpp:196
long getExpandedHits(ibis::bitvector &) const
Get a bitvector containing all rows satisfying the query condition.
Definition: query.cpp:3235
array_t< int32_t > * getQualifiedInts(const char *column_name)
Retrieve integer values from records satisfying the query conditions.
Definition: query.cpp:1675
A class to be used for reordering the terms in the where clauses.
Definition: query.h:414
selectClause comps
Select clause.
Definition: query.h:232
void printSelected(std::ostream &out) const
Print the values of the selected columns to the specified output stream.
Definition: query.cpp:4828
long getMinNumHits() const
Return the number of records in the lower bound.
Definition: query.cpp:931
int setPartition(const ibis::part *tbl)
Resets the data partition associated with the query.
Definition: query.cpp:113
int orderby(const char *names) const
Re-order the results according to the new "ORDER BY" specification.
Definition: query.cpp:1330
char lastError[MAX_LINE+PATH_MAX]
The warning/error message.
Definition: query.h:237
virtual const char * getSelectClause() const
Return the select clause string.
Definition: query.h:97
time_t timestamp() const
The time stamp on the data used to process the query.
Definition: query.h:75
array_t< double > * getQualifiedDoubles(const char *column_name)
Retrieve double precision floating-point values from records satisfying the query conditions...
Definition: query.cpp:1773
The class ibis::part represents a partition of a relational table.
Definition: part.h:27
int64_t countEqualPairs(const array_t< T1 > &val1, const array_t< T2 > &val2) const
Assume the two input arrays are sorted in ascending order, count the number of elements that match...
Definition: query.cpp:5816
ibis::bitvector * sup
Estimated upper bound.
Definition: query.h:235
int setRIDs(const RIDSet &set)
Specify a list of Row IDs for the query object.
Definition: query.cpp:710
long countHits() const
Count the number of hits.
Definition: query.cpp:1308
array_t< int16_t > * getQualifiedShorts(const char *column_name)
Retrieve the values of column_name as 16-bit integers.
Definition: query.cpp:1639
long limit(const char *names, uint32_t keep, bool updateHits=true)
Truncate the results to provide the top-K rows.
Definition: query.cpp:1369
Declares ibis::selectClause class.
array_t< uint16_t > * getQualifiedUShorts(const char *column_name)
Retrieve the values of column_name as 16-bit unsigned integers.
Definition: query.cpp:1657
int setWhereClause(const char *str)
Specify the where clause in the string form.
Definition: query.cpp:260
static void removeQueryRecords()
Tell the destructor to remove all stored information about queries.
Definition: query.h:216
int doEvaluate(const qExpr *term, ibis::bitvector &hits) const
Evaluate the query expression.
Definition: query.cpp:3889
const char * userName() const
Return the name of the user who started the query.
Definition: query.h:73
int64_t countDeltaPairs(const array_t< T1 > &val1, const array_t< T2 > &val2, const T1 &delta) const
Assume the two input arrays are sorted in ascending order, count the number of elements that are with...
Definition: query.cpp:5934
int doScan(const qExpr *term, const ibis::bitvector &mask, ibis::bitvector &hits) const
Masked sequential scan.
Definition: query.cpp:3430
uint32_t countPages(unsigned wordsize) const
Count the number of pages accessed to retrieve every value in the hit vector.
Definition: query.cpp:4874
A read lock on a query object.
Definition: query.h:427
Only top-K results are stored.
Definition: query.h:58
std::vector< std::string > * getQualifiedStrings(const char *column_name)
Retrieve string values from records satisfying the query conditions.
Definition: query.cpp:1802
void clearErrorMessage() const
Reset the last error message to blank.
Definition: query.h:205
The query object has a where clause.
Definition: query.h:54
static unsigned tokenLength()
Length of the query token.
Definition: query.h:213
void getBounds()
Compute the upper and lower bounds for range queries.
Definition: query.cpp:2668
int addConditions(const ibis::qExpr *qexp)
Add a set of conditions to the existing where clause.
Definition: query.cpp:560
virtual double operator()(const ibis::qExpr *ex) const
Generate a weight based on estimated query processing costs.
Definition: query.cpp:38
A data structure to represent a sequence of bits.
Definition: bitvector.h:62
virtual ~query()
Destructor.
Definition: query.cpp:2104
int64_t processJoin()
Process the join operation and return the number of pairs.
Definition: query.cpp:5001
int64_t sortEquiJoin(const ibis::deprecatedJoin &cmp, const ibis::bitvector &mask) const
Performing an equi-join by sorting the selected values first.
Definition: query.cpp:6522
SET_COMPONENTS & (SET_RIDS | SET_PREDICATE).
Definition: query.h:55
The query object has a select clause.
Definition: query.h:52
Declares ibis::whereClause class.
const ibis::bitvector * getCandidateVector() const
Return a pointer to the bit vector representing the candidates.
Definition: query.h:112
int64_t sortRangeJoin(const ibis::deprecatedJoin &cmp, const ibis::bitvector &mask) const
Performing a range join by sorting the selected values.
Definition: query.cpp:6804
void expandQuery()
Expands where clause to preferred bounds.
Definition: query.cpp:1852
int doContract(ibis::qExpr *exp0) const
Contract range conditions to remove the need of candidate check.
Definition: query.cpp:4969
void printSelectedWithRID(std::ostream &out) const
Print the values of the columns in the select clause without functions.
Definition: query.cpp:4851
int estimate()
Function to perform estimation.
Definition: query.cpp:758
array_t< float > * getQualifiedFloats(const char *column_name)
Retrieve floating-point values from records satisfying the query conditions.
Definition: query.cpp:1746
RIDSet * readRIDs() const
Read RIDs from the file named "-rids".
Definition: query.cpp:4683
static bool isValidToken(const char *tok)
Is the given string a valid query token.
Definition: query.cpp:2193
virtual const char * getWhereClause() const
Return the where clause string.
Definition: query.h:95
Definition: const.h:299
void storeErrorMesg(const char *) const
Store the message into member variable lastError for later use.
Definition: query.cpp:2494
Define the class ibis::part.
int doExpand(ibis::qExpr *exp0) const
Expand range conditions to remove the need of candidate check.
Definition: query.cpp:4942
A write lock on a query object.
Definition: query.h:445
A join is defined by two names and a numerical expression.
Definition: qExpr.h:1240
int computeHits()
Generate the hit vector.
Definition: query.cpp:3025
const char * id() const
Return an identifier of the query.
Definition: query.h:67
void removeFiles()
Remove the files written by this object.
Definition: query.cpp:4766

Make It A Bit Faster
Contact us
Disclaimers
FastBit source code
FastBit mailing list archive