iroster.h
Go to the documentation of this file.
1 //File: $Id$
2 // Author: John Wu <John.Wu at ACM.org>
3 // Copyright (c) 2000-2016 the Regents of the University of California
4 #ifndef IBIS_ROSTER_H
5 #define IBIS_ROSTER_H
6 #include "array_t.h"
7 #include "util.h"
10 
19 class ibis::roster {
20 public:
21  ~roster() {clear();};
22  roster(const ibis::column* c, const char* dir = 0);
24  uint32_t offset = 8);
25 
26  const char* name() const {return "roster list";}
27  const ibis::column* getColumn() const {return col;}
28  uint32_t size() const;
29 
30  int read(const char* idxfile);
31  int read(ibis::fileManager::storage* st);
32 
34  void print(std::ostream& out) const;
36  int write(const char* dt) const;
38  int writeSorted(const char* dt) const;
39 
40  const array_t<uint32_t>& array() const {return ind;}
41  inline uint32_t operator[](uint32_t i) const;
42 
49  template <typename T>
50  int locate(const ibis::array_t<T>& vals,
51  ibis::bitvector& positions) const;
54  template <typename T>
55  int locate(const ibis::array_t<T>& vals,
56  std::vector<uint32_t>& positions) const;
57 
63  template <typename T>
64  int locate(const std::vector<T>& vals,
65  ibis::bitvector& positions) const;
68  template <typename T>
69  int locate(const std::vector<T>& vals,
70  std::vector<uint32_t>& positions) const;
71 
74  template <class T>
75  static long mergeBlock2(const char *dsrc, const char *dout,
76  const uint32_t segment, array_t<T>& buf1,
77  array_t<T>& buf2, array_t<T>& buf3);
78 
79 // /// A templated multi-way merge sort algorithm for a data file with
80 // /// values of the same type. Return the number of passes if
81 // /// successful, other return a negative number to indicate error.
82 // template <class Type, uint32_t mway=64, uint32_t block=8192>
83 // static int diskSort(const char *datafile, const char *scratchfile);
84 
85 protected:
86 // /// This function performs the initial sort of blocks. Entries in each
87 // /// (@c mway * @c block)-segment is sorted and written back to the same
88 // /// data file.
89 // template <class Type, uint32_t mway, uint32_t block>
90 // static int diskSortInit(const char *datafile);
91 // /// Merge blocks. The variable @c segment contains the number of
92 // /// consecutive entries (a segement) that are already sorted. To
93 // /// consecutive such segments will be merged.
94 // template <class Type, uint32_t mway, uint32_t block>
95 // static int diskSortMerge(const char *from, const char *to,
96 // uint32_t segment);
97 
98  uint32_t locate(const double& val) const;
99 
100  template <typename T> int
101  icSearch(const std::vector<T>& vals, std::vector<uint32_t>& pos) const;
102  template <typename T> int
103  oocSearch(const std::vector<T>& vals, std::vector<uint32_t>& pos) const;
104  template <typename inT, typename myT> int
105  locate2(const std::vector<inT>&, std::vector<uint32_t>&) const;
106  template <typename T> int
107  icSearch(const ibis::array_t<T>& vals, std::vector<uint32_t>& pos) const;
108  template <typename T> int
109  oocSearch(const ibis::array_t<T>& vals, std::vector<uint32_t>& pos) const;
110  template <typename inT, typename myT> int
111  locate2(const ibis::array_t<inT>&, std::vector<uint32_t>&) const;
112 
113 private:
114  // private member variables
115  const ibis::column* col;
116  array_t<uint32_t> ind;
117  mutable int inddes;
118 
119  // private member functions
120  void clear() {ind.clear(); if (inddes>=0) UnixClose(inddes);};
121  int write(FILE* fptr) const;
122 
124  void icSort(const char* f = 0);
126  void oocSort(const char* f = 0);
127  template <class T>
128  long oocSortBlocks(const char *src, const char *dest,
129  const char *ind, const uint32_t mblock,
130  array_t<T>& dbuf1, array_t<T>& dbuf2,
131  array_t<uint32_t>& ibuf) const;
132  template <class T>
133  long oocMergeBlocks(const char *dsrc, const char *dout,
134  const char *isrc, const char *iout,
135  const uint32_t mblock, const uint32_t stride,
136  array_t<T>& dbuf1, array_t<T>& dbuf2,
137  array_t<uint32_t>& ibuf1,
138  array_t<uint32_t>& ibuf2) const;
139 
140  roster(); // not implemented
141  roster(const roster&); // not implemented
142  const roster& operator=(const roster&); // not implemented
143 }; // ibis::roster
144 
145 namespace ibis {
146  template <> int
147  roster::locate(const std::vector<double>&, ibis::bitvector&) const;
148  template <> int
150 }
151 
153 inline uint32_t ibis::roster::operator[](uint32_t i) const {
154  uint32_t tmp = UINT_MAX;
155  if (i < ind.size()) {
156  tmp = ind[i];
157  }
158  else if (inddes >= 0) {
159  if (static_cast<off_t>(i*sizeof(uint32_t)) !=
160  UnixSeek(inddes, i*sizeof(uint32_t), SEEK_SET))
161  return tmp;
162  if (sizeof(uint32_t) != UnixRead(inddes, &tmp, sizeof(uint32_t)))
163  return UINT_MAX;
164  }
165  else {
166  LOGGER(ibis::gVerbose > 0)
167  << "Warning -- roster(ind[" << ind.size() << "], inddes="
168  << inddes << ")::operator[]: index i (" << i
169  << ") is out of range";
170  }
171  return tmp;
172 } // ibis::roster::operator[]
173 #endif // IBIS_ROSTER_H
174 
size_t size() const
Definition: array_t.h:69
int locate(const ibis::array_t< T > &vals, ibis::bitvector &positions) const
Locate the values and set their positions in the bitvector.
Definition: iroster.cpp:3112
void clear()
Reset the size to zero.
Definition: array_t.h:171
The storage class treats all memory as char*.
Definition: fileManager.h:237
void print(std::ostream &out) const
Output a minimal information about the roster list.
Definition: iroster.cpp:2102
A roster is a list of values in ascending order plus their original positions.
Definition: iroster.h:19
The current implementation of FastBit is code named IBIS; most data structures and functions are in t...
Definition: bord.h:16
The class to represent a column of a data partition.
Definition: column.h:65
int icSearch(const std::vector< T > &vals, std::vector< uint32_t > &pos) const
In-core searching function.
Definition: iroster.cpp:2646
int write(const char *dt) const
Write two files, .ind for indices and .srt to the sorted values.
Definition: iroster.cpp:66
Defines minor utility functions and common classes used by FastBit.
int locate2(const std::vector< inT > &, std::vector< uint32_t > &) const
Cast the incoming values into the type of the column (myT) and then locate the positions of the recor...
Definition: iroster.cpp:3285
int writeSorted(const char *dt) const
Write the sorted version of the attribute values to a .srt file.
Definition: iroster.cpp:139
static long mergeBlock2(const char *dsrc, const char *dout, const uint32_t segment, array_t< T > &buf1, array_t< T > &buf2, array_t< T > &buf3)
A two-way merge algorithm.
Definition: iroster.cpp:1919
int oocSearch(const std::vector< T > &vals, std::vector< uint32_t > &pos) const
Out-of-core search function.
Definition: iroster.cpp:2762
A data structure to represent a sequence of bits.
Definition: bitvector.h:62
uint32_t operator[](uint32_t i) const
Return the row number of the ith smallest value.
Definition: iroster.h:153

Make It A Bit Faster
Contact us
Disclaimers
FastBit source code
FastBit mailing list archive