tafel.h
Go to the documentation of this file.
1 // File: $Id$
2 // Author: John Wu <John.Wu at ACM.org>
3 // Copyright (c) 2007-2016 the Regents of the University of California
4 #ifndef IBIS_TAFEL_H
5 #define IBIS_TAFEL_H
6 #include "table.h" // ibis::table
7 #include "bitvector.h" // ibis::bitvector
8 
11 
// Forward declaration of ibis::tafel inside its namespace, so that the
// class can be defined below using the qualified name `ibis::tafel`.
12 namespace ibis {
13  class tafel;
14 }
15 
/// An expandable table.  Derives from ibis::tablex and keeps the columns
/// (metadata plus value buffers) in memory until they are written out.
25 class ibis::tafel : public ibis::tablex {
26 public:
    /// Construct an empty table; the in-memory row count starts at zero.
27  tafel() : mrows(0U) {}
    /// Destructor.  Calls clear(), which removes both data and metadata.
28  virtual ~tafel() {clear();}
29 
    /// Add metadata about a new column.
30  virtual int addColumn(const char* cname, ibis::TYPE_T ctype,
31  const char* cdesc, const char* idx);
    /// Ingest a complete SQL CREATE TABLE statement.
32  virtual int SQLCreateTable(const char *stmt, std::string&);
33 
    /// Copy the incoming values to rows [begin:end) of the column named
    /// cname.
34  virtual int append(const char* cname, uint64_t begin, uint64_t end,
35  void* values);
    /// Add one row.
36  virtual int appendRow(const ibis::table::row&);
    // NOTE(review): text-based overload; the meaning of the two string
    // arguments is not shown in this header -- presumably a line of text
    // and its delimiters, confirm against tafel.cpp.
37  virtual int appendRow(const char*, const char*);
    /// Add multiple rows.
38  virtual int appendRows(const std::vector<ibis::table::row>&);
    /// Read the content of the named file as comma-separated values.
39  virtual int readCSV(const char* filename, int maxrows,
40  const char* outputdir, const char* delimiters);
    /// Read a SQL dump from database systems such as MySQL.
41  virtual int readSQLDump(const char* filename, std::string& tname,
42  int maxrows, const char* outputdir);
43 
    /// Write the data values and update the metadata file.
    /// All arguments after dir default to a null pointer.
44  virtual int write(const char* dir, const char* tname=0,
45  const char* tdesc=0, const char* idx=0,
46  const char* nvpairs=0) const;
    /// Write the metadata file if no metadata file already exists in the
    /// given directory.  All arguments after dir default to a null pointer.
47  virtual int writeMetaData(const char* dir, const char* tname=0,
48  const char* tdesc=0, const char* idx=0,
49  const char* nvpairs=0) const;
50 
    /// Remove all data recorded.
51  virtual void clearData();
    /// Attempt to reserve enough memory for the requested number of rows
    /// to be stored in memory.
52  virtual int32_t reserveBuffer(uint32_t);
    /// Capacity of the memory buffer.
53  virtual uint32_t bufferCapacity() const;
54 
    /// The number of rows in memory (the value of mrows).
55  virtual uint32_t mRows() const {return mrows;}
    /// The number of columns in this table (the size of cols).
56  virtual uint32_t mColumns() const {return cols.size();}
    /// Print a description of the table to the specified output stream.
57  virtual void describe(std::ostream&) const;
    /// Stop expanding the current set of data records.
    /// Both arguments default to a null pointer.
58  virtual table* toTable(const char* nm=0, const char* de=0);
    /// Set the name of the ASCII dictionary file for a column of
    /// categorical values.
59  virtual void setASCIIDictionary(const char*, const char*);
    /// Retrieve the name of the ASCII dictionary file associated with a
    /// column of categorical values.
60  virtual const char* getASCIIDictionary(const char*) const;
61 
    /// In-memory version of a column.
    // NOTE(review): the member index that follows this listing also
    // documents `ibis::TYPE_T type` (tafel.h:75, "Type of the data") and
    // `ibis::bitvector mask` (tafel.h:89, "Valid values are marked 1, null
    // values are marked 0"); neither appears in this listing -- confirm
    // against the original header.
63  struct column {
     /// Name of the column.
65  std::string name;
     /// Description of the column.
67  std::string desc;
     /// Index specification for the column.
69  std::string indexSpec;
     /// Dictionary file name.
73  std::string dictfile;
     /// Pointer to the in-memory storage.  Untyped; the concrete type
     /// behind the pointer is not visible in this listing.
80  void* values;
     /// The default value for the column.  Untyped, like values.
87  void* defval;
90 
     /// Default constructor.  The name and type are assigned later.
91  column();
     /// Destructor.
92  ~column();
93  }; // column
    /// Columns keyed by a C-string name and ordered by ibis::lessi
    /// (comparator defined elsewhere; presumably case-insensitive -- TODO
    /// confirm).  The keys are raw pointers, so the strings they point to
    /// must outlive the map entries.
94  typedef std::map<const char*, column*, ibis::lessi> columnList;
    /// The list of columns stored in memory.
96  const columnList& getColumns() const {return cols;}
97 
98 protected:
    /// List of columns in alphabetical order.
100  columnList cols;
    /// Order of columns as they were specified through addColumn.
102  std::vector<column*> colorder;
    /// Meta tags.
105  std::string metatags;
    /// Number of rows of this table.
107  ibis::bitvector::word_t mrows;
108 
    /// Clear all content.  Removes both data and metadata.
110  void clear();
111 
    /// Make all short columns catch up with the longest one.
113  void normalize();
114 
    // Helper taking the same leading arguments as write() plus an offset.
    // NOTE(review): the meaning of offset is not shown in this header.
115  int writeData(const char* dir, const char* tname,
116  const char* tdesc, const char* idx,
117  const char* nvpairs, uint32_t offset) const;
    // Typed counterpart of the public append(): takes input values, a
    // [be, en) style pair of positions, the output array, a fill value
    // and the null mask.  NOTE(review): exact semantics live in tafel.cpp.
118  template <typename T>
119  void append(const T* in, ibis::bitvector::word_t be,
120  ibis::bitvector::word_t en, array_t<T>& out,
121  const T& fill, ibis::bitvector& mask) const;
    /// Copy the incoming strings to out[be:en-1].
122  void appendString(const std::vector<std::string>* in,
123  ibis::bitvector::word_t be,
124  ibis::bitvector::word_t en,
125  std::vector<std::string>& out,
126  ibis::bitvector& mask) const;
    // Same shape as appendString, but the input is an array of raw bytes.
    // NOTE(review): how the bytes are split into strings is not shown here.
127  void appendRaw(const ibis::array_t<unsigned char>* in,
128  ibis::bitvector::word_t be,
129  ibis::bitvector::word_t en,
130  std::vector<std::string>& out,
131  ibis::bitvector& mask) const;
132 
    /// Locate the buffers and masks associated with a data type.
133  template <typename T>
134  void locate(ibis::TYPE_T, std::vector<array_t<T>*>& buf,
135  std::vector<ibis::bitvector*>& msk) const;
    /// Locate the buffers and masks associated with a string-valued data
    /// type.
136  void locateString(ibis::TYPE_T t,
137  std::vector<std::vector<std::string>*>& buf,
138  std::vector<ibis::bitvector*>& msk) const;
    /// Locate the ibis::opaque buffers and their masks.
139  void locateBlob(std::vector<std::vector<ibis::opaque>*>& buf,
140  std::vector<ibis::bitvector*>& msk) const;
    // Row-wise append for typed columns: nm holds names, va the values,
    // buf/msk the buffers and masks (as filled in by locate).
    // NOTE(review): pairing of nm/va entries is defined in tafel.cpp.
141  template <typename T>
142  void append(const std::vector<std::string>& nm, const std::vector<T>& va,
143  std::vector<array_t<T>*>& buf,
144  std::vector<ibis::bitvector*>& msk);
    // Row-wise append for string-valued columns; same argument layout as
    // the templated overload above.
145  void appendString(const std::vector<std::string>& nm,
146  const std::vector<std::string>& va,
147  std::vector<std::vector<std::string>*>& buf,
148  std::vector<ibis::bitvector*>& msk);
    /// Append one row to blob columns.
149  void appendBlob(const std::vector<std::string>& nm,
150  const std::vector<ibis::opaque>& va,
151  std::vector<std::vector<ibis::opaque>*>& buf,
152  std::vector<ibis::bitvector*>& msk);
    /// Digest a line of text and place the values identified into the
    /// corresponding columns.
153  int parseLine(const char* str, const char* del, const char* id);
154 
    /// Reserve space for the requested number of records in memory.
155  int32_t doReserve(uint32_t);
    /// Assign the default value for the given column.
156  int assignDefaultValue(ibis::tafel::column &col, const char *val) const;
    // NOTE(review): the member index that follows this listing documents a
    // const member readSQLStatement(std::istream&,
    // ibis::fileManager::buffer<char>&, ibis::fileManager::buffer<char>&)
    // ("Read one complete SQL statement from an SQL dump file",
    // tafel.cpp:3646) whose declaration is missing here, where header
    // lines 157-158 are skipped -- confirm against the original header.
    /// Compute the number of rows that are likely to fit in available
    /// memory.
159  uint32_t preferredSize() const;
160 
161 private:
    // Copy constructor and copy assignment are declared private, and no
    // definition is visible in this header, so objects of this class are
    // not meant to be copied.
162  tafel(const tafel&);
163  tafel& operator=(const tafel&);
164 }; // class ibis::tafel
165 #endif // IBIS_TAFEL_H
int assignDefaultValue(ibis::tafel::column &col, const char *val) const
Assign the default value for the given column.
Definition: tafel.cpp:417
virtual int SQLCreateTable(const char *stmt, std::string &)
Ingest a complete SQL CREATE TABLE statement.
Definition: tafel.cpp:121
virtual int writeMetaData(const char *dir, const char *tname=0, const char *tdesc=0, const char *idx=0, const char *nvpairs=0) const
Write the metadata file if no metadata file already exists in the given directory.
Definition: tafel.cpp:1559
const columnList & getColumns() const
The list of columns stored in memory.
Definition: tafel.h:96
An expandable table.
Definition: tafel.h:25
void locateBlob(std::vector< std::vector< ibis::opaque > * > &buf, std::vector< ibis::bitvector * > &msk) const
Locate the buffers and masks associated with the blob (ibis::opaque) data type.
Definition: tafel.cpp:1295
std::string indexSpec
Index specification for the column.
Definition: tafel.h:69
void appendString(const std::vector< std::string > *in, ibis::bitvector::word_t be, ibis::bitvector::word_t en, std::vector< std::string > &out, ibis::bitvector &mask) const
Copy the incoming strings to out[be:en-1].
Definition: tafel.cpp:658
virtual uint32_t mColumns() const
The number of columns in this table.
Definition: tafel.h:56
std::string desc
Description of the column.
Definition: tafel.h:67
columnList cols
List of columns in alphabetical order.
Definition: tafel.h:100
virtual table * toTable(const char *nm=0, const char *de=0)
Stop expanding the current set of data records.
Definition: tafel.cpp:3788
A simple struct for storing a row of a table.
Definition: table.h:395
virtual int write(const char *dir, const char *tname=0, const char *tdesc=0, const char *idx=0, const char *nvpairs=0) const
Write the data values and update the metadata file.
Definition: tafel.cpp:1847
The current implementation of FastBit is code named IBIS; most data structures and functions are in t...
Definition: bord.h:16
int parseLine(const char *str, const char *del, const char *id)
Digest a line of text and place the values identified into the corresponding columns.
Definition: tafel.cpp:2988
virtual int readCSV(const char *filename, int maxrows, const char *outputdir, const char *delimiters)
Read the content of the named file as comma-separated values.
Definition: tafel.cpp:3309
virtual void describe(std::ostream &) const
Print a description of the table to the specified output stream.
Definition: tafel.cpp:3770
virtual int32_t reserveBuffer(uint32_t)
Attempt to reserve enough memory for maxr rows to be stored in memory.
Definition: tafel.cpp:2534
virtual void setASCIIDictionary(const char *, const char *)
Set the name of the ASCII dictionary file for a column of categorical values.
Definition: tafel.cpp:3843
The class for expandable tables.
Definition: table.h:483
uint32_t preferredSize() const
Compute the number of rows that are likely to fit in available memory.
Definition: tafel.cpp:2930
In-memory version of a column.
Definition: tafel.h:63
virtual int appendRows(const std::vector< ibis::table::row > &)
Add multiple rows.
Definition: tafel.cpp:1435
void clear()
Clear all content. Removes both data and metadata.
Definition: tafel.cpp:2968
void appendBlob(const std::vector< std::string > &nm, const std::vector< ibis::opaque > &va, std::vector< std::vector< ibis::opaque > * > &buf, std::vector< ibis::bitvector * > &msk)
Append one row to blob columns.
Definition: tafel.cpp:1313
A buffer is intended to be a temporary workspace in memory.
Definition: fileManager.h:128
ibis::bitvector mask
Valid values are marked 1, null values are marked 0.
Definition: tafel.h:89
int32_t doReserve(uint32_t)
Reserve space for maxr records in memory.
Definition: tafel.cpp:2620
void locateString(ibis::TYPE_T t, std::vector< std::vector< std::string > * > &buf, std::vector< ibis::bitvector * > &msk) const
Locate the buffers and masks associated with a string-valued data type.
Definition: tafel.cpp:1241
Template array_t is a replacement of std::vector.
Definition: array_t.h:24
The abstract table class.
Definition: table.h:77
virtual int append(const char *cname, uint64_t begin, uint64_t end, void *values)
Copy the incoming values to rows [begin:end) of column cn.
Definition: tafel.cpp:685
virtual void clearData()
Remove all data recorded.
Definition: tafel.cpp:2456
virtual uint32_t mRows() const
The number of rows in memory.
Definition: tafel.h:55
ibis::TYPE_T type
Type of the data.
Definition: tafel.h:75
TYPE_T
Supported data types.
Definition: table.h:25
std::vector< column * > colorder
Order of columns as they were specified through addColumn.
Definition: tafel.h:102
std::string name
Name of the column.
Definition: tafel.h:65
void * values
Pointer to the in-memory storage.
Definition: tafel.h:80
virtual int readSQLDump(const char *filename, std::string &tname, int maxrows, const char *outputdir)
Read a SQL dump from database systems such as MySQL.
Definition: tafel.cpp:3461
int readSQLStatement(std::istream &, ibis::fileManager::buffer< char > &, ibis::fileManager::buffer< char > &) const
Read one complete SQL statement from an SQL dump file.
Definition: tafel.cpp:3646
~column()
Destructor.
Definition: tafel.cpp:3883
A data structure to represent a sequence of bits.
Definition: bitvector.h:62
void locate(ibis::TYPE_T, std::vector< array_t< T > * > &buf, std::vector< ibis::bitvector * > &msk) const
Locate the buffers and masks associated with a data type.
Definition: tafel.cpp:1188
virtual int addColumn(const char *cname, ibis::TYPE_T ctype, const char *cdesc, const char *idx)
Add metadata about a new column.
Definition: tafel.cpp:32
void * defval
The default value for the column.
Definition: tafel.h:87
Definition of Word-Aligned Hybrid code.
FastBit Table Interface.
const char * delimiters
Delimiters used to separate a string of names.
Definition: util.cpp:71
virtual const char * getASCIIDictionary(const char *) const
Retrieve the name of the ASCII dictionary file associated with a column of categorical values...
Definition: tafel.cpp:3865
virtual int appendRow(const ibis::table::row &)
Add one row.
Definition: tafel.cpp:1347
column()
Default constructor. The name and type are assigned later.
Definition: tafel.cpp:3879
std::string dictfile
Dictionary file name.
Definition: tafel.h:73
std::string metatags
Meta tags.
Definition: tafel.h:105
virtual uint32_t bufferCapacity() const
Capacity of the memory buffer.
Definition: tafel.cpp:2859
void normalize()
Make all short columns catch up with the longest one.
Definition: tafel.cpp:781
ibis::bitvector::word_t mrows
Number of rows of this table.
Definition: tafel.h:107

Make It A Bit Faster
Contact us
Disclaimers
FastBit source code
FastBit mailing list archive