An expandable table. More...
#include <tafel.h>
Classes | |
struct | column |
In-memory version of a column. More... | |
Public Types | |
typedef std::map< const char *, column *, ibis::lessi > | columnList |
Public Member Functions | |
virtual int | addColumn (const char *cname, ibis::TYPE_T ctype, const char *cdesc, const char *idx) |
Add metadata about a new column. More... | |
virtual int | append (const char *cname, uint64_t begin, uint64_t end, void *values) |
Copy the incoming values to rows [begin:end) of column cname. | |
virtual int | appendRow (const ibis::table::row &) |
Add one row. More... | |
virtual int | appendRow (const char *, const char *) |
Append a row stored in ASCII form. More... | |
virtual int | appendRows (const std::vector< ibis::table::row > &) |
Add multiple rows. More... | |
virtual uint32_t | bufferCapacity () const |
Capacity of the memory buffer. More... | |
virtual void | clearData () |
Remove all data recorded. More... | |
virtual void | describe (std::ostream &) const |
Print a description of the table to the specified output stream. | |
virtual const char * | getASCIIDictionary (const char *) const |
Retrieve the name of the ASCII dictionary file associated with a column of categorical values. More... | |
const columnList & | getColumns () const |
The list of columns stored in memory. | |
virtual uint32_t | mColumns () const |
The number of columns in this table. | |
virtual uint32_t | mRows () const |
The number of rows in memory. More... | |
virtual int | readCSV (const char *filename, int maxrows, const char *outputdir, const char *delimiters) |
Read the content of the named file as comma-separated values. More... | |
virtual int | readSQLDump (const char *filename, std::string &tname, int maxrows, const char *outputdir) |
Read a SQL dump from database systems such as MySQL. More... | |
virtual int32_t | reserveBuffer (uint32_t) |
Attempt to reserve enough memory for maxr rows to be stored in memory. More... | |
virtual void | setASCIIDictionary (const char *, const char *) |
Set the name of the ASCII dictionary file for a column of categorical values. More... | |
virtual int | SQLCreateTable (const char *stmt, std::string &) |
Ingest a complete SQL CREATE TABLE statement. More... | |
virtual table * | toTable (const char *nm=0, const char *de=0) |
Stop expanding the current set of data records. More... | |
virtual int | write (const char *dir, const char *tname=0, const char *tdesc=0, const char *idx=0, const char *nvpairs=0) const |
Write the data values and update the metadata file. More... | |
virtual int | writeMetaData (const char *dir, const char *tname=0, const char *tdesc=0, const char *idx=0, const char *nvpairs=0) const |
Write the metadata file if no metadata file already exists in the given directory. More... | |
Public Member Functions inherited from ibis::tablex | |
virtual uint32_t | getPartitionMax () const |
Get the recommended number of rows in a data partition. | |
virtual int | parseNamesAndTypes (const char *txt) |
Parse names and data types in string form. More... | |
virtual int | readNamesAndTypes (const char *filename) |
Read a file containing the names and types of columns. More... | |
virtual void | setPartitionMax (uint32_t m) |
Set the recommended number of rows in a data partition. | |
Protected Member Functions | |
template<typename T > | |
void | append (const T *in, ibis::bitvector::word_t be, ibis::bitvector::word_t en, array_t< T > &out, const T &fill, ibis::bitvector &mask) const |
Add values to an array of type T. More... | |
template<typename T > | |
void | append (const std::vector< std::string > &nm, const std::vector< T > &va, std::vector< array_t< T > * > &buf, std::vector< ibis::bitvector * > &msk) |
Append one row to columns of a particular type. More... | |
void | appendBlob (const std::vector< std::string > &nm, const std::vector< ibis::opaque > &va, std::vector< std::vector< ibis::opaque > * > &buf, std::vector< ibis::bitvector * > &msk) |
Append one row to blob columns. More... | |
void | appendRaw (const ibis::array_t< unsigned char > *in, ibis::bitvector::word_t be, ibis::bitvector::word_t en, std::vector< std::string > &out, ibis::bitvector &mask) const |
void | appendString (const std::vector< std::string > *in, ibis::bitvector::word_t be, ibis::bitvector::word_t en, std::vector< std::string > &out, ibis::bitvector &mask) const |
Copy the incoming strings to out[be:en-1]. More... | |
void | appendString (const std::vector< std::string > &nm, const std::vector< std::string > &va, std::vector< std::vector< std::string > * > &buf, std::vector< ibis::bitvector * > &msk) |
Append one row to string-valued columns. More... | |
int | assignDefaultValue (ibis::tafel::column &col, const char *val) const |
Assign the default value for the given column. More... | |
void | clear () |
Clear all content. Removes both data and metadata. | |
int32_t | doReserve (uint32_t) |
Reserve space for maxr records in memory. More... | |
template<typename T > | |
void | locate (ibis::TYPE_T, std::vector< array_t< T > * > &buf, std::vector< ibis::bitvector * > &msk) const |
Locate the buffers and masks associated with a data type. | |
void | locateBlob (std::vector< std::vector< ibis::opaque > * > &buf, std::vector< ibis::bitvector * > &msk) const |
Locate the buffers and masks associated with a blob-valued data type. | |
void | locateString (ibis::TYPE_T t, std::vector< std::vector< std::string > * > &buf, std::vector< ibis::bitvector * > &msk) const |
Locate the buffers and masks associated with a string-valued data type. | |
void | normalize () |
Make all short columns catch up with the longest one. | |
int | parseLine (const char *str, const char *del, const char *id) |
Digest a line of text and place the values identified into the corresponding columns. More... | |
uint32_t | preferredSize () const |
Compute the number of rows that are likely to fit in available memory. More... | |
int | readSQLStatement (std::istream &, ibis::fileManager::buffer< char > &, ibis::fileManager::buffer< char > &) const |
Read one complete SQL statement from an SQL dump file. More... | |
int | writeData (const char *dir, const char *tname, const char *tdesc, const char *idx, const char *nvpairs, uint32_t offset) const |
Protected Member Functions inherited from ibis::tablex | |
tablex () | |
Protected default constructor. More... | |
Protected Attributes | |
std::vector< column * > | colorder |
Order of columns as they were specified through addColumn. | |
columnList | cols |
List of columns in alphabetical order. | |
std::string | metatags |
Meta tags. More... | |
ibis::bitvector::word_t | mrows |
Number of rows of this table. | |
Protected Attributes inherited from ibis::tablex | |
uint32_t | ipart |
Current partition number being used for writing. | |
uint32_t | maxpart |
Recommended size of data partitions to be created. | |
Additional Inherited Members | |
Static Public Member Functions inherited from ibis::tablex | |
static ibis::tablex * | create () |
Create a minimalistic table exclusively for entering new records. More... | |
An expandable table.
It inherits from ibis::tablex only, and therefore does not support any querying functions. It stores all its content in memory, and therefore can only handle a relatively small number of rows.
To perform queries on the underlying data, convert this object into a table object. Call function write to make the in-memory data persistent.
|
virtual |
Add metadata about a new column.
Return value
Implements ibis::tablex.
References ibis::BLOB, ibis::CATEGORY, colorder, cols, ibis::tafel::column::desc, ibis::DOUBLE, ibis::FLOAT, ibis::tafel::column::indexSpec, ibis::INT, ibis::LONG, ibis::tafel::column::name, ibis::OID, ibis::SHORT, ibis::TEXT, ibis::tafel::column::type, ibis::UBYTE, ibis::UINT, ibis::ULONG, ibis::UNKNOWN_TYPE, ibis::USHORT, and ibis::tafel::column::values.
Referenced by ibis::tablex::parseNamesAndTypes().
|
protected |
Add values to an array of type T.
The input values (in) are copied to out[be:en-1]. If the array out has fewer than be elements to start with, it will be filled with value fill. The output mask indicates whether the values in array out are valid. This version works with one column at a time.
References ibis::bitvector::adjustSize(), ibis::bitvector::appendFill(), ibis::util::copy(), ibis::array_t< T >::resize(), and ibis::array_t< T >::size().
|
protected |
Append one row to columns of a particular type.
This version works with multiple columns but only one row.
References ibis::array_t< T >::push_back().
|
protected |
Append one row to blob columns.
This version works with multiple columns but only one row.
|
virtual |
Add one row.
If an array of names has the same number of elements as the array of values, the names are used as column names. If the names are not specified explicitly, the values are assigned to the columns of the same data type in the order as they are specified through addColumn
or in the same order as they are recreated from an existing dataset (which is typically alphabetical).
Return the number of values added to the new row.
Like append, this function can not be used to introduce new columns in a table. A new column must be added with addColumn.
Implements ibis::tablex.
References ibis::table::row::blobsvalues, ibis::table::row::bytesvalues, ibis::CATEGORY, ibis::table::row::catsvalues, ibis::DOUBLE, ibis::table::row::doublesvalues, ibis::FLOAT, ibis::table::row::floatsvalues, ibis::INT, ibis::table::row::intsvalues, ibis::LONG, ibis::table::row::longsvalues, ibis::table::row::nColumns(), ibis::SHORT, ibis::table::row::shortsvalues, ibis::TEXT, ibis::table::row::textsvalues, ibis::UBYTE, ibis::table::row::ubytesvalues, ibis::UINT, ibis::table::row::uintsvalues, ibis::ULONG, ibis::table::row::ulongsvalues, ibis::USHORT, and ibis::table::row::ushortsvalues.
|
virtual |
Append a row stored in ASCII form.
The values in ASCII form are assumed to be separated by comma (,) or space, but additional delimiters may be added through the second argument.
Return the number of values added to the new row.
Implements ibis::tablex.
|
virtual |
Add multiple rows.
Rows in the incoming vector are processed one after another. The ordering of the values in earlier rows is automatically carried over to the later rows until another set of names is specified.
Return the number of new rows added.
Implements ibis::tablex.
References ibis::CATEGORY, ibis::DOUBLE, ibis::FLOAT, ibis::INT, ibis::LONG, ibis::SHORT, ibis::TEXT, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
|
protected |
Copy the incoming strings to out[be:en-1].
Work with one column at a time.
References ibis::bitvector::adjustSize(), ibis::bitvector::appendFill(), and ibis::util::copy().
|
protected |
Append one row to string-valued columns.
This version works with multiple columns but only one row.
|
protected |
Assign the default value for the given column.
Returns 0 on success and a negative number for error.
References ibis::BLOB, ibis::CATEGORY, ibis::util::copy(), ibis::tafel::column::defval, ibis::DOUBLE, ibis::FLOAT, ibis::INT, ibis::LONG, ibis::tafel::column::name, ibis::util::readString(), ibis::SHORT, ibis::TEXT, ibis::tafel::column::type, ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
|
virtual |
Capacity of the memory buffer.
Report the maximum number of rows that can be stored with this object before more memory will be allocated. A return value of zero (0) may also indicate that it does not know about its capacity.
Reimplemented from ibis::tablex.
References ibis::BLOB, ibis::CATEGORY, ibis::bitvector::clear(), ibis::DOUBLE, ibis::FLOAT, ibis::INT, ibis::LONG, ibis::tafel::column::mask, ibis::OID, ibis::SHORT, ibis::TEXT, ibis::tafel::column::type, ibis::UBYTE, ibis::UINT, ibis::ULONG, ibis::USHORT, and ibis::tafel::column::values.
|
virtual |
Remove all data recorded.
Keeps the information about columns. It is intended to prepare for new rows after invoking the function write.
Implements ibis::tablex.
References ibis::BLOB, ibis::CATEGORY, ibis::bitvector::clear(), ibis::util::clear(), ibis::DOUBLE, ibis::FLOAT, ibis::INT, ibis::LONG, ibis::tafel::column::mask, ibis::OID, ibis::SHORT, ibis::TEXT, ibis::tafel::column::type, ibis::UBYTE, ibis::UINT, ibis::ULONG, ibis::USHORT, and ibis::tafel::column::values.
|
protected |
Reserve space for maxr records in memory.
This function does not perform error checking. The public version of it, reserveBuffer, does.
References ibis::BLOB, ibis::array_t< T >::capacity(), ibis::CATEGORY, ibis::bitvector::clear(), ibis::DOUBLE, ibis::FLOAT, ibis::INT, ibis::LONG, ibis::tafel::column::mask, ibis::OID, ibis::array_t< T >::reserve(), ibis::array_t< T >::resize(), ibis::SHORT, ibis::TEXT, ibis::tafel::column::type, ibis::UBYTE, ibis::UINT, ibis::ULONG, ibis::USHORT, and ibis::tafel::column::values.
|
virtual |
Retrieve the name of the ASCII dictionary file associated with a column of categorical values.
Implements ibis::tablex.
References ibis::tafel::column::dictfile.
|
inlinevirtual |
The number of rows in memory.
It is the maximum number of rows in any column.
Implements ibis::tablex.
References mrows.
|
protected |
Digest a line of text and place the values identified into the corresponding columns.
The actual values are extracted by ibis::util::readInt, ibis::util::readUInt, ibis::util::readDouble and ibis::util::readString. When any of these functions returns an error condition, this function assumes the value to be recorded is a NULL. The presence of a NULL value is marked by a 0-bit in the mask associated with the column. The actual value in the associated buffer is the largest integer value for an integer column and a quiet NaN for a floating-point valued column.
References ibis::BLOB, ibis::CATEGORY, ibis::DOUBLE, ibis::FLOAT, ibis::INT, ibis::LONG, ibis::tafel::column::mask, ibis::tafel::column::name, ibis::OID, ibis::util::readDouble(), ibis::util::readInt(), ibis::util::readString(), ibis::util::readUInt(), ibis::SHORT, ibis::TEXT, ibis::tafel::column::type, ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, ibis::USHORT, and ibis::tafel::column::values.
|
protected |
Compute the number of rows that are likely to fit in available memory.
It only counts each string-valued column as costing 16 bytes per row. This can be a significant underestimate of the actual cost. Memory fragmentation may also significantly reduce the available space.
References ibis::fileManager::bytesFree(), ibis::util::coarsen(), ibis::DOUBLE, ibis::FLOAT, ibis::INT, ibis::LONG, ibis::OID, ibis::SHORT, ibis::tafel::column::type, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
|
virtual |
Read the content of the named file as comma-separated values.
Append the records to this table. If the argument memrows is greater than 0, this function will reserve space to read this many records. If the total number of records is more than memrows and the output directory name is specified, then the records will be written to outputdir and the memory is made available for later records. If outputdir is not specified, this function attempts to expand the memory allocated, which may run out of memory. Furthermore, repeated allocations can be time-consuming.
By default the records are delimited by comma (,) and blank space. One may specify alternative delimiters using the last argument.
Upon successful completion of this function, the return value is the number of rows processed. However, not all of them may remain in memory because earlier rows may have been written to disk.
Implements ibis::tablex.
References ibis::fileManager::buffer< T >::address(), ibis::util::coarsen(), ibis::horometer::CPUTime(), ibis::horometer::realTime(), ibis::fileManager::buffer< T >::resize(), ibis::fileManager::buffer< T >::size(), ibis::horometer::start(), ibis::horometer::stop(), and ibis::util::write().
|
virtual |
Read a SQL dump from database systems such as MySQL.
The entire file will be read into memory in one shot unless both memrows and outputdir are specified. In cases where both memrows and outputdir are specified, this function reads a maximum of memrows before writing the data to outputdir under the name tname, which leaves no more than memrows number of rows in memory. The value returned from this function is the number of rows processed including those written to disk. Use function mRows to determine how many are still in memory.
If the SQL dump file contains a statement to create a table, then the existing metadata is overwritten. Otherwise, it reads insert statements and converts the ASCII data into binary format in memory.
Implements ibis::tablex.
References ibis::fileManager::buffer< T >::address(), ibis::util::coarsen(), ibis::horometer::CPUTime(), ibis::util::delimiters, ibis::util::readString(), ibis::horometer::realTime(), ibis::horometer::start(), ibis::horometer::stop(), and ibis::util::write().
|
protected |
Read one complete SQL statement from an SQL dump file.
It will read one line at a time until a semicolon ';' is found. It will expand the buffers as needed. The return value is either the number of bytes in the SQL statement or an error code (less than 0).
References ibis::fileManager::buffer< T >::address(), ibis::fileManager::buffer< T >::resize(), and ibis::fileManager::buffer< T >::size().
|
virtual |
Attempt to reserve enough memory for maxr rows to be stored in memory.
This function will not reserve space for more than 1 billion rows. If maxr is less than mrows, it will simply return mrows. It calls doReserve to perform the actual reservations. If doReserve throws an exception, it will reduce the value of maxr and try again. It will give up after 5 tries and return -1, otherwise, it returns the actual capacity allocated.
Reimplemented from ibis::tablex.
References ibis::fileManager::bytesFree(), ibis::DOUBLE, ibis::FLOAT, ibis::INT, ibis::LONG, ibis::OID, ibis::SHORT, ibis::UBYTE, ibis::UINT, ibis::ULONG, and ibis::USHORT.
|
virtual |
Set the name of the ASCII dictionary file for a column of categorical values.
Implements ibis::tablex.
References ibis::CATEGORY, ibis::tafel::column::dictfile, ibis::tafel::column::name, ibis::tafel::column::type, ibis::TYPESTRING, and ibis::UINT.
|
virtual |
Ingest a complete SQL CREATE TABLE statement.
Creates all metadata specified. It extracts the table name (into tname) to be used later by functions such as write and writeMetaData.
The statement is expected to be in the form of "create table tname (column1, column2, ...)". It can not contain embedded comments.
Because the SQL standard supports many more data types than FastBit does, many SQL column types are mapped in a crude manner. Here is the current list.
References ibis::BLOB, ibis::CATEGORY, ibis::util::clear(), ibis::tafel::column::defval, ibis::DOUBLE, ibis::FLOAT, ibis::INT, ibis::LONG, ibis::tafel::column::name, ibis::util::readString(), ibis::SHORT, ibis::TEXT, ibis::tafel::column::type, ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, ibis::USHORT, and ibis::tafel::column::values.
|
virtual |
Stop expanding the current set of data records.
Convert a tablex object into a table object, so that they can participate in queries. The data records held by the tablex object are transferred to the table object, however, the metadata remains with this object.
Implements ibis::tablex.
References ibis::part::getColumn(), ibis::tafel::column::name, ibis::array_t< T >::resize(), ibis::column::setNullMask(), ibis::tafel::column::type, ibis::UNKNOWN_TYPE, and ibis::tafel::column::values.
|
virtual |
Write the data values and update the metadata file.
Return error code:
Implements ibis::tablex.
References ibis::horometer::CPUTime(), ibis::horometer::realTime(), ibis::horometer::start(), and ibis::horometer::stop().
|
virtual |
Write the metadata file if no metadata file already exists in the given directory.
Return error code:
Implements ibis::tablex.
References ibis::util::checksum(), ibis::horometer::CPUTime(), ibis::tafel::column::desc, ibis::tafel::column::dictfile, ibis::DOUBLE, ibis::FLOAT, ibis::fileManager::flushDir(), ibis::dictionary::fromASCII(), ibis::util::getFileSize(), ibis::gParameters(), ibis::tafel::column::indexSpec, ibis::fileManager::instance(), ibis::INT, ibis::util::int2string(), ibis::LONG, ibis::tafel::column::name, ibis::horometer::realTime(), ibis::util::secondsToString(), ibis::SHORT, ibis::dictionary::size(), ibis::horometer::start(), ibis::horometer::stop(), ibis::tafel::column::type, ibis::TYPESTRING, ibis::UBYTE, ibis::UINT, ibis::ULONG, ibis::USHORT, and ibis::dictionary::write().
|
protected |
Meta tags.
They are optional name-value pairs meant to describe the data partition.