FasTensor  1.0.0
Transform Supercomputing for AI
ft_endpoint_dir.h
Go to the documentation of this file.
1 /*
2 ****************************
3 
4 FasTensor (FT) Copyright (c) 2021, The Regents of the University of
5 California, through Lawrence Berkeley National Laboratory (subject to
6 receipt of any required approvals from the U.S. Dept. of Energy).
7 All rights reserved.
8 
9 If you have questions about your rights to use or distribute this software,
10 please contact Berkeley Lab's Intellectual Property Office at
12 
13 NOTICE. This Software was developed under funding from the U.S. Department
14 of Energy and the U.S. Government consequently retains certain rights. As
15 such, the U.S. Government has been granted for itself and others acting on
16 its behalf a paid-up, nonexclusive, irrevocable, worldwide license in the
17 Software to reproduce, distribute copies to the public, prepare derivative
18 works, and perform publicly and display publicly, and to permit others to do so.
19 
20 
21 ****************************
22 
23 
24 *** License Agreement ***
25 
26 FasTensor (FT) Copyright (c) 2021, The Regents of the University of
27 California, through Lawrence Berkeley National Laboratory (subject to
28 receipt of any required approvals from the U.S. Dept. of Energy).
29 All rights reserved.
30 
31 Redistribution and use in source and binary forms, with or without
32 modification, are permitted provided that the following conditions are met:
33 
34 (1) Redistributions of source code must retain the above copyright notice,
35 this list of conditions and the following disclaimer.
36 
37 (2) Redistributions in binary form must reproduce the above copyright
38 notice, this list of conditions and the following disclaimer in the
39 documentation and/or other materials provided with the distribution.
40 
41 (3) Neither the name of the University of California, Lawrence Berkeley
42 National Laboratory, U.S. Dept. of Energy nor the names of its contributors
43 may be used to endorse or promote products derived from this software
44 without specific prior written permission.
45 
46 
47 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
48 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
51 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
52 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
53 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
54 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
55 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
56 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
57 POSSIBILITY OF SUCH DAMAGE.
58 
59 You are under no obligation whatsoever to provide any bug fixes, patches,
60 or upgrades to the features, functionality or performance of the source
61 code ("Enhancements") to anyone; however, if you choose to make your
62 Enhancements available either publicly, or directly to Lawrence Berkeley
63 National Laboratory, without imposing a separate written license agreement
64 for such Enhancements, then you hereby grant the following license: a
65 non-exclusive, royalty-free perpetual license to install, use, modify,
66 prepare derivative works, incorporate into other computer software,
67 distribute, and sublicense such enhancements or derivative works thereof,
68 in binary and source code form.
69 */
70 
80 #ifndef END_POINT_DIR_H
81 #define END_POINT_DIR_H
82 
// Operation codes specific to the directory endpoint.
// NOTE(review): presumably passed as `opt_code` to EndpointDIR::Control() - confirm
// against ft_endpoint_dir.cpp. Values are kept unique and contiguous (0..13).
// The "OUPUT" spelling is part of the public macro names and is kept for callers.
#define DIR_MERGE_INDEX 0
#define DIR_SUB_CMD_ARG 1
#define DIR_INPUT_SEARCH_RGX 2
#define DIR_OUPUT_REPLACE_RGX 3
#define DIR_OUPUT_REPLACE_RGX_ARG 4
#define DIR_FILE_SORT_INDEXES 5
#define DIR_N_FILES 6
#define DIR_GET_FILE_SIZE 7
#define DIR_SET_CHUNK_SIZE 8
#define DIR_GET_ALL_CHUNK_TAGS 9
#define DIR_SET_ALL_CHUNK_TAGS 10

// TODO: the codes below are reserved; marked as not finished in the original header.
#define DIR_INPUT_ELASTIC_SIZE 11
#define DIR_OUTPUT_ELASTIC_SIZE 12
#define DIR_SET_OUTPUT_FILE_NAMES 13
99 
100 #include "ft_utility.h"
101 #include "ft_type.h"
102 #include "ft_endpoint.h"
103 #include "ft_endpoint_hdf5.h"
104 #include "ft_endpoint_tdms.h"
105 #include <string>
106 #include <iostream>
107 #include <vector>
108 #include <math.h>
109 #include <regex>
110 
111 //
112 //I/O layer
113 class EndpointDIR : public Endpoint
114 {
115 private:
116  std::string endpoint_info;
117  AuEndpointType sub_endpoint_type;
118  std::string sub_endpoint_info; //Directory of files
119  Endpoint *sub_endpoint = nullptr;
120 
121  std::string dir_str;
122  std::vector<std::string> dir_file_list;
123  int dir_file_list_current_index = 0; //index of the current one in read/write
124 
125  std::string append_sub_endpoint_info; //dir_file_list[i] + append_sub_endpoint_info is the finale sub_endpoint_info
126  std::vector<int> dir_chunk_size, dir_overlap_size; //set chunk size to be each sub_endpoint
127 
128  int dir_data_merge_index = 0;
129 
130  //http://www.cplusplus.com/reference/regex/ECMAScript/
131  bool input_replace_regex_flag = false;
132  std::regex *input_filter_regex; //The regex to "search" on list of file as input
133 
134  bool output_replace_regex_flag = false;
135  std::regex *output_replace_regex; //The regex to "replace" on list of file as output (input)
136  std::string output_replace_regex_aug;
137  std::string output_replace_regex_match_str;
138 
139  bool has_ordering_on_file_list = false; //User may apply ordering on list, based on sorted ordring
140  std::vector<size_t> order_on_file_list;
141 
142 public:
148  EndpointDIR(std::string endpoint_info_p)
149  {
150  endpoint_info = endpoint_info_p;
152  if (sub_endpoint_type == EP_HDF5)
153  {
154  sub_endpoint = new EndpointHDF5();
155  std::vector<std::string> cmd_argv;
156  sub_endpoint->Control(OP_DISABLE_MPI_IO, cmd_argv);
157  sub_endpoint->Control(OP_DISABLE_COLLECTIVE_IO, cmd_argv);
158  }
159  else if (sub_endpoint_type == EP_TDMS)
160  {
161  sub_endpoint = new EndpointTDMS();
162  }
164  sub_endpoint->SetDataElementType(data_element_type);
165  }
166 
168  {
169  }
170 
172  {
173  if (sub_endpoint != nullptr)
174  delete sub_endpoint;
175  }
181  int ExtractMeta() override;
187  int PrintInfo() override;
188 
194  int Create() override;
195 
201  int Open() override;
202 
211  int Read(std::vector<unsigned long long> start, std::vector<unsigned long long> end, void *data) override;
212 
221  int Write(std::vector<unsigned long long> start, std::vector<unsigned long long> end, void *data) override;
222 
228  int Close() override;
229 
230  void Map2MyType() override;
231 
232  void EnableCollectiveIO() override;
233 
234  void DisableCollectiveIO() override;
235 
236  int ParseEndpointInfo() override;
237 
243  std::vector<int> GetDirChunkSize() override;
244 
245  void SetDirChunkSize(std::vector<int> &dir_chunk_size_p) override;
246 
247  std::vector<std::string> GetDirFileVector() override;
248 
249  void SetDirFileVector(std::vector<std::string> &file_list) override;
250 
257  int Control(int opt_code, std::vector<std::string> &parameter_v) override;
258 
264  void SetMergeIndex(int index_p);
265 
271  int GetMergeIndex();
272 
280  int WriteAttribute(const std::string &name, const void *data, FTDataType data_type_p, const size_t &data_length_p = 0) override;
281 
289  int ReadAttribute(const std::string &name, void *data, FTDataType data_type_p, const size_t &data_length_p = 0) override;
290 
291  int GetAttributeSize(const std::string &name, FTDataType data_type_p) override;
292 
299  int ReadAllAttributeName(std::vector<std::string> &attr_name);
300 };
301 #endif
Definition: ft_endpoint_dir.h:114
int ParseEndpointInfo() override
parse endpoint_info to my own info
Definition: ft_endpoint_dir.cpp:402
int PrintInfo() override
print information about the endpoint
Definition: ft_endpoint_dir.cpp:392
int Create() override
create the endpoint
Definition: ft_endpoint_dir.cpp:173
int WriteAttribute(const std::string &name, const void *data, FTDataType data_type_p, const size_t &data_length_p=0) override
Set the Attribute object. Does not need to be a pure virtual method.
Definition: ft_endpoint_dir.cpp:621
int Read(std::vector< unsigned long long > start, std::vector< unsigned long long > end, void *data) override
read the data from end-point
Definition: ft_endpoint_dir.cpp:194
int ReadAttribute(const std::string &name, void *data, FTDataType data_type_p, const size_t &data_length_p=0) override
Get the Attribute object. Does not need to be a pure virtual method.
Definition: ft_endpoint_dir.cpp:652
int ReadAllAttributeName(std::vector< std::string > &attr_name)
Read all attribute name.
Definition: ft_endpoint_dir.cpp:603
std::vector< std::string > GetDirFileVector() override
Get the Dir File Vector object.
Definition: ft_endpoint_dir.cpp:438
void SetDirFileVector(std::vector< std::string > &file_list) override
Set the Dir File Vector object.
Definition: ft_endpoint_dir.cpp:443
EndpointDIR()
Definition: ft_endpoint_dir.h:167
int GetMergeIndex()
Get the Merge Index object.
Definition: ft_endpoint_dir.cpp:592
~EndpointDIR()
Definition: ft_endpoint_dir.h:171
int Write(std::vector< unsigned long long > start, std::vector< unsigned long long > end, void *data) override
write the data to the end-point
Definition: ft_endpoint_dir.cpp:297
EndpointDIR(std::string endpoint_info_p)
Construct a new EndpointDIR object.
Definition: ft_endpoint_dir.h:148
void Map2MyType() override
call the finalize to close everything (like calling the Destructor)
Definition: ft_endpoint_dir.cpp:398
void SetMergeIndex(int index_p)
Set the Merge Index.
Definition: ft_endpoint_dir.cpp:582
int ExtractMeta() override
extracts metadata, possible endpoint_ranks/endpoint_dim_size/data_element_type
Definition: ft_endpoint_dir.cpp:88
void SetDirChunkSize(std::vector< int > &dir_chunk_size_p) override
Set the Dir Chunk Size object.
Definition: ft_endpoint_dir.cpp:433
int GetAttributeSize(const std::string &name, FTDataType data_type_p) override
Definition: ft_endpoint_dir.cpp:664
void DisableCollectiveIO() override
Definition: ft_endpoint_dir.cpp:388
void EnableCollectiveIO() override
Definition: ft_endpoint_dir.cpp:384
int Open() override
open the endpoint
Definition: ft_endpoint_dir.cpp:181
int Control(int opt_code, std::vector< std::string > &parameter_v) override
call a special operator on endpoint such as, enable collective I/O for HDF5 dump file from MEMORY to ...
Definition: ft_endpoint_dir.cpp:493
int Close() override
close the end-point
Definition: ft_endpoint_dir.cpp:379
std::vector< int > GetDirChunkSize() override
Get the Chunk Size object.
Definition: ft_endpoint_dir.cpp:428
Definition: ft_endpoint_hdf5.h:108
Define the class for the Endpoint used by ArrayUDF to store the data. It contains basic information fo...
Definition: ft_endpoint.h:106
void SetDataElementType(AuEndpointDataType data_element_type_p)
set the type of data element
Definition: ft_endpoint.cpp:104
void SetEndpointType(AuEndpointType endpoint_type_p)
Set the Endpoint Type object.
Definition: ft_endpoint.cpp:355
AuEndpointDataType data_element_type
Definition: ft_endpoint.h:113
virtual int Control(int opt_code, std::vector< std::string > &parameter_v)
call a special operator on endpoint such as, enable collective I/O for HDF5 dump file from MEMORY to ...
Definition: ft_endpoint.cpp:421
Definition: ft_endpoint_tdms.h:130
#define OP_DISABLE_COLLECTIVE_IO
Definition: ft_endpoint_hdf5.h:103
#define OP_DISABLE_MPI_IO
Definition: ft_endpoint_hdf5.h:101
AuEndpointType
Definition: ft_type.h:95
@ EP_HDF5
Definition: ft_type.h:96
@ EP_DIR
Definition: ft_type.h:103
@ EP_TDMS
Definition: ft_type.h:105
AuEndpointDataType
Definition: ft_type.h:118