hdf5.h
- performance
h5py
- usability
h5py
(Andrew's Example)1: temperature = np.random.random(1024) 2: wind = np.random.random(2048) 3: f = h5py.File('weather.hdf5') 4: f["/15/temperature"] = temperature 5: f["/15/temperature"].attrs["dt"] = 10.0 6: f["/15/wind"] = wind 7: 8: dataset[0:10:2] 9: 10: big_dataset = f.create_dataset("big", 11: shape=(1024, 1024, 1024, 512), 12: dtype='float32') 13: big_dataset[344, 678, 23, 36] = 42.0 14: 15: compressed_dataset = f.create_dataset("comp", shape=(1024,), 16: dtype='int32', 17: compression='gzip')
H5CPP
(Pretty Close)1: fvec temperature = arma::randu<fvec>(1024); 2: fvec wind = arma::randu<fvec>(2048); 3: auto fd = h5::create("weather.hdf5", H5F_ACC_TRUNC); 4: auto ds = h5::write(fd, "/15/temperature", temperature); 5: ds["dt"] = 10.0f; 6: h5::write(fd, "/15/wind", wind); 7: 8: h5::read<fvec>(ds, h5::offset{0}, h5::count{5}, h5::stride{2}); 9: 10: auto big = h5::create<float>(fd,"big" 11: ,h5::current_dims{1024, 1024, 1024, 512} 12: ,h5::chunk{16, 16, 16, 8}); 13: float value {42.0}; 14: h5::write<float>(big, &value 15: ,h5::offset{344, 678, 23, 36} 16: ,h5::count{1, 1, 1, 1}); 17: 18: auto comp = h5::create<int>(fd, "comp" 19: ,h5::current_dims{1024} 20: ,h5::chunk{64} | h5::gzip{4});
hdf5.h
(Not Even Close)"The C programming language does not have a sense of humor." (Pieter Hintjens)
fd = H5Fcreate("weather.hdf5", H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); // > 20 different API calls: // // H5Acreate, H5Aclose, H5Awrite, // H5Dcreate, H5Dclose, H5Dread, H5Dwrite, // H5Pcreate, H5Pclose, H5Pset_create_intermediate_group, // H5Pset_layout, H5Pset_chunk, H5Pset_deflate, // H5Screate, H5Sclose, H5Screate_simple, H5Sselect_hyperslab H5Fclose(fd);
H5CPP comes in two parts:
(You can use 1. w/o using 2.)
RAII idiom for descriptors
h5::fd_t fd = h5::create("example.h5",H5F_ACC_TRUNC);
Built-in types, POD structs or arrays of them
arma::fvec vec = arma::randu<fvec>(1024);
Templates accept type-safe configuration arguments in any order
h5::write(fd, "path/to/object", vec, h5::current_dims{100}, h5::max_dims{H5S_UNLIMITED}, h5::offset{3}, h5::block{2}, h5::stride{2}, h5::chunk{20} | h5::gzip{9} );
hid_t
handles1: // H5CPP_CONVERSION_EXPLICIT 2: // H5CPP_CONVERSION_FROM_CAPI_DISABLED 3: // H5CPP_CONVERSION_TO_CAPI_DISABLED 4: 5: h5::fd_t fd = h5::create("example.h5",H5F_ACC_TRUNC); 6: hsize_t size {0}; 7: 8: // compiler error if H5CPP_CONVERSION_EXPLICIT is defined 9: H5Fget_filesize(fd, &size); 10: 11: // compiler error if H5CPP_CONVERSION_TO_CAPI_DISABLED is def. 12: H5Fget_filesize(static_cast<hid_t>(fd), &size);
h5::error::any
1: // capture errors centrally with the granularity you desire 2: try { 3: my_deeply_embedded_io_calls(); 4: } catch ( const h5::error::io::dataset::create& e ){ 5: // handle dataset creation error 6: } catch ( const h5::error::io::dataset::write& e ){ 7: } catch ( const h5::error::io::file::create& e ){ 8: } catch ( const h5::error::io::file::close& e ){ 9: } catch ( const h5::error::any& e ) { 10: std::cerr << e.what() << std::endl; 11: }
1: typedef struct s1_t { // Write this 2: int a; float b; double c; 3: } s1_t; 4: typedef struct s2_t { // Read that 5: double c; int a; 6: } s2_t; 7: 8: // Easy 9: 10: std::vector<s1_t> s1(LENGTH); 11: std::generate( std::begin(s1), std::end(s1), [i=-1]() mutable { 12: return s1_t{++i,static_cast<float>(i*i), 1.0/(i+1)}; }); 13: 14: auto fd = h5::create(H5FILE_NAME, H5F_ACC_TRUNC); 15: h5::write(fd, DATASETNAME, s1); 16: 17: auto data = h5::read< std::vector<s2_t> >(fd, DATASETNAME); 18: std::cout << "reading back data previously written:\n\t"; 19: for (auto r:data) 20: std::cout << r.c << " "; 21: std::cout << std::endl;
1: #include <h5cpp/core> 2: #include "your_data_definition.h" // <- struct { ... }; 3: #include <h5cpp/io> 4: auto fd = h5::create("NYSE high freq dataset.h5"); 5: h5::pt_t pt = h5::create<ns::nyse_stock_quote> 6: (fd 7: ,"price_quotes/2019-01-24.qte" 8: ,h5::max_dims{H5S_UNLIMITED} 9: ,h5::chunk{1024} | h5::gzip{9} ); 10: quote_update_t qu; 11: bool having_a_good_day{true}; 12: while( having_a_good_day ){ 13: try{ 14: receive_data_from_udp_stream( qu ); 15: h5::append(pt, qu); 16: } catch ( ... ){ 17: if( cant_fix_connection() ) 18: having_a_good_day = false; 19: } 20: }
Blast off!
1: h5::ds_t ds = h5::open(fd,"some_dataset", h5::high_throughput);
Measurements below from a Lenovo notebook:
[MB/s] | write | read |
---|---|---|
h5cpp: block | 265.69 | 518.413 |
h5cpp:packet | 290.57 | 499.881 |
POSIX IO | 288.566 | 513.516 |
[MB/s] | write | read |
---|---|---|
h5cpp: block | 2457.41 | 3251.99 |
h5cpp:packet | 1636.30 | 3151.15 |
POSIX IO | 1443.59 | 5325.60 |
(… and let us know what you think!)
h5cpp-dev_1.10.4.1_amd64.deb (headers)
h5cpp_1.10.4.1_amd64.deb (LLVM)
sudo dpkg -i ...
Hold Your Questions For Later!