19 #ifndef avro_DataFile_hh__    20 #define avro_DataFile_hh__    24 #include "buffer/Buffer.hh"    25 #include "ValidSchema.hh"    26 #include "Specific.hh"    34 #include "boost/utility.hpp"    35 #include <boost/iostreams/filtering_stream.hpp>    44 #ifdef SNAPPY_CODEC_AVAILABLE    50 const int SyncSize = 16;
    62     const std::string filename_;
    65     const size_t syncInterval_;
    68     std::unique_ptr<OutputStream> stream_;
    69     std::unique_ptr<OutputStream> buffer_;
    70     const DataFileSync sync_;
    73     typedef std::map<std::string, std::vector<uint8_t> > Metadata;
    78     static std::unique_ptr<OutputStream> makeStream(
const char* filename);
    79     static DataFileSync makeSync();
    82     void setMetadata(
const std::string& key, 
const std::string& value);
    92     void init(
const ValidSchema &schema, 
size_t syncInterval, 
const Codec &codec);
   109     uint64_t getCurrentBlockStart();
   121         size_t syncInterval, 
Codec codec = NULL_CODEC);
   146 template <
typename T>
   148     std::unique_ptr<DataFileWriterBase> base_;
   154         size_t syncInterval = 16 * 1024, 
Codec codec = NULL_CODEC) :
   158         size_t syncInterval = 16 * 1024, 
Codec codec = NULL_CODEC) :
   159         base_(
new DataFileWriterBase(std::move(outputStream), schema, syncInterval, codec)) { }
   165         base_->syncIfNeeded();
   197     const std::string filename_;
   198     const std::unique_ptr<InputStream> stream_;
   200     int64_t objectCount_;
   209     std::unique_ptr<InputStream> dataStream_;
   210     typedef std::map<std::string, std::vector<uint8_t> > Metadata;
   216     std::unique_ptr<boost::iostreams::filtering_istream> os_;
   217     std::vector<char> compressed_;
   218     std::string uncompressed;
   221     void readDataBlock();
   222     void doSeek(int64_t position);
   237     void decr() { --objectCount_; }
   283     void seek(int64_t position);
   290     void sync(int64_t position);
   295     bool pastSync(int64_t position);
   300     int64_t previousSync();
   306 template <
typename T>
   308     std::unique_ptr<DataFileReaderBase> base_;
   316         base_->init(readerSchema);
   321         base_->init(readerSchema);
   362         base_->init(readerSchema);
   371         if (base_->hasMore()) {
   392     void close() { 
return base_->close(); }
   398     void seek(int64_t position) { base_->seek(position); }
   405     void sync(int64_t position) { base_->sync(position); }
   410     bool pastSync(int64_t position) { 
return base_->pastSync(position); }
 Low level support for encoding avro values. 
 
Type-independent portion of DataFileWriter. 
Definition: DataFile.hh:61
 
void decode(Decoder &d, T &t)
Generic decoder function that makes use of the codec_traits. 
Definition: Specific.hh:339
 
const ValidSchema & schema() const
Returns the schema for this data file. 
Definition: DataFile.hh:135
 
bool read(T &datum)
Reads the next entry from the data file. 
Definition: DataFile.hh:370
 
const ValidSchema & readerSchema()
Returns the schema for this object. 
Definition: DataFile.hh:267
 
const ValidSchema & schema() const
Returns the schema for this data file. 
Definition: DataFile.hh:185
 
std::array< uint8_t, SyncSize > DataFileSync
The sync value. 
Definition: DataFile.hh:54
 
A bunch of templates and specializations for encoding and decoding specific types. 
Definition: AvroParse.hh:30
 
void decr()
Decrements the number of objects yet to read. 
Definition: DataFile.hh:237
 
void close()
Closes the current file. 
Definition: DataFile.hh:180
 
DataFileReader(const char *filename, const ValidSchema &readerSchema)
Constructs the reader for the given file and the reader is expected to use the given schema...
Definition: DataFile.hh:314
 
int64_t previousSync()
Return the last synchronization point before our current position. 
Definition: DataFile.hh:415
 
An Avro datafile that can store objects of type T. 
Definition: DataFile.hh:147
 
void incr()
Increments the object count. 
Definition: DataFile.hh:114
 
void flush()
Flushes any unwritten data into the file. 
Definition: DataFile.hh:190
 
DataFileReader(const char *filename)
Constructs the reader for the given file and the reader is expected to use the schema that is used wi...
Definition: DataFile.hh:328
 
const ValidSchema & dataSchema()
Returns the schema stored with the data file. 
Definition: DataFile.hh:387
 
std::shared_ptr< Encoder > EncoderPtr
Shared pointer to Encoder. 
Definition: Encoder.hh:147
 
Decoder & decoder()
Returns the current decoder for this reader. 
Definition: DataFile.hh:227
 
void write(const T &datum)
Writes the given piece of data into the file. 
Definition: DataFile.hh:164
 
std::shared_ptr< Decoder > DecoderPtr
Shared pointer to Decoder. 
Definition: Decoder.hh:177
 
Codec
Specify type of compression to use when writing data files. 
Definition: DataFile.hh:40
 
void encode(Encoder &e, const T &t)
Generic encoder function that makes use of the codec_traits. 
Definition: Specific.hh:331
 
const ValidSchema & readerSchema()
Returns the schema for this object. 
Definition: DataFile.hh:382
 
The type-independent portion of the reader. 
Definition: DataFile.hh:196
 
uint64_t getCurrentBlockStart()
Returns the byte offset (within the current file) of the start of the current block being written...
Definition: DataFile.hh:173
 
A ValidSchema is basically a non-mutable Schema that has passed some minimum of sanity checks...
Definition: ValidSchema.hh:40
 
void seek(int64_t position)
Move to a specific, known synchronization point, for example one returned from previousSync(). 
Definition: DataFile.hh:398
 
Reads the contents of data file one after another. 
Definition: DataFile.hh:307
 
DataFileReader(std::unique_ptr< DataFileReaderBase > base, const ValidSchema &readerSchema)
Constructs a reader using the reader base. 
Definition: DataFile.hh:360
 
const ValidSchema & dataSchema()
Returns the schema stored with the data file. 
Definition: DataFile.hh:272
 
DataFileWriter(const char *filename, const ValidSchema &schema, size_t syncInterval=16 *1024, Codec codec=NULL_CODEC)
Constructs a new data file. 
Definition: DataFile.hh:153
 
DataFileReader(std::unique_ptr< DataFileReaderBase > base)
Constructs a reader using the reader base. 
Definition: DataFile.hh:347
 
bool pastSync(int64_t position)
Return true if past the next synchronization point after a position. 
Definition: DataFile.hh:410
 
The abstract base class for all Avro encoders. 
Definition: Encoder.hh:52
 
Encoder & encoder() const
Returns the current encoder for this writer. 
Definition: DataFile.hh:98
 
void close()
Closes the reader. 
Definition: DataFile.hh:392
 
void sync(int64_t position)
Move to the next synchronization point after a position. 
Definition: DataFile.hh:405
 
Decoder is an interface implemented by every decoder capable of decoding Avro data. 
Definition: Decoder.hh:48