19 #ifndef avro_DataFile_hh__ 
   20 #define avro_DataFile_hh__ 
   24 #include "buffer/Buffer.hh" 
   25 #include "ValidSchema.hh" 
   26 #include "Specific.hh" 
   33 #include "boost/array.hpp" 
   34 #include "boost/utility.hpp" 
   35 #include <boost/iostreams/filtering_stream.hpp> 
   36 #include <boost/scoped_ptr.hpp> 
   57     const std::string filename_;
 
   60     const size_t syncInterval_;
 
   63     std::auto_ptr<OutputStream> stream_;
 
   64     std::auto_ptr<OutputStream> buffer_;
 
   65     const DataFileSync sync_;
 
   68     typedef std::map<std::string, std::vector<uint8_t> > Metadata;
 
   72     static std::auto_ptr<OutputStream> makeStream(
const char* filename);
 
   73     static DataFileSync makeSync();
 
   76     void setMetadata(
const std::string& key, 
const std::string& value);
 
  105         size_t syncInterval, 
Codec codec = NULL_CODEC);
 
  128 template <
typename T>
 
  130     std::auto_ptr<DataFileWriterBase> base_;
 
  136         size_t syncInterval = 16 * 1024, 
Codec codec = NULL_CODEC) :
 
  143         base_->syncIfNeeded();
 
  169     const std::string filename_;
 
  170     const std::auto_ptr<InputStream> stream_;
 
  172     int64_t objectCount_;
 
  179     std::auto_ptr<InputStream> dataStream_;
 
  180     typedef std::map<std::string, std::vector<uint8_t> > Metadata;
 
  186     boost::scoped_ptr<boost::iostreams::filtering_istream> os_;
 
  187     std::vector<char> compressed_;
 
  191     bool readDataBlock();
 
  206     void decr() { --objectCount_; }
 
  250 template <
typename T>
 
  252     std::auto_ptr<DataFileReaderBase> base_;
 
  260         base_->init(readerSchema);
 
  297         base_->init(readerSchema);
 
  306         if (base_->hasMore()) {
 
  327     void close() { 
return base_->close(); }
 
Low level support for encoding avro values. 
const ValidSchema & schema() const 
Returns the schema for this data file. 
Definition: DataFile.hh:157
Type-independent portion of DataFileWriter. 
Definition: DataFile.hh:56
void decode(Decoder &d, T &t)
Generic decoder function that makes use of the codec_traits. 
Definition: Specific.hh:304
bool read(T &datum)
Reads the next entry from the data file. 
Definition: DataFile.hh:305
const ValidSchema & readerSchema()
Returns the schema for this object. 
Definition: DataFile.hh:234
boost::shared_ptr< Decoder > DecoderPtr
Shared pointer to Decoder. 
Definition: Decoder.hh:161
A bunch of templates and specializations for encoding and decoding specific types. 
Definition: AvroParse.hh:31
void decr()
Decrements the number of objects yet to read. 
Definition: DataFile.hh:206
void close()
Closes the current file. 
Definition: DataFile.hh:152
DataFileReader(const char *filename, const ValidSchema &readerSchema)
Constructs the reader for the given file and the reader is expected to use the given schema...
Definition: DataFile.hh:258
DataFileReader(std::auto_ptr< DataFileReaderBase > base)
Constructs a reader using the reader base. 
Definition: DataFile.hh:282
An Avro datafile that can store objects of type T. 
Definition: DataFile.hh:129
void incr()
Increments the object count. 
Definition: DataFile.hh:98
void flush()
Flushes any unwritten data into the file. 
Definition: DataFile.hh:162
DataFileReader(const char *filename)
Constructs the reader for the given file and the reader is expected to use the schema that is used wi...
Definition: DataFile.hh:267
const ValidSchema & dataSchema()
Returns the schema stored with the data file. 
Definition: DataFile.hh:322
Decoder & decoder()
Returns the current decoder for this reader. 
Definition: DataFile.hh:196
void write(const T &datum)
Writes the given piece of data into the file. 
Definition: DataFile.hh:142
Codec
Specify type of compression to use when writing data files. 
Definition: DataFile.hh:41
void encode(Encoder &e, const T &t)
Generic encoder function that makes use of the codec_traits. 
Definition: Specific.hh:296
const ValidSchema & readerSchema()
Returns the schema for this object. 
Definition: DataFile.hh:317
The type-independent portion of the reader. 
Definition: DataFile.hh:168
A ValidSchema is basically a non-mutable Schema that has passed some minimum of sanity checks...
Definition: ValidSchema.hh:40
Encoder & encoder() const 
Returns the current encoder for this writer. 
Definition: DataFile.hh:87
Reads the contents of data file one after another. 
Definition: DataFile.hh:251
const ValidSchema & dataSchema()
Returns the schema stored with the data file. 
Definition: DataFile.hh:239
boost::shared_ptr< Encoder > EncoderPtr
Shared pointer to Encoder. 
Definition: Encoder.hh:144
DataFileWriter(const char *filename, const ValidSchema &schema, size_t syncInterval=16 *1024, Codec codec=NULL_CODEC)
Constructs a new data file. 
Definition: DataFile.hh:135
boost::array< uint8_t, 16 > DataFileSync
The sync value. 
Definition: DataFile.hh:49
DataFileReader(std::auto_ptr< DataFileReaderBase > base, const ValidSchema &readerSchema)
Constructs a reader using the reader base. 
Definition: DataFile.hh:295
The abstract base class for all Avro encoders. 
Definition: Encoder.hh:53
void close()
Closes the reader. 
Definition: DataFile.hh:327
Decoder is an interface implemented by every decoder capable of decoding Avro data. 
Definition: Decoder.hh:49
const ValidSchema & schema() const 
Returns the schema for this data file. 
Definition: DataFile.hh:117