Avro C++
GenericDatum.hh
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * https://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */
18 
19 #ifndef avro_GenericDatum_hh__
20 #define avro_GenericDatum_hh__
21 
22 #include <cstdint>
23 #include <map>
24 #include <string>
25 #include <vector>
26 
27 #if __cplusplus >= 201703L
28 #include <any>
29 #else
30 #include "boost/any.hpp"
31 #endif
32 
33 #include "LogicalType.hh"
34 #include "Node.hh"
35 #include "ValidSchema.hh"
36 
37 namespace avro {
38 
61 class AVRO_DECL GenericDatum {
62 protected:
63  Type type_;
64  LogicalType logicalType_;
65 #if __cplusplus >= 201703L
66  std::any value_;
67 #else
68  boost::any value_;
69 #endif
70 
71  explicit GenericDatum(Type t)
72  : type_(t), logicalType_(LogicalType::NONE) {}
73 
74  GenericDatum(Type t, LogicalType logicalType)
75  : type_(t), logicalType_(logicalType) {}
76 
77  template<typename T>
78  GenericDatum(Type t, LogicalType logicalType, const T &v)
79  : type_(t), logicalType_(logicalType), value_(v) {}
80 
81  void init(const NodePtr &schema);
82 
83 public:
87  Type type() const;
88 
92  LogicalType logicalType() const;
93 
99  template<typename T>
100  const T &value() const;
101 
111  template<typename T>
112  T &value();
113 
117  bool isUnion() const { return type_ == AVRO_UNION; }
118 
123  size_t unionBranch() const;
124 
129  void selectBranch(size_t branch);
130 
132  GenericDatum() : type_(AVRO_NULL), logicalType_(LogicalType::NONE) {}
133 
136  // NOLINTNEXTLINE(google-explicit-constructor)
137  GenericDatum(bool v)
138  : type_(AVRO_BOOL), logicalType_(LogicalType::NONE), value_(v) {}
139 
142  // NOLINTNEXTLINE(google-explicit-constructor)
143  GenericDatum(int32_t v)
144  : type_(AVRO_INT), logicalType_(LogicalType::NONE), value_(v) {}
145 
148  // NOLINTNEXTLINE(google-explicit-constructor)
149  GenericDatum(int64_t v)
150  : type_(AVRO_LONG), logicalType_(LogicalType::NONE), value_(v) {}
151 
154  // NOLINTNEXTLINE(google-explicit-constructor)
155  GenericDatum(float v)
156  : type_(AVRO_FLOAT), logicalType_(LogicalType::NONE), value_(v) {}
157 
160  // NOLINTNEXTLINE(google-explicit-constructor)
161  GenericDatum(double v)
162  : type_(AVRO_DOUBLE), logicalType_(LogicalType::NONE), value_(v) {}
163 
166  // NOLINTNEXTLINE(google-explicit-constructor)
167  GenericDatum(const std::string &v)
168  : type_(AVRO_STRING), logicalType_(LogicalType::NONE), value_(v) {}
169 
173  // NOLINTNEXTLINE(google-explicit-constructor)
174  GenericDatum(const std::vector<uint8_t> &v) : type_(AVRO_BYTES), logicalType_(LogicalType::NONE), value_(v) {}
175 
182  // NOLINTNEXTLINE(google-explicit-constructor)
184  GenericDatum(const NodePtr &schema);
185 
192  template<typename T>
193  GenericDatum(const NodePtr &schema, const T &v) : type_(schema->type()), logicalType_(schema->logicalType()) {
194  init(schema);
195 #if __cplusplus >= 201703L
196  *std::any_cast<T>(&value_) = v;
197 #else
198  *boost::any_cast<T>(&value_) = v;
199 #endif
200  }
201 
208  explicit GenericDatum(const ValidSchema &schema);
209 };
210 
214 class AVRO_DECL GenericContainer {
215  NodePtr schema_;
216  static void assertType(const NodePtr &schema, Type type);
217 
218 protected:
222  GenericContainer(Type type, const NodePtr &s) : schema_(s) {
223  assertType(s, type);
224  }
225 
226 public:
228  const NodePtr &schema() const {
229  return schema_;
230  }
231 };
232 
236 class AVRO_DECL GenericUnion : public GenericContainer {
237  size_t curBranch_;
238  GenericDatum datum_;
239 
240 public:
246  explicit GenericUnion(const NodePtr &schema) : GenericContainer(AVRO_UNION, schema), curBranch_(schema->leaves()) {
247  selectBranch(0);
248  }
249 
253  size_t currentBranch() const { return curBranch_; }
254 
259  void selectBranch(size_t branch) {
260  if (curBranch_ != branch) {
261  datum_ = GenericDatum(schema()->leafAt(branch));
262  curBranch_ = branch;
263  }
264  }
265 
271  return datum_;
272  }
273 
278  const GenericDatum &datum() const {
279  return datum_;
280  }
281 };
282 
286 class AVRO_DECL GenericRecord : public GenericContainer {
287  std::vector<GenericDatum> fields_;
288 
289 public:
294  explicit GenericRecord(const NodePtr &schema);
295 
299  size_t fieldCount() const {
300  return fields_.size();
301  }
302 
306  size_t fieldIndex(const std::string &name) const {
307  size_t index = 0;
308  if (!schema()->nameIndex(name, index)) {
309  throw Exception("Invalid field name: " + name);
310  }
311  return index;
312  }
313 
318  bool hasField(const std::string &name) const {
319  size_t index = 0;
320  return schema()->nameIndex(name, index);
321  }
322 
326  const GenericDatum &field(const std::string &name) const {
327  return fieldAt(fieldIndex(name));
328  }
329 
334  GenericDatum &field(const std::string &name) {
335  return fieldAt(fieldIndex(name));
336  }
337 
341  const GenericDatum &fieldAt(size_t pos) const {
342  return fields_[pos];
343  }
344 
349  GenericDatum &fieldAt(size_t pos) {
350  return fields_[pos];
351  }
352 
356  void setFieldAt(size_t pos, const GenericDatum &v) {
357  // assertSameType(v, schema()->leafAt(pos));
358  fields_[pos] = v;
359  }
360 };
361 
365 class AVRO_DECL GenericArray : public GenericContainer {
366 public:
370  typedef std::vector<GenericDatum> Value;
371 
376  explicit GenericArray(const NodePtr &schema) : GenericContainer(AVRO_ARRAY, schema) {
377  }
378 
382  const Value &value() const {
383  return value_;
384  }
385 
390  return value_;
391  }
392 
393 private:
394  Value value_;
395 };
396 
400 class AVRO_DECL GenericMap : public GenericContainer {
401 public:
405  typedef std::vector<std::pair<std::string, GenericDatum>> Value;
406 
411  explicit GenericMap(const NodePtr &schema) : GenericContainer(AVRO_MAP, schema) {
412  }
413 
417  const Value &value() const {
418  return value_;
419  }
420 
425  return value_;
426  }
427 
428 private:
429  Value value_;
430 };
431 
435 class AVRO_DECL GenericEnum : public GenericContainer {
436  size_t value_;
437 
438  static size_t index(const NodePtr &schema, const std::string &symbol) {
439  size_t result;
440  if (schema->nameIndex(symbol, result)) {
441  return result;
442  }
443  throw Exception("No such symbol");
444  }
445 
446 public:
451  explicit GenericEnum(const NodePtr &schema) : GenericContainer(AVRO_ENUM, schema), value_(0) {
452  }
453 
454  GenericEnum(const NodePtr &schema, const std::string &symbol) : GenericContainer(AVRO_ENUM, schema), value_(index(schema, symbol)) {
455  }
456 
461  const std::string &symbol(size_t n) {
462  if (n < schema()->names()) {
463  return schema()->nameAt(n);
464  }
465  throw Exception("Not as many symbols");
466  }
467 
472  size_t index(const std::string &symbol) const {
473  return index(schema(), symbol);
474  }
475 
479  size_t set(const std::string &symbol) {
480  return value_ = index(symbol);
481  }
482 
486  void set(size_t n) {
487  if (n < schema()->names()) {
488  value_ = n;
489  return;
490  }
491  throw Exception("Not as many symbols");
492  }
493 
497  size_t value() const {
498  return value_;
499  }
500 
504  const std::string &symbol() const {
505  return schema()->nameAt(value_);
506  }
507 };
508 
512 class AVRO_DECL GenericFixed : public GenericContainer {
513  std::vector<uint8_t> value_;
514 
515 public:
520  explicit GenericFixed(const NodePtr &schema) : GenericContainer(AVRO_FIXED, schema) {
521  value_.resize(schema->fixedSize());
522  }
523 
524  GenericFixed(const NodePtr &schema, const std::vector<uint8_t> &v);
525 
529  const std::vector<uint8_t> &value() const {
530  return value_;
531  }
532 
536  std::vector<uint8_t> &value() {
537  return value_;
538  }
539 };
540 
541 inline Type GenericDatum::type() const {
542  return (type_ == AVRO_UNION) ?
543 #if __cplusplus >= 201703L
544  std::any_cast<GenericUnion>(&value_)->datum().type()
545  :
546 #else
547  boost::any_cast<GenericUnion>(&value_)->datum().type()
548  :
549 #endif
550  type_;
551 }
552 
554  return (type_ == AVRO_UNION) ?
555 #if __cplusplus >= 201703L
556  std::any_cast<GenericUnion>(&value_)->datum().logicalType() :
557 #else
558  boost::any_cast<GenericUnion>(&value_)->datum().logicalType() :
559 #endif
560  logicalType_;
561 }
562 
563 template<typename T>
565  return (type_ == AVRO_UNION) ?
566 #if __cplusplus >= 201703L
567  std::any_cast<GenericUnion>(&value_)->datum().value<T>()
568  : *std::any_cast<T>(&value_);
569 #else
570  boost::any_cast<GenericUnion>(&value_)->datum().value<T>()
571  : *boost::any_cast<T>(&value_);
572 #endif
573 }
574 
575 template<typename T>
576 const T &GenericDatum::value() const {
577  return (type_ == AVRO_UNION) ?
578 #if __cplusplus >= 201703L
579  std::any_cast<GenericUnion>(&value_)->datum().value<T>()
580  : *std::any_cast<T>(&value_);
581 #else
582  boost::any_cast<GenericUnion>(&value_)->datum().value<T>()
583  : *boost::any_cast<T>(&value_);
584 #endif
585 }
586 
587 inline size_t GenericDatum::unionBranch() const {
588 #if __cplusplus >= 201703L
589  return std::any_cast<GenericUnion>(&value_)->currentBranch();
590 #else
591  return boost::any_cast<GenericUnion>(&value_)->currentBranch();
592 #endif
593 }
594 
595 inline void GenericDatum::selectBranch(size_t branch) {
596 #if __cplusplus >= 201703L
597  std::any_cast<GenericUnion>(&value_)->selectBranch(branch);
598 #else
599  boost::any_cast<GenericUnion>(&value_)->selectBranch(branch);
600 #endif
601 }
602 
603 } // namespace avro
604 #endif // avro_GenericDatum_hh__
avro::AVRO_NULL
@ AVRO_NULL
Definition: Types.hh:40
avro::GenericDatum::type
Type type() const
The avro data type this datum holds.
Definition: GenericDatum.hh:541
avro::GenericArray::Value
std::vector< GenericDatum > Value
The contents type for the array.
Definition: GenericDatum.hh:370
avro::GenericRecord::field
GenericDatum & field(const std::string &name)
Returns the reference to the field with the given name name, which can be used to change the contents...
Definition: GenericDatum.hh:334
avro::GenericDatum::GenericDatum
GenericDatum(double v)
Makes a new AVRO_DOUBLE datum whose value is of type double.
Definition: GenericDatum.hh:161
avro::GenericRecord::field
const GenericDatum & field(const std::string &name) const
Returns the field with the given name name.
Definition: GenericDatum.hh:326
avro::GenericDatum::GenericDatum
GenericDatum(int64_t v)
Makes a new AVRO_LONG datum whose value is of type int64_t.
Definition: GenericDatum.hh:149
avro::AVRO_LONG
@ AVRO_LONG
Definition: Types.hh:36
avro::GenericDatum::isUnion
bool isUnion() const
Returns true if and only if this datum is a union.
Definition: GenericDatum.hh:117
avro::GenericFixed::value
const std::vector< uint8_t > & value() const
Returns the contents of this fixed.
Definition: GenericDatum.hh:529
avro::GenericUnion::selectBranch
void selectBranch(size_t branch)
Selects a new branch.
Definition: GenericDatum.hh:259
avro::AVRO_ENUM
@ AVRO_ENUM
Definition: Types.hh:43
avro::GenericDatum::GenericDatum
GenericDatum(int32_t v)
Makes a new AVRO_INT datum whose value is of type int32_t.
Definition: GenericDatum.hh:143
avro::GenericContainer::schema
const NodePtr & schema() const
Returns the schema for this object.
Definition: GenericDatum.hh:228
avro::AVRO_FLOAT
@ AVRO_FLOAT
Definition: Types.hh:37
avro::GenericFixed
Generic container for Avro fixed.
Definition: GenericDatum.hh:512
avro::GenericEnum::index
size_t index(const std::string &symbol) const
Returns the cardinal for the given symbol symbol.
Definition: GenericDatum.hh:472
avro::GenericUnion::currentBranch
size_t currentBranch() const
Returns the index of the current branch.
Definition: GenericDatum.hh:253
avro::GenericDatum::unionBranch
size_t unionBranch() const
Returns the index of the current branch, if this is a union.
Definition: GenericDatum.hh:587
avro::GenericEnum::set
size_t set(const std::string &symbol)
Set the value for this enum corresponding to the given symbol symbol.
Definition: GenericDatum.hh:479
avro::AVRO_BOOL
@ AVRO_BOOL
Definition: Types.hh:39
avro::GenericUnion
Generic container for unions.
Definition: GenericDatum.hh:236
avro::AVRO_STRING
@ AVRO_STRING
Definition: Types.hh:33
avro::GenericDatum::logicalType
LogicalType logicalType() const
The avro logical type that augments the main data type this datum holds.
Definition: GenericDatum.hh:553
avro::GenericEnum::set
void set(size_t n)
Set the value for this enum corresponding to the given cardinal n.
Definition: GenericDatum.hh:486
avro::GenericEnum
Generic container for Avro enum.
Definition: GenericDatum.hh:435
avro::GenericArray::value
Value & value()
Returns the reference to the contents of this array.
Definition: GenericDatum.hh:389
avro::AVRO_BYTES
@ AVRO_BYTES
Definition: Types.hh:34
avro::GenericEnum::GenericEnum
GenericEnum(const NodePtr &schema)
Constructs a generic enum corresponding to the given schema schema, which should be of Avro type enum.
Definition: GenericDatum.hh:451
avro::GenericUnion::datum
const GenericDatum & datum() const
Returns the datum corresponding to the currently selected branch in this union.
Definition: GenericDatum.hh:278
avro::GenericEnum::value
size_t value() const
Returns the cardinal for the current value of this enum.
Definition: GenericDatum.hh:497
avro::GenericArray::GenericArray
GenericArray(const NodePtr &schema)
Constructs a generic array corresponding to the given schema schema, which should be of Avro type array.
Definition: GenericDatum.hh:376
avro::GenericMap
The generic container for Avro maps.
Definition: GenericDatum.hh:400
avro::GenericDatum
Generic datum which can hold any Avro type.
Definition: GenericDatum.hh:61
avro::GenericRecord::fieldAt
const GenericDatum & fieldAt(size_t pos) const
Returns the field at the given position pos.
Definition: GenericDatum.hh:341
avro::LogicalType
Definition: LogicalType.hh:28
avro::GenericRecord::setFieldAt
void setFieldAt(size_t pos, const GenericDatum &v)
Replaces the field at the given position pos with v.
Definition: GenericDatum.hh:356
avro::GenericDatum::GenericDatum
GenericDatum(float v)
Makes a new AVRO_FLOAT datum whose value is of type float.
Definition: GenericDatum.hh:155
avro::GenericDatum::selectBranch
void selectBranch(size_t branch)
Selects a new branch in the union if this is a union.
Definition: GenericDatum.hh:595
avro::GenericMap::value
Value & value()
Returns the reference to the contents of this map.
Definition: GenericDatum.hh:424
avro::GenericArray::value
const Value & value() const
Returns the contents of this array.
Definition: GenericDatum.hh:382
avro::GenericRecord::fieldAt
GenericDatum & fieldAt(size_t pos)
Returns the reference to the field at the given position pos, which can be used to change the content...
Definition: GenericDatum.hh:349
avro::GenericDatum::GenericDatum
GenericDatum(bool v)
Makes a new AVRO_BOOL datum whose value is of type bool.
Definition: GenericDatum.hh:137
avro::AVRO_INT
@ AVRO_INT
Definition: Types.hh:35
avro
A bunch of templates and specializations for encoding and decoding specific types.
Definition: AvroParse.hh:30
avro::GenericUnion::GenericUnion
GenericUnion(const NodePtr &schema)
Constructs a generic union corresponding to the given schema schema, and the given value.
Definition: GenericDatum.hh:246
avro::GenericRecord::hasField
bool hasField(const std::string &name) const
Returns true if a field with the given name name is located in this record, false otherwise.
Definition: GenericDatum.hh:318
avro::GenericDatum::GenericDatum
GenericDatum(const std::string &v)
Makes a new AVRO_STRING datum whose value is of type std::string.
Definition: GenericDatum.hh:167
avro::ValidSchema
A ValidSchema is basically a non-mutable Schema that has passed some minimum of sanity checks.
Definition: ValidSchema.hh:40
avro::AVRO_ARRAY
@ AVRO_ARRAY
Definition: Types.hh:44
avro::AVRO_DOUBLE
@ AVRO_DOUBLE
Definition: Types.hh:38
avro::GenericFixed::value
std::vector< uint8_t > & value()
Returns the reference to the contents of this fixed.
Definition: GenericDatum.hh:536
avro::AVRO_FIXED
@ AVRO_FIXED
Definition: Types.hh:47
avro::GenericRecord
The generic container for Avro records.
Definition: GenericDatum.hh:286
avro::GenericDatum::GenericDatum
GenericDatum(const std::vector< uint8_t > &v)
Makes a new AVRO_BYTES datum whose value is of type std::vector<uint8_t>.
Definition: GenericDatum.hh:174
avro::GenericMap::GenericMap
GenericMap(const NodePtr &schema)
Constructs a generic map corresponding to the given schema schema, which should be of Avro type map.
Definition: GenericDatum.hh:411
avro::GenericDatum::value
const T & value() const
Returns the value held by this datum.
Definition: GenericDatum.hh:576
avro::GenericDatum::GenericDatum
GenericDatum(const NodePtr &schema, const T &v)
Constructs a datum corresponding to the given avro type and set the value.
Definition: GenericDatum.hh:193
avro::GenericContainer::GenericContainer
GenericContainer(Type type, const NodePtr &s)
Constructs a container corresponding to the given schema.
Definition: GenericDatum.hh:222
avro::GenericMap::value
const Value & value() const
Returns the contents of this map.
Definition: GenericDatum.hh:417
avro::GenericRecord::fieldIndex
size_t fieldIndex(const std::string &name) const
Returns index of the field with the given name name.
Definition: GenericDatum.hh:306
avro::Type
Type
The "type" for the schema.
Definition: Types.hh:31
avro::GenericRecord::fieldCount
size_t fieldCount() const
Returns the number of fields in the current record.
Definition: GenericDatum.hh:299
avro::GenericArray
The generic container for Avro arrays.
Definition: GenericDatum.hh:365
avro::AVRO_UNION
@ AVRO_UNION
Definition: Types.hh:46
avro::GenericDatum::GenericDatum
GenericDatum()
Makes a new AVRO_NULL datum.
Definition: GenericDatum.hh:132
avro::GenericContainer
The base class for all generic type for containers.
Definition: GenericDatum.hh:214
avro::GenericFixed::GenericFixed
GenericFixed(const NodePtr &schema)
Constructs a generic fixed corresponding to the given schema schema, which should be of Avro type fixed.
Definition: GenericDatum.hh:520
avro::GenericMap::Value
std::vector< std::pair< std::string, GenericDatum > > Value
The contents type for the map.
Definition: GenericDatum.hh:405
avro::GenericUnion::datum
GenericDatum & datum()
Returns the datum corresponding to the currently selected branch in this union.
Definition: GenericDatum.hh:270
avro::AVRO_MAP
@ AVRO_MAP
Definition: Types.hh:45
avro::GenericEnum::symbol
const std::string & symbol(size_t n)
Returns the symbol corresponding to the cardinal n.
Definition: GenericDatum.hh:461
avro::Exception
Wrapper for std::runtime_error that provides convenience constructor for boost::format objects.
Definition: Exception.hh:31
avro::GenericEnum::symbol
const std::string & symbol() const
Returns the symbol for the current value of this enum.
Definition: GenericDatum.hh:504