libwreport 3.38
input.h
1#ifndef WREPORT_BUFR_INPUT_H
2#define WREPORT_BUFR_INPUT_H
3
#include <wreport/error.h>
#include <wreport/var.h>
#include <wreport/bulletin.h>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <string>
#include <utility>
#include <vector>
9
10namespace wreport {
11struct Bulletin;
12
13namespace bulletin {
14struct AssociatedField;
15}
16
17namespace bufr {
18
20{
21 Bulletin& out;
22 unsigned subset_count;
23 DispatchToSubsets(Bulletin& out, unsigned subset_count) : out(out), subset_count(subset_count) {}
24
25 void add_missing(Varinfo info)
26 {
27 for (unsigned i = 0; i < subset_count; ++i)
28 out.subsets[i].store_variable_undef(info);
29 }
30 void add_same(const Var& var)
31 {
32 for (unsigned i = 0; i < subset_count; ++i)
33 out.subsets[i].store_variable(Var(var));
34 }
35 void add_var(unsigned subset, Var&& var)
36 {
37 out.subsets[subset].store_variable(var);
38 }
39};
40
41
45class Input
46{
47protected:
52 void scan_section_length(unsigned sec_no);
53
54public:
56 const uint8_t* data;
57
59 size_t data_len;
60
68 const char* fname = nullptr;
69
77 size_t start_offset = 0;
78
80 unsigned s4_cursor = 0;
81
83 uint8_t pbyte = 0;
84
86 int pbyte_len = 0;
87
89 unsigned sec[6];
90
91
98 explicit Input(const std::string& in);
99
108
120 void scan_other_sections(bool has_optional);
121
123 unsigned offset() const { return s4_cursor; }
124
126 unsigned bits_left() const { return static_cast<unsigned>((data_len - s4_cursor) * 8 + pbyte_len); }
127
129 inline unsigned read_byte(unsigned pos) const
130 {
131 return (unsigned)data[pos];
132 }
133
135 inline unsigned read_byte(unsigned section, unsigned pos) const
136 {
137 return (unsigned)data[sec[section] + pos];
138 }
139
141 unsigned read_number(unsigned pos, unsigned byte_len) const
142 {
143 unsigned res = 0;
144 for (unsigned i = 0; i < byte_len; ++i)
145 {
146 res <<= 8;
147 res |= data[pos + i];
148 }
149 return res;
150 }
151
156 inline unsigned read_number(unsigned section, unsigned pos, unsigned byte_len) const
157 {
158 return read_number(sec[section] + pos, byte_len);
159 }
160
165 uint32_t get_bits(unsigned n)
166 {
167 uint32_t result = 0;
168
169 if (s4_cursor == data_len)
170 parse_error("end of buffer while looking for %u bits of bit-packed data", n);
171
172 // TODO: review and benchmark and possibly simplify
173 // (a possible alternative approach is to keep a current bitmask that
174 // starts at 0x80 and is shifted right by 1 at each read until it
175 // reaches 0, and get rid of pbyte_len)
176 for (unsigned i = 0; i < n; i++)
177 {
178 if (pbyte_len == 0)
179 {
180 pbyte_len = 8;
181 pbyte = data[s4_cursor++];
182 }
183 result <<= 1;
184 if (pbyte & 0x80)
185 result |= 1;
186 pbyte <<= 1;
187 pbyte_len--;
188 }
189
190 return result;
191 }
192
196 void skip_bits(unsigned n)
197 {
198 if (s4_cursor == data_len)
199 parse_error("end of buffer while looking for %u bits of bit-packed data", n);
200
201 for (unsigned i = 0; i < n; i++)
202 {
203 if (pbyte_len == 0)
204 {
205 pbyte_len = 8;
206 pbyte = data[s4_cursor++];
207 }
208 pbyte <<= 1;
209 pbyte_len--;
210 }
211 }
212
214 void debug_dump_next_bits(const char* desc, unsigned count, const std::vector<unsigned>& groups={}) const;
215
220 void debug_find_sequence(const char* pattern) const;
221
223 void parse_error(const char* fmt, ...) const WREPORT_THROWF_ATTRS(2, 3);
224
226 void parse_error(unsigned pos, const char* fmt, ...) const WREPORT_THROWF_ATTRS(3, 4);
227
229 void parse_error(unsigned section, unsigned pos, const char* fmt, ...) const WREPORT_THROWF_ATTRS(4, 5);
230
243 void check_available_data(unsigned pos, size_t datalen, const char* expected);
244
259 void check_available_message_data(unsigned section, unsigned pos, size_t datalen, const char* expected);
260
275 void check_available_section_data(unsigned section, unsigned pos, size_t datalen, const char* expected);
276
289 void decode_compressed_number(Var& dest, uint32_t base, unsigned diffbits);
290
299 void decode_number(Var& dest);
300
304 bool decode_compressed_base(Varinfo info, uint32_t& base, uint32_t& diffbits);
305
310 void decode_compressed_number(Varinfo info, unsigned subsets, std::function<void(unsigned, Var&&)> dest);
311
312 void decode_string(Varinfo info, unsigned subsets, DispatchToSubsets& dest);
313
314 void decode_compressed_number(Varinfo info, unsigned subsets, DispatchToSubsets& dest);
315
320 void decode_compressed_number_af(Varinfo info, const bulletin::AssociatedField& afield, unsigned subsets, std::function<void(unsigned, Var&&)> dest);
321
333 void decode_compressed_semantic_number(Var& dest, unsigned subsets);
334
351 bool decode_string(unsigned bit_len, char* str, size_t& len);
352
364 void decode_string(Var& dest);
365
377 void decode_string(Var& dest, unsigned subsets);
378
383 void decode_string(Varinfo info, unsigned subsets, std::function<void(unsigned, Var&&)> dest);
384
396 void decode_binary(Var& dest);
397
405 std::string decode_uncompressed_bitmap(unsigned size);
406
420 std::string decode_compressed_bitmap(unsigned size);
421};
422
423}
424}
425#endif
Storage for the decoded data of a BUFR or CREX message.
Definition: bulletin.h:30
std::vector< Subset > subsets
Decoded variables.
Definition: bulletin.h:122
A physical variable.
Definition: var.h:25
Binary buffer with bit-level read operations.
Definition: input.h:46
size_t data_len
Input buffer size.
Definition: input.h:59
unsigned read_byte(unsigned pos) const
Read a byte value at offset pos.
Definition: input.h:129
Input(const std::string &in)
Wrap a string into an Input.
uint32_t get_bits(unsigned n)
Get the integer value of the next 'n' bits from the decode input; n must be <= 32.
Definition: input.h:165
void check_available_section_data(unsigned section, unsigned pos, size_t datalen, const char *expected)
Check that the given section in the input buffer contains at least datalen characters after offset po...
void decode_compressed_number(Var &dest, uint32_t base, unsigned diffbits)
Decode a compressed number as described by dest.info(), and set it as value for dest.
void debug_dump_next_bits(const char *desc, unsigned count, const std::vector< unsigned > &groups={}) const
Dump to stderr 'count' bits of 'buf', starting at the 'ofs-th' bit.
uint8_t pbyte
Byte we are currently decoding.
Definition: input.h:83
std::string decode_uncompressed_bitmap(unsigned size)
Decode an uncompressed bitmap of size bits.
void scan_other_sections(bool has_optional)
Scan the message filling in the sec[] array of section start offsets of all sections from 2 on.
unsigned bits_left() const
Return the number of bits left in the message to be decoded.
Definition: input.h:126
void scan_lead_sections()
Scan the message filling in the sec[] array of start offsets of sections 0 and 1.
void scan_section_length(unsigned sec_no)
Scan length of section sec_no, filling in the start of the next section in sec[sec_no + 1].
unsigned offset() const
Return the current decoding byte offset.
Definition: input.h:123
unsigned s4_cursor
Offset of the byte we are currently decoding.
Definition: input.h:80
void check_available_data(unsigned pos, size_t datalen, const char *expected)
Check that the input buffer contains at least datalen characters after offset pos; throw error_parse ...
unsigned read_number(unsigned pos, unsigned byte_len) const
Read a big endian integer value byte_len bytes long, at offset pos.
Definition: input.h:141
void decode_compressed_number_af(Varinfo info, const bulletin::AssociatedField &afield, unsigned subsets, std::function< void(unsigned, Var &&)> dest)
Decode a number as described by info from a compressed bufr with subsets subsets, and send the result...
void debug_find_sequence(const char *pattern) const
Match the given pattern as regexp on the still unread input bitstream, with bits converted to a strin...
unsigned sec[6]
Offsets of the start of BUFR sections.
Definition: input.h:89
size_t start_offset
File offset of the start of the message.
Definition: input.h:77
void check_available_message_data(unsigned section, unsigned pos, size_t datalen, const char *expected)
Check that the input buffer contains at least datalen characters after offset pos in section section;...
unsigned read_byte(unsigned section, unsigned pos) const
Read a byte value at offset pos inside section section.
Definition: input.h:135
unsigned read_number(unsigned section, unsigned pos, unsigned byte_len) const
Read a big endian integer value byte_len bytes long, at offset pos inside section section.
Definition: input.h:156
const uint8_t * data
Input buffer.
Definition: input.h:56
void skip_bits(unsigned n)
Skip the next n bits.
Definition: input.h:196
const char * fname
Input file name (optional).
Definition: input.h:68
void decode_binary(Var &dest)
Decode a generic binary value as-is, as described by dest.info(), and set it as value for dest.
bool decode_compressed_base(Varinfo info, uint32_t &base, uint32_t &diffbits)
Decode the base value for a variable in a compressed BUFR.
std::string decode_compressed_bitmap(unsigned size)
Decode a "compressed" bitmap of size bits.
void decode_compressed_semantic_number(Var &dest, unsigned subsets)
Decode a number as described by dest.info(), and set it as value for dest.
int pbyte_len
Bits left in pbyte to decode.
Definition: input.h:86
void parse_error(const char *fmt,...) const WREPORT_THROWF_ATTRS(2, 3)
Throw an error_parse at the current decoding location.
void decode_number(Var &dest)
Decode a number as described by dest.info(), and set it as value for dest.
wreport exceptions.
#define WREPORT_THROWF_ATTRS(a, b)
Tell the compiler that a function always throws and expects printf-style arguments.
Definition: error.h:56
String functions.
Definition: benchmark.h:13
Information about a variable.
Definition: varinfo.h:139
Definition: input.h:20
Definition: associated_fields.h:13