| 1 | # |
|---|
| 2 | # Reflects Oren's comments, adds yamlbyte.h at the bottom |
|---|
| 3 | # |
|---|
| 4 | subject: Revision #4 of YAML Bytecodes |
|---|
| 5 | summary: > |
|---|
| 6 | This proposal defines a 'preparsed' format where a YAML syntax |
|---|
| 7 | is converted into a series of events, as bytecodes. Each bytecode |
|---|
| 8 | appears on its own line, starting with a single character and ending |
|---|
| 9 | with a line feed character, '\n'. |
|---|
| 10 | codes: |
|---|
| 11 | # |
|---|
| 12 | # Primary Bytecodes (Capital Letters) |
|---|
| 13 | # |
|---|
| 14 | # These bytecodes form the minimum needed to represent YAML information |
|---|
| 15 | # from the serial model (ie, without format and comments) |
|---|
| 16 | # |
|---|
| 17 | 'D': |
|---|
| 18 | name: Document |
|---|
| 19 | desc: > |
|---|
| 20 | Indicates that a document has begun, either it is |
|---|
| 21 | the beginning of a YAML stream, or a --- has been |
|---|
| 22 | found. Thus, an empty document is expressed |
|---|
| 23 | as "D\n" |
|---|
| 24 | 'V': |
|---|
| 25 | name: Directive |
|---|
| 26 | desc: > |
|---|
| 27 | This represents any YAML directives immediately following |
|---|
| 28 | a 'D' bytecode. For example '--- %YAML:1.0' produces the |
|---|
| 29 | bytecode "D\nVYAML:1.0\n". |
|---|
| 30 | 'P': |
|---|
| 31 | name: Pause Stream |
|---|
| 32 | desc: > |
|---|
| 33 | This is the instruction when a document is terminated, but |
|---|
| 34 | another document has not yet begun. Thus, it is optional, |
|---|
| 35 | and typically used to pause parsing. For example, |
|---|
| 36 | a stream starting with an empty document, but then in a |
|---|
| 37 | hold state for the next document would be: "D\nP\n" |
|---|
| 38 | '\z': |
|---|
| 39 | name: Finish (end stream) |
|---|
| 40 | desc: > |
|---|
| 41 | YAML bytecodes are meant to be passable as a single "C" |
|---|
| 42 | string, and thus the null terminator can optionally be |
|---|
| 43 | used to signal the end of a stream. When writing bytecodes |
|---|
| 44 | out to a flat file, the file need not contain a null |
|---|
| 45 | terminator; however, when read into memory it should |
|---|
| 46 | always have a null terminator. |
|---|
| 47 | 'M': |
|---|
| 48 | name: Mapping |
|---|
| 49 | desc: > |
|---|
| 50 | Indicates the begin of a mapping, children of the |
|---|
| 51 | mapping are provided as a series of K1,V1,K2,V2 |
|---|
| 52 | pairs as they are found in the input stream. For |
|---|
| 53 | example, the bytecodes for "{ a: b, c: d }" would |
|---|
| 54 | be "M\nSa\nSb\nSc\nSd\nE\n" |
|---|
| 55 | 'Q': |
|---|
| 56 | name: Sequence |
|---|
| 57 | desc: > |
|---|
| 58 | Indicates the begin of a sequence, children are provided |
|---|
| 59 | following till a '.' bytecode is encountered. So, the |
|---|
| 60 | bytecodes for "[ one, two ]" would be "Q\nSone\nStwo\nE\n" |
|---|
| 61 | 'E': |
|---|
| 62 | name: End Collection |
|---|
| 63 | desc: > |
|---|
| 64 | This closes the outermost Collection (Mapping, Sequence), |
|---|
| 65 | note that the document has one and only one node following |
|---|
| 66 | it, therefore it is not a branch. |
|---|
| 67 | 'S': |
|---|
| 68 | name: Scalar |
|---|
| 69 | desc: > |
|---|
| 70 | This indicates the start of a scalar value, which can |
|---|
| 71 | be continued by the 'N' and 'C' bytecodes. This bytecode |
|---|
| 72 | is used for sequence entries, keys, values, etc. |
|---|
| 73 | 'C': |
|---|
| 74 | name: Scalar Continuation |
|---|
| 75 | desc: > |
|---|
| 76 | Since a scalar may not fit within a buffer, and since it |
|---|
| 77 | may not contain a \n character, it may have to be broken |
|---|
| 78 | into several chunks. |
|---|
| 79 | 'N': |
|---|
| 80 | name: Normalized New Line (in a scalar value) |
|---|
| 81 | desc: > |
|---|
| 82 | Scalar values must be chunked so that new lines and |
|---|
| 83 | null values do not occur within a 'S' or 'C' bytecode |
|---|
| 84 | (in the bytecodes, all other C0 need not be escaped). |
|---|
| 85 | This bytecode is then used to represent one or more |
|---|
| 86 | newlines, with the number of newlines optionally |
|---|
| 87 | following. For example, |
|---|
| 88 | "Hello\nWorld" would be "SHello\nN\nCWorld\n", and |
|---|
| 89 | "Hello\n\n\nWorld" is "SHello\nN3\nCWorld\n" |
|---|
| 90 | |
|---|
| 91 | If the new line is an LS or a PS, the N bytecode can |
|---|
| 92 | be followed with a L or P. Thus, "Hello\PWorld\L" is |
|---|
| 93 | reported "SHello\nNP\nWorld\NL\n" |
|---|
| 94 | |
|---|
| 95 | 'Z': |
|---|
| 96 | name: Null Character (in a scalar value) |
|---|
| 97 | desc: > |
|---|
| 98 | As in normalized new lines above, since the null character |
|---|
| 99 | cannot be used in the bytecodes, is must be escaped, ie, |
|---|
| 100 | "Hello\zWorld" would be "SHello\nZ\nCWorld\n". |
|---|
| 101 | 'A': |
|---|
| 102 | name: Alias |
|---|
| 103 | desc: > |
|---|
| 104 | This is used when ever there is an alias node, for |
|---|
| 105 | example, "[ &X one, *X ]" would be normalized |
|---|
| 106 | to "S\nAX\nSone\nRX\nE\n" -- in this example, the |
|---|
| 107 | anchor bytecode applies to the very next content |
|---|
| 108 | bytecode. |
|---|
| 109 | 'R': |
|---|
| 110 | name: Reference (Anchor) |
|---|
| 111 | desc: > |
|---|
| 112 | This bytecode associates an anchor with the very next |
|---|
| 113 | content node, see the 'A' alias bytecode. |
|---|
| 114 | 'T': |
|---|
| 115 | name: Transfer |
|---|
| 116 | desc: > |
|---|
| 117 | This is the transfer method. If the value begins with |
|---|
| 118 | a '!', then it is not normalized. Otherwise, the value |
|---|
| 119 | is a fully qualified URL, with a semicolon. The transfer |
|---|
| 120 | method applies only to the node immediately following, |
|---|
| 121 | and thus it can be seen as a modifier like the anchor. |
|---|
| 122 | For example, "Ttag:yaml.org,2002:str\nSstring\n" is |
|---|
| 123 | normalized, "T!str\nSstring\n" is not. |
|---|
| 124 | # |
|---|
| 125 | # Formatting bytecodes (lower case) |
|---|
| 126 | # |
|---|
| 127 | # The following bytecodes are purely at the syntax level and |
|---|
| 128 | # useful for pretty printers and emitters. Since the range of |
|---|
| 129 | # lower case letters is contiguous, it could be easy for a |
|---|
| 130 | # processor to simply ignore all bytecodes in this range. |
|---|
| 131 | # |
|---|
| 132 | 'c': |
|---|
| 133 | name: Comment |
|---|
| 134 | desc: > |
|---|
| 135 | This is a single line comment. It is terminated like all |
|---|
| 136 | of the other variable length items, with a '\n'. |
|---|
| 137 | 'i': |
|---|
| 138 | name: Indent |
|---|
| 139 | desc: > |
|---|
| 140 | Specifies number of additional spaces to indent for |
|---|
| 141 | subsequent block style nodes, "i4\n" specifies 4 char indent. |
|---|
| 142 | 's': |
|---|
| 143 | name: Scalar styling |
|---|
| 144 | desc: > |
|---|
| 145 | This bytecode, is followed with one of the following |
|---|
| 146 | items to indicate the style to be used for the very |
|---|
| 147 | next content node. It is an error to specify a style for |
|---|
| 148 | a scalar other than double quoted when it must be escaped. |
|---|
| 149 | Furthermore, there must be agreement between the style |
|---|
| 150 | and the very next content node, in other words, a scalar |
|---|
| 151 | style requires that the next content node be an S. |
|---|
| 152 | |
|---|
| 153 | > flow scalar |
|---|
| 154 | " double quoted scalar |
|---|
| 155 | ' single quoted scalar |
|---|
| 156 | | literal scalar |
|---|
| 157 | p plain scalar |
|---|
| 158 | { inline mapping |
|---|
| 159 | [ inline sequence |
|---|
| 160 | b block style (for mappings and sequences'") |
|---|
| 161 | |
|---|
| 162 | # |
|---|
| 163 | # Advanced bytecodes (not alphabetic) |
|---|
| 164 | # |
|---|
| 165 | # These are optional goodies which one could find useful. |
|---|
| 166 | # |
|---|
| 167 | '#': |
|---|
| 168 | name: Line Number |
|---|
| 169 | desc: > |
|---|
| 170 | This bytecode allows the line number of the very next |
|---|
| 171 | node to be reported. |
|---|
| 172 | '!': |
|---|
| 173 | name: Notice |
|---|
| 174 | desc: > |
|---|
| 175 | This is a message sent from the producer to the consumer |
|---|
| 176 | regarding the state of the stream or document. It does |
|---|
| 177 | not necessarly end a stream, as the 'finish' bytecode can |
|---|
| 178 | be used for this purpose. This signal has a packed format, |
|---|
| 179 | with the error number, a comma, and a textual message: |
|---|
| 180 | "#22\n!73,Indentation mismatch\n" |
|---|
| 181 | "#132\n!84,Tabs are illegal for indentation\n" |
|---|
| 182 | ',': |
|---|
| 183 | name: Span |
|---|
| 184 | desc: > |
|---|
| 185 | This bytecode gives the span of the very next 'S', 'M', |
|---|
| 186 | or 'Q' bytecode -- including its subordinates. For scalars, |
|---|
| 187 | it includes the span of all subordinate 'N' and 'C' codes. |
|---|
| 188 | For mappings or sequences, this gives the length all the |
|---|
| 189 | way to the corresponding 'E' bytecode so that the entire |
|---|
| 190 | branch can be skipped. The length is given starting at |
|---|
| 191 | the corresponding 'S', 'M' or 'Q' bytecode and extends |
|---|
| 192 | to the first character following subordinate nodes. |
|---|
| 193 | |
|---|
| 194 | Since this length instruction is meant to be used to 'speed' |
|---|
| 195 | things up, and since calculating the length via hand is not |
|---|
| 196 | really ideal, the length is expressed in Hex. This will allow |
|---|
| 197 | programs to easily convert the length to an actual value |
|---|
| 198 | (converting from hex to integers is easier than decimal). |
|---|
| 199 | Furthermore, all leading x's are ignored (so that they can |
|---|
| 200 | be filled in later) and if the bytecode value is all x's, |
|---|
| 201 | then the length is unknown. Lastly, this length is expressed |
|---|
| 202 | in 8 bit units for UTF-8, and 16 bit units for UTF-16. |
|---|
| 203 | |
|---|
| 204 | For example, |
|---|
| 205 | --- [[one, two], three] |
|---|
| 206 | Is expressed as, |
|---|
| 207 | "?25\nD\n?x1E\nQ\n?xxE\nQ\nSone\nStwo\nE\nSthree\nE\n" |
|---|
| 208 | |
|---|
| 209 | Thus it is seen that the address of D plus 37 is the null |
|---|
| 210 | terminator for the string, the first 'Q' plus 30 also |
|---|
| 211 | gives the null teriminator, and the second 'Q' plus |
|---|
| 212 | 14 jumps to the opening 'S' for the third scalar. |
|---|
| 213 | '@': |
|---|
| 214 | name: Allocate |
|---|
| 215 | desc: > |
|---|
| 216 | This is a hint telling the processor how many items |
|---|
| 217 | are in the following collection (mapping pairs, or |
|---|
| 218 | sequence values), or how many character units need |
|---|
| 219 | to be allocated to hold the next value. Clearly this |
|---|
| 220 | is encoding specific value. The length which |
|---|
| 221 | follows is in hex (not decimal). |
|---|
| 222 | |
|---|
| 223 | For example, "one", could be "@x3\nSone" |
|---|
| 224 | |
|---|
| 225 | design: |
|---|
| 226 | - |
|---|
| 227 | name: streaming support |
|---|
| 228 | problem: > |
|---|
| 229 | The interface should ideally allow for a YAML document to be |
|---|
| 230 | moved incrementally as a stream through a process. In particular, |
|---|
| 231 | YAML is inheritently line oriented, thus the interface should |
|---|
| 232 | probably reflect this fundamental character. |
|---|
| 233 | solution: > |
|---|
| 234 | The bytecodes deliver scalars as chunks, each chunk limited to |
|---|
| 235 | at most one line. While this is not ideal for passing large |
|---|
| 236 | binary objects, it is simple and easy to understand. |
|---|
| 237 | - |
|---|
| 238 | name: push |
|---|
| 239 | problem: > |
|---|
| 240 | The most common 'parsers' out there for YAML are push style, where |
|---|
| 241 | the producer owns the 'C' program stack, and the consumer keeps |
|---|
| 242 | its state as a heap object. Ideal use of a push interface is an |
|---|
| 243 | emitter, since this allows the sender (the application program) |
|---|
| 244 | to use the program stack and thus keep its state on the call stack |
|---|
| 245 | in local, automatic variables. |
|---|
| 246 | solution: > |
|---|
| 247 | A push interface simply can call a single event handler with a |
|---|
| 248 | (bytecode, payload) tuple. Since the core complexity is in the |
|---|
| 249 | bytecodes, the actual function signature is straight-forward |
|---|
| 250 | allowing for relative language independence. Since the bytecode |
|---|
| 251 | is always one character, the event handler could just receive |
|---|
| 252 | a string where the tuple is implicit. |
|---|
| 253 | - |
|---|
| 254 | name: pull |
|---|
| 255 | problem: > |
|---|
| 256 | The other alternative for a streaming interface is a 'pull' mechanism, |
|---|
| 257 | or iterator model where the consumer owns the C stack and the producer |
|---|
| 258 | keeps any state needed as a heap object. Ideal use of a pull |
|---|
| 259 | interface is a parser, since this allows the receiver (the application |
|---|
| 260 | program) to use the program stack, keeping its state on the call stack |
|---|
| 261 | in local variables. |
|---|
| 262 | solution: > |
|---|
| 263 | A pull interface would also be a simple function, that when called |
|---|
| 264 | filles a buffer with binary node(s). Or, in a language with |
|---|
| 265 | garbage collection, could be implemented as an iterator returning |
|---|
| 266 | a string containing the bytecode line (bytecode followed immediately |
|---|
| 267 | by the bytecode argument as a single string) or as a tuple. |
|---|
| 268 | - |
|---|
| 269 | name: pull2push |
|---|
| 270 | problem: > |
|---|
| 271 | This is done easily via a small loop which pulls from the |
|---|
| 272 | iterator and pushes to the event handler. |
|---|
| 273 | solution: > |
|---|
| 274 | For python, assuming the parser is implemented as an iterator |
|---|
| 275 | where one can 'pull' bytecode, args tuples, and assuming the |
|---|
| 276 | emitter has a event callback taking a bytecode, args tuple, |
|---|
| 277 | we have: |
|---|
| 278 | |
|---|
| 279 | def push2pull(parser, emitter): |
|---|
| 280 | for (bytecode, args) in parser: |
|---|
| 281 | emitter.push(bytecode, args) |
|---|
| 282 | |
|---|
| 283 | - |
|---|
| 284 | name: push2pull |
|---|
| 285 | problem: > |
|---|
| 286 | This requires the entire YAML stream be cashed in memory, or |
|---|
| 287 | each of the two stages in a thread or different continuation |
|---|
| 288 | with shared memory or pipe between them. |
|---|
| 289 | solution: > |
|---|
| 290 | This use case seems much easier with a binary stream; that is, |
|---|
| 291 | one need not convert the style of functions between the push |
|---|
| 292 | vs pull pattern. And, for languages supporting continuations, |
|---|
| 293 | (ruby) perhaps push vs pull is not even an issue... for a |
|---|
| 294 | language like python, one would use the threaded Queue object, |
|---|
| 295 | one thread pushes (bytecode, args) tuples into the Queue, while |
|---|
| 296 | the other thread pulls the tuples out. Simple. |
|---|
| 297 | - |
|---|
| 298 | name: neutrality |
|---|
| 299 | problem: > |
|---|
| 300 | It would be ideal of the C Program interface was simple enough |
|---|
| 301 | to be independent of programming language. In an ideal case, |
|---|
| 302 | imagine a flow of YAML structured data through various processing |
|---|
| 303 | stages on a server; where each processing stage is written in |
|---|
| 304 | a different programming language. |
|---|
| 305 | solution: > |
|---|
| 306 | While it may be hard for each language to write a syntax parser |
|---|
| 307 | filled with all of the little details, it would be much much |
|---|
| 308 | easier to write a parser for these bytecodes; as it involves |
|---|
| 309 | simple string handling, dispatching on the first character in |
|---|
| 310 | each string. |
|---|
| 311 | - |
|---|
| 312 | name: tools |
|---|
| 313 | problem: > |
|---|
| 314 | A goal of mine is to have a YPATH expression language, a schema |
|---|
| 315 | language, and a transformation language. I would like these items |
|---|
| 316 | to be reusable by a great number of platforms/languages, and in |
|---|
| 317 | particular as its own callable processing stage. |
|---|
| 318 | solution: > |
|---|
| 319 | If such an expression language was written on top of a bytecode |
|---|
| 320 | format like this, via a simple pull function (/w adapters for |
|---|
| 321 | push2pull and pull2push) quite a bit of reusability could emerge. |
|---|
| 322 | Imagine a schema validator which is injected into the bytecode stream |
|---|
| 323 | and it is an identity operation unless an exception occurs, in |
|---|
| 324 | which case, it terminates the document and makes the next document |
|---|
| 325 | be a description of the validation error. |
|---|
| 326 | - |
|---|
| 327 | name: encoding |
|---|
| 328 | problem: > |
|---|
| 329 | Text within the bytecode format must be given an encoding. There are |
|---|
| 330 | several considerations at hand listed below. |
|---|
| 331 | solution: > |
|---|
| 332 | The YAML bytecode format uses the same encodings as YAML itself, |
|---|
| 333 | and thus is independent of actual encoding. A parser library should |
|---|
| 334 | have several functions to convert between the encodings. |
|---|
| 335 | examples: |
|---|
| 336 | - |
|---|
| 337 | yaml: | |
|---|
| 338 | --- |
|---|
| 339 | - plain |
|---|
| 340 | - > |
|---|
| 341 | this is a flow scalar |
|---|
| 342 | - > |
|---|
| 343 | another flow scalar which is continued |
|---|
| 344 | on a second line and indented 2 spaces |
|---|
| 345 | - &001 !str | |
|---|
| 346 | This is a block scalar, both typed |
|---|
| 347 | and anchored |
|---|
| 348 | - *001 # this was an alias |
|---|
| 349 | - "This is a \"double quoted\" scalar" |
|---|
| 350 | bytecode: | |
|---|
| 351 | D |
|---|
| 352 | Q |
|---|
| 353 | Splain |
|---|
| 354 | f |
|---|
| 355 | Sthis is a flow scalar |
|---|
| 356 | Sanother flow scalar which is continued |
|---|
| 357 | Con a second line and indented 2 spaces |
|---|
| 358 | b |
|---|
| 359 | a001 |
|---|
| 360 | t!str |
|---|
| 361 | SThis is a block scalar, both typed |
|---|
| 362 | N |
|---|
| 363 | Cand anchored |
|---|
| 364 | R001 |
|---|
| 365 | cthis was an alias |
|---|
| 366 | d |
|---|
| 367 | SThis is a "double quoted" scalar |
|---|
| 368 | E |
|---|
| 369 | cheader: | |
|---|
| 370 | /* yamlbyte.h |
|---|
| 371 | * |
|---|
| 372 | * The YAML bytecode "C" interface header file. See the YAML bytecode |
|---|
| 373 | * reference for bytecode sequence rules and for the meaning of each |
|---|
| 374 | * bytecode. |
|---|
| 375 | */ |
|---|
| 376 | |
|---|
| 377 | #ifndef YAMLBYTE_H |
|---|
| 378 | #define YAMLBYTE_H |
|---|
| 379 | #include <stddef.h> |
|---|
| 380 | /* list out the various YAML bytecodes */ |
|---|
| 381 | typedef enum { |
|---|
| 382 | /* content bytecodes */ |
|---|
| 383 | YAML_FINISH = 0, |
|---|
| 384 | YAML_DOCUMENT = 'D', |
|---|
| 385 | YAML_DIRECTIVE = 'V', |
|---|
| 386 | YAML_PAUSE = 'P', |
|---|
| 387 | YAML_MAPPING = 'M', |
|---|
| 388 | YAML_SEQUENCE = 'S', |
|---|
| 389 | YAML_ENDMAPSEQ = 'E', |
|---|
| 390 | YAML_SCALAR = 'S', |
|---|
| 391 | YAML_CONTINUE = 'C', |
|---|
| 392 | YAML_NEWLINE = 'N', |
|---|
| 393 | YAML_NULLCHAR = 'Z', |
|---|
| 394 | YAML_ALIAS = 'A', |
|---|
| 395 | YAML_ANCHOR = 'R', |
|---|
| 396 | YAML_TRANSFER = 'T', |
|---|
| 397 | /* formatting bytecodes */ |
|---|
| 398 | YAML_COMMENT = 'c', |
|---|
| 399 | YAML_INDENT = 'i', |
|---|
| 400 | YAML_STYLE = 's', |
|---|
| 401 | /* other bytecodes */ |
|---|
| 402 | YAML_LINENUMBER = '#', |
|---|
| 403 | YAML_NOTICE = '!', |
|---|
| 404 | YAML_SPAN = ',', |
|---|
| 405 | YAML_ALLOC = '@' |
|---|
| 406 | } yaml_code_t; |
|---|
| 407 | |
|---|
| 408 | /* additional modifiers for the YAML_STYLE bytecode */ |
|---|
| 409 | typedef enum { |
|---|
| 410 | YAML_FLOW = '>', |
|---|
| 411 | YAML_LITERAL = '|', |
|---|
| 412 | YAML_BLOCK = 'b', |
|---|
| 413 | YAML_PLAIN = 'p', |
|---|
| 414 | YAML_INLINE_MAPPING = '{', |
|---|
| 415 | YAML_INLINE_SEQUENCE = '}', |
|---|
| 416 | YAML_SINGLE_QUOTED = 39, |
|---|
| 417 | YAML_DOUBLE_QUOTED = '"' |
|---|
| 418 | } yaml_style_t; |
|---|
| 419 | |
|---|
| 420 | typedef unsigned char yaml_utf8_t; |
|---|
| 421 | typedef unsigned short yaml_utf16_t; |
|---|
| 422 | #ifdef YAML_UTF8 |
|---|
| 423 | #ifdef YAML_UTF16 |
|---|
| 424 | #error Must only define YAML_UTF8 or YAML_UTF16 |
|---|
| 425 | #endif |
|---|
| 426 | typedef yaml_utf8_t yaml_char_t; |
|---|
| 427 | #else |
|---|
| 428 | #ifdef YAML_UTF16 |
|---|
| 429 | typedef yaml_utf16_t yaml_char_t; |
|---|
| 430 | #else |
|---|
| 431 | #error Must define YAML_UTF8 or YAML_UTF16 |
|---|
| 432 | #endif |
|---|
| 433 | #endif |
|---|
| 434 | |
|---|
| 435 | /* return value for push function, tell parser if you want to stop */ |
|---|
| 436 | typedef enum { |
|---|
| 437 | YAML_MORE = 1, /* producer should continue to fire events */ |
|---|
| 438 | YAML_STOP = 0 /* producer should stop firing events */ |
|---|
| 439 | } yaml_more_t; |
|---|
| 440 | |
|---|
| 441 | /* push bytecodes from a producer to a consumer |
|---|
| 442 | * where arg is null terminated /w a length */ |
|---|
| 443 | typedef void * yaml_consumer_t; |
|---|
| 444 | typedef |
|---|
| 445 | yaml_more_t |
|---|
| 446 | (*yaml_push_t)( |
|---|
| 447 | yaml_consumer_t self, |
|---|
| 448 | yaml_code_t code, |
|---|
| 449 | const yaml_char_t *arg, |
|---|
| 450 | size_t arglen |
|---|
| 451 | ); |
|---|
| 452 | |
|---|
| 453 | /* pull bytecodes by the producer from the consumer, where |
|---|
| 454 | * producer must null terminate buff and return the number |
|---|
| 455 | * of sizeof(yaml_char_t) bytes used */ |
|---|
| 456 | typedef void * yaml_producer_t; |
|---|
| 457 | typedef |
|---|
| 458 | size_t |
|---|
| 459 | (*yaml_pull_t)( |
|---|
| 460 | yaml_producer_t self, |
|---|
| 461 | yaml_code_t *code, |
|---|
| 462 | yaml_char_t *buff, /* at least 1K buffer */ |
|---|
| 463 | size_t buffsize |
|---|
| 464 | ); /* returns number of bytes used in the buffer */ |
|---|
| 465 | |
|---|
| 466 | /* canonical helper to show how to hook up a parser (as a push |
|---|
| 467 | * producer) to an emitter (as a push consumer) */ |
|---|
| 468 | #define YAML_PULL2PUSH(pull, producer, push, consumer) \ |
|---|
| 469 | do { \ |
|---|
| 470 | yaml_code_t code = YAML_NOTICE; \ |
|---|
| 471 | yaml_more_t more = YAML_CONTINUE; \ |
|---|
| 472 | yaml_char_t buff[1024]; \ |
|---|
| 473 | size_t size = 0; \ |
|---|
| 474 | memset(buff, 0, 1024 * sizeof(yaml_char_t)); \ |
|---|
| 475 | while( code && more) { \ |
|---|
| 476 | size = (pull)((producer),&code, buff, 1024); \ |
|---|
| 477 | assert(size < 1024 && !buff[size]); \ |
|---|
| 478 | more = (push)((consumer),code, buff, size); \ |
|---|
| 479 | } \ |
|---|
| 480 | buff[0] = 0; \ |
|---|
| 481 | (push)((consumer),YAML_FINISH, buff, 0); \ |
|---|
| 482 | } while(1) |
|---|
| 483 | |
|---|
| 484 | #endif |
|---|