1 /* 2 * Binary data packing/unpacking module for ucode. 3 * Copyright (C) 2021 Jo-Philipp Wich <jo@mein.io> 4 * 5 * This module is heavily based on the Python 3.10 "_struct.c" module source 6 * published under the following license: 7 * 8 * ----------------------------------------------------------------------------------- 9 * 10 * 1. This LICENSE AGREEMENT is between the Python Software Foundation ("PSF"), and 11 * the Individual or Organization ("Licensee") accessing and otherwise using Python 12 * 3.10.0 software in source or binary form and its associated documentation. 13 * 14 * 2. Subject to the terms and conditions of this License Agreement, PSF hereby 15 * grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, 16 * analyze, test, perform and/or display publicly, prepare derivative works, 17 * distribute, and otherwise use Python 3.10.0 alone or in any derivative 18 * version, provided, however, that PSF's License Agreement and PSF's notice of 19 * copyright, i.e., "Copyright © 2001-2021 Python Software Foundation; All Rights 20 * Reserved" are retained in Python 3.10.0 alone or in any derivative version 21 * prepared by Licensee. 22 * 23 * 3. In the event Licensee prepares a derivative work that is based on or 24 * incorporates Python 3.10.0 or any part thereof, and wants to make the 25 * derivative work available to others as provided herein, then Licensee hereby 26 * agrees to include in any such work a brief summary of the changes made to Python 27 * 3.10.0. 28 * 29 * 4. PSF is making Python 3.10.0 available to Licensee on an "AS IS" basis. 30 * PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF 31 * EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND DISCLAIMS ANY REPRESENTATION OR 32 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE 33 * USE OF PYTHON 3.10.0 WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. 34 * 35 * 5. 
PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON 3.10.0 36 * FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF 37 * MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 3.10.0, OR ANY DERIVATIVE 38 * THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 39 * 40 * 6. This License Agreement will automatically terminate upon a material breach of 41 * its terms and conditions. 42 * 43 * 7. Nothing in this License Agreement shall be deemed to create any relationship 44 * of agency, partnership, or joint venture between PSF and Licensee. This License 45 * Agreement does not grant permission to use PSF trademarks or trade name in a 46 * trademark sense to endorse or promote products or services of Licensee, or any 47 * third party. 48 * 49 * 8. By copying, installing or otherwise using Python 3.10.0, Licensee agrees 50 * to be bound by the terms and conditions of this License Agreement. 51 * 52 * ----------------------------------------------------------------------------------- 53 * 54 * Brief summary of changes compared to the original Python 3.10 source: 55 * 56 * - Inlined and refactored IEEE 754 float conversion routines 57 * - Usage of stdbool for function return values and boolean parameters 58 * - Renamed functions and structures for clarity 59 * - Interface adapted to ucode C API 60 * - Removed unused code 61 */ 62 63 /** 64 * # Handle Packed Binary Data 65 * 66 * The `struct` module provides routines for interpreting byte strings as packed 67 * binary data. 
68 * 69 * Functions can be individually imported and directly accessed using the 70 * {@link https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/import#named_import named import} 71 * syntax: 72 * 73 * ``` 74 * import { pack, unpack } from 'struct'; 75 * 76 * let buffer = pack('bhl', -13, 1234, 444555666); 77 * let values = unpack('bhl', buffer); 78 * ``` 79 * 80 * Alternatively, the module namespace can be imported 81 * using a wildcard import statement: 82 * 83 * ``` 84 * import * as struct from 'struct'; 85 * 86 * let buffer = struct.pack('bhl', -13, 1234, 444555666); 87 * let values = struct.unpack('bhl', buffer); 88 * ``` 89 * 90 * Additionally, the struct module namespace may also be imported by invoking 91 * the `ucode` interpreter with the `-lstruct` switch. 92 * 93 * ## Format Strings 94 * 95 * Format strings describe the data layout when packing and unpacking data. 96 * They are built up from format-characters, which specify the type of data 97 * being packed/unpacked. In addition, special characters control the byte 98 * order, size and alignment. 99 * 100 * Each format string consists of an optional prefix character which describes 101 * the overall properties of the data and one or more format characters which 102 * describe the actual data values and padding. 103 * 104 * ### Byte Order, Size, and Alignment 105 * 106 * By default, C types are represented in the machine's native format and byte 107 * order, and properly aligned by skipping pad bytes if necessary (according to 108 * the rules used by the C compiler). 109 * 110 * This behavior is chosen so that the bytes of a packed struct correspond 111 * exactly to the memory layout of the corresponding C struct. 112 * 113 * Whether to use native byte ordering and padding or standard formats depends 114 * on the application. 
115 * 116 * Alternatively, the first character of the format string can be used to indicate 117 * the byte order, size and alignment of the packed data, according to the 118 * following table: 119 * 120 * | Character | Byte order | Size | Alignment | 121 * |-----------|------------------------|----------|-----------| 122 * | `@` | native | native | native | 123 * | `=` | native | standard | none | 124 * | `<` | little-endian | standard | none | 125 * | `>` | big-endian | standard | none | 126 * | `!` | network (= big-endian) | standard | none | 127 * 128 * If the first character is not one of these, `'@'` is assumed. 129 * 130 * Native byte order is big-endian or little-endian, depending on the 131 * host system. For example, Intel x86, AMD64 (x86-64), and Apple M1 are 132 * little-endian; IBM z and many legacy architectures are big-endian. 133 * 134 * Native size and alignment are determined using the C compiler's 135 * `sizeof` expression. This is always combined with native byte order. 136 * 137 * Standard size depends only on the format character; see the table in 138 * the `format-characters` section. 139 * 140 * Note the difference between `'@'` and `'='`: both use native byte order, 141 * but the size and alignment of the latter is standardized. 142 * 143 * The form `'!'` represents the network byte order which is always big-endian 144 * as defined in `IETF RFC 1700`. 145 * 146 * There is no way to indicate non-native byte order (force byte-swapping); use 147 * the appropriate choice of `'<'` or `'>'`. 148 * 149 * Notes: 150 * 151 * (1) Padding is only automatically added between successive structure members. 152 * No padding is added at the beginning or the end of the encoded struct. 153 * 154 * (2) No padding is added when using non-native size and alignment, e.g. 155 * with '<', '>', '=', and '!'. 
156 * 157 * (3) To align the end of a structure to the alignment requirement of a 158 * particular type, end the format with the code for that type with a repeat 159 * count of zero. 160 * 161 * 162 * ### Format Characters 163 * 164 * Format characters have the following meaning; the conversion between C and 165 * ucode values should be obvious given their types. The 'Standard size' column 166 * refers to the size of the packed value in bytes when using standard size; 167 * that is, when the format string starts with one of `'<'`, `'>'`, `'!'` or 168 * `'='`. When using native size, the size of the packed value is platform 169 * dependent. 170 * 171 * | Format | C Type | Ucode type | Standard size | Notes | 172 * |--------|----------------------|------------|----------------|----------| 173 * | `x` | *pad byte* | *no value* | | (7) | 174 * | `c` | `char` | string | 1 | | 175 * | `b` | `signed char` | int | 1 | (1), (2) | 176 * | `B` | `unsigned char` | int | 1 | (2) | 177 * | `?` | `_Bool` | bool | 1 | (1) | 178 * | `h` | `short` | int | 2 | (2) | 179 * | `H` | `unsigned short` | int | 2 | (2) | 180 * | `i` | `int` | int | 4 | (2) | 181 * | `I` | `unsigned int` | int | 4 | (2) | 182 * | `l` | `long` | int | 4 | (2) | 183 * | `L` | `unsigned long` | int | 4 | (2) | 184 * | `q` | `long long` | int | 8 | (2) | 185 * | `Q` | `unsigned long long` | int | 8 | (2) | 186 * | `n` | `ssize_t` | int | | (3) | 187 * | `N` | `size_t` | int | | (3) | 188 * | `e` | (6) | double | 2 | (4) | 189 * | `f` | `float` | double | 4 | (4) | 190 * | `d` | `double` | double | 8 | (4) | 191 * | `s` | `char[]` | string | | (9) | 192 * | `p` | `char[]` | string | | (8) | 193 * | `P` | `void *` | int | | (5) | 194 * | `*` | `char[]` | string | | (10) | 195 * 196 * Notes: 197 * 198 * - (1) The `'?'` conversion code corresponds to the `_Bool` type defined by 199 * C99. If this type is not available, it is simulated using a `char`. In 200 * standard mode, it is always represented by one byte. 
201 * 202 * - (2) When attempting to pack a non-integer using any of the integer 203 * conversion codes, this module attempts to convert the given value into an 204 * integer. If the value is not convertible, a type error exception is thrown. 205 * 206 * - (3) The `'n'` and `'N'` conversion codes are only available for the native 207 * size (selected as the default or with the `'@'` byte order character). 208 * For the standard size, you can use whichever of the other integer formats 209 * fits your application. 210 * 211 * - (4) For the `'f'`, `'d'` and `'e'` conversion codes, the packed 212 * representation uses the IEEE 754 binary32, binary64 or binary16 format 213 * (for `'f'`, `'d'` or `'e'` respectively), regardless of the floating-point 214 * format used by the platform. 215 * 216 * - (5) The `'P'` format character is only available for the native byte 217 * ordering (selected as the default or with the `'@'` byte order character). 218 * The byte order character `'='` chooses to use little- or big-endian 219 * ordering based on the host system. The struct module does not interpret 220 * this as native ordering, so the `'P'` format is not available. 221 * 222 * - (6) The IEEE 754 binary16 "half precision" type was introduced in the 2008 223 * revision of the `IEEE 754` standard. It has a sign bit, a 5-bit exponent 224 * and 11-bit precision (with 10 bits explicitly stored), and can represent 225 * numbers between approximately `6.1e-05` and `6.5e+04` at full precision. 226 * This type is not widely supported by C compilers: on a typical machine, an 227 * unsigned short can be used for storage, but not for math operations. See 228 * the Wikipedia page on the `half-precision floating-point format` for more 229 * information. 230 * 231 * - (7) When packing, `'x'` inserts one NUL byte. 232 * 233 * - (8) The `'p'` format character encodes a "Pascal string", meaning a short 234 * variable-length string stored in a *fixed number of bytes*, given by the 235 * count. 
The first byte stored is the length of the string, or 255, 236 * whichever is smaller. The bytes of the string follow. If the string 237 * passed in to `pack()` is too long (longer than the count minus 1), only 238 * the leading `count-1` bytes of the string are stored. If the string is 239 * shorter than `count-1`, it is padded with null bytes so that exactly count 240 * bytes in all are used. Note that for `unpack()`, the `'p'` format 241 * character consumes `count` bytes, but that the string returned can never 242 * contain more than 255 bytes. 243 * 244 * - (9) For the `'s'` format character, the count is interpreted as the length 245 * of the bytes, not a repeat count like for the other format characters; for 246 * example, `'10s'` means a single 10-byte string mapping to or from a single 247 * ucode byte string, while `'10c'` means 10 separate one byte character 248 * elements (e.g., `cccccccccc`) mapping to or from ten different ucode byte 249 * strings. If a count is not given, it defaults to 1. For packing, the 250 * string is truncated or padded with null bytes as appropriate to make it 251 * fit. For unpacking, the resulting bytes object always has exactly the 252 * specified number of bytes. As a special case, `'0s'` means a single, 253 * empty string (while `'0c'` means 0 characters). 254 * 255 * - (10) The `*` format character serves as wildcard. For `pack()` it will 256 * append the corresponding byte argument string as-is, not applying any 257 * padding or zero filling. When a repeat count is given, that many bytes of 258 * the input byte string argument will be appended at most on `pack()`, 259 * effectively truncating longer input strings. For `unpack()`, the wildcard 260 * format will yield a byte string containing the entire remaining input data 261 * bytes, or - when a repeat count is given - that many bytes of input data 262 * at most. 263 * 264 * A format character may be preceded by an integral repeat count. 
For example, 265 * the format string `'4h'` means exactly the same as `'hhhh'`. 266 * 267 * Whitespace characters between formats are ignored; a count and its format 268 * must not contain whitespace though. 269 * 270 * When packing a value `x` using one of the integer formats (`'b'`, 271 * `'B'`, `'h'`, `'H'`, `'i'`, `'I'`, `'l'`, `'L'`, 272 * `'q'`, `'Q'`), if `x` is outside the valid range for that format, a type 273 * error exception is raised. 274 * 275 * For the `'?'` format character, the return value is either `true` or `false`. 276 * When packing, the truish result value of the argument is used. Either 0 or 1 277 * in the native or standard bool representation will be packed, and any 278 * non-zero value will be `true` when unpacking. 279 * 280 * ## Examples 281 * 282 * Note: 283 * Native byte order examples (designated by the `'@'` format prefix or 284 * lack of any prefix character) may not match what the reader's 285 * machine produces as 286 * that depends on the platform and compiler. 
287 * 288 * Pack and unpack integers of three different sizes, using big endian 289 * ordering: 290 * 291 * ``` 292 * import { pack, unpack } from 'struct'; 293 * 294 * pack(">bhl", 1, 2, 3); // "\x01\x00\x02\x00\x00\x00\x03" 295 * unpack(">bhl", "\x01\x00\x02\x00\x00\x00\x03"); // [ 1, 2, 3 ] 296 * ``` 297 * 298 * Attempt to pack an integer which is too large for the defined field: 299 * 300 * ```bash 301 * $ ucode -lstruct -p 'struct.pack(">h", 99999)' 302 * Type error: Format 'h' requires numeric argument between -32768 and 32767 303 * In [-p argument], line 1, byte 24: 304 * 305 * `struct.pack(">h", 99999)` 306 * Near here -------------^ 307 * ``` 308 * 309 * Demonstrate the difference between `'s'` and `'c'` format characters: 310 * 311 * ``` 312 * import { pack } from 'struct'; 313 * 314 * pack("@ccc", "1", "2", "3"); // "123" 315 * pack("@3s", "123"); // "123" 316 * ``` 317 * 318 * The ordering of format characters may have an impact on size in native 319 * mode since padding is implicit. In standard mode, the user is 320 * responsible for inserting any desired padding. 321 * 322 * Note in the first `pack()` call below that three NUL bytes were added after 323 * the packed `'#'` to align the following integer on a four-byte boundary. 
324 * In this example, the output was produced on a little endian machine: 325 * 326 * ``` 327 * import { pack } from 'struct'; 328 * 329 * pack("@ci", "#", 0x12131415); // "#\x00\x00\x00\x15\x14\x13\x12" 330 * pack("@ic", 0x12131415, "#"); // "\x15\x14\x13\x12#" 331 * ``` 332 * 333 * The following format `'ih0i'` results in two pad bytes being added at the 334 * end, assuming the platform's ints are aligned on 4-byte boundaries: 335 * 336 * ``` 337 * import { pack } from 'struct'; 338 * 339 * pack("ih0i", 0x01010101, 0x0202); // "\x01\x01\x01\x01\x02\x02\x00\x00" 340 * ``` 341 * 342 * Use the wildcard format to extract the remainder of the input data: 343 * 344 * ``` 345 * import { unpack } from 'struct'; 346 * 347 * unpack("ccc*", "foobarbaz"); // [ "f", "o", "o", "barbaz" ] 348 * unpack("ccc3*", "foobarbaz"); // [ "f", "o", "o", "bar" ] 349 * ``` 350 * 351 * Use the wildcard format to pack binary stings as-is into the result data: 352 * 353 * ``` 354 * import { pack } from 'struct'; 355 * 356 * pack("h*h", 0x0101, "\x02\x00\x03", 0x0404); // "\x01\x01\x02\x00\x03\x04\x04" 357 * pack("c3*c", "a", "foobar", "c"); // "afooc" 358 * ``` 359 * 360 * @module struct 361 */ 362 363 #include <ctype.h> 364 #include <errno.h> 365 #include <limits.h> 366 #include <math.h> 367 #include <stdlib.h> 368 #include <float.h> 369 #include <assert.h> 370 371 #include "ucode/module.h" 372 #include "ucode/vallist.h" 373 374 typedef struct formatdef { 375 char format; 376 ssize_t size; 377 ssize_t alignment; 378 uc_value_t* (*unpack)(uc_vm_t *, const char *, const struct formatdef *); 379 bool (*pack)(uc_vm_t *, char *, uc_value_t *, const struct formatdef *); 380 } formatdef_t; 381 382 typedef struct { 383 const formatdef_t *fmtdef; 384 ssize_t offset; 385 ssize_t size; 386 ssize_t repeat; 387 } formatcode_t; 388 389 typedef struct { 390 size_t len; 391 size_t size; 392 size_t ncodes; 393 formatcode_t codes[]; 394 } formatstate_t; 395 396 typedef struct { 397 uc_resource_t resource; 
398 size_t length; 399 size_t capacity; 400 size_t position; 401 } formatbuffer_t; 402 403 404 /* Define various structs to figure out the alignments of types */ 405 406 typedef struct { char c; short x; } st_short; 407 typedef struct { char c; int x; } st_int; 408 typedef struct { char c; long x; } st_long; 409 typedef struct { char c; float x; } st_float; 410 typedef struct { char c; double x; } st_double; 411 typedef struct { char c; void *x; } st_void_p; 412 typedef struct { char c; size_t x; } st_size_t; 413 typedef struct { char c; bool x; } st_bool; 414 typedef struct { char c; long long x; } s_long_long; 415 416 #define SHORT_ALIGN (sizeof(st_short) - sizeof(short)) 417 #define INT_ALIGN (sizeof(st_int) - sizeof(int)) 418 #define LONG_ALIGN (sizeof(st_long) - sizeof(long)) 419 #define FLOAT_ALIGN (sizeof(st_float) - sizeof(float)) 420 #define DOUBLE_ALIGN (sizeof(st_double) - sizeof(double)) 421 #define VOID_P_ALIGN (sizeof(st_void_p) - sizeof(void *)) 422 #define SIZE_T_ALIGN (sizeof(st_size_t) - sizeof(size_t)) 423 #define BOOL_ALIGN (sizeof(st_bool) - sizeof(bool)) 424 #define LONG_LONG_ALIGN (sizeof(s_long_long) - sizeof(long long)) 425 426 #ifdef __powerc 427 #pragma options align=reset 428 #endif 429 430 431 static bool 432 ucv_as_long(uc_vm_t *vm, uc_value_t *v, long *p) 433 { 434 char *s, *e; 435 int64_t i; 436 double d; 437 long x; 438 439 errno = 0; 440 441 switch (ucv_type(v)) { 442 case UC_INTEGER: 443 i = ucv_int64_get(v); 444 445 if (i < LONG_MIN || i > LONG_MAX) 446 errno = ERANGE; 447 448 x = (long)i; 449 break; 450 451 case UC_DOUBLE: 452 d = ucv_double_get(v); 453 x = (long)d; 454 455 if (isnan(d) || d < (double)LONG_MIN || d > (double)LONG_MAX || d - x != 0) 456 errno = ERANGE; 457 458 break; 459 460 case UC_BOOLEAN: 461 x = (long)ucv_boolean_get(v); 462 break; 463 464 case UC_NULL: 465 x = 0; 466 break; 467 468 case UC_STRING: 469 s = ucv_string_get(v); 470 x = strtol(s, &e, 0); 471 472 if (e == s || *e != '\0') 473 errno = EINVAL; 474 
475 break; 476 477 default: 478 errno = EINVAL; 479 x = 0; 480 break; 481 } 482 483 if (errno != 0) { 484 uc_vm_raise_exception(vm, EXCEPTION_TYPE, 485 (errno == ERANGE) 486 ? "Argument out of range" 487 : "Argument not convertible to number"); 488 489 return false; 490 } 491 492 *p = x; 493 494 return true; 495 } 496 497 static bool 498 ucv_as_ulong(uc_vm_t *vm, uc_value_t *v, unsigned long *p) 499 { 500 unsigned long x; 501 char *s, *e; 502 uint64_t i; 503 double d; 504 505 errno = 0; 506 507 switch (ucv_type(v)) { 508 case UC_INTEGER: 509 i = ucv_uint64_get(v); 510 511 if (i > ULONG_MAX) 512 errno = ERANGE; 513 514 x = (unsigned long)i; 515 break; 516 517 case UC_DOUBLE: 518 d = ucv_double_get(v); 519 x = (unsigned long)d; 520 521 if (isnan(d) || d < 0 || d > (double)ULONG_MAX || d - x != 0) 522 errno = ERANGE; 523 524 break; 525 526 case UC_BOOLEAN: 527 x = (unsigned long)ucv_boolean_get(v); 528 break; 529 530 case UC_NULL: 531 x = 0; 532 break; 533 534 case UC_STRING: 535 s = ucv_string_get(v); 536 x = strtoul(s, &e, 0); 537 538 if (e == s || *e != '\0') 539 errno = EINVAL; 540 541 break; 542 543 default: 544 errno = EINVAL; 545 x = 0; 546 break; 547 } 548 549 if (errno != 0) { 550 uc_vm_raise_exception(vm, EXCEPTION_TYPE, 551 (errno == ERANGE) 552 ? 
"Argument out of range" 553 : "Argument not convertible to number"); 554 555 return false; 556 } 557 558 *p = x; 559 560 return true; 561 } 562 563 static bool 564 ucv_as_longlong(uc_vm_t *vm, uc_value_t *v, long long *p) 565 { 566 char *s, *e; 567 long long x; 568 int64_t i; 569 double d; 570 571 errno = 0; 572 573 switch (ucv_type(v)) { 574 case UC_INTEGER: 575 i = ucv_int64_get(v); 576 577 if (i < LLONG_MIN || i > LLONG_MAX) 578 errno = ERANGE; 579 580 x = (long long)i; 581 break; 582 583 case UC_DOUBLE: 584 d = ucv_double_get(v); 585 x = (long long)d; 586 587 if (isnan(d) || d < (double)LLONG_MIN || d > (double)LLONG_MAX || d - x != 0) 588 errno = ERANGE; 589 590 break; 591 592 case UC_BOOLEAN: 593 x = (long long)ucv_boolean_get(v); 594 break; 595 596 case UC_NULL: 597 x = 0; 598 break; 599 600 case UC_STRING: 601 s = ucv_string_get(v); 602 x = strtoll(s, &e, 0); 603 604 if (e == s || *e != '\0') 605 errno = EINVAL; 606 607 break; 608 609 default: 610 errno = EINVAL; 611 x = 0; 612 break; 613 } 614 615 if (errno != 0) { 616 uc_vm_raise_exception(vm, EXCEPTION_TYPE, 617 (errno == ERANGE) 618 ? 
"Argument out of range" 619 : "Argument not convertible to number"); 620 621 return false; 622 } 623 624 *p = x; 625 626 return true; 627 } 628 629 static bool 630 ucv_as_ulonglong(uc_vm_t *vm, uc_value_t *v, unsigned long long *p) 631 { 632 unsigned long long x; 633 char *s, *e; 634 uint64_t i; 635 double d; 636 637 errno = 0; 638 639 switch (ucv_type(v)) { 640 case UC_INTEGER: 641 i = ucv_uint64_get(v); 642 643 if (i > ULLONG_MAX) 644 errno = ERANGE; 645 646 x = (unsigned long long)i; 647 break; 648 649 case UC_DOUBLE: 650 d = ucv_double_get(v); 651 x = (unsigned long long)d; 652 653 if (isnan(d) || d < 0 || d > (double)ULLONG_MAX || d - x != 0) 654 errno = ERANGE; 655 656 break; 657 658 case UC_BOOLEAN: 659 x = (unsigned long long)ucv_boolean_get(v); 660 break; 661 662 case UC_NULL: 663 x = 0; 664 break; 665 666 case UC_STRING: 667 s = ucv_string_get(v); 668 x = strtoull(s, &e, 0); 669 670 if (e == s || *e != '\0') 671 errno = EINVAL; 672 673 break; 674 675 default: 676 errno = EINVAL; 677 x = 0; 678 break; 679 } 680 681 if (errno != 0) { 682 uc_vm_raise_exception(vm, EXCEPTION_TYPE, 683 (errno == ERANGE) 684 ? 
"Argument out of range" 685 : "Argument not convertible to number"); 686 687 return false; 688 } 689 690 *p = x; 691 692 return true; 693 } 694 695 static bool 696 ucv_as_ssize_t(uc_vm_t *vm, uc_value_t *v, ssize_t *p) 697 { 698 char *s, *e; 699 int64_t i; 700 ssize_t x; 701 double d; 702 703 errno = 0; 704 705 switch (ucv_type(v)) { 706 case UC_INTEGER: 707 i = ucv_int64_get(v); 708 709 if (i < -1 || i > SSIZE_MAX) 710 errno = ERANGE; 711 712 x = (ssize_t)i; 713 break; 714 715 case UC_DOUBLE: 716 d = ucv_double_get(v); 717 x = (ssize_t)d; 718 719 if (isnan(d) || d < -1 || d > (double)SSIZE_MAX || d - x != 0) 720 errno = ERANGE; 721 722 break; 723 724 case UC_BOOLEAN: 725 x = (ssize_t)ucv_boolean_get(v); 726 break; 727 728 case UC_NULL: 729 x = 0; 730 break; 731 732 case UC_STRING: 733 s = ucv_string_get(v); 734 i = strtoll(s, &e, 0); 735 736 if (e == s || *e != '\0') 737 errno = EINVAL; 738 else if (i < -1 || i > SSIZE_MAX) 739 errno = ERANGE; 740 741 x = (ssize_t)i; 742 break; 743 744 default: 745 errno = EINVAL; 746 x = 0; 747 break; 748 } 749 750 if (errno != 0) { 751 uc_vm_raise_exception(vm, EXCEPTION_TYPE, 752 (errno == ERANGE) 753 ? 
"Argument out of range" 754 : "Argument not convertible to number"); 755 756 return false; 757 } 758 759 *p = x; 760 761 return true; 762 } 763 764 /* Same, but handling size_t */ 765 766 static bool 767 ucv_as_size_t(uc_vm_t *vm, uc_value_t *v, size_t *p) 768 { 769 char *s, *e; 770 uint64_t i; 771 double d; 772 size_t x; 773 774 errno = 0; 775 776 switch (ucv_type(v)) { 777 case UC_INTEGER: 778 i = ucv_uint64_get(v); 779 780 if (i > SIZE_MAX) 781 errno = ERANGE; 782 783 x = (size_t)i; 784 break; 785 786 case UC_DOUBLE: 787 d = ucv_double_get(v); 788 x = (size_t)d; 789 790 if (isnan(d) || d < 0 || d > (double)SIZE_MAX || d - x != 0) 791 errno = ERANGE; 792 793 break; 794 795 case UC_BOOLEAN: 796 x = (size_t)ucv_boolean_get(v); 797 break; 798 799 case UC_NULL: 800 x = 0; 801 break; 802 803 case UC_STRING: 804 s = ucv_string_get(v); 805 i = strtoull(s, &e, 0); 806 807 if (e == s || *e != '\0') 808 errno = EINVAL; 809 else if (i > SIZE_MAX) 810 errno = ERANGE; 811 812 x = (size_t)i; 813 break; 814 815 default: 816 errno = EINVAL; 817 x = 0; 818 break; 819 } 820 821 if (errno != 0) { 822 uc_vm_raise_exception(vm, EXCEPTION_TYPE, 823 (errno == ERANGE) 824 ? 
"Argument out of range" 825 : "Argument not convertible to number"); 826 827 return false; 828 } 829 830 *p = x; 831 832 return true; 833 } 834 835 static bool 836 ucv_as_double(uc_vm_t *vm, uc_value_t *v, double *p) 837 { 838 char *s, *e; 839 int64_t i; 840 double x; 841 842 errno = 0; 843 844 switch (ucv_type(v)) { 845 case UC_INTEGER: 846 i = ucv_int64_get(v); 847 848 if (errno == 0) { 849 if (i < -DBL_MAX || i > DBL_MAX) 850 errno = ERANGE; 851 } 852 853 x = (double)i; 854 break; 855 856 case UC_DOUBLE: 857 x = ucv_double_get(v); 858 break; 859 860 case UC_BOOLEAN: 861 x = (double)ucv_boolean_get(v); 862 break; 863 864 case UC_NULL: 865 x = 0.0; 866 break; 867 868 case UC_STRING: 869 s = ucv_string_get(v); 870 x = strtod(s, &e); 871 872 if (e == s || *e != '\0') 873 errno = EINVAL; 874 875 break; 876 877 default: 878 errno = EINVAL; 879 x = 0.0; 880 break; 881 } 882 883 if (errno != 0) { 884 uc_vm_raise_exception(vm, EXCEPTION_TYPE, 885 (errno == ERANGE) 886 ? "Argument out of range" 887 : "Argument not convertible to number"); 888 889 return false; 890 } 891 892 *p = x; 893 894 return true; 895 } 896 897 898 /* Floating point helpers */ 899 900 static bool 901 double_pack16(double d, char *buf, bool little_endian) 902 { 903 int32_t exponent = 0; 904 uint16_t bits = 0; 905 bool sign = false; 906 double fraction; 907 uint8_t *p; 908 909 if (d == 0.0) { 910 sign = (copysign(1.0, d) == -1.0); 911 } 912 else if (isnan(d)) { 913 sign = (copysign(1.0, d) == -1.0); 914 exponent = 0x1f; 915 bits = 512; 916 } 917 else if (!isfinite(d)) { 918 sign = (d < 0.0); 919 exponent = 0x1f; 920 } 921 else { 922 if (d < 0.0) { 923 sign = true; 924 d = -d; 925 } 926 927 fraction = frexp(d, &exponent); 928 929 assert(fraction >= 0.5 && fraction < 1.0); 930 931 fraction *= 2.0; 932 exponent--; 933 934 if (exponent >= 16) { 935 errno = ERANGE; 936 937 return false; 938 } 939 else if (exponent < -25) { 940 fraction = 0.0; 941 exponent = 0; 942 } 943 else if (exponent < -14) { 944 
fraction = ldexp(fraction, 14 + exponent); 945 exponent = 0; 946 } 947 else { 948 fraction -= 1.0; 949 exponent += 15; 950 } 951 952 fraction *= 1024.0; 953 bits = (uint16_t)fraction; 954 955 assert(bits < 1024); 956 assert(exponent < 31); 957 958 if ((fraction - bits > 0.5) || ((fraction - bits == 0.5) && (bits % 2))) { 959 if (++bits == 1024) { 960 bits = 0; 961 962 if (++exponent == 31) { 963 errno = ERANGE; 964 965 return false; 966 } 967 } 968 } 969 } 970 971 bits |= (exponent << 10) | (sign << 15); 972 973 p = (uint8_t *)buf + little_endian; 974 *p = (bits >> 8) & 0xff; 975 976 p += (little_endian ? -1 : 1); 977 *p = bits & 0xff; 978 979 return true; 980 } 981 982 static bool 983 double_pack32(double d, char *buf, bool little_endian) 984 { 985 int8_t step = little_endian ? -1 : 1; 986 int32_t exponent = 0; 987 uint32_t bits = 0; 988 bool sign = false; 989 double fraction; 990 uint8_t *p; 991 992 if (d == 0.0) { 993 sign = (copysign(1.0, d) == -1.0); 994 } 995 else if (isnan(d)) { 996 sign = (copysign(1.0, d) == -1.0); 997 exponent = 0xff; 998 bits = 0x7fffff; 999 } 1000 else if (!isfinite(d)) { 1001 sign = (d < 0.0); 1002 exponent = 0xff; 1003 } 1004 else { 1005 if (d < 0.0) { 1006 sign = true; 1007 d = -d; 1008 } 1009 1010 fraction = frexp(d, &exponent); 1011 1012 if (fraction == 0.0) { 1013 exponent = 0; 1014 } 1015 else { 1016 assert(fraction >= 0.5 && fraction < 1.0); 1017 1018 fraction *= 2.0; 1019 exponent--; 1020 } 1021 1022 if (exponent >= 128) { 1023 errno = ERANGE; 1024 1025 return false; 1026 } 1027 else if (exponent < -126) { 1028 fraction = ldexp(fraction, 126 + exponent); 1029 exponent = 0; 1030 } 1031 else if (exponent != 0 || fraction != 0.0) { 1032 fraction -= 1.0; 1033 exponent += 127; 1034 } 1035 1036 fraction *= 8388608.0; 1037 bits = (uint32_t)(fraction + 0.5); 1038 1039 assert(bits <= 8388608); 1040 1041 if (bits >> 23) { 1042 bits = 0; 1043 1044 if (++exponent >= 255) { 1045 errno = ERANGE; 1046 1047 return false; 1048 } 1049 } 1050 } 
1051 1052 p = (uint8_t *)buf + (little_endian ? 3 : 0); 1053 *p = (sign << 7) | (exponent >> 1); 1054 1055 p += step; 1056 *p = ((exponent & 1) << 7) | (bits >> 16); 1057 1058 p += step; 1059 *p = (bits >> 8) & 0xff; 1060 1061 p += step; 1062 *p = bits & 0xff; 1063 1064 return true; 1065 } 1066 1067 #define double_pack64 uc_double_pack 1068 1069 static double 1070 double_unpack16(const char *buf, bool little_endian) 1071 { 1072 uint32_t fraction; 1073 int32_t exponent; 1074 uint8_t *p; 1075 bool sign; 1076 double d; 1077 1078 p = (uint8_t *)buf + little_endian; 1079 sign = (*p >> 7) & 1; 1080 exponent = (*p & 0x7c) >> 2; 1081 fraction = (*p & 0x03) << 8; 1082 1083 p += little_endian ? -1 : 1; 1084 fraction |= *p; 1085 1086 if (exponent == 0x1f) { 1087 if (fraction == 0) 1088 return sign ? -INFINITY : INFINITY; 1089 else 1090 return sign ? -NAN : NAN; 1091 } 1092 1093 d = (double)fraction / 1024.0; 1094 1095 if (exponent == 0) { 1096 exponent = -14; 1097 } 1098 else { 1099 exponent -= 15; 1100 d += 1.0; 1101 } 1102 1103 d = ldexp(d, exponent); 1104 1105 return sign ? -d : d; 1106 } 1107 1108 static double 1109 double_unpack32(const char *buf, bool little_endian) 1110 { 1111 int8_t step = little_endian ? -1 : 1; 1112 uint32_t fraction; 1113 int32_t exponent; 1114 uint8_t *p; 1115 bool sign; 1116 double d; 1117 1118 p = (uint8_t *)buf + (little_endian ? 3 : 0); 1119 sign = (*p >> 7) & 1; 1120 exponent = (*p & 0x7f) << 1; 1121 1122 p += step; 1123 exponent |= (*p >> 7) & 1; 1124 fraction = (*p & 0x7f) << 16; 1125 1126 p += step; 1127 fraction |= *p << 8; 1128 1129 p += step; 1130 fraction |= *p; 1131 1132 if (exponent == 0xff) { 1133 if (fraction == 0) 1134 return sign ? -INFINITY : INFINITY; 1135 else 1136 return sign ? -NAN : NAN; 1137 } 1138 1139 d = (double)fraction / 8388608.0; 1140 1141 if (exponent == 0) { 1142 exponent = -126; 1143 } 1144 else { 1145 exponent -= 127; 1146 d += 1.0; 1147 } 1148 1149 d = ldexp(d, exponent); 1150 1151 return sign ? 
-d : d; 1152 } 1153 1154 #define double_unpack64 uc_double_unpack 1155 1156 static bool 1157 range_exception(uc_vm_t *vm, const formatdef_t *f, bool is_unsigned) 1158 { 1159 /* ulargest is the largest unsigned value with f->size bytes. 1160 * Note that the simpler: 1161 * ((size_t)1 << (f->size * 8)) - 1 1162 * doesn't work when f->size == sizeof(size_t) because C doesn't 1163 * define what happens when a left shift count is >= the number of 1164 * bits in the integer being shifted; e.g., on some boxes it doesn't 1165 * shift at all when they're equal. 1166 */ 1167 const size_t ulargest = (size_t)-1 >> ((sizeof(size_t) - f->size)*8); 1168 1169 assert(f->size >= 1 && f->size <= (ssize_t)sizeof(size_t)); 1170 1171 if (is_unsigned) { 1172 uc_vm_raise_exception(vm, EXCEPTION_TYPE, 1173 "Format '%c' requires numeric argument between 0 and %zu", 1174 f->format, 1175 ulargest); 1176 } 1177 else { 1178 const ssize_t largest = (ssize_t)(ulargest >> 1); 1179 1180 uc_vm_raise_exception(vm, EXCEPTION_TYPE, 1181 "Format '%c' requires numeric argument between %zd and %zd", 1182 f->format, 1183 ~ largest, 1184 largest); 1185 } 1186 1187 return false; 1188 } 1189 1190 1191 /* Native mode routines. 
****************************************************/ 1192 1193 static uc_value_t * 1194 native_unpack_char(uc_vm_t *vm, const char *p, const formatdef_t *f) 1195 { 1196 return ucv_string_new_length(p, 1); 1197 } 1198 1199 static uc_value_t * 1200 native_unpack_byte(uc_vm_t *vm, const char *p, const formatdef_t *f) 1201 { 1202 return ucv_int64_new(*(signed char *)p); 1203 } 1204 1205 static uc_value_t * 1206 native_unpack_ubyte(uc_vm_t *vm, const char *p, const formatdef_t *f) 1207 { 1208 return ucv_uint64_new(*(unsigned char *)p); 1209 } 1210 1211 static uc_value_t * 1212 native_unpack_short(uc_vm_t *vm, const char *p, const formatdef_t *f) 1213 { 1214 short x = 0; 1215 1216 memcpy(&x, p, sizeof(x)); 1217 1218 return ucv_int64_new(x); 1219 } 1220 1221 static uc_value_t * 1222 native_unpack_ushort(uc_vm_t *vm, const char *p, const formatdef_t *f) 1223 { 1224 unsigned short x = 0; 1225 1226 memcpy(&x, p, sizeof(x)); 1227 1228 return ucv_uint64_new(x); 1229 } 1230 1231 static uc_value_t * 1232 native_unpack_int(uc_vm_t *vm, const char *p, const formatdef_t *f) 1233 { 1234 int x = 0; 1235 1236 memcpy(&x, p, sizeof(x)); 1237 1238 return ucv_int64_new(x); 1239 } 1240 1241 static uc_value_t * 1242 native_unpack_uint(uc_vm_t *vm, const char *p, const formatdef_t *f) 1243 { 1244 unsigned int x = 0; 1245 1246 memcpy(&x, p, sizeof(x)); 1247 1248 return ucv_uint64_new(x); 1249 } 1250 1251 static uc_value_t * 1252 native_unpack_long(uc_vm_t *vm, const char *p, const formatdef_t *f) 1253 { 1254 long x = 0; 1255 1256 memcpy(&x, p, sizeof(x)); 1257 1258 return ucv_int64_new(x); 1259 } 1260 1261 static uc_value_t * 1262 native_unpack_ulong(uc_vm_t *vm, const char *p, const formatdef_t *f) 1263 { 1264 unsigned long x = 0; 1265 1266 memcpy(&x, p, sizeof(x)); 1267 1268 return ucv_uint64_new(x); 1269 } 1270 1271 static uc_value_t * 1272 native_unpack_ssize_t(uc_vm_t *vm, const char *p, const formatdef_t *f) 1273 { 1274 ssize_t x = 0; 1275 1276 memcpy(&x, p, sizeof(x)); 1277 1278 
return ucv_int64_new(x); 1279 } 1280 1281 static uc_value_t * 1282 native_unpack_size_t(uc_vm_t *vm, const char *p, const formatdef_t *f) 1283 { 1284 size_t x = 0; 1285 1286 memcpy(&x, p, sizeof(x)); 1287 1288 return ucv_uint64_new(x); 1289 } 1290 1291 static uc_value_t * 1292 native_unpack_longlong(uc_vm_t *vm, const char *p, const formatdef_t *f) 1293 { 1294 long long x = 0; 1295 1296 memcpy(&x, p, sizeof(x)); 1297 1298 return ucv_int64_new(x); 1299 } 1300 1301 static uc_value_t * 1302 native_unpack_ulonglong(uc_vm_t *vm, const char *p, const formatdef_t *f) 1303 { 1304 unsigned long long x = 0; 1305 1306 memcpy(&x, p, sizeof(x)); 1307 1308 return ucv_uint64_new(x); 1309 } 1310 1311 static uc_value_t * 1312 native_unpack_bool(uc_vm_t *vm, const char *p, const formatdef_t *f) 1313 { 1314 bool x = false; 1315 1316 memcpy(&x, p, sizeof(x)); 1317 1318 return ucv_boolean_new(x != 0); 1319 } 1320 1321 1322 static uc_value_t * 1323 native_unpack_halffloat(uc_vm_t *vm, const char *p, const formatdef_t *f) 1324 { 1325 #if __BYTE_ORDER == __LITTLE_ENDIAN 1326 return ucv_double_new(double_unpack16(p, true)); 1327 #else 1328 return ucv_double_new(double_unpack16(p, false)); 1329 #endif 1330 } 1331 1332 static uc_value_t * 1333 native_unpack_float(uc_vm_t *vm, const char *p, const formatdef_t *f) 1334 { 1335 float x = 0.0; 1336 1337 memcpy(&x, p, sizeof(x)); 1338 1339 return ucv_double_new(x); 1340 } 1341 1342 static uc_value_t * 1343 native_unpack_double(uc_vm_t *vm, const char *p, const formatdef_t *f) 1344 { 1345 double x = 0.0; 1346 1347 memcpy(&x, p, sizeof(x)); 1348 1349 return ucv_double_new(x); 1350 } 1351 1352 static uc_value_t * 1353 native_unpack_void_p(uc_vm_t *vm, const char *p, const formatdef_t *f) 1354 { 1355 void *x = NULL; 1356 1357 memcpy(&x, p, sizeof(x)); 1358 1359 return ucv_int64_new((intptr_t)x); 1360 } 1361 1362 static bool 1363 native_pack_byte(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1364 { 1365 long x = 0; 1366 1367 if 
(!ucv_as_long(vm, v, &x)) 1368 return false; 1369 1370 if (x < -128 || x > 127) { 1371 uc_vm_raise_exception(vm, EXCEPTION_TYPE, 1372 "Byte format requires numeric value between -128 and 127"); 1373 1374 return false; 1375 } 1376 1377 *p = (char)x; 1378 1379 return true; 1380 } 1381 1382 static bool 1383 native_pack_ubyte(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1384 { 1385 long x = 0; 1386 1387 if (!ucv_as_long(vm, v, &x)) 1388 return false; 1389 1390 if (x < 0 || x > 255) { 1391 uc_vm_raise_exception(vm, EXCEPTION_TYPE, 1392 "Unsigned byte format requires numeric value between 0 and 255"); 1393 1394 return false; 1395 } 1396 1397 *(unsigned char *)p = (unsigned char)x; 1398 1399 return true; 1400 } 1401 1402 static bool 1403 native_pack_char(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1404 { 1405 char *s = NULL; 1406 1407 if (ucv_type(v) == UC_STRING) { 1408 s = ucv_string_get(v); 1409 *p = *s; 1410 } 1411 else { 1412 s = ucv_to_string(vm, v); 1413 *p = *s; 1414 free(s); 1415 } 1416 1417 return true; 1418 } 1419 1420 static bool 1421 native_pack_short(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1422 { 1423 long x = 0; 1424 short y = 0; 1425 1426 if (!ucv_as_long(vm, v, &x)) 1427 return false; 1428 1429 if (x < SHRT_MIN || x > SHRT_MAX) { 1430 uc_vm_raise_exception(vm, EXCEPTION_TYPE, 1431 "Short format requires numeric value between %d and %d", 1432 (int)SHRT_MIN, (int)SHRT_MAX); 1433 1434 return false; 1435 } 1436 1437 y = (short)x; 1438 memcpy(p, &y, sizeof(y)); 1439 1440 return true; 1441 } 1442 1443 static bool 1444 native_pack_ushort(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1445 { 1446 unsigned short y = 0; 1447 long x = 0; 1448 1449 if (!ucv_as_long(vm, v, &x)) 1450 return false; 1451 1452 if (x < 0 || x > USHRT_MAX) { 1453 uc_vm_raise_exception(vm, EXCEPTION_TYPE, 1454 "Unsigned short format requires numeric value between 0 and %u", 1455 (unsigned int)USHRT_MAX); 1456 1457 return false; 1458 
} 1459 1460 y = (unsigned short)x; 1461 memcpy(p, &y, sizeof(y)); 1462 1463 return true; 1464 } 1465 1466 static bool 1467 native_pack_int(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1468 { 1469 long x = 0; 1470 int y = 0; 1471 1472 if (!ucv_as_long(vm, v, &x)) 1473 return false; 1474 1475 if (sizeof(long) > sizeof(int)) { 1476 if ((x < ((long)INT_MIN)) || (x > ((long)INT_MAX))) 1477 return range_exception(vm, f, false); 1478 } 1479 1480 y = (int)x; 1481 memcpy(p, &y, sizeof(y)); 1482 1483 return true; 1484 } 1485 1486 static bool 1487 native_pack_uint(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1488 { 1489 unsigned long x = 0; 1490 unsigned int y = 0; 1491 1492 if (!ucv_as_ulong(vm, v, &x)) 1493 return false; 1494 1495 if (sizeof(long) > sizeof(int)) { 1496 if (x > ((unsigned long)UINT_MAX)) 1497 return range_exception(vm, f, true); 1498 } 1499 1500 y = (unsigned int)x; 1501 memcpy(p, &y, sizeof(y)); 1502 1503 return true; 1504 } 1505 1506 static bool 1507 native_pack_long(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1508 { 1509 long x = 0; 1510 1511 if (!ucv_as_long(vm, v, &x)) 1512 return false; 1513 1514 memcpy(p, &x, sizeof(x)); 1515 1516 return true; 1517 } 1518 1519 static bool 1520 native_pack_ulong(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1521 { 1522 unsigned long x = 0; 1523 1524 if (!ucv_as_ulong(vm, v, &x)) 1525 return false; 1526 1527 memcpy(p, &x, sizeof(x)); 1528 1529 return true; 1530 } 1531 1532 static bool 1533 native_pack_ssize_t(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1534 { 1535 ssize_t x = 0; 1536 1537 if (!ucv_as_ssize_t(vm, v, &x)) 1538 return false; 1539 1540 memcpy(p, &x, sizeof(x)); 1541 1542 return true; 1543 } 1544 1545 static bool 1546 native_pack_size_t(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1547 { 1548 size_t x = 0; 1549 1550 if (!ucv_as_size_t(vm, v, &x)) 1551 return false; 1552 1553 memcpy(p, &x, sizeof(x)); 1554 1555 return true; 
1556 } 1557 1558 static bool 1559 native_pack_longlong(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1560 { 1561 long long x = 0; 1562 1563 if (!ucv_as_longlong(vm, v, &x)) 1564 return false; 1565 1566 memcpy(p, &x, sizeof(x)); 1567 1568 return true; 1569 } 1570 1571 static bool 1572 native_pack_ulonglong(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1573 { 1574 unsigned long long x = 0; 1575 1576 if (!ucv_as_ulonglong(vm, v, &x)) 1577 return false; 1578 1579 memcpy(p, &x, sizeof(x)); 1580 1581 return true; 1582 } 1583 1584 1585 static bool 1586 native_pack_bool(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1587 { 1588 bool x = 0; 1589 1590 x = ucv_is_truish(v); 1591 1592 memcpy(p, &x, sizeof(x)); 1593 1594 return true; 1595 } 1596 1597 static bool 1598 native_pack_halffloat(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1599 { 1600 double x; 1601 1602 if (!ucv_as_double(vm, v, &x)) 1603 return false; 1604 1605 #if __BYTE_ORDER == __LITTLE_ENDIAN 1606 return double_pack16(x, p, true); 1607 #else 1608 return double_pack16(x, p, false); 1609 #endif 1610 } 1611 1612 static bool 1613 native_pack_float(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1614 { 1615 double d = 0.0; 1616 float x = 0.0; 1617 1618 if (!ucv_as_double(vm, v, &d)) 1619 return false; 1620 1621 x = (float)d; 1622 memcpy(p, &x, sizeof(x)); 1623 1624 return true; 1625 } 1626 1627 static bool 1628 native_pack_double(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1629 { 1630 double x = 0.0; 1631 1632 if (!ucv_as_double(vm, v, &x)) 1633 return false; 1634 1635 memcpy(p, &x, sizeof(x)); 1636 1637 return true; 1638 } 1639 1640 static bool 1641 native_pack_void_p(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1642 { 1643 long long int i = 0; 1644 void *x = NULL; 1645 1646 if (!ucv_as_longlong(vm, v, &i)) 1647 return false; 1648 1649 x = (void *)(intptr_t)i; 1650 memcpy(p, &x, sizeof(x)); 1651 1652 return true; 1653 } 1654 
1655 static const formatdef_t native_endian_table[] = { 1656 { 'x', sizeof(char), 0, NULL, NULL }, 1657 { 'b', sizeof(char), 0, native_unpack_byte, native_pack_byte }, 1658 { 'B', sizeof(char), 0, native_unpack_ubyte, native_pack_ubyte }, 1659 { 'c', sizeof(char), 0, native_unpack_char, native_pack_char }, 1660 { '*', sizeof(char), 0, NULL, NULL }, 1661 { 's', sizeof(char), 0, NULL, NULL }, 1662 { 'p', sizeof(char), 0, NULL, NULL }, 1663 { 'h', sizeof(short), SHORT_ALIGN, native_unpack_short, native_pack_short }, 1664 { 'H', sizeof(short), SHORT_ALIGN, native_unpack_ushort, native_pack_ushort }, 1665 { 'i', sizeof(int), INT_ALIGN, native_unpack_int, native_pack_int }, 1666 { 'I', sizeof(int), INT_ALIGN, native_unpack_uint, native_pack_uint }, 1667 { 'l', sizeof(long), LONG_ALIGN, native_unpack_long, native_pack_long }, 1668 { 'L', sizeof(long), LONG_ALIGN, native_unpack_ulong, native_pack_ulong }, 1669 { 'n', sizeof(size_t), SIZE_T_ALIGN, native_unpack_ssize_t, native_pack_ssize_t }, 1670 { 'N', sizeof(size_t), SIZE_T_ALIGN, native_unpack_size_t, native_pack_size_t }, 1671 { 'q', sizeof(long long), LONG_LONG_ALIGN, native_unpack_longlong, native_pack_longlong }, 1672 { 'Q', sizeof(long long), LONG_LONG_ALIGN, native_unpack_ulonglong,native_pack_ulonglong }, 1673 { '?', sizeof(bool), BOOL_ALIGN, native_unpack_bool, native_pack_bool }, 1674 { 'e', sizeof(short), SHORT_ALIGN, native_unpack_halffloat, native_pack_halffloat }, 1675 { 'f', sizeof(float), FLOAT_ALIGN, native_unpack_float, native_pack_float }, 1676 { 'd', sizeof(double), DOUBLE_ALIGN, native_unpack_double, native_pack_double }, 1677 { 'P', sizeof(void *), VOID_P_ALIGN, native_unpack_void_p, native_pack_void_p }, 1678 { 0 } 1679 }; 1680 1681 1682 /* Big-endian routines. 
*****************************************************/ 1683 1684 static uc_value_t * 1685 be_unpack_int(uc_vm_t *vm, const char *p, const formatdef_t *f) 1686 { 1687 const unsigned char *bytes = (const unsigned char *)p; 1688 ssize_t i = f->size; 1689 long x = 0; 1690 1691 do { 1692 x = (x<<8) | *bytes++; 1693 } while (--i > 0); 1694 1695 /* Extend the sign bit. */ 1696 if ((ssize_t)sizeof(long) > f->size) 1697 x |= -(x & (1L << ((8 * f->size) - 1))); 1698 1699 return ucv_int64_new(x); 1700 } 1701 1702 static uc_value_t * 1703 be_unpack_uint(uc_vm_t *vm, const char *p, const formatdef_t *f) 1704 { 1705 const unsigned char *bytes = (const unsigned char *)p; 1706 ssize_t i = f->size; 1707 unsigned long x = 0; 1708 1709 do { 1710 x = (x<<8) | *bytes++; 1711 } while (--i > 0); 1712 1713 return ucv_uint64_new(x); 1714 } 1715 1716 static uc_value_t * 1717 be_unpack_longlong(uc_vm_t *vm, const char *p, const formatdef_t *f) 1718 { 1719 const unsigned char *bytes = (const unsigned char *)p; 1720 ssize_t i = f->size; 1721 long long x = 0; 1722 1723 do { 1724 x = (x<<8) | *bytes++; 1725 } while (--i > 0); 1726 1727 /* Extend the sign bit. 
*/ 1728 if ((ssize_t)sizeof(long long) > f->size) 1729 x |= -(x & ((long long)1 << ((8 * f->size) - 1))); 1730 1731 return ucv_int64_new(x); 1732 } 1733 1734 static uc_value_t * 1735 be_unpack_ulonglong(uc_vm_t *vm, const char *p, const formatdef_t *f) 1736 { 1737 const unsigned char *bytes = (const unsigned char *)p; 1738 unsigned long long x = 0; 1739 ssize_t i = f->size; 1740 1741 do { 1742 x = (x<<8) | *bytes++; 1743 } while (--i > 0); 1744 1745 return ucv_uint64_new(x); 1746 } 1747 1748 static uc_value_t * 1749 be_unpack_halffloat(uc_vm_t *vm, const char *p, const formatdef_t *f) 1750 { 1751 return ucv_double_new(double_unpack16(p, false)); 1752 } 1753 1754 static uc_value_t * 1755 be_unpack_float(uc_vm_t *vm, const char *p, const formatdef_t *f) 1756 { 1757 return ucv_double_new(double_unpack32(p, false)); 1758 } 1759 1760 static uc_value_t * 1761 be_unpack_double(uc_vm_t *vm, const char *p, const formatdef_t *f) 1762 { 1763 return ucv_double_new(double_unpack64(p, false)); 1764 } 1765 1766 static uc_value_t * 1767 be_unpack_bool(uc_vm_t *vm, const char *p, const formatdef_t *f) 1768 { 1769 return ucv_boolean_new(*p != 0); 1770 } 1771 1772 static bool 1773 be_pack_int(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1774 { 1775 unsigned char *q = (unsigned char *)p; 1776 ssize_t i = 0; 1777 long x = 0; 1778 1779 if (!ucv_as_long(vm, v, &x)) 1780 return false; 1781 1782 i = f->size; 1783 1784 if (i != sizeof(long)) { 1785 if ((i == 2) && (x < -32768 || x > 32767)) 1786 return range_exception(vm, f, false); 1787 #if UINT_MAX < ULONG_MAX 1788 else if ((i == 4) && (x < -2147483648L || x > 2147483647L)) 1789 return range_exception(vm, f, false); 1790 #endif 1791 } 1792 1793 do { 1794 q[--i] = (unsigned char)(x & 0xffL); 1795 x >>= 8; 1796 } while (i > 0); 1797 1798 return true; 1799 } 1800 1801 static bool 1802 be_pack_uint(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1803 { 1804 unsigned char *q = (unsigned char *)p; 1805 unsigned long x = 
0; 1806 ssize_t i = 0; 1807 1808 if (!ucv_as_ulong(vm, v, &x)) 1809 return false; 1810 1811 i = f->size; 1812 1813 if (i != sizeof(long)) { 1814 unsigned long maxint = 1; 1815 maxint <<= (unsigned long)(i * 8); 1816 if (x >= maxint) 1817 return range_exception(vm, f, true); 1818 } 1819 1820 do { 1821 q[--i] = (unsigned char)(x & 0xffUL); 1822 x >>= 8; 1823 } while (i > 0); 1824 1825 return true; 1826 } 1827 1828 static bool 1829 be_pack_longlong(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1830 { 1831 unsigned char *q = (unsigned char *)p; 1832 long long x = 0; 1833 ssize_t i = 0; 1834 1835 if (!ucv_as_longlong(vm, v, &x)) 1836 return false; 1837 1838 i = f->size; 1839 1840 do { 1841 q[--i] = (unsigned char)(x & 0xffL); 1842 x >>= 8; 1843 } while (i > 0); 1844 1845 return true; 1846 } 1847 1848 static bool 1849 be_pack_ulonglong(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1850 { 1851 unsigned char *q = (unsigned char *)p; 1852 unsigned long long x = 0; 1853 ssize_t i = 0; 1854 1855 if (!ucv_as_ulonglong(vm, v, &x)) 1856 return false; 1857 1858 i = f->size; 1859 1860 do { 1861 q[--i] = (unsigned char)(x & 0xffUL); 1862 x >>= 8; 1863 } while (i > 0); 1864 1865 return true; 1866 } 1867 1868 static bool 1869 be_pack_halffloat(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1870 { 1871 double x = 0.0; 1872 1873 if (!ucv_as_double(vm, v, &x)) 1874 return false; 1875 1876 return double_pack16(x, p, false); 1877 } 1878 1879 static bool 1880 be_pack_float(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1881 { 1882 double x = 0.0; 1883 1884 if (!ucv_as_double(vm, v, &x)) 1885 return false; 1886 1887 if (!double_pack32(x, p, 0)) { 1888 uc_vm_raise_exception(vm, EXCEPTION_TYPE, "Argument out of range"); 1889 1890 return false; 1891 } 1892 1893 return true; 1894 } 1895 1896 static bool 1897 be_pack_double(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1898 { 1899 double x = 0.0; 1900 1901 if (!ucv_as_double(vm, v, 
&x)) 1902 return false; 1903 1904 if (!double_pack64(x, p, 0)) { 1905 uc_vm_raise_exception(vm, EXCEPTION_TYPE, "Argument out of range"); 1906 1907 return false; 1908 } 1909 1910 return true; 1911 } 1912 1913 static bool 1914 be_pack_bool(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1915 { 1916 *p = (char)ucv_is_truish(v); 1917 1918 return true; 1919 } 1920 1921 static formatdef_t big_endian_table[] = { 1922 { 'x', 1, 0, NULL, NULL }, 1923 { 'b', 1, 0, native_unpack_byte, native_pack_byte }, 1924 { 'B', 1, 0, native_unpack_ubyte, native_pack_ubyte }, 1925 { 'c', 1, 0, native_unpack_char, native_pack_char }, 1926 { '*', 1, 0, NULL, NULL }, 1927 { 's', 1, 0, NULL, NULL }, 1928 { 'p', 1, 0, NULL, NULL }, 1929 { 'h', 2, 0, be_unpack_int, be_pack_int }, 1930 { 'H', 2, 0, be_unpack_uint, be_pack_uint }, 1931 { 'i', 4, 0, be_unpack_int, be_pack_int }, 1932 { 'I', 4, 0, be_unpack_uint, be_pack_uint }, 1933 { 'l', 4, 0, be_unpack_int, be_pack_int }, 1934 { 'L', 4, 0, be_unpack_uint, be_pack_uint }, 1935 { 'q', 8, 0, be_unpack_longlong, be_pack_longlong }, 1936 { 'Q', 8, 0, be_unpack_ulonglong, be_pack_ulonglong }, 1937 { '?', 1, 0, be_unpack_bool, be_pack_bool }, 1938 { 'e', 2, 0, be_unpack_halffloat, be_pack_halffloat }, 1939 { 'f', 4, 0, be_unpack_float, be_pack_float }, 1940 { 'd', 8, 0, be_unpack_double, be_pack_double }, 1941 { 0 } 1942 }; 1943 1944 1945 /* Little-endian routines. *****************************************************/ 1946 1947 static uc_value_t * 1948 le_unpack_int(uc_vm_t *vm, const char *p, const formatdef_t *f) 1949 { 1950 const unsigned char *bytes = (const unsigned char *)p; 1951 ssize_t i = f->size; 1952 long x = 0; 1953 1954 do { 1955 x = (x<<8) | bytes[--i]; 1956 } while (i > 0); 1957 1958 /* Extend the sign bit. 
*/ 1959 if ((ssize_t)sizeof(long) > f->size) 1960 x |= -(x & (1L << ((8 * f->size) - 1))); 1961 1962 return ucv_int64_new(x); 1963 } 1964 1965 static uc_value_t * 1966 le_unpack_uint(uc_vm_t *vm, const char *p, const formatdef_t *f) 1967 { 1968 const unsigned char *bytes = (const unsigned char *)p; 1969 ssize_t i = f->size; 1970 unsigned long x = 0; 1971 1972 do { 1973 x = (x<<8) | bytes[--i]; 1974 } while (i > 0); 1975 1976 return ucv_uint64_new(x); 1977 } 1978 1979 static uc_value_t * 1980 le_unpack_longlong(uc_vm_t *vm, const char *p, const formatdef_t *f) 1981 { 1982 const unsigned char *bytes = (const unsigned char *)p; 1983 ssize_t i = f->size; 1984 long long x = 0; 1985 1986 do { 1987 x = (x<<8) | bytes[--i]; 1988 } while (i > 0); 1989 1990 /* Extend the sign bit. */ 1991 if ((ssize_t)sizeof(long long) > f->size) 1992 x |= -(x & ((long long)1 << ((8 * f->size) - 1))); 1993 1994 return ucv_int64_new(x); 1995 } 1996 1997 static uc_value_t * 1998 le_unpack_ulonglong(uc_vm_t *vm, const char *p, const formatdef_t *f) 1999 { 2000 const unsigned char *bytes = (const unsigned char *)p; 2001 unsigned long long x = 0; 2002 ssize_t i = f->size; 2003 2004 do { 2005 x = (x<<8) | bytes[--i]; 2006 } while (i > 0); 2007 2008 return ucv_uint64_new(x); 2009 } 2010 2011 static uc_value_t * 2012 le_unpack_halffloat(uc_vm_t *vm, const char *p, const formatdef_t *f) 2013 { 2014 return ucv_double_new(double_unpack16(p, true)); 2015 } 2016 2017 static uc_value_t * 2018 le_unpack_float(uc_vm_t *vm, const char *p, const formatdef_t *f) 2019 { 2020 return ucv_double_new(double_unpack32(p, true)); 2021 } 2022 2023 static uc_value_t * 2024 le_unpack_double(uc_vm_t *vm, const char *p, const formatdef_t *f) 2025 { 2026 return ucv_double_new(double_unpack64(p, true)); 2027 } 2028 2029 static bool 2030 le_pack_int(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 2031 { 2032 unsigned char *q = (unsigned char *)p; 2033 ssize_t i = 0; 2034 long x = 0; 2035 2036 if (!ucv_as_long(vm, 
v, &x)) 2037 return false; 2038 2039 i = f->size; 2040 2041 if (i != sizeof(long)) { 2042 if ((i == 2) && (x < -32768 || x > 32767)) 2043 return range_exception(vm, f, false); 2044 #if UINT_MAX < ULONG_MAX 2045 else if ((i == 4) && (x < -2147483648L || x > 2147483647L)) 2046 return range_exception(vm, f, false); 2047 #endif 2048 } 2049 2050 do { 2051 *q++ = (unsigned char)(x & 0xffL); 2052 x >>= 8; 2053 } while (--i > 0); 2054 2055 return true; 2056 } 2057 2058 static bool 2059 le_pack_uint(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 2060 { 2061 unsigned char *q = (unsigned char *)p; 2062 unsigned long x = 0; 2063 ssize_t i = 0; 2064 2065 if (!ucv_as_ulong(vm, v, &x)) 2066 return false; 2067 2068 i = f->size; 2069 2070 if (i != sizeof(long)) { 2071 unsigned long maxint = 1; 2072 maxint <<= (unsigned long)(i * 8); 2073 2074 if (x >= maxint) 2075 return range_exception(vm, f, true); 2076 } 2077 2078 do { 2079 *q++ = (unsigned char)(x & 0xffUL); 2080 x >>= 8; 2081 } while (--i > 0); 2082 2083 return true; 2084 } 2085 2086 static bool 2087 le_pack_longlong(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 2088 { 2089 unsigned char *q = (unsigned char *)p; 2090 long long x = 0; 2091 ssize_t i = 0; 2092 2093 if (!ucv_as_longlong(vm, v, &x)) 2094 return false; 2095 2096 i = f->size; 2097 2098 do { 2099 *q++ = (unsigned char)(x & 0xffL); 2100 x >>= 8; 2101 } while (--i > 0); 2102 2103 return true; 2104 } 2105 2106 static bool 2107 le_pack_ulonglong(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 2108 { 2109 unsigned char *q = (unsigned char *)p; 2110 unsigned long long x = 0; 2111 ssize_t i = 0; 2112 2113 if (!ucv_as_ulonglong(vm, v, &x)) 2114 return false; 2115 2116 i = f->size; 2117 2118 do { 2119 *q++ = (unsigned char)(x & 0xffUL); 2120 x >>= 8; 2121 } while (--i > 0); 2122 2123 return true; 2124 } 2125 2126 static bool 2127 le_pack_halffloat(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 2128 { 2129 double x = 0.0; 2130 
2131 if (!ucv_as_double(vm, v, &x)) 2132 return false; 2133 2134 return double_pack16(x, p, true); 2135 } 2136 2137 static bool 2138 le_pack_float(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 2139 { 2140 double x = 0.0; 2141 2142 if (!ucv_as_double(vm, v, &x)) 2143 return false; 2144 2145 if (!double_pack32(x, p, 1)) { 2146 uc_vm_raise_exception(vm, EXCEPTION_TYPE, "Argument out of range"); 2147 2148 return false; 2149 } 2150 2151 return true; 2152 } 2153 2154 static bool 2155 le_pack_double(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 2156 { 2157 double x = 0.0; 2158 2159 if (!ucv_as_double(vm, v, &x)) 2160 return false; 2161 2162 if (!double_pack64(x, p, 1)) { 2163 uc_vm_raise_exception(vm, EXCEPTION_TYPE, "Argument out of range"); 2164 2165 return false; 2166 } 2167 2168 return true; 2169 } 2170 2171 static formatdef_t little_endian_table[] = { 2172 { 'x', 1, 0, NULL, NULL }, 2173 { 'b', 1, 0, native_unpack_byte, native_pack_byte }, 2174 { 'B', 1, 0, native_unpack_ubyte, native_pack_ubyte }, 2175 { 'c', 1, 0, native_unpack_char, native_pack_char }, 2176 { '*', 1, 0, NULL, NULL }, 2177 { 's', 1, 0, NULL, NULL }, 2178 { 'p', 1, 0, NULL, NULL }, 2179 { 'h', 2, 0, le_unpack_int, le_pack_int }, 2180 { 'H', 2, 0, le_unpack_uint, le_pack_uint }, 2181 { 'i', 4, 0, le_unpack_int, le_pack_int }, 2182 { 'I', 4, 0, le_unpack_uint, le_pack_uint }, 2183 { 'l', 4, 0, le_unpack_int, le_pack_int }, 2184 { 'L', 4, 0, le_unpack_uint, le_pack_uint }, 2185 { 'q', 8, 0, le_unpack_longlong, le_pack_longlong }, 2186 { 'Q', 8, 0, le_unpack_ulonglong, le_pack_ulonglong }, 2187 { '?', 1, 0, be_unpack_bool, be_pack_bool }, 2188 { 'e', 2, 0, le_unpack_halffloat, le_pack_halffloat }, 2189 { 'f', 4, 0, le_unpack_float, le_pack_float }, 2190 { 'd', 8, 0, le_unpack_double, le_pack_double }, 2191 { 0 } 2192 }; 2193 2194 2195 static const formatdef_t * 2196 select_format_table(const char **pfmt) 2197 { 2198 const char *fmt = (*pfmt)++; /* May be backed out of later */ 
2199 2200 switch (*fmt) { 2201 case '<': 2202 return little_endian_table; 2203 2204 case '>': 2205 case '!': /* Network byte order is big-endian */ 2206 return big_endian_table; 2207 2208 case '=': /* Host byte order -- different from native in alignment! */ 2209 #if __BYTE_ORDER == __LITTLE_ENDIAN 2210 return little_endian_table; 2211 #else 2212 return big_endian_table; 2213 #endif 2214 2215 default: 2216 --*pfmt; /* Back out of pointer increment */ 2217 /* Fall through */ 2218 2219 case '@': 2220 return native_endian_table; 2221 } 2222 } 2223 2224 2225 /* Get the table entry for a format code */ 2226 2227 static const formatdef_t * 2228 lookup_table_entry(uc_vm_t *vm, int c, const formatdef_t *table) 2229 { 2230 for (; table->format != '\0'; table++) { 2231 if (table->format == c) { 2232 return table; 2233 } 2234 } 2235 2236 uc_vm_raise_exception(vm, EXCEPTION_TYPE, 2237 "Unrecognized character '%c' in struct format", 2238 c); 2239 2240 return NULL; 2241 } 2242 2243 2244 /* Align a size according to a format code. Return -1 on overflow. 
*/ 2245 2246 static ssize_t 2247 align_for_entry(ssize_t size, const formatdef_t *e) 2248 { 2249 ssize_t extra; 2250 2251 if (e->alignment && size > 0) { 2252 extra = (e->alignment - 1) - (size - 1) % (e->alignment); 2253 2254 if (extra > SSIZE_MAX - size) 2255 return -1; 2256 2257 size += extra; 2258 } 2259 2260 return size; 2261 } 2262 2263 2264 static void 2265 optimize_functions(void) 2266 { 2267 /* Check endian and swap in faster functions */ 2268 const formatdef_t *native = native_endian_table; 2269 formatdef_t *other, *ptr; 2270 2271 #if __BYTE_ORDER == __LITTLE_ENDIAN 2272 other = little_endian_table; 2273 #else 2274 other = big_endian_table; 2275 #endif 2276 2277 /* Scan through the native table, find a matching 2278 entry in the endian table and swap in the 2279 native implementations whenever possible 2280 (64-bit platforms may not have "standard" sizes) */ 2281 while (native->format != '\0' && other->format != '\0') { 2282 ptr = other; 2283 2284 while (ptr->format != '\0') { 2285 if (ptr->format == native->format) { 2286 /* Match faster when formats are 2287 listed in the same order */ 2288 if (ptr == other) 2289 other++; 2290 2291 /* Only use the trick if the 2292 size matches */ 2293 if (ptr->size != native->size) 2294 break; 2295 2296 /* Skip float and double, could be 2297 "unknown" float format */ 2298 if (ptr->format == 'd' || ptr->format == 'f') 2299 break; 2300 2301 /* Skip bool, semantics are different for standard size */ 2302 if (ptr->format == '?') 2303 break; 2304 2305 ptr->pack = native->pack; 2306 ptr->unpack = native->unpack; 2307 break; 2308 } 2309 2310 ptr++; 2311 } 2312 2313 native++; 2314 } 2315 } 2316 2317 static formatstate_t * 2318 parse_format(uc_vm_t *vm, uc_value_t *fmtval) 2319 { 2320 ssize_t size, num, itemsize; 2321 const formatdef_t *e, *f; 2322 const char *fmt, *s; 2323 formatstate_t *state; 2324 formatcode_t *codes; 2325 size_t ncodes; 2326 char c; 2327 2328 if (ucv_type(fmtval) != UC_STRING) { 2329 
uc_vm_raise_exception(vm, EXCEPTION_TYPE, 2330 "Format value not a string"); 2331 2332 return NULL; 2333 } 2334 2335 fmt = ucv_string_get(fmtval); 2336 2337 if (strlen(fmt) != ucv_string_length(fmtval)) { 2338 uc_vm_raise_exception(vm, EXCEPTION_TYPE, 2339 "Format string contains embedded null character"); 2340 2341 return NULL; 2342 } 2343 2344 f = select_format_table(&fmt); 2345 2346 s = fmt; 2347 size = 0; 2348 ncodes = 0; 2349 2350 while ((c = *s++) != '\0') { 2351 if (isspace(c)) 2352 continue; 2353 2354 if ('' <= c && c <= '9') { 2355 num = c - ''; 2356 2357 while ('' <= (c = *s++) && c <= '9') { 2358 /* overflow-safe version of 2359 if (num*10 + (c - '') > SSIZE_MAX) { ... } */ 2360 if (num >= SSIZE_MAX / 10 && ( 2361 num > SSIZE_MAX / 10 || 2362 (c - '') > SSIZE_MAX % 10)) 2363 goto overflow; 2364 2365 num = num*10 + (c - ''); 2366 } 2367 2368 if (c == '\0') { 2369 uc_vm_raise_exception(vm, EXCEPTION_TYPE, 2370 "Format string contains repeat count given without format specifier"); 2371 2372 return NULL; 2373 } 2374 } 2375 else 2376 num = 1; 2377 2378 e = lookup_table_entry(vm, c, f); 2379 2380 if (e == NULL) 2381 return NULL; 2382 2383 switch (c) { 2384 case '*': /* fall through */ 2385 case 's': 2386 case 'p': 2387 ncodes++; 2388 break; 2389 2390 case 'x': 2391 break; 2392 2393 default: 2394 if (num) 2395 ncodes++; 2396 2397 break; 2398 } 2399 2400 itemsize = e->size; 2401 size = align_for_entry(size, e); 2402 2403 if (size == -1) 2404 goto overflow; 2405 2406 /* if (size + num * itemsize > SSIZE_MAX) { ... } */ 2407 if (num > (SSIZE_MAX - size) / itemsize) 2408 goto overflow; 2409 2410 size += (c != '*') ? 
num * itemsize : 0; 2411 } 2412 2413 /* check for overflow */ 2414 if ((ncodes + 1) > ((size_t)SSIZE_MAX / sizeof(formatcode_t))) { 2415 uc_vm_raise_exception(vm, EXCEPTION_RUNTIME, "Out of memory"); 2416 2417 return NULL; 2418 } 2419 2420 state = xalloc(sizeof(*state) + ncodes * sizeof(formatcode_t)); 2421 state->size = size; 2422 state->ncodes = ncodes; 2423 2424 codes = state->codes; 2425 2426 s = fmt; 2427 size = 0; 2428 2429 while ((c = *s++) != '\0') { 2430 if (isspace(c)) 2431 continue; 2432 2433 if ('' <= c && c <= '9') { 2434 num = c - ''; 2435 2436 while ('' <= (c = *s++) && c <= '9') 2437 num = num*10 + (c - ''); 2438 2439 } 2440 else if (c == '*') 2441 num = -1; 2442 else 2443 num = 1; 2444 2445 e = lookup_table_entry(vm, c, f); 2446 2447 if (e == NULL) 2448 continue; 2449 2450 size = align_for_entry(size, e); 2451 2452 if (c == '*' || c == 's' || c == 'p') { 2453 codes->offset = size; 2454 codes->size = num; 2455 codes->fmtdef = e; 2456 codes->repeat = 1; 2457 codes++; 2458 size += (c != '*') ? 
num : 0; 2459 } 2460 else if (c == 'x') { 2461 size += num; 2462 } 2463 else if (num) { 2464 codes->offset = size; 2465 codes->size = e->size; 2466 codes->fmtdef = e; 2467 codes->repeat = num; 2468 codes++; 2469 size += e->size * num; 2470 } 2471 } 2472 2473 return state; 2474 2475 overflow: 2476 uc_vm_raise_exception(vm, EXCEPTION_RUNTIME, 2477 "Total struct size too long"); 2478 2479 return NULL; 2480 } 2481 2482 static bool 2483 grow_buffer(uc_vm_t *vm, void **buf, size_t *bufsz, size_t length) 2484 { 2485 const size_t overhead = sizeof(uc_string_t) + 1; 2486 2487 if (length > *bufsz) { 2488 size_t old_size = *bufsz; 2489 size_t new_size = (length + 7u) & ~7u; 2490 2491 if (*buf != NULL) { 2492 new_size = *bufsz; 2493 2494 while (length > new_size) { 2495 if (new_size > SIZE_MAX - (new_size >> 1)) { 2496 uc_vm_raise_exception(vm, EXCEPTION_RUNTIME, 2497 "Overflow reallocating buffer from %zu to %zu bytes", 2498 *bufsz, length); 2499 2500 return false; 2501 } 2502 2503 new_size += ((new_size >> 1) + 7u) & ~7u; 2504 } 2505 } 2506 2507 char *tmp = realloc(*buf, new_size + overhead); 2508 2509 if (!tmp) { 2510 uc_vm_raise_exception(vm, EXCEPTION_RUNTIME, 2511 "Error reallocating buffer to %zu+%zu bytes: %m", 2512 new_size, overhead); 2513 2514 return false; 2515 } 2516 2517 if (*buf) 2518 memset(tmp + overhead + old_size - 1, 0, new_size - old_size + 1); 2519 else 2520 memset(tmp, 0, new_size + overhead); 2521 2522 *buf = tmp; 2523 *bufsz = new_size; 2524 } 2525 2526 return true; 2527 } 2528 2529 static bool 2530 uc_pack_common(uc_vm_t *vm, size_t nargs, formatstate_t *state, size_t argoff, 2531 void **buf, size_t *pos, size_t *capacity) 2532 { 2533 size_t ncode, arg, off, new_pos; 2534 formatcode_t *code; 2535 ssize_t size, n; 2536 const void *p; 2537 2538 for (ncode = 0, code = &state->codes[0], arg = argoff, off = 0; 2539 ncode < state->ncodes; 2540 code = &state->codes[++ncode]) { 2541 if (code->fmtdef->format == '*') { 2542 uc_value_t *v = uc_fn_arg(arg++); 
2543 2544 if (ucv_type(v) != UC_STRING) 2545 continue; 2546 2547 n = ucv_string_length(v); 2548 2549 if (code->size == -1 || code->size > n) 2550 off += n; 2551 else 2552 off += code->size; 2553 } 2554 else { 2555 arg += code->repeat; 2556 } 2557 } 2558 2559 new_pos = *pos + state->size + off; 2560 2561 if (!grow_buffer(vm, buf, capacity, new_pos)) 2562 return NULL; 2563 2564 for (ncode = 0, code = &state->codes[0], off = 0; 2565 ncode < state->ncodes; 2566 code = &state->codes[++ncode]) { 2567 const formatdef_t *e = code->fmtdef; 2568 char *res = *buf + sizeof(uc_string_t) + *pos + code->offset + off; 2569 ssize_t j = code->repeat; 2570 2571 while (j--) { 2572 uc_value_t *v = uc_fn_arg(argoff++); 2573 2574 size = code->size; 2575 2576 if (e->format == '*') { 2577 if (ucv_type(v) != UC_STRING) { 2578 uc_vm_raise_exception(vm, EXCEPTION_TYPE, 2579 "Argument for '*' must be a string"); 2580 2581 return false; 2582 } 2583 2584 n = ucv_string_length(v); 2585 p = ucv_string_get(v); 2586 2587 if (size == -1 || n < size) 2588 size = n; 2589 else if (n > size) 2590 n = size; 2591 2592 off += size; 2593 2594 if (n > 0) 2595 memcpy(res, p, n); 2596 } 2597 else if (e->format == 's') { 2598 if (ucv_type(v) != UC_STRING) { 2599 uc_vm_raise_exception(vm, EXCEPTION_TYPE, 2600 "Argument for 's' must be a string"); 2601 2602 return false; 2603 } 2604 2605 n = ucv_string_length(v); 2606 p = ucv_string_get(v); 2607 2608 if (n > size) 2609 n = size; 2610 2611 if (n > 0) 2612 memcpy(res, p, n); 2613 } 2614 else if (e->format == 'p') { 2615 if (ucv_type(v) != UC_STRING) { 2616 uc_vm_raise_exception(vm, EXCEPTION_TYPE, 2617 "Argument for 'p' must be a string"); 2618 2619 return false; 2620 } 2621 2622 n = ucv_string_length(v); 2623 p = ucv_string_get(v); 2624 2625 if (n > (size - 1)) 2626 n = size - 1; 2627 2628 if (n > 0) 2629 memcpy(res + 1, p, n); 2630 2631 if (n > 255) 2632 n = 255; 2633 2634 *res = (unsigned char)n; 2635 } 2636 else { 2637 if (!e->pack(vm, res, v, e)) 2638 return 
false; 2639 } 2640 2641 res += size; 2642 } 2643 } 2644 2645 *pos = new_pos; 2646 2647 return true; 2648 } 2649 2650 static uc_value_t * 2651 uc_unpack_common(uc_vm_t *vm, size_t nargs, formatstate_t *state, 2652 const char *buf, long long pos, size_t *rem, bool single) 2653 { 2654 uc_value_t *result; 2655 formatcode_t *code; 2656 size_t ncode, off; 2657 ssize_t size, n; 2658 2659 if (pos < 0) 2660 pos += *rem; 2661 2662 if (pos < 0 || (size_t)pos >= *rem) 2663 return NULL; 2664 2665 buf += pos; 2666 *rem -= pos; 2667 2668 result = single ? NULL : ucv_array_new(vm); 2669 2670 for (ncode = 0, code = &state->codes[0], off = 0; 2671 ncode < state->ncodes; 2672 code = &state->codes[++ncode]) { 2673 const formatdef_t *e = code->fmtdef; 2674 const char *res = buf + code->offset + off; 2675 ssize_t j = code->repeat; 2676 2677 while (j--) { 2678 uc_value_t *v = NULL; 2679 2680 size = code->size; 2681 2682 if (e->format == '*') { 2683 if (size == -1 || (size_t)size > *rem) 2684 size = *rem; 2685 2686 off += size; 2687 } 2688 else if (size >= 0 && (size_t)size > *rem) { 2689 goto fail; 2690 } 2691 2692 if (e->format == 's' || e->format == '*') { 2693 v = ucv_string_new_length(res, size); 2694 } 2695 else if (e->format == 'p') { 2696 n = *(unsigned char *)res; 2697 2698 if (n >= size) 2699 n = (size > 0 ? size - 1 : 0); 2700 2701 v = ucv_string_new_length(res + 1, n); 2702 } 2703 else { 2704 v = e->unpack(vm, res, e); 2705 } 2706 2707 if (v == NULL) 2708 goto fail; 2709 2710 res += size; 2711 *rem -= size; 2712 2713 if (single) 2714 return v; 2715 2716 ucv_array_push(result, v); 2717 } 2718 } 2719 2720 return result; 2721 2722 fail: 2723 ucv_put(result); 2724 2725 return NULL; 2726 } 2727 2728 2729 /** 2730 * Pack given values according to specified format. 2731 * 2732 * The `pack()` function creates a byte string containing the argument values 2733 * packed according to the given format string. 2734 * 2735 * Returns the packed string. 
2736 * 2737 * Raises a runtime exception if a given argument value does not match the 2738 * required type of the corresponding format string directive or if and invalid 2739 * format string is provided. 2740 * 2741 * @function module:struct#pack 2742 * 2743 * @param {string} format 2744 * The format string. 2745 * 2746 * @param {...*} values 2747 * Variable number of values to pack. 2748 * 2749 * @returns {string} 2750 * 2751 * @example 2752 * // Pack the values 1, 2, 3 as three consecutive unsigned int values 2753 * // in network byte order. 2754 * const data = pack('!III', 1, 2, 3); 2755 */ 2756 static uc_value_t * 2757 uc_pack(uc_vm_t *vm, size_t nargs) 2758 { 2759 uc_value_t *fmtval = uc_fn_arg(0); 2760 size_t pos = 0, capacity = 0; 2761 uc_string_t *us = NULL; 2762 formatstate_t *state; 2763 2764 state = parse_format(vm, fmtval); 2765 2766 if (!state) 2767 return NULL; 2768 2769 if (!uc_pack_common(vm, nargs, state, 1, (void **)&us, &pos, &capacity)) { 2770 free(state); 2771 free(us); 2772 2773 return NULL; 2774 } 2775 2776 free(state); 2777 2778 us->header.type = UC_STRING; 2779 us->header.refcount = 1; 2780 us->length = pos; 2781 2782 return &us->header; 2783 } 2784 2785 /** 2786 * Unpack given byte string according to specified format. 2787 * 2788 * The `unpack()` function interpretes a byte string according to the given 2789 * format string and returns the resulting values. If the optional offset 2790 * argument is given, unpacking starts from this byte position within the input. 2791 * If not specified, the start offset defaults to `0`, the start of the given 2792 * input string. 2793 * 2794 * Returns an array of unpacked values. 2795 * 2796 * Raises a runtime exception if the format string is invalid or if an invalid 2797 * input string or offset value is given. 2798 * 2799 * @function module:struct#unpack 2800 * 2801 * @param {string} format 2802 * The format string. 2803 * 2804 * @param {string} input 2805 * The input string to unpack. 
2806 * 2807 * @param {number} [offset=0] 2808 * The offset within the input string to start unpacking from. 2809 * 2810 * @returns {array} 2811 * 2812 * @example 2813 * // Unpack three consecutive unsigned int values in network byte order. 2814 * const numbers = 2815 * unpack('!III', '\x00\x00\x00\x01\x00\x00\x00\x02\x00\x00\x00\x03'); 2816 * print(numbers, "\n"); // [ 1, 2, 3 ] 2817 */ 2818 static uc_value_t * 2819 uc_unpack(uc_vm_t *vm, size_t nargs) 2820 { 2821 uc_value_t *fmtval = uc_fn_arg(0); 2822 uc_value_t *bufval = uc_fn_arg(1); 2823 uc_value_t *offset = uc_fn_arg(2); 2824 uc_value_t *res = NULL; 2825 formatstate_t *state; 2826 long long pos = 0; 2827 size_t rem; 2828 char *buf; 2829 2830 if (ucv_type(bufval) != UC_STRING) { 2831 uc_vm_raise_exception(vm, EXCEPTION_TYPE, 2832 "Buffer value not a string"); 2833 2834 return NULL; 2835 } 2836 2837 if (offset && !ucv_as_longlong(vm, offset, &pos)) 2838 return NULL; 2839 2840 state = parse_format(vm, fmtval); 2841 2842 if (!state) 2843 return NULL; 2844 2845 buf = ucv_string_get(bufval); 2846 rem = ucv_string_length(bufval); 2847 res = uc_unpack_common(vm, nargs, state, buf, pos, &rem, false); 2848 2849 free(state); 2850 2851 return res; 2852 } 2853 2854 2855 /** 2856 * Represents a struct instance created by `new()`. 2857 * 2858 * @class module:struct.instance 2859 * @hideconstructor 2860 * 2861 * @see {@link module:struct#new|new()} 2862 * 2863 * @example 2864 * 2865 * const fmt = struct.new(…); 2866 * 2867 * fmt.pack(…); 2868 * 2869 * const values = fmt.unpack(…); 2870 */ 2871 2872 /** 2873 * Precompile format string. 2874 * 2875 * The `new()` function precompiles the given format string argument and returns 2876 * a `struct` object instance useful for packing and unpacking multiple items 2877 * without having to recompute the internal format each time. 2878 * 2879 * Returns an precompiled struct format instance. 2880 * 2881 * Raises a runtime exception if the format string is invalid. 
2882 * 2883 * @function module:struct#new 2884 * 2885 * @param {string} format 2886 * The format string. 2887 * 2888 * @returns {module:struct.instance} 2889 * 2890 * @example 2891 * // Create a format of three consecutive unsigned int values in network byte order. 2892 * const fmt = struct.new('!III'); 2893 * const buf = fmt.pack(1, 2, 3); // "\x00\x00\x00\x01…" 2894 * print(fmt.unpack(buf), "\n"); // [ 1, 2, 3 ] 2895 */ 2896 static uc_value_t * 2897 uc_struct_new(uc_vm_t *vm, size_t nargs) 2898 { 2899 uc_value_t *fmtval = uc_fn_arg(0); 2900 formatstate_t *state; 2901 2902 state = parse_format(vm, fmtval); 2903 2904 if (!state) 2905 return NULL; 2906 2907 return ucv_resource_create(vm, "struct.format", state); 2908 } 2909 2910 /** 2911 * Pack given values. 2912 * 2913 * The `pack()` function creates a byte string containing the argument values 2914 * packed according to the given format instance. 2915 * 2916 * Returns the packed string. 2917 * 2918 * Raises a runtime exception if a given argument value does not match the 2919 * required type of the corresponding format string directive. 2920 * 2921 * @function module:struct.instance#pack 2922 * 2923 * @param {...*} values 2924 * Variable number of values to pack. 2925 * 2926 * @returns {string} 2927 * 2928 * @example 2929 * const fmt = struct.new(…); 2930 * const data = fmt.pack(…); 2931 */ 2932 static uc_value_t * 2933 uc_struct_pack(uc_vm_t *vm, size_t nargs) 2934 { 2935 formatstate_t **state = uc_fn_this("struct.format"); 2936 size_t pos = 0, capacity = 0; 2937 uc_string_t *us = NULL; 2938 2939 if (!state || !*state) 2940 return NULL; 2941 2942 if (!uc_pack_common(vm, nargs, *state, 0, (void **)&us, &pos, &capacity)) { 2943 free(us); 2944 2945 return NULL; 2946 } 2947 2948 us->header.type = UC_STRING; 2949 us->header.refcount = 1; 2950 us->length = pos; 2951 2952 return &us->header; 2953 } 2954 2955 /** 2956 * Unpack given byte string. 
2957 * 2958 * The `unpack()` function interpretes a byte string according to the given 2959 * format instance and returns the resulting values. If the optional offset 2960 * argument is given, unpacking starts from this byte position within the input. 2961 * If not specified, the start offset defaults to `0`, the start of the given 2962 * input string. 2963 * 2964 * Returns an array of unpacked values. 2965 * 2966 * Raises a runtime exception if an invalid input string or offset value is 2967 * given. 2968 * 2969 * @function module:struct.instance#unpack 2970 * 2971 * @param {string} input 2972 * The input string to unpack. 2973 * 2974 * @param {number} [offset=0] 2975 * The offset within the input string to start unpacking from. 2976 * 2977 * @returns {array} 2978 * 2979 * @example 2980 * const fmt = struct.new(…); 2981 * const values = fmt.unpack(…); 2982 */ 2983 static uc_value_t * 2984 uc_struct_unpack(uc_vm_t *vm, size_t nargs) 2985 { 2986 formatstate_t **state = uc_fn_this("struct.format"); 2987 uc_value_t *bufval = uc_fn_arg(0); 2988 uc_value_t *offset = uc_fn_arg(1); 2989 long long pos = 0; 2990 size_t rem; 2991 char *buf; 2992 2993 if (!state || !*state) 2994 return NULL; 2995 2996 if (ucv_type(bufval) != UC_STRING) { 2997 uc_vm_raise_exception(vm, EXCEPTION_TYPE, 2998 "Buffer value not a string"); 2999 3000 return NULL; 3001 } 3002 3003 if (offset && !ucv_as_longlong(vm, offset, &pos)) 3004 return NULL; 3005 3006 buf = ucv_string_get(bufval); 3007 rem = ucv_string_length(bufval); 3008 3009 return uc_unpack_common(vm, nargs, *state, buf, pos, &rem, false); 3010 } 3011 3012 3013 /** 3014 * Represents a struct buffer instance created by `buffer()`. 
3015 * 3016 * @class module:struct.buffer 3017 * @hideconstructor 3018 * 3019 * @see {@link module:struct#buffer|buffer()} 3020 * 3021 * @example 3022 * 3023 * const buf = struct.buffer(); 3024 * 3025 * buf.put('I', 12345); 3026 * 3027 * const value = buf.get('I'); 3028 */ 3029 3030 /** 3031 * Creates a new struct buffer instance. 3032 * 3033 * The `buffer()` function creates a new struct buffer object that can be used 3034 * for incremental packing and unpacking of binary data. If an initial data 3035 * string is provided, the buffer is initialized with this content. 3036 * 3037 * Note that even when initial data is provided, the buffer position is always 3038 * set to zero. This design assumes that the primary intent when initializing 3039 * a buffer with data is to read (unpack) from the beginning. If you want to 3040 * append data to a pre-initialized buffer, you need to explicitly move the 3041 * position to the end, either by calling `end()` or by setting the position 3042 * manually with `pos()`. 3043 * 3044 * Returns a new struct buffer instance. 3045 * 3046 * @function module:struct#buffer 3047 * 3048 * @param {string} [initialData] 3049 * Optional initial data to populate the buffer with. 
3050 * 3051 * @returns {module:struct.buffer} 3052 * 3053 * @example 3054 * // Create an empty buffer 3055 * const emptyBuf = struct.buffer(); 3056 * 3057 * // Create a buffer with initial data 3058 * const dataBuf = struct.buffer("\x01\x02\x03\x04"); 3059 * 3060 * // Read from the beginning of the initialized buffer 3061 * const value = dataBuf.get('I'); 3062 * 3063 * // Append data to the initialized buffer 3064 * dataBuf.end().put('I', 5678); 3065 * 3066 * // Alternative chained syntax for initializing and appending 3067 * const buf = struct.buffer("\x01\x02\x03\x04").end().put('I', 5678); 3068 */ 3069 static uc_value_t * 3070 uc_fmtbuf_new(uc_vm_t *vm, size_t nargs) 3071 { 3072 formatbuffer_t *buffer = xalloc(sizeof(*buffer)); 3073 uc_value_t *init_data = uc_fn_arg(0); 3074 3075 buffer->resource.header.type = UC_RESOURCE; 3076 buffer->resource.header.refcount = 1; 3077 buffer->resource.type = ucv_resource_type_lookup(vm, "struct.buffer"); 3078 3079 if (ucv_type(init_data) == UC_STRING) { 3080 char *buf = ucv_string_get(init_data); 3081 size_t len = ucv_string_length(init_data); 3082 3083 if (!grow_buffer(vm, &buffer->resource.data, &buffer->capacity, len)) { 3084 free(buffer); 3085 3086 return NULL; 3087 } 3088 3089 buffer->length = len; 3090 memcpy((char *)buffer->resource.data + sizeof(uc_string_t), buf, len); 3091 } 3092 3093 return &buffer->resource.header; 3094 } 3095 3096 static formatbuffer_t * 3097 formatbuffer_ctx(uc_vm_t *vm) 3098 { 3099 uc_value_t *ctx = vm->callframes.entries[vm->callframes.count - 1].ctx; 3100 3101 if (ucv_type(ctx) != UC_RESOURCE) 3102 return NULL; 3103 3104 uc_resource_t *res = (uc_resource_t *)ctx; 3105 3106 if (!res->type || strcmp(res->type->name, "struct.buffer") != 0) 3107 return NULL; 3108 3109 return (formatbuffer_t *)res; 3110 } 3111 3112 /** 3113 * Get or set the current position in the buffer. 3114 * 3115 * If called without arguments, returns the current position. 
3116 * If called with a position argument, sets the current position to that value. 3117 * 3118 * @function module:struct.buffer#pos 3119 * 3120 * @param {number} [position] 3121 * The position to set. If omitted, the current position is returned. 3122 * 3123 * @returns {number|module:struct.buffer} 3124 * If called without arguments, returns the current position. 3125 * If called with a position argument, returns the buffer instance for chaining. 3126 * 3127 * @example 3128 * const currentPos = buf.pos(); 3129 * buf.pos(10); // Set position to 10 3130 */ 3131 static uc_value_t * 3132 uc_fmtbuf_pos(uc_vm_t *vm, size_t nargs) 3133 { 3134 formatbuffer_t *buffer = formatbuffer_ctx(vm); 3135 uc_value_t *new_pos = uc_fn_arg(0); 3136 3137 if (!buffer) 3138 return NULL; 3139 3140 if (new_pos) { 3141 long long pos; 3142 3143 if (!ucv_as_longlong(vm, new_pos, &pos)) 3144 return NULL; 3145 3146 if (pos < 0) pos += buffer->length; 3147 if (pos < 0) pos = 0; 3148 3149 if (!grow_buffer(vm, &buffer->resource.data, &buffer->capacity, pos)) 3150 return NULL; 3151 3152 buffer->position = pos; 3153 3154 if (buffer->position > buffer->length) 3155 buffer->length = buffer->position; 3156 3157 return ucv_get(&buffer->resource.header); 3158 } 3159 3160 return ucv_uint64_new(buffer->position); 3161 } 3162 3163 /** 3164 * Get or set the current buffer length. 3165 * 3166 * If called without arguments, returns the current length of the buffer. 3167 * If called with a length argument, sets the buffer length to that value, 3168 * padding the data with trailing zero bytes or truncating it depending on 3169 * whether the updated length is larger or smaller than the current length 3170 * respectively. 3171 * 3172 * In case the updated length is smaller than the current buffer offset, the 3173 * position is updated accordingly, so that it points to the new end of the 3174 * truncated buffer data. 
3175 * 3176 * @function module:struct.buffer#length 3177 * 3178 * @param {number} [length] 3179 * The length to set. If omitted, the current length is returned. 3180 * 3181 * @returns {number|module:struct.buffer} 3182 * If called without arguments, returns the current length. 3183 * If called with a length argument, returns the buffer instance for chaining. 3184 * 3185 * @example 3186 * const buf = struct.buffer("abc"); // Initialize buffer with three bytes 3187 * const currentLen = buf.length(); // Returns 3 3188 * 3189 * buf.length(6); // Extend to 6 bytes 3190 * buf.slice(); // Trailing null bytes: "abc\x00\x00\x00" 3191 * 3192 * buf.length(2); // Truncate to 2 bytes 3193 * buf.slice(); // Truncated data: "ab" 3194 */ 3195 static uc_value_t * 3196 uc_fmtbuf_length(uc_vm_t *vm, size_t nargs) 3197 { 3198 formatbuffer_t *buffer = formatbuffer_ctx(vm); 3199 uc_value_t *new_len = uc_fn_arg(0); 3200 3201 if (!buffer) 3202 return NULL; 3203 3204 if (new_len) { 3205 size_t len; 3206 3207 if (!ucv_as_size_t(vm, new_len, &len)) 3208 return NULL; 3209 3210 if (len > buffer->length) { 3211 if (!grow_buffer(vm, &buffer->resource.data, &buffer->capacity, len)) 3212 return NULL; 3213 3214 buffer->length = len; 3215 } 3216 else if (len < buffer->length) { 3217 memset((char *)buffer->resource.data + sizeof(uc_string_t) + len, 3218 0, buffer->length - len); 3219 3220 buffer->length = len; 3221 3222 if (len < buffer->position) 3223 buffer->position = len; 3224 } 3225 3226 return ucv_get(&buffer->resource.header); 3227 } 3228 3229 return ucv_uint64_new(buffer->length); 3230 } 3231 3232 /** 3233 * Set the buffer position to the start (0). 3234 * 3235 * @function module:struct.buffer#start 3236 * 3237 * @returns {module:struct.buffer} 3238 * The buffer instance. 
3239 * 3240 * @example 3241 * buf.start(); 3242 */ 3243 static uc_value_t * 3244 uc_fmtbuf_start(uc_vm_t *vm, size_t nargs) 3245 { 3246 formatbuffer_t *buffer = formatbuffer_ctx(vm); 3247 3248 if (!buffer) 3249 return NULL; 3250 3251 buffer->position = 0; 3252 3253 return ucv_get(&buffer->resource.header); 3254 } 3255 3256 /** 3257 * Set the buffer position to the end. 3258 * 3259 * @function module:struct.buffer#end 3260 * 3261 * @returns {module:struct.buffer} 3262 * The buffer instance. 3263 * 3264 * @example 3265 * buf.end(); 3266 */ 3267 static uc_value_t * 3268 uc_fmtbuf_end(uc_vm_t *vm, size_t nargs) 3269 { 3270 formatbuffer_t *buffer = formatbuffer_ctx(vm); 3271 3272 if (!buffer) 3273 return NULL; 3274 3275 buffer->position = buffer->length; 3276 3277 return ucv_get(&buffer->resource.header); 3278 } 3279 3280 /** 3281 * Pack data into the buffer at the current position. 3282 * 3283 * The `put()` function packs the given values into the buffer according to 3284 * the specified format string, starting at the current buffer position. 3285 * The format string follows the same syntax as used in `struct.pack()`. 3286 * 3287 * For a detailed explanation of the format string syntax, refer to the 3288 * ["Format Strings" section]{@link module:struct} in the module 3289 * documentation. 3290 * 3291 * @function module:struct.buffer#put 3292 * 3293 * @param {string} format 3294 * The format string specifying how to pack the data. 3295 * 3296 * @param {...*} values 3297 * The values to pack into the buffer. 3298 * 3299 * @returns {module:struct.buffer} 3300 * The buffer instance. 
3301 * 3302 * @see {@link module:struct#pack|struct.pack()} 3303 * 3304 * @example 3305 * buf.put('II', 1234, 5678); 3306 */ 3307 static uc_value_t * 3308 uc_fmtbuf_put(uc_vm_t *vm, size_t nargs) 3309 { 3310 formatbuffer_t *buffer = formatbuffer_ctx(vm); 3311 uc_value_t *fmt = uc_fn_arg(0); 3312 formatstate_t *state; 3313 bool res; 3314 3315 if (!buffer) 3316 return NULL; 3317 3318 state = parse_format(vm, fmt); 3319 3320 if (!state) 3321 return NULL; 3322 3323 res = uc_pack_common(vm, nargs, state, 1, 3324 &buffer->resource.data, &buffer->position, &buffer->capacity); 3325 3326 free(state); 3327 3328 if (!res) 3329 return NULL; 3330 3331 if (buffer->position > buffer->length) 3332 buffer->length = buffer->position; 3333 3334 return ucv_get(&buffer->resource.header); 3335 } 3336 3337 static uc_value_t * 3338 fmtbuf_get_common(uc_vm_t *vm, size_t nargs, bool single) 3339 { 3340 formatbuffer_t *buffer = formatbuffer_ctx(vm); 3341 uc_value_t *fmt = uc_fn_arg(0); 3342 formatstate_t *state; 3343 uc_value_t *result; 3344 size_t rem; 3345 char *buf; 3346 3347 if (!buffer) 3348 return NULL; 3349 3350 if (single && ucv_type(fmt) == UC_INTEGER) { 3351 int64_t len = ucv_int64_get(fmt); 3352 3353 if (errno != 0) 3354 goto ebounds; 3355 3356 size_t spos, epos; 3357 3358 if (len < 0) { 3359 if (len == INT64_MIN) 3360 goto ebounds; 3361 3362 if ((uint64_t)-len > buffer->position) 3363 return NULL; 3364 3365 spos = buffer->position + len; 3366 epos = buffer->position; 3367 } 3368 else { 3369 if ((uint64_t)len > (SIZE_MAX - buffer->position)) 3370 goto ebounds; 3371 3372 if (buffer->position + len > buffer->length) 3373 return NULL; 3374 3375 spos = buffer->position; 3376 epos = buffer->position + len; 3377 3378 buffer->position = epos; 3379 } 3380 3381 return ucv_string_new_length( 3382 (char *)buffer->resource.data + sizeof(uc_string_t) + spos, 3383 epos - spos); 3384 3385 ebounds: 3386 uc_vm_raise_exception(vm, EXCEPTION_RUNTIME, 3387 "Length value out of bounds"); 3388 3389 
return NULL; 3390 } 3391 3392 state = parse_format(vm, fmt); 3393 3394 if (!state) 3395 return NULL; 3396 3397 if (single && (state->ncodes != 1 || state->codes[0].repeat != 1)) { 3398 free(state); 3399 uc_vm_raise_exception(vm, EXCEPTION_TYPE, 3400 "get() expects a format string for a single value. " 3401 "Use read() for multiple values."); 3402 3403 return NULL; 3404 } 3405 3406 rem = buffer->length; 3407 buf = (char *)buffer->resource.data + sizeof(uc_string_t); 3408 3409 result = uc_unpack_common(vm, nargs, state, 3410 buf, buffer->position, &rem, single); 3411 3412 if (result) 3413 buffer->position = buffer->length - rem; 3414 3415 free(state); 3416 3417 return result; 3418 } 3419 3420 /** 3421 * Unpack a single value from the buffer at the current position. 3422 * 3423 * The `get()` function unpacks a single value from the buffer according to the 3424 * specified format string, starting at the current buffer position. 3425 * The format string follows the same syntax as used in `struct.unpack()`. 3426 * 3427 * For a detailed explanation of the format string syntax, refer to the 3428 * ["Format Strings" section]{@link module:struct} in the module documentation. 3429 * 3430 * Alternatively, `get()` accepts a postive or negative integer as format, which 3431 * specifies the length of a string to unpack before or after the current 3432 * position. Negative values extract that many bytes before the current offset 3433 * while postive ones extracts that many bytes after. 3434 * 3435 * @function module:struct.buffer#get 3436 * 3437 * @param {string|number} format 3438 * The format string specifying how to unpack the data. 3439 * 3440 * @returns {*} 3441 * The unpacked value. 
3442 * 3443 * @see {@link module:struct#unpack|struct.unpack()} 3444 * 3445 * @example 3446 * const val = buf.get('I'); 3447 * const str = buf.get(5); // equivalent to buf.get('5s') 3448 * const str = buf.get(-3); // equivalent to buf.pos(buf.pos() - 3).get('3s') 3449 */ 3450 static uc_value_t * 3451 uc_fmtbuf_get(uc_vm_t *vm, size_t nargs) 3452 { 3453 return fmtbuf_get_common(vm, nargs, true); 3454 } 3455 3456 /** 3457 * Unpack multiple values from the buffer at the current position. 3458 * 3459 * The `read()` function unpacks multiple values from the buffer according to 3460 * the specified format string, starting at the current buffer position. 3461 * The format string follows the same syntax as used in `struct.unpack()`. 3462 * 3463 * For a detailed explanation of the format string syntax, refer to the 3464 * ["Format Strings" section]{@link module:struct} in the module documentation. 3465 * 3466 * @function module:struct.buffer#get 3467 * 3468 * @param {string} format 3469 * The format string specifying how to unpack the data. 3470 * 3471 * @returns {array} 3472 * An array containing the unpacked values. 3473 * 3474 * @see {@link module:struct#unpack|struct.unpack()} 3475 * 3476 * @example 3477 * const values = buf.get('II'); 3478 */ 3479 static uc_value_t * 3480 uc_fmtbuf_read(uc_vm_t *vm, size_t nargs) 3481 { 3482 return fmtbuf_get_common(vm, nargs, false); 3483 } 3484 3485 /** 3486 * Extract a slice of the buffer content. 3487 * 3488 * The `slice()` function returns a substring of the buffer content 3489 * between the specified start and end positions. 3490 * 3491 * Both the start and end position values may be negative, in which case they're 3492 * relative to the end of the buffer, e.g. `slice(-3)` will extract the last 3493 * three bytes of data. 3494 * 3495 * @function module:struct.buffer#slice 3496 * 3497 * @param {number} [start=0] 3498 * The starting position of the slice. 
3499 * 3500 * @param {number} [end=buffer.length()] 3501 * The ending position of the slice (exclusive). 3502 * 3503 * @returns {string} 3504 * A string containing the specified slice of the buffer content. 3505 * 3506 * @example 3507 * const slice = buf.slice(4, 8); 3508 */ 3509 static uc_value_t * 3510 uc_fmtbuf_slice(uc_vm_t *vm, size_t nargs) 3511 { 3512 formatbuffer_t *buffer = formatbuffer_ctx(vm); 3513 uc_value_t *from = uc_fn_arg(0); 3514 uc_value_t *to = uc_fn_arg(1); 3515 long long spos, epos; 3516 char *buf; 3517 3518 if (!buffer) 3519 return NULL; 3520 3521 spos = 0; 3522 epos = buffer->length; 3523 3524 if (from && !ucv_as_longlong(vm, from, &spos)) 3525 return NULL; 3526 3527 if (to && !ucv_as_longlong(vm, to, &epos)) 3528 return NULL; 3529 3530 if (spos < 0) spos += buffer->length; 3531 if (spos < 0) spos = 0; 3532 if ((unsigned long long)spos > buffer->length) spos = buffer->length; 3533 3534 if (epos < 0) epos += buffer->length; 3535 if (epos < spos) epos = spos; 3536 if ((unsigned long long)epos > buffer->length) epos = buffer->length; 3537 3538 buf = (char *)buffer->resource.data + sizeof(uc_string_t) + spos; 3539 3540 return ucv_string_new_length(buf, epos - spos); 3541 } 3542 3543 /** 3544 * Set a slice of the buffer content to given byte value. 3545 * 3546 * The `set()` function overwrites a substring of the buffer content with the 3547 * given byte value, similar to the C `memset()` function, between the specified 3548 * start and end positions. 3549 * 3550 * Both the start and end position values may be negative, in which case they're 3551 * relative to the end of the buffer, e.g. `set(0, -2)` will overwrite the last 3552 * two bytes of data with `\x00`. 3553 * 3554 * When the start or end positions are beyond the current buffer length, the 3555 * buffer is grown accordingly. 3556 * 3557 * @function module:struct.buffer#set 3558 * 3559 * @param {number|string} [value=0] 3560 * The byte value to use when overwriting buffer contents. 
When a string is 3561 * given, the first character is used as value. 3562 * 3563 * @param {number} [start=0] 3564 * The position to start overwriting from. 3565 * 3566 * @param {number} [end=buffer.length()] 3567 * The position to end overwriting (exclusive). 3568 * 3569 * @returns {module:struct.buffer} 3570 * The buffer instance. 3571 * 3572 * @example 3573 * const buf = struct.buffer("abcde"); 3574 * buf.set("X", 2, 4).slice(); // Buffer content is now "abXXe" 3575 * buf.set().slice(); // Buffer content is now "\x00\x00\x00\x00\x00" 3576 */ 3577 static uc_value_t * 3578 uc_fmtbuf_set(uc_vm_t *vm, size_t nargs) 3579 { 3580 formatbuffer_t *buffer = formatbuffer_ctx(vm); 3581 uc_value_t *byte = uc_fn_arg(0); 3582 uc_value_t *from = uc_fn_arg(1); 3583 uc_value_t *to = uc_fn_arg(2); 3584 long long spos, epos; 3585 long bval; 3586 3587 if (!buffer) 3588 return NULL; 3589 3590 bval = 0; 3591 spos = 0; 3592 epos = buffer->length; 3593 3594 if (ucv_type(byte) == UC_STRING) 3595 bval = *ucv_string_get(byte); 3596 else if (byte && !ucv_as_long(vm, byte, &bval)) 3597 return NULL; 3598 3599 if (from && !ucv_as_longlong(vm, from, &spos)) 3600 return NULL; 3601 3602 if (to && !ucv_as_longlong(vm, to, &epos)) 3603 return NULL; 3604 3605 if (spos < 0) spos += buffer->length; 3606 if (spos < 0) spos = 0; 3607 3608 if (epos < 0) epos += buffer->length; 3609 3610 if (epos > spos) { 3611 if ((unsigned long long)epos > buffer->length) { 3612 if (!grow_buffer(vm, &buffer->resource.data, &buffer->capacity, epos)) 3613 return NULL; 3614 3615 buffer->length = epos; 3616 } 3617 3618 memset((char *)buffer->resource.data + sizeof(uc_string_t) + spos, 3619 bval, epos - spos); 3620 } 3621 3622 return ucv_get(&buffer->resource.header); 3623 } 3624 3625 /** 3626 * Extract and remove all content from the buffer. 3627 * 3628 * The `pull()` function returns all content of the buffer as a string 3629 * and resets the buffer to an empty state. 
3630 * 3631 * @function module:struct.buffer#pull 3632 * 3633 * @returns {string} 3634 * A string containing all the buffer content. 3635 * 3636 * @example 3637 * const allData = buf.pull(); 3638 */ 3639 static uc_value_t * 3640 uc_fmtbuf_pull(uc_vm_t *vm, size_t nargs) 3641 { 3642 formatbuffer_t *buffer = formatbuffer_ctx(vm); 3643 uc_string_t *us; 3644 3645 if (!buffer) 3646 return NULL; 3647 3648 if (!buffer->resource.data) 3649 return ucv_string_new_length("", 0); 3650 3651 us = buffer->resource.data; 3652 us->header.type = UC_STRING; 3653 us->header.refcount = 1; 3654 us->length = buffer->length; 3655 3656 buffer->resource.data = NULL; 3657 buffer->capacity = 0; 3658 buffer->position = 0; 3659 buffer->length = 0; 3660 3661 return &us->header; 3662 } 3663 3664 3665 static const uc_function_list_t struct_inst_fns[] = { 3666 { "pack", uc_struct_pack }, 3667 { "unpack", uc_struct_unpack } 3668 }; 3669 3670 static const uc_function_list_t buffer_inst_fns[] = { 3671 { "pos", uc_fmtbuf_pos }, 3672 { "length", uc_fmtbuf_length }, 3673 { "start", uc_fmtbuf_start }, 3674 { "end", uc_fmtbuf_end }, 3675 { "set", uc_fmtbuf_set }, 3676 { "put", uc_fmtbuf_put }, 3677 { "get", uc_fmtbuf_get }, 3678 { "read", uc_fmtbuf_read }, 3679 { "slice", uc_fmtbuf_slice }, 3680 { "pull", uc_fmtbuf_pull }, 3681 }; 3682 3683 static const uc_function_list_t struct_fns[] = { 3684 { "pack", uc_pack }, 3685 { "unpack", uc_unpack }, 3686 { "new", uc_struct_new }, 3687 { "buffer", uc_fmtbuf_new } 3688 }; 3689 3690 void uc_module_init(uc_vm_t *vm, uc_value_t *scope) 3691 { 3692 optimize_functions(); 3693 3694 uc_function_list_register(scope, struct_fns); 3695 3696 uc_type_declare(vm, "struct.format", struct_inst_fns, free); 3697 uc_type_declare(vm, "struct.buffer", buffer_inst_fns, free); 3698 } 3699
This page was automatically generated by LXR 0.3.1. • OpenWrt