1 /* 2 * Binary data packing/unpacking module for ucode. 3 * Copyright (C) 2021 Jo-Philipp Wich <jo@mein.io> 4 * 5 * This module is heavily based on the Python 3.10 "_struct.c" module source 6 * published under the following license: 7 * 8 * ----------------------------------------------------------------------------------- 9 * 10 * 1. This LICENSE AGREEMENT is between the Python Software Foundation ("PSF"), and 11 * the Individual or Organization ("Licensee") accessing and otherwise using Python 12 * 3.10.0 software in source or binary form and its associated documentation. 13 * 14 * 2. Subject to the terms and conditions of this License Agreement, PSF hereby 15 * grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, 16 * analyze, test, perform and/or display publicly, prepare derivative works, 17 * distribute, and otherwise use Python 3.10.0 alone or in any derivative 18 * version, provided, however, that PSF's License Agreement and PSF's notice of 19 * copyright, i.e., "Copyright © 2001-2021 Python Software Foundation; All Rights 20 * Reserved" are retained in Python 3.10.0 alone or in any derivative version 21 * prepared by Licensee. 22 * 23 * 3. In the event Licensee prepares a derivative work that is based on or 24 * incorporates Python 3.10.0 or any part thereof, and wants to make the 25 * derivative work available to others as provided herein, then Licensee hereby 26 * agrees to include in any such work a brief summary of the changes made to Python 27 * 3.10.0. 28 * 29 * 4. PSF is making Python 3.10.0 available to Licensee on an "AS IS" basis. 30 * PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF 31 * EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND DISCLAIMS ANY REPRESENTATION OR 32 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE 33 * USE OF PYTHON 3.10.0 WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. 34 * 35 * 5. 
PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON 3.10.0 36 * FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF 37 * MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 3.10.0, OR ANY DERIVATIVE 38 * THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 39 * 40 * 6. This License Agreement will automatically terminate upon a material breach of 41 * its terms and conditions. 42 * 43 * 7. Nothing in this License Agreement shall be deemed to create any relationship 44 * of agency, partnership, or joint venture between PSF and Licensee. This License 45 * Agreement does not grant permission to use PSF trademarks or trade name in a 46 * trademark sense to endorse or promote products or services of Licensee, or any 47 * third party. 48 * 49 * 8. By copying, installing or otherwise using Python 3.10.0, Licensee agrees 50 * to be bound by the terms and conditions of this License Agreement. 51 * 52 * ----------------------------------------------------------------------------------- 53 * 54 * Brief summary of changes compared to the original Python 3.10 source: 55 * 56 * - Inlined and refactored IEEE 754 float conversion routines 57 * - Usage of stdbool for function return values and boolean parameters 58 * - Renamed functions and structures for clarity 59 * - Interface adapted to ucode C API 60 * - Removed unused code 61 */ 62 63 /** 64 * # Handle Packed Binary Data 65 * 66 * The `struct` module provides routines for interpreting byte strings as packed 67 * binary data. 
68 * 69 * Functions can be individually imported and directly accessed using the 70 * {@link https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/import#named_import named import} 71 * syntax: 72 * 73 * ``` 74 * import { pack, unpack } from 'struct'; 75 * 76 * let buffer = pack('bhl', -13, 1234, 444555666); 77 * let values = unpack('bhl', buffer); 78 * ``` 79 * 80 * Alternatively, the module namespace can be imported 81 * using a wildcard import statement: 82 * 83 * ``` 84 * import * as struct from 'struct'; 85 * 86 * let buffer = struct.pack('bhl', -13, 1234, 444555666); 87 * let values = struct.unpack('bhl', buffer); 88 * ``` 89 * 90 * Additionally, the struct module namespace may also be imported by invoking 91 * the `ucode` interpreter with the `-lstruct` switch. 92 * 93 * ## Format Strings 94 * 95 * Format strings describe the data layout when packing and unpacking data. 96 * They are built up from format-characters, which specify the type of data 97 * being packed/unpacked. In addition, special characters control the byte 98 * order, size and alignment. 99 * 100 * Each format string consists of an optional prefix character which describes 101 * the overall properties of the data and one or more format characters which 102 * describe the actual data values and padding. 103 * 104 * ### Byte Order, Size, and Alignment 105 * 106 * By default, C types are represented in the machine's native format and byte 107 * order, and properly aligned by skipping pad bytes if necessary (according to 108 * the rules used by the C compiler). 109 * 110 * This behavior is chosen so that the bytes of a packed struct correspond 111 * exactly to the memory layout of the corresponding C struct. 112 * 113 * Whether to use native byte ordering and padding or standard formats depends 114 * on the application. 
115 * 116 * Alternatively, the first character of the format string can be used to indicate 117 * the byte order, size and alignment of the packed data, according to the 118 * following table: 119 * 120 * | Character | Byte order | Size | Alignment | 121 * |-----------|------------------------|----------|-----------| 122 * | `@` | native | native | native | 123 * | `=` | native | standard | none | 124 * | `<` | little-endian | standard | none | 125 * | `>` | big-endian | standard | none | 126 * | `!` | network (= big-endian) | standard | none | 127 * 128 * If the first character is not one of these, `'@'` is assumed. 129 * 130 * Native byte order is big-endian or little-endian, depending on the 131 * host system. For example, Intel x86, AMD64 (x86-64), and Apple M1 are 132 * little-endian; IBM z and many legacy architectures are big-endian. 133 * 134 * Native size and alignment are determined using the C compiler's 135 * `sizeof` expression. This is always combined with native byte order. 136 * 137 * Standard size depends only on the format character; see the table in 138 * the `format-characters` section. 139 * 140 * Note the difference between `'@'` and `'='`: both use native byte order, 141 * but the size and alignment of the latter is standardized. 142 * 143 * The form `'!'` represents the network byte order which is always big-endian 144 * as defined in `IETF RFC 1700`. 145 * 146 * There is no way to indicate non-native byte order (force byte-swapping); use 147 * the appropriate choice of `'<'` or `'>'`. 148 * 149 * Notes: 150 * 151 * (1) Padding is only automatically added between successive structure members. 152 * No padding is added at the beginning or the end of the encoded struct. 153 * 154 * (2) No padding is added when using non-native size and alignment, e.g. 155 * with '<', '>', '=', and '!'. 
156 * 157 * (3) To align the end of a structure to the alignment requirement of a 158 * particular type, end the format with the code for that type with a repeat 159 * count of zero. 160 * 161 * 162 * ### Format Characters 163 * 164 * Format characters have the following meaning; the conversion between C and 165 * ucode values should be obvious given their types. The 'Standard size' column 166 * refers to the size of the packed value in bytes when using standard size; 167 * that is, when the format string starts with one of `'<'`, `'>'`, `'!'` or 168 * `'='`. When using native size, the size of the packed value is platform 169 * dependent. 170 * 171 * | Format | C Type | Ucode type | Standard size | Notes | 172 * |--------|----------------------|------------|----------------|----------| 173 * | `x` | *pad byte* | *no value* | | (7) | 174 * | `c` | `char` | string | 1 | | 175 * | `b` | `signed char` | int | 1 | (1), (2) | 176 * | `B` | `unsigned char` | int | 1 | (2) | 177 * | `?` | `_Bool` | bool | 1 | (1) | 178 * | `h` | `short` | int | 2 | (2) | 179 * | `H` | `unsigned short` | int | 2 | (2) | 180 * | `i` | `int` | int | 4 | (2) | 181 * | `I` | `unsigned int` | int | 4 | (2) | 182 * | `l` | `long` | int | 4 | (2) | 183 * | `L` | `unsigned long` | int | 4 | (2) | 184 * | `q` | `long long` | int | 8 | (2) | 185 * | `Q` | `unsigned long long` | int | 8 | (2) | 186 * | `n` | `ssize_t` | int | | (3) | 187 * | `N` | `size_t` | int | | (3) | 188 * | `e` | (6) | double | 2 | (4) | 189 * | `f` | `float` | double | 4 | (4) | 190 * | `d` | `double` | double | 8 | (4) | 191 * | `s` | `char[]` | string | | (9) | 192 * | `p` | `char[]` | string | | (8) | 193 * | `P` | `void *` | int | | (5) | 194 * | `*` | `char[]` | string | | (10) | 195 * 196 * Notes: 197 * 198 * - (1) The `'?'` conversion code corresponds to the `_Bool` type defined by 199 * C99. If this type is not available, it is simulated using a `char`. In 200 * standard mode, it is always represented by one byte. 
201 * 202 * - (2) When attempting to pack a non-integer using any of the integer 203 * conversion codes, this module attempts to convert the given value into an 204 * integer. If the value is not convertible, a type error exception is thrown. 205 * 206 * - (3) The `'n'` and `'N'` conversion codes are only available for the native 207 * size (selected as the default or with the `'@'` byte order character). 208 * For the standard size, you can use whichever of the other integer formats 209 * fits your application. 210 * 211 * - (4) For the `'f'`, `'d'` and `'e'` conversion codes, the packed 212 * representation uses the IEEE 754 binary32, binary64 or binary16 format 213 * (for `'f'`, `'d'` or `'e'` respectively), regardless of the floating-point 214 * format used by the platform. 215 * 216 * - (5) The `'P'` format character is only available for the native byte 217 * ordering (selected as the default or with the `'@'` byte order character). 218 * The byte order character `'='` chooses to use little- or big-endian 219 * ordering based on the host system. The struct module does not interpret 220 * this as native ordering, so the `'P'` format is not available. 221 * 222 * - (6) The IEEE 754 binary16 "half precision" type was introduced in the 2008 223 * revision of the `IEEE 754` standard. It has a sign bit, a 5-bit exponent 224 * and 11-bit precision (with 10 bits explicitly stored), and can represent 225 * numbers between approximately `6.1e-05` and `6.5e+04` at full precision. 226 * This type is not widely supported by C compilers: on a typical machine, an 227 * unsigned short can be used for storage, but not for math operations. See 228 * the Wikipedia page on the `half-precision floating-point format` for more 229 * information. 230 * 231 * - (7) When packing, `'x'` inserts one NUL byte. 232 * 233 * - (8) The `'p'` format character encodes a "Pascal string", meaning a short 234 * variable-length string stored in a *fixed number of bytes*, given by the 235 * count. 
The first byte stored is the length of the string, or 255, 236 * whichever is smaller. The bytes of the string follow. If the string 237 * passed in to `pack()` is too long (longer than the count minus 1), only 238 * the leading `count-1` bytes of the string are stored. If the string is 239 * shorter than `count-1`, it is padded with null bytes so that exactly count 240 * bytes in all are used. Note that for `unpack()`, the `'p'` format 241 * character consumes `count` bytes, but that the string returned can never 242 * contain more than 255 bytes. 243 * 244 * - (9) For the `'s'` format character, the count is interpreted as the length 245 * of the bytes, not a repeat count like for the other format characters; for 246 * example, `'10s'` means a single 10-byte string mapping to or from a single 247 * ucode byte string, while `'10c'` means 10 separate one byte character 248 * elements (e.g., `cccccccccc`) mapping to or from ten different ucode byte 249 * strings. If a count is not given, it defaults to 1. For packing, the 250 * string is truncated or padded with null bytes as appropriate to make it 251 * fit. For unpacking, the resulting bytes object always has exactly the 252 * specified number of bytes. As a special case, `'0s'` means a single, 253 * empty string (while `'0c'` means 0 characters). 254 * 255 * - (10) The `*` format character serves as wildcard. For `pack()` it will 256 * append the corresponding byte argument string as-is, not applying any 257 * padding or zero filling. When a repeat count is given, that many bytes of 258 * the input byte string argument will be appended at most on `pack()`, 259 * effectively truncating longer input strings. For `unpack()`, the wildcard 260 * format will yield a byte string containing the entire remaining input data 261 * bytes, or - when a repeat count is given - that many bytes of input data 262 * at most. 263 * 264 * A format character may be preceded by an integral repeat count. 
For example, 265 * the format string `'4h'` means exactly the same as `'hhhh'`. 266 * 267 * Whitespace characters between formats are ignored; a count and its format 268 * must not contain whitespace though. 269 * 270 * When packing a value `x` using one of the integer formats (`'b'`, 271 * `'B'`, `'h'`, `'H'`, `'i'`, `'I'`, `'l'`, `'L'`, 272 * `'q'`, `'Q'`), if `x` is outside the valid range for that format, a type 273 * error exception is raised. 274 * 275 * For the `'?'` format character, the return value is either `true` or `false`. 276 * When packing, the truish result value of the argument is used. Either 0 or 1 277 * in the native or standard bool representation will be packed, and any 278 * non-zero value will be `true` when unpacking. 279 * 280 * ## Examples 281 * 282 * Note: 283 * Native byte order examples (designated by the `'@'` format prefix or 284 * lack of any prefix character) may not match what the reader's 285 * machine produces as 286 * that depends on the platform and compiler. 
287 * 288 * Pack and unpack integers of three different sizes, using big endian 289 * ordering: 290 * 291 * ``` 292 * import { pack, unpack } from 'struct'; 293 * 294 * pack(">bhl", 1, 2, 3); // "\x01\x00\x02\x00\x00\x00\x03" 295 * unpack(">bhl", "\x01\x00\x02\x00\x00\x00\x03"); // [ 1, 2, 3 ] 296 * ``` 297 * 298 * Attempt to pack an integer which is too large for the defined field: 299 * 300 * ```bash 301 * $ ucode -lstruct -p 'struct.pack(">h", 99999)' 302 * Type error: Format 'h' requires numeric argument between -32768 and 32767 303 * In [-p argument], line 1, byte 24: 304 * 305 * `struct.pack(">h", 99999)` 306 * Near here -------------^ 307 * ``` 308 * 309 * Demonstrate the difference between `'s'` and `'c'` format characters: 310 * 311 * ``` 312 * import { pack } from 'struct'; 313 * 314 * pack("@ccc", "1", "2", "3"); // "123" 315 * pack("@3s", "123"); // "123" 316 * ``` 317 * 318 * The ordering of format characters may have an impact on size in native 319 * mode since padding is implicit. In standard mode, the user is 320 * responsible for inserting any desired padding. 321 * 322 * Note in the first `pack()` call below that three NUL bytes were added after 323 * the packed `'#'` to align the following integer on a four-byte boundary. 
324 * In this example, the output was produced on a little endian machine: 325 * 326 * ``` 327 * import { pack } from 'struct'; 328 * 329 * pack("@ci", "#", 0x12131415); // "#\x00\x00\x00\x15\x14\x13\x12" 330 * pack("@ic", 0x12131415, "#"); // "\x15\x14\x13\x12#" 331 * ``` 332 * 333 * The following format `'ih0i'` results in two pad bytes being added at the 334 * end, assuming the platform's ints are aligned on 4-byte boundaries: 335 * 336 * ``` 337 * import { pack } from 'struct'; 338 * 339 * pack("ih0i", 0x01010101, 0x0202); // "\x01\x01\x01\x01\x02\x02\x00\x00" 340 * ``` 341 * 342 * Use the wildcard format to extract the remainder of the input data: 343 * 344 * ``` 345 * import { unpack } from 'struct'; 346 * 347 * unpack("ccc*", "foobarbaz"); // [ "f", "o", "o", "barbaz" ] 348 * unpack("ccc3*", "foobarbaz"); // [ "f", "o", "o", "bar" ] 349 * ``` 350 * 351 * Use the wildcard format to pack binary strings as-is into the result data: 352 * 353 * ``` 354 * import { pack } from 'struct'; 355 * 356 * pack("h*h", 0x0101, "\x02\x00\x03", 0x0404); // "\x01\x01\x02\x00\x03\x04\x04" 357 * pack("c3*c", "a", "foobar", "c"); // "afooc" 358 * ``` 359 * 360 * @module struct 361 */ 362 363 #include <ctype.h> 364 #include <errno.h> 365 #include <limits.h> 366 #include <math.h> 367 #include <stdlib.h> 368 #include <float.h> 369 #include <assert.h> 370 371 #include "ucode/module.h" 372 #include "ucode/vallist.h" 373 374 static uc_resource_type_t *struct_type; 375 376 typedef struct formatdef { 377 char format; 378 ssize_t size; 379 ssize_t alignment; 380 uc_value_t* (*unpack)(uc_vm_t *, const char *, const struct formatdef *); 381 bool (*pack)(uc_vm_t *, char *, uc_value_t *, const struct formatdef *); 382 } formatdef_t; 383 384 typedef struct { 385 const formatdef_t *fmtdef; 386 ssize_t offset; 387 ssize_t size; 388 ssize_t repeat; 389 } formatcode_t; 390 391 typedef struct { 392 size_t len; 393 size_t size; 394 size_t ncodes; 395 formatcode_t codes[]; 396 } formatstate_t; 397 398 
399 /* Define various structs to figure out the alignments of types */ 400 401 typedef struct { char c; short x; } st_short; 402 typedef struct { char c; int x; } st_int; 403 typedef struct { char c; long x; } st_long; 404 typedef struct { char c; float x; } st_float; 405 typedef struct { char c; double x; } st_double; 406 typedef struct { char c; void *x; } st_void_p; 407 typedef struct { char c; size_t x; } st_size_t; 408 typedef struct { char c; bool x; } st_bool; 409 typedef struct { char c; long long x; } s_long_long; 410 411 #define SHORT_ALIGN (sizeof(st_short) - sizeof(short)) 412 #define INT_ALIGN (sizeof(st_int) - sizeof(int)) 413 #define LONG_ALIGN (sizeof(st_long) - sizeof(long)) 414 #define FLOAT_ALIGN (sizeof(st_float) - sizeof(float)) 415 #define DOUBLE_ALIGN (sizeof(st_double) - sizeof(double)) 416 #define VOID_P_ALIGN (sizeof(st_void_p) - sizeof(void *)) 417 #define SIZE_T_ALIGN (sizeof(st_size_t) - sizeof(size_t)) 418 #define BOOL_ALIGN (sizeof(st_bool) - sizeof(bool)) 419 #define LONG_LONG_ALIGN (sizeof(s_long_long) - sizeof(long long)) 420 421 #ifdef __powerc 422 #pragma options align=reset 423 #endif 424 425 426 static bool 427 ucv_as_long(uc_vm_t *vm, uc_value_t *v, long *p) 428 { 429 char *s, *e; 430 int64_t i; 431 double d; 432 long x; 433 434 errno = 0; 435 436 switch (ucv_type(v)) { 437 case UC_INTEGER: 438 i = ucv_int64_get(v); 439 440 if (i < LONG_MIN || i > LONG_MAX) 441 errno = ERANGE; 442 443 x = (long)i; 444 break; 445 446 case UC_DOUBLE: 447 d = ucv_double_get(v); 448 x = (long)d; 449 450 if (isnan(d) || d < (double)LONG_MIN || d > (double)LONG_MAX || d - x != 0) 451 errno = ERANGE; 452 453 break; 454 455 case UC_BOOLEAN: 456 x = (long)ucv_boolean_get(v); 457 break; 458 459 case UC_NULL: 460 x = 0; 461 break; 462 463 case UC_STRING: 464 s = ucv_string_get(v); 465 x = strtol(s, &e, 0); 466 467 if (e == s || *e != '\0') 468 errno = EINVAL; 469 470 break; 471 472 default: 473 errno = EINVAL; 474 x = 0; 475 break; 476 } 477 478 if 
(errno != 0) { 479 uc_vm_raise_exception(vm, EXCEPTION_TYPE, 480 (errno == ERANGE) 481 ? "Argument out of range" 482 : "Argument not convertible to number"); 483 484 return false; 485 } 486 487 *p = x; 488 489 return true; 490 } 491 492 static bool 493 ucv_as_ulong(uc_vm_t *vm, uc_value_t *v, unsigned long *p) 494 { 495 unsigned long x; 496 char *s, *e; 497 uint64_t i; 498 double d; 499 500 errno = 0; 501 502 switch (ucv_type(v)) { 503 case UC_INTEGER: 504 i = ucv_uint64_get(v); 505 506 if (i > ULONG_MAX) 507 errno = ERANGE; 508 509 x = (unsigned long)i; 510 break; 511 512 case UC_DOUBLE: 513 d = ucv_double_get(v); 514 x = (unsigned long)d; 515 516 if (isnan(d) || d < 0 || d > (double)ULONG_MAX || d - x != 0) 517 errno = ERANGE; 518 519 break; 520 521 case UC_BOOLEAN: 522 x = (unsigned long)ucv_boolean_get(v); 523 break; 524 525 case UC_NULL: 526 x = 0; 527 break; 528 529 case UC_STRING: 530 s = ucv_string_get(v); 531 x = strtoul(s, &e, 0); 532 533 if (e == s || *e != '\0') 534 errno = EINVAL; 535 536 break; 537 538 default: 539 errno = EINVAL; 540 x = 0; 541 break; 542 } 543 544 if (errno != 0) { 545 uc_vm_raise_exception(vm, EXCEPTION_TYPE, 546 (errno == ERANGE) 547 ? 
"Argument out of range" 548 : "Argument not convertible to number"); 549 550 return false; 551 } 552 553 *p = x; 554 555 return true; 556 } 557 558 static bool 559 ucv_as_longlong(uc_vm_t *vm, uc_value_t *v, long long *p) 560 { 561 char *s, *e; 562 long long x; 563 int64_t i; 564 double d; 565 566 errno = 0; 567 568 switch (ucv_type(v)) { 569 case UC_INTEGER: 570 i = ucv_int64_get(v); 571 572 if (i < LLONG_MIN || i > LLONG_MAX) 573 errno = ERANGE; 574 575 x = (long long)i; 576 break; 577 578 case UC_DOUBLE: 579 d = ucv_double_get(v); 580 x = (long long)d; 581 582 if (isnan(d) || d < (double)LLONG_MIN || d > (double)LLONG_MAX || d - x != 0) 583 errno = ERANGE; 584 585 break; 586 587 case UC_BOOLEAN: 588 x = (long long)ucv_boolean_get(v); 589 break; 590 591 case UC_NULL: 592 x = 0; 593 break; 594 595 case UC_STRING: 596 s = ucv_string_get(v); 597 x = strtoll(s, &e, 0); 598 599 if (e == s || *e != '\0') 600 errno = EINVAL; 601 602 break; 603 604 default: 605 errno = EINVAL; 606 x = 0; 607 break; 608 } 609 610 if (errno != 0) { 611 uc_vm_raise_exception(vm, EXCEPTION_TYPE, 612 (errno == ERANGE) 613 ? 
"Argument out of range" 614 : "Argument not convertible to number"); 615 616 return false; 617 } 618 619 *p = x; 620 621 return true; 622 } 623 624 static bool 625 ucv_as_ulonglong(uc_vm_t *vm, uc_value_t *v, unsigned long long *p) 626 { 627 unsigned long long x; 628 char *s, *e; 629 uint64_t i; 630 double d; 631 632 errno = 0; 633 634 switch (ucv_type(v)) { 635 case UC_INTEGER: 636 i = ucv_uint64_get(v); 637 638 if (i > ULLONG_MAX) 639 errno = ERANGE; 640 641 x = (unsigned long long)i; 642 break; 643 644 case UC_DOUBLE: 645 d = ucv_double_get(v); 646 x = (unsigned long long)d; 647 648 if (isnan(d) || d < 0 || d > (double)ULLONG_MAX || d - x != 0) 649 errno = ERANGE; 650 651 break; 652 653 case UC_BOOLEAN: 654 x = (unsigned long long)ucv_boolean_get(v); 655 break; 656 657 case UC_NULL: 658 x = 0; 659 break; 660 661 case UC_STRING: 662 s = ucv_string_get(v); 663 x = strtoull(s, &e, 0); 664 665 if (e == s || *e != '\0') 666 errno = EINVAL; 667 668 break; 669 670 default: 671 errno = EINVAL; 672 x = 0; 673 break; 674 } 675 676 if (errno != 0) { 677 uc_vm_raise_exception(vm, EXCEPTION_TYPE, 678 (errno == ERANGE) 679 ? 
"Argument out of range" 680 : "Argument not convertible to number"); 681 682 return false; 683 } 684 685 *p = x; 686 687 return true; 688 } 689 690 static bool 691 ucv_as_ssize_t(uc_vm_t *vm, uc_value_t *v, ssize_t *p) 692 { 693 char *s, *e; 694 int64_t i; 695 ssize_t x; 696 double d; 697 698 errno = 0; 699 700 switch (ucv_type(v)) { 701 case UC_INTEGER: 702 i = ucv_int64_get(v); 703 704 if (i < -1 || i > SSIZE_MAX) 705 errno = ERANGE; 706 707 x = (ssize_t)i; 708 break; 709 710 case UC_DOUBLE: 711 d = ucv_double_get(v); 712 x = (ssize_t)d; 713 714 if (isnan(d) || d < -1 || d > (double)SSIZE_MAX || d - x != 0) 715 errno = ERANGE; 716 717 break; 718 719 case UC_BOOLEAN: 720 x = (ssize_t)ucv_boolean_get(v); 721 break; 722 723 case UC_NULL: 724 x = 0; 725 break; 726 727 case UC_STRING: 728 s = ucv_string_get(v); 729 i = strtoll(s, &e, 0); 730 731 if (e == s || *e != '\0') 732 errno = EINVAL; 733 else if (i < -1 || i > SSIZE_MAX) 734 errno = ERANGE; 735 736 x = (ssize_t)i; 737 break; 738 739 default: 740 errno = EINVAL; 741 x = 0; 742 break; 743 } 744 745 if (errno != 0) { 746 uc_vm_raise_exception(vm, EXCEPTION_TYPE, 747 (errno == ERANGE) 748 ? 
"Argument out of range" 749 : "Argument not convertible to number"); 750 751 return false; 752 } 753 754 *p = x; 755 756 return true; 757 } 758 759 /* Same, but handling size_t */ 760 761 static bool 762 ucv_as_size_t(uc_vm_t *vm, uc_value_t *v, size_t *p) 763 { 764 char *s, *e; 765 uint64_t i; 766 double d; 767 size_t x; 768 769 errno = 0; 770 771 switch (ucv_type(v)) { 772 case UC_INTEGER: 773 i = ucv_uint64_get(v); 774 775 if (i > SIZE_MAX) 776 errno = ERANGE; 777 778 x = (size_t)i; 779 break; 780 781 case UC_DOUBLE: 782 d = ucv_double_get(v); 783 x = (size_t)d; 784 785 if (isnan(d) || d < 0 || d > (double)SIZE_MAX || d - x != 0) 786 errno = ERANGE; 787 788 break; 789 790 case UC_BOOLEAN: 791 x = (size_t)ucv_boolean_get(v); 792 break; 793 794 case UC_NULL: 795 x = 0; 796 break; 797 798 case UC_STRING: 799 s = ucv_string_get(v); 800 i = strtoull(s, &e, 0); 801 802 if (e == s || *e != '\0') 803 errno = EINVAL; 804 else if (i > SIZE_MAX) 805 errno = ERANGE; 806 807 x = (size_t)i; 808 break; 809 810 default: 811 errno = EINVAL; 812 x = 0; 813 break; 814 } 815 816 if (errno != 0) { 817 uc_vm_raise_exception(vm, EXCEPTION_TYPE, 818 (errno == ERANGE) 819 ? 
"Argument out of range" 820 : "Argument not convertible to number"); 821 822 return false; 823 } 824 825 *p = x; 826 827 return true; 828 } 829 830 static bool 831 ucv_as_double(uc_vm_t *vm, uc_value_t *v, double *p) 832 { 833 char *s, *e; 834 int64_t i; 835 double x; 836 837 errno = 0; 838 839 switch (ucv_type(v)) { 840 case UC_INTEGER: 841 i = ucv_int64_get(v); 842 843 if (errno == 0) { 844 if (i < -DBL_MAX || i > DBL_MAX) 845 errno = ERANGE; 846 } 847 848 x = (double)i; 849 break; 850 851 case UC_DOUBLE: 852 x = ucv_double_get(v); 853 break; 854 855 case UC_BOOLEAN: 856 x = (double)ucv_boolean_get(v); 857 break; 858 859 case UC_NULL: 860 x = 0.0; 861 break; 862 863 case UC_STRING: 864 s = ucv_string_get(v); 865 x = strtod(s, &e); 866 867 if (e == s || *e != '\0') 868 errno = EINVAL; 869 870 break; 871 872 default: 873 errno = EINVAL; 874 x = 0.0; 875 break; 876 } 877 878 if (errno != 0) { 879 uc_vm_raise_exception(vm, EXCEPTION_TYPE, 880 (errno == ERANGE) 881 ? "Argument out of range" 882 : "Argument not convertible to number"); 883 884 return false; 885 } 886 887 *p = x; 888 889 return true; 890 } 891 892 893 /* Floating point helpers */ 894 895 static bool 896 double_pack16(double d, char *buf, bool little_endian) 897 { 898 int32_t exponent = 0; 899 uint16_t bits = 0; 900 bool sign = false; 901 double fraction; 902 uint8_t *p; 903 904 if (d == 0.0) { 905 sign = (copysign(1.0, d) == -1.0); 906 } 907 else if (isnan(d)) { 908 sign = (copysign(1.0, d) == -1.0); 909 exponent = 0x1f; 910 bits = 512; 911 } 912 else if (!isfinite(d)) { 913 sign = (d < 0.0); 914 exponent = 0x1f; 915 } 916 else { 917 if (d < 0.0) { 918 sign = true; 919 d = -d; 920 } 921 922 fraction = frexp(d, &exponent); 923 924 assert(fraction >= 0.5 && fraction < 1.0); 925 926 fraction *= 2.0; 927 exponent--; 928 929 if (exponent >= 16) { 930 errno = ERANGE; 931 932 return false; 933 } 934 else if (exponent < -25) { 935 fraction = 0.0; 936 exponent = 0; 937 } 938 else if (exponent < -14) { 939 
fraction = ldexp(fraction, 14 + exponent); 940 exponent = 0; 941 } 942 else { 943 fraction -= 1.0; 944 exponent += 15; 945 } 946 947 fraction *= 1024.0; 948 bits = (uint16_t)fraction; 949 950 assert(bits < 1024); 951 assert(exponent < 31); 952 953 if ((fraction - bits > 0.5) || ((fraction - bits == 0.5) && (bits % 2))) { 954 if (++bits == 1024) { 955 bits = 0; 956 957 if (++exponent == 31) { 958 errno = ERANGE; 959 960 return false; 961 } 962 } 963 } 964 } 965 966 bits |= (exponent << 10) | (sign << 15); 967 968 p = (uint8_t *)buf + little_endian; 969 *p = (bits >> 8) & 0xff; 970 971 p += (little_endian ? -1 : 1); 972 *p = bits & 0xff; 973 974 return true; 975 } 976 977 static bool 978 double_pack32(double d, char *buf, bool little_endian) 979 { 980 int8_t step = little_endian ? -1 : 1; 981 int32_t exponent = 0; 982 uint32_t bits = 0; 983 bool sign = false; 984 double fraction; 985 uint8_t *p; 986 987 if (d == 0.0) { 988 sign = (copysign(1.0, d) == -1.0); 989 } 990 else if (isnan(d)) { 991 sign = (copysign(1.0, d) == -1.0); 992 exponent = 0xff; 993 bits = 0x7fffff; 994 } 995 else if (!isfinite(d)) { 996 sign = (d < 0.0); 997 exponent = 0xff; 998 } 999 else { 1000 if (d < 0.0) { 1001 sign = true; 1002 d = -d; 1003 } 1004 1005 fraction = frexp(d, &exponent); 1006 1007 if (fraction == 0.0) { 1008 exponent = 0; 1009 } 1010 else { 1011 assert(fraction >= 0.5 && fraction < 1.0); 1012 1013 fraction *= 2.0; 1014 exponent--; 1015 } 1016 1017 if (exponent >= 128) { 1018 errno = ERANGE; 1019 1020 return false; 1021 } 1022 else if (exponent < -126) { 1023 fraction = ldexp(fraction, 126 + exponent); 1024 exponent = 0; 1025 } 1026 else if (exponent != 0 || fraction != 0.0) { 1027 fraction -= 1.0; 1028 exponent += 127; 1029 } 1030 1031 fraction *= 8388608.0; 1032 bits = (uint32_t)(fraction + 0.5); 1033 1034 assert(bits <= 8388608); 1035 1036 if (bits >> 23) { 1037 bits = 0; 1038 1039 if (++exponent >= 255) { 1040 errno = ERANGE; 1041 1042 return false; 1043 } 1044 } 1045 } 1046 
1047 p = (uint8_t *)buf + (little_endian ? 3 : 0); 1048 *p = (sign << 7) | (exponent >> 1); 1049 1050 p += step; 1051 *p = ((exponent & 1) << 7) | (bits >> 16); 1052 1053 p += step; 1054 *p = (bits >> 8) & 0xff; 1055 1056 p += step; 1057 *p = bits & 0xff; 1058 1059 return true; 1060 } 1061 1062 #define double_pack64 uc_double_pack 1063 1064 static double 1065 double_unpack16(const char *buf, bool little_endian) 1066 { 1067 uint32_t fraction; 1068 int32_t exponent; 1069 uint8_t *p; 1070 bool sign; 1071 double d; 1072 1073 p = (uint8_t *)buf + little_endian; 1074 sign = (*p >> 7) & 1; 1075 exponent = (*p & 0x7c) >> 2; 1076 fraction = (*p & 0x03) << 8; 1077 1078 p += little_endian ? -1 : 1; 1079 fraction |= *p; 1080 1081 if (exponent == 0x1f) { 1082 if (fraction == 0) 1083 return sign ? -INFINITY : INFINITY; 1084 else 1085 return sign ? -NAN : NAN; 1086 } 1087 1088 d = (double)fraction / 1024.0; 1089 1090 if (exponent == 0) { 1091 exponent = -14; 1092 } 1093 else { 1094 exponent -= 15; 1095 d += 1.0; 1096 } 1097 1098 d = ldexp(d, exponent); 1099 1100 return sign ? -d : d; 1101 } 1102 1103 static double 1104 double_unpack32(const char *buf, bool little_endian) 1105 { 1106 int8_t step = little_endian ? -1 : 1; 1107 uint32_t fraction; 1108 int32_t exponent; 1109 uint8_t *p; 1110 bool sign; 1111 double d; 1112 1113 p = (uint8_t *)buf + (little_endian ? 3 : 0); 1114 sign = (*p >> 7) & 1; 1115 exponent = (*p & 0x7f) << 1; 1116 1117 p += step; 1118 exponent |= (*p >> 7) & 1; 1119 fraction = (*p & 0x7f) << 16; 1120 1121 p += step; 1122 fraction |= *p << 8; 1123 1124 p += step; 1125 fraction |= *p; 1126 1127 if (exponent == 0xff) { 1128 if (fraction == 0) 1129 return sign ? -INFINITY : INFINITY; 1130 else 1131 return sign ? -NAN : NAN; 1132 } 1133 1134 d = (double)fraction / 8388608.0; 1135 1136 if (exponent == 0) { 1137 exponent = -126; 1138 } 1139 else { 1140 exponent -= 127; 1141 d += 1.0; 1142 } 1143 1144 d = ldexp(d, exponent); 1145 1146 return sign ? 
-d : d; 1147 } 1148 1149 #define double_unpack64 uc_double_unpack 1150 1151 static bool 1152 range_exception(uc_vm_t *vm, const formatdef_t *f, bool is_unsigned) 1153 { 1154 /* ulargest is the largest unsigned value with f->size bytes. 1155 * Note that the simpler: 1156 * ((size_t)1 << (f->size * 8)) - 1 1157 * doesn't work when f->size == sizeof(size_t) because C doesn't 1158 * define what happens when a left shift count is >= the number of 1159 * bits in the integer being shifted; e.g., on some boxes it doesn't 1160 * shift at all when they're equal. 1161 */ 1162 const size_t ulargest = (size_t)-1 >> ((sizeof(size_t) - f->size)*8); 1163 1164 assert(f->size >= 1 && f->size <= (ssize_t)sizeof(size_t)); 1165 1166 if (is_unsigned) { 1167 uc_vm_raise_exception(vm, EXCEPTION_TYPE, 1168 "Format '%c' requires numeric argument between 0 and %zu", 1169 f->format, 1170 ulargest); 1171 } 1172 else { 1173 const ssize_t largest = (ssize_t)(ulargest >> 1); 1174 1175 uc_vm_raise_exception(vm, EXCEPTION_TYPE, 1176 "Format '%c' requires numeric argument between %zd and %zd", 1177 f->format, 1178 ~ largest, 1179 largest); 1180 } 1181 1182 return false; 1183 } 1184 1185 1186 /* Native mode routines. 
****************************************************/ 1187 1188 static uc_value_t * 1189 native_unpack_char(uc_vm_t *vm, const char *p, const formatdef_t *f) 1190 { 1191 return ucv_string_new_length(p, 1); 1192 } 1193 1194 static uc_value_t * 1195 native_unpack_byte(uc_vm_t *vm, const char *p, const formatdef_t *f) 1196 { 1197 return ucv_int64_new(*(signed char *)p); 1198 } 1199 1200 static uc_value_t * 1201 native_unpack_ubyte(uc_vm_t *vm, const char *p, const formatdef_t *f) 1202 { 1203 return ucv_uint64_new(*(unsigned char *)p); 1204 } 1205 1206 static uc_value_t * 1207 native_unpack_short(uc_vm_t *vm, const char *p, const formatdef_t *f) 1208 { 1209 short x = 0; 1210 1211 memcpy(&x, p, sizeof(x)); 1212 1213 return ucv_int64_new(x); 1214 } 1215 1216 static uc_value_t * 1217 native_unpack_ushort(uc_vm_t *vm, const char *p, const formatdef_t *f) 1218 { 1219 unsigned short x = 0; 1220 1221 memcpy(&x, p, sizeof(x)); 1222 1223 return ucv_uint64_new(x); 1224 } 1225 1226 static uc_value_t * 1227 native_unpack_int(uc_vm_t *vm, const char *p, const formatdef_t *f) 1228 { 1229 int x = 0; 1230 1231 memcpy(&x, p, sizeof(x)); 1232 1233 return ucv_int64_new(x); 1234 } 1235 1236 static uc_value_t * 1237 native_unpack_uint(uc_vm_t *vm, const char *p, const formatdef_t *f) 1238 { 1239 unsigned int x = 0; 1240 1241 memcpy(&x, p, sizeof(x)); 1242 1243 return ucv_uint64_new(x); 1244 } 1245 1246 static uc_value_t * 1247 native_unpack_long(uc_vm_t *vm, const char *p, const formatdef_t *f) 1248 { 1249 long x = 0; 1250 1251 memcpy(&x, p, sizeof(x)); 1252 1253 return ucv_int64_new(x); 1254 } 1255 1256 static uc_value_t * 1257 native_unpack_ulong(uc_vm_t *vm, const char *p, const formatdef_t *f) 1258 { 1259 unsigned long x = 0; 1260 1261 memcpy(&x, p, sizeof(x)); 1262 1263 return ucv_uint64_new(x); 1264 } 1265 1266 static uc_value_t * 1267 native_unpack_ssize_t(uc_vm_t *vm, const char *p, const formatdef_t *f) 1268 { 1269 ssize_t x = 0; 1270 1271 memcpy(&x, p, sizeof(x)); 1272 1273 
return ucv_int64_new(x); 1274 } 1275 1276 static uc_value_t * 1277 native_unpack_size_t(uc_vm_t *vm, const char *p, const formatdef_t *f) 1278 { 1279 size_t x = 0; 1280 1281 memcpy(&x, p, sizeof(x)); 1282 1283 return ucv_uint64_new(x); 1284 } 1285 1286 static uc_value_t * 1287 native_unpack_longlong(uc_vm_t *vm, const char *p, const formatdef_t *f) 1288 { 1289 long long x = 0; 1290 1291 memcpy(&x, p, sizeof(x)); 1292 1293 return ucv_int64_new(x); 1294 } 1295 1296 static uc_value_t * 1297 native_unpack_ulonglong(uc_vm_t *vm, const char *p, const formatdef_t *f) 1298 { 1299 unsigned long long x = 0; 1300 1301 memcpy(&x, p, sizeof(x)); 1302 1303 return ucv_uint64_new(x); 1304 } 1305 1306 static uc_value_t * 1307 native_unpack_bool(uc_vm_t *vm, const char *p, const formatdef_t *f) 1308 { 1309 bool x = false; 1310 1311 memcpy(&x, p, sizeof(x)); 1312 1313 return ucv_boolean_new(x != 0); 1314 } 1315 1316 1317 static uc_value_t * 1318 native_unpack_halffloat(uc_vm_t *vm, const char *p, const formatdef_t *f) 1319 { 1320 #if __BYTE_ORDER == __LITTLE_ENDIAN 1321 return ucv_double_new(double_unpack16(p, true)); 1322 #else 1323 return ucv_double_new(double_unpack16(p, false)); 1324 #endif 1325 } 1326 1327 static uc_value_t * 1328 native_unpack_float(uc_vm_t *vm, const char *p, const formatdef_t *f) 1329 { 1330 float x = 0.0; 1331 1332 memcpy(&x, p, sizeof(x)); 1333 1334 return ucv_double_new(x); 1335 } 1336 1337 static uc_value_t * 1338 native_unpack_double(uc_vm_t *vm, const char *p, const formatdef_t *f) 1339 { 1340 double x = 0.0; 1341 1342 memcpy(&x, p, sizeof(x)); 1343 1344 return ucv_double_new(x); 1345 } 1346 1347 static uc_value_t * 1348 native_unpack_void_p(uc_vm_t *vm, const char *p, const formatdef_t *f) 1349 { 1350 void *x = NULL; 1351 1352 memcpy(&x, p, sizeof(x)); 1353 1354 return ucv_int64_new((intptr_t)x); 1355 } 1356 1357 static bool 1358 native_pack_byte(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1359 { 1360 long x = 0; 1361 1362 if 
(!ucv_as_long(vm, v, &x)) 1363 return false; 1364 1365 if (x < -128 || x > 127) { 1366 uc_vm_raise_exception(vm, EXCEPTION_TYPE, 1367 "Byte format requires numeric value between -128 and 127"); 1368 1369 return false; 1370 } 1371 1372 *p = (char)x; 1373 1374 return true; 1375 } 1376 1377 static bool 1378 native_pack_ubyte(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1379 { 1380 long x = 0; 1381 1382 if (!ucv_as_long(vm, v, &x)) 1383 return false; 1384 1385 if (x < 0 || x > 255) { 1386 uc_vm_raise_exception(vm, EXCEPTION_TYPE, 1387 "Unsigned byte format requires numeric value between 0 and 255"); 1388 1389 return false; 1390 } 1391 1392 *(unsigned char *)p = (unsigned char)x; 1393 1394 return true; 1395 } 1396 1397 static bool 1398 native_pack_char(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1399 { 1400 char *s = NULL; 1401 1402 if (ucv_type(v) == UC_STRING) { 1403 s = ucv_string_get(v); 1404 *p = *s; 1405 } 1406 else { 1407 s = ucv_to_string(vm, v); 1408 *p = *s; 1409 free(s); 1410 } 1411 1412 return true; 1413 } 1414 1415 static bool 1416 native_pack_short(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1417 { 1418 long x = 0; 1419 short y = 0; 1420 1421 if (!ucv_as_long(vm, v, &x)) 1422 return false; 1423 1424 if (x < SHRT_MIN || x > SHRT_MAX) { 1425 uc_vm_raise_exception(vm, EXCEPTION_TYPE, 1426 "Short format requires numeric value between %d and %d", 1427 (int)SHRT_MIN, (int)SHRT_MAX); 1428 1429 return false; 1430 } 1431 1432 y = (short)x; 1433 memcpy(p, &y, sizeof(y)); 1434 1435 return true; 1436 } 1437 1438 static bool 1439 native_pack_ushort(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1440 { 1441 unsigned short y = 0; 1442 long x = 0; 1443 1444 if (!ucv_as_long(vm, v, &x)) 1445 return false; 1446 1447 if (x < 0 || x > USHRT_MAX) { 1448 uc_vm_raise_exception(vm, EXCEPTION_TYPE, 1449 "Unsigned short format requires numeric value between 0 and %u", 1450 (unsigned int)USHRT_MAX); 1451 1452 return false; 1453 
} 1454 1455 y = (unsigned short)x; 1456 memcpy(p, &y, sizeof(y)); 1457 1458 return true; 1459 } 1460 1461 static bool 1462 native_pack_int(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1463 { 1464 long x = 0; 1465 int y = 0; 1466 1467 if (!ucv_as_long(vm, v, &x)) 1468 return false; 1469 1470 if (sizeof(long) > sizeof(int)) { 1471 if ((x < ((long)INT_MIN)) || (x > ((long)INT_MAX))) 1472 return range_exception(vm, f, false); 1473 } 1474 1475 y = (int)x; 1476 memcpy(p, &y, sizeof(y)); 1477 1478 return true; 1479 } 1480 1481 static bool 1482 native_pack_uint(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1483 { 1484 unsigned long x = 0; 1485 unsigned int y = 0; 1486 1487 if (!ucv_as_ulong(vm, v, &x)) 1488 return false; 1489 1490 if (sizeof(long) > sizeof(int)) { 1491 if (x > ((unsigned long)UINT_MAX)) 1492 return range_exception(vm, f, true); 1493 } 1494 1495 y = (unsigned int)x; 1496 memcpy(p, &y, sizeof(y)); 1497 1498 return true; 1499 } 1500 1501 static bool 1502 native_pack_long(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1503 { 1504 long x = 0; 1505 1506 if (!ucv_as_long(vm, v, &x)) 1507 return false; 1508 1509 memcpy(p, &x, sizeof(x)); 1510 1511 return true; 1512 } 1513 1514 static bool 1515 native_pack_ulong(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1516 { 1517 unsigned long x = 0; 1518 1519 if (!ucv_as_ulong(vm, v, &x)) 1520 return false; 1521 1522 memcpy(p, &x, sizeof(x)); 1523 1524 return true; 1525 } 1526 1527 static bool 1528 native_pack_ssize_t(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1529 { 1530 ssize_t x = 0; 1531 1532 if (!ucv_as_ssize_t(vm, v, &x)) 1533 return false; 1534 1535 memcpy(p, &x, sizeof(x)); 1536 1537 return true; 1538 } 1539 1540 static bool 1541 native_pack_size_t(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1542 { 1543 size_t x = 0; 1544 1545 if (!ucv_as_size_t(vm, v, &x)) 1546 return false; 1547 1548 memcpy(p, &x, sizeof(x)); 1549 1550 return true; 
1551 } 1552 1553 static bool 1554 native_pack_longlong(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1555 { 1556 long long x = 0; 1557 1558 if (!ucv_as_longlong(vm, v, &x)) 1559 return false; 1560 1561 memcpy(p, &x, sizeof(x)); 1562 1563 return true; 1564 } 1565 1566 static bool 1567 native_pack_ulonglong(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1568 { 1569 unsigned long long x = 0; 1570 1571 if (!ucv_as_ulonglong(vm, v, &x)) 1572 return false; 1573 1574 memcpy(p, &x, sizeof(x)); 1575 1576 return true; 1577 } 1578 1579 1580 static bool 1581 native_pack_bool(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1582 { 1583 bool x = 0; 1584 1585 x = ucv_is_truish(v); 1586 1587 memcpy(p, &x, sizeof(x)); 1588 1589 return true; 1590 } 1591 1592 static bool 1593 native_pack_halffloat(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1594 { 1595 double x; 1596 1597 if (!ucv_as_double(vm, v, &x)) 1598 return false; 1599 1600 #if __BYTE_ORDER == __LITTLE_ENDIAN 1601 return double_pack16(x, p, true); 1602 #else 1603 return double_pack16(x, p, false); 1604 #endif 1605 } 1606 1607 static bool 1608 native_pack_float(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1609 { 1610 double d = 0.0; 1611 float x = 0.0; 1612 1613 if (!ucv_as_double(vm, v, &d)) 1614 return false; 1615 1616 x = (float)d; 1617 memcpy(p, &x, sizeof(x)); 1618 1619 return true; 1620 } 1621 1622 static bool 1623 native_pack_double(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1624 { 1625 double x = 0.0; 1626 1627 if (!ucv_as_double(vm, v, &x)) 1628 return false; 1629 1630 memcpy(p, &x, sizeof(x)); 1631 1632 return true; 1633 } 1634 1635 static bool 1636 native_pack_void_p(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1637 { 1638 long long int i = 0; 1639 void *x = NULL; 1640 1641 if (!ucv_as_longlong(vm, v, &i)) 1642 return false; 1643 1644 x = (void *)(intptr_t)i; 1645 memcpy(p, &x, sizeof(x)); 1646 1647 return true; 1648 } 1649 
1650 static const formatdef_t native_endian_table[] = { 1651 { 'x', sizeof(char), 0, NULL, NULL }, 1652 { 'b', sizeof(char), 0, native_unpack_byte, native_pack_byte }, 1653 { 'B', sizeof(char), 0, native_unpack_ubyte, native_pack_ubyte }, 1654 { 'c', sizeof(char), 0, native_unpack_char, native_pack_char }, 1655 { '*', sizeof(char), 0, NULL, NULL }, 1656 { 's', sizeof(char), 0, NULL, NULL }, 1657 { 'p', sizeof(char), 0, NULL, NULL }, 1658 { 'h', sizeof(short), SHORT_ALIGN, native_unpack_short, native_pack_short }, 1659 { 'H', sizeof(short), SHORT_ALIGN, native_unpack_ushort, native_pack_ushort }, 1660 { 'i', sizeof(int), INT_ALIGN, native_unpack_int, native_pack_int }, 1661 { 'I', sizeof(int), INT_ALIGN, native_unpack_uint, native_pack_uint }, 1662 { 'l', sizeof(long), LONG_ALIGN, native_unpack_long, native_pack_long }, 1663 { 'L', sizeof(long), LONG_ALIGN, native_unpack_ulong, native_pack_ulong }, 1664 { 'n', sizeof(size_t), SIZE_T_ALIGN, native_unpack_ssize_t, native_pack_ssize_t }, 1665 { 'N', sizeof(size_t), SIZE_T_ALIGN, native_unpack_size_t, native_pack_size_t }, 1666 { 'q', sizeof(long long), LONG_LONG_ALIGN, native_unpack_longlong, native_pack_longlong }, 1667 { 'Q', sizeof(long long), LONG_LONG_ALIGN, native_unpack_ulonglong,native_pack_ulonglong }, 1668 { '?', sizeof(bool), BOOL_ALIGN, native_unpack_bool, native_pack_bool }, 1669 { 'e', sizeof(short), SHORT_ALIGN, native_unpack_halffloat, native_pack_halffloat }, 1670 { 'f', sizeof(float), FLOAT_ALIGN, native_unpack_float, native_pack_float }, 1671 { 'd', sizeof(double), DOUBLE_ALIGN, native_unpack_double, native_pack_double }, 1672 { 'P', sizeof(void *), VOID_P_ALIGN, native_unpack_void_p, native_pack_void_p }, 1673 { 0 } 1674 }; 1675 1676 1677 /* Big-endian routines. 
*****************************************************/ 1678 1679 static uc_value_t * 1680 be_unpack_int(uc_vm_t *vm, const char *p, const formatdef_t *f) 1681 { 1682 const unsigned char *bytes = (const unsigned char *)p; 1683 ssize_t i = f->size; 1684 long x = 0; 1685 1686 do { 1687 x = (x<<8) | *bytes++; 1688 } while (--i > 0); 1689 1690 /* Extend the sign bit. */ 1691 if ((ssize_t)sizeof(long) > f->size) 1692 x |= -(x & (1L << ((8 * f->size) - 1))); 1693 1694 return ucv_int64_new(x); 1695 } 1696 1697 static uc_value_t * 1698 be_unpack_uint(uc_vm_t *vm, const char *p, const formatdef_t *f) 1699 { 1700 const unsigned char *bytes = (const unsigned char *)p; 1701 ssize_t i = f->size; 1702 unsigned long x = 0; 1703 1704 do { 1705 x = (x<<8) | *bytes++; 1706 } while (--i > 0); 1707 1708 return ucv_uint64_new(x); 1709 } 1710 1711 static uc_value_t * 1712 be_unpack_longlong(uc_vm_t *vm, const char *p, const formatdef_t *f) 1713 { 1714 const unsigned char *bytes = (const unsigned char *)p; 1715 ssize_t i = f->size; 1716 long long x = 0; 1717 1718 do { 1719 x = (x<<8) | *bytes++; 1720 } while (--i > 0); 1721 1722 /* Extend the sign bit. 
*/ 1723 if ((ssize_t)sizeof(long long) > f->size) 1724 x |= -(x & ((long long)1 << ((8 * f->size) - 1))); 1725 1726 return ucv_int64_new(x); 1727 } 1728 1729 static uc_value_t * 1730 be_unpack_ulonglong(uc_vm_t *vm, const char *p, const formatdef_t *f) 1731 { 1732 const unsigned char *bytes = (const unsigned char *)p; 1733 unsigned long long x = 0; 1734 ssize_t i = f->size; 1735 1736 do { 1737 x = (x<<8) | *bytes++; 1738 } while (--i > 0); 1739 1740 return ucv_uint64_new(x); 1741 } 1742 1743 static uc_value_t * 1744 be_unpack_halffloat(uc_vm_t *vm, const char *p, const formatdef_t *f) 1745 { 1746 return ucv_double_new(double_unpack16(p, false)); 1747 } 1748 1749 static uc_value_t * 1750 be_unpack_float(uc_vm_t *vm, const char *p, const formatdef_t *f) 1751 { 1752 return ucv_double_new(double_unpack32(p, false)); 1753 } 1754 1755 static uc_value_t * 1756 be_unpack_double(uc_vm_t *vm, const char *p, const formatdef_t *f) 1757 { 1758 return ucv_double_new(double_unpack64(p, false)); 1759 } 1760 1761 static uc_value_t * 1762 be_unpack_bool(uc_vm_t *vm, const char *p, const formatdef_t *f) 1763 { 1764 return ucv_boolean_new(*p != 0); 1765 } 1766 1767 static bool 1768 be_pack_int(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1769 { 1770 unsigned char *q = (unsigned char *)p; 1771 ssize_t i = 0; 1772 long x = 0; 1773 1774 if (!ucv_as_long(vm, v, &x)) 1775 return false; 1776 1777 i = f->size; 1778 1779 if (i != sizeof(long)) { 1780 if ((i == 2) && (x < -32768 || x > 32767)) 1781 return range_exception(vm, f, false); 1782 #if UINT_MAX < ULONG_MAX 1783 else if ((i == 4) && (x < -2147483648L || x > 2147483647L)) 1784 return range_exception(vm, f, false); 1785 #endif 1786 } 1787 1788 do { 1789 q[--i] = (unsigned char)(x & 0xffL); 1790 x >>= 8; 1791 } while (i > 0); 1792 1793 return true; 1794 } 1795 1796 static bool 1797 be_pack_uint(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1798 { 1799 unsigned char *q = (unsigned char *)p; 1800 unsigned long x = 
0; 1801 ssize_t i = 0; 1802 1803 if (!ucv_as_ulong(vm, v, &x)) 1804 return false; 1805 1806 i = f->size; 1807 1808 if (i != sizeof(long)) { 1809 unsigned long maxint = 1; 1810 maxint <<= (unsigned long)(i * 8); 1811 if (x >= maxint) 1812 return range_exception(vm, f, true); 1813 } 1814 1815 do { 1816 q[--i] = (unsigned char)(x & 0xffUL); 1817 x >>= 8; 1818 } while (i > 0); 1819 1820 return true; 1821 } 1822 1823 static bool 1824 be_pack_longlong(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1825 { 1826 unsigned char *q = (unsigned char *)p; 1827 long long x = 0; 1828 ssize_t i = 0; 1829 1830 if (!ucv_as_longlong(vm, v, &x)) 1831 return false; 1832 1833 i = f->size; 1834 1835 do { 1836 q[--i] = (unsigned char)(x & 0xffL); 1837 x >>= 8; 1838 } while (i > 0); 1839 1840 return true; 1841 } 1842 1843 static bool 1844 be_pack_ulonglong(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1845 { 1846 unsigned char *q = (unsigned char *)p; 1847 unsigned long long x = 0; 1848 ssize_t i = 0; 1849 1850 if (!ucv_as_ulonglong(vm, v, &x)) 1851 return false; 1852 1853 i = f->size; 1854 1855 do { 1856 q[--i] = (unsigned char)(x & 0xffUL); 1857 x >>= 8; 1858 } while (i > 0); 1859 1860 return true; 1861 } 1862 1863 static bool 1864 be_pack_halffloat(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1865 { 1866 double x = 0.0; 1867 1868 if (!ucv_as_double(vm, v, &x)) 1869 return false; 1870 1871 return double_pack16(x, p, false); 1872 } 1873 1874 static bool 1875 be_pack_float(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1876 { 1877 double x = 0.0; 1878 1879 if (!ucv_as_double(vm, v, &x)) 1880 return false; 1881 1882 if (!double_pack32(x, p, 0)) { 1883 uc_vm_raise_exception(vm, EXCEPTION_TYPE, "Argument out of range"); 1884 1885 return false; 1886 } 1887 1888 return true; 1889 } 1890 1891 static bool 1892 be_pack_double(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1893 { 1894 double x = 0.0; 1895 1896 if (!ucv_as_double(vm, v, 
&x)) 1897 return false; 1898 1899 if (!double_pack64(x, p, 0)) { 1900 uc_vm_raise_exception(vm, EXCEPTION_TYPE, "Argument out of range"); 1901 1902 return false; 1903 } 1904 1905 return true; 1906 } 1907 1908 static bool 1909 be_pack_bool(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 1910 { 1911 *p = (char)ucv_is_truish(v); 1912 1913 return true; 1914 } 1915 1916 static formatdef_t big_endian_table[] = { 1917 { 'x', 1, 0, NULL, NULL }, 1918 { 'b', 1, 0, native_unpack_byte, native_pack_byte }, 1919 { 'B', 1, 0, native_unpack_ubyte, native_pack_ubyte }, 1920 { 'c', 1, 0, native_unpack_char, native_pack_char }, 1921 { '*', 1, 0, NULL, NULL }, 1922 { 's', 1, 0, NULL, NULL }, 1923 { 'p', 1, 0, NULL, NULL }, 1924 { 'h', 2, 0, be_unpack_int, be_pack_int }, 1925 { 'H', 2, 0, be_unpack_uint, be_pack_uint }, 1926 { 'i', 4, 0, be_unpack_int, be_pack_int }, 1927 { 'I', 4, 0, be_unpack_uint, be_pack_uint }, 1928 { 'l', 4, 0, be_unpack_int, be_pack_int }, 1929 { 'L', 4, 0, be_unpack_uint, be_pack_uint }, 1930 { 'q', 8, 0, be_unpack_longlong, be_pack_longlong }, 1931 { 'Q', 8, 0, be_unpack_ulonglong, be_pack_ulonglong }, 1932 { '?', 1, 0, be_unpack_bool, be_pack_bool }, 1933 { 'e', 2, 0, be_unpack_halffloat, be_pack_halffloat }, 1934 { 'f', 4, 0, be_unpack_float, be_pack_float }, 1935 { 'd', 8, 0, be_unpack_double, be_pack_double }, 1936 { 0 } 1937 }; 1938 1939 1940 /* Little-endian routines. *****************************************************/ 1941 1942 static uc_value_t * 1943 le_unpack_int(uc_vm_t *vm, const char *p, const formatdef_t *f) 1944 { 1945 const unsigned char *bytes = (const unsigned char *)p; 1946 ssize_t i = f->size; 1947 long x = 0; 1948 1949 do { 1950 x = (x<<8) | bytes[--i]; 1951 } while (i > 0); 1952 1953 /* Extend the sign bit. 
*/ 1954 if ((ssize_t)sizeof(long) > f->size) 1955 x |= -(x & (1L << ((8 * f->size) - 1))); 1956 1957 return ucv_int64_new(x); 1958 } 1959 1960 static uc_value_t * 1961 le_unpack_uint(uc_vm_t *vm, const char *p, const formatdef_t *f) 1962 { 1963 const unsigned char *bytes = (const unsigned char *)p; 1964 ssize_t i = f->size; 1965 unsigned long x = 0; 1966 1967 do { 1968 x = (x<<8) | bytes[--i]; 1969 } while (i > 0); 1970 1971 return ucv_uint64_new(x); 1972 } 1973 1974 static uc_value_t * 1975 le_unpack_longlong(uc_vm_t *vm, const char *p, const formatdef_t *f) 1976 { 1977 const unsigned char *bytes = (const unsigned char *)p; 1978 ssize_t i = f->size; 1979 long long x = 0; 1980 1981 do { 1982 x = (x<<8) | bytes[--i]; 1983 } while (i > 0); 1984 1985 /* Extend the sign bit. */ 1986 if ((ssize_t)sizeof(long long) > f->size) 1987 x |= -(x & ((long long)1 << ((8 * f->size) - 1))); 1988 1989 return ucv_int64_new(x); 1990 } 1991 1992 static uc_value_t * 1993 le_unpack_ulonglong(uc_vm_t *vm, const char *p, const formatdef_t *f) 1994 { 1995 const unsigned char *bytes = (const unsigned char *)p; 1996 unsigned long long x = 0; 1997 ssize_t i = f->size; 1998 1999 do { 2000 x = (x<<8) | bytes[--i]; 2001 } while (i > 0); 2002 2003 return ucv_uint64_new(x); 2004 } 2005 2006 static uc_value_t * 2007 le_unpack_halffloat(uc_vm_t *vm, const char *p, const formatdef_t *f) 2008 { 2009 return ucv_double_new(double_unpack16(p, true)); 2010 } 2011 2012 static uc_value_t * 2013 le_unpack_float(uc_vm_t *vm, const char *p, const formatdef_t *f) 2014 { 2015 return ucv_double_new(double_unpack32(p, true)); 2016 } 2017 2018 static uc_value_t * 2019 le_unpack_double(uc_vm_t *vm, const char *p, const formatdef_t *f) 2020 { 2021 return ucv_double_new(double_unpack64(p, true)); 2022 } 2023 2024 static bool 2025 le_pack_int(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 2026 { 2027 unsigned char *q = (unsigned char *)p; 2028 ssize_t i = 0; 2029 long x = 0; 2030 2031 if (!ucv_as_long(vm, 
v, &x)) 2032 return false; 2033 2034 i = f->size; 2035 2036 if (i != sizeof(long)) { 2037 if ((i == 2) && (x < -32768 || x > 32767)) 2038 return range_exception(vm, f, false); 2039 #if UINT_MAX < ULONG_MAX 2040 else if ((i == 4) && (x < -2147483648L || x > 2147483647L)) 2041 return range_exception(vm, f, false); 2042 #endif 2043 } 2044 2045 do { 2046 *q++ = (unsigned char)(x & 0xffL); 2047 x >>= 8; 2048 } while (--i > 0); 2049 2050 return true; 2051 } 2052 2053 static bool 2054 le_pack_uint(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 2055 { 2056 unsigned char *q = (unsigned char *)p; 2057 unsigned long x = 0; 2058 ssize_t i = 0; 2059 2060 if (!ucv_as_ulong(vm, v, &x)) 2061 return false; 2062 2063 i = f->size; 2064 2065 if (i != sizeof(long)) { 2066 unsigned long maxint = 1; 2067 maxint <<= (unsigned long)(i * 8); 2068 2069 if (x >= maxint) 2070 return range_exception(vm, f, true); 2071 } 2072 2073 do { 2074 *q++ = (unsigned char)(x & 0xffUL); 2075 x >>= 8; 2076 } while (--i > 0); 2077 2078 return true; 2079 } 2080 2081 static bool 2082 le_pack_longlong(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 2083 { 2084 unsigned char *q = (unsigned char *)p; 2085 long long x = 0; 2086 ssize_t i = 0; 2087 2088 if (!ucv_as_longlong(vm, v, &x)) 2089 return false; 2090 2091 i = f->size; 2092 2093 do { 2094 *q++ = (unsigned char)(x & 0xffL); 2095 x >>= 8; 2096 } while (--i > 0); 2097 2098 return true; 2099 } 2100 2101 static bool 2102 le_pack_ulonglong(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 2103 { 2104 unsigned char *q = (unsigned char *)p; 2105 unsigned long long x = 0; 2106 ssize_t i = 0; 2107 2108 if (!ucv_as_ulonglong(vm, v, &x)) 2109 return false; 2110 2111 i = f->size; 2112 2113 do { 2114 *q++ = (unsigned char)(x & 0xffUL); 2115 x >>= 8; 2116 } while (--i > 0); 2117 2118 return true; 2119 } 2120 2121 static bool 2122 le_pack_halffloat(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 2123 { 2124 double x = 0.0; 2125 
2126 if (!ucv_as_double(vm, v, &x)) 2127 return false; 2128 2129 return double_pack16(x, p, true); 2130 } 2131 2132 static bool 2133 le_pack_float(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 2134 { 2135 double x = 0.0; 2136 2137 if (!ucv_as_double(vm, v, &x)) 2138 return false; 2139 2140 if (!double_pack32(x, p, 1)) { 2141 uc_vm_raise_exception(vm, EXCEPTION_TYPE, "Argument out of range"); 2142 2143 return false; 2144 } 2145 2146 return true; 2147 } 2148 2149 static bool 2150 le_pack_double(uc_vm_t *vm, char *p, uc_value_t *v, const formatdef_t *f) 2151 { 2152 double x = 0.0; 2153 2154 if (!ucv_as_double(vm, v, &x)) 2155 return false; 2156 2157 if (!double_pack64(x, p, 1)) { 2158 uc_vm_raise_exception(vm, EXCEPTION_TYPE, "Argument out of range"); 2159 2160 return false; 2161 } 2162 2163 return true; 2164 } 2165 2166 static formatdef_t little_endian_table[] = { 2167 { 'x', 1, 0, NULL, NULL }, 2168 { 'b', 1, 0, native_unpack_byte, native_pack_byte }, 2169 { 'B', 1, 0, native_unpack_ubyte, native_pack_ubyte }, 2170 { 'c', 1, 0, native_unpack_char, native_pack_char }, 2171 { '*', 1, 0, NULL, NULL }, 2172 { 's', 1, 0, NULL, NULL }, 2173 { 'p', 1, 0, NULL, NULL }, 2174 { 'h', 2, 0, le_unpack_int, le_pack_int }, 2175 { 'H', 2, 0, le_unpack_uint, le_pack_uint }, 2176 { 'i', 4, 0, le_unpack_int, le_pack_int }, 2177 { 'I', 4, 0, le_unpack_uint, le_pack_uint }, 2178 { 'l', 4, 0, le_unpack_int, le_pack_int }, 2179 { 'L', 4, 0, le_unpack_uint, le_pack_uint }, 2180 { 'q', 8, 0, le_unpack_longlong, le_pack_longlong }, 2181 { 'Q', 8, 0, le_unpack_ulonglong, le_pack_ulonglong }, 2182 { '?', 1, 0, be_unpack_bool, be_pack_bool }, 2183 { 'e', 2, 0, le_unpack_halffloat, le_pack_halffloat }, 2184 { 'f', 4, 0, le_unpack_float, le_pack_float }, 2185 { 'd', 8, 0, le_unpack_double, le_pack_double }, 2186 { 0 } 2187 }; 2188 2189 2190 static const formatdef_t * 2191 select_format_table(const char **pfmt) 2192 { 2193 const char *fmt = (*pfmt)++; /* May be backed out of later */ 
2194 2195 switch (*fmt) { 2196 case '<': 2197 return little_endian_table; 2198 2199 case '>': 2200 case '!': /* Network byte order is big-endian */ 2201 return big_endian_table; 2202 2203 case '=': /* Host byte order -- different from native in alignment! */ 2204 #if __BYTE_ORDER == __LITTLE_ENDIAN 2205 return little_endian_table; 2206 #else 2207 return big_endian_table; 2208 #endif 2209 2210 default: 2211 --*pfmt; /* Back out of pointer increment */ 2212 /* Fall through */ 2213 2214 case '@': 2215 return native_endian_table; 2216 } 2217 } 2218 2219 2220 /* Get the table entry for a format code */ 2221 2222 static const formatdef_t * 2223 lookup_table_entry(uc_vm_t *vm, int c, const formatdef_t *table) 2224 { 2225 for (; table->format != '\0'; table++) { 2226 if (table->format == c) { 2227 return table; 2228 } 2229 } 2230 2231 uc_vm_raise_exception(vm, EXCEPTION_TYPE, 2232 "Unrecognized character '%c' in struct format", 2233 c); 2234 2235 return NULL; 2236 } 2237 2238 2239 /* Align a size according to a format code. Return -1 on overflow. 
*/ 2240 2241 static ssize_t 2242 align_for_entry(ssize_t size, const formatdef_t *e) 2243 { 2244 ssize_t extra; 2245 2246 if (e->alignment && size > 0) { 2247 extra = (e->alignment - 1) - (size - 1) % (e->alignment); 2248 2249 if (extra > SSIZE_MAX - size) 2250 return -1; 2251 2252 size += extra; 2253 } 2254 2255 return size; 2256 } 2257 2258 2259 static void 2260 optimize_functions(void) 2261 { 2262 /* Check endian and swap in faster functions */ 2263 const formatdef_t *native = native_endian_table; 2264 formatdef_t *other, *ptr; 2265 2266 #if __BYTE_ORDER == __LITTLE_ENDIAN 2267 other = little_endian_table; 2268 #else 2269 other = big_endian_table; 2270 #endif 2271 2272 /* Scan through the native table, find a matching 2273 entry in the endian table and swap in the 2274 native implementations whenever possible 2275 (64-bit platforms may not have "standard" sizes) */ 2276 while (native->format != '\0' && other->format != '\0') { 2277 ptr = other; 2278 2279 while (ptr->format != '\0') { 2280 if (ptr->format == native->format) { 2281 /* Match faster when formats are 2282 listed in the same order */ 2283 if (ptr == other) 2284 other++; 2285 2286 /* Only use the trick if the 2287 size matches */ 2288 if (ptr->size != native->size) 2289 break; 2290 2291 /* Skip float and double, could be 2292 "unknown" float format */ 2293 if (ptr->format == 'd' || ptr->format == 'f') 2294 break; 2295 2296 /* Skip bool, semantics are different for standard size */ 2297 if (ptr->format == '?') 2298 break; 2299 2300 ptr->pack = native->pack; 2301 ptr->unpack = native->unpack; 2302 break; 2303 } 2304 2305 ptr++; 2306 } 2307 2308 native++; 2309 } 2310 } 2311 2312 static formatstate_t * 2313 parse_format(uc_vm_t *vm, uc_value_t *fmtval) 2314 { 2315 ssize_t size, num, itemsize; 2316 const formatdef_t *e, *f; 2317 const char *fmt, *s; 2318 formatstate_t *state; 2319 formatcode_t *codes; 2320 size_t ncodes; 2321 char c; 2322 2323 if (ucv_type(fmtval) != UC_STRING) { 2324 
uc_vm_raise_exception(vm, EXCEPTION_TYPE, 2325 "Format value not a string"); 2326 2327 return NULL; 2328 } 2329 2330 fmt = ucv_string_get(fmtval); 2331 2332 if (strlen(fmt) != ucv_string_length(fmtval)) { 2333 uc_vm_raise_exception(vm, EXCEPTION_TYPE, 2334 "Format string contains embedded null character"); 2335 2336 return NULL; 2337 } 2338 2339 f = select_format_table(&fmt); 2340 2341 s = fmt; 2342 size = 0; 2343 ncodes = 0; 2344 2345 while ((c = *s++) != '\0') { 2346 if (isspace(c)) 2347 continue; 2348 2349 if ('' <= c && c <= '9') { 2350 num = c - ''; 2351 2352 while ('' <= (c = *s++) && c <= '9') { 2353 /* overflow-safe version of 2354 if (num*10 + (c - '') > SSIZE_MAX) { ... } */ 2355 if (num >= SSIZE_MAX / 10 && ( 2356 num > SSIZE_MAX / 10 || 2357 (c - '') > SSIZE_MAX % 10)) 2358 goto overflow; 2359 2360 num = num*10 + (c - ''); 2361 } 2362 2363 if (c == '\0') { 2364 uc_vm_raise_exception(vm, EXCEPTION_TYPE, 2365 "Format string contains repeat count given without format specifier"); 2366 2367 return NULL; 2368 } 2369 } 2370 else 2371 num = 1; 2372 2373 e = lookup_table_entry(vm, c, f); 2374 2375 if (e == NULL) 2376 return NULL; 2377 2378 switch (c) { 2379 case '*': /* fall through */ 2380 case 's': 2381 case 'p': 2382 ncodes++; 2383 break; 2384 2385 case 'x': 2386 break; 2387 2388 default: 2389 if (num) 2390 ncodes++; 2391 2392 break; 2393 } 2394 2395 itemsize = e->size; 2396 size = align_for_entry(size, e); 2397 2398 if (size == -1) 2399 goto overflow; 2400 2401 /* if (size + num * itemsize > SSIZE_MAX) { ... } */ 2402 if (num > (SSIZE_MAX - size) / itemsize) 2403 goto overflow; 2404 2405 size += (c != '*') ? 
num * itemsize : 0; 2406 } 2407 2408 /* check for overflow */ 2409 if ((ncodes + 1) > ((size_t)SSIZE_MAX / sizeof(formatcode_t))) { 2410 uc_vm_raise_exception(vm, EXCEPTION_RUNTIME, "Out of memory"); 2411 2412 return NULL; 2413 } 2414 2415 state = xalloc(sizeof(*state) + ncodes * sizeof(formatcode_t)); 2416 state->size = size; 2417 state->ncodes = ncodes; 2418 2419 codes = state->codes; 2420 2421 s = fmt; 2422 size = 0; 2423 2424 while ((c = *s++) != '\0') { 2425 if (isspace(c)) 2426 continue; 2427 2428 if ('' <= c && c <= '9') { 2429 num = c - ''; 2430 2431 while ('' <= (c = *s++) && c <= '9') 2432 num = num*10 + (c - ''); 2433 2434 } 2435 else if (c == '*') 2436 num = -1; 2437 else 2438 num = 1; 2439 2440 e = lookup_table_entry(vm, c, f); 2441 2442 if (e == NULL) 2443 continue; 2444 2445 size = align_for_entry(size, e); 2446 2447 if (c == '*' || c == 's' || c == 'p') { 2448 codes->offset = size; 2449 codes->size = num; 2450 codes->fmtdef = e; 2451 codes->repeat = 1; 2452 codes++; 2453 size += (c != '*') ? 
num : 0; 2454 } 2455 else if (c == 'x') { 2456 size += num; 2457 } 2458 else if (num) { 2459 codes->offset = size; 2460 codes->size = e->size; 2461 codes->fmtdef = e; 2462 codes->repeat = num; 2463 codes++; 2464 size += e->size * num; 2465 } 2466 } 2467 2468 return state; 2469 2470 overflow: 2471 uc_vm_raise_exception(vm, EXCEPTION_RUNTIME, 2472 "Total struct size too long"); 2473 2474 return NULL; 2475 } 2476 2477 static uc_value_t * 2478 uc_pack_common(uc_vm_t *vm, size_t nargs, formatstate_t *state, size_t argoff) 2479 { 2480 size_t ncode, arg, off; 2481 formatcode_t *code; 2482 uc_string_t *buf; 2483 ssize_t size, n; 2484 const void *p; 2485 2486 for (ncode = 0, code = &state->codes[0], arg = argoff, off = 0; 2487 ncode < state->ncodes; 2488 code = &state->codes[++ncode]) { 2489 if (code->fmtdef->format == '*') { 2490 uc_value_t *v = uc_fn_arg(arg++); 2491 2492 if (ucv_type(v) != UC_STRING) 2493 continue; 2494 2495 n = ucv_string_length(v); 2496 2497 if (code->size == -1 || code->size > n) 2498 off += n; 2499 else 2500 off += code->size; 2501 } 2502 else { 2503 arg += code->repeat; 2504 } 2505 } 2506 2507 buf = xalloc(sizeof(*buf) + state->size + off + 1); 2508 buf->header.type = UC_STRING; 2509 buf->header.refcount = 1; 2510 buf->length = state->size + off; 2511 2512 for (ncode = 0, code = &state->codes[0], off = 0; 2513 ncode < state->ncodes; 2514 code = &state->codes[++ncode]) { 2515 const formatdef_t *e = code->fmtdef; 2516 char *res = buf->str + code->offset + off; 2517 ssize_t j = code->repeat; 2518 2519 while (j--) { 2520 uc_value_t *v = uc_fn_arg(argoff++); 2521 2522 size = code->size; 2523 2524 if (e->format == '*') { 2525 if (ucv_type(v) != UC_STRING) { 2526 uc_vm_raise_exception(vm, EXCEPTION_TYPE, 2527 "Argument for '*' must be a string"); 2528 2529 goto err; 2530 } 2531 2532 n = ucv_string_length(v); 2533 p = ucv_string_get(v); 2534 2535 if (size == -1 || n < size) 2536 size = n; 2537 else if (n > size) 2538 n = size; 2539 2540 off += size; 2541 
2542 if (n > 0) 2543 memcpy(res, p, n); 2544 } 2545 else if (e->format == 's') { 2546 if (ucv_type(v) != UC_STRING) { 2547 uc_vm_raise_exception(vm, EXCEPTION_TYPE, 2548 "Argument for 's' must be a string"); 2549 2550 goto err; 2551 } 2552 2553 n = ucv_string_length(v); 2554 p = ucv_string_get(v); 2555 2556 if (n > size) 2557 n = size; 2558 2559 if (n > 0) 2560 memcpy(res, p, n); 2561 } 2562 else if (e->format == 'p') { 2563 if (ucv_type(v) != UC_STRING) { 2564 uc_vm_raise_exception(vm, EXCEPTION_TYPE, 2565 "Argument for 'p' must be a string"); 2566 2567 goto err; 2568 } 2569 2570 n = ucv_string_length(v); 2571 p = ucv_string_get(v); 2572 2573 if (n > (size - 1)) 2574 n = size - 1; 2575 2576 if (n > 0) 2577 memcpy(res + 1, p, n); 2578 2579 if (n > 255) 2580 n = 255; 2581 2582 *res = (unsigned char)n; 2583 } 2584 else { 2585 if (!e->pack(vm, res, v, e)) 2586 goto err; 2587 } 2588 2589 res += size; 2590 } 2591 } 2592 2593 return &buf->header; 2594 2595 err: 2596 free(buf); 2597 2598 return NULL; 2599 } 2600 2601 static uc_value_t * 2602 uc_unpack_common(uc_vm_t *vm, size_t nargs, formatstate_t *state, size_t argoff) 2603 { 2604 uc_value_t *bufval = uc_fn_arg(argoff); 2605 uc_value_t *offset = uc_fn_arg(argoff + 1); 2606 const char *startfrom = NULL; 2607 ssize_t bufrem, size, n; 2608 uc_value_t *result; 2609 formatcode_t *code; 2610 size_t ncode, off; 2611 2612 if (ucv_type(bufval) != UC_STRING) { 2613 uc_vm_raise_exception(vm, EXCEPTION_TYPE, 2614 "Buffer value not a string"); 2615 2616 return NULL; 2617 } 2618 2619 startfrom = ucv_string_get(bufval); 2620 bufrem = ucv_string_length(bufval); 2621 2622 if (offset) { 2623 if (ucv_type(offset) != UC_INTEGER) { 2624 uc_vm_raise_exception(vm, EXCEPTION_TYPE, 2625 "Offset value not an integer"); 2626 2627 return NULL; 2628 } 2629 2630 n = (ssize_t)ucv_int64_get(offset); 2631 2632 if (n < 0) 2633 n += bufrem; 2634 2635 if (n < 0 || n >= bufrem) 2636 return NULL; 2637 2638 startfrom += n; 2639 bufrem -= n; 2640 } 2641 2642 
result = ucv_array_new(vm); 2643 2644 for (ncode = 0, code = &state->codes[0], off = 0; 2645 ncode < state->ncodes; 2646 code = &state->codes[++ncode]) { 2647 const formatdef_t *e = code->fmtdef; 2648 const char *res = startfrom + code->offset + off; 2649 ssize_t j = code->repeat; 2650 2651 while (j--) { 2652 uc_value_t *v = NULL; 2653 2654 size = code->size; 2655 2656 if (e->format == '*') { 2657 if (size == -1 || size > bufrem) 2658 size = bufrem; 2659 2660 off += size; 2661 } 2662 else if (size > bufrem) { 2663 goto fail; 2664 } 2665 2666 if (e->format == 's' || e->format == '*') { 2667 v = ucv_string_new_length(res, size); 2668 } 2669 else if (e->format == 'p') { 2670 n = *(unsigned char *)res; 2671 2672 if (n >= size) 2673 n = (size > 0 ? size - 1 : 0); 2674 2675 v = ucv_string_new_length(res + 1, n); 2676 } 2677 else { 2678 v = e->unpack(vm, res, e); 2679 } 2680 2681 if (v == NULL) 2682 goto fail; 2683 2684 ucv_array_push(result, v); 2685 2686 res += size; 2687 bufrem -= size; 2688 } 2689 } 2690 2691 return result; 2692 2693 fail: 2694 ucv_put(result); 2695 2696 return NULL; 2697 } 2698 2699 2700 /** 2701 * Pack given values according to specified format. 2702 * 2703 * The `pack()` function creates a byte string containing the argument values 2704 * packed according to the given format string. 2705 * 2706 * Returns the packed string. 2707 * 2708 * Raises a runtime exception if a given argument value does not match the 2709 * required type of the corresponding format string directive or if and invalid 2710 * format string is provided. 2711 * 2712 * @function module:struct#pack 2713 * 2714 * @param {string} format 2715 * The format string. 2716 * 2717 * @param {...*} values 2718 * Variable number of values to pack. 2719 * 2720 * @returns {string} 2721 * 2722 * @example 2723 * // Pack the values 1, 2, 3 as three consecutive unsigned int values 2724 * // in network byte order. 
2725 * const data = pack('!III', 1, 2, 3); 2726 */ 2727 static uc_value_t * 2728 uc_pack(uc_vm_t *vm, size_t nargs) 2729 { 2730 uc_value_t *fmtval = uc_fn_arg(0); 2731 uc_value_t *res = NULL; 2732 formatstate_t *state; 2733 2734 state = parse_format(vm, fmtval); 2735 2736 if (!state) 2737 return NULL; 2738 2739 res = uc_pack_common(vm, nargs, state, 1); 2740 2741 free(state); 2742 2743 return res; 2744 } 2745 2746 /** 2747 * Unpack given byte string according to specified format. 2748 * 2749 * The `unpack()` function interpretes a byte string according to the given 2750 * format string and returns the resulting values. If the optional offset 2751 * argument is given, unpacking starts from this byte position within the input. 2752 * If not specified, the start offset defaults to `0`, the start of the given 2753 * input string. 2754 * 2755 * Returns an array of unpacked values. 2756 * 2757 * Raises a runtime exception if the format string is invalid or if an invalid 2758 * input string or offset value is given. 2759 * 2760 * @function module:struct#unpack 2761 * 2762 * @param {string} format 2763 * The format string. 2764 * 2765 * @param {string} input 2766 * The input string to unpack. 2767 * 2768 * @param {number} [offset=0] 2769 * The offset within the input string to start unpacking from. 2770 * 2771 * @returns {array} 2772 * 2773 * @example 2774 * // Unpack three consecutive unsigned int values in network byte order. 
2775 * const numbers = 2776 * unpack('!III', '\x00\x00\x00\x01\x00\x00\x00\x02\x00\x00\x00\x03'); 2777 * print(numbers, "\n"); // [ 1, 2, 3 ] 2778 */ 2779 static uc_value_t * 2780 uc_unpack(uc_vm_t *vm, size_t nargs) 2781 { 2782 uc_value_t *fmtval = uc_fn_arg(0); 2783 uc_value_t *res = NULL; 2784 formatstate_t *state; 2785 2786 state = parse_format(vm, fmtval); 2787 2788 if (!state) 2789 return NULL; 2790 2791 res = uc_unpack_common(vm, nargs, state, 1); 2792 2793 free(state); 2794 2795 return res; 2796 } 2797 2798 2799 /** 2800 * Represents a struct instance created by `new()`. 2801 * 2802 * @class module:struct.instance 2803 * @hideconstructor 2804 * 2805 * @see {@link module:struct#new|new()} 2806 * 2807 * @example 2808 * 2809 * const fmt = struct.new(…); 2810 * 2811 * fmt.pack(…); 2812 * 2813 * const values = fmt.unpack(…); 2814 */ 2815 2816 /** 2817 * Precompile format string. 2818 * 2819 * The `new()` function precompiles the given format string argument and returns 2820 * a `struct` object instance useful for packing and unpacking multiple items 2821 * without having to recompute the internal format each time. 2822 * 2823 * Returns an precompiled struct format instance. 2824 * 2825 * Raises a runtime exception if the format string is invalid. 2826 * 2827 * @function module:struct#new 2828 * 2829 * @param {string} format 2830 * The format string. 2831 * 2832 * @returns {module:struct.instance} 2833 * 2834 * @example 2835 * // Create a format of three consecutive unsigned int values in network byte order. 
2836 * const fmt = struct.new('!III'); 2837 * const buf = fmt.pack(1, 2, 3); // "\x00\x00\x00\x01…" 2838 * print(fmt.unpack(buf), "\n"); // [ 1, 2, 3 ] 2839 */ 2840 static uc_value_t * 2841 uc_struct_new(uc_vm_t *vm, size_t nargs) 2842 { 2843 uc_value_t *fmtval = uc_fn_arg(0); 2844 formatstate_t *state; 2845 2846 state = parse_format(vm, fmtval); 2847 2848 if (!state) 2849 return NULL; 2850 2851 return uc_resource_new(struct_type, state); 2852 } 2853 2854 static void 2855 uc_struct_gc(void *ud) 2856 { 2857 formatstate_t *state = ud; 2858 2859 free(state); 2860 } 2861 2862 /** 2863 * Pack given values. 2864 * 2865 * The `pack()` function creates a byte string containing the argument values 2866 * packed according to the given format instance. 2867 * 2868 * Returns the packed string. 2869 * 2870 * Raises a runtime exception if a given argument value does not match the 2871 * required type of the corresponding format string directive. 2872 * 2873 * @function module:struct.instance#pack 2874 * 2875 * @param {...*} values 2876 * Variable number of values to pack. 2877 * 2878 * @returns {string} 2879 * 2880 * @example 2881 * const fmt = struct.new(…); 2882 * const data = fmt.pack(…); 2883 */ 2884 static uc_value_t * 2885 uc_struct_pack(uc_vm_t *vm, size_t nargs) 2886 { 2887 formatstate_t **state = uc_fn_this("struct"); 2888 2889 if (!state || !*state) 2890 return NULL; 2891 2892 return uc_pack_common(vm, nargs, *state, 0); 2893 } 2894 2895 /** 2896 * Unpack given byte string. 2897 * 2898 * The `unpack()` function interpretes a byte string according to the given 2899 * format instance and returns the resulting values. If the optional offset 2900 * argument is given, unpacking starts from this byte position within the input. 2901 * If not specified, the start offset defaults to `0`, the start of the given 2902 * input string. 2903 * 2904 * Returns an array of unpacked values. 
2905 * 2906 * Raises a runtime exception if an invalid input string or offset value is 2907 * given. 2908 * 2909 * @function module:struct.instance#unpack 2910 * 2911 * @param {string} input 2912 * The input string to unpack. 2913 * 2914 * @param {number} [offset=0] 2915 * The offset within the input string to start unpacking from. 2916 * 2917 * @returns {array} 2918 * 2919 * @example 2920 * const fmt = struct.new(…); 2921 * const values = fmt.unpack(…); 2922 */ 2923 static uc_value_t * 2924 uc_struct_unpack(uc_vm_t *vm, size_t nargs) 2925 { 2926 formatstate_t **state = uc_fn_this("struct"); 2927 2928 if (!state || !*state) 2929 return NULL; 2930 2931 return uc_unpack_common(vm, nargs, *state, 0); 2932 } 2933 2934 2935 static const uc_function_list_t struct_inst_fns[] = { 2936 { "pack", uc_struct_pack }, 2937 { "unpack", uc_struct_unpack } 2938 }; 2939 2940 static const uc_function_list_t struct_fns[] = { 2941 { "pack", uc_pack }, 2942 { "unpack", uc_unpack }, 2943 { "new", uc_struct_new } 2944 }; 2945 2946 void uc_module_init(uc_vm_t *vm, uc_value_t *scope) 2947 { 2948 optimize_functions(); 2949 2950 uc_function_list_register(scope, struct_fns); 2951 2952 struct_type = uc_type_declare(vm, "struct", struct_inst_fns, uc_struct_gc); 2953 } 2954
This page was automatically generated by LXR 0.3.1. • OpenWrt