diff --git a/.gitignore b/.gitignore index 31e43aa61efc..8d0c7d33935c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,80 +1,86 @@ *.config *.creator *.files *.includes *.creator.user* *.cflags *.cxxflags bin/*bc bin/*bc.exe bin/*dc bin/*dc.exe bin/bcl bc.old *.o *.a .log_*.txt .test.txt .math.txt .results.txt .ops.txt manuals/bc.1 manuals/bc.1.ronn manuals/bc.1.md manuals/dc.1 manuals/dc.1.ronn manuals/dc.1.md gen/strgen lib.c lib2.c lib3.c bc_help.c dc_help.c config.mak timeconst.bc Makefile +bcl.pc +build/* tests/fuzzing/bc_outputs1/* tests/fuzzing/bc_outputs2/* tests/fuzzing/bc_outputs3/* tests/fuzzing/dc_outputs/* tests/bc_outputs/* tests/dc_outputs/* .gdb_history # Ignore the generated test files parse.txt parse_results.txt print.txt print_results.txt bessel.txt bessel_results.txt prime.txt strings2.txt strings2_results.txt tests/bc/scripts/add.txt tests/bc/scripts/divide.txt tests/bc/scripts/multiply.txt tests/bc/scripts/subtract.txt tests/bc/scripts/strings2.txt benchmarks/bc/*.txt benchmarks/dc/*.txt scripts/ministat -scripts/bitgen +scripts/bitfuncgen perf.data perf.data.old *.gcda *.gcno *.gcov *.html *.profraw core.* cscope*.out tags *.vcxproj.user -Debug/* -Release/* +vs/.vs/* +vs/bin/* +vs/lib/* +vs/src2/* +vs/tests/*.txt +vs/tests/*.exe diff --git a/NEWS.md b/NEWS.md index 9a354e537d9f..0f28d552df18 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,1239 +1,1244 @@ # News +## 5.2.3 + +This is a production release that fixes one bug, a parse error when passing a +file to `bc` using `-f` if that file had a multiline comment or string in it. + ## 5.2.2 This is a production release that fixes one bug, a segmentation fault if `argv[0]` equals `NULL`. This is not a critical bug; there will be no vulnerability as far as I can tell. There is no need to update if you do not wish to. ## 5.2.1 This is a production release that fixes two parse bugs when in POSIX standard mode. 
One of these bugs was due to a quirk of the POSIX grammar, and the other was because `bc` was too strict. ## 5.2.0 This is a production release that adds a new feature, fixes some bugs, and adds out-of-source builds and a `pkg-config` file for `bcl`. The new feature is the ability to turn off exiting on expressions. It is also possible to set the default using `configure.sh`. This behavior used to exist with the `BC_EXPR_EXIT` environment variable, which is now used again. Bugs fixed include: * Some possible race conditions with error handling. * Install and uninstall targets for `bcl` did not work. ## 5.1.1 This is a production release that completes a bug fix from `5.1.0`. The bug exists in all versions of `bc`. The bug was that `if` statements without `else` statements would not be handled correctly at the end of files or right before a function definition. ## 5.1.0 This is a production release with some fixes and new features. * Fixed a bug where an `if` statement without an `else` before defining a function caused an error. * Fixed a bug with the `bc` banner and `-q`. * Fixed a bug on Windows where files were not read correctly. * Added a command-line flag (`-z`) to make `bc` and `dc` print leading zeroes on numbers `-1 < x < 1`. * Added four functions to `lib2.bc` (`plz()`, `plznl()`, `pnlz()`, and `pnlznl()`) to allow printing numbers with or without leading zeros, despite the use of `-z` or not. * Added builtin functions to query global state like line length, global stacks, and leading zeroes. * Added a command-line flag (`-L`) to disable wrapping when printing numbers. * Improved builds on Windows. ## 5.0.2 This is a production release with one fix for a flaky test. If you have not experienced problems with the test suite, you do ***NOT*** need to upgrade. The test was one that tested whether `bc` fails gracefully when it can't allocate memory. Unfortunately, there are cases when Linux and FreeBSD lie and pretend to allocate the memory. 
The reason they do this is because a lot of programs don't use all of the memory they allocate, so those OS's usually get away with it. However, this `bc` uses all of the memory it allocates (at least at page granularity), so when it tries to use the memory, FreeBSD and Linux kill it. This only happens sometimes, however. Other times (on my machine), they do, in fact, refuse the request. So I changed the test to not test for that because I think the graceful failure code won't really change much. ## 5.0.1 This is a production release with two fixes: * Fix for the build on Mac OSX. * Fix for the build on Android. Users that do not use those platforms do ***NOT*** need to update. ## 5.0.0 This is a major production release with several changes: * Added support for OpenBSD's `pledge()` and `unveil()`. * Fixed print bug where a backslash newline combo was printed even if only one digit was left, something I blindly copied from GNU `bc`, like a fool. * Fixed bugs in the manuals. * Fixed a possible multiplication overflow in power. * Temporary numbers are garbage collected if allocation fails, and the allocation is retried. This is to make `bc` and `dc` more resilient to running out of memory. * Limited the number of temporary numbers and made the space for them static so that allocating more space for them cannot fail. * Allowed integers with non-zero `scale` to be used with power, places, and shift operators. * Added greatest common divisor and least common multiple to `lib2.bc`. * Added `SIGQUIT` handling to history. * Added a command to `dc` (`y`) to get the length of register stacks. * Fixed multi-digit bugs in `lib2.bc`. * Removed the no prompt build option. * Created settings that builders can set defaults for and users can set their preferences for. This includes the `bc` banner, resetting on `SIGINT`, TTY mode, and prompt. * Added history support to Windows. * Fixed bugs with the handling of register names in `dc`. 
* Fixed bugs with multi-line comments and strings in both calculators. * Added a new error type and message for `dc` when register stacks don't have enough items. * Optimized string allocation. * Made `bc` and `dc` UTF-8 capable. * Fixed a bug with `void` functions. * Fixed a misspelled symbol in `bcl`. This is technically a breaking change, which requires this to be `5.0.0`. * Added the ability for users to get the copyright banner back. * Added the ability for users to have `bc` and `dc` quit on `SIGINT`. * Added the ability for users to disable prompt and TTY mode by environment variables. * Added the ability for users to redefine keywords. This is another reason this is `5.0.0`. * Added `dc`'s modular exponentiation and divmod to `bc`. * Added the ability to assign strings to variables and array elements and pass them to functions in `bc`. * Added `dc`'s asciify command and stream printing to `bc`. * Added a command to `dc` (`Y`) to get the length of an array. * Added a command to `dc` (`,`) to get the depth of the execution stack. * Added bitwise and, or, xor, left shift, right shift, reverse, left rotate, right rotate, and mod functions to `lib2.bc`. * Added the functions `s2u(x)` and `s2un(x,n)`, to `lib2.bc`. ## 4.0.2 This is a production release that fixes two bugs: 1. If no files are used and the first statement on `stdin` is invalid, `scale` would not be set to `20` even if `-l` was used. 2. When using history, `bc` failed to respond properly to `SIGSTOP` and `SIGTSTP`. ## 4.0.1 This is a production release that only adds one thing: flushing output when it is printed with a print statement. ## 4.0.0 This is a production release with many fixes, a new command-line option, and a big surprise: * A bug was fixed in `dc`'s `P` command where the item on the stack was *not* popped. * Various bugs in the manuals have been fixed. * A known bug was fixed where history did not interact well with prompts printed by user code without newlines. 
* A new command-line option, `-R` and `--no-read-prompt` was added to disable just the prompt when using `read()` (`bc`) or `?` (`dc`). * And finally, **official support for Windows was added**. The last item is why this is a major version bump. Currently, only one set of build options (extra math and prompt enabled, history and NLS/locale support disabled, both calculators enabled) is supported on Windows. However, both debug and release builds are supported. In addition, Windows builds are supported for the library (`bcl`). For more details about how to build on Windows, see the [README][5] or the [build manual][13]. ## 3.3.4 This is a production release that fixes a small bug. The bug was that output was not flushed before a `read()` call, so prompts without a newline on the end were not flushed before the `read()` call. This is such a tiny bug that users only need to upgrade if they are affected. ## 3.3.3 This is a production release with one tweak and fixes for manuals. The tweak is that `length(0)` returns `1` instead of `0`. In `3.3.1`, I changed it so `length(0.x)`, where `x` could be any number of digits, returned the `scale`, but `length(0)` still returned `0` because I believe that `0` has `0` significant digits. After request of FreeBSD and considering the arguments of a mathematician, compatibility with other `bc`'s, and the expectations of users, I decided to make the change. The fixes for manuals fixed a bug where `--` was rendered as `-`. ## 3.3.2 This is a production release that fixes a divide-by-zero bug in `root()` in the [extended math library][16]. All previous versions with `root()` have the bug. ## 3.3.1 This is a production release that fixes a bug. The bug was in the reporting of number length when the value was 0. ## 3.3.0 This is a production release that changes one behavior and fixes documentation bugs. The changed behavior is the treatment of `-e` and `-f` when given through `BC_ENV_ARGS` or `DC_ENV_ARGS`.
Now `bc` and `dc` do not exit when those options (or their equivalents) are given through those environment variables. However, `bc` and `dc` still exit when they or their equivalents are given on the command-line. ## 3.2.7 This is a production release that removes a small non-portable shell operation in `configure.sh`. This problem was only noticed on OpenBSD, not FreeBSD or Linux. Non-OpenBSD users do ***NOT*** need to upgrade, although NetBSD users may also need to upgrade. ## 3.2.6 This is a production release that fixes the build on FreeBSD. There was a syntax error in `configure.sh` that the Linux shell did not catch, and FreeBSD depends on the existence of `tests/all.sh`. All users that already upgraded to `3.2.5` should update to this release, with my apologies for the poor release of `3.2.5`. Other users should skip `3.2.5` in favor of this version. ## 3.2.5 This is a production release that fixes several bugs and adds a couple small things. The two most important bugs were bugs that caused `dc` to access memory out-of-bounds (crash in debug builds). This was found by upgrading to `afl++` from `afl`. Both were caused by a failure to distinguish between the same two cases. Another bug was the failure to put all of the licenses in the `LICENSE.md` file. Third, some warnings by `scan-build` were found and eliminated. This needed one big change: `bc` and `dc` now bail out as fast as possible on fatal errors instead of unwinding the stack. Fourth, the pseudo-random number now attempts to seed itself with `/dev/random` if `/dev/urandom` fails. Finally, this release has a few quality-of-life changes to the build system. The usage should not change at all; the only thing that changed was making sure the `Makefile.in` was written to rebuild properly when headers changed and to not rebuild when not necessary. ## 3.2.4 This is a production release that fixes a warning on `gcc` 6 or older, which does not have an attribute that is used.
Users do ***NOT*** need to upgrade if they don't use `gcc` 6 or older. ## 3.2.3 This is a production release that fixes a bug in `gen/strgen.sh`. I recently changed `gen/strgen.c`, but I did not change `gen/strgen.sh`. Users that do not use `gen/strgen.sh` do not need to upgrade. ## 3.2.2 This is a production release that fixes a portability bug in `configure.sh`. The bug was using the GNU `find` extension `-wholename`. ## 3.2.1 This is a production release that has one fix for `bcl(3)`. It is technically not a bug fix since the behavior is undefined, but the `BclNumber`s that `bcl_divmod()` returns will be set to `BCL_ERROR_INVALID_NUM` if there is an error. Previously, they were not set. ## 3.2.0 This is a production release that has one bug fix and a major addition. The bug fix was a missing `auto` variable in the bessel `j()` function in the math library. The major addition is a way to build a version of `bc`'s math code as a library. This is done with the `-a` option to `configure.sh`. The API for the library can be read in `./manuals/bcl.3.md` or `man bcl` once the library is installed with `make install`. This library was requested by developers before I even finished version 1.0, but I could not figure out how to do it until now. If the library has API breaking changes, the major version of `bc` will be incremented. ## 3.1.6 This is a production release that fixes a new warning from Clang 12 for FreeBSD and also removes some possible undefined behavior found by UBSan that compilers did not seem to take advantage of. Users do ***NOT*** need to upgrade, if they do not want to. ## 3.1.5 This is a production release that fixes the Chinese locales (which caused `bc` to crash) and a crash caused by `bc` executing code when it should not have been able to. ***ALL USERS SHOULD UPGRADE.*** ## 3.1.4 This is a production release that fixes one bug, changes two behaviors, and removes one environment variable. 
The bug is like the one in the last release except it applies if files are being executed. I also made the fix more general. The behavior that was changed is that `bc` now exits when given `-e`, `-f`, `--expression` or `--file`. However, if the last one of those is `-f-` (using `stdin` as the file), `bc` does not exit. If `-f-` exists and is not the last of the `-e` and `-f` options (and equivalents), `bc` gives a fatal error and exits. Next, I removed the `BC_EXPR_EXIT` and `DC_EXPR_EXIT` environment variables since their use is not needed with the behavior change. Finally, I made it so `bc` does not print the header, though the `-q` and `--quiet` options were kept for compatibility with GNU `bc`. ## 3.1.3 This is a production release that fixes one minor bug: if `bc` was invoked like the following, it would error: ``` echo "if (1 < 3) 1" | bc ``` Unless users run into this bug, they do not need to upgrade, but it is suggested that they do. ## 3.1.2 This is a production release that adds a way to install *all* locales. Users do ***NOT*** need to upgrade. For package maintainers wishing to make use of the change, just pass `-l` to `configure.sh`. ## 3.1.1 This is a production release that adds two Spanish locales. Users do ***NOT*** need to upgrade, unless they want those locales. ## 3.1.0 This is a production release that adjusts one behavior, fixes eight bugs, and improves manpages for FreeBSD. Because this release fixes bugs, **users and package maintainers should update to this version as soon as possible**. The behavior that was adjusted was how code from the `-e` and `-f` arguments (and equivalents) were executed. They used to be executed as one big chunk, but in this release, they are now executed line-by-line. The first bug fix was in how output to `stdout` was handled in `SIGINT`. If a `SIGINT` came in, the `stdout` buffer was not correctly flushed. In fact, a clean-up function was not getting called. This release fixes that bug.
The second bug is in how `dc` handled input from `stdin`. This affected `bc` as well since it was a mishandling of the `stdin` buffer. The third fixed bug was that `bc` and `dc` could `abort()` (in debug mode) when receiving a `SIGTERM`. This one was a race condition with pushing and popping items onto and out of vectors. The fourth bug fixed was that `bc` could leave extra items on the stack and thus, not properly clean up some memory. (The memory would still get `free()`'ed, but it would not be `free()`'ed when it could have been.) The next two bugs were bugs in `bc`'s parser that caused crashes when executing the resulting code. The last two bugs were crashes in `dc` that resulted from mishandling of strings. The manpage improvement was done by switching from [ronn][20] to [Pandoc][21] to generate manpages. Pandoc generates much cleaner manpages and doesn't leave blank lines where they shouldn't be. ## 3.0.3 This is a production release that adds one new feature: specific manpages. Before this release, `bc` and `dc` only used one manpage each that referred to various build options. This release changes it so there is one manpage set per relevant build type. Each manual only has information about its particular build, and `configure.sh` selects the correct set for install. ## 3.0.2 This is a production release that adds `utf8` locale symlinks and removes an unused `auto` variable from the `ceil()` function in the [extended math library][16]. Users do ***NOT*** need to update unless they want the locales. ## 3.0.1 This is a production release with two small changes. Users do ***NOT*** need to upgrade to this release; however, if they haven't upgraded to `3.0.0` yet, it may be worthwhile to upgrade to this release. The first change is fixing a compiler warning on FreeBSD with strict warnings on. The second change is to make the new implementation of `ceil()` in `lib2.bc` much more efficient. 
## 3.0.0 *Notes for package maintainers:* *First, the `2.7.0` release series saw a change in the option parsing. This made me change one error message and add a few others. The error message that was changed removed one format specifier. This means that `printf()` will segfault on old locale files. Unfortunately, `bc` cannot use any locale files except the global ones that are already installed, so it will use the previous ones while running tests during install. **If `bc` segfaults while running arg tests when updating, it is because the global locale files have not been replaced. Make sure to either prevent the test suite from running on update or remove the old locale files before updating.** (Removing the locale files can be done with `make uninstall` or by running the [`locale_uninstall.sh`][22] script.) Once this is done, `bc` should install without problems.* *Second, **the option to build without signal support has been removed**. See below for the reasons why.* This is a production release with some small bug fixes, a few improvements, three major bug fixes, and a complete redesign of `bc`'s error and signal handling. **Users and package maintainers should update to this version as soon as possible.** The first major bug fix was in how `bc` executed files. Previously, a whole file was parsed before it was executed, but if a function is defined *after* code, especially if the function definition was actually a redefinition, and the code before the definition referred to the previous function, this `bc` would replace the function before executing any code. The fix was to make sure that all code that existed before a function definition was executed. The second major bug fix was in `bc`'s `lib2.bc`. The `ceil()` function had a bug where a `0` in the decimal place after the truncation position, caused it to output the wrong numbers if there was any non-zero digit after.
The third major bug is that when passing parameters to functions, if an expression included an array (not an array element) as a parameter, it was accepted, when it should have been rejected. It is now correctly rejected. Beyond that, this `bc` got several improvements that both sped it up, improved the handling of signals, and improved the error handling. First, the requirements for `bc` were pushed back to POSIX 2008. `bc` uses one function, `strdup()`, which is not in POSIX 2001, and it is in the X/Open System Interfaces group 2001. It is, however, in POSIX 2008, and since POSIX 2008 is old enough to be supported anywhere that I care, that should be the requirement. Second, the BcVm global variable was put into `bss`. This actually slightly reduces the size of the executable from a massive code shrink, and it will stop `bc` from allocating a large set of memory when `bc` starts. Third, the default Karatsuba length was updated from 64 to 32 after making the optimization changes below, since 32 is going to be better than 64 after the changes. Fourth, Spanish translations were added. Fifth, the interpreter received a speedup to make performance on non-math-heavy scripts more competitive with GNU `bc`. While improvements did, in fact, get it much closer (see the [benchmarks][19]), it isn't quite there. There were several things done to speed up the interpreter: First, several small inefficiencies were removed. These inefficiencies included calling the function `bc_vec_pop(v)` twice instead of calling `bc_vec_npop(v, 2)`. They also included an extra function call for checking the size of the stack and checking the size of the stack more than once on several operations. Second, since the current `bc` function is the one that stores constants and strings, the program caches pointers to the current function's vectors of constants and strings to prevent needing to grab the current function in order to grab a constant or a string. 
Third, `bc` tries to reuse `BcNum`'s (the internal representation of arbitrary-precision numbers). If a `BcNum` has the default capacity of `BC_NUM_DEF_SIZE` (32 on 64-bit and 16 on 32-bit) when it is freed, it is added to a list of available `BcNum`'s. And then, when a `BcNum` is allocated with a capacity of `BC_NUM_DEF_SIZE` and any `BcNum`'s exist on the list of reusable ones, one of those ones is grabbed instead. In order to support these changes, the `BC_NUM_DEF_SIZE` was changed. It used to be 16 bytes on all systems, but it was changed to more closely align with the minimum allocation size on Linux, which is either 32 bytes (64-bit musl), 24 bytes (64-bit glibc), 16 bytes (32-bit musl), or 12 bytes (32-bit glibc). Since these are the minimum allocation sizes, these are the sizes that would be allocated anyway, making it worth it to just use the whole space, so the value of `BC_NUM_DEF_SIZE` on 64-bit systems was changed to 32 bytes. On top of that, at least on 64-bit, `BC_NUM_DEF_SIZE` supports numbers with either 72 integer digits or 45 integer digits and 27 fractional digits. This should be more than enough for most cases since `bc`'s default `scale` values are 0 or 20, meaning that, by default, it has at most 20 fractional digits. And 45 integer digits are *a lot*; it's enough to calculate the amount of mass in the Milky Way galaxy in kilograms. Also, 72 digits is enough to calculate the diameter of the universe in Planck lengths. (For 32-bit, these numbers are either 32 integer digits or 12 integer digits and 20 fractional digits. These are also quite big, and going much bigger on a 32-bit system seems a little pointless since 12 digits is just under a trillion and 20 fractional digits is still enough for about any use since `10^-20` light years is just under a millimeter.)
All of this together means that for ordinary uses, and even uses in scientific work, the default number size will be all that is needed, which means that nearly all, if not all, numbers will be reused, relieving pressure on the system allocator. I did several experiments to find the changes that had the most impact, especially with regard to reusing `BcNum`'s. One was putting `BcNum`'s into buckets according to their capacity in powers of 2 up to 512. That performed worse than `bc` did in `2.7.2`. Another was putting any `BcNum` on the reuse list that had a capacity of `BC_NUM_DEF_SIZE * 2` and reusing them for `BcNum`'s that requested `BC_NUM_DEF_SIZE`. This did reduce the amount of time spent, but it also spent a lot of time in the system allocator for an unknown reason. (When using `strace`, a bunch more `brk` calls showed up.) Just reusing `BcNum`'s that had exactly `BC_NUM_DEF_SIZE` capacity spent the smallest amount of time in both user and system time. This makes sense, especially with the changes to make `BC_NUM_DEF_SIZE` bigger on 64-bit systems, since the vast majority of numbers will only ever use numbers with a size less than or equal to `BC_NUM_DEF_SIZE`. Last of all, `bc`'s signal handling underwent a complete redesign. (This is the reason that this version is `3.0.0` and not `2.8.0`.) The change was to move from a polling approach to signal handling to an interrupt-based approach. Previously, every single loop condition had a check for signals. I suspect that this could be expensive when in tight loops. Now, the signal handler just uses `longjmp()` (actually `siglongjmp()`) to start an unwinding of the stack until it is stopped or the stack is unwound to `main()`, which just returns. If `bc` is currently executing code that cannot be safely interrupted (according to POSIX), then signals are "locked." The signal handler checks if the lock is taken, and if it is, it just sets the status to indicate that a signal arrived. 
Later, when the signal lock is released, the status is checked to see if a signal came in. If so, the stack unwinding starts. This design eliminates polling in favor of maintaining a stack of `jmp_buf`'s. This has its own performance implications, but it gives better interaction. And the cost of pushing and popping a `jmp_buf` in a function is paid at most twice. Most functions do not pay that price, and most of the rest only pay it once. (There are only some 3 functions in `bc` that push and pop a `jmp_buf` twice.) As a side effect of this change, I had to eliminate the use of `stdio.h` in `bc` because `stdio` does not play nice with signals and `longjmp()`. I implemented custom I/O buffer code that takes a fraction of the size. This means that static builds will be smaller, but non-static builds will be bigger, though they will have less linking time. This change is also good because my history implementation was already bypassing `stdio` for good reasons, and unifying the architecture was a win. Another reason for this change is that my `bc` should *always* behave correctly in the presence of signals like `SIGINT`, `SIGTERM`, and `SIGQUIT`. With the addition of my own I/O buffering, I needed to also make sure that the buffers were correctly flushed even when such signals happened. For this reason, I **removed the option to build without signal support**. As a nice side effect of this change, the error handling code could be changed to take advantage of the stack unwinding that signals used. This means that signals and error handling use the same code paths, which means that the stack unwinding is well-tested. (Errors are tested heavily in the test suite.) It also means that functions do not need to return a status code that ***every*** caller needs to check. This eliminated over 100 branches that simply checked return codes and then passed that return code up the stack if necessary. 
The code bloat savings from this is at least 1700 bytes on `x86_64`, *before* taking into account the extra code from removing `stdio.h`. ## 2.7.2 This is a production release with one major bug fix. The `length()` built-in function can take either a number or an array. If it takes an array, it returns the length of the array. Arrays can be passed by reference. The bug is that the `length()` function would not properly dereference arrays that were references. This is a bug that affects all users. **ALL USERS SHOULD UPDATE `bc`**. ## 2.7.1 This is a production release with fixes for new locales and fixes for compiler warnings on FreeBSD. ## 2.7.0 This is a production release with a bug fix for Linux, new translations, and new features. Bug fixes: * Option parsing in `BC_ENV_ARGS` was broken on Linux in 2.6.1 because `glibc`'s `getopt_long()` is broken. To get around that, and to support long options on every platform, an adapted version of [`optparse`][17] was added. Now, `bc` does not even use `getopt()`. * Parsing `BC_ENV_ARGS` with quotes now works. It isn't the smartest, but it does the job if there are spaces in file names. The following new languages are supported: * Dutch * Polish * Russian * Japanese * Simplified Chinese All of these translations were generated using [DeepL][18], so improvements are welcome. There is only one new feature: **`bc` now has a built-in pseudo-random number generator** (PRNG). The PRNG is seeded, making it useful for applications where `/dev/urandom` does not work because output needs to be reproducible. However, it also uses `/dev/urandom` to seed itself by default, so it will start with a good seed by default. It also outputs 32 bits on 32-bit platforms and 64 bits on 64-bit platforms, far better than the 15 bits of C's `rand()` and `bash`'s `$RANDOM`. In addition, the PRNG can take a bound, and when it gets a bound, it automatically adjusts to remove bias. It can also generate numbers of arbitrary size.
(As of the time of release, the largest pseudo-random number generated by this `bc` was generated with a bound of `2^(2^20)`.) ***IMPORTANT: read the [`bc` manual][9] and the [`dc` manual][10] to find out exactly what guarantees the PRNG provides. The underlying implementation is not guaranteed to stay the same, but the guarantees that it provides are guaranteed to stay the same regardless of the implementation.*** On top of that, four functions were added to `bc`'s [extended math library][16] to make using the PRNG easier: * `frand(p)`: Generates a number between `[0,1)` to `p` decimal places. * `ifrand(i, p)`: Generates an integer with bound `i` and adds it to `frand(p)`. * `srand(x)`: Randomizes the sign of `x`. In other words, it flips the sign of `x` with probability `0.5`. * `brand()`: Returns a random boolean value (either `0` or `1`). ## 2.6.1 This is a production release with a bug fix for FreeBSD. The bug was that when `bc` was built without long options, it would give a fatal error on every run. This was caused by a mishandling of `optind`. ## 2.6.0 This release is a production release ***with no bugfixes***. If you do not want to upgrade, you don't have to. No source code changed; the only thing that changed was `lib2.bc`. This release adds one function to the [extended math library][16]: `p(x, y)`, which calculates `x` to the power of `y`, whether or not `y` is an integer. (The `^` operator can only accept integer powers.) This release also includes a couple of small tweaks to the [extended math library][16], mostly to fix returning numbers with too high of `scale`. ## 2.5.3 This release is a production release which addresses inconsistencies in the Portuguese locales. No `bc` code was changed. The issues were that the ISO files used different naming, and also that the files that should have been symlinks were not. I did not catch that because GitHub rendered them the exact same way. ## 2.5.2 This release is a production release. 
No code was changed, but the build system was changed to allow `CFLAGS` to be given to `CC`, like this: ``` CC="gcc -O3 -march=native" ./configure.sh ``` If this happens, the flags are automatically put into `CFLAGS`, and the compiler is set appropriately. In the example above this means that `CC` will be "gcc" and `CFLAGS` will be "-O3 -march=native". This behavior was added to conform to GNU autotools practices. ## 2.5.1 This is a production release which addresses portability concerns discovered in the `bc` build system. No `bc` code was changed. * Support for Solaris SPARC and AIX was added. * Minor documentation edits were performed. * An option for `configure.sh` was added to disable long options if `getopt_long()` is missing. ## 2.5.0 This is a production release with new translations. No code changed. The translations were contributed by [bugcrazy][15], and they are for Portuguese, both Portugal and Brazil locales. ## 2.4.0 This is a production release primarily aimed at improving `dc`. * A couple of copy and paste errors in the [`dc` manual][10] were fixed. * `dc` startup was optimized by making sure it didn't have to set up `bc`-only things. * The `bc` `&&` and `||` operators were made available to `dc` through the `M` and `m` commands, respectively. * `dc` macros were changed to be tail call-optimized. The last item, tail call optimization, means that if the last thing in a macro is a call to another macro, then the old macro is popped before executing the new macro. This change was made to stop `dc` from consuming more and more memory as macros are executed in a loop. The `q` and `Q` commands still respect the "hidden" macros by way of recording how many macros were removed by tail call optimization. ## 2.3.2 This is a production release meant to fix warnings in the Gentoo `ebuild` by making it possible to disable binary stripping. Other users do *not* need to upgrade. ## 2.3.1 This is a production release.
It fixes a bug that caused `-1000000000 < -1` to return `0`. This only happened with negative numbers and only if the value on the left was more negative by a certain amount. That said, this bug *is* a bad bug, and needs to be fixed. **ALL USERS SHOULD UPDATE `bc`**. ## 2.3.0 This is a production release with changes to the build system. ## 2.2.0 This release is a production release. It only has new features and performance improvements. 1. The performance of `sqrt(x)` was improved. 2. The new function `root(x, n)` was added to the extended math library to calculate `n`th roots. 3. The new function `cbrt(x)` was added to the extended math library to calculate cube roots. ## 2.1.3 This is a non-critical release; it just changes the build system, and in non-breaking ways: 1. Linked locale files were changed to link to their sources with a relative link. 2. A bug in `configure.sh` that caused long option parsing to fail under `bash` was fixed. ## 2.1.2 This release is not a critical release. 1. A few codes were added to history. 2. Multiplication was optimized a bit more. 3. Addition and subtraction were both optimized a bit more. ## 2.1.1 This release contains a fix for the test suite made for Linux from Scratch: now the test suite prints `pass` when a test is passed. Other than that, there is no change in this release, so distros and other users do not need to upgrade. ## 2.1.0 This release is a production release. The following bugs were fixed: 1. A `dc` bug that caused stack mishandling was fixed. 2. A warning on OpenBSD was fixed. 3. Bugs in `ctrl+arrow` operations in history were fixed. 4. The ability to paste multiple lines in history was added. 5. A `bc` bug, mishandling of array arguments to functions, was fixed. 6. A crash caused by freeing the wrong pointer was fixed. 7. A `dc` bug where strings, in a rare case, were mishandled in parsing was fixed. In addition, the following changes were made: 1. Division was slightly optimized. 2. 
An option was added to the build to disable printing of prompts. 3. The special case of empty arguments is now handled. This is to prevent errors in scripts that end up passing empty arguments. 4. A harmless bug was fixed. This bug was that, with the pop instructions (mostly) removed (see below), `bc` would leave extra values on its stack for `void` functions and in a few other cases. These extra items would not affect anything put on the stack and would not cause any sort of crash or even buggy behavior, but they would cause `bc` to take more memory than it needed. On top of the above changes, the following optimizations were added: 1. The need for pop instructions in `bc` was removed. 2. Extra tests on every iteration of the interpreter loop were removed. 3. Updating function and code pointers on every iteration of the interpreter loop was changed to only updating them when necessary. 4. Extra assignments to pointers were removed. Altogether, these changes sped up the interpreter by around 2x. ***NOTE***: This is the last release with new features because this `bc` is now considered complete. From now on, only bug fixes and new translations will be added to this `bc`. ## 2.0.3 This is a production, bug-fix release. Two bugs were fixed in this release: 1. A rare and subtle signal handling bug was fixed. 2. A misbehavior on `0` to a negative power was fixed. The last bug bears some mentioning. When I originally wrote power, I did not thoroughly check its error cases; instead, I had it check if the first number was `0` and then if so, just return `0`. However, `0` to a negative power means that `1` will be divided by `0`, which is an error. I caught this, but only after I stopped being cocky. You see, sometime later, I had noticed that GNU `bc` returned an error, correctly, but I thought it was wrong simply because that's not what my `bc` did. I saw it again later and had a double take. I checked for real, finally, and found out that my `bc` was wrong all along. 
That was bad on me. But the bug was easy to fix, so it is fixed now. There are two other things in this release: 1. Subtraction was optimized by [Stefan Eßer][14]. 2. Division was also optimized, also by Stefan Eßer. ## 2.0.2 This release contains a fix for a possible overflow in the signal handling. I would be surprised if any users ran into it because it would only happen after 2 billion (`2^31-1`) `SIGINT`'s, but I saw it and had to fix it. ## 2.0.1 This release contains very few things that will apply to any users. 1. A slight bug in `dc`'s interactive mode was fixed. 2. A bug in the test suite that was only triggered on NetBSD was fixed. 3. **The `-P`/`--no-prompt` option** was added for users that do not want a prompt. 4. A `make check` target was added as an alias for `make test`. 5. `dc` got its own read prompt: `?> `. ## 2.0.0 This release is a production release. This release is also a little different from previous releases. From here on out, I do not plan on adding any more features to this `bc`; I believe that it is complete. However, there may be bug fix releases in the future, if I or any others manage to find bugs. This release has only a few new features: 1. `atan2(y, x)` was added to the extended math library as both `a2(y, x)` and `atan2(y, x)`. 2. Locales were fixed. 3. A **POSIX shell-compatible script was added as an alternative to compiling `gen/strgen.c`** on a host machine. More details about making the choice between the two can be found by running `./configure.sh --help` or reading the [build manual][13]. 4. Multiplication was optimized by using **diagonal multiplication**, rather than straight brute force. 5. The `locale_install.sh` script was fixed. 6. `dc` was given the ability to **use the environment variable `DC_ENV_ARGS`**. 7. `dc` was also given the ability to **use the `-i` or `--interactive`** options. 8. Printing the prompt was fixed so that it did not print when it shouldn't. 9. Signal handling was fixed. 10. 
**Handling of `SIGTERM` and `SIGQUIT`** was fixed. 11. The **built-in functions `maxibase()`, `maxobase()`, and `maxscale()`** (the commands `T`, `U`, `V` in `dc`, respectively) were added to allow scripts to query for the max allowable values of those globals. 12. Some incompatibilities with POSIX were fixed. In addition, this release is `2.0.0` for a big reason: the internal format for numbers changed. They used to be a `char` array. Now, they are an array of larger integers, packing more decimal digits into each integer. This has delivered ***HUGE*** performance improvements, especially for multiplication, division, and power. This `bc` should now be the fastest `bc` available, but I may be wrong. ## 1.2.8 This release contains a fix for a harmless bug (it is harmless in that it still works, but it just copies extra data) in the [`locale_install.sh`][12] script. ## 1.2.7 This version contains fixes for the build on Arch Linux. ## 1.2.6 This release removes the use of `local` in shell scripts because it's not POSIX shell-compatible, and also updates a man page that should have been updated a long time ago but was missed. ## 1.2.5 This release contains some missing locale `*.msg` files. ## 1.2.4 This release contains a few bug fixes and new French translations. ## 1.2.3 This release contains a fix for a bug: use of uninitialized data. Such data was only used when outputting an error message, but I am striving for perfection. As Michelangelo said, "Trifles make perfection, and perfection is no trifle." ## 1.2.2 This release contains fixes for OpenBSD. ## 1.2.1 This release contains bug fixes for some rare bugs. ## 1.2.0 This is a production release. There have been several changes since `1.1.0`: 1. The build system had some changes. 2. Locale support has been added. (Patches welcome for translations.) 3. **The ability to turn `ibase`, `obase`, and `scale` into stacks** was added with the `-g` command-line option. (See the [`bc` manual][9] for more details.) 4. 
Support for compiling on Mac OSX out of the box was added. 5. The extended math library got `t(x)`, `ceil(x)`, and some aliases. 6. The extended math library also got `r2d(x)` (for converting from radians to degrees) and `d2r(x)` (for converting from degrees to radians). This is to allow using degrees with the standard library. 7. Both calculators now accept numbers in **scientific notation**. See the [`bc` manual][9] and the [`dc` manual][10] for details. 8. Both calculators can **output in either scientific or engineering notation**. See the [`bc` manual][9] and the [`dc` manual][10] for details. 9. Some inefficiencies were removed. 10. Some bugs were fixed. 11. Some bugs in the extended library were fixed. 12. Some defects from [Coverity Scan][11] were fixed. ## 1.1.4 This release contains a fix to the build system that allows it to build on older versions of `glibc`. ## 1.1.3 This release contains a fix for a bug in the test suite where `bc` tests and `dc` tests could not be run in parallel. ## 1.1.2 This release has a fix for a history bug; the down arrow did not work. ## 1.1.1 This release fixes a bug in the `1.1.0` build system. The source is exactly the same. The bug that was fixed was a failure to install if no `EXECSUFFIX` was used. ## 1.1.0 This is a production release. However, many new features were added since `1.0`. 1. **The build system has been changed** to use a custom, POSIX shell-compatible configure script ([`configure.sh`][6]) to generate a POSIX make-compatible `Makefile`, which means that `bc` and `dc` now build out of the box on any POSIX-compatible system. 2. Out-of-memory and output errors now cause the `bc` to report the error, clean up, and die, rather than just reporting and trying to continue. 3. **Strings and constants are now garbage collected** when possible. 4. Signal handling and checking has been made more simple and more thorough. 5. `BcGlobals` was refactored into `BcVm` and `BcVm` was made global. 
Some procedure names were changed to reflect its difference to everything else. 6. Addition got a speed improvement. 7. Some common code for addition and multiplication was refactored into its own procedure. 8. A bug was removed where `dc` could have been selected, but the internal `#define` that returned `true` for a query about `dc` would not have returned `true`. 9. Useless calls to `bc_num_zero()` were removed. 10. **History support was added.** The history support is based off of a [UTF-8 aware fork][7] of [`linenoise`][8], which has been customized with `bc`'s own data structures and signal handling. 11. Generating C source from the math library now removes tabs from the library, shrinking the size of the executable. 12. The math library was shrunk. 13. Error handling and reporting was improved. 14. Reallocations were reduced by giving access to the request size for each operation. 15. **`abs()` (`b` command for `dc`) was added as a builtin.** 16. Both calculators were tested on FreeBSD. 17. Many obscure parse bugs were fixed. 18. Markdown and man page manuals were added, and the man pages are installed by `make install`. 19. Executable size was reduced, though the added features probably made the executable end up bigger. 20. **GNU-style array references were added as a supported feature.** 21. Allocations were reduced. 22. **New operators were added**: `$` (`$` for `dc`), `@` (`@` for `dc`), `@=`, `<<` (`H` for `dc`), `<<=`, `>>` (`h` for `dc`), and `>>=`. See the [`bc` manual][9] and the [`dc` manual][10] for more details. 23. **An extended math library was added.** This library contains code that makes it so I can replace my desktop calculator with this `bc`. See the [`bc` manual][9] for more details. 24. Support for all capital letters as numbers was added. 25. **Support for GNU-style void functions was added.** 26. A bug fix for improper handling of function parameters was added. 27. Precedence for the or (`||`) operator was changed to match GNU `bc`.
28. `dc` was given an explicit negation command. 29. `dc` was changed to be able to handle strings in arrays. ## 1.1 Release Candidate 3 This release is the eighth release candidate for 1.1, though it is the third release candidate meant as a general release candidate. The new code has not been tested as thoroughly as it should for release. ## 1.1 Release Candidate 2 This release is the seventh release candidate for 1.1, though it is the second release candidate meant as a general release candidate. The new code has not been tested as thoroughly as it should for release. ## 1.1 FreeBSD Beta 5 This release is the sixth release candidate for 1.1, though it is the fifth release candidate meant specifically to test if `bc` works on FreeBSD. The new code has not been tested as thoroughly as it should for release. ## 1.1 FreeBSD Beta 4 This release is the fifth release candidate for 1.1, though it is the fourth release candidate meant specifically to test if `bc` works on FreeBSD. The new code has not been tested as thoroughly as it should for release. ## 1.1 FreeBSD Beta 3 This release is the fourth release candidate for 1.1, though it is the third release candidate meant specifically to test if `bc` works on FreeBSD. The new code has not been tested as thoroughly as it should for release. ## 1.1 FreeBSD Beta 2 This release is the third release candidate for 1.1, though it is the second release candidate meant specifically to test if `bc` works on FreeBSD. The new code has not been tested as thoroughly as it should for release. ## 1.1 FreeBSD Beta 1 This release is the second release candidate for 1.1, though it is meant specifically to test if `bc` works on FreeBSD. The new code has not been tested as thoroughly as it should for release. ## 1.1 Release Candidate 1 This is the first release candidate for 1.1. The new code has not been tested as thoroughly as it should for release. ## 1.0 This is the first non-beta release. `bc` is ready for production use. 
As such, a lot has changed since 0.5. 1. `dc` has been added. It has been tested even more thoroughly than `bc` was for `0.5`. It does not have the `!` command, and for security reasons, it never will, so it is complete. 2. `bc` has been more thoroughly tested. An entire section of the test suite (for both programs) has been added to test for errors. 3. A prompt (`>>> `) has been added for interactive mode, making it easier to see inputs and outputs. 4. Interrupt handling has been improved, including elimination of race conditions (as much as possible). 5. MinGW and [Windows Subsystem for Linux][1] support has been added (see [xstatic][2] for binaries). 6. Memory leaks and errors have been eliminated (as far as ASan and Valgrind can tell). 7. Crashes have been eliminated (as far as [afl][3] can tell). 8. Karatsuba multiplication was added (and thoroughly tested), speeding up multiplication and power by orders of magnitude. 9. Performance was further enhanced by using a "divmod" function to reduce redundant divisions and by removing superfluous `memset()` calls. 10. To switch between Karatsuba and `O(n^2)` multiplication, the config variable `BC_NUM_KARATSUBA_LEN` was added. It is set to a sane default, but the optimal number can be found with [`karatsuba.py`][4] (requires Python 3) and then configured through `make`. 11. The random math test generator script was changed to Python 3 and improved. `bc` and `dc` have together been run through 30+ million random tests. 12. All known math bugs have been fixed, including out of control memory allocations in `sine` and `cosine` (that was actually a parse bug), certain cases of infinite loop on square root, and slight inaccuracies (as much as possible; see the [README][5]) in transcendental functions. 13. Parsing has been fixed as much as possible. 14. Test coverage was improved to 94.8%. The only paths not covered are ones that happen when `malloc()` or `realloc()` fails. 15.
An extension to get the length of an array was added. 16. The boolean not (`!`) had its precedence change to match negation. 17. Data input was hardened. 18. `bc` was made fully compliant with POSIX when the `-s` flag is used or `POSIXLY_CORRECT` is defined. 19. Error handling was improved. 20. `bc` now checks that files it is given are not directories. ## 1.0 Release Candidate 7 This is the seventh release candidate for 1.0. It fixes a few bugs in 1.0 Release Candidate 6. ## 1.0 Release Candidate 6 This is the sixth release candidate for 1.0. It fixes a few bugs in 1.0 Release Candidate 5. ## 1.0 Release Candidate 5 This is the fifth release candidate for 1.0. It fixes a few bugs in 1.0 Release Candidate 4. ## 1.0 Release Candidate 4 This is the fourth release candidate for 1.0. It fixes a few bugs in 1.0 Release Candidate 3. ## 1.0 Release Candidate 3 This is the third release candidate for 1.0. It fixes a few bugs in 1.0 Release Candidate 2. ## 1.0 Release Candidate 2 This is the second release candidate for 1.0. It fixes a few bugs in 1.0 Release Candidate 1. ## 1.0 Release Candidate 1 This is the first Release Candidate for 1.0. `bc` is complete, with `dc`, but it is not tested. ## 0.5 This beta release completes more features, but it is still not complete nor tested as thoroughly as necessary. ## 0.4.1 This beta release fixes a few bugs in 0.4. ## 0.4 This is a beta release. It does not have the complete set of features, and it is not thoroughly tested. 
[1]: https://docs.microsoft.com/en-us/windows/wsl/install-win10 [2]: https://pkg.musl.cc/bc/ [3]: http://lcamtuf.coredump.cx/afl/ [4]: ./scripts/karatsuba.py [5]: ./README.md [6]: ./configure.sh [7]: https://github.com/rain-1/linenoise-mob [8]: https://github.com/antirez/linenoise [9]: ./manuals/bc/A.1.md [10]: ./manuals/dc/A.1.md [11]: https://scan.coverity.com/projects/gavinhoward-bc [12]: ./scripts/locale_install.sh [13]: ./manuals/build.md [14]: https://github.com/stesser [15]: https://github.com/bugcrazy [16]: ./manuals/bc/A.1.md#extended-library [17]: https://github.com/skeeto/optparse [18]: https://www.deepl.com/translator [19]: ./manuals/benchmarks.md [20]: https://github.com/apjanke/ronn-ng [21]: https://pandoc.org/ [22]: ./scripts/locale_uninstall.sh diff --git a/benchmarks/bc/add.bc b/benchmarks/bc/add.bc new file mode 100644 index 000000000000..90a83e4758d9 --- /dev/null +++ b/benchmarks/bc/add.bc @@ -0,0 +1,21 @@ +#! /usr/bin/bc -lq + +print "scale = 20\n" +print "x = 1234567890 / scale\n" +print "len = length(x) + 1 + scale\n" +print "len *= 2\n" + +scale = 20 +x = 1234567890 / scale +len = length(x) + 1 + scale +len *= 2 + +for (i = 0; i <= len; ++i) { + print "a[", i, "] = x * (10^", i, ")\n" +} + +for (i = 1; i <= 10000; ++i) { + for (j = 0; j < len; ++j) { + print "v = a[", i, "] + a[", j, "]\n" + } +} diff --git a/benchmarks/bc/arrays.bc b/benchmarks/bc/arrays.bc new file mode 100644 index 000000000000..cc0276d6ad20 --- /dev/null +++ b/benchmarks/bc/arrays.bc @@ -0,0 +1,38 @@ +#! /usr/bin/bc +# +# SPDX-License-Identifier: BSD-2-Clause +# +# Copyright (c) 2018-2021 Gavin D. Howard and contributors. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. 
+# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# + +max = 1000000 + +for (i = 0; i < max; ++i) { + print "a", i, "[0] = ", i, "\n" +} + +print "halt\n" + +halt diff --git a/benchmarks/bc/arrays_and_constants.bc b/benchmarks/bc/arrays_and_constants.bc new file mode 100644 index 000000000000..9a2172ece5be --- /dev/null +++ b/benchmarks/bc/arrays_and_constants.bc @@ -0,0 +1,38 @@ +#! /usr/bin/bc +# +# SPDX-License-Identifier: BSD-2-Clause +# +# Copyright (c) 2018-2021 Gavin D. Howard and contributors. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# + +max = 1000000 + +for (i = 0; i < max; ++i) { + print "b", i, "[100] = ", i, "\n" +} + +print "halt\n" + +halt diff --git a/benchmarks/bc/bitfuncs.bc b/benchmarks/bc/bitfuncs.bc new file mode 100644 index 000000000000..69d357c2ce8a --- /dev/null +++ b/benchmarks/bc/bitfuncs.bc @@ -0,0 +1,18 @@ +#! /usr/bin/bc -lq + +scale = 0 +max = 10000 + +print "scale = 0\n" + +for (i = 0; i < max; ++i) { + + a = rand() + b = rand() + + print "band(", a, ", ", b, ")\n" + print "bor(", a, ", ", b, ")\n" + print "bxor(", a, ", ", b, ")\n" + print "bshl(", a, ", ", b % 32, ")\n" + print "bshr(", a, ", ", b % 32, ")\n" +} diff --git a/benchmarks/bc/constants.bc b/benchmarks/bc/constants.bc new file mode 100644 index 000000000000..1f7b92d47566 --- /dev/null +++ b/benchmarks/bc/constants.bc @@ -0,0 +1,41 @@ +#! /usr/bin/bc +# +# SPDX-License-Identifier: BSD-2-Clause +# +# Copyright (c) 2018-2021 Gavin D. Howard and contributors. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# + +max = 1000 +max2 = 1000 + +for (i = 0; i < max; ++i) { + + print "c = ", i, "\n" + print "e = 0.", i, "\n" + + for (j = 0; j < max2; ++j) { + print "d = ", i, ".", j, "\n" + } +} diff --git a/benchmarks/bc/divide.bc b/benchmarks/bc/divide.bc new file mode 100644 index 000000000000..227794badbcb --- /dev/null +++ b/benchmarks/bc/divide.bc @@ -0,0 +1,26 @@ +#! 
/usr/bin/bc -lq + +print "scale = 20\n" +print "x = 1234567890 * 10^(-scale)\n" +print "len = 1 + 2 * scale\n" +print "scale += 10\n" + +scale = 20 +x = 1234567890 * 10^(-scale) +len = 1 + 2 * scale + +scale += 10 + +for (i = 0; i <= len; ++i) { + print "a[", i, "] = x * (10^", i, ")\n" +} + +for (i = 1; i <= 10000; ++i) { + for (j = 0; j < len; ++j) { + print "v = a[0] / a[", j, "]\n" + print "v = a[", i, "] / a[", j, "]\n" + print "v = (a[0] * ", i, ") / a[", j, "]\n" + print "v = a[0] / (a[", j, "] * ", i, ")\n" + print "v = (a[0] * ", i, ") / (a[", j, "] * ", i, ")\n" + } +} diff --git a/benchmarks/bc/functions.bc b/benchmarks/bc/functions.bc new file mode 100644 index 000000000000..7848c8df0c9f --- /dev/null +++ b/benchmarks/bc/functions.bc @@ -0,0 +1,38 @@ +#! /usr/bin/bc +# +# SPDX-License-Identifier: BSD-2-Clause +# +# Copyright (c) 2018-2021 Gavin D. Howard and contributors. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# + +max = 1000000 + +for (i = 0; i < max; ++i) { + print "define etsna", i, "(n) {\n\tn\n}\n" +} + +print "halt\n" + +halt diff --git a/benchmarks/bc/irand_long.bc b/benchmarks/bc/irand_long.bc new file mode 100644 index 000000000000..2d2404942f83 --- /dev/null +++ b/benchmarks/bc/irand_long.bc @@ -0,0 +1,12 @@ +#! /usr/bin/bc -lq + +start = 2^256 +end = start + 10000000 + +for (i = start; i < end; ++i) { + print "irand(", i, ")\n" +} + +print "halt\n" + +halt diff --git a/benchmarks/bc/irand_short.bc b/benchmarks/bc/irand_short.bc new file mode 100644 index 000000000000..a53d407879f3 --- /dev/null +++ b/benchmarks/bc/irand_short.bc @@ -0,0 +1,9 @@ +#! /usr/bin/bc -lq + +for (i = 2; i < 10000000; ++i) { + print "irand(", i, ")\n" +} + +print "halt\n" + +halt diff --git a/benchmarks/bc/lib.bc b/benchmarks/bc/lib.bc new file mode 100644 index 000000000000..fb7cd1b93354 --- /dev/null +++ b/benchmarks/bc/lib.bc @@ -0,0 +1,11 @@ +#! /usr/bin/bc -lq + +print "for (i = 100; i < 1000; ++i) {\n" +print " v = pi(i)\n" +print " v = e(v)\n" +print " v = l(v)\n" +print "}\n" + +print "halt\n" + +halt diff --git a/benchmarks/bc/multiply.bc b/benchmarks/bc/multiply.bc new file mode 100644 index 000000000000..d4ed08e055c8 --- /dev/null +++ b/benchmarks/bc/multiply.bc @@ -0,0 +1,23 @@ +#! 
/usr/bin/bc -lq + +print "scale = 20\n" +print "x = 1234567890 / scale\n" +print "len = length(x) + 1 + scale\n" + +scale = 20 +x = 1234567890 / scale +len = length(x) + 1 + scale + +for (i = 0; i <= len; ++i) { + print "a[", i, "] = x * (10^", i, ")\n" +} + +for (i = 1; i <= 10000; ++i) { + for (j = 0; j < len; ++j) { + print "v = a[0] * a[", j, "]\n" + print "v = a[", i, "] * a[", j, "]\n" + print "v = (a[0] * ", i, ") * a[", j, "]\n" + print "v = a[0] * (a[", j, "] * ", i, ")\n" + print "v = (a[0] * ", i, ") * (a[", j, "] * ", i, ")\n" + } +} diff --git a/benchmarks/bc/postfix_incdec.bc b/benchmarks/bc/postfix_incdec.bc new file mode 100644 index 000000000000..2437f4c4c820 --- /dev/null +++ b/benchmarks/bc/postfix_incdec.bc @@ -0,0 +1,11 @@ +#! /usr/bin/bc -lq + +max = 1000000 + +for (i = 0; i < max; ++i) { + print "i++\ni--\n" +} + +print "halt\n" + +halt diff --git a/benchmarks/bc/power.bc b/benchmarks/bc/power.bc new file mode 100644 index 000000000000..b067aa732d10 --- /dev/null +++ b/benchmarks/bc/power.bc @@ -0,0 +1,2 @@ +#! /usr/bin/bc -lq + diff --git a/benchmarks/bc/strings.bc b/benchmarks/bc/strings.bc new file mode 100644 index 000000000000..a97017ea78b4 --- /dev/null +++ b/benchmarks/bc/strings.bc @@ -0,0 +1,40 @@ +#! /usr/bin/bc +# +# SPDX-License-Identifier: BSD-2-Clause +# +# Copyright (c) 2018-2021 Gavin D. Howard and contributors. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# + +max = 1000000 + +print "\qasotehnuasnotehustnaoheusntaoheustnaoheusntaoehunsatoheuastoehuaosnetuhaosetnuhaosentuahoesntuahoeuhstoeunhatoehusanotehusatnoheus\q\n" + +for (i = 0; i < max; ++i) { + print "\qabc", i, " = ", i, "\\n\q\n" +} + +print "halt\n" + +halt diff --git a/benchmarks/bc/subtract.bc b/benchmarks/bc/subtract.bc new file mode 100644 index 000000000000..b88bd60e935c --- /dev/null +++ b/benchmarks/bc/subtract.bc @@ -0,0 +1,22 @@ +#! /usr/bin/bc -lq + +print "scale = 20\n" +print "x = 1234567890 / scale\n" +print "len = length(x) + 1 + scale\n" +print "len *= 2\n" + +scale = 20 +x = 1234567890 / scale +len = length(x) + 1 + scale +len *= 2 + +for (i = 0; i <= len; ++i) { + print "a[", i, "] = x * (10^", i, ")\n" +} + +for (i = 1; i <= 10000; ++i) { + for (j = 0; j < len; ++j) { + print "v = a[", i, "] - a[", j, "]\n" + } +} + diff --git a/benchmarks/dc/modexp.dc b/benchmarks/dc/modexp.dc new file mode 100644 index 000000000000..48f304cb92da --- /dev/null +++ b/benchmarks/dc/modexp.dc @@ -0,0 +1,42 @@ +#! /usr/bin/dc +# +# SPDX-License-Identifier: BSD-2-Clause +# +# Copyright (c) 2018-2021 Gavin D. Howard and contributors. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +[ ]ss +[|]so +100sm 0si +[ + li1+si 0sj + [ + lj1+sj 0sk + [ + lk1+sk lin lsn ljn lsn lkn lsn lon 10P lk lm ! + #if BC_C11 #include #endif // BC_C11 #include #include #include /// The instructions for bytecode. typedef enum BcInst { #if BC_ENABLED /// Postfix increment and decrement. Prefix are translated into /// BC_INST_ONE with either BC_INST_ASSIGN_PLUS or BC_INST_ASSIGN_MINUS. BC_INST_INC = 0, BC_INST_DEC, #endif // BC_ENABLED /// Unary negation. BC_INST_NEG, /// Boolean not. BC_INST_BOOL_NOT, #if BC_ENABLE_EXTRA_MATH /// Truncation operator. BC_INST_TRUNC, #endif // BC_ENABLE_EXTRA_MATH /// These should be self-explanatory. 
BC_INST_POWER, BC_INST_MULTIPLY, BC_INST_DIVIDE, BC_INST_MODULUS, BC_INST_PLUS, BC_INST_MINUS, #if BC_ENABLE_EXTRA_MATH /// Places operator. BC_INST_PLACES, /// Shift operators. BC_INST_LSHIFT, BC_INST_RSHIFT, #endif // BC_ENABLE_EXTRA_MATH /// Comparison operators. BC_INST_REL_EQ, BC_INST_REL_LE, BC_INST_REL_GE, BC_INST_REL_NE, BC_INST_REL_LT, BC_INST_REL_GT, /// Boolean or and and. BC_INST_BOOL_OR, BC_INST_BOOL_AND, #if BC_ENABLED /// Same as the normal operators, but assigment. So ^=, *=, /=, etc. BC_INST_ASSIGN_POWER, BC_INST_ASSIGN_MULTIPLY, BC_INST_ASSIGN_DIVIDE, BC_INST_ASSIGN_MODULUS, BC_INST_ASSIGN_PLUS, BC_INST_ASSIGN_MINUS, #if BC_ENABLE_EXTRA_MATH /// Places and shift assignment operators. BC_INST_ASSIGN_PLACES, BC_INST_ASSIGN_LSHIFT, BC_INST_ASSIGN_RSHIFT, #endif // BC_ENABLE_EXTRA_MATH /// Normal assignment. BC_INST_ASSIGN, /// bc and dc detect when the value from an assignment is not necessary. /// For example, a plain assignment statement means the value is never used. /// In those cases, we can get lots of performance back by not even creating /// a copy at all. In fact, it saves a copy, a push onto the results stack, /// a pop from the results stack, and a free. Definitely worth it to detect. BC_INST_ASSIGN_POWER_NO_VAL, BC_INST_ASSIGN_MULTIPLY_NO_VAL, BC_INST_ASSIGN_DIVIDE_NO_VAL, BC_INST_ASSIGN_MODULUS_NO_VAL, BC_INST_ASSIGN_PLUS_NO_VAL, BC_INST_ASSIGN_MINUS_NO_VAL, #if BC_ENABLE_EXTRA_MATH /// Same as above. BC_INST_ASSIGN_PLACES_NO_VAL, BC_INST_ASSIGN_LSHIFT_NO_VAL, BC_INST_ASSIGN_RSHIFT_NO_VAL, #endif // BC_ENABLE_EXTRA_MATH #endif // BC_ENABLED /// Normal assignment that pushes no value on the stack. BC_INST_ASSIGN_NO_VAL, /// Push a constant onto the results stack. BC_INST_NUM, /// Push a variable onto the results stack. BC_INST_VAR, /// Push an array element onto the results stack. BC_INST_ARRAY_ELEM, /// Push an array onto the results stack. 
This is different from pushing an /// array *element* onto the results stack; it pushes a reference to the /// whole array. This is needed in bc for function arguments that are /// arrays. It is also needed for returning the length of an array. BC_INST_ARRAY, /// Push a zero or a one onto the stack. These are special cased because it /// does help performance, particularly for one since inc/dec operators /// use it. BC_INST_ZERO, BC_INST_ONE, #if BC_ENABLED /// Push the last printed value onto the stack. BC_INST_LAST, #endif // BC_ENABLED /// Push the value of any of the globals onto the stack. BC_INST_IBASE, BC_INST_OBASE, BC_INST_SCALE, #if BC_ENABLE_EXTRA_MATH /// Push the value of the seed global onto the stack. BC_INST_SEED, #endif // BC_ENABLE_EXTRA_MATH /// These are builtin functions. BC_INST_LENGTH, BC_INST_SCALE_FUNC, BC_INST_SQRT, BC_INST_ABS, #if BC_ENABLE_EXTRA_MATH /// Another builtin function. BC_INST_IRAND, #endif // BC_ENABLE_EXTRA_MATH /// Asciify. BC_INST_ASCIIFY, /// Another builtin function. BC_INST_READ, #if BC_ENABLE_EXTRA_MATH /// Another builtin function. BC_INST_RAND, #endif // BC_ENABLE_EXTRA_MATH /// Return the max for the various globals. BC_INST_MAXIBASE, BC_INST_MAXOBASE, BC_INST_MAXSCALE, #if BC_ENABLE_EXTRA_MATH /// Return the max value returned by rand(). BC_INST_MAXRAND, #endif // BC_ENABLE_EXTRA_MATH /// bc line_length() builtin function. BC_INST_LINE_LENGTH, #if BC_ENABLED /// bc global_stacks() builtin function. BC_INST_GLOBAL_STACKS, #endif // BC_ENABLED /// bc leading_zero() builtin function. BC_INST_LEADING_ZERO, /// This is slightly misnamed versus BC_INST_PRINT_POP. Well, it is in bc. /// dc uses this instruction to print, but not pop. That's valid in dc. /// However, in bc, it is *never* valid to print without popping. In bc, /// BC_INST_PRINT_POP is used to indicate when a string should be printed /// because of a print statement or whether it should be printed raw. 
The /// reason for this is because a print statement handles escaped characters. /// So BC_INST_PRINT_POP is for printing a string from a print statement, /// BC_INST_PRINT_STR is for printing a string by itself. /// /// In dc, BC_INST_PRINT_POP prints and pops, and BC_INST_PRINT just prints. /// /// Oh, and BC_INST_STR pushes a string onto the results stack. BC_INST_PRINT, BC_INST_PRINT_POP, BC_INST_STR, #if BC_ENABLED BC_INST_PRINT_STR, /// Jumps unconditionally. BC_INST_JUMP, /// Jumps if the top of the results stack is zero (condition failed). It /// turns out that we only want to jump when conditions fail to "skip" code. BC_INST_JUMP_ZERO, /// Call a function. BC_INST_CALL, /// Return the top of the stack to the caller. BC_INST_RET, /// Return 0 to the caller. BC_INST_RET0, /// Special return instruction for void functions. BC_INST_RET_VOID, /// Special halt instruction. BC_INST_HALT, #endif // BC_ENABLED /// Pop an item off of the results stack. BC_INST_POP, /// Swaps the top two items on the results stack. BC_INST_SWAP, /// Modular exponentiation. BC_INST_MODEXP, /// Do divide and modulus at the same time. BC_INST_DIVMOD, /// Turns a number into a string and prints it. BC_INST_PRINT_STREAM, #if DC_ENABLED /// dc's return; it pops an executing string off of the stack. BC_INST_POP_EXEC, /// Unconditionally execute a string. BC_INST_EXECUTE, /// Conditionally execute a string. BC_INST_EXEC_COND, /// Prints each item on the results stack, separated by newlines. BC_INST_PRINT_STACK, /// Pops everything off of the results stack. BC_INST_CLEAR_STACK, /// Pushes the current length of a register stack onto the results stack. BC_INST_REG_STACK_LEN, /// Pushes the current length of the results stack onto the results stack. BC_INST_STACK_LEN, /// Pushes a copy of the item on the top of the results stack onto the /// results stack. BC_INST_DUPLICATE, /// Copies the value in a register and pushes the copy onto the results /// stack. 
BC_INST_LOAD, /// Pops an item off of a register stack and pushes it onto the results /// stack. BC_INST_PUSH_VAR, /// Pops an item off of the results stack and pushes it onto a register's /// stack. BC_INST_PUSH_TO_VAR, /// Quit. BC_INST_QUIT, /// Quit executing some number of strings. BC_INST_NQUIT, /// Push the depth of the execution stack onto the stack. BC_INST_EXEC_STACK_LEN, #endif // DC_ENABLED /// Invalid instruction. BC_INST_INVALID, } BcInst; #if BC_C11 static_assert(BC_INST_INVALID <= UCHAR_MAX, "Too many instructions to fit into an unsigned char"); #endif // BC_C11 /// Used by maps to identify where items are in the array. typedef struct BcId { /// The name of the item. char *name; /// The index into the array where the item is. size_t idx; } BcId; /// The location of a var, array, or array element. typedef struct BcLoc { /// The index of the var or array. size_t loc; /// The index of the array element. Only used for array elements. size_t idx; } BcLoc; /// An entry for a constant. typedef struct BcConst { /// The original string as parsed from the source code. char *val; /// The last base that the constant was parsed in. BcBigDig base; /// The parsed constant. BcNum num; } BcConst; /// A function. This is also used in dc, not just bc. The reason is that strings /// are executed in dc, and they are converted to functions in order to be /// executed. typedef struct BcFunc { /// The bytecode instructions. BcVec code; #if BC_ENABLED /// The labels. This is a vector of indices. The index is the index into /// the bytecode vector where the label is. BcVec labels; /// The autos for the function. The first items are the parameters, and the /// arguments to the parameters must match the types in this vector. BcVec autos; /// The number of parameters the function takes. size_t nparams; #endif // BC_ENABLED /// The strings encountered in the function. BcVec strs; /// The constants encountered in the function. BcVec consts; /// The function's name. 
const char *name; #if BC_ENABLED /// True if the function is a void function. bool voidfn; #endif // BC_ENABLED } BcFunc; /// Types of results that can be pushed onto the results stack. typedef enum BcResultType { /// Result is a variable. BC_RESULT_VAR, /// Result is an array element. BC_RESULT_ARRAY_ELEM, /// Result is an array. This is only allowed for function arguments or /// returning the length of the array. BC_RESULT_ARRAY, /// Result is a string. BC_RESULT_STR, /// Result is a temporary. This is used for the result of almost all /// expressions. BC_RESULT_TEMP, /// Special casing the two below gave performance improvements. /// Result is a 0. BC_RESULT_ZERO, /// Result is a 1. Useful for inc/dec operators. BC_RESULT_ONE, #if BC_ENABLED /// Result is the special "last" variable. BC_RESULT_LAST, /// Result is the return value of a void function. BC_RESULT_VOID, #endif // BC_ENABLED /// Result is the value of ibase. BC_RESULT_IBASE, /// Result is the value of obase. BC_RESULT_OBASE, /// Result is the value of scale. BC_RESULT_SCALE, #if BC_ENABLE_EXTRA_MATH /// Result is the value of seed. BC_RESULT_SEED, #endif // BC_ENABLE_EXTRA_MATH } BcResultType; /// A union to store data for various result types. typedef union BcResultData { /// A number. Strings are stored here too; they are numbers with /// cap == 0 && num == NULL. The string's index into the strings vector is /// stored in the scale field. But this is only used for strings stored in /// variables. BcNum n; /// A vector. BcVec v; /// A variable, array, or array element reference. This could also be a /// string if a string is not stored in a variable (dc only). BcLoc loc; } BcResultData; /// A tagged union for results. typedef struct BcResult { /// The tag. The type of the result. BcResultType t; /// The data. The data for the result. BcResultData d; } BcResult; /// An instruction pointer. This is how bc knows where in the bytecode vector, /// and which function, the current execution is. 
typedef struct BcInstPtr { /// The index of the currently executing function in the fns vector. size_t func; /// The index into the bytecode vector of the *next* instruction. size_t idx; /// The length of the results vector when this function started executing. /// This is mostly used for bc where functions should not affect the results /// of their callers. size_t len; } BcInstPtr; /// Types of identifiers. typedef enum BcType { /// Variable. BC_TYPE_VAR, /// Array. BC_TYPE_ARRAY, #if BC_ENABLED /// Array reference. BC_TYPE_REF, #endif // BC_ENABLED } BcType; #if BC_ENABLED /// An auto variable in bc. typedef struct BcAuto { /// The index of the variable in the vars or arrs vectors. size_t idx; /// The type of the variable. BcType type; } BcAuto; #endif // BC_ENABLED /// Forward declaration. struct BcProgram; /** * Initializes a function. * @param f The function to initialize. * @param name The name of the function. The string is assumed to be owned by * some other entity. */ void bc_func_init(BcFunc *f, const char* name); /** * Inserts an auto into the function. * @param f The function to insert into. * @param p The program. This is to search for the variable or array name. * @param name The name of the auto to insert. * @param type The type of the auto. * @param line The line in the source code where the insert happened. This is * solely for error reporting. */ void bc_func_insert(BcFunc *f, struct BcProgram* p, char* name, BcType type, size_t line); /** * Resets a function in preparation for it to be reused. This can happen in bc * because it is a dynamic language and functions can be redefined. * @param f The functio to reset. */ void bc_func_reset(BcFunc *f); #ifndef NDEBUG /** * Frees a function. This is a destructor. This is only used in debug builds * because all functions are freed at exit. We free them in debug builds to * check for memory leaks. * @param func The function to free as a void pointer. 
*/ void bc_func_free(void *func); #endif // NDEBUG /** * Initializes an array, which is the array type in bc and dc source code. Since * variables and arrays are both arrays (see the development manual, * manuals/development.md#execution, for more information), the @a nums * parameter tells bc whether to initialize an array of numbers or an array of * arrays of numbers. If the latter, it does a recursive call with nums set to * true. * @param a The array to initialize. * @param nums True if the array should be for numbers, false if it should be * for vectors. */ void bc_array_init(BcVec *a, bool nums); /** * Copies an array to another array. This is used to do pass arrays to functions * that do not take references to arrays. The arrays are passed entirely by * value, which means that they need to be copied. * @param d The destination array. * @param s The source array. */ void bc_array_copy(BcVec *d, const BcVec *s); /** * Frees a string stored in a function. This is a destructor. * @param string The string to free as a void pointer. */ void bc_string_free(void *string); /** * Frees a constant stored in a function. This is a destructor. * @param constant The constant to free as a void pointer. */ void bc_const_free(void *constant); /** * Clears a result. It sets the type to BC_RESULT_TEMP and clears the union by * clearing the BcNum in the union. This is to ensure that bc does not use * uninitialized data. * @param r The result to clear. */ void bc_result_clear(BcResult *r); /** * Copies a result into another. This is done for things like duplicating the * top of the results stack or copying the result of an assignment to put back * on the results stack. * @param d The destination result. * @param src The source result. */ void bc_result_copy(BcResult *d, BcResult *src); /** * Frees a result. This is a destructor. * @param result The result to free as a void pointer. */ void bc_result_free(void *result); /** * Expands an array to @a len. 
This can happen because in bc, you do not have to * explicitly initialize elements of an array. If you access an element that is * not initialized, the array is expanded to fit it, and all missing elements * are initialized to 0 if they are numbers, or arrays with one element of 0. * This function does that expansion. * @param a The array to expand. * @param len The length to expand to. */ void bc_array_expand(BcVec *a, size_t len); /** * Compare two BcId's and return the result. Since they are just comparing the * names in the BcId, I return the result from strcmp() exactly. This is used by * maps in their binary search. * @param e1 The first id. * @param e2 The second id. * @return The result of strcmp() on the BcId's names. */ int bc_id_cmp(const BcId *e1, const BcId *e2); #if BC_ENABLED /** * Returns non-zero if the bytecode instruction i is an assignment instruction. * @param i The instruction to test. * @return Non-zero if i is an assignment instruction, zero otherwise. */ #define BC_INST_IS_ASSIGN(i) \ ((i) == BC_INST_ASSIGN || (i) == BC_INST_ASSIGN_NO_VAL) /** * Returns true if the bytecode instruction @a i requires the value to be * returned for use. * @param i The instruction to test. * @return True if @a i requires the value to be returned for use, false * otherwise. */ #define BC_INST_USE_VAL(i) ((i) <= BC_INST_ASSIGN) #else // BC_ENABLED /** * Returns non-zero if the bytecode instruction i is an assignment instruction. * @param i The instruction to test. * @return Non-zero if i is an assignment instruction, zero otherwise. */ #define BC_INST_IS_ASSIGN(i) ((i) == BC_INST_ASSIGN_NO_VAL) /** * Returns true if the bytecode instruction @a i requires the value to be * returned for use. * @param i The instruction to test. * @return True if @a i requires the value to be returned for use, false * otherwise. */ #define BC_INST_USE_VAL(i) (false) #endif // BC_ENABLED #if BC_DEBUG_CODE /// Reference to string names for all of the instructions. For debugging. 
extern const char* bc_inst_names[]; #endif // BC_DEBUG_CODE /// References to the names of the main and read functions. extern const char bc_func_main[]; extern const char bc_func_read[]; #endif // BC_LANG_H diff --git a/include/lex.h b/include/lex.h index 0e7af1742001..129b9940618f 100644 --- a/include/lex.h +++ b/include/lex.h @@ -1,586 +1,593 @@ /* * ***************************************************************************** * * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2018-2021 Gavin D. Howard and contributors. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * * Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * * Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * ***************************************************************************** * * Definitions for bc's lexer. 
* */ #ifndef BC_LEX_H #define BC_LEX_H #include #include #include #include #include // Two convencience macros for throwing errors in lex code. They take care of // plumbing like passing in the current line the lexer is on. #define bc_lex_err(l, e) (bc_vm_handleError((e), (l)->line)) #define bc_lex_verr(l, e, ...) (bc_vm_handleError((e), (l)->line, __VA_ARGS__)) // BC_LEX_NEG_CHAR returns the char that corresponds to negative for the // current calculator. // // BC_LEX_LAST_NUM_CHAR returns the char that corresponds to the last valid // char for numbers. In bc and dc, capital letters are part of numbers, to a // point. (dc only goes up to hex, so its last valid char is 'F'.) #if BC_ENABLED #if DC_ENABLED #define BC_LEX_NEG_CHAR (BC_IS_BC ? '-' : '_') #define BC_LEX_LAST_NUM_CHAR (BC_IS_BC ? 'Z' : 'F') #else // DC_ENABLED #define BC_LEX_NEG_CHAR ('-') #define BC_LEX_LAST_NUM_CHAR ('Z') #endif // DC_ENABLED #else // BC_ENABLED #define BC_LEX_NEG_CHAR ('_') #define BC_LEX_LAST_NUM_CHAR ('F') #endif // BC_ENABLED /** * Returns true if c is a valid number character. * @param c The char to check. * @param pt If a decimal point has already been seen. * @param int_only True if the number is expected to be an int only, false if * non-integers are allowed. * @return True if @a c is a valid number character. */ #define BC_LEX_NUM_CHAR(c, pt, int_only) \ (isdigit(c) != 0 || ((c) >= 'A' && (c) <= BC_LEX_LAST_NUM_CHAR) || \ ((c) == '.' && !(pt) && !(int_only))) /// An enum of lex token types. typedef enum BcLexType { /// End of file. BC_LEX_EOF, /// Marker for invalid tokens, used by bc and dc for const data. BC_LEX_INVALID, #if BC_ENABLED /// Increment operator. BC_LEX_OP_INC, /// Decrement operator. BC_LEX_OP_DEC, #endif // BC_ENABLED /// BC_LEX_NEG is not used in lexing; it is only for parsing. The lexer /// marks all '-' characters as BC_LEX_OP_MINUS, but the parser needs to be /// able to distinguish them. BC_LEX_NEG, /// Boolean not. 
BC_LEX_OP_BOOL_NOT, #if BC_ENABLE_EXTRA_MATH /// Truncation operator. BC_LEX_OP_TRUNC, #endif // BC_ENABLE_EXTRA_MATH /// Power operator. BC_LEX_OP_POWER, /// Multiplication operator. BC_LEX_OP_MULTIPLY, /// Division operator. BC_LEX_OP_DIVIDE, /// Modulus operator. BC_LEX_OP_MODULUS, /// Addition operator. BC_LEX_OP_PLUS, /// Subtraction operator. BC_LEX_OP_MINUS, #if BC_ENABLE_EXTRA_MATH /// Places (truncate or extend) operator. BC_LEX_OP_PLACES, /// Left (decimal) shift operator. BC_LEX_OP_LSHIFT, /// Right (decimal) shift operator. BC_LEX_OP_RSHIFT, #endif // BC_ENABLE_EXTRA_MATH /// Equal operator. BC_LEX_OP_REL_EQ, /// Less than or equal operator. BC_LEX_OP_REL_LE, /// Greater than or equal operator. BC_LEX_OP_REL_GE, /// Not equal operator. BC_LEX_OP_REL_NE, /// Less than operator. BC_LEX_OP_REL_LT, /// Greater than operator. BC_LEX_OP_REL_GT, /// Boolean or operator. BC_LEX_OP_BOOL_OR, /// Boolean and operator. BC_LEX_OP_BOOL_AND, #if BC_ENABLED /// Power assignment operator. BC_LEX_OP_ASSIGN_POWER, /// Multiplication assignment operator. BC_LEX_OP_ASSIGN_MULTIPLY, /// Division assignment operator. BC_LEX_OP_ASSIGN_DIVIDE, /// Modulus assignment operator. BC_LEX_OP_ASSIGN_MODULUS, /// Addition assignment operator. BC_LEX_OP_ASSIGN_PLUS, /// Subtraction assignment operator. BC_LEX_OP_ASSIGN_MINUS, #if BC_ENABLE_EXTRA_MATH /// Places (truncate or extend) assignment operator. BC_LEX_OP_ASSIGN_PLACES, /// Left (decimal) shift assignment operator. BC_LEX_OP_ASSIGN_LSHIFT, /// Right (decimal) shift assignment operator. BC_LEX_OP_ASSIGN_RSHIFT, #endif // BC_ENABLE_EXTRA_MATH #endif // BC_ENABLED /// Assignment operator. BC_LEX_OP_ASSIGN, /// Newline. BC_LEX_NLINE, /// Whitespace. BC_LEX_WHITESPACE, /// Left parenthesis. BC_LEX_LPAREN, /// Right parenthesis. BC_LEX_RPAREN, /// Left bracket. BC_LEX_LBRACKET, /// Comma. BC_LEX_COMMA, /// Right bracket. BC_LEX_RBRACKET, /// Left brace. BC_LEX_LBRACE, /// Semicolon. BC_LEX_SCOLON, /// Right brace. 
BC_LEX_RBRACE, /// String. BC_LEX_STR, /// Identifier/name. BC_LEX_NAME, /// Constant number. BC_LEX_NUMBER, // These keywords are in the order they are in for a reason. Don't change // the order unless you want a bunch of weird failures in the test suite. // In fact, almost all of these tokens are in a specific order for a reason. #if BC_ENABLED /// bc auto keyword. BC_LEX_KW_AUTO, /// bc break keyword. BC_LEX_KW_BREAK, /// bc continue keyword. BC_LEX_KW_CONTINUE, /// bc define keyword. BC_LEX_KW_DEFINE, /// bc for keyword. BC_LEX_KW_FOR, /// bc if keyword. BC_LEX_KW_IF, /// bc limits keyword. BC_LEX_KW_LIMITS, /// bc return keyword. BC_LEX_KW_RETURN, /// bc while keyword. BC_LEX_KW_WHILE, /// bc halt keyword. BC_LEX_KW_HALT, /// bc last keyword. BC_LEX_KW_LAST, #endif // BC_ENABLED /// bc ibase keyword. BC_LEX_KW_IBASE, /// bc obase keyword. BC_LEX_KW_OBASE, /// bc scale keyword. BC_LEX_KW_SCALE, #if BC_ENABLE_EXTRA_MATH /// bc seed keyword. BC_LEX_KW_SEED, #endif // BC_ENABLE_EXTRA_MATH /// bc length keyword. BC_LEX_KW_LENGTH, /// bc print keyword. BC_LEX_KW_PRINT, /// bc sqrt keyword. BC_LEX_KW_SQRT, /// bc abs keyword. BC_LEX_KW_ABS, #if BC_ENABLE_EXTRA_MATH /// bc irand keyword. BC_LEX_KW_IRAND, #endif // BC_ENABLE_EXTRA_MATH /// bc asciffy keyword. BC_LEX_KW_ASCIIFY, /// bc modexp keyword. BC_LEX_KW_MODEXP, /// bc divmod keyword. BC_LEX_KW_DIVMOD, /// bc quit keyword. BC_LEX_KW_QUIT, /// bc read keyword. BC_LEX_KW_READ, #if BC_ENABLE_EXTRA_MATH /// bc rand keyword. BC_LEX_KW_RAND, #endif // BC_ENABLE_EXTRA_MATH /// bc maxibase keyword. BC_LEX_KW_MAXIBASE, /// bc maxobase keyword. BC_LEX_KW_MAXOBASE, /// bc maxscale keyword. BC_LEX_KW_MAXSCALE, #if BC_ENABLE_EXTRA_MATH /// bc maxrand keyword. BC_LEX_KW_MAXRAND, #endif // BC_ENABLE_EXTRA_MATH /// bc line_length keyword. BC_LEX_KW_LINE_LENGTH, #if BC_ENABLED /// bc global_stacks keyword. BC_LEX_KW_GLOBAL_STACKS, #endif // BC_ENABLED /// bc leading_zero keyword. BC_LEX_KW_LEADING_ZERO, /// bc stream keyword. 
BC_LEX_KW_STREAM, /// bc else keyword. BC_LEX_KW_ELSE, #if DC_ENABLED /// A special token for dc to calculate equal without a register. BC_LEX_EQ_NO_REG, /// Colon (array) operator. BC_LEX_COLON, /// Execute command. BC_LEX_EXECUTE, /// Print stack command. BC_LEX_PRINT_STACK, /// Clear stack command. BC_LEX_CLEAR_STACK, /// Register stack level command. BC_LEX_REG_STACK_LEVEL, /// Main stack level command. BC_LEX_STACK_LEVEL, /// Duplicate command. BC_LEX_DUPLICATE, /// Swap (reverse) command. BC_LEX_SWAP, /// Pop (remove) command. BC_LEX_POP, /// Store ibase command. BC_LEX_STORE_IBASE, /// Store obase command. BC_LEX_STORE_OBASE, /// Store scale command. BC_LEX_STORE_SCALE, #if BC_ENABLE_EXTRA_MATH /// Store seed command. BC_LEX_STORE_SEED, #endif // BC_ENABLE_EXTRA_MATH /// Load variable onto stack command. BC_LEX_LOAD, /// Pop off of variable stack onto results stack command. BC_LEX_LOAD_POP, /// Push onto variable stack command. BC_LEX_STORE_PUSH, /// Print with pop command. BC_LEX_PRINT_POP, /// Parameterized quit command. BC_LEX_NQUIT, /// Execution stack depth command. BC_LEX_EXEC_STACK_LENGTH, /// Scale of number command. This is needed specifically for dc because bc /// parses the scale function in parts. BC_LEX_SCALE_FACTOR, /// Array length command. This is needed specifically for dc because bc /// just reuses its length keyword. BC_LEX_ARRAY_LENGTH, #endif // DC_ENABLED } BcLexType; struct BcLex; /** * A function pointer to call when another token is needed. Mostly called by the * parser. * @param l The lexer. */ typedef void (*BcLexNext)(struct BcLex* l); /// The lexer. typedef struct BcLex { /// A pointer to the text to lex. const char *buf; /// The current index into buf. size_t i; /// The current line. size_t line; /// The length of buf. size_t len; /// The current token. BcLexType t; /// The previous token. BcLexType last; /// A string to store extra data for tokens. 
For example, the @a BC_LEX_STR /// token really needs to store the actual string, and numbers also need the /// string. BcVec str; /// If this is true, the lexer is processing stdin and can ask for more data /// if a string or comment are not properly terminated. bool is_stdin; + /// If this is true, the lexer is processing expressions from the + /// command-line and can ask for more data if a string or comment are not + /// properly terminated. + bool is_exprs; + } BcLex; /** * Initializes a lexer. * @param l The lexer to initialize. */ void bc_lex_init(BcLex *l); /** * Frees a lexer. This is not guarded by #ifndef NDEBUG because a separate * parser is created at runtime to parse read() expressions and dc strings, and * that parser needs a lexer. * @param l The lexer to free. */ void bc_lex_free(BcLex *l); /** * Sets the filename that the lexer will be lexing. * @param l The lexer. * @param file The filename that the lexer will lex. */ void bc_lex_file(BcLex *l, const char *file); /** * Sets the text the lexer will lex. * @param l The lexer. * @param text The text to lex. * @param is_stdin True if the text is from stdin, false otherwise. + * @param is_exprs True if the text is from command-line expressions, false + * otherwise. */ -void bc_lex_text(BcLex *l, const char *text, bool is_stdin); +void bc_lex_text(BcLex *l, const char *text, bool is_stdin, bool is_exprs); /** * Generic next function for the parser to call. It takes care of calling the * correct @a BcLexNext function and consuming whitespace. * @param l The lexer. */ void bc_lex_next(BcLex *l); /** * Lexes a line comment (one beginning with '#' and going to a newline). * @param l The lexer. */ void bc_lex_lineComment(BcLex *l); /** * Lexes a general comment (C-style comment). * @param l The lexer. */ void bc_lex_comment(BcLex *l); /** * Lexes whitespace, finding as much as possible. * @param l The lexer. */ void bc_lex_whitespace(BcLex *l); /** * Lexes a number that begins with char @a start. 
This takes care of parsing * numbers in scientific and engineering notations. * @param l The lexer. * @param start The starting char of the number. To detect a number and call * this function, the lexer had to eat the first char. It fixes * that by passing it in. */ void bc_lex_number(BcLex *l, char start); /** * Lexes a name/identifier. * @param l The lexer. */ void bc_lex_name(BcLex *l); /** * Lexes common whitespace characters. * @param l The lexer. * @param c The character to lex. */ void bc_lex_commonTokens(BcLex *l, char c); /** * Throws a parse error because char @a c was invalid. * @param l The lexer. * @param c The problem character. */ void bc_lex_invalidChar(BcLex *l, char c); /** * Reads a line from stdin and puts it into the lexer's buffer. - * @param l The lexer. + * @param l The lexer. */ bool bc_lex_readLine(BcLex *l); #endif // BC_LEX_H diff --git a/include/parse.h b/include/parse.h index 0088c1523ec6..35ca1652fc98 100644 --- a/include/parse.h +++ b/include/parse.h @@ -1,275 +1,277 @@ /* * ***************************************************************************** * * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2018-2021 Gavin D. Howard and contributors. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * * Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * * Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * ***************************************************************************** * * Definitions for bc's parser. * */ #ifndef BC_PARSE_H #define BC_PARSE_H #include #include #include #include #include #include #include // The following are flags that can be passed to @a BcParseExpr functions. They // define the requirements that the parsed expression must meet to not have an // error thrown. /// A flag that requires that the expression is valid for conditionals in for /// loops, while loops, and if statements. This is because POSIX requires that /// certain operators are *only* used in those cases. It's whacked, but that's /// how it is. #define BC_PARSE_REL (UINTMAX_C(1)<<0) /// A flag that requires that the expression is valid for a print statement. #define BC_PARSE_PRINT (UINTMAX_C(1)<<1) /// A flag that requires that the expression does *not* have any function call. #define BC_PARSE_NOCALL (UINTMAX_C(1)<<2) /// A flag that requires that the expression does *not* have a read() expression. #define BC_PARSE_NOREAD (UINTMAX_C(1)<<3) /// A flag that *allows* (rather than requires) that an array appear in the /// expression. This is mostly used as parameters in bc. #define BC_PARSE_ARRAY (UINTMAX_C(1)<<4) /// A flag that requires that the expression is not empty and returns a value. #define BC_PARSE_NEEDVAL (UINTMAX_C(1)<<5) /** * Returns true if the parser has been initialized. * @param p The parser. * @param prg The program. 
* @return True if @a p has been initialized, false otherwise. */ #define BC_PARSE_IS_INITED(p, prg) ((p)->prog == (prg)) #if BC_ENABLED /** * Returns true if the current parser state allows parsing, false otherwise. * @param p The parser. * @return True if parsing can proceed, false otherwise. */ #define BC_PARSE_CAN_PARSE(p) \ ((p).l.t != BC_LEX_EOF && (p).l.t != BC_LEX_KW_DEFINE) #else // BC_ENABLED /** * Returns true if the current parser state allows parsing, false otherwise. * @param p The parser. * @return True if parsing can proceed, false otherwise. */ #define BC_PARSE_CAN_PARSE(p) ((p).l.t != BC_LEX_EOF) #endif // BC_ENABLED /** * Pushes the instruction @a i onto the bytecode vector for the current * function. * @param p The parser. * @param i The instruction to push onto the bytecode vector. */ #define bc_parse_push(p, i) (bc_vec_pushByte(&(p)->func->code, (uchar) (i))) /** * Pushes an index onto the bytecode vector. For more information, see * @a bc_vec_pushIndex() in src/vector.c and @a bc_program_index() in * src/program.c. * @param p The parser. * @param idx The index to push onto the bytecode vector. */ #define bc_parse_pushIndex(p, idx) (bc_vec_pushIndex(&(p)->func->code, (idx))) /** * A convenience macro for throwing errors in parse code. They take care of * plumbing like passing in the current line the lexer is on. * @param p The parser. * @param e The error. */ #define bc_parse_err(p, e) (bc_vm_handleError((e), (p)->l.line)) /** * A convenience macro for throwing errors in parse code. They take care of * plumbing like passing in the current line the lexer is on. * @param p The parser. * @param e The error. * @param ... The varags that are needed. */ #define bc_parse_verr(p, e, ...) \ (bc_vm_handleError((e), (p)->l.line, __VA_ARGS__)) // Forward declarations. struct BcParse; struct BcProgram; /** * A function pointer to call when more parsing is needed. * @param p The parser. 
*/ typedef void (*BcParseParse)(struct BcParse* p); /** * A function pointer to call when an expression needs to be parsed. This can * happen for read() expressions or dc strings. * @param p The parser. * @param flags The flags for what is allowed or required. (See flags above.) */ typedef void (*BcParseExpr)(struct BcParse* p, uint8_t flags); /// The parser struct. typedef struct BcParse { /// The lexer. BcLex l; #if BC_ENABLED /// The stack of flags for bc. (See comments in include/bc.h.) This stack is /// *required* to have one item at all times. Not maintaining that invariant /// will cause problems. BcVec flags; /// The stack of exits. These are indices into the bytecode vector where /// blocks for loops and if statements end. Basically, these are the places /// to jump to when skipping code. BcVec exits; /// The stack of conditionals. Unlike exits, which are indices to jump /// *forward* to, this is a vector of indices to jump *backward* to, usually /// to the conditional of a loop, hence the name. BcVec conds; /// A stack of operators. When parsing expressions, the bc parser uses the /// Shunting-Yard algorithm, which requires a stack of operators. This can /// hold the stack for multiple expressions at once because the expressions /// stack as well. For more information, see the Expression Parsing section /// of the Development manual (manuals/development.md). BcVec ops; /// A buffer to temporarily store a string in. This is because the lexer /// might generate a string as part of its work, and the parser needs that /// string, but it also needs the lexer to continue lexing, which might /// overwrite the string stored in the lexer. This buffer is for copying /// that string from the lexer to keep it safe. BcVec buf; #endif // BC_ENABLED /// A reference to the program to grab the current function when necessary. struct BcProgram *prog; /// A reference to the current function. The function is what holds the /// bytecode vector that the parser is filling. 
BcFunc *func; /// The index of the function. size_t fidx; #if BC_ENABLED /// True if the bc parser just entered a function and an auto statement /// would be valid. bool auto_part; #endif // BC_ENABLED } BcParse; /** * Initializes a parser. * @param p The parser to initialize. * @param prog A reference to the program. * @param func The index of the current function. */ void bc_parse_init(BcParse *p, struct BcProgram *prog, size_t func); /** * Frees a parser. This is not guarded by #ifndef NDEBUG because a separate * parser is created at runtime to parse read() expressions and dc strings. * @param p The parser to free. */ void bc_parse_free(BcParse *p); /** * Resets the parser. Resetting means erasing all state to the point that the * parser would think it was just initialized. * @param p The parser to reset. */ void bc_parse_reset(BcParse *p); /** * Adds a string. See @a BcProgram in include/program.h for more details. * @param p The parser that parsed the string. */ void bc_parse_addString(BcParse *p); /** * Adds a number. See @a BcProgram in include/program.h for more details. * @param p The parser that parsed the number. */ void bc_parse_number(BcParse *p); /** * Updates the current function in the parser. * @param p The parser. * @param fidx The index of the new function. */ void bc_parse_updateFunc(BcParse *p, size_t fidx); /** * Adds a new variable or array. See @a BcProgram in include/program.h for more * details. * @param p The parser that parsed the variable or array name. * @param name The name of the variable or array to add. * @param var True if the name is for a variable, false if it's for an array. */ void bc_parse_pushName(const BcParse* p, char *name, bool var); /** * Sets the text that the parser will parse. * @param p The parser. * @param text The text to lex. * @param is_stdin True if the text is from stdin, false otherwise. + * @param is_exprs True if the text is from command-line expressions, false + * otherwise.
*/ -void bc_parse_text(BcParse *p, const char *text, bool is_stdin); +void bc_parse_text(BcParse *p, const char *text, bool is_stdin, bool is_exprs); // References to const 0 and 1 strings for special cases. bc and dc have // specific instructions for 0 and 1 because they pop up so often and (in the // case of 1), increment/decrement operators. extern const char bc_parse_zero[2]; extern const char bc_parse_one[2]; #endif // BC_PARSE_H diff --git a/include/status.h b/include/status.h index 993b5e698fb3..df084c70c1b5 100644 --- a/include/status.h +++ b/include/status.h @@ -1,805 +1,805 @@ /* * ***************************************************************************** * * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2018-2021 Gavin D. Howard and contributors. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * * Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * * Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * ***************************************************************************** * * All bc status codes and cross-platform portability. * */ #ifndef BC_STATUS_H #define BC_STATUS_H #include // This is used by configure.sh to test for OpenBSD. #ifdef BC_TEST_OPENBSD #ifdef __OpenBSD__ #error On OpenBSD without _BSD_SOURCE #endif // __OpenBSD__ #endif // BC_TEST_OPENBSD #ifndef BC_ENABLED #define BC_ENABLED (1) #endif // BC_ENABLED #ifndef DC_ENABLED #define DC_ENABLED (1) #endif // DC_ENABLED #ifndef BC_ENABLE_LIBRARY #define BC_ENABLE_LIBRARY (0) #endif // BC_ENABLE_LIBRARY // This is error checking for fuzz builds. #if BC_ENABLE_AFL #ifndef __AFL_HAVE_MANUAL_CONTROL #error Must compile with afl-clang-fast or afl-clang-lto for fuzzing #endif // __AFL_HAVE_MANUAL_CONTROL #endif // BC_ENABLE_AFL #ifndef BC_ENABLE_MEMCHECK #define BC_ENABLE_MEMCHECK (0) #endif // BC_ENABLE_MEMCHECK /** * Mark a variable as unused. * @param e The variable to mark as unused. */ #define BC_UNUSED(e) ((void) (e)) // If users want, they can define this to something like __builtin_expect(e, 1). // It might give a performance improvement. #ifndef BC_LIKELY /** * Mark a branch expression as likely. * @param e The expression to mark as likely. */ #define BC_LIKELY(e) (e) #endif // BC_LIKELY // If users want, they can define this to something like __builtin_expect(e, 0). // It might give a performance improvement. 
#ifndef BC_UNLIKELY /** * Mark a branch expression as unlikely. * @param e The expression to mark as unlikely. */ #define BC_UNLIKELY(e) (e) #endif // BC_UNLIKELY /** * Mark a branch expression as an error, if true. * @param e The expression to mark as an error, if true. */ #define BC_ERR(e) BC_UNLIKELY(e) /** * Mark a branch expression as not an error, if true. * @param s The expression to mark as not an error, if true. */ #define BC_NO_ERR(s) BC_LIKELY(s) // Disable extra debug code by default. #ifndef BC_DEBUG_CODE #define BC_DEBUG_CODE (0) #endif // BC_DEBUG_CODE // We want to be able to use _Noreturn on C11 compilers. -#if __STDC_VERSION__ >= 201100L +#if __STDC_VERSION__ >= 201112L #include #define BC_NORETURN _Noreturn #define BC_C11 (1) #else // __STDC_VERSION__ #define BC_NORETURN #define BC_MUST_RETURN #define BC_C11 (0) #endif // __STDC_VERSION__ #define BC_HAS_UNREACHABLE (0) #define BC_HAS_COMPUTED_GOTO (0) // GCC and Clang complain if fallthroughs are not marked with their special // attribute. Jerks. This creates a define for marking the fallthroughs that is // nothing on other compilers.
#if defined(__clang__) || defined(__GNUC__) #if defined(__has_attribute) #if __has_attribute(fallthrough) #define BC_FALLTHROUGH __attribute__((fallthrough)); #else // __has_attribute(fallthrough) #define BC_FALLTHROUGH #endif // __has_attribute(fallthrough) #ifdef __GNUC__ #if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5) #undef BC_HAS_UNREACHABLE #define BC_HAS_UNREACHABLE (1) #endif // __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5) #else // __GNUC__ #if __clang_major__ >= 4 #undef BC_HAS_UNREACHABLE #define BC_HAS_UNREACHABLE (1) #endif // __clang_major__ >= 4 #endif // __GNUC__ #else // defined(__has_attribute) #define BC_FALLTHROUGH #endif // defined(__has_attribute) #else // defined(__clang__) || defined(__GNUC__) #define BC_FALLTHROUGH #endif // defined(__clang__) || defined(__GNUC__) #if BC_HAS_UNREACHABLE #define BC_UNREACHABLE __builtin_unreachable(); #else // BC_HAS_UNREACHABLE #ifdef _WIN32 #define BC_UNREACHABLE __assume(0); #else // _WIN32 #define BC_UNREACHABLE #endif // _WIN32 #endif // BC_HAS_UNREACHABLE #ifdef __GNUC__ #if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5) #undef BC_HAS_COMPUTED_GOTO #define BC_HAS_COMPUTED_GOTO (1) #endif // __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5) #endif // __GNUC__ #ifdef __clang__ #if __clang_major__ >= 4 #undef BC_HAS_COMPUTED_GOTO #define BC_HAS_COMPUTED_GOTO (1) #endif // __clang_major__ >= 4 #endif // __clang__ #ifdef BC_NO_COMPUTED_GOTO #undef BC_HAS_COMPUTED_GOTO #define BC_HAS_COMPUTED_GOTO (0) #endif // BC_NO_COMPUTED_GOTO #ifdef __GNUC__ #ifdef __OpenBSD__ // The OpenBSD GCC doesn't like inline. #define inline #endif // __OpenBSD__ #endif // __GNUC__ // Workarounds for AIX's POSIX incompatibility.
#ifndef SIZE_MAX #define SIZE_MAX __SIZE_MAX__ #endif // SIZE_MAX #ifndef UINTMAX_C #define UINTMAX_C __UINTMAX_C #endif // UINTMAX_C #ifndef UINT32_C #define UINT32_C __UINT32_C #endif // UINT32_C #ifndef UINT_FAST32_MAX #define UINT_FAST32_MAX __UINT_FAST32_MAX__ #endif // UINT_FAST32_MAX #ifndef UINT16_MAX #define UINT16_MAX __UINT16_MAX__ #endif // UINT16_MAX #ifndef SIG_ATOMIC_MAX #define SIG_ATOMIC_MAX __SIG_ATOMIC_MAX__ #endif // SIG_ATOMIC_MAX // Yes, this has to be here. #include // All of these set defaults for settings. #if BC_ENABLED #ifndef BC_DEFAULT_BANNER #define BC_DEFAULT_BANNER (0) #endif // BC_DEFAULT_BANNER #endif // BC_ENABLED #ifndef BC_DEFAULT_SIGINT_RESET #define BC_DEFAULT_SIGINT_RESET (1) #endif // BC_DEFAULT_SIGINT_RESET #ifndef BC_DEFAULT_TTY_MODE #define BC_DEFAULT_TTY_MODE (1) #endif // BC_DEFAULT_TTY_MODE #ifndef BC_DEFAULT_PROMPT #define BC_DEFAULT_PROMPT BC_DEFAULT_TTY_MODE #endif // BC_DEFAULT_PROMPT #ifndef BC_DEFAULT_EXPR_EXIT #define BC_DEFAULT_EXPR_EXIT (1) #endif // BC_DEFAULT_EXPR_EXIT // All of these set defaults for settings. #ifndef DC_DEFAULT_SIGINT_RESET #define DC_DEFAULT_SIGINT_RESET (1) #endif // DC_DEFAULT_SIGINT_RESET #ifndef DC_DEFAULT_TTY_MODE #define DC_DEFAULT_TTY_MODE (0) #endif // DC_DEFAULT_TTY_MODE #ifndef DC_DEFAULT_HISTORY #define DC_DEFAULT_HISTORY DC_DEFAULT_TTY_MODE #endif // DC_DEFAULT_HISTORY #ifndef DC_DEFAULT_PROMPT #define DC_DEFAULT_PROMPT DC_DEFAULT_TTY_MODE #endif // DC_DEFAULT_PROMPT #ifndef DC_DEFAULT_EXPR_EXIT #define DC_DEFAULT_EXPR_EXIT (1) #endif // DC_DEFAULT_EXPR_EXIT /// Statuses, which mark either which category of error happened, or some other /// status that matters. typedef enum BcStatus { /// Normal status. BC_STATUS_SUCCESS = 0, /// Math error. BC_STATUS_ERROR_MATH, /// Parse (and lex) error. BC_STATUS_ERROR_PARSE, /// Runtime error. BC_STATUS_ERROR_EXEC, /// Fatal error. BC_STATUS_ERROR_FATAL, /// EOF status. BC_STATUS_EOF, /// Quit status. 
This means that bc/dc is in the process of quitting. BC_STATUS_QUIT, } BcStatus; /// Errors, which are more specific errors. typedef enum BcErr { // Math errors. /// Negative number used when not allowed. BC_ERR_MATH_NEGATIVE, /// Non-integer used when not allowed. BC_ERR_MATH_NON_INTEGER, /// Conversion to a hardware integer would overflow. BC_ERR_MATH_OVERFLOW, /// Divide by zero. BC_ERR_MATH_DIVIDE_BY_ZERO, // Fatal errors. /// An allocation or reallocation failed. BC_ERR_FATAL_ALLOC_ERR, /// I/O failure. BC_ERR_FATAL_IO_ERR, /// File error, such as permissions or file does not exist. BC_ERR_FATAL_FILE_ERR, /// File is binary, not text, error. BC_ERR_FATAL_BIN_FILE, /// Attempted to read a directory as a file error. BC_ERR_FATAL_PATH_DIR, /// Invalid option error. BC_ERR_FATAL_OPTION, /// Option with required argument not given an argument. BC_ERR_FATAL_OPTION_NO_ARG, /// Option with no argument given an argument. BC_ERR_FATAL_OPTION_ARG, /// Option argument is invalid. BC_ERR_FATAL_ARG, // Runtime errors. /// Invalid ibase value. BC_ERR_EXEC_IBASE, /// Invalid obase value. BC_ERR_EXEC_OBASE, /// Invalid scale value. BC_ERR_EXEC_SCALE, /// Invalid expression parsed by read(). BC_ERR_EXEC_READ_EXPR, /// read() used within an expression given to a read() call. BC_ERR_EXEC_REC_READ, /// Type error. BC_ERR_EXEC_TYPE, /// Stack has too few elements error. BC_ERR_EXEC_STACK, /// Register stack has too few elements error. BC_ERR_EXEC_STACK_REGISTER, /// Wrong number of arguments error. BC_ERR_EXEC_PARAMS, /// Undefined function error. BC_ERR_EXEC_UNDEF_FUNC, /// Void value used in an expression error. BC_ERR_EXEC_VOID_VAL, - // Parse (and lex errors). + // Parse (and lex) errors. /// EOF encountered when not expected error. BC_ERR_PARSE_EOF, /// Invalid character error. BC_ERR_PARSE_CHAR, /// Invalid string (no ending quote) error. BC_ERR_PARSE_STRING, /// Invalid comment (no end found) error. BC_ERR_PARSE_COMMENT, /// Invalid token encountered error. 
BC_ERR_PARSE_TOKEN, #if BC_ENABLED /// Invalid expression error. BC_ERR_PARSE_EXPR, /// Expression is empty error. BC_ERR_PARSE_EMPTY_EXPR, /// Print statement is invalid error. BC_ERR_PARSE_PRINT, /// Function definition is invalid error. BC_ERR_PARSE_FUNC, /// Assignment is invalid error. BC_ERR_PARSE_ASSIGN, /// No auto identifiers given for an auto statement error. BC_ERR_PARSE_NO_AUTO, /// Duplicate local (parameter or auto) error. BC_ERR_PARSE_DUP_LOCAL, /// Invalid block (within braces) error. BC_ERR_PARSE_BLOCK, /// Invalid return statement for void functions. BC_ERR_PARSE_RET_VOID, /// Reference attached to a variable, not an array, error. BC_ERR_PARSE_REF_VAR, // POSIX-only errors. /// Name length greater than 1 error. BC_ERR_POSIX_NAME_LEN, /// Non-POSIX comment used error. BC_ERR_POSIX_COMMENT, /// Non-POSIX keyword error. BC_ERR_POSIX_KW, /// Non-POSIX . (last) error. BC_ERR_POSIX_DOT, /// Non-POSIX return error. BC_ERR_POSIX_RET, /// Non-POSIX boolean operator used error. BC_ERR_POSIX_BOOL, /// POSIX relation operator used outside if, while, or for statements error. BC_ERR_POSIX_REL_POS, /// Multiple POSIX relation operators used in an if, while, or for statement /// error. BC_ERR_POSIX_MULTIREL, /// Empty statements in POSIX for loop error. BC_ERR_POSIX_FOR, /// POSIX's grammar does not allow a function definition right after a /// semicolon. BC_ERR_POSIX_FUNC_AFTER_SEMICOLON, /// Non-POSIX exponential (scientific or engineering) number used error. BC_ERR_POSIX_EXP_NUM, /// Non-POSIX array reference error. BC_ERR_POSIX_REF, /// Non-POSIX void error. BC_ERR_POSIX_VOID, /// Non-POSIX brace position used error. BC_ERR_POSIX_BRACE, /// String used in expression. BC_ERR_POSIX_EXPR_STRING, #endif // BC_ENABLED // Number of elements. BC_ERR_NELEMS, #if BC_ENABLED /// A marker for the start of POSIX errors. BC_ERR_POSIX_START = BC_ERR_POSIX_NAME_LEN, /// A marker for the end of POSIX errors. 
BC_ERR_POSIX_END = BC_ERR_POSIX_EXPR_STRING, #endif // BC_ENABLED } BcErr; // The indices of each category of error in bc_errs[], and used in bc_err_ids[] // to associate actual errors with their categories. /// Math error category. #define BC_ERR_IDX_MATH (0) /// Parse (and lex) error category. #define BC_ERR_IDX_PARSE (1) /// Runtime error category. #define BC_ERR_IDX_EXEC (2) /// Fatal error category. #define BC_ERR_IDX_FATAL (3) /// Number of categories. #define BC_ERR_IDX_NELEMS (4) // If bc is enabled, we add an extra category for POSIX warnings. #if BC_ENABLED /// POSIX warning category. #define BC_ERR_IDX_WARN (BC_ERR_IDX_NELEMS) #endif // BC_ENABLED /// Do a longjmp(). This is what to use when activating an "exception", i.e., a /// longjmp(). With debug code, it will print the name of the function it jumped /// from. #if BC_DEBUG_CODE #define BC_JMP bc_vm_jmp(__func__) #else // BC_DEBUG_CODE #define BC_JMP bc_vm_jmp() #endif // BC_DEBUG_CODE /// Returns true if an exception is in flight, false otherwise. #define BC_SIG_EXC \ BC_UNLIKELY(vm.status != (sig_atomic_t) BC_STATUS_SUCCESS || vm.sig) /// Returns true if there is *no* exception in flight, false otherwise. #define BC_NO_SIG_EXC \ BC_LIKELY(vm.status == (sig_atomic_t) BC_STATUS_SUCCESS && !vm.sig) #ifndef NDEBUG /// Assert that signals are locked. There are non-async-signal-safe functions in /// bc, and they *must* have signals locked. Other functions are expected to /// *not* have signals locked, for reasons. So this is a pre-built assert /// (no-op in non-debug mode) that check that signals are locked. #define BC_SIG_ASSERT_LOCKED do { assert(vm.sig_lock); } while (0) /// Assert that signals are unlocked. There are non-async-signal-safe functions /// in bc, and they *must* have signals locked. Other functions are expected to /// *not* have signals locked, for reasons. So this is a pre-built assert /// (no-op in non-debug mode) that check that signals are unlocked. 
#define BC_SIG_ASSERT_NOT_LOCKED do { assert(vm.sig_lock == 0); } while (0) #else // NDEBUG /// Assert that signals are locked. There are non-async-signal-safe functions in /// bc, and they *must* have signals locked. Other functions are expected to /// *not* have signals locked, for reasons. So this is a pre-built assert /// (no-op in non-debug mode) that check that signals are locked. #define BC_SIG_ASSERT_LOCKED /// Assert that signals are unlocked. There are non-async-signal-safe functions /// in bc, and they *must* have signals locked. Other functions are expected to /// *not* have signals locked, for reasons. So this is a pre-built assert /// (no-op in non-debug mode) that check that signals are unlocked. #define BC_SIG_ASSERT_NOT_LOCKED #endif // NDEBUG /// Locks signals. #define BC_SIG_LOCK \ do { \ BC_SIG_ASSERT_NOT_LOCKED; \ vm.sig_lock = 1; \ } while (0) /// Unlocks signals. If a signal happened, then this will cause a jump. #define BC_SIG_UNLOCK \ do { \ BC_SIG_ASSERT_LOCKED; \ vm.sig_lock = 0; \ if (vm.sig) BC_JMP; \ } while (0) /// Locks signals, regardless of if they are already locked. This is really only /// used after labels that longjmp() goes to after the jump because the cleanup /// code must have signals locked, and BC_LONGJMP_CONT will unlock signals if it /// doesn't jump. #define BC_SIG_MAYLOCK \ do { \ vm.sig_lock = 1; \ } while (0) /// Unlocks signals, regardless of if they were already unlocked. If a signal /// happened, then this will cause a jump. #define BC_SIG_MAYUNLOCK \ do { \ vm.sig_lock = 0; \ if (vm.sig) BC_JMP; \ } while (0) /* * Locks signals, but stores the old lock state, to be restored later by * BC_SIG_TRYUNLOCK. * @param v The variable to store the old lock state to. */ #define BC_SIG_TRYLOCK(v) \ do { \ v = vm.sig_lock; \ vm.sig_lock = 1; \ } while (0) /* Restores the previous state of a signal lock, and if it is now unlocked, * initiates an exception/jump. * @param v The old lock state. 
*/ #define BC_SIG_TRYUNLOCK(v) \ do { \ vm.sig_lock = (v); \ if (!(v) && vm.sig) BC_JMP; \ } while (0) /** * Sets a jump, and sets it up as well so that if a longjmp() happens, bc will * immediately goto a label where some cleanup code is. This one assumes that * signals are not locked and will lock them, set the jump, and unlock them. * Setting the jump also includes pushing the jmp_buf onto the jmp_buf stack. * This grows the jmp_bufs vector first to prevent a fatal error from happening * after the setjmp(). This is done because BC_SETJMP(l) is assumed to be used * *before* the actual initialization calls that need the setjmp(). * @param l The label to jump to on a longjmp(). */ #define BC_SETJMP(l) \ do { \ sigjmp_buf sjb; \ BC_SIG_LOCK; \ bc_vec_grow(&vm.jmp_bufs, 1); \ if (sigsetjmp(sjb, 0)) { \ assert(BC_SIG_EXC); \ goto l; \ } \ bc_vec_push(&vm.jmp_bufs, &sjb); \ BC_SIG_UNLOCK; \ } while (0) /** * Sets a jump like BC_SETJMP, but unlike BC_SETJMP, it assumes signals are * locked and will just set the jump. This does *not* have a call to * bc_vec_grow() because it is assumed that BC_SETJMP_LOCKED(l) is used *after* * the initializations that need the setjmp(). * @param l The label to jump to on a longjmp(). */ #define BC_SETJMP_LOCKED(l) \ do { \ sigjmp_buf sjb; \ BC_SIG_ASSERT_LOCKED; \ if (sigsetjmp(sjb, 0)) { \ assert(BC_SIG_EXC); \ goto l; \ } \ bc_vec_push(&vm.jmp_bufs, &sjb); \ } while (0) /// Used after cleanup labels set by BC_SETJMP and BC_SETJMP_LOCKED to jump to /// the next place. This is what continues the stack unwinding. This basically /// copies BC_SIG_UNLOCK into itself, but that is because its condition for /// jumping is BC_SIG_EXC, not just that a signal happened. #define BC_LONGJMP_CONT \ do { \ BC_SIG_ASSERT_LOCKED; \ if (!vm.sig_pop) bc_vec_pop(&vm.jmp_bufs); \ vm.sig_lock = 0; \ if (BC_SIG_EXC) BC_JMP; \ } while (0) /// Unsets a jump. It always assumes signals are locked.
This basically just /// pops a jmp_buf off of the stack of jmp_bufs, and since the jump mechanism /// always jumps to the location at the top of the stack, this effectively /// undoes a setjmp(). #define BC_UNSETJMP \ do { \ BC_SIG_ASSERT_LOCKED; \ bc_vec_pop(&vm.jmp_bufs); \ } while (0) /// Stops a stack unwinding. Technically, a stack unwinding needs to be done /// manually, but it will always be done unless certain flags are cleared. This /// clears the flags. #define BC_LONGJMP_STOP \ do { \ vm.sig_pop = 0; \ vm.sig = 0; \ } while (0) // Various convenience macros for calling bc's error handling routine. #if BC_ENABLE_LIBRARY /** * Call bc's error handling routine. In library builds, only the error is * passed on; the line and extra arguments are dropped. * @param e The error. * @param l The line of the script that the error happened (unused here). * @param ... Extra arguments for error messages as necessary (unused here). */ #define bc_error(e, l, ...) (bc_vm_handleError((e))) /** * Call bc's error handling routine. * @param e The error. */ #define bc_err(e) (bc_vm_handleError((e))) /** * Call bc's error handling routine. In library builds, only the error is * passed on; the extra arguments are dropped. * @param e The error. * @param ... Extra arguments for error messages as necessary (unused here). */ #define bc_verr(e, ...) (bc_vm_handleError((e))) #else // BC_ENABLE_LIBRARY /** * Call bc's error handling routine. * @param e The error. * @param l The line of the script that the error happened. * @param ... Extra arguments for error messages as necessary. */ #define bc_error(e, l, ...) (bc_vm_handleError((e), (l), __VA_ARGS__)) /** * Call bc's error handling routine with a line of 0. * @param e The error. */ #define bc_err(e) (bc_vm_handleError((e), 0)) /** * Call bc's error handling routine with a line of 0. * @param e The error. * @param ... Extra arguments for error messages as necessary. */ #define bc_verr(e, ...) (bc_vm_handleError((e), 0, __VA_ARGS__)) #endif // BC_ENABLE_LIBRARY /** * Returns true if status @a s is an error, false otherwise. * @param s The status to test. * @return True if @a s is an error, false otherwise.
*/ #define BC_STATUS_IS_ERROR(s) \ ((s) >= BC_STATUS_ERROR_MATH && (s) <= BC_STATUS_ERROR_FATAL) // Convenience macros that can be placed at the beginning and exits of functions // for easy marking of where functions are entered and exited. #if BC_DEBUG_CODE #define BC_FUNC_ENTER \ do { \ size_t bc_func_enter_i; \ for (bc_func_enter_i = 0; bc_func_enter_i < vm.func_depth; \ ++bc_func_enter_i) \ { \ bc_file_puts(&vm.ferr, bc_flush_none, " "); \ } \ vm.func_depth += 1; \ bc_file_printf(&vm.ferr, "Entering %s\n", __func__); \ bc_file_flush(&vm.ferr, bc_flush_none); \ } while (0); #define BC_FUNC_EXIT \ do { \ size_t bc_func_enter_i; \ vm.func_depth -= 1; \ for (bc_func_enter_i = 0; bc_func_enter_i < vm.func_depth; \ ++bc_func_enter_i) \ { \ bc_file_puts(&vm.ferr, bc_flush_none, " "); \ } \ bc_file_printf(&vm.ferr, "Leaving %s\n", __func__); \ bc_file_flush(&vm.ferr, bc_flush_none); \ } while (0); #else // BC_DEBUG_CODE #define BC_FUNC_ENTER #define BC_FUNC_EXIT #endif // BC_DEBUG_CODE #endif // BC_STATUS_H diff --git a/include/version.h b/include/version.h index 0c4122528e7d..a7d1640a87a2 100644 --- a/include/version.h +++ b/include/version.h @@ -1,42 +1,42 @@ /* * ***************************************************************************** * * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2018-2021 Gavin D. Howard and contributors. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * * Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * * Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * ***************************************************************************** * * The version of bc. * */ #ifndef BC_VERSION_H #define BC_VERSION_H /// The current version. -#define VERSION 5.2.2 +#define VERSION 5.2.3 #endif // BC_VERSION_H diff --git a/include/vm.h b/include/vm.h index 6f69712a804b..cf7eb5238870 100644 --- a/include/vm.h +++ b/include/vm.h @@ -1,884 +1,891 @@ /* * ***************************************************************************** * * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2018-2021 Gavin D. Howard and contributors. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * * Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * * Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * ***************************************************************************** * * Definitions for bc's VM. * */ #ifndef BC_VM_H #define BC_VM_H #include #include #include #include #if BC_ENABLE_NLS #ifdef _WIN32 #error NLS is not supported on Windows. #endif // _WIN32 #include #endif // BC_ENABLE_NLS #include #include #include #include #include #include #include #include // We don't want to include this file for the library because it's unused. #if !BC_ENABLE_LIBRARY #include #endif // !BC_ENABLE_LIBRARY // This should be obvious. If neither calculator is enabled, barf. #if !BC_ENABLED && !DC_ENABLED #error Must define BC_ENABLED, DC_ENABLED, or both #endif // CHAR_BIT must be at least 6, for various reasons. I might want to bump this // to 8 in the future. #if CHAR_BIT < 6 #error CHAR_BIT must be at least 6. #endif // Set defaults. // #ifndef BC_ENABLE_NLS #define BC_ENABLE_NLS (0) #endif // BC_ENABLE_NLS #ifndef MAINEXEC #define MAINEXEC bc #endif // MAINEXEC #ifndef _WIN32 #ifndef EXECPREFIX #define EXECPREFIX #endif // EXECPREFIX #else // _WIN32 #undef EXECPREFIX #endif // _WIN32 /** * Generate a string from text. * @param V The text to generate a string for.
*/ #define GEN_STR(V) #V /** * Help generate a string from text. The preprocessor requires this two-step * process. Trust me. * @param V The text to generate a string for. */ #define GEN_STR2(V) GEN_STR(V) /// The version as a string. VERSION must be defined previously, usually by the /// build system. #define BC_VERSION GEN_STR2(VERSION) /// The main executable name as a string. MAINEXEC must be defined previously, /// usually by the build system. #define BC_MAINEXEC GEN_STR2(MAINEXEC) /// The build type as a string. BUILD_TYPE must be defined previously, usually /// by the build system. #define BC_BUILD_TYPE GEN_STR2(BUILD_TYPE) // On Windows, the executable prefix is always the empty string. #ifndef _WIN32 #define BC_EXECPREFIX GEN_STR2(EXECPREFIX) #else // _WIN32 #define BC_EXECPREFIX "" #endif // _WIN32 #if !BC_ENABLE_LIBRARY #if DC_ENABLED /// The flag for the extended register option. #define DC_FLAG_X (UINTMAX_C(1)<<0) #endif // DC_ENABLED #if BC_ENABLED /// The flag for the POSIX warning option. #define BC_FLAG_W (UINTMAX_C(1)<<1) /// The flag for the POSIX error option. #define BC_FLAG_S (UINTMAX_C(1)<<2) /// The flag for the math library option. #define BC_FLAG_L (UINTMAX_C(1)<<3) /// The flag for the global stacks option. #define BC_FLAG_G (UINTMAX_C(1)<<4) #endif // BC_ENABLED /// The flag for quiet, though this one is reversed; the option clears the flag. #define BC_FLAG_Q (UINTMAX_C(1)<<5) /// The flag for interactive. #define BC_FLAG_I (UINTMAX_C(1)<<6) /// The flag for prompt. This is also reversed; the option clears the flag. #define BC_FLAG_P (UINTMAX_C(1)<<7) /// The flag for read prompt. This is also reversed; the option clears the flag. #define BC_FLAG_R (UINTMAX_C(1)<<8) /// The flag for a leading zero. #define BC_FLAG_Z (UINTMAX_C(1)<<9) /// The flag for stdin being a TTY. #define BC_FLAG_TTYIN (UINTMAX_C(1)<<10) /// The flag for TTY mode. #define BC_FLAG_TTY (UINTMAX_C(1)<<11) /// The flag for reset on SIGINT.
#define BC_FLAG_SIGINT (UINTMAX_C(1)<<12) /// The flag for exiting with expressions. #define BC_FLAG_EXPR_EXIT (UINTMAX_C(1)<<13) /// A convenience macro for getting the TTYIN flag. #define BC_TTYIN (vm.flags & BC_FLAG_TTYIN) /// A convenience macro for getting the TTY flag. #define BC_TTY (vm.flags & BC_FLAG_TTY) /// A convenience macro for getting the SIGINT flag. #define BC_SIGINT (vm.flags & BC_FLAG_SIGINT) #if BC_ENABLED /// A convenience macro for getting the POSIX error flag. #define BC_S (vm.flags & BC_FLAG_S) /// A convenience macro for getting the POSIX warning flag. #define BC_W (vm.flags & BC_FLAG_W) /// A convenience macro for getting the math library flag. #define BC_L (vm.flags & BC_FLAG_L) /// A convenience macro for getting the global stacks flag. #define BC_G (vm.flags & BC_FLAG_G) #endif // BC_ENABLED #if DC_ENABLED /// A convenience macro for getting the extended register flag. #define DC_X (vm.flags & DC_FLAG_X) #endif // DC_ENABLED /// A convenience macro for getting the interactive flag. #define BC_I (vm.flags & BC_FLAG_I) /// A convenience macro for getting the prompt flag. #define BC_P (vm.flags & BC_FLAG_P) /// A convenience macro for getting the read prompt flag. #define BC_R (vm.flags & BC_FLAG_R) /// A convenience macro for getting the leading zero flag. #define BC_Z (vm.flags & BC_FLAG_Z) /// A convenience macro for getting the expression exit flag. #define BC_EXPR_EXIT (vm.flags & BC_FLAG_EXPR_EXIT) #if BC_ENABLED /// A convenience macro for checking if bc is in POSIX mode. #define BC_IS_POSIX (BC_S || BC_W) #if DC_ENABLED /// Returns true if bc is running. #define BC_IS_BC (vm.name[0] != 'd') /// Returns true if dc is running. #define BC_IS_DC (vm.name[0] == 'd') #else // DC_ENABLED /// Returns true if bc is running. #define BC_IS_BC (1) /// Returns true if dc is running. #define BC_IS_DC (0) #endif // DC_ENABLED #else // BC_ENABLED /// A convenience macro for checking if bc is in POSIX mode. 
#define BC_IS_POSIX (0) /// Returns true if bc is running. #define BC_IS_BC (0) /// Returns true if dc is running. #define BC_IS_DC (1) #endif // BC_ENABLED /// A convenience macro for checking if the prompt is enabled. #define BC_PROMPT (BC_P) #else // !BC_ENABLE_LIBRARY #define BC_Z (vm.leading_zeroes) #endif // !BC_ENABLE_LIBRARY /** * Returns the max of its two arguments. This evaluates arguments twice, so be * careful what args you give it. * @param a The first argument. * @param b The second argument. * @return The max of the two arguments. */ #define BC_MAX(a, b) ((a) > (b) ? (a) : (b)) /** * Returns the min of its two arguments. This evaluates arguments twice, so be * careful what args you give it. * @param a The first argument. * @param b The second argument. * @return The min of the two arguments. */ #define BC_MIN(a, b) ((a) < (b) ? (a) : (b)) /// Returns the max obase that is allowed. #define BC_MAX_OBASE ((BcBigDig) (BC_BASE_POW)) /// Returns the max array size that is allowed. #define BC_MAX_DIM ((BcBigDig) (SIZE_MAX - 1)) /// Returns the max scale that is allowed. #define BC_MAX_SCALE ((BcBigDig) (BC_NUM_BIGDIG_MAX - 1)) /// Returns the max string length that is allowed. #define BC_MAX_STRING ((BcBigDig) (BC_NUM_BIGDIG_MAX - 1)) /// Returns the max identifier length that is allowed. #define BC_MAX_NAME BC_MAX_STRING /// Returns the max number size that is allowed. #define BC_MAX_NUM BC_MAX_SCALE #if BC_ENABLE_EXTRA_MATH /// Returns the max random integer that can be returned. #define BC_MAX_RAND ((BcBigDig) (((BcRand) 0) - 1)) #endif // BC_ENABLE_EXTRA_MATH /// Returns the max exponent that is allowed. #define BC_MAX_EXP ((ulong) (BC_NUM_BIGDIG_MAX)) /// Returns the max number of variables that is allowed. #define BC_MAX_VARS ((ulong) (SIZE_MAX - 1)) /// The size of the global buffer. #define BC_VM_BUF_SIZE (1<<12) /// The amount of the global buffer allocated to stdout. 
#define BC_VM_STDOUT_BUF_SIZE (1<<11) /// The amount of the global buffer allocated to stderr. #define BC_VM_STDERR_BUF_SIZE (1<<10) /// The amount of the global buffer allocated to stdin. #define BC_VM_STDIN_BUF_SIZE (BC_VM_STDERR_BUF_SIZE - 1) /// The max number of temporary BcNums that can be kept. #define BC_VM_MAX_TEMPS (1 << 9) /// The capacity of the one BcNum, which is a constant. #define BC_VM_ONE_CAP (1) /** * Returns true if a BcResult is safe for garbage collection. * @param r The BcResult to test. * @return True if @a r is safe to garbage collect. */ #define BC_VM_SAFE_RESULT(r) ((r)->t >= BC_RESULT_TEMP) /// The invalid locale catalog return value. #define BC_VM_INVALID_CATALOG ((nl_catd) -1) /** * Returns true if the *unsigned* multiplication overflows. * @param a The first operand. * @param b The second operand. * @param r The product. * @return True if the multiplication of @a a and @a b overflows. */ #define BC_VM_MUL_OVERFLOW(a, b, r) \ ((r) >= SIZE_MAX || ((a) != 0 && (r) / (a) != (b))) /// The global vm struct. This holds all of the global data besides the file /// buffers. typedef struct BcVm { /// The current status. This is volatile sig_atomic_t because it is also /// used in the signal handler. See the development manual /// (manuals/development.md#async-signal-safe-signal-handling) for more /// information. volatile sig_atomic_t status; /// Non-zero if a jump series is in progress and items should be popped off /// the jmp_bufs vector. This is volatile sig_atomic_t because it is also /// used in the signal handler. See the development manual /// (manuals/development.md#async-signal-safe-signal-handling) for more /// information. volatile sig_atomic_t sig_pop; #if !BC_ENABLE_LIBRARY /// The parser. BcParse prs; /// The program. BcProgram prog; /// A buffer for lines for stdin. BcVec line_buf; /// A buffer to hold a series of lines from stdin. 
Sometimes, multiple lines /// are necessary for parsing, such as a comment that spans multiple lines. BcVec buffer; /// A parser to parse read expressions. BcParse read_prs; /// A buffer for read expressions. BcVec read_buf; #endif // !BC_ENABLE_LIBRARY /// A vector of jmp_bufs for doing a jump series. This allows exception-type /// error handling, while allowing me to do cleanup on the way. BcVec jmp_bufs; /// The number of temps in the temps array. size_t temps_len; #if BC_ENABLE_LIBRARY /// The vector of contexts for the library. BcVec ctxts; /// The vector for creating strings to pass to the client. BcVec out; /// The PRNG. BcRNG rng; /// The current error. BclError err; /// Whether or not bcl should abort on fatal errors. bool abrt; /// Whether or not to print leading zeros. bool leading_zeroes; /// The number of "references," or times that the library was initialized. unsigned int refs; /// Non-zero if bcl is running. This is volatile sig_atomic_t because it is /// also used in the signal handler. See the development manual /// (manuals/development.md#async-signal-safe-signal-handling) for more /// information. volatile sig_atomic_t running; #endif // BC_ENABLE_LIBRARY #if !BC_ENABLE_LIBRARY /// A pointer to the filename of the current file. This is not owned by the /// BcVm struct. const char* file; /// The message printed when SIGINT happens. const char *sigmsg; #endif // !BC_ENABLE_LIBRARY /// Non-zero when signals are "locked." This is volatile sig_atomic_t /// because it is also used in the signal handler. See the development /// manual (manuals/development.md#async-signal-safe-signal-handling) for /// more information. volatile sig_atomic_t sig_lock; /// Non-zero when a signal has been received, but not acted on. This is /// volatile sig_atomic_t because it is also used in the signal handler. See /// the development manual /// (manuals/development.md#async-signal-safe-signal-handling) for more /// information. 
volatile sig_atomic_t sig; #if !BC_ENABLE_LIBRARY /// The length of sigmsg. uchar siglen; /// The instruction used for returning from a read() call. uchar read_ret; /// The flags field used by most macros above. uint16_t flags; /// The number of characters printed in the current line. This is used /// because bc has a limit of the number of characters it can print per /// line. uint16_t nchars; /// The length of the line we can print. The user can set this if they wish. uint16_t line_len; /// True if bc should error if expressions are encountered during option /// parsing, false otherwise. bool no_exprs; /// True if bc should exit if expresions are encountered. bool exit_exprs; /// True if EOF was encountered. bool eof; /// True if bc is currently reading from stdin. bool is_stdin; #if BC_ENABLED /// True if keywords should not be redefined. This is only true for the /// builtin math libraries for bc. bool no_redefine; #endif // BC_ENABLED #endif // !BC_ENABLE_LIBRARY /// An array of maxes for the globals. BcBigDig maxes[BC_PROG_GLOBALS_LEN + BC_ENABLE_EXTRA_MATH]; #if !BC_ENABLE_LIBRARY /// A vector of filenames to process. BcVec files; /// A vector of expressions to process. BcVec exprs; /// The name of the calculator under use. This is used by BC_IS_BC and /// BC_IS_DC. const char *name; /// The help text for the calculator. const char *help; #if BC_ENABLE_HISTORY /// The history data. BcHistory history; #endif // BC_ENABLE_HISTORY /// The function to call to get the next lex token. BcLexNext next; /// The function to call to parse. BcParseParse parse; /// The function to call to parse expressions. BcParseExpr expr; /// The text to display to label functions in error messages. const char *func_header; /// The names of the categories of errors. const char *err_ids[BC_ERR_IDX_NELEMS + BC_ENABLED]; /// The messages for each error. const char *err_msgs[BC_ERR_NELEMS]; #if BC_ENABLE_NLS /// The locale. 
const char *locale; #endif // BC_ENABLE_NLS #endif // !BC_ENABLE_LIBRARY /// The last base used to parse. BcBigDig last_base; /// The last power of last_base used to parse. BcBigDig last_pow; /// The last exponent of base that equals last_pow. BcBigDig last_exp; /// BC_BASE_POW - last_pow. BcBigDig last_rem; #if !BC_ENABLE_LIBRARY /// A buffer of environment arguments. This is the actual value of the /// environment variable. char *env_args_buffer; /// A vector for environment arguments after parsing. BcVec env_args; /// A BcNum set to constant 0. BcNum zero; #endif // !BC_ENABLE_LIBRARY /// A BcNum set to constant 1. BcNum one; /// A BcNum holding the max number held by a BcBigDig plus 1. BcNum max; /// A BcNum holding the max number held by a BcBigDig times 2 plus 1. BcNum max2; /// The BcDig array for max. BcDig max_num[BC_NUM_BIGDIG_LOG10]; /// The BcDig array for max2. BcDig max2_num[BC_NUM_BIGDIG_LOG10]; // The BcDig array for the one BcNum. BcDig one_num[BC_VM_ONE_CAP]; #if !BC_ENABLE_LIBRARY // The BcDig array for the zero BcNum. BcDig zero_num[BC_VM_ONE_CAP]; /// The stdout file. BcFile fout; /// The stderr file. BcFile ferr; #if BC_ENABLE_NLS /// The locale catalog. nl_catd catalog; #endif // BC_ENABLE_NLS /// A pointer to the stdin buffer. char *buf; /// The number of items in the input buffer. size_t buf_len; /// The slab for constants in the main function. This is separate for /// garbage collection reasons. BcVec main_const_slab; //// The slab for all other strings for the main function. BcVec main_slabs; /// The slab for function names, strings in other functions, and constants /// in other functions. BcVec other_slabs; #if BC_ENABLED /// An array of booleans for which bc keywords have been redefined if /// BC_REDEFINE_KEYWORDS is non-zero. bool redefined_kws[BC_LEX_NKWS]; #endif // BC_ENABLED #endif // !BC_ENABLE_LIBRARY #if BC_DEBUG_CODE /// The depth for BC_FUNC_ENTER and BC_FUNC_EXIT. 
size_t func_depth; #endif // BC_DEBUG_CODE } BcVm; /** * Print the copyright banner and help if it's non-NULL. * @param help The help message to print if it's non-NULL. */ void bc_vm_info(const char* const help); /** * The entrance point for bc/dc together. * @param argc The count of arguments. * @param argv The argument array. */ void bc_vm_boot(int argc, char *argv[]); /** * Initializes some of the BcVm global. This is separate to make things easier * on the library code. */ void bc_vm_init(void); /** * Frees the BcVm global. */ void bc_vm_shutdown(void); /** * Add a temp to the temp array. * @param num The BcDig array to add to the temp array. */ void bc_vm_addTemp(BcDig *num); /** * Dish out a temp, or NULL if there are none. * @return A temp, or NULL if none exist. */ BcDig* bc_vm_takeTemp(void); /** * Frees all temporaries. */ void bc_vm_freeTemps(void); #if !BC_ENABLE_HISTORY /** * Erases the flush argument if history does not exist because it does not * matter if history does not exist. */ #define bc_vm_putchar(c, t) bc_vm_putchar(c) #endif // !BC_ENABLE_HISTORY /** * Print to stdout with limited formating. * @param fmt The format string. */ void bc_vm_printf(const char *fmt, ...); /** * Puts a char into the stdout buffer. * @param c The character to put on the stdout buffer. * @param type The flush type. */ void bc_vm_putchar(int c, BcFlushType type); /** * Multiplies @a n and @a size and throws an allocation error if overflow * occurs. * @param n The number of elements. * @param size The size of each element. * @return The product of @a n and @a size. */ size_t bc_vm_arraySize(size_t n, size_t size); /** * Adds @a a and @a b and throws an error if overflow occurs. * @param a The first operand. * @param b The second operand. * @return The sum of @a a and @a b. */ size_t bc_vm_growSize(size_t a, size_t b); /** * Allocate @a n bytes and throw an allocation error if allocation fails. * @param n The bytes to allocate. 
* @return A pointer to the allocated memory. */ void* bc_vm_malloc(size_t n); /** * Reallocate @a ptr to be @a n bytes and throw an allocation error if * reallocation fails. * @param ptr The pointer to a memory allocation to reallocate. * @param n The bytes to allocate. * @return A pointer to the reallocated memory. */ void* bc_vm_realloc(void *ptr, size_t n); /** * Allocates space for, and duplicates, @a str. * @param str The string to allocate. * @return The allocated string. */ char* bc_vm_strdup(const char *str); /** - * Reads a line into BcVm's buffer field. + * Reads a line from stdin into BcVm's buffer field. * @param clear True if the buffer should be cleared first, false otherwise. * @return True if a line was read, false otherwise. */ bool bc_vm_readLine(bool clear); +/** + * Reads a line from the command-line expressions into BcVm's buffer field. + * @param clear True if the buffer should be cleared first, false otherwise. + * @return True if a line was read, false otherwise. + */ +bool bc_vm_readBuf(bool clear); + /** * A convenience and portability function for OpenBSD's pledge(). * @param promises The promises to pledge(). * @param execpromises The exec promises to pledge(). */ void bc_pledge(const char *promises, const char *execpromises); /** * Returns the value of an environment variable. * @param var The environment variable. * @return The value of the environment variable. */ char* bc_vm_getenv(const char* var); /** * Frees an environment variable value. * @param val The value to free. */ void bc_vm_getenvFree(char* val); #if BC_DEBUG_CODE /** * Start executing a jump series. * @param f The name of the function that started the jump series. */ void bc_vm_jmp(const char *f); #else // BC_DEBUG_CODE /** * Start executing a jump series. */ void bc_vm_jmp(void); #endif // BC_DEBUG_CODE #if BC_ENABLE_LIBRARY /** * Handle an error. This is the true error handler. It will start a jump series * if an error occurred. 
POSIX errors will not cause jumps when warnings are on * or no POSIX errors are enabled. * @param e The error. */ void bc_vm_handleError(BcErr e); /** * Handle a fatal error. * @param e The error. */ void bc_vm_fatalError(BcErr e); /** * A function to call at exit. */ void bc_vm_atexit(void); #else // BC_ENABLE_LIBRARY /** * Handle an error. This is the true error handler. It will start a jump series * if an error occurred. POSIX errors will not cause jumps when warnings are on * or no POSIX errors are enabled. * @param e The error. * @param line The source line where the error occurred. */ void bc_vm_handleError(BcErr e, size_t line, ...); /** * Handle a fatal error. * @param e The error. */ #if !BC_ENABLE_MEMCHECK BC_NORETURN #endif // !BC_ENABLE_MEMCHECK void bc_vm_fatalError(BcErr e); /** * A function to call at exit. * @param status The exit status. */ int bc_vm_atexit(int status); #endif // BC_ENABLE_LIBRARY /// A reference to the copyright header. extern const char bc_copyright[]; /// A reference to the format string for source code line printing. extern const char* const bc_err_line; /// A reference to the format string for source code function printing. extern const char* const bc_err_func_header; /// A reference to the array of default error category names. extern const char *bc_errs[]; /// A reference to the array of error category indices for each error. extern const uchar bc_err_ids[]; /// A reference to the array of default error messages. extern const char* const bc_err_msgs[]; /// A reference to the pledge() promises at start. extern const char bc_pledge_start[]; #if BC_ENABLE_HISTORY /// A reference to the end pledge() promises when using history. extern const char bc_pledge_end_history[]; #endif // BC_ENABLE_HISTORY /// A reference to the end pledge() promises when *not* using history. extern const char bc_pledge_end[]; /// A reference to the global data. extern BcVm vm; /// A reference to the global output buffers. 
extern char output_bufs[BC_VM_BUF_SIZE]; #endif // BC_VM_H diff --git a/manuals/bc.1.md.in b/manuals/bc.1.md.in new file mode 100644 index 000000000000..e5ca807dbe39 --- /dev/null +++ b/manuals/bc.1.md.in @@ -0,0 +1,2476 @@ + + +# NAME + +bc - arbitrary-precision decimal arithmetic language and calculator + +# SYNOPSIS + +**bc** [**-ghilPqRsvVw**] [**-\-global-stacks**] [**-\-help**] [**-\-interactive**] [**-\-mathlib**] [**-\-no-prompt**] [**-\-no-read-prompt**] [**-\-quiet**] [**-\-standard**] [**-\-warn**] [**-\-version**] [**-e** *expr*] [**-\-expression**=*expr*...] [**-f** *file*...] [**-\-file**=*file*...] [*file*...] + +# DESCRIPTION + +bc(1) is an interactive processor for a language first standardized in 1991 by +POSIX. (The current standard is [here][1].) The language provides unlimited +precision decimal arithmetic and is somewhat C-like, but there are differences. +Such differences will be noted in this document. + +After parsing and handling options, this bc(1) reads any files given on the +command line and executes them before reading from **stdin**. + +{{ A H N HN }} +This bc(1) is a drop-in replacement for *any* bc(1), including (and +especially) the GNU bc(1). It also has many extensions and extra features beyond +other implementations. +{{ end }} +{{ E EH EN EHN }} +This bc(1) is a drop-in replacement for *any* bc(1), including (and especially) +the GNU bc(1). +{{ end }} + +**Note**: If running this bc(1) on *any* script meant for another bc(1) gives a +parse error, it is probably because a word this bc(1) reserves as a keyword is +used as the name of a function, variable, or array. To fix that, use the +command-line option **-r** *keyword*, where *keyword* is the keyword that is +used as a name in the script. For more information, see the **OPTIONS** section. + +If parsing scripts meant for other bc(1) implementations still does not work, +that is a bug and should be reported. See the **BUGS** section. 
+ +# OPTIONS + +The following are the options that bc(1) accepts. + +**-g**, **-\-global-stacks** + +{{ A H N HN }} +: Turns the globals **ibase**, **obase**, **scale**, and **seed** into stacks. + + This has the effect that a copy of the current value of all four are pushed +{{ end }} +{{ E EH EN EHN }} +: Turns the globals **ibase**, **obase**, and **scale** into stacks. + + This has the effect that a copy of the current value of all three are pushed +{{ end }} + onto a stack for every function call, as well as popped when every function + returns. This means that functions can assign to any and all of those + globals without worrying that the change will affect other functions. + Thus, a hypothetical function named **output(x,b)** that simply printed + **x** in base **b** could be written like this: + + define void output(x, b) { + obase=b + x + } + + instead of like this: + + define void output(x, b) { + auto c + c=obase + obase=b + x + obase=c + } + + This makes writing functions much easier. + +{{ A H N HN }} + (**Note**: the function **output(x,b)** exists in the extended math library. + See the **LIBRARY** section.) + + However, since using this flag means that functions cannot set **ibase**, + **obase**, **scale**, or **seed** globally, functions that are made to do so + cannot work anymore. There are two possible use cases for that, and each has + a solution. +{{ end }} +{{ E EH EN EHN }} + However, since using this flag means that functions cannot set **ibase**, + **obase**, or **scale** globally, functions that are made to do so cannot + work anymore. There are two possible use cases for that, and each has a + solution. +{{ end }} + + First, if a function is called on startup to turn bc(1) into a number + converter, it is possible to replace that capability with various shell + aliases. 
Examples: + + alias d2o="bc -e ibase=A -e obase=8" + alias h2b="bc -e ibase=G -e obase=2" + +{{ A H N HN }} + Second, if the purpose of a function is to set **ibase**, **obase**, + **scale**, or **seed** globally for any other purpose, it could be split + into one to four functions (based on how many globals it sets) and each of + those functions could return the desired value for a global. + + For functions that set **seed**, the value assigned to **seed** is not + propagated to parent functions. This means that the sequence of + pseudo-random numbers that they see will not be the same sequence of + pseudo-random numbers that any parent sees. This is only the case once + **seed** has been set. + + If a function desires to not affect the sequence of pseudo-random numbers + of its parents, but wants to use the same **seed**, it can use the following + line: + + seed = seed +{{ end }} +{{ E EH EN EHN }} + Second, if the purpose of a function is to set **ibase**, **obase**, or + **scale** globally for any other purpose, it could be split into one to + three functions (based on how many globals it sets) and each of those + functions could return the desired value for a global. +{{ end }} + + If the behavior of this option is desired for every run of bc(1), then users + could make sure to define **BC_ENV_ARGS** and include this option (see the + **ENVIRONMENT VARIABLES** section for more details). + + If **-s**, **-w**, or any equivalents are used, this option is ignored. + + This is a **non-portable extension**. + +**-h**, **-\-help** + +: Prints a usage message and quits. + +**-i**, **-\-interactive** + +: Forces interactive mode. (See the **INTERACTIVE MODE** section.) + + This is a **non-portable extension**. + +**-L**, **-\-no-line-length** + +: Disables line length checking and prints numbers without backslashes and + newlines. In other words, this option sets **BC_LINE_LENGTH** to **0** (see + the **ENVIRONMENT VARIABLES** section). 
+ + This is a **non-portable extension**. + +**-l**, **-\-mathlib** + +: Sets **scale** (see the **SYNTAX** section) to **20** and loads the included +{{ A H N HN }} + math library and the extended math library before running any code, + including any expressions or files specified on the command line. + + To learn what is in the libraries, see the **LIBRARY** section. +{{ end }} +{{ E EH EN EHN }} + math library before running any code, including any expressions or files + specified on the command line. + + To learn what is in the library, see the **LIBRARY** section. +{{ end }} + +**-P**, **-\-no-prompt** + +: Disables the prompt in TTY mode. (The prompt is only enabled in TTY mode. + See the **TTY MODE** section.) This is mostly for those users that do not + want a prompt or are not used to having them in bc(1). Most of those users + would want to put this option in **BC_ENV_ARGS** (see the + **ENVIRONMENT VARIABLES** section). + + These options override the **BC_PROMPT** and **BC_TTY_MODE** environment + variables (see the **ENVIRONMENT VARIABLES** section). + + This is a **non-portable extension**. + +**-R**, **-\-no-read-prompt** + +: Disables the read prompt in TTY mode. (The read prompt is only enabled in + TTY mode. See the **TTY MODE** section.) This is mostly for those users that + do not want a read prompt or are not used to having them in bc(1). Most of + those users would want to put this option in **BC_ENV_ARGS** (see the + **ENVIRONMENT VARIABLES** section). This option is also useful in hash bang + lines of bc(1) scripts that prompt for user input. + + This option does not disable the regular prompt because the read prompt is + only used when the **read()** built-in function is called. + + These options *do* override the **BC_PROMPT** and **BC_TTY_MODE** + environment variables (see the **ENVIRONMENT VARIABLES** section), but only + for the read prompt. + + This is a **non-portable extension**. 
+ +**-r** *keyword*, **-\-redefine**=*keyword* + +: Redefines *keyword* in order to allow it to be used as a function, variable, + or array name. This is useful when this bc(1) gives parse errors when + parsing scripts meant for other bc(1) implementations. + + The keywords this bc(1) allows to be redefined are: + + * **abs** + * **asciify** + * **continue** + * **divmod** + * **else** + * **halt** +{{ A H N HN }} + * **irand** +{{ end }} + * **last** + * **limits** + * **maxibase** + * **maxobase** +{{ A H N HN }} + * **maxrand** +{{ end }} + * **maxscale** + * **modexp** + * **print** +{{ A H N HN }} + * **rand** +{{ end }} + * **read** +{{ A H N HN }} + * **seed** +{{ end }} + * **stream** + + If any of those keywords are used as a function, variable, or array name in + a script, use this option with the keyword as the argument. If multiple are + used, use this option for all of them; it can be used multiple times. + + Keywords are *not* redefined when parsing the builtin math library (see the + **LIBRARY** section). + + It is a fatal error to redefine keywords mandated by the POSIX standard. It + is a fatal error to attempt to redefine words that this bc(1) does not + reserve as keywords. + +**-q**, **-\-quiet** + +: This option is for compatibility with the [GNU bc(1)][2]; it is a no-op. + Without this option, GNU bc(1) prints a copyright header. This bc(1) only + prints the copyright header if one or more of the **-v**, **-V**, or + **-\-version** options are given. + + This is a **non-portable extension**. + +**-s**, **-\-standard** + +: Process exactly the language defined by the [standard][1] and error if any + extensions are used. + + This is a **non-portable extension**. + +**-v**, **-V**, **-\-version** + +: Print the version information (copyright header) and exit. + + This is a **non-portable extension**. 
+ +**-w**, **-\-warn** + +: Like **-s** and **-\-standard**, except that warnings (and not errors) are + printed for non-standard extensions and execution continues normally. + + This is a **non-portable extension**. + +**-z**, **-\-leading-zeroes** + +: Makes bc(1) print all numbers greater than **-1** and less than **1**, and + not equal to **0**, with a leading zero. + + This can be set for individual numbers with the **plz(x)**, **plznl(x)**, + **pnlz(x)**, and **pnlznl(x)** functions in the extended math library (see + the **LIBRARY** section). + + This is a **non-portable extension**. + +**-e** *expr*, **-\-expression**=*expr* + +: Evaluates *expr*. If multiple expressions are given, they are evaluated in + order. If files are given as well (see below), the expressions and files are + evaluated in the order given. This means that if a file is given before an + expression, the file is read in and evaluated first. + + If this option is given on the command-line (i.e., not in **BC_ENV_ARGS**, + see the **ENVIRONMENT VARIABLES** section), then after processing all + expressions and files, bc(1) will exit, unless **-** (**stdin**) was given + as an argument at least once to **-f** or **-\-file**, whether on the + command-line or in **BC_ENV_ARGS**. However, if any other **-e**, + **-\-expression**, **-f**, or **-\-file** arguments are given after **-f-** + or equivalent is given, bc(1) will give a fatal error and exit. + + This is a **non-portable extension**. + +**-f** *file*, **-\-file**=*file* + +: Reads in *file* and evaluates it, line by line, as though it were read + through **stdin**. If expressions are also given (see above), the + expressions are evaluated in the order given. + + If this option is given on the command-line (i.e., not in **BC_ENV_ARGS**, + see the **ENVIRONMENT VARIABLES** section), then after processing all + expressions and files, bc(1) will exit, unless **-** (**stdin**) was given + as an argument at least once to **-f** or **-\-file**. 
However, if any other + **-e**, **-\-expression**, **-f**, or **-\-file** arguments are given after + **-f-** or equivalent is given, bc(1) will give a fatal error and exit. + + This is a **non-portable extension**. + +All long options are **non-portable extensions**. + +# STDIN + +If no files or expressions are given by the **-f**, **-\-file**, **-e**, or +**-\-expression** options, then bc(1) reads from **stdin**. + +However, there are a few caveats to this. + +First, **stdin** is evaluated a line at a time. The only exception to this is if +the parse cannot complete. That means that starting a string without ending it +or starting a function, **if** statement, or loop without ending it will also +cause bc(1) to not execute. + +Second, after an **if** statement, bc(1) doesn't know if an **else** statement +will follow, so it will not execute until it knows there will not be an **else** +statement. + +# STDOUT + +Any non-error output is written to **stdout**. In addition, if history (see the +**HISTORY** section) and the prompt (see the **TTY MODE** section) are enabled, +both are output to **stdout**. + +**Note**: Unlike other bc(1) implementations, this bc(1) will issue a fatal +error (see the **EXIT STATUS** section) if it cannot write to **stdout**, so if +**stdout** is closed, as in **bc >&-**, it will quit with an error. This +is done so that bc(1) can report problems when **stdout** is redirected to a +file. + +If there are scripts that depend on the behavior of other bc(1) implementations, +it is recommended that those scripts be changed to redirect **stdout** to +**/dev/null**. + +# STDERR + +Any error output is written to **stderr**. + +**Note**: Unlike other bc(1) implementations, this bc(1) will issue a fatal +error (see the **EXIT STATUS** section) if it cannot write to **stderr**, so if +**stderr** is closed, as in **bc 2>&-**, it will quit with an error. This +is done so that bc(1) can exit with an error code when **stderr** is redirected +to a file. 
+ +If there are scripts that depend on the behavior of other bc(1) implementations, +it is recommended that those scripts be changed to redirect **stderr** to +**/dev/null**. + +# SYNTAX + +The syntax for bc(1) programs is mostly C-like, with some differences. This +bc(1) follows the [POSIX standard][1], which is a much more thorough resource +for the language this bc(1) accepts. This section is meant to be a summary and a +listing of all the extensions to the standard. + +In the sections below, **E** means expression, **S** means statement, and **I** +means identifier. + +Identifiers (**I**) start with a lowercase letter and can be followed by any +number (up to **BC_NAME_MAX-1**) of lowercase letters (**a-z**), digits +(**0-9**), and underscores (**\_**). The regex is **\[a-z\]\[a-z0-9\_\]\***. +Identifiers with more than one character (letter) are a +**non-portable extension**. + +**ibase** is a global variable determining how to interpret constant numbers. It +is the "input" base, or the number base used for interpreting input numbers. +**ibase** is initially **10**. If the **-s** (**-\-standard**) and **-w** +(**-\-warn**) flags were not given on the command line, the max allowable value +for **ibase** is **36**. Otherwise, it is **16**. The min allowable value for +**ibase** is **2**. The max allowable value for **ibase** can be queried in +bc(1) programs with the **maxibase()** built-in function. + +**obase** is a global variable determining how to output results. It is the +"output" base, or the number base used for outputting numbers. **obase** is +initially **10**. The max allowable value for **obase** is **BC_BASE_MAX** and +can be queried in bc(1) programs with the **maxobase()** built-in function. The +{{ A H N HN }} +min allowable value for **obase** is **0**. If **obase** is **0**, values are +output in scientific notation, and if **obase** is **1**, values are output in +engineering notation. Otherwise, values are output in the specified base. 
+ +Outputting in scientific and engineering notations are **non-portable +extensions**. +{{ end }} +{{ E EH EN EHN }} +min allowable value for **obase** is **2**. Values are output in the specified +base. +{{ end }} + +The *scale* of an expression is the number of digits in the result of the +expression right of the decimal point, and **scale** is a global variable that +sets the precision of any operations, with exceptions. **scale** is initially +**0**. **scale** cannot be negative. The max allowable value for **scale** is +**BC_SCALE_MAX** and can be queried in bc(1) programs with the **maxscale()** +built-in function. + +bc(1) has both *global* variables and *local* variables. All *local* +variables are local to the function; they are parameters or are introduced in +the **auto** list of a function (see the **FUNCTIONS** section). If a variable +is accessed which is not a parameter or in the **auto** list, it is assumed to +be *global*. If a parent function has a *local* variable version of a variable +that a child function considers *global*, the value of that *global* variable in +the child function is the value of the variable in the parent function, not the +value of the actual *global* variable. + +All of the above applies to arrays as well. + +The value of a statement that is an expression (i.e., any of the named +expressions or operands) is printed unless the lowest precedence operator is an +assignment operator *and* the expression is not surrounded by parentheses. + +The value that is printed is also assigned to the special variable **last**. A +single dot (**.**) may also be used as a synonym for **last**. These are +**non-portable extensions**. + +Either semicolons or newlines may separate statements. + +## Comments + +There are two kinds of comments: + +1. Block comments are enclosed in **/\*** and **\*/**. +2. Line comments go from **#** until, and not including, the next newline. This + is a **non-portable extension**. 
+ +## Named Expressions + +The following are named expressions in bc(1): + +1. Variables: **I** +2. Array Elements: **I[E]** +3. **ibase** +4. **obase** +5. **scale** +{{ A H N HN }} +6. **seed** +7. **last** or a single dot (**.**) + +Numbers 6 and 7 are **non-portable extensions**. + +The meaning of **seed** is dependent on the current pseudo-random number +generator but is guaranteed to not change except for new major versions. + +The *scale* and sign of the value may be significant. + +If a previously used **seed** value is assigned to **seed** and used again, the +pseudo-random number generator is guaranteed to produce the same sequence of +pseudo-random numbers as it did when the **seed** value was previously used. + +The exact value assigned to **seed** is not guaranteed to be returned if +**seed** is queried again immediately. However, if **seed** *does* return a +different value, both values, when assigned to **seed**, are guaranteed to +produce the same sequence of pseudo-random numbers. This means that certain +values assigned to **seed** will *not* produce unique sequences of pseudo-random +numbers. The value of **seed** will change after any use of the **rand()** and +**irand(E)** operands (see the *Operands* subsection below), except if the +parameter passed to **irand(E)** is **0**, **1**, or negative. + +There is no limit to the length (number of significant decimal digits) or +*scale* of the value that can be assigned to **seed**. +{{ end }} +{{ E EH EN EHN }} +6. **last** or a single dot (**.**) + +Number 6 is a **non-portable extension**. +{{ end }} + +Variables and arrays do not interfere; users can have arrays named the same as +variables. This also applies to functions (see the **FUNCTIONS** section), so a +user can have a variable, array, and function that all have the same name, and +they will not shadow each other, whether inside of functions or not. 
+ +Named expressions are required as the operand of **increment**/**decrement** +operators and as the left side of **assignment** operators (see the *Operators* +subsection). + +## Operands + +The following are valid operands in bc(1): + +1. Numbers (see the *Numbers* subsection below). +2. Array indices (**I[E]**). +3. **(E)**: The value of **E** (used to change precedence). +4. **sqrt(E)**: The square root of **E**. **E** must be non-negative. +5. **length(E)**: The number of significant decimal digits in **E**. Returns + **1** for **0** with no decimal places. If given a string, the length of the + string is returned. Passing a string to **length(E)** is a **non-portable + extension**. +6. **length(I[])**: The number of elements in the array **I**. This is a + **non-portable extension**. +7. **scale(E)**: The *scale* of **E**. +8. **abs(E)**: The absolute value of **E**. This is a **non-portable + extension**. +9. **modexp(E, E, E)**: Modular exponentiation, where the first expression is + the base, the second is the exponent, and the third is the modulus. All + three values must be integers. The second argument must be non-negative. The + third argument must be non-zero. This is a **non-portable extension**. +10. **divmod(E, E, I[])**: Division and modulus in one operation. This is for + optimization. The first expression is the dividend, and the second is the + divisor, which must be non-zero. The return value is the quotient, and the + modulus is stored in index **0** of the provided array (the last argument). + This is a **non-portable extension**. +11. **asciify(E)**: If **E** is a string, returns a string that is the first + letter of its argument. If it is a number, calculates the number mod **256** + and returns that number as a one-character string. This is a **non-portable + extension**. +12. 
**I()**, **I(E)**, **I(E, E)**, and so on, where **I** is an identifier for + a non-**void** function (see the *Void Functions* subsection of the + **FUNCTIONS** section). The **E** argument(s) may also be arrays of the form + **I[]**, which will automatically be turned into array references (see the + *Array References* subsection of the **FUNCTIONS** section) if the + corresponding parameter in the function definition is an array reference. +13. **read()**: Reads a line from **stdin** and uses that as an expression. The + result of that expression is the result of the **read()** operand. This is a + **non-portable extension**. +14. **maxibase()**: The max allowable **ibase**. This is a **non-portable + extension**. +15. **maxobase()**: The max allowable **obase**. This is a **non-portable + extension**. +16. **maxscale()**: The max allowable **scale**. This is a **non-portable + extension**. +17. **line_length()**: The line length set with **BC_LINE_LENGTH** (see the + **ENVIRONMENT VARIABLES** section). This is a **non-portable extension**. +18. **global_stacks()**: **0** if global stacks are not enabled with the **-g** + or **-\-global-stacks** options, non-zero otherwise. See the **OPTIONS** + section. This is a **non-portable extension**. +19. **leading_zero()**: **0** if leading zeroes are not enabled with the **-z** + or **-\-leading-zeroes** options, non-zero otherwise. See the **OPTIONS** + section. This is a **non-portable extension**. +{{ A H N HN }} +20. **rand()**: A pseudo-random integer between **0** (inclusive) and + **BC_RAND_MAX** (inclusive). Using this operand will change the value of + **seed**. This is a **non-portable extension**. +21. **irand(E)**: A pseudo-random integer between **0** (inclusive) and the + value of **E** (exclusive). If **E** is negative or is a non-integer + (**E**'s *scale* is not **0**), an error is raised, and bc(1) resets (see + the **RESET** section) while **seed** remains unchanged.
If **E** is larger + than **BC_RAND_MAX**, the higher bound is honored by generating several + pseudo-random integers, multiplying them by appropriate powers of + **BC_RAND_MAX+1**, and adding them together. Thus, the size of integer that + can be generated with this operand is unbounded. Using this operand will + change the value of **seed**, unless the value of **E** is **0** or **1**. + In that case, **0** is returned, and **seed** is *not* changed. This is a + **non-portable extension**. +22. **maxrand()**: The max integer returned by **rand()**. This is a + **non-portable extension**. + +The integers generated by **rand()** and **irand(E)** are guaranteed to be as +unbiased as possible, subject to the limitations of the pseudo-random number +generator. + +**Note**: The values returned by the pseudo-random number generator with +**rand()** and **irand(E)** are guaranteed to *NOT* be cryptographically secure. +This is a consequence of using a seeded pseudo-random number generator. However, +they *are* guaranteed to be reproducible with identical **seed** values. This +means that the pseudo-random values from bc(1) should only be used where a +reproducible stream of pseudo-random numbers is *ESSENTIAL*. In any other case, +use a non-seeded pseudo-random number generator. +{{ end }} + +## Numbers + +Numbers are strings made up of digits, uppercase letters, and at most **1** +period for a radix. Numbers can have up to **BC_NUM_MAX** digits. Uppercase +letters are equal to **9** + their position in the alphabet (i.e., **A** equals +**10**, or **9+1**). If a digit or letter makes no sense with the current value +of **ibase**, they are set to the value of the highest valid digit in **ibase**. + +Single-character numbers (i.e., **A** alone) take the value that they would have +if they were valid digits, regardless of the value of **ibase**. This means that +**A** alone always equals decimal **10** and **Z** alone always equals decimal +**35**. 
+ +{{ A H N HN }} +In addition, bc(1) accepts numbers in scientific notation. These have the form +**\<number\>e\<integer\>**. The exponent (the portion after the **e**) must be +an integer. An example is **1.89237e9**, which is equal to **1892370000**. +Negative exponents are also allowed, so **4.2890e-3** is equal to **0.0042890**. + +Using scientific notation is an error or warning if the **-s** or **-w**, +respectively, command-line options (or equivalents) are given. + +**WARNING**: Both the number and the exponent in scientific notation are +interpreted according to the current **ibase**, but the number is still +multiplied by **10\^exponent** regardless of the current **ibase**. For example, +if **ibase** is **16** and bc(1) is given the number string **FFeA**, the +resulting decimal number will be **2550000000000**, and if bc(1) is given the +number string **10e-4**, the resulting decimal number will be **0.0016**. + +Accepting input as scientific notation is a **non-portable extension**. +{{ end }} + +## Operators + +The following arithmetic and logical operators can be used. They are listed in +order of decreasing precedence. Operators in the same group have the same +precedence.
+ +**++** **-\-** + +: Type: Prefix and Postfix + + Associativity: None + + Description: **increment**, **decrement** + +**-** **!** + +: Type: Prefix + + Associativity: None + + Description: **negation**, **boolean not** + +{{ A H N HN }} +**\$** + +: Type: Postfix + + Associativity: None + + Description: **truncation** + +**\@** + +: Type: Binary + + Associativity: Right + + Description: **set precision** +{{ end }} + +**\^** + +: Type: Binary + + Associativity: Right + + Description: **power** + +**\*** **/** **%** + +: Type: Binary + + Associativity: Left + + Description: **multiply**, **divide**, **modulus** + +**+** **-** + +: Type: Binary + + Associativity: Left + + Description: **add**, **subtract** + +{{ A H N HN }} +**\<\<** **\>\>** + +: Type: Binary + + Associativity: Left + + Description: **shift left**, **shift right** + +**=** **\<\<=** **\>\>=** **+=** **-=** **\*=** **/=** **%=** **\^=** **\@=** +{{ end }} +{{ E EH EN EHN }} +**=** **+=** **-=** **\*=** **/=** **%=** **\^=** +{{ end }} + +: Type: Binary + + Associativity: Right + + Description: **assignment** + +**==** **\<=** **\>=** **!=** **\<** **\>** + +: Type: Binary + + Associativity: Left + + Description: **relational** + +**&&** + +: Type: Binary + + Associativity: Left + + Description: **boolean and** + +**||** + +: Type: Binary + + Associativity: Left + + Description: **boolean or** + +The operators will be described in more detail below. + +**++** **-\-** + +: The prefix and postfix **increment** and **decrement** operators behave + exactly like they would in C. They require a named expression (see the + *Named Expressions* subsection) as an operand. + + The prefix versions of these operators are more efficient; use them where + possible. + +**-** + +: The **negation** operator returns **0** if a user attempts to negate any + expression with the value **0**. Otherwise, a copy of the expression with + its sign flipped is returned. 
+ +**!** + +: The **boolean not** operator returns **1** if the expression is **0**, or + **0** otherwise. + + This is a **non-portable extension**. + +{{ A H N HN }} +**\$** + +: The **truncation** operator returns a copy of the given expression with all + of its *scale* removed. + + This is a **non-portable extension**. + +**\@** + +: The **set precision** operator takes two expressions and returns a copy of + the first with its *scale* equal to the value of the second expression. That + could either mean that the number is returned without change (if the + *scale* of the first expression matches the value of the second + expression), extended (if it is less), or truncated (if it is more). + + The second expression must be an integer (no *scale*) and non-negative. + + This is a **non-portable extension**. +{{ end }} + +**\^** + +: The **power** operator (not the **exclusive or** operator, as it would be in + C) takes two expressions and raises the first to the power of the value of + the second. The *scale* of the result is equal to **scale**. + + The second expression must be an integer (no *scale*), and if it is + negative, the first value must be non-zero. + +**\*** + +: The **multiply** operator takes two expressions, multiplies them, and + returns the product. If **a** is the *scale* of the first expression and + **b** is the *scale* of the second expression, the *scale* of the result is + equal to **min(a+b,max(scale,a,b))** where **min()** and **max()** return + the obvious values. + +**/** + +: The **divide** operator takes two expressions, divides them, and returns the + quotient. The *scale* of the result shall be the value of **scale**. + + The second expression must be non-zero. + +**%** + +: The **modulus** operator takes two expressions, **a** and **b**, and + evaluates them by 1) Computing **a/b** to current **scale** and 2) Using the + result of step 1 to calculate **a-(a/b)\*b** to *scale* + **max(scale+scale(b),scale(a))**. 
+ + The second expression must be non-zero. + +**+** + +: The **add** operator takes two expressions, **a** and **b**, and returns the + sum, with a *scale* equal to the max of the *scale*s of **a** and **b**. + +**-** + +: The **subtract** operator takes two expressions, **a** and **b**, and + returns the difference, with a *scale* equal to the max of the *scale*s of + **a** and **b**. + +{{ A H N HN }} +**\<\<** + +: The **left shift** operator takes two expressions, **a** and **b**, and + returns a copy of the value of **a** with its decimal point moved **b** + places to the right. + + The second expression must be an integer (no *scale*) and non-negative. + + This is a **non-portable extension**. + +**\>\>** + +: The **right shift** operator takes two expressions, **a** and **b**, and + returns a copy of the value of **a** with its decimal point moved **b** + places to the left. + + The second expression must be an integer (no *scale*) and non-negative. + + This is a **non-portable extension**. +{{ end }} + +{{ A H N HN }} +**=** **\<\<=** **\>\>=** **+=** **-=** **\*=** **/=** **%=** **\^=** **\@=** +{{ end }} +{{ E EH EN EHN }} +**=** **+=** **-=** **\*=** **/=** **%=** **\^=** +{{ end }} + +: The **assignment** operators take two expressions, **a** and **b** where + **a** is a named expression (see the *Named Expressions* subsection). + + For **=**, **b** is copied and the result is assigned to **a**. For all + others, **a** and **b** are applied as operands to the corresponding + arithmetic operator and the result is assigned to **a**. + +{{ A H N HN }} + The **assignment** operators that correspond to operators that are + extensions are themselves **non-portable extensions**. +{{ end }} + +**==** **\<=** **\>=** **!=** **\<** **\>** + +: The **relational** operators compare two expressions, **a** and **b**, and + if the relation holds, according to C language semantics, the result is + **1**. Otherwise, it is **0**. 
+ + Note that unlike in C, these operators have a lower precedence than the + **assignment** operators, which means that **a=b\>c** is interpreted as + **(a=b)\>c**. + + Also, unlike the [standard][1] requires, these operators can appear anywhere + any other expressions can be used. This allowance is a + **non-portable extension**. + +**&&** + +: The **boolean and** operator takes two expressions and returns **1** if both + expressions are non-zero, **0** otherwise. + + This is *not* a short-circuit operator. + + This is a **non-portable extension**. + +**||** + +: The **boolean or** operator takes two expressions and returns **1** if one + of the expressions is non-zero, **0** otherwise. + + This is *not* a short-circuit operator. + + This is a **non-portable extension**. + +## Statements + +The following items are statements: + +1. **E** +2. **{** **S** **;** ... **;** **S** **}** +3. **if** **(** **E** **)** **S** +4. **if** **(** **E** **)** **S** **else** **S** +5. **while** **(** **E** **)** **S** +6. **for** **(** **E** **;** **E** **;** **E** **)** **S** +7. An empty statement +8. **break** +9. **continue** +10. **quit** +11. **halt** +12. **limits** +13. A string of characters, enclosed in double quotes +14. **print** **E** **,** ... **,** **E** +15. **stream** **E** **,** ... **,** **E** +16. **I()**, **I(E)**, **I(E, E)**, and so on, where **I** is an identifier for + a **void** function (see the *Void Functions* subsection of the + **FUNCTIONS** section). The **E** argument(s) may also be arrays of the form + **I[]**, which will automatically be turned into array references (see the + *Array References* subsection of the **FUNCTIONS** section) if the + corresponding parameter in the function definition is an array reference. + +Numbers 4, 9, 11, 12, 14, 15, and 16 are **non-portable extensions**. + +Also, as a **non-portable extension**, any or all of the expressions in the +header of a for loop may be omitted. 
If the condition (second expression) is +omitted, it is assumed to be a constant **1**. + +The **break** statement causes a loop to stop iterating and resume execution +immediately following a loop. This is only allowed in loops. + +The **continue** statement causes a loop iteration to stop early and returns to +the start of the loop, including testing the loop condition. This is only +allowed in loops. + +The **if** **else** statement does the same thing as in C. + +The **quit** statement causes bc(1) to quit, even if it is on a branch that will +not be executed (it is a compile-time command). + +The **halt** statement causes bc(1) to quit, if it is executed. (Unlike **quit** +if it is on a branch of an **if** statement that is not executed, bc(1) does not +quit.) + +The **limits** statement prints the limits that this bc(1) is subject to. This +is like the **quit** statement in that it is a compile-time command. + +An expression by itself is evaluated and printed, followed by a newline. + +{{ A H N HN }} +Both scientific notation and engineering notation are available for printing the +results of expressions. Scientific notation is activated by assigning **0** to +**obase**, and engineering notation is activated by assigning **1** to +**obase**. To deactivate them, just assign a different value to **obase**. + +Scientific notation and engineering notation are disabled if bc(1) is run with +either the **-s** or **-w** command-line options (or equivalents). + +Printing numbers in scientific notation and/or engineering notation is a +**non-portable extension**. +{{ end }} + +## Strings + +If strings appear as a statement by themselves, they are printed without a +trailing newline. + +In addition to appearing as a lone statement by themselves, strings can be +assigned to variables and array elements. They can also be passed to functions +in variable parameters. 
+ +If any statement that expects a string is given a variable that had a string +assigned to it, the statement acts as though it had received a string. + +If any math operation is attempted on a string or a variable or array element +that has been assigned a string, an error is raised, and bc(1) resets (see the +**RESET** section). + +Assigning strings to variables and array elements and passing them to functions +are **non-portable extensions**. + +## Print Statement + +The "expressions" in a **print** statement may also be strings. If they are, there +are backslash escape sequences that are interpreted specially. What those +sequences are, and what they cause to be printed, are shown below: + +**\\a**: **\\a** + +**\\b**: **\\b** + +**\\\\**: **\\** + +**\\e**: **\\** + +**\\f**: **\\f** + +**\\n**: **\\n** + +**\\q**: **"** + +**\\r**: **\\r** + +**\\t**: **\\t** + +Any other character following a backslash causes the backslash and character to +be printed as-is. + +Any non-string expression in a print statement shall be assigned to **last**, +like any other expression that is printed. + +## Stream Statement + +The "expressions" in a **stream** statement may also be strings. + +If a **stream** statement is given a string, it prints the string as though the +string had appeared as its own statement. In other words, the **stream** +statement prints strings normally, without a newline. + +If a **stream** statement is given a number, a copy of it is truncated and its +absolute value is calculated. The result is then printed as though **obase** is +**256** and each digit is interpreted as an 8-bit ASCII character, making it a +byte stream. + +## Order of Evaluation + +All expressions in a statement are evaluated left to right, except as necessary +to maintain order of operations.
This means, for example, assuming that **i** is +equal to **0**, in the expression + + a[i++] = i++ + +the first (or 0th) element of **a** is set to **1**, and **i** is equal to **2** +at the end of the expression. + +This includes function arguments. Thus, assuming **i** is equal to **0**, this +means that in the expression + + x(i++, i++) + +the first argument passed to **x()** is **0**, and the second argument is **1**, +while **i** is equal to **2** before the function starts executing. + +# FUNCTIONS + +Function definitions are as follows: + +``` +define I(I,...,I){ + auto I,...,I + S;...;S + return(E) +} +``` + +Any **I** in the parameter list or **auto** list may be replaced with **I[]** to +make a parameter or **auto** var an array, and any **I** in the parameter list +may be replaced with **\*I[]** to make a parameter an array reference. Callers +of functions that take array references should not put an asterisk in the call; +they must be called with just **I[]** like normal array parameters and will be +automatically converted into references. + +As a **non-portable extension**, the opening brace of a **define** statement may +appear on the next line. + +As a **non-portable extension**, the return statement may also be in one of the +following forms: + +1. **return** +2. **return** **(** **)** +3. **return** **E** + +The first two, or not specifying a **return** statement, is equivalent to +**return (0)**, unless the function is a **void** function (see the *Void +Functions* subsection below). + +## Void Functions + +Functions can also be **void** functions, defined as follows: + +``` +define void I(I,...,I){ + auto I,...,I + S;...;S + return +} +``` + +They can only be used as standalone expressions, where such an expression would +be printed alone, except in a print statement. + +Void functions can only use the first two **return** statements listed above. +They can also omit the return statement entirely. 
+ +The word "void" is not treated as a keyword; it is still possible to have +variables, arrays, and functions named **void**. The word "void" is only +treated specially right after the **define** keyword. + +This is a **non-portable extension**. + +## Array References + +For any array in the parameter list, if the array is declared in the form + +``` +*I[] +``` + +it is a **reference**. Any changes to the array in the function are reflected, +when the function returns, to the array that was passed in. + +Other than this, all function arguments are passed by value. + +This is a **non-portable extension**. + +# LIBRARY + +{{ A H N HN }} +All of the functions below, including the functions in the extended math +library (see the *Extended Library* subsection below), are available when the +**-l** or **-\-mathlib** command-line flags are given, except that the extended +math library is not available when the **-s** option, the **-w** option, or +equivalents are given. +{{ end }} +{{ E EH EN EHN }} +All of the functions below are available when the **-l** or **-\-mathlib** +command-line flags are given. +{{ end }} + +## Standard Library + +The [standard][1] defines the following functions for the math library: + +**s(x)** + +: Returns the sine of **x**, which is assumed to be in radians. + + This is a transcendental function (see the *Transcendental Functions* + subsection below). + +**c(x)** + +: Returns the cosine of **x**, which is assumed to be in radians. + + This is a transcendental function (see the *Transcendental Functions* + subsection below). + +**a(x)** + +: Returns the arctangent of **x**, in radians. + + This is a transcendental function (see the *Transcendental Functions* + subsection below). + +**l(x)** + +: Returns the natural logarithm of **x**. + + This is a transcendental function (see the *Transcendental Functions* + subsection below). + +**e(x)** + +: Returns the mathematical constant **e** raised to the power of **x**. 
+ + This is a transcendental function (see the *Transcendental Functions* + subsection below). + +**j(x, n)** + +: Returns the bessel integer order **n** (truncated) of **x**. + + This is a transcendental function (see the *Transcendental Functions* + subsection below). + +{{ A H N HN }} +## Extended Library + +The extended library is *not* loaded when the **-s**/**-\-standard** or +**-w**/**-\-warn** options are given since they are not part of the library +defined by the [standard][1]. + +The extended library is a **non-portable extension**. + +**p(x, y)** + +: Calculates **x** to the power of **y**, even if **y** is not an integer, and + returns the result to the current **scale**. + + It is an error if **y** is negative and **x** is **0**. + + This is a transcendental function (see the *Transcendental Functions* + subsection below). + +**r(x, p)** + +: Returns **x** rounded to **p** decimal places according to the rounding mode + [round half away from **0**][3]. + +**ceil(x, p)** + +: Returns **x** rounded to **p** decimal places according to the rounding mode + [round away from **0**][6]. + +**f(x)** + +: Returns the factorial of the truncated absolute value of **x**. + +**perm(n, k)** + +: Returns the permutation of the truncated absolute value of **n** of the + truncated absolute value of **k**, if **k \<= n**. If not, it returns **0**. + +**comb(n, k)** + +: Returns the combination of the truncated absolute value of **n** of the + truncated absolute value of **k**, if **k \<= n**. If not, it returns **0**. + +**l2(x)** + +: Returns the logarithm base **2** of **x**. + + This is a transcendental function (see the *Transcendental Functions* + subsection below). + +**l10(x)** + +: Returns the logarithm base **10** of **x**. + + This is a transcendental function (see the *Transcendental Functions* + subsection below). + +**log(x, b)** + +: Returns the logarithm base **b** of **x**. 
+ + This is a transcendental function (see the *Transcendental Functions* + subsection below). + +**cbrt(x)** + +: Returns the cube root of **x**. + +**root(x, n)** + +: Calculates the truncated value of **n**, **r**, and returns the **r**th root + of **x** to the current **scale**. + + If **r** is **0** or negative, this raises an error and causes bc(1) to + reset (see the **RESET** section). It also raises an error and causes bc(1) + to reset if **r** is even and **x** is negative. + +**gcd(a, b)** + +: Returns the greatest common divisor (factor) of the truncated absolute value + of **a** and the truncated absolute value of **b**. + +**lcm(a, b)** + +: Returns the least common multiple of the truncated absolute value of **a** + and the truncated absolute value of **b**. + +**pi(p)** + +: Returns **pi** to **p** decimal places. + + This is a transcendental function (see the *Transcendental Functions* + subsection below). + +**t(x)** + +: Returns the tangent of **x**, which is assumed to be in radians. + + This is a transcendental function (see the *Transcendental Functions* + subsection below). + +**a2(y, x)** + +: Returns the arctangent of **y/x**, in radians. If both **y** and **x** are + equal to **0**, it raises an error and causes bc(1) to reset (see the + **RESET** section). Otherwise, if **x** is greater than **0**, it returns + **a(y/x)**. If **x** is less than **0**, and **y** is greater than or equal + to **0**, it returns **a(y/x)+pi**. If **x** is less than **0**, and **y** + is less than **0**, it returns **a(y/x)-pi**. If **x** is equal to **0**, + and **y** is greater than **0**, it returns **pi/2**. If **x** is equal to + **0**, and **y** is less than **0**, it returns **-pi/2**. + + This function is the same as the **atan2()** function in many programming + languages. + + This is a transcendental function (see the *Transcendental Functions* + subsection below). + +**sin(x)** + +: Returns the sine of **x**, which is assumed to be in radians. 
+ + This is an alias of **s(x)**. + + This is a transcendental function (see the *Transcendental Functions* + subsection below). + +**cos(x)** + +: Returns the cosine of **x**, which is assumed to be in radians. + + This is an alias of **c(x)**. + + This is a transcendental function (see the *Transcendental Functions* + subsection below). + +**tan(x)** + +: Returns the tangent of **x**, which is assumed to be in radians. + + If **x** is equal to **1** or **-1**, this raises an error and causes bc(1) + to reset (see the **RESET** section). + + This is an alias of **t(x)**. + + This is a transcendental function (see the *Transcendental Functions* + subsection below). + +**atan(x)** + +: Returns the arctangent of **x**, in radians. + + This is an alias of **a(x)**. + + This is a transcendental function (see the *Transcendental Functions* + subsection below). + +**atan2(y, x)** + +: Returns the arctangent of **y/x**, in radians. If both **y** and **x** are + equal to **0**, it raises an error and causes bc(1) to reset (see the + **RESET** section). Otherwise, if **x** is greater than **0**, it returns + **a(y/x)**. If **x** is less than **0**, and **y** is greater than or equal + to **0**, it returns **a(y/x)+pi**. If **x** is less than **0**, and **y** + is less than **0**, it returns **a(y/x)-pi**. If **x** is equal to **0**, + and **y** is greater than **0**, it returns **pi/2**. If **x** is equal to + **0**, and **y** is less than **0**, it returns **-pi/2**. + + This function is the same as the **atan2()** function in many programming + languages. + + This is an alias of **a2(y, x)**. + + This is a transcendental function (see the *Transcendental Functions* + subsection below). + +**r2d(x)** + +: Converts **x** from radians to degrees and returns the result. + + This is a transcendental function (see the *Transcendental Functions* + subsection below). + +**d2r(x)** + +: Converts **x** from degrees to radians and returns the result. 
+ + This is a transcendental function (see the *Transcendental Functions* + subsection below). + +**frand(p)** + +: Generates a pseudo-random number between **0** (inclusive) and **1** + (exclusive) with the number of decimal digits after the decimal point equal + to the truncated absolute value of **p**. If **p** is not **0**, then + calling this function will change the value of **seed**. If **p** is **0**, + then **0** is returned, and **seed** is *not* changed. + +**ifrand(i, p)** + +: Generates a pseudo-random number that is between **0** (inclusive) and the + truncated absolute value of **i** (exclusive) with the number of decimal + digits after the decimal point equal to the truncated absolute value of + **p**. If the absolute value of **i** is greater than or equal to **2**, and + **p** is not **0**, then calling this function will change the value of + **seed**; otherwise, **0** is returned and **seed** is not changed. + +**srand(x)** + +: Returns **x** with its sign flipped with probability **0.5**. In other + words, it randomizes the sign of **x**. + +**brand()** + +: Returns a random boolean value (either **0** or **1**). + +**band(a, b)** + +: Takes the truncated absolute value of both **a** and **b** and calculates + and returns the result of the bitwise **and** operation between them. + + If you want to use signed two's complement arguments, use **s2u(x)** to + convert. + +**bor(a, b)** + +: Takes the truncated absolute value of both **a** and **b** and calculates + and returns the result of the bitwise **or** operation between them. + + If you want to use signed two's complement arguments, use **s2u(x)** to + convert. + +**bxor(a, b)** + +: Takes the truncated absolute value of both **a** and **b** and calculates + and returns the result of the bitwise **xor** operation between them. + + If you want to use signed two's complement arguments, use **s2u(x)** to + convert. 
+ +**bshl(a, b)** + +: Takes the truncated absolute value of both **a** and **b** and calculates + and returns the result of **a** bit-shifted left by **b** places. + + If you want to use signed two's complement arguments, use **s2u(x)** to + convert. + +**bshr(a, b)** + +: Takes the truncated absolute value of both **a** and **b** and calculates + and returns the truncated result of **a** bit-shifted right by **b** places. + + If you want to use signed two's complement arguments, use **s2u(x)** to + convert. + +**bnotn(x, n)** + +: Takes the truncated absolute value of **x** and does a bitwise not as though + it has the same number of bytes as the truncated absolute value of **n**. + + If you want to use a signed two's complement argument, use **s2u(x)** to + convert. + +**bnot8(x)** + +: Does a bitwise not of the truncated absolute value of **x** as though it has + **8** binary digits (1 unsigned byte). + + If you want to use a signed two's complement argument, use **s2u(x)** to + convert. + +**bnot16(x)** + +: Does a bitwise not of the truncated absolute value of **x** as though it has + **16** binary digits (2 unsigned bytes). + + If you want to use a signed two's complement argument, use **s2u(x)** to + convert. + +**bnot32(x)** + +: Does a bitwise not of the truncated absolute value of **x** as though it has + **32** binary digits (4 unsigned bytes). + + If you want to use a signed two's complement argument, use **s2u(x)** to + convert. + +**bnot64(x)** + +: Does a bitwise not of the truncated absolute value of **x** as though it has + **64** binary digits (8 unsigned bytes). + + If you want to use a signed two's complement argument, use **s2u(x)** to + convert. + +**bnot(x)** + +: Does a bitwise not of the truncated absolute value of **x** as though it has + the minimum number of power of two unsigned bytes. + + If you want to use a signed two's complement argument, use **s2u(x)** to + convert.
+ +**brevn(x, n)** + +: Runs a bit reversal on the truncated absolute value of **x** as though it + has the same number of 8-bit bytes as the truncated absolute value of **n**. + + If you want to use a signed two's complement argument, use **s2u(x)** to + convert. + +**brev8(x)** + +: Runs a bit reversal on the truncated absolute value of **x** as though it + has 8 binary digits (1 unsigned byte). + + If you want to use a signed two's complement argument, use **s2u(x)** to + convert. + +**brev16(x)** + +: Runs a bit reversal on the truncated absolute value of **x** as though it + has 16 binary digits (2 unsigned bytes). + + If you want to use a signed two's complement argument, use **s2u(x)** to + convert. + +**brev32(x)** + +: Runs a bit reversal on the truncated absolute value of **x** as though it + has 32 binary digits (4 unsigned bytes). + + If you want to use a signed two's complement argument, use **s2u(x)** to + convert. + +**brev64(x)** + +: Runs a bit reversal on the truncated absolute value of **x** as though it + has 64 binary digits (8 unsigned bytes). + + If you want to use a signed two's complement argument, use **s2u(x)** to + convert. + +**brev(x)** + +: Runs a bit reversal on the truncated absolute value of **x** as though it + has the minimum number of power of two unsigned bytes. + + If you want to use a signed two's complement argument, use **s2u(x)** to + convert. + +**broln(x, p, n)** + +: Does a left bitwise rotation of the truncated absolute value of **x**, as + though it has the same number of unsigned 8-bit bytes as the truncated + absolute value of **n**, by the number of places equal to the truncated + absolute value of **p** modded by **2** to the power of the number of + binary digits in **n** 8-bit bytes. + + If you want to use a signed two's complement argument, use **s2u(x)** to + convert.
+ +**brol8(x, p)** + +: Does a left bitwise rotation of the truncated absolute value of **x**, as + though it has **8** binary digits (**1** unsigned byte), by the number of + places equal to the truncated absolute value of **p** modded by **2** to the + power of **8**. + + If you want to use a signed two's complement argument, use **s2u(x)** to + convert. + +**brol16(x, p)** + +: Does a left bitwise rotation of the truncated absolute value of **x**, as + though it has **16** binary digits (**2** unsigned bytes), by the number of + places equal to the truncated absolute value of **p** modded by **2** to the + power of **16**. + + If you want to use a signed two's complement argument, use **s2u(x)** to + convert. + +**brol32(x, p)** + +: Does a left bitwise rotation of the truncated absolute value of **x**, as + though it has **32** binary digits (**4** unsigned bytes), by the number of + places equal to the truncated absolute value of **p** modded by **2** to the + power of **32**. + + If you want to use a signed two's complement argument, use **s2u(x)** to + convert. + +**brol64(x, p)** + +: Does a left bitwise rotation of the truncated absolute value of **x**, as + though it has **64** binary digits (**8** unsigned bytes), by the number of + places equal to the truncated absolute value of **p** modded by **2** to the + power of **64**. + + If you want to use a signed two's complement argument, use **s2u(x)** to + convert. + +**brol(x, p)** + +: Does a left bitwise rotation of the truncated absolute value of **x**, as + though it has the minimum number of power of two unsigned 8-bit bytes, by + the number of places equal to the truncated absolute value of **p** modded + by 2 to the power of the number of binary digits in the minimum number of + 8-bit bytes. + + If you want to use a signed two's complement argument, use **s2u(x)** to + convert.
+ +**brorn(x, p, n)** + +: Does a right bitwise rotation of the truncated absolute value of **x**, as + though it has the same number of unsigned 8-bit bytes as the truncated + absolute value of **n**, by the number of places equal to the truncated + absolute value of **p** modded by **2** to the power of the number of + binary digits in **n** 8-bit bytes. + + If you want to use a signed two's complement argument, use **s2u(x)** to + convert. + +**bror8(x, p)** + +: Does a right bitwise rotation of the truncated absolute value of **x**, as + though it has **8** binary digits (**1** unsigned byte), by the number of + places equal to the truncated absolute value of **p** modded by **2** to the + power of **8**. + + If you want to use a signed two's complement argument, use **s2u(x)** to + convert. + +**bror16(x, p)** + +: Does a right bitwise rotation of the truncated absolute value of **x**, as + though it has **16** binary digits (**2** unsigned bytes), by the number of + places equal to the truncated absolute value of **p** modded by **2** to the + power of **16**. + + If you want to use a signed two's complement argument, use **s2u(x)** to + convert. + +**bror32(x, p)** + +: Does a right bitwise rotation of the truncated absolute value of **x**, as + though it has **32** binary digits (**4** unsigned bytes), by the number of + places equal to the truncated absolute value of **p** modded by **2** to the + power of **32**. + + If you want to use a signed two's complement argument, use **s2u(x)** to + convert. + +**bror64(x, p)** + +: Does a right bitwise rotation of the truncated absolute value of **x**, as + though it has **64** binary digits (**8** unsigned bytes), by the number of + places equal to the truncated absolute value of **p** modded by **2** to the + power of **64**. + + If you want to use a signed two's complement argument, use **s2u(x)** to + convert.
+ +**bror(x, p)** + +: Does a right bitwise rotation of the truncated absolute value of **x**, as + though it has the minimum number of power of two unsigned 8-bit bytes, by + the number of places equal to the truncated absolute value of **p** modded + by 2 to the power of the number of binary digits in the minimum number of + 8-bit bytes. + + If you want to use a signed two's complement argument, use **s2u(x)** to + convert. + +**bmodn(x, n)** + +: Returns the modulus of the truncated absolute value of **x** by **2** to the + power of the multiplication of the truncated absolute value of **n** and + **8**. + + If you want to use a signed two's complement argument, use **s2u(x)** to + convert. + +**bmod8(x, n)** + +: Returns the modulus of the truncated absolute value of **x** by **2** to the + power of **8**. + + If you want to use a signed two's complement argument, use **s2u(x)** to + convert. + +**bmod16(x, n)** + +: Returns the modulus of the truncated absolute value of **x** by **2** to the + power of **16**. + + If you want to use a signed two's complement argument, use **s2u(x)** to + convert. + +**bmod32(x, n)** + +: Returns the modulus of the truncated absolute value of **x** by **2** to the + power of **32**. + + If you want to use a signed two's complement argument, use **s2u(x)** to + convert. + +**bmod64(x, n)** + +: Returns the modulus of the truncated absolute value of **x** by **2** to the + power of **64**. + + If you want to use a signed two's complement argument, use **s2u(x)** to + convert. + +**bunrev(t)** + +: Assumes **t** is a bitwise-reversed number with an extra set bit one place + more significant than the real most significant bit (which was the least + significant bit in the original number). This number is reversed and + returned without the extra set bit. + + This function is used to implement other bitwise functions; it is not meant + to be used by users, but it can be.
+ +**plz(x)** + +: If **x** is not equal to **0** and greater than **-1** and less than **1**, + it is printed with a leading zero, regardless of the use of the **-z** + option (see the **OPTIONS** section) and without a trailing newline. + + Otherwise, **x** is printed normally, without a trailing newline. + +**plznl(x)** + +: If **x** is not equal to **0** and greater than **-1** and less than **1**, + it is printed with a leading zero, regardless of the use of the **-z** + option (see the **OPTIONS** section) and with a trailing newline. + + Otherwise, **x** is printed normally, with a trailing newline. + +**pnlz(x)** + +: If **x** is not equal to **0** and greater than **-1** and less than **1**, + it is printed without a leading zero, regardless of the use of the **-z** + option (see the **OPTIONS** section) and without a trailing newline. + + Otherwise, **x** is printed normally, without a trailing newline. + +**pnlznl(x)** + +: If **x** is not equal to **0** and greater than **-1** and less than **1**, + it is printed without a leading zero, regardless of the use of the **-z** + option (see the **OPTIONS** section) and with a trailing newline. + + Otherwise, **x** is printed normally, with a trailing newline. + +**ubytes(x)** + +: Returns the number of unsigned integer bytes required to hold the truncated + absolute value of **x**. + +**sbytes(x)** + +: Returns the number of signed, two's-complement integer bytes required to + hold the truncated value of **x**. + +**s2u(x)** + +: Returns **x** if it is non-negative. If it *is* negative, then it calculates + what **x** would be as a 2's-complement signed integer and returns the + non-negative integer that would have the same representation in binary. + +**s2un(x, n)** + +: Returns **x** if it is non-negative.
If it *is* negative, then it calculates + what **x** would be as a 2's-complement signed integer with **n** bytes and + returns the non-negative integer that would have the same representation in + binary. If **x** cannot fit into **n** 2's-complement signed bytes, it is + truncated to fit. + +**hex(x)** + +: Outputs the hexadecimal (base **16**) representation of **x**. + + This is a **void** function (see the *Void Functions* subsection of the + **FUNCTIONS** section). + +**binary(x)** + +: Outputs the binary (base **2**) representation of **x**. + + This is a **void** function (see the *Void Functions* subsection of the + **FUNCTIONS** section). + +**output(x, b)** + +: Outputs the base **b** representation of **x**. + + This is a **void** function (see the *Void Functions* subsection of the + **FUNCTIONS** section). + +**uint(x)** + +: Outputs the representation, in binary and hexadecimal, of **x** as an + unsigned integer in as few power of two bytes as possible. Both outputs are + split into bytes separated by spaces. + + If **x** is not an integer or is negative, an error message is printed + instead, but bc(1) is not reset (see the **RESET** section). + + This is a **void** function (see the *Void Functions* subsection of the + **FUNCTIONS** section). + +**int(x)** + +: Outputs the representation, in binary and hexadecimal, of **x** as a signed, + two's-complement integer in as few power of two bytes as possible. Both + outputs are split into bytes separated by spaces. + + If **x** is not an integer, an error message is printed instead, but bc(1) + is not reset (see the **RESET** section). + + This is a **void** function (see the *Void Functions* subsection of the + **FUNCTIONS** section). + +**uintn(x, n)** + +: Outputs the representation, in binary and hexadecimal, of **x** as an + unsigned integer in **n** bytes. Both outputs are split into bytes separated + by spaces. 
+ + If **x** is not an integer, is negative, or cannot fit into **n** bytes, an + error message is printed instead, but bc(1) is not reset (see the **RESET** + section). + + This is a **void** function (see the *Void Functions* subsection of the + **FUNCTIONS** section). + +**intn(x, n)** + +: Outputs the representation, in binary and hexadecimal, of **x** as a signed, + two's-complement integer in **n** bytes. Both outputs are split into bytes + separated by spaces. + + If **x** is not an integer or cannot fit into **n** bytes, an error message + is printed instead, but bc(1) is not reset (see the **RESET** section). + + This is a **void** function (see the *Void Functions* subsection of the + **FUNCTIONS** section). + +**uint8(x)** + +: Outputs the representation, in binary and hexadecimal, of **x** as an + unsigned integer in **1** byte. Both outputs are split into bytes separated + by spaces. + + If **x** is not an integer, is negative, or cannot fit into **1** byte, an + error message is printed instead, but bc(1) is not reset (see the **RESET** + section). + + This is a **void** function (see the *Void Functions* subsection of the + **FUNCTIONS** section). + +**int8(x)** + +: Outputs the representation, in binary and hexadecimal, of **x** as a signed, + two's-complement integer in **1** byte. Both outputs are split into bytes + separated by spaces. + + If **x** is not an integer or cannot fit into **1** byte, an error message + is printed instead, but bc(1) is not reset (see the **RESET** section). + + This is a **void** function (see the *Void Functions* subsection of the + **FUNCTIONS** section). + +**uint16(x)** + +: Outputs the representation, in binary and hexadecimal, of **x** as an + unsigned integer in **2** bytes. Both outputs are split into bytes separated + by spaces. + + If **x** is not an integer, is negative, or cannot fit into **2** bytes, an + error message is printed instead, but bc(1) is not reset (see the **RESET** + section). 
+ + This is a **void** function (see the *Void Functions* subsection of the + **FUNCTIONS** section). + +**int16(x)** + +: Outputs the representation, in binary and hexadecimal, of **x** as a signed, + two's-complement integer in **2** bytes. Both outputs are split into bytes + separated by spaces. + + If **x** is not an integer or cannot fit into **2** bytes, an error message + is printed instead, but bc(1) is not reset (see the **RESET** section). + + This is a **void** function (see the *Void Functions* subsection of the + **FUNCTIONS** section). + +**uint32(x)** + +: Outputs the representation, in binary and hexadecimal, of **x** as an + unsigned integer in **4** bytes. Both outputs are split into bytes separated + by spaces. + + If **x** is not an integer, is negative, or cannot fit into **4** bytes, an + error message is printed instead, but bc(1) is not reset (see the **RESET** + section). + + This is a **void** function (see the *Void Functions* subsection of the + **FUNCTIONS** section). + +**int32(x)** + +: Outputs the representation, in binary and hexadecimal, of **x** as a signed, + two's-complement integer in **4** bytes. Both outputs are split into bytes + separated by spaces. + + If **x** is not an integer or cannot fit into **4** bytes, an error message + is printed instead, but bc(1) is not reset (see the **RESET** section). + + This is a **void** function (see the *Void Functions* subsection of the + **FUNCTIONS** section). + +**uint64(x)** + +: Outputs the representation, in binary and hexadecimal, of **x** as an + unsigned integer in **8** bytes. Both outputs are split into bytes separated + by spaces. + + If **x** is not an integer, is negative, or cannot fit into **8** bytes, an + error message is printed instead, but bc(1) is not reset (see the **RESET** + section). + + This is a **void** function (see the *Void Functions* subsection of the + **FUNCTIONS** section). 
+ +**int64(x)** + +: Outputs the representation, in binary and hexadecimal, of **x** as a signed, + two's-complement integer in **8** bytes. Both outputs are split into bytes + separated by spaces. + + If **x** is not an integer or cannot fit into **8** bytes, an error message + is printed instead, but bc(1) is not reset (see the **RESET** section). + + This is a **void** function (see the *Void Functions* subsection of the + **FUNCTIONS** section). + +**hex_uint(x, n)** + +: Outputs the representation of the truncated absolute value of **x** as an + unsigned integer in hexadecimal using **n** bytes. Not all of the value will + be output if **n** is too small. + + This is a **void** function (see the *Void Functions* subsection of the + **FUNCTIONS** section). + +**binary_uint(x, n)** + +: Outputs the representation of the truncated absolute value of **x** as an + unsigned integer in binary using **n** bytes. Not all of the value will be + output if **n** is too small. + + This is a **void** function (see the *Void Functions* subsection of the + **FUNCTIONS** section). + +**output_uint(x, n)** + +: Outputs the representation of the truncated absolute value of **x** as an + unsigned integer in the current **obase** (see the **SYNTAX** section) using + **n** bytes. Not all of the value will be output if **n** is too small. + + This is a **void** function (see the *Void Functions* subsection of the + **FUNCTIONS** section). + +**output_byte(x, i)** + +: Outputs byte **i** of the truncated absolute value of **x**, where **0** is + the least significant byte and **number_of_bytes - 1** is the most + significant byte. + + This is a **void** function (see the *Void Functions* subsection of the + **FUNCTIONS** section). +{{ end }} + +## Transcendental Functions + +All transcendental functions can return slightly inaccurate results (up to 1 +[ULP][4]). 
This is unavoidable, and [this article][5] explains why it is +impossible and unnecessary to calculate exact results for the transcendental +functions. + +Because of the possible inaccuracy, I recommend that users call those functions +with the precision (**scale**) set to at least 1 higher than is necessary. If +exact results are *absolutely* required, users can double the precision +(**scale**) and then truncate. + +The transcendental functions in the standard math library are: + +* **s(x)** +* **c(x)** +* **a(x)** +* **l(x)** +* **e(x)** +* **j(x, n)** + +{{ A H N HN }} +The transcendental functions in the extended math library are: + +* **l2(x)** +* **l10(x)** +* **log(x, b)** +* **pi(p)** +* **t(x)** +* **a2(y, x)** +* **sin(x)** +* **cos(x)** +* **tan(x)** +* **atan(x)** +* **atan2(y, x)** +* **r2d(x)** +* **d2r(x)** +{{ end }} + +# RESET + +When bc(1) encounters an error or a signal that it has a non-default handler +for, it resets. This means that several things happen. + +First, any functions that are executing are stopped and popped off the stack. +The behavior is not unlike that of exceptions in programming languages. Then +the execution point is set so that any code waiting to execute (after all +functions returned) is skipped. + +Thus, when bc(1) resets, it skips any remaining code waiting to be executed. +Then, if it is interactive mode, and the error was not a fatal error (see the +**EXIT STATUS** section), it asks for more input; otherwise, it exits with the +appropriate return code. + +Note that this reset behavior is different from the GNU bc(1), which attempts to +start executing the statement right after the one that caused an error. + +# PERFORMANCE + +Most bc(1) implementations use **char** types to calculate the value of **1** +decimal digit at a time, but that can be slow. This bc(1) does something +different. + +It uses large integers to calculate more than **1** decimal digit at a time. 
If +built in a environment where **BC_LONG_BIT** (see the **LIMITS** section) is +**64**, then each integer has **9** decimal digits. If built in an environment +where **BC_LONG_BIT** is **32** then each integer has **4** decimal digits. This +value (the number of decimal digits per large integer) is called +**BC_BASE_DIGS**. + +The actual values of **BC_LONG_BIT** and **BC_BASE_DIGS** can be queried with +the **limits** statement. + +In addition, this bc(1) uses an even larger integer for overflow checking. This +integer type depends on the value of **BC_LONG_BIT**, but is always at least +twice as large as the integer type used to store digits. + +# LIMITS + +The following are the limits on bc(1): + +**BC_LONG_BIT** + +: The number of bits in the **long** type in the environment where bc(1) was + built. This determines how many decimal digits can be stored in a single + large integer (see the **PERFORMANCE** section). + +**BC_BASE_DIGS** + +: The number of decimal digits per large integer (see the **PERFORMANCE** + section). Depends on **BC_LONG_BIT**. + +**BC_BASE_POW** + +: The max decimal number that each large integer can store (see + **BC_BASE_DIGS**) plus **1**. Depends on **BC_BASE_DIGS**. + +**BC_OVERFLOW_MAX** + +: The max number that the overflow type (see the **PERFORMANCE** section) can + hold. Depends on **BC_LONG_BIT**. + +**BC_BASE_MAX** + +: The maximum output base. Set at **BC_BASE_POW**. + +**BC_DIM_MAX** + +: The maximum size of arrays. Set at **SIZE_MAX-1**. + +**BC_SCALE_MAX** + +: The maximum **scale**. Set at **BC_OVERFLOW_MAX-1**. + +**BC_STRING_MAX** + +: The maximum length of strings. Set at **BC_OVERFLOW_MAX-1**. + +**BC_NAME_MAX** + +: The maximum length of identifiers. Set at **BC_OVERFLOW_MAX-1**. + +**BC_NUM_MAX** + +: The maximum length of a number (in decimal digits), which includes digits + after the decimal point. Set at **BC_OVERFLOW_MAX-1**. 
+ +{{ A H N HN }} +**BC_RAND_MAX** + +: The maximum integer (inclusive) returned by the **rand()** operand. Set at + **2\^BC_LONG_BIT-1**. +{{ end }} + +Exponent + +: The maximum allowable exponent (positive or negative). Set at + **BC_OVERFLOW_MAX**. + +Number of vars + +: The maximum number of vars/arrays. Set at **SIZE_MAX-1**. + +The actual values can be queried with the **limits** statement. + +These limits are meant to be effectively non-existent; the limits are so large +(at least on 64-bit machines) that there should not be any point at which they +become a problem. In fact, memory should be exhausted before these limits should +be hit. + +# ENVIRONMENT VARIABLES + +bc(1) recognizes the following environment variables: + +**POSIXLY_CORRECT** + +: If this variable exists (no matter the contents), bc(1) behaves as if + the **-s** option was given. + +**BC_ENV_ARGS** + +: This is another way to give command-line arguments to bc(1). They should be + in the same format as all other command-line arguments. These are always + processed first, so any files given in **BC_ENV_ARGS** will be processed + before arguments and files given on the command-line. This gives the user + the ability to set up "standard" options and files to be used at every + invocation. The most useful thing for such files to contain would be useful + functions that the user might want every time bc(1) runs. + + The code that parses **BC_ENV_ARGS** will correctly handle quoted arguments, + but it does not understand escape sequences. For example, the string + **"/home/gavin/some bc file.bc"** will be correctly parsed, but the string + **"/home/gavin/some \"bc\" file.bc"** will include the backslashes. + + The quote parsing will handle either kind of quotes, **'** or **"**. Thus, + if you have a file with any number of single quotes in the name, you can use + double quotes as the outside quotes, as in **"some 'bc' file.bc"**, and vice + versa if you have a file with double quotes. 
However, handling a file with + both kinds of quotes in **BC_ENV_ARGS** is not supported due to the + complexity of the parsing, though such files are still supported on the + command-line where the parsing is done by the shell. + +**BC_LINE_LENGTH** + +: If this environment variable exists and contains an integer that is greater + than **1** and is less than **UINT16_MAX** (**2\^16-1**), bc(1) will output + lines to that length, including the backslash (**\\**). The default line + length is **70**. + + The special value of **0** will disable line length checking and print + numbers without regard to line length and without backslashes and newlines. + +**BC_BANNER** + +: If this environment variable exists and contains an integer, then a non-zero + value activates the copyright banner when bc(1) is in interactive mode, + while zero deactivates it. + + If bc(1) is not in interactive mode (see the **INTERACTIVE MODE** section), + then this environment variable has no effect because bc(1) does not print + the banner when not in interactive mode. + + This environment variable overrides the default, which can be queried with + the **-h** or **-\-help** options. + +**BC_SIGINT_RESET** + +: If bc(1) is not in interactive mode (see the **INTERACTIVE MODE** section), + then this environment variable has no effect because bc(1) exits on + **SIGINT** when not in interactive mode. + + However, when bc(1) is in interactive mode, then if this environment + variable exists and contains an integer, a non-zero value makes bc(1) reset + on **SIGINT**, rather than exit, and zero makes bc(1) exit. If this + environment variable exists and is *not* an integer, then bc(1) will exit on + **SIGINT**. + + This environment variable overrides the default, which can be queried with + the **-h** or **-\-help** options. + +**BC_TTY_MODE** + +: If TTY mode is *not* available (see the **TTY MODE** section), then this + environment variable has no effect. 
+ + However, when TTY mode is available, then if this environment variable + exists and contains an integer, then a non-zero value makes bc(1) use TTY + mode, and zero makes bc(1) not use TTY mode. + + This environment variable overrides the default, which can be queried with + the **-h** or **-\-help** options. + +**BC_PROMPT** + +: If TTY mode is *not* available (see the **TTY MODE** section), then this + environment variable has no effect. + + However, when TTY mode is available, then if this environment variable + exists and contains an integer, a non-zero value makes bc(1) use a prompt, + and zero or a non-integer makes bc(1) not use a prompt. If this environment + variable does not exist and **BC_TTY_MODE** does, then the value of the + **BC_TTY_MODE** environment variable is used. + + This environment variable and the **BC_TTY_MODE** environment variable + override the default, which can be queried with the **-h** or **-\-help** + options. + +**BC_EXPR_EXIT** + +: If any expressions or expression files are given on the command-line with + **-e**, **-\-expression**, **-f**, or **-\-file**, then if this environment + variable exists and contains an integer, a non-zero value makes bc(1) exit + after executing the expressions and expression files, and a zero value + makes bc(1) not exit. + + This environment variable overrides the default, which can be queried with + the **-h** or **-\-help** options. + +# EXIT STATUS + +bc(1) returns the following exit statuses: + +**0** + +: No error. + +**1** + +: A math error occurred. This follows standard practice of using **1** for + expected errors, since math errors will happen in the process of normal + execution.
+ + Math errors include divide by **0**, taking the square root of a negative +{{ A H N HN }} + number, using a negative number as a bound for the pseudo-random number + generator, attempting to convert a negative number to a hardware integer, + overflow when converting a number to a hardware integer, overflow when + calculating the size of a number, and attempting to use a non-integer where + an integer is required. + + Converting to a hardware integer happens for the second operand of the power + (**\^**), places (**\@**), left shift (**\<\<**), and right shift (**\>\>**) + operators and their corresponding assignment operators. +{{ end }} +{{ E EH EN EHN }} + number, attempting to convert a negative number to a hardware integer, + overflow when converting a number to a hardware integer, overflow when + calculating the size of a number, and attempting to use a non-integer where + an integer is required. + + Converting to a hardware integer happens for the second operand of the power + (**\^**) operator and the corresponding assignment operator. +{{ end }} + +**2** + +: A parse error occurred. + + Parse errors include unexpected **EOF**, using an invalid character, failing + to find the end of a string or comment, using a token where it is invalid, + giving an invalid expression, giving an invalid print statement, giving an + invalid function definition, attempting to assign to an expression that is + not a named expression (see the *Named Expressions* subsection of the + **SYNTAX** section), giving an invalid **auto** list, having a duplicate + **auto**/function parameter, failing to find the end of a code block, + attempting to return a value from a **void** function, attempting to use a + variable as a reference, and using any extensions when the option **-s** or + any equivalents were given. + +**3** + +: A runtime error occurred. 
+ + Runtime errors include assigning an invalid number to any global (**ibase**, + **obase**, or **scale**), giving a bad expression to a **read()** call, + calling **read()** inside of a **read()** call, type errors, passing the + wrong number of arguments to functions, attempting to call an undefined + function, and attempting to use a **void** function call as a value in an + expression. + +**4** + +: A fatal error occurred. + + Fatal errors include memory allocation errors, I/O errors, failing to open + files, attempting to use files that do not have only ASCII characters (bc(1) + only accepts ASCII characters), attempting to open a directory as a file, + and giving invalid command-line options. + +The exit status **4** is special; when a fatal error occurs, bc(1) always exits +and returns **4**, no matter what mode bc(1) is in. + +The other statuses will only be returned when bc(1) is not in interactive mode +(see the **INTERACTIVE MODE** section), since bc(1) resets its state (see the +**RESET** section) and accepts more input when one of those errors occurs in +interactive mode. This is also the case when interactive mode is forced by the +**-i** flag or **-\-interactive** option. + +These exit statuses allow bc(1) to be used in shell scripting with error +checking, and its normal behavior can be forced by using the **-i** flag or +**-\-interactive** option. + +# INTERACTIVE MODE + +Per the [standard][1], bc(1) has an interactive mode and a non-interactive mode. +Interactive mode is turned on automatically when both **stdin** and **stdout** +are hooked to a terminal, but the **-i** flag and **-\-interactive** option can +turn it on in other situations. + +In interactive mode, bc(1) attempts to recover from errors (see the **RESET** +section), and in normal execution, flushes **stdout** as soon as execution is +done for the current input. 
bc(1) may also reset on **SIGINT** instead of exit, +depending on the contents of, or default for, the **BC_SIGINT_RESET** +environment variable (see the **ENVIRONMENT VARIABLES** section). + +# TTY MODE + +If **stdin**, **stdout**, and **stderr** are all connected to a TTY, then "TTY +mode" is considered to be available, and thus, bc(1) can turn on TTY mode, +subject to some settings. + +If there is the environment variable **BC_TTY_MODE** in the environment (see the +**ENVIRONMENT VARIABLES** section), then if that environment variable contains a +non-zero integer, bc(1) will turn on TTY mode when **stdin**, **stdout**, and +**stderr** are all connected to a TTY. If the **BC_TTY_MODE** environment +variable exists but is *not* a non-zero integer, then bc(1) will not turn TTY +mode on. + +If the environment variable **BC_TTY_MODE** does *not* exist, the default +setting is used. The default setting can be queried with the **-h** or +**-\-help** options. + +TTY mode is different from interactive mode because interactive mode is required +in the [bc(1) specification][1], and interactive mode requires only **stdin** +and **stdout** to be connected to a terminal. + +{{ A E N EN }} +## Command-Line History + +Command-line history is only enabled if TTY mode is, i.e., that **stdin**, +**stdout**, and **stderr** are connected to a TTY and the **BC_TTY_MODE** +environment variable (see the **ENVIRONMENT VARIABLES** section) and its default +do not disable TTY mode. See the **COMMAND LINE HISTORY** section for more +information. +{{ end }} + +## Prompt + +If TTY mode is available, then a prompt can be enabled. Like TTY mode itself, it +can be turned on or off with an environment variable: **BC_PROMPT** (see the +**ENVIRONMENT VARIABLES** section). 
+ +If the environment variable **BC_PROMPT** exists and is a non-zero integer, then +the prompt is turned on when **stdin**, **stdout**, and **stderr** are connected +to a TTY and the **-P** and **-\-no-prompt** options were not used. The read +prompt will be turned on under the same conditions, except that the **-R** and +**-\-no-read-prompt** options must also not be used. + +However, if **BC_PROMPT** does not exist, the prompt can be enabled or disabled +with the **BC_TTY_MODE** environment variable, the **-P** and **-\-no-prompt** +options, and the **-R** and **-\-no-read-prompt** options. See the **ENVIRONMENT +VARIABLES** and **OPTIONS** sections for more details. + +# SIGNAL HANDLING + +Sending a **SIGINT** will cause bc(1) to do one of two things. + +If bc(1) is not in interactive mode (see the **INTERACTIVE MODE** section), or +the **BC_SIGINT_RESET** environment variable (see the **ENVIRONMENT VARIABLES** +section), or its default, is either not an integer or it is zero, bc(1) will +exit. + +However, if bc(1) is in interactive mode, and the **BC_SIGINT_RESET** or its +default is an integer and non-zero, then bc(1) will stop executing the current +input and reset (see the **RESET** section) upon receiving a **SIGINT**. + +Note that "current input" can mean one of two things. If bc(1) is processing +input from **stdin** in interactive mode, it will ask for more input. If bc(1) +is processing input from a file in interactive mode, it will stop processing the +file and start processing the next file, if one exists, or ask for input from +**stdin** if no other file exists. + +This means that if a **SIGINT** is sent to bc(1) as it is executing a file, it +can seem as though bc(1) did not respond to the signal since it will immediately +start executing the next file. This is by design; most files that users execute +when interacting with bc(1) have function definitions, which are quick to parse. 
+If a file takes a long time to execute, there may be a bug in that file. The +rest of the files could still be executed without problem, allowing the user to +continue. + +**SIGTERM** and **SIGQUIT** cause bc(1) to clean up and exit, and it uses the +{{ A E N EN }} +default handler for all other signals. The one exception is **SIGHUP**; in that +case, and only when bc(1) is in TTY mode (see the **TTY MODE** section), a +**SIGHUP** will cause bc(1) to clean up and exit. +{{ end }} +{{ H EH HN EHN }} +default handler for all other signals. +{{ end }} + +{{ A E N EN }} +# COMMAND LINE HISTORY + +bc(1) supports interactive command-line editing. + +If bc(1) can be in TTY mode (see the **TTY MODE** section), history can be +enabled. This means that command-line history can only be enabled when +**stdin**, **stdout**, and **stderr** are all connected to a TTY. + +Like TTY mode itself, it can be turned on or off with the environment variable +**BC_TTY_MODE** (see the **ENVIRONMENT VARIABLES** section). + +If history is enabled, previous lines can be recalled and edited with the arrow +keys. + +**Note**: tabs are converted to 8 spaces. +{{ end }} + +{{ A E H EH }} +# LOCALES + +This bc(1) ships with support for adding error messages for different locales +and thus, supports **LC_MESSAGES**. +{{ end }} + +# SEE ALSO + +dc(1) + +# STANDARDS + +bc(1) is compliant with the [IEEE Std 1003.1-2017 (“POSIX.1-2017”)][1] +specification. The flags **-efghiqsvVw**, all long options, and the extensions +noted above are extensions to that specification. + +Note that the specification explicitly says that bc(1) only accepts numbers that +use a period (**.**) as a radix point, regardless of the value of +**LC_NUMERIC**. + +{{ A E H EH }} +This bc(1) supports error messages for different locales, and thus, it supports +**LC_MESSAGES**. +{{ end }} + +# BUGS + +None are known. Report bugs at https://git.yzena.com/gavin/bc. + +# AUTHORS + +Gavin D. Howard and contributors. 
+ +[1]: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/bc.html +[2]: https://www.gnu.org/software/bc/ +[3]: https://en.wikipedia.org/wiki/Rounding#Round_half_away_from_zero +[4]: https://en.wikipedia.org/wiki/Unit_in_the_last_place +[5]: https://people.eecs.berkeley.edu/~wkahan/LOG10HAF.TXT +[6]: https://en.wikipedia.org/wiki/Rounding#Rounding_away_from_zero diff --git a/manuals/benchmarks.md b/manuals/benchmarks.md new file mode 100644 index 000000000000..af0593f4e876 --- /dev/null +++ b/manuals/benchmarks.md @@ -0,0 +1,673 @@ +# Benchmarks + +The results of these benchmarks suggest that building this `bc` with +optimization at `-O3` with link-time optimization (`-flto`) will result in the +best performance. However, using `-march=native` can result in **WORSE** +performance. + +*Note*: all benchmarks were run four times, and the fastest run is the one +shown. Also, `[bc]` means whichever `bc` was being run, and the assumed working +directory is the root directory of this repository. Also, this `bc` was at +version `3.0.0` while GNU `bc` was at version `1.07.1`, and all tests were +conducted on an `x86_64` machine running Gentoo Linux with `clang` `9.0.1` as +the compiler. + +## Typical Optimization Level + +These benchmarks were run with both `bc`'s compiled with the typical `-O2` +optimizations and no link-time optimization. 
+ +### Addition + +The command used was: + +``` +tests/script.sh bc add.bc 1 0 1 1 [bc] +``` + +For GNU `bc`: + +``` +real 2.54 +user 1.21 +sys 1.32 +``` + +For this `bc`: + +``` +real 0.88 +user 0.85 +sys 0.02 +``` + +### Subtraction + +The command used was: + +``` +tests/script.sh bc subtract.bc 1 0 1 1 [bc] +``` + +For GNU `bc`: + +``` +real 2.51 +user 1.05 +sys 1.45 +``` + +For this `bc`: + +``` +real 0.91 +user 0.85 +sys 0.05 +``` + +### Multiplication + +The command used was: + +``` +tests/script.sh bc multiply.bc 1 0 1 1 [bc] +``` + +For GNU `bc`: + +``` +real 7.15 +user 4.69 +sys 2.46 +``` + +For this `bc`: + +``` +real 2.20 +user 2.10 +sys 0.09 +``` + +### Division + +The command used was: + +``` +tests/script.sh bc divide.bc 1 0 1 1 [bc] +``` + +For GNU `bc`: + +``` +real 3.36 +user 1.87 +sys 1.48 +``` + +For this `bc`: + +``` +real 1.61 +user 1.57 +sys 0.03 +``` + +### Power + +The command used was: + +``` +printf '1234567890^100000; halt\n' | time -p [bc] -q > /dev/null +``` + +For GNU `bc`: + +``` +real 11.30 +user 11.30 +sys 0.00 +``` + +For this `bc`: + +``` +real 0.73 +user 0.72 +sys 0.00 +``` + +### Scripts + +[This file][1] was downloaded, saved at `../timeconst.bc` and the following +patch was applied: + +``` +--- ../timeconst.bc 2018-09-28 11:32:22.808669000 -0600 ++++ ../timeconst.bc 2019-06-07 07:26:36.359913078 -0600 +@@ -110,8 +110,10 @@ + + print "#endif /* KERNEL_TIMECONST_H */\n" + } +- halt + } + +-hz = read(); +-timeconst(hz) ++for (i = 0; i <= 50000; ++i) { ++ timeconst(i) ++} ++ ++halt +``` + +The command used was: + +``` +time -p [bc] ../timeconst.bc > /dev/null +``` + +For GNU `bc`: + +``` +real 16.71 +user 16.06 +sys 0.65 +``` + +For this `bc`: + +``` +real 13.16 +user 13.15 +sys 0.00 +``` + +Because this `bc` is faster when doing math, it might be a better comparison to +run a script that is not running any math. 
As such, I put the following into +`../test.bc`: + +``` +for (i = 0; i < 100000000; ++i) { + y = i +} + +i +y + +halt +``` + +The command used was: + +``` +time -p [bc] ../test.bc > /dev/null +``` + +For GNU `bc`: + +``` +real 16.60 +user 16.59 +sys 0.00 +``` + +For this `bc`: + +``` +real 22.76 +user 22.75 +sys 0.00 +``` + +I also put the following into `../test2.bc`: + +``` +i = 0 + +while (i < 100000000) { + i += 1 +} + +i + +halt +``` + +The command used was: + +``` +time -p [bc] ../test2.bc > /dev/null +``` + +For GNU `bc`: + +``` +real 17.32 +user 17.30 +sys 0.00 +``` + +For this `bc`: + +``` +real 16.98 +user 16.96 +sys 0.01 +``` + +It seems that the improvements to the interpreter helped a lot in certain cases. + +Also, I have no idea why GNU `bc` did worse when it is technically doing less +work. + +## Recommended Optimizations from `2.7.0` + +Note that, when running the benchmarks, the optimizations used are not the ones +I recommended for version `2.7.0`, which are `-O3 -flto -march=native`. + +This `bc` separates its code into modules that, when optimized at link time, +removes a lot of the inefficiency that comes from function overhead. This is +most keenly felt with one function: `bc_vec_item()`, which should turn into just +one instruction (on `x86_64`) when optimized at link time and inlined. There are +other functions that matter as well. + +I also recommended `-march=native` on the grounds that newer instructions would +increase performance on math-heavy code. We will see if that assumption was +correct. (Spoiler: **NO**.) + +When compiling both `bc`'s with the optimizations I recommended for this `bc` +for version `2.7.0`, the results are as follows. 
+ +### Addition + +The command used was: + +``` +tests/script.sh bc add.bc 1 0 1 1 [bc] +``` + +For GNU `bc`: + +``` +real 2.44 +user 1.11 +sys 1.32 +``` + +For this `bc`: + +``` +real 0.59 +user 0.54 +sys 0.05 +``` + +### Subtraction + +The command used was: + +``` +tests/script.sh bc subtract.bc 1 0 1 1 [bc] +``` + +For GNU `bc`: + +``` +real 2.42 +user 1.02 +sys 1.40 +``` + +For this `bc`: + +``` +real 0.64 +user 0.57 +sys 0.06 +``` + +### Multiplication + +The command used was: + +``` +tests/script.sh bc multiply.bc 1 0 1 1 [bc] +``` + +For GNU `bc`: + +``` +real 7.01 +user 4.50 +sys 2.50 +``` + +For this `bc`: + +``` +real 1.59 +user 1.53 +sys 0.05 +``` + +### Division + +The command used was: + +``` +tests/script.sh bc divide.bc 1 0 1 1 [bc] +``` + +For GNU `bc`: + +``` +real 3.26 +user 1.82 +sys 1.44 +``` + +For this `bc`: + +``` +real 1.24 +user 1.20 +sys 0.03 +``` + +### Power + +The command used was: + +``` +printf '1234567890^100000; halt\n' | time -p [bc] -q > /dev/null +``` + +For GNU `bc`: + +``` +real 11.08 +user 11.07 +sys 0.00 +``` + +For this `bc`: + +``` +real 0.71 +user 0.70 +sys 0.00 +``` + +### Scripts + +The command for the `../timeconst.bc` script was: + +``` +time -p [bc] ../timeconst.bc > /dev/null +``` + +For GNU `bc`: + +``` +real 15.62 +user 15.08 +sys 0.53 +``` + +For this `bc`: + +``` +real 10.09 +user 10.08 +sys 0.01 +``` + +The command for the next script, the `for` loop script, was: + +``` +time -p [bc] ../test.bc > /dev/null +``` + +For GNU `bc`: + +``` +real 14.76 +user 14.75 +sys 0.00 +``` + +For this `bc`: + +``` +real 17.95 +user 17.94 +sys 0.00 +``` + +The command for the next script, the `while` loop script, was: + +``` +time -p [bc] ../test2.bc > /dev/null +``` + +For GNU `bc`: + +``` +real 14.84 +user 14.83 +sys 0.00 +``` + +For this `bc`: + +``` +real 13.53 +user 13.52 +sys 0.00 +``` + +## Link-Time Optimization Only + +Just for kicks, let's see if `-march=native` is even useful. 
+ +The optimizations I used for both `bc`'s were `-O3 -flto`. + +### Addition + +The command used was: + +``` +tests/script.sh bc add.bc 1 0 1 1 [bc] +``` + +For GNU `bc`: + +``` +real 2.41 +user 1.05 +sys 1.35 +``` + +For this `bc`: + +``` +real 0.58 +user 0.52 +sys 0.05 +``` + +### Subtraction + +The command used was: + +``` +tests/script.sh bc subtract.bc 1 0 1 1 [bc] +``` + +For GNU `bc`: + +``` +real 2.39 +user 1.10 +sys 1.28 +``` + +For this `bc`: + +``` +real 0.65 +user 0.57 +sys 0.07 +``` + +### Multiplication + +The command used was: + +``` +tests/script.sh bc multiply.bc 1 0 1 1 [bc] +``` + +For GNU `bc`: + +``` +real 6.82 +user 4.30 +sys 2.51 +``` + +For this `bc`: + +``` +real 1.57 +user 1.49 +sys 0.08 +``` + +### Division + +The command used was: + +``` +tests/script.sh bc divide.bc 1 0 1 1 [bc] +``` + +For GNU `bc`: + +``` +real 3.25 +user 1.81 +sys 1.43 +``` + +For this `bc`: + +``` +real 1.27 +user 1.23 +sys 0.04 +``` + +### Power + +The command used was: + +``` +printf '1234567890^100000; halt\n' | time -p [bc] -q > /dev/null +``` + +For GNU `bc`: + +``` +real 10.50 +user 10.49 +sys 0.00 +``` + +For this `bc`: + +``` +real 0.72 +user 0.71 +sys 0.00 +``` + +### Scripts + +The command for the `../timeconst.bc` script was: + +``` +time -p [bc] ../timeconst.bc > /dev/null +``` + +For GNU `bc`: + +``` +real 15.50 +user 14.81 +sys 0.68 +``` + +For this `bc`: + +``` +real 10.17 +user 10.15 +sys 0.01 +``` + +The command for the next script, the `for` loop script, was: + +``` +time -p [bc] ../test.bc > /dev/null +``` + +For GNU `bc`: + +``` +real 14.99 +user 14.99 +sys 0.00 +``` + +For this `bc`: + +``` +real 16.85 +user 16.84 +sys 0.00 +``` + +The command for the next script, the `while` loop script, was: + +``` +time -p [bc] ../test2.bc > /dev/null +``` + +For GNU `bc`: + +``` +real 14.92 +user 14.91 +sys 0.00 +``` + +For this `bc`: + +``` +real 12.75 +user 12.75 +sys 0.00 +``` + +It turns out that `-march=native` can be a problem. 
As such, I have removed the +recommendation to build with `-march=native`. + +## Recommended Compiler + +When I ran these benchmarks with my `bc` compiled under `clang` vs. `gcc`, it +performed much better under `clang`. I recommend compiling this `bc` with +`clang`. + +[1]: https://github.com/torvalds/linux/blob/master/kernel/time/timeconst.bc diff --git a/manuals/dc.1.md.in b/manuals/dc.1.md.in new file mode 100644 index 000000000000..5ca37bcc97c4 --- /dev/null +++ b/manuals/dc.1.md.in @@ -0,0 +1,1452 @@ + + +# Name + +dc - arbitrary-precision decimal reverse-Polish notation calculator + +# SYNOPSIS + +**dc** [**-hiPRvVx**] [**-\-version**] [**-\-help**] [**-\-interactive**] [**-\-no-prompt**] [**-\-no-read-prompt**] [**-\-extended-register**] [**-e** *expr*] [**-\-expression**=*expr*...] [**-f** *file*...] [**-\-file**=*file*...] [*file*...] + +# DESCRIPTION + +dc(1) is an arbitrary-precision calculator. It uses a stack (reverse Polish +notation) to store numbers and results of computations. Arithmetic operations +pop arguments off of the stack and push the results. + +If no files are given on the command-line, then dc(1) reads from **stdin** (see +the **STDIN** section). Otherwise, those files are processed, and dc(1) will +then exit. + +If a user wants to set up a standard environment, they can use **DC_ENV_ARGS** +(see the **ENVIRONMENT VARIABLES** section). For example, if a user wants the +**scale** always set to **10**, they can set **DC_ENV_ARGS** to **-e 10k**, and +this dc(1) will always start with a **scale** of **10**. + +# OPTIONS + +The following are the options that dc(1) accepts. + +**-h**, **-\-help** + +: Prints a usage message and quits. + +**-v**, **-V**, **-\-version** + +: Print the version information (copyright header) and exit. + +**-i**, **-\-interactive** + +: Forces interactive mode. (See the **INTERACTIVE MODE** section.) + + This is a **non-portable extension**. 
+ +**-L**, **-\-no-line-length** + +: Disables line length checking and prints numbers without backslashes and + newlines. In other words, this option sets **DC_LINE_LENGTH** to **0** (see + the **ENVIRONMENT VARIABLES** section). + + This is a **non-portable extension**. + +**-P**, **-\-no-prompt** + +: Disables the prompt in TTY mode. (The prompt is only enabled in TTY mode. + See the **TTY MODE** section.) This is mostly for those users that do not + want a prompt or are not used to having them in dc(1). Most of those users + would want to put this option in **DC_ENV_ARGS**. + + These options override the **DC_PROMPT** and **DC_TTY_MODE** environment + variables (see the **ENVIRONMENT VARIABLES** section). + + This is a **non-portable extension**. + +**-R**, **-\-no-read-prompt** + +: Disables the read prompt in TTY mode. (The read prompt is only enabled in + TTY mode. See the **TTY MODE** section.) This is mostly for those users that + do not want a read prompt or are not used to having them in dc(1). Most of + those users would want to put this option in **DC_ENV_ARGS** (see the + **ENVIRONMENT VARIABLES** section). This option is also useful in hash bang + lines of dc(1) scripts that prompt for user input. + + This option does not disable the regular prompt because the read prompt is + only used when the **?** command is used. + + These options *do* override the **DC_PROMPT** and **DC_TTY_MODE** + environment variables (see the **ENVIRONMENT VARIABLES** section), but only + for the read prompt. + + This is a **non-portable extension**. + +**-x**, **-\-extended-register** + +: Enables extended register mode. See the *Extended Register Mode* subsection + of the **REGISTERS** section for more information. + + This is a **non-portable extension**. + +**-z**, **-\-leading-zeroes** + +: Makes dc(1) print all numbers greater than **-1** and less than **1**, and + not equal to **0**, with a leading zero.
+ + This can be set for individual numbers with the **plz(x)**, **plznl(x)**, + **pnlz(x)**, and **pnlznl(x)** functions in the extended math library (see + the **LIBRARY** section). + + This is a **non-portable extension**. + +**-e** *expr*, **-\-expression**=*expr* + +: Evaluates *expr*. If multiple expressions are given, they are evaluated in + order. If files are given as well (see below), the expressions and files are + evaluated in the order given. This means that if a file is given before an + expression, the file is read in and evaluated first. + + If this option is given on the command-line (i.e., not in **DC_ENV_ARGS**, + see the **ENVIRONMENT VARIABLES** section), then after processing all + expressions and files, dc(1) will exit, unless **-** (**stdin**) was given + as an argument at least once to **-f** or **-\-file**, whether on the + command-line or in **DC_ENV_ARGS**. However, if any other **-e**, + **-\-expression**, **-f**, or **-\-file** arguments are given after **-f-** + or equivalent is given, dc(1) will give a fatal error and exit. + + This is a **non-portable extension**. + +**-f** *file*, **-\-file**=*file* + +: Reads in *file* and evaluates it, line by line, as though it were read + through **stdin**. If expressions are also given (see above), the + expressions are evaluated in the order given. + + If this option is given on the command-line (i.e., not in **DC_ENV_ARGS**, + see the **ENVIRONMENT VARIABLES** section), then after processing all + expressions and files, dc(1) will exit, unless **-** (**stdin**) was given + as an argument at least once to **-f** or **-\-file**. However, if any other + **-e**, **-\-expression**, **-f**, or **-\-file** arguments are given after + **-f-** or equivalent is given, dc(1) will give a fatal error and exit. + + This is a **non-portable extension**. + +All long options are **non-portable extensions**.
+ +# STDIN + +If no files are given on the command-line and no files or expressions are given +by the **-f**, **-\-file**, **-e**, or **-\-expression** options, then dc(1) +reads from **stdin**. + +However, there is a caveat to this. + +First, **stdin** is evaluated a line at a time. The only exception to this is if +a string has been started, but not ended. This means that, except for escaped +brackets, all brackets must be balanced before dc(1) parses and executes. + +# STDOUT + +Any non-error output is written to **stdout**. In addition, if history (see the +**HISTORY** section) and the prompt (see the **TTY MODE** section) are enabled, +both are output to **stdout**. + +**Note**: Unlike other dc(1) implementations, this dc(1) will issue a fatal +error (see the **EXIT STATUS** section) if it cannot write to **stdout**, so if +**stdout** is closed, as in **dc >&-**, it will quit with an error. This +is done so that dc(1) can report problems when **stdout** is redirected to a +file. + +If there are scripts that depend on the behavior of other dc(1) implementations, +it is recommended that those scripts be changed to redirect **stdout** to +**/dev/null**. + +# STDERR + +Any error output is written to **stderr**. + +**Note**: Unlike other dc(1) implementations, this dc(1) will issue a fatal +error (see the **EXIT STATUS** section) if it cannot write to **stderr**, so if +**stderr** is closed, as in **dc 2>&-**, it will quit with an error. This +is done so that dc(1) can exit with an error code when **stderr** is redirected +to a file. + +If there are scripts that depend on the behavior of other dc(1) implementations, +it is recommended that those scripts be changed to redirect **stderr** to +**/dev/null**. + +# SYNTAX + +Each item in the input source code, either a number (see the **NUMBERS** +section) or a command (see the **COMMANDS** section), is processed and executed, +in order. Input is processed immediately when entered.
+ +**ibase** is a register (see the **REGISTERS** section) that determines how to +interpret constant numbers. It is the "input" base, or the number base used for +interpreting input numbers. **ibase** is initially **10**. The max allowable +value for **ibase** is **16**. The min allowable value for **ibase** is **2**. +The max allowable value for **ibase** can be queried in dc(1) programs with the +**T** command. + +**obase** is a register (see the **REGISTERS** section) that determines how to +output results. It is the "output" base, or the number base used for outputting +numbers. **obase** is initially **10**. The max allowable value for **obase** is +**DC_BASE_MAX** and can be queried with the **U** command. The min allowable +{{ A H N HN }} +value for **obase** is **0**. If **obase** is **0**, values are output in +scientific notation, and if **obase** is **1**, values are output in engineering +notation. Otherwise, values are output in the specified base. + +Outputting in scientific and engineering notations are **non-portable +extensions**. +{{ end }} +{{ E EH EN EHN }} +value for **obase** is **2**. Values are output in the specified base. +{{ end }} + +The *scale* of an expression is the number of digits in the result of the +expression right of the decimal point, and **scale** is a register (see the +**REGISTERS** section) that sets the precision of any operations (with +exceptions). **scale** is initially **0**. **scale** cannot be negative. The max +allowable value for **scale** can be queried in dc(1) programs with the **V** +command. + +{{ A H N HN }} +**seed** is a register containing the current seed for the pseudo-random number +generator. If the current value of **seed** is queried and stored, then if it is +assigned to **seed** later, the pseudo-random number generator is guaranteed to +produce the same sequence of pseudo-random numbers that were generated after the +value of **seed** was first queried. 
+ +Multiple values assigned to **seed** can produce the same sequence of +pseudo-random numbers. Likewise, when a value is assigned to **seed**, it is not +guaranteed that querying **seed** immediately after will return the same value. +In addition, the value of **seed** will change after any call to the **'** +command or the **"** command that does not receive a value of **0** or +**1**. The maximum integer returned by the **'** command can be queried with the +**W** command. + +**Note**: The values returned by the pseudo-random number generator with the +**'** and **"** commands are guaranteed to **NOT** be cryptographically secure. +This is a consequence of using a seeded pseudo-random number generator. However, +they *are* guaranteed to be reproducible with identical **seed** values. This +means that the pseudo-random values from dc(1) should only be used where a +reproducible stream of pseudo-random numbers is *ESSENTIAL*. In any other case, +use a non-seeded pseudo-random number generator. + +The pseudo-random number generator, **seed**, and all associated operations are +**non-portable extensions**. +{{ end }} + +## Comments + +Comments go from **#** until, and not including, the next newline. This is a +**non-portable extension**. + +# NUMBERS + +Numbers are strings made up of digits, uppercase letters up to **F**, and at +most **1** period for a radix. Numbers can have up to **DC_NUM_MAX** digits. +Uppercase letters are equal to **9** + their position in the alphabet (i.e., +**A** equals **10**, or **9+1**). If a digit or letter makes no sense with the +current value of **ibase**, it is set to the value of the highest valid digit +in **ibase**. + +Single-character numbers (i.e., **A** alone) take the value that they would have +if they were valid digits, regardless of the value of **ibase**. This means that +**A** alone always equals decimal **10** and **F** alone always equals decimal +**15**.
+ +{{ A H N HN }} +In addition, dc(1) accepts numbers in scientific notation. These have the form +**\<number\>e\<integer\>**. The exponent (the portion after the **e**) must be +an integer. An example is **1.89237e9**, which is equal to **1892370000**. +Negative exponents are also allowed, so **4.2890e_3** is equal to **0.0042890**. + +**WARNING**: Both the number and the exponent in scientific notation are +interpreted according to the current **ibase**, but the number is still +multiplied by **10\^exponent** regardless of the current **ibase**. For example, +if **ibase** is **16** and dc(1) is given the number string **FFeA**, the +resulting decimal number will be **2550000000000**, and if dc(1) is given the +number string **10e_4**, the resulting decimal number will be **0.0016**. + +Accepting input as scientific notation is a **non-portable extension**. +{{ end }} + +# COMMANDS + +The valid commands are listed below. + +## Printing + +These commands are used for printing. + +{{ A H N HN }} +Note that both scientific notation and engineering notation are available for +printing numbers. Scientific notation is activated by assigning **0** to +**obase** using **0o**, and engineering notation is activated by assigning **1** +to **obase** using **1o**. To deactivate them, just assign a different value to +**obase**. + +Printing numbers in scientific notation and/or engineering notation is a +**non-portable extension**. +{{ end }} + +**p** + +: Prints the value on top of the stack, whether number or string, and prints a + newline after. + + This does not alter the stack. + +**n** + +: Prints the value on top of the stack, whether number or string, and pops it + off of the stack. + +**P** + +: Pops a value off the stack. + + If the value is a number, it is truncated and the absolute value of the + result is printed as though **obase** is **256** and each digit is + interpreted as an 8-bit ASCII character, making it a byte stream.
+ + If the value is a string, it is printed without a trailing newline. + + This is a **non-portable extension**. + +**f** + +: Prints the entire contents of the stack, in order from newest to oldest, + without altering anything. + + Users should use this command when they get lost. + +## Arithmetic + +These are the commands used for arithmetic. + +**+** + +: The top two values are popped off the stack, added, and the result is pushed + onto the stack. The *scale* of the result is equal to the max *scale* of + both operands. + +**-** + +: The top two values are popped off the stack, subtracted, and the result is + pushed onto the stack. The *scale* of the result is equal to the max + *scale* of both operands. + +**\*** + +: The top two values are popped off the stack, multiplied, and the result is + pushed onto the stack. If **a** is the *scale* of the first expression and + **b** is the *scale* of the second expression, the *scale* of the result + is equal to **min(a+b,max(scale,a,b))** where **min()** and **max()** return + the obvious values. + +**/** + +: The top two values are popped off the stack, divided, and the result is + pushed onto the stack. The *scale* of the result is equal to **scale**. + + The first value popped off of the stack must be non-zero. + +**%** + +: The top two values are popped off the stack, remaindered, and the result is + pushed onto the stack. + + Remaindering is equivalent to 1) Computing **a/b** to current **scale**, and + 2) Using the result of step 1 to calculate **a-(a/b)\*b** to *scale* + **max(scale+scale(b),scale(a))**. + + The first value popped off of the stack must be non-zero. + +**~** + +: The top two values are popped off the stack, divided and remaindered, and + the results (divided first, remainder second) are pushed onto the stack. + This is equivalent to **x y / x y %** except that **x** and **y** are only + evaluated once. + + The first value popped off of the stack must be non-zero. 
+ + This is a **non-portable extension**. + +**\^** + +: The top two values are popped off the stack, the second is raised to the + power of the first, and the result is pushed onto the stack. The *scale* of + the result is equal to **scale**. + + The first value popped off of the stack must be an integer, and if that + value is negative, the second value popped off of the stack must be + non-zero. + +**v** + +: The top value is popped off the stack, its square root is computed, and the + result is pushed onto the stack. The *scale* of the result is equal to + **scale**. + + The value popped off of the stack must be non-negative. + +**\_** + +: If this command *immediately* precedes a number (i.e., no spaces or other + commands), then that number is input as a negative number. + + Otherwise, the top value on the stack is popped and copied, and the copy is + negated and pushed onto the stack. This behavior without a number is a + **non-portable extension**. + +**b** + +: The top value is popped off the stack, and if it is zero, it is pushed back + onto the stack. Otherwise, its absolute value is pushed onto the stack. + + This is a **non-portable extension**. + +**|** + +: The top three values are popped off the stack, a modular exponentiation is + computed, and the result is pushed onto the stack. + + The first value popped is used as the reduction modulus and must be an + integer and non-zero. The second value popped is used as the exponent and + must be an integer and non-negative. The third value popped is the base and + must be an integer. + + This is a **non-portable extension**. + +{{ A H N HN }} +**\$** + +: The top value is popped off the stack and copied, and the copy is truncated + and pushed onto the stack. + + This is a **non-portable extension**. + +**\@** + +: The top two values are popped off the stack, and the precision of the second + is set to the value of the first, whether by truncation or extension. 
+ + The first value popped off of the stack must be an integer and non-negative. + + This is a **non-portable extension**. + +**H** + +: The top two values are popped off the stack, and the second is shifted left + (radix shifted right) to the value of the first. + + The first value popped off of the stack must be an integer and non-negative. + + This is a **non-portable extension**. + +**h** + +: The top two values are popped off the stack, and the second is shifted right + (radix shifted left) to the value of the first. + + The first value popped off of the stack must be an integer and non-negative. + + This is a **non-portable extension**. +{{ end }} + +**G** + +: The top two values are popped off of the stack, they are compared, and a + **1** is pushed if they are equal, or **0** otherwise. + + This is a **non-portable extension**. + +**N** + +: The top value is popped off of the stack, and if it is a **0**, a **1** is + pushed; otherwise, a **0** is pushed. + + This is a **non-portable extension**. + +**(** + +: The top two values are popped off of the stack, they are compared, and a + **1** is pushed if the first is less than the second, or **0** otherwise. + + This is a **non-portable extension**. + +**{** + +: The top two values are popped off of the stack, they are compared, and a + **1** is pushed if the first is less than or equal to the second, or **0** + otherwise. + + This is a **non-portable extension**. + +**)** + +: The top two values are popped off of the stack, they are compared, and a + **1** is pushed if the first is greater than the second, or **0** otherwise. + + This is a **non-portable extension**. + +**}** + +: The top two values are popped off of the stack, they are compared, and a + **1** is pushed if the first is greater than or equal to the second, or + **0** otherwise. + + This is a **non-portable extension**. + +**M** + +: The top two values are popped off of the stack. If they are both non-zero, a + **1** is pushed onto the stack.
If either of them is zero, or both of them + are, then a **0** is pushed onto the stack. + + This is like the **&&** operator in bc(1), and it is *not* a short-circuit + operator. + + This is a **non-portable extension**. + +**m** + +: The top two values are popped off of the stack. If at least one of them is + non-zero, a **1** is pushed onto the stack. If both of them are zero, then a + **0** is pushed onto the stack. + + This is like the **||** operator in bc(1), and it is *not* a short-circuit + operator. + + This is a **non-portable extension**. + +{{ A H N HN }} +## Pseudo-Random Number Generator + +dc(1) has a built-in pseudo-random number generator. These commands query the +pseudo-random number generator. (See Parameters for more information about the +**seed** value that controls the pseudo-random number generator.) + +The pseudo-random number generator is guaranteed to **NOT** be +cryptographically secure. + +**'** + +: Generates an integer between 0 and **DC_RAND_MAX**, inclusive (see the + **LIMITS** section). + + The generated integer is made as unbiased as possible, subject to the + limitations of the pseudo-random number generator. + + This is a **non-portable extension**. + +**"** + +: Pops a value off of the stack, which is used as an **exclusive** upper bound + on the integer that will be generated. If the bound is negative or is a + non-integer, an error is raised, and dc(1) resets (see the **RESET** + section) while **seed** remains unchanged. If the bound is larger than + **DC_RAND_MAX**, the higher bound is honored by generating several + pseudo-random integers, multiplying them by appropriate powers of + **DC_RAND_MAX+1**, and adding them together. Thus, the size of integer that + can be generated with this command is unbounded. Using this command will + change the value of **seed**, unless the operand is **0** or **1**. In that + case, **0** is pushed onto the stack, and **seed** is *not* changed. 
+ + The generated integer is made as unbiased as possible, subject to the + limitations of the pseudo-random number generator. + + This is a **non-portable extension**. +{{ end }} + +## Stack Control + +These commands control the stack. + +**c** + +: Removes all items from ("clears") the stack. + +**d** + +: Copies the item on top of the stack ("duplicates") and pushes the copy onto + the stack. + +**r** + +: Swaps ("reverses") the two top items on the stack. + +**R** + +: Pops ("removes") the top value from the stack. + +## Register Control + +These commands control registers (see the **REGISTERS** section). + +**s**_r_ + +: Pops the value off the top of the stack and stores it into register *r*. + +**l**_r_ + +: Copies the value in register *r* and pushes it onto the stack. This does not + alter the contents of *r*. + +**S**_r_ + +: Pops the value off the top of the (main) stack and pushes it onto the stack + of register *r*. The previous value of the register becomes inaccessible. + +**L**_r_ + +: Pops the value off the top of the stack for register *r* and pushes it onto + the main stack. The previous value in the stack for register *r*, if any, is + now accessible via the **l**_r_ command. + +## Parameters + +{{ A H N HN }} +These commands control the values of **ibase**, **obase**, **scale**, and +**seed**. Also see the **SYNTAX** section. +{{ end }} +{{ E EH EN EHN }} +These commands control the values of **ibase**, **obase**, and **scale**. Also +see the **SYNTAX** section. +{{ end }} + +**i** + +: Pops the value off of the top of the stack and uses it to set **ibase**, + which must be between **2** and **16**, inclusive. + + If the value on top of the stack has any *scale*, the *scale* is ignored. + +**o** + +: Pops the value off of the top of the stack and uses it to set **obase**, +{{ A H N HN }} + which must be between **0** and **DC_BASE_MAX**, inclusive (see the + **LIMITS** section and the **NUMBERS** section).
+{{ end }} +{{ E EH EN EHN }} + which must be between **2** and **DC_BASE_MAX**, inclusive (see the + **LIMITS** section). +{{ end }} + + If the value on top of the stack has any *scale*, the *scale* is ignored. + +**k** + +: Pops the value off of the top of the stack and uses it to set **scale**, + which must be non-negative. + + If the value on top of the stack has any *scale*, the *scale* is ignored. + +{{ A H N HN }} +**j** + +: Pops the value off of the top of the stack and uses it to set **seed**. The + meaning of **seed** is dependent on the current pseudo-random number + generator but is guaranteed to not change except for new major versions. + + The *scale* and sign of the value may be significant. + + If a previously used **seed** value is used again, the pseudo-random number + generator is guaranteed to produce the same sequence of pseudo-random + numbers as it did when the **seed** value was previously used. + + The exact value assigned to **seed** is not guaranteed to be returned if the + **J** command is used. However, if **seed** *does* return a different value, + both values, when assigned to **seed**, are guaranteed to produce the same + sequence of pseudo-random numbers. This means that certain values assigned + to **seed** will not produce unique sequences of pseudo-random numbers. + + There is no limit to the length (number of significant decimal digits) or + *scale* of the value that can be assigned to **seed**. + + This is a **non-portable extension**. +{{ end }} + +**I** + +: Pushes the current value of **ibase** onto the main stack. + +**O** + +: Pushes the current value of **obase** onto the main stack. + +**K** + +: Pushes the current value of **scale** onto the main stack. + +{{ A H N HN }} +**J** + +: Pushes the current value of **seed** onto the main stack. + + This is a **non-portable extension**. +{{ end }} + +**T** + +: Pushes the maximum allowable value of **ibase** onto the main stack. + + This is a **non-portable extension**. 
+ +**U** + +: Pushes the maximum allowable value of **obase** onto the main stack. + + This is a **non-portable extension**. + +**V** + +: Pushes the maximum allowable value of **scale** onto the main stack. + + This is a **non-portable extension**. + +{{ A H N HN }} +**W** + +: Pushes the maximum (inclusive) integer that can be generated with the **'** + pseudo-random number generator command. + + This is a **non-portable extension**. +{{ end }} + +## Strings + +The following commands control strings. + +dc(1) can work with both numbers and strings, and registers (see the +**REGISTERS** section) can hold both strings and numbers. dc(1) always knows +whether the contents of a register are a string or a number. + +While arithmetic operations have to have numbers, and will print an error if +given a string, other commands accept strings. + +Strings can also be executed as macros. For example, if the string **[1pR]** is +executed as a macro, then the code **1pR** is executed, meaning that the **1** +will be printed with a newline after and then popped from the stack. + +**\[**_characters_**\]** + +: Makes a string containing *characters* and pushes it onto the stack. + + If there are brackets (**\[** and **\]**) in the string, then they must be + balanced. Unbalanced brackets can be escaped using a backslash (**\\**) + character. + + If there is a backslash character in the string, the character after it + (even another backslash) is put into the string verbatim, but the (first) + backslash is not. + +**a** + +: The value on top of the stack is popped. + + If it is a number, it is truncated and its absolute value is taken. The + result mod **256** is calculated. If that result is **0**, push an empty + string; otherwise, push a one-character string where the character is the + result of the mod interpreted as an ASCII character. + + If it is a string, then a new string is made. If the original string is + empty, the new string is empty. 
If it is not, then the first character of + the original string is used to create the new string as a one-character + string. The new string is then pushed onto the stack. + + This is a **non-portable extension**. + +**x** + +: Pops a value off of the top of the stack. + + If it is a number, it is pushed back onto the stack. + + If it is a string, it is executed as a macro. + + This behavior is the norm whenever a macro is executed, whether by this + command or by the conditional execution commands below. + +**\>**_r_ + +: Pops two values off of the stack that must be numbers and compares them. If + the first value is greater than the second, then the contents of register + *r* are executed. + + For example, **0 1>a** will execute the contents of register **a**, and + **1 0>a** will not. + + If either or both of the values are not numbers, dc(1) will raise an error + and reset (see the **RESET** section). + +**>**_r_**e**_s_ + +: Like the above, but will execute register *s* if the comparison fails. + + If either or both of the values are not numbers, dc(1) will raise an error + and reset (see the **RESET** section). + + This is a **non-portable extension**. + +**!\>**_r_ + +: Pops two values off of the stack that must be numbers and compares them. If + the first value is not greater than the second (less than or equal to), then + the contents of register *r* are executed. + + If either or both of the values are not numbers, dc(1) will raise an error + and reset (see the **RESET** section). + +**!\>**_r_**e**_s_ + +: Like the above, but will execute register *s* if the comparison fails. + + If either or both of the values are not numbers, dc(1) will raise an error + and reset (see the **RESET** section). + + This is a **non-portable extension**. + +**\<**_r_ + +: Pops two values off of the stack that must be numbers and compares them. If + the first value is less than the second, then the contents of register *r* + are executed. 
+ + If either or both of the values are not numbers, dc(1) will raise an error + and reset (see the **RESET** section). + +**\<**_r_**e**_s_ + +: Like the above, but will execute register *s* if the comparison fails. + + If either or both of the values are not numbers, dc(1) will raise an error + and reset (see the **RESET** section). + + This is a **non-portable extension**. + +**!\<**_r_ + +: Pops two values off of the stack that must be numbers and compares them. If + the first value is not less than the second (greater than or equal to), then + the contents of register *r* are executed. + + If either or both of the values are not numbers, dc(1) will raise an error + and reset (see the **RESET** section). + +**!\<**_r_**e**_s_ + +: Like the above, but will execute register *s* if the comparison fails. + + If either or both of the values are not numbers, dc(1) will raise an error + and reset (see the **RESET** section). + + This is a **non-portable extension**. + +**=**_r_ + +: Pops two values off of the stack that must be numbers and compares them. If + the first value is equal to the second, then the contents of register *r* + are executed. + + If either or both of the values are not numbers, dc(1) will raise an error + and reset (see the **RESET** section). + +**=**_r_**e**_s_ + +: Like the above, but will execute register *s* if the comparison fails. + + If either or both of the values are not numbers, dc(1) will raise an error + and reset (see the **RESET** section). + + This is a **non-portable extension**. + +**!=**_r_ + +: Pops two values off of the stack that must be numbers and compares them. If + the first value is not equal to the second, then the contents of register + *r* are executed. + + If either or both of the values are not numbers, dc(1) will raise an error + and reset (see the **RESET** section). + +**!=**_r_**e**_s_ + +: Like the above, but will execute register *s* if the comparison fails. 
+ + If either or both of the values are not numbers, dc(1) will raise an error + and reset (see the **RESET** section). + + This is a **non-portable extension**. + +**?** + +: Reads a line from the **stdin** and executes it. This is to allow macros to + request input from users. + +**q** + +: During execution of a macro, this exits the execution of that macro and the + execution of the macro that executed it. If there are no macros, or only one + macro executing, dc(1) exits. + +**Q** + +: Pops a value from the stack which must be non-negative and is used as the + number of macro executions to pop off of the execution stack. If the number + of levels to pop is greater than the number of executing macros, dc(1) + exits. + +**,** + +: Pushes the depth of the execution stack onto the stack. The execution stack + is the stack of string executions. The number that is pushed onto the stack + is exactly as many as is needed to make dc(1) exit with the **Q** command, + so the sequence **,Q** will make dc(1) exit. + +## Status + +These commands query status of the stack or its top value. + +**Z** + +: Pops a value off of the stack. + + If it is a number, calculates the number of significant decimal digits it + has and pushes the result. It will push **1** if the argument is **0** with + no decimal places. + + If it is a string, pushes the number of characters the string has. + +**X** + +: Pops a value off of the stack. + + If it is a number, pushes the *scale* of the value onto the stack. + + If it is a string, pushes **0**. + +**z** + +: Pushes the current depth of the stack (before execution of this command) + onto the stack. + +**y**_r_ + +: Pushes the current stack depth of the register *r* onto the main stack. + + Because each register has a depth of **1** (with the value **0** in the top + item) when dc(1) starts, dc(1) requires that each register's stack must + always have at least one item; dc(1) will give an error and reset otherwise + (see the **RESET** section).
This means that this command will never push + **0**. + + This is a **non-portable extension**. + +## Arrays + +These commands manipulate arrays. + +**:**_r_ + +: Pops the top two values off of the stack. The second value will be stored in + the array *r* (see the **REGISTERS** section), indexed by the first value. + +**;**_r_ + +: Pops the value on top of the stack and uses it as an index into the array + *r*. The selected value is then pushed onto the stack. + +**Y**_r_ + +: Pushes the length of the array *r* onto the stack. + + This is a **non-portable extension**. + +## Global Settings + +These commands retrieve global settings. These are the only commands that +require multiple specific characters, and all of them begin with the letter +**g**. Only the characters below are allowed after the character **g**; any +other character produces a parse error (see the **ERRORS** section). + +**gl** + +: Pushes the line length set by **DC_LINE_LENGTH** (see the **ENVIRONMENT + VARIABLES** section) onto the stack. + +**gz** + +: Pushes **0** onto the stack if the leading zero setting has not been enabled + with the **-z** or **-\-leading-zeroes** options (see the **OPTIONS** + section), non-zero otherwise. + +# REGISTERS + +Registers are names that can store strings, numbers, and arrays. (Number/string +registers do not interfere with array registers.) + +Each register is also its own stack, so the current register value is the top of +the stack for the register. All registers, when first referenced, have one value +(**0**) in their stack, and it is a runtime error to attempt to pop that item +off of the register stack. + +In non-extended register mode, a register name is just the single character that +follows any command that needs a register name. The only exceptions are: a +newline (**'\\n'**) and a left bracket (**'['**); it is a parse error for a +newline or a left bracket to be used as a register name. 
+ +## Extended Register Mode + +Unlike most other dc(1) implementations, this dc(1) provides nearly unlimited +amounts of registers, if extended register mode is enabled. + +If extended register mode is enabled (**-x** or **-\-extended-register** +command-line arguments are given), then normal single character registers are +used *unless* the character immediately following a command that needs a +register name is a space (according to **isspace()**) and not a newline +(**'\\n'**). + +In that case, the register name is found according to the regex +**\[a-z\]\[a-z0-9\_\]\*** (like bc(1) identifiers), and it is a parse error if +the next non-space characters do not match that regex. + +# RESET + +When dc(1) encounters an error or a signal that it has a non-default handler +for, it resets. This means that several things happen. + +First, any macros that are executing are stopped and popped off the stack. +The behavior is not unlike that of exceptions in programming languages. Then +the execution point is set so that any code waiting to execute (after all +macros returned) is skipped. + +Thus, when dc(1) resets, it skips any remaining code waiting to be executed. +Then, if it is interactive mode, and the error was not a fatal error (see the +**EXIT STATUS** section), it asks for more input; otherwise, it exits with the +appropriate return code. + +# PERFORMANCE + +Most dc(1) implementations use **char** types to calculate the value of **1** +decimal digit at a time, but that can be slow. This dc(1) does something +different. + +It uses large integers to calculate more than **1** decimal digit at a time. If +built in an environment where **DC_LONG_BIT** (see the **LIMITS** section) is +**64**, then each integer has **9** decimal digits. If built in an environment +where **DC_LONG_BIT** is **32** then each integer has **4** decimal digits. This +value (the number of decimal digits per large integer) is called +**DC_BASE_DIGS**.
+ +In addition, this dc(1) uses an even larger integer for overflow checking. This +integer type depends on the value of **DC_LONG_BIT**, but is always at least +twice as large as the integer type used to store digits. + +# LIMITS + +The following are the limits on dc(1): + +**DC_LONG_BIT** + +: The number of bits in the **long** type in the environment where dc(1) was + built. This determines how many decimal digits can be stored in a single + large integer (see the **PERFORMANCE** section). + +**DC_BASE_DIGS** + +: The number of decimal digits per large integer (see the **PERFORMANCE** + section). Depends on **DC_LONG_BIT**. + +**DC_BASE_POW** + +: The max decimal number that each large integer can store (see + **DC_BASE_DIGS**) plus **1**. Depends on **DC_BASE_DIGS**. + +**DC_OVERFLOW_MAX** + +: The max number that the overflow type (see the **PERFORMANCE** section) can + hold. Depends on **DC_LONG_BIT**. + +**DC_BASE_MAX** + +: The maximum output base. Set at **DC_BASE_POW**. + +**DC_DIM_MAX** + +: The maximum size of arrays. Set at **SIZE_MAX-1**. + +**DC_SCALE_MAX** + +: The maximum **scale**. Set at **DC_OVERFLOW_MAX-1**. + +**DC_STRING_MAX** + +: The maximum length of strings. Set at **DC_OVERFLOW_MAX-1**. + +**DC_NAME_MAX** + +: The maximum length of identifiers. Set at **DC_OVERFLOW_MAX-1**. + +**DC_NUM_MAX** + +: The maximum length of a number (in decimal digits), which includes digits + after the decimal point. Set at **DC_OVERFLOW_MAX-1**. + +{{ A H N HN }} +**DC_RAND_MAX** + +: The maximum integer (inclusive) returned by the **'** command. Set + at **2\^DC_LONG_BIT-1**. +{{ end }} + +Exponent + +: The maximum allowable exponent (positive or negative). Set at + **DC_OVERFLOW_MAX**. + +Number of vars + +: The maximum number of vars/arrays. Set at **SIZE_MAX-1**. + +These limits are meant to be effectively non-existent; the limits are so large +(at least on 64-bit machines) that there should not be any point at which they +become a problem.
In fact, memory should be exhausted before these limits should +be hit. + +# ENVIRONMENT VARIABLES + +dc(1) recognizes the following environment variables: + +**DC_ENV_ARGS** + +: This is another way to give command-line arguments to dc(1). They should be + in the same format as all other command-line arguments. These are always + processed first, so any files given in **DC_ENV_ARGS** will be processed + before arguments and files given on the command-line. This gives the user + the ability to set up "standard" options and files to be used at every + invocation. The most useful thing for such files to contain would be useful + functions that the user might want every time dc(1) runs. Another use would + be to use the **-e** option to set **scale** to a value other than **0**. + + The code that parses **DC_ENV_ARGS** will correctly handle quoted arguments, + but it does not understand escape sequences. For example, the string + **"/home/gavin/some dc file.dc"** will be correctly parsed, but the string + **"/home/gavin/some \"dc\" file.dc"** will include the backslashes. + + The quote parsing will handle either kind of quotes, **'** or **"**. Thus, + if you have a file with any number of single quotes in the name, you can use + double quotes as the outside quotes, as in **"some 'dc' file.dc"**, and vice + versa if you have a file with double quotes. However, handling a file with + both kinds of quotes in **DC_ENV_ARGS** is not supported due to the + complexity of the parsing, though such files are still supported on the + command-line where the parsing is done by the shell. + +**DC_LINE_LENGTH** + +: If this environment variable exists and contains an integer that is greater + than **1** and is less than **UINT16_MAX** (**2\^16-1**), dc(1) will output + lines to that length, including the backslash newline combo. The default + line length is **70**. 
+ + The special value of **0** will disable line length checking and print + numbers without regard to line length and without backslashes and newlines. + +**DC_SIGINT_RESET** + +: If dc(1) is not in interactive mode (see the **INTERACTIVE MODE** section), + then this environment variable has no effect because dc(1) exits on + **SIGINT** when not in interactive mode. + + However, when dc(1) is in interactive mode, then if this environment + variable exists and contains an integer, a non-zero value makes dc(1) reset + on **SIGINT**, rather than exit, and zero makes dc(1) exit. If this + environment variable exists and is *not* an integer, then dc(1) will exit on + **SIGINT**. + + This environment variable overrides the default, which can be queried with + the **-h** or **-\-help** options. + +**DC_TTY_MODE** + +: If TTY mode is *not* available (see the **TTY MODE** section), then this + environment variable has no effect. + + However, when TTY mode is available, then if this environment variable + exists and contains an integer, then a non-zero value makes dc(1) use TTY + mode, and zero makes dc(1) not use TTY mode. + + This environment variable overrides the default, which can be queried with + the **-h** or **-\-help** options. + +**DC_PROMPT** + +: If TTY mode is *not* available (see the **TTY MODE** section), then this + environment variable has no effect. + + However, when TTY mode is available, then if this environment variable + exists and contains an integer, a non-zero value makes dc(1) use a prompt, + and zero or a non-integer makes dc(1) not use a prompt. If this environment + variable does not exist and **DC_TTY_MODE** does, then the value of the + **DC_TTY_MODE** environment variable is used. + + This environment variable and the **DC_TTY_MODE** environment variable + override the default, which can be queried with the **-h** or **-\-help** + options. 
+ +**DC_EXPR_EXIT** + +: If any expressions or expression files are given on the command-line with + **-e**, **-\-expression**, **-f**, or **-\-file**, then if this environment + variable exists and contains an integer, a non-zero value makes dc(1) exit + after executing the expressions and expression files, and a zero value + makes dc(1) not exit. + + This environment variable overrides the default, which can be queried with + the **-h** or **-\-help** options. + +# EXIT STATUS + +dc(1) returns the following exit statuses: + +**0** + +: No error. + +**1** + +: A math error occurred. This follows standard practice of using **1** for + expected errors, since math errors will happen in the process of normal + execution. + + Math errors include divide by **0**, taking the square root of a negative +{{ A H N HN }} + number, using a negative number as a bound for the pseudo-random number + generator, attempting to convert a negative number to a hardware integer, + overflow when converting a number to a hardware integer, overflow when + calculating the size of a number, and attempting to use a non-integer where + an integer is required. + + Converting to a hardware integer happens for the second operand of the power + (**\^**), places (**\@**), left shift (**H**), and right shift (**h**) + operators. +{{ end }} +{{ E EH EN EHN }} + number, attempting to convert a negative number to a hardware integer, + overflow when converting a number to a hardware integer, overflow when + calculating the size of a number, and attempting to use a non-integer where + an integer is required. + + Converting to a hardware integer happens for the second operand of the power + (**\^**) operator. +{{ end }} + +**2** + +: A parse error occurred. + + Parse errors include unexpected **EOF**, using an invalid character, failing + to find the end of a string or comment, and using a token where it is + invalid. + +**3** + +: A runtime error occurred.
+ + Runtime errors include assigning an invalid number to any global (**ibase**, + **obase**, or **scale**), giving a bad expression to a **read()** call, + calling **read()** inside of a **read()** call, type errors (including + attempting to execute a number), and attempting an operation when the stack + has too few elements. + +**4** + +: A fatal error occurred. + + Fatal errors include memory allocation errors, I/O errors, failing to open + files, attempting to use files that do not have only ASCII characters (dc(1) + only accepts ASCII characters), attempting to open a directory as a file, + and giving invalid command-line options. + +The exit status **4** is special; when a fatal error occurs, dc(1) always exits +and returns **4**, no matter what mode dc(1) is in. + +The other statuses will only be returned when dc(1) is not in interactive mode +(see the **INTERACTIVE MODE** section), since dc(1) resets its state (see the +**RESET** section) and accepts more input when one of those errors occurs in +interactive mode. This is also the case when interactive mode is forced by the +**-i** flag or **-\-interactive** option. + +These exit statuses allow dc(1) to be used in shell scripting with error +checking, and its normal behavior can be forced by using the **-i** flag or +**-\-interactive** option. + +# INTERACTIVE MODE + +Like bc(1), dc(1) has an interactive mode and a non-interactive mode. +Interactive mode is turned on automatically when both **stdin** and **stdout** +are hooked to a terminal, but the **-i** flag and **-\-interactive** option can +turn it on in other situations. + +In interactive mode, dc(1) attempts to recover from errors (see the **RESET** +section), and in normal execution, flushes **stdout** as soon as execution is +done for the current input. dc(1) may also reset on **SIGINT** instead of exit, +depending on the contents of, or default for, the **DC_SIGINT_RESET** +environment variable (see the **ENVIRONMENT VARIABLES** section). 
+ +# TTY MODE + +If **stdin**, **stdout**, and **stderr** are all connected to a TTY, then "TTY +mode" is considered to be available, and thus, dc(1) can turn on TTY mode, +subject to some settings. + +If there is the environment variable **DC_TTY_MODE** in the environment (see the +**ENVIRONMENT VARIABLES** section), then if that environment variable contains a +non-zero integer, dc(1) will turn on TTY mode when **stdin**, **stdout**, and +**stderr** are all connected to a TTY. If the **DC_TTY_MODE** environment +variable exists but is *not* a non-zero integer, then dc(1) will not turn TTY +mode on. + +If the environment variable **DC_TTY_MODE** does *not* exist, the default +setting is used. The default setting can be queried with the **-h** or +**-\-help** options. + +TTY mode is different from interactive mode because interactive mode is required +in the [bc(1) specification][1], and interactive mode requires only **stdin** +and **stdout** to be connected to a terminal. + +{{ A E N EN }} +## Command-Line History + +Command-line history is only enabled if TTY mode is, i.e., that **stdin**, +**stdout**, and **stderr** are connected to a TTY and the **DC_TTY_MODE** +environment variable (see the **ENVIRONMENT VARIABLES** section) and its default +do not disable TTY mode. See the **COMMAND LINE HISTORY** section for more +information. +{{ end }} + +## Prompt + +If TTY mode is available, then a prompt can be enabled. Like TTY mode itself, it +can be turned on or off with an environment variable: **DC_PROMPT** (see the +**ENVIRONMENT VARIABLES** section). + +If the environment variable **DC_PROMPT** exists and is a non-zero integer, then +the prompt is turned on when **stdin**, **stdout**, and **stderr** are connected +to a TTY and the **-P** and **-\-no-prompt** options were not used. The read +prompt will be turned on under the same conditions, except that the **-R** and +**-\-no-read-prompt** options must also not be used. 
+ +However, if **DC_PROMPT** does not exist, the prompt can be enabled or disabled +with the **DC_TTY_MODE** environment variable, the **-P** and **-\-no-prompt** +options, and the **-R** and **-\-no-read-prompt** options. See the **ENVIRONMENT +VARIABLES** and **OPTIONS** sections for more details. + +# SIGNAL HANDLING + +Sending a **SIGINT** will cause dc(1) to do one of two things. + +If dc(1) is not in interactive mode (see the **INTERACTIVE MODE** section), or +the **DC_SIGINT_RESET** environment variable (see the **ENVIRONMENT VARIABLES** +section), or its default, is either not an integer or it is zero, dc(1) will +exit. + +However, if dc(1) is in interactive mode, and the **DC_SIGINT_RESET** or its +default is an integer and non-zero, then dc(1) will stop executing the current +input and reset (see the **RESET** section) upon receiving a **SIGINT**. + +Note that "current input" can mean one of two things. If dc(1) is processing +input from **stdin** in interactive mode, it will ask for more input. If dc(1) +is processing input from a file in interactive mode, it will stop processing the +file and start processing the next file, if one exists, or ask for input from +**stdin** if no other file exists. + +This means that if a **SIGINT** is sent to dc(1) as it is executing a file, it +can seem as though dc(1) did not respond to the signal since it will immediately +start executing the next file. This is by design; most files that users execute +when interacting with dc(1) have function definitions, which are quick to parse. +If a file takes a long time to execute, there may be a bug in that file. The +rest of the files could still be executed without problem, allowing the user to +continue. + +**SIGTERM** and **SIGQUIT** cause dc(1) to clean up and exit, and it uses the +{{ A E N EN }} +default handler for all other signals. 
The one exception is **SIGHUP**; in that +case, and only when dc(1) is in TTY mode (see the **TTY MODE** section), a +**SIGHUP** will cause dc(1) to clean up and exit. +{{ end }} +{{ H EH HN EHN }} +default handler for all other signals. +{{ end }} + +{{ A E N EN }} +# COMMAND LINE HISTORY + +dc(1) supports interactive command-line editing. + +If dc(1) can be in TTY mode (see the **TTY MODE** section), history can be +enabled. This means that command-line history can only be enabled when +**stdin**, **stdout**, and **stderr** are all connected to a TTY. + +Like TTY mode itself, it can be turned on or off with the environment variable +**DC_TTY_MODE** (see the **ENVIRONMENT VARIABLES** section). + +**Note**: tabs are converted to 8 spaces. +{{ end }} + +{{ A E H EH }} +# LOCALES + +This dc(1) ships with support for adding error messages for different locales +and thus, supports **LC_MESSAGES**. +{{ end }} + +# SEE ALSO + +bc(1) + +# STANDARDS + +The dc(1) utility operators are compliant with the operators in the bc(1) +[IEEE Std 1003.1-2017 (“POSIX.1-2017”)][1] specification. + +# BUGS + +None are known. Report bugs at https://git.yzena.com/gavin/bc. + +# AUTHOR + +Gavin D. Howard and contributors. + +[1]: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/bc.html diff --git a/manuals/development.md b/manuals/development.md new file mode 100644 index 000000000000..6733d496defd --- /dev/null +++ b/manuals/development.md @@ -0,0 +1,5110 @@ +# Development + +Updated: 06 Oct 2021 + +This document is meant for the day when I (Gavin D. Howard) get [hit by a +bus][1]. In other words, it's meant to make the [bus factor][1] a non-issue. + +This document is supposed to contain all of the knowledge necessary to develop +`bc` and `dc`. + +In addition, this document is meant to add to the [oral tradition of software +engineering][118], as described by Bryan Cantrill. + +This document will reference other parts of the repository. 
That is so a lot of +the documentation can be closest to the part of the repo where it is actually +necessary. + +## What Is It? + +This repository contains an implementation of both [POSIX `bc`][2] and [Unix +`dc`][3]. + +POSIX `bc` is a standard utility required for POSIX systems. `dc` is a +historical utility that was included in early Unix and even predates both Unix +and C. They both are arbitrary-precision command-line calculators with their own +programming languages. `bc`'s language looks similar to C, with infix notation +and including functions, while `dc` uses [Reverse Polish Notation][4] and allows +the user to execute strings as though they were functions. + +In addition, it is also possible to build the arbitrary-precision math as a +library, named [`bcl`][156]. + +**Note**: for ease, I will refer to both programs as `bc` in this document. +However, if I say "just `bc`," I am referring to just `bc`, and if I say `dc`, I +am referring to just `dc`. + +### History + +This project started in January 2018 when a certain individual on IRC, hearing +that I knew how to write parsers, asked me to write a `bc` parser for his math +library. I did so. I thought about writing my own math library, but he +disparaged my programming skills and made me think that I couldn't do it. + +However, he took so long to do it that I eventually decided to give it a try and +had a working math portion in two weeks. It taught me that I should not listen +to such people. + +From that point, I decided to make it an extreme learning experience about how +to write quality software. + +That individual's main goal had been to get his `bc` into [toybox][16], and I +managed to get my own `bc` in. I also got it in [busybox][17]. + +Eventually, in late 2018, I also decided to try my hand at implementing +[Karatsuba multiplication][18], an algorithm that that unnamed individual +claimed I could never implement. It took me a bit, but I did it. 
+ +This project became a passion project for me, and I continued. In mid-2019, +Stefan Eßer suggested I improve performance by putting more than 1 digit in each +section of the numbers. After I showed immaturity because of some burnout, I +implemented his suggestion, and the results were incredible. + +Since that time, I have gradually been improving the `bc` as I have learned more +about things like fuzzing, [`scan-build`][19], [valgrind][20], +[AddressSanitizer][21] (and the other sanitizers), and many other things. + +One of my happiest moments was when my `bc` was made the default in FreeBSD. + +But since I believe in [finishing the software I write][22], I have done less +work on `bc` over time, though there are still times when I put a lot of effort +in, such as now (17 June 2021), when I am attempting to convince OpenBSD to use +my `bc`. + +And that is why I am writing this document: someday, someone else is going to +want to change my code, and this document is my attempt to make it as simple as +possible. + +### Values + +[According to Bryan Cantrill][10], all software has values. I think he's +correct, though I [added one value for programming languages in particular][11]. + +However, for `bc`, his original list will do: + +* Approachability +* Availability +* Compatibility +* Composability +* Debuggability +* Expressiveness +* Extensibility +* Interoperability +* Integrity +* Maintainability +* Measurability +* Operability +* Performance +* Portability +* Resiliency +* Rigor +* Robustness +* Safety +* Security +* Simplicity +* Stability +* Thoroughness +* Transparency +* Velocity + +There are several values that don't apply. The reason they don't apply is +because `bc` and `dc` are existing utilities; this is just another +reimplementation. 
The designs of `bc` and `dc` are set in stone; there is +nothing we can do to change them, so let's get rid of those values that would +apply to their design: + +* Compatibility +* Integrity +* Maintainability +* Measurability +* Performance +* Portability +* Resiliency +* Rigor +* Robustness +* Safety +* Security +* Simplicity +* Stability +* Thoroughness +* Transparency + +Furthermore, some of the remaining ones don't matter to me, so let me get rid of +those and order the rest according to my *actual* values for this project: + +* Robustness +* Stability +* Portability +* Compatibility +* Performance +* Security +* Simplicity + +First is **robustness**. This `bc` and `dc` should be robust, accepting any +input, never crashing, and instead, returning an error. + +Closely related to that is **stability**. The execution of `bc` and `dc` should +be deterministic and never change for the same inputs, including the +pseudo-random number generator (for the same seed). + +Third is **portability**. These programs should run everywhere that POSIX +exists, as well as Windows. This means that just about every person on the +planet will have access to these programs. + +Next is **compatibility**. These programs should, as much as possible, be +compatible with other existing implementations and standards. + +Then we come to **performance**. A calculator is only usable if it's fast, so +these programs should run as fast as possible. + +After that is **security**. These programs should *never* be the reason a user's +computer is compromised. + +And finally, **simplicity**. Where possible, the code should be simple, while +deferring to the above values. + +Keep these values in mind for the rest of this document, and for exploring any +other part of this repo. + +#### Portability + +But before I go on, I want to talk about portability in particular. + +Most of these principles just require good attention and care, but portability +is different. 
Sometimes, it requires pulling in code from other places and +adapting it. In other words, sometimes I need to duplicate and adapt code. + +This happened in a few cases: + +* Option parsing (see [`include/opt.h`][35]). +* History (see [`include/history.h`][36]). +* Pseudo-Random Number Generator (see [`include/rand.h`][37]). + +This was done because I decided to ensure that `bc`'s dependencies were +basically zero. In particular, either users have a normal install of Windows or +they have a POSIX system. + +A POSIX system limited me to C99, `sh`, and zero external dependencies. That +last item is why I pull code into `bc`: if I pull it in, it's not an external +dependency. + +That's why `bc` has duplicated code. Remove it, and you risk `bc` not being +portable to some platforms. + +## Suggested Course + +I do have a suggested course for programmers to follow when trying to understand +this codebase. The order is this: + +1. `bc` Spec. +2. Manpages. +3. Test suite. +4. Understand the build. +5. Algorithms manual. +6. Code concepts. +7. Repo structure. +8. Headers. +9. Source code. + +This order roughly follows this order: + +1. High-level requirements +2. Low-level requirements +3. High-level implementation +4. Low-level implementation + +In other words, first understand what the code is *supposed* to do, then +understand the code itself. + +## Useful External Tools + +I have a few tools external to `bc` that are useful: + +* A [Vim plugin with syntax files made specifically for my `bc` and `dc`][132]. +* A [repo of `bc` and `dc` scripts][133]. +* A set of `bash` aliases (see below). +* A `.bcrc` file with items useful for my `bash` setup (see below). 
+ +My `bash` aliases are these: + +```sh +alias makej='make -j16' +alias mcmake='make clean && make' +alias mcmakej='make clean && make -j16' +alias bcdebug='CPPFLAGS="-DBC_DEBUG_CODE=1" CFLAGS="-Weverything -Wno-padded \ + -Wno-switch-enum -Wno-format-nonliteral -Wno-cast-align \ + -Wno-unreachable-code-return -Wno-missing-noreturn \ + -Wno-disabled-macro-expansion -Wno-unreachable-code -Wall -Wextra \ + -pedantic -std=c99" ./configure.sh' +alias bcconfig='CFLAGS="-Weverything -Wno-padded -Wno-switch-enum \ + -Wno-format-nonliteral -Wno-cast-align -Wno-unreachable-code-return \ + -Wno-missing-noreturn -Wno-disabled-macro-expansion -Wno-unreachable-code \ + -Wall -Wextra -pedantic -std=c99" ./configure.sh' +alias bcnoassert='CPPFLAGS="-DNDEBUG" CFLAGS="-Weverything -Wno-padded \ + -Wno-switch-enum -Wno-format-nonliteral -Wno-cast-align \ + -Wno-unreachable-code-return -Wno-missing-noreturn \ + -Wno-disabled-macro-expansion -Wno-unreachable-code -Wall -Wextra \ + -pedantic -std=c99" ./configure.sh' +alias bcdebugnoassert='CPPFLAGS="-DNDEBUG -DBC_DEBUG_CODE=1" \ + CFLAGS="-Weverything -Wno-padded -Wno-switch-enum -Wno-format-nonliteral \ + -Wno-cast-align -Wno-unreachable-code-return -Wno-missing-noreturn \ + -Wno-disabled-macro-expansion -Wno-unreachable-code -Wall -Wextra \ + -pedantic -std=c99" ./configure.sh' +alias bcunset='unset BC_LINE_LENGTH && unset BC_ENV_ARGS' +``` + +`makej` runs `make` with all of my cores. + +`mcmake` runs `make clean` before running `make`. It will take a target on the +command-line. + +`mcmakej` is a combination of `makej` and `mcmake`. + +`bcdebug` configures `bc` for a full debug build, including `BC_DEBUG_CODE` (see +[Debugging][134] below). + +`bcconfig` configures `bc` with Clang (Clang is my personal default compiler) +using full warnings, with a few really loud and useless warnings turned off. + +`bcnoassert` configures `bc` to not have asserts built in. 
+ +`bcdebugnoassert` is like `bcnoassert`, except it also configures `bc` for debug +mode. + +`bcunset` unsets my personal `bc` environment variables, which are set to: + +```sh +export BC_ENV_ARGS="-l $HOME/.bcrc" +export BC_LINE_LENGTH="74" +``` + +Unsetting these environment variables is necessary for running +[`scripts/release.sh`][83] because otherwise, it will error when attempting to +run `bc -s` on my `$HOME/.bcrc`. + +Speaking of which, the contents of that file are: + +```bc +define void print_time_unit(t){ + if(t<10)print "0" + if(t<1&&t)print "0" + print t,":" +} +define void sec2time(t){ + auto s,m,h,d,r + r=scale + scale=0 + t=abs(t) + s=t%60 + t-=s + m=t/60%60 + t-=m + h=t/3600%24 + t-=h + d=t/86400 + if(d)print_time_unit(d) + if(h)print_time_unit(h) + print_time_unit(m) + if(s<10)print "0" + if(s<1&&s)print "0" + s + scale=r +} +define minutes(secs){ + return secs/60; +} +define hours(secs){ + return secs/3600; +} +define days(secs){ + return secs/3600/24; +} +define years(secs){ + return secs/3600/24/365.25; +} +define fbrand(b,p){ + auto l,s,t + b=abs(b)$ + if(b<2)b=2 + s=scale + t=b^abs(p)$ + l=ceil(l2(t),0) + if(l>scale)scale=l + t=irand(t)/t + scale=s + return t +} +define ifbrand(i,b,p){return irand(abs(i)$)+fbrand(b,p)} +``` + +This allows me to use `bc` as part of my `bash` prompt. + +## Code Style + +The code style for `bc` is...weird, and that comes from historical accident. + +In [History][23], I mentioned how I got my `bc` in [toybox][16]. Well, in order +to do that, my `bc` originally had toybox style. Eventually, I changed to using +tabs, and assuming they were 4 spaces wide, but other than that, I basically +kept the same style, with some exceptions that are more or less dependent on my +taste. + +The code style is as follows: + +* Tabs are 4 spaces. +* Tabs are used at the beginning of lines for indent. +* Spaces are used for alignment. +* Lines are limited to 80 characters, period.
+* Pointer asterisk (`*`) goes with the variable (on the right), not the type, + unless it is for a pointer type returned from a function. +* The opening brace is put on the same line as the header for the function, + loop, or `if` statement. +* Unless the header is more than one line, in which case the opening brace is + put on its own line. +* If the opening brace is put on its own line, there is no blank line after it. +* If the opening brace is *not* put on its own line, there *is* a blank line + after it, *unless* the block is only one or two lines long. +* Code lines are grouped into what I call "paragraphs." Basically, lines that + seem like they should go together are grouped together. This one comes down + to judgment. +* Bodies of `if` statements, `else` statements, and loops that are one line + long are put on the same line as the statement, unless the header is more than + one line long, and/or, the header and body cannot fit into 80 characters with + a space in between them. +* If single-line bodies are on a separate line from their headers, and the + headers are only a single line, then no braces are used. +* However, braces are *always* used if they contain another `if` statement or + loop. +* Loops with empty bodies are ended with a semicolon. +* Expressions that return a boolean value are surrounded by parentheses. +* Macro backslashes are aligned as far to the left as possible. +* Binary operators have spaces on both sides. +* If a line with binary operators overflows 80 characters, a newline is inserted + *after* binary operators. +* Function modifiers and return types are on the same line as the function name. +* With one exception, `goto`'s are only used to jump to the end of a function + for cleanup. +* All structs, enums, and unions are `typedef`'ed. +* All constant data is in one file: [`src/data.c`][131], but the corresponding + `extern` declarations are in the appropriate header file.
+* All local variables are declared at the beginning of the scope where they + appear. They may be initialized at that point, if it does not invoke UB or + otherwise cause bugs. +* All precondition `assert()`'s (see [Asserts][135]) come *after* local variable + declarations. +* Besides short `if` statements and loops, there should *never* be more than one + statement per line. + +### ClangFormat + +I attempted three times to use [ClangFormat][24] to impose a standard, +machine-useful style on `bc`. All three failed. Otherwise, the style in this +repo would be more consistent. + +## Repo Structure + +Functions are documented with Doxygen-style doc comments. Functions that appear +in headers are documented in the headers, while static functions are documented +where they are defined. + +### `configure` + +A symlink to [`configure.sh`][69]. + +### `configure.sh` + +This is the script to configure `bc` and [`bcl`][156] for building. + +This `bc` has a custom build system. The reason for this is because of +[*portability*][136]. + +If `bc` used an outside build system, that build system would be an external +dependency. Thus, I had to write a build system for `bc` that used nothing but +C99 and POSIX utilities. + +One of those utilities is POSIX `sh`, which technically implements a +Turing-complete programming language. It's a terrible one, but it works. + +A user that wants to build `bc` on a POSIX system (not Windows) first runs +`configure.sh` with the options he wants. `configure.sh` uses those options and +the `Makefile` template ([`Makefile.in`][70]) to generate an actual valid +`Makefile`. Then `make` can do the rest. + +For more information about the build process, see the [Build System][142] +section and the [build manual][14]. + +For more information about shell scripts, see [POSIX Shell Scripts][76]. + +`configure.sh` does the following: + +1. It processes command-line arguments and figures out what the user wants to + build. +2.
It reads in [`Makefile.in`][70]. +3. One-by-one, it replaces placeholders (in [`Makefile.in`][70]) of the form + `%%%%` based on the [build type][81]. +4. It appends a list of file targets based on the [build type][81]. +5. It appends the correct test targets. +6. It copies the correct manpage and markdown manual for `bc` and `dc` into a + location from which they can be copied for install. +7. It does a `make clean` to reset the build state. + +### `.gitattributes` + +A `.gitattributes` file. This is needed to preserve the `crlf` line endings in +the Visual Studio files. + +### `.gitignore` + +The `.gitignore` + +### `LICENSE.md` + +This is the `LICENSE` file, including the licenses of various software that I +have borrowed. + +### `Makefile.in` + +This is the `Makefile` template for [`configure.sh`][69] to use for generating a +`Makefile`. + +For more information, see [`configure.sh`][69], the [Build System][142] section, +and the [build manual][14]. + +Because of [portability][136], the generated `Makefile.in` should be a pure +[POSIX `make`][74]-compatible `Makefile` (minus the placeholders). Here are a +few snares for the unwary programmer in this file: + +1. No extensions allowed, including and especially GNU extensions. +2. If new headers are added, they must also be added to `Makefile.in`. +3. Don't delete the `.POSIX:` empty target at the top; that's what tells `make` + implementations that pure [POSIX `make`][74] is needed. + +In particular, there is no way to set up variables other than the `=` operator. +There are no conditionals, so all of the conditional stuff must be in +[`configure.sh`][69]. This is, in fact, why [`configure.sh`][69] exists in the +first place: [POSIX `make`][74] is barebones and only does a build with no +configuration. + +### `NEWS.md` + +A running changelog with an entry for each version. This should be updated at +the same time that [`include/version.h`][75] is. + +### `NOTICE.md` + +The `NOTICE` file with proper attributions. 
+ +### `README.md` + +The `README`. Read it. + +### `benchmarks/` + +The folder containing files to generate benchmarks. + +Each of these files was made, at one time or another, to benchmark some +experimental feature, so if it seems there is no rhyme or reason to these +benchmarks, it is because there is none, besides historical accident. + +#### `bc/` + +The folder containing `bc` scripts to generate `bc` benchmarks. + +##### `add.bc` + +The file to generate the benchmark to benchmark addition in `bc`. + +##### `arrays_and_constants.bc` + +The file to generate the benchmark to benchmark `bc` using lots of array names +and constants. + +##### `arrays.bc` + +The file to generate the benchmark to benchmark `bc` using lots of array names. + +##### `constants.bc` + +The file to generate the benchmark to benchmark `bc` using lots of constants. + +##### `divide.bc` + +The file to generate the benchmark to benchmark division in `bc`. + +##### `functions.bc` + +The file to generate the benchmark to benchmark `bc` using lots of functions. + +##### `irand_long.bc` + +The file to generate the benchmark to benchmark `bc` using lots of calls to +`irand()` with large bounds. + +##### `irand_short.bc` + +The file to generate the benchmark to benchmark `bc` using lots of calls to +`irand()` with small bounds. + +##### `lib.bc` + +The file to generate the benchmark to benchmark `bc` using lots of calls to +heavy functions in `lib.bc`. + +##### `multiply.bc` + +The file to generate the benchmark to benchmark multiplication in `bc`. + +##### `postfix_incdec.bc` + +The file to generate the benchmark to benchmark `bc` using postfix increment and +decrement operators. + +##### `power.bc` + +The file to generate the benchmark to benchmark power (exponentiation) in `bc`. + +##### `subtract.bc` + +The file to generate the benchmark to benchmark subtraction in `bc`. + +##### `strings.bc` + +The file to generate the benchmark to benchmark `bc` using lots of strings. 
+ +#### `dc/` + +The folder containing `dc` scripts to generate `dc` benchmarks. + +##### `modexp.dc` + +The file to generate the benchmark to benchmark modular exponentiation in `dc`. + +### `gen/` + +A folder containing the files necessary to generate C strings that will be +embedded in the executable. + +All of the files in this folder have license headers, but the program and script +that can generate strings from them include code to strip the license header out +before strings are generated. + +#### `bc_help.txt` + +A text file containing the text displayed for `bc -h` or `bc --help`. + +This text just contains the command-line options and a short summary of the +differences from GNU and BSD `bc`'s. It also directs users to the manpage. + +The reason for this is because otherwise, the help would be far too long to be +useful. + +**Warning**: The text has some `printf()` format specifiers. You need to make +sure the format specifiers match the arguments given to `bc_file_printf()`. + +#### `dc_help.txt` + +A text file containing the text displayed for `dc -h` or `dc --help`. + +This text just contains the command-line options and a short summary of the +differences from GNU and BSD `dc`'s. It also directs users to the manpage. + +The reason for this is because otherwise, the help would be far too long to be +useful. + +**Warning**: The text has some `printf()` format specifiers. You need to make +sure the format specifiers match the arguments given to `bc_file_printf()`. + +#### `lib.bc` + +A `bc` script containing the [standard math library][5] required by POSIX. See +the [POSIX standard][2] for what is required. + +This file does not have any extraneous whitespace, except for tabs at the +beginning of lines. That is because this data goes directly into the binary, +and whitespace is extra bytes in the binary. Thus, not having any extra +whitespace shrinks the resulting binary. + +However, tabs at the beginning of lines are kept for two reasons: + +1. 
Readability. (This file is still code.) +2. The program and script that generate strings from this file can remove + tabs at the beginning of lines. + +For more details about the algorithms used, see the [algorithms manual][25]. + +However, there are a few snares for unwary programmers. + +First, all constants must be one digit. This is because otherwise, multi-digit +constants could be interpreted wrongly if the user uses a different `ibase`. +This does not happen with single-digit numbers because they are guaranteed to be +interpreted as the number they would be if the `ibase` was as high as possible. + +This is why `A` is used in the library instead of `10`, and things like `2*9*A` +for `180` in [`lib2.bc`][26]. + +As an alternative, you can set `ibase` in the function, but if you do, make sure +to set it with a single-digit number and beware the snare below... + +Second, `scale`, `ibase`, and `obase` must be safely restored before returning +from any function in the library. This is because without the `-g` option, +functions are allowed to change any of the globals. + +Third, all local variables in a function must be declared in an `auto` statement +before doing anything else. This includes arrays. However, function parameters +are considered predeclared. + +Fourth, and this is only a snare for `lib.bc`, not [`lib2.bc`][26], the code +must not use *any* extensions. It has to work when users use the `-s` or `-w` +flags. + +#### `lib2.bc` + +A `bc` script containing the [extended math library][7]. + +Like [`lib.bc`][8], and for the same reasons, this file should have no +extraneous whitespace, except for tabs at the beginning of lines. + +For more details about the algorithms used, see the [algorithms manual][25]. + +Also, be sure to check [`lib.bc`][8] for the snares that can trip up unwary +programmers when writing code for `lib2.bc`. + +#### `strgen.c` + +Code for the program to generate C strings from text files.
This is the original +program, although [`strgen.sh`][9] was added later. + +The reason I used C here is because even though I knew `sh` would be available +(it must be available to run `configure.sh`), I didn't know how to do what I +needed to do with POSIX utilities and `sh`. + +Later, [`strgen.sh`][9] was contributed by Stefan Eßer of FreeBSD, showing that +it *could* be done with `sh` and POSIX utilities. + +However, `strgen.c` *still* exists because the versions generated by +[`strgen.sh`][9] may technically hit an environmental limit. (See the [draft C99 +standard][12], page 21.) This is because [`strgen.sh`][9] generates string +literals, and in C99, string literals can be limited to 4095 characters, and +`gen/lib2.bc` is above that. + +Fortunately, the limit for "objects," which include `char` arrays, is much +bigger: 65535 bytes, so that's what `strgen.c` generates. + +However, the existence of `strgen.c` does come with a cost: the build needs a C99 +compiler that targets the host machine. For more information, see the ["Cross +Compiling" section][13] of the [build manual][14]. + +Read the comments in `strgen.c` for more detail about it, the arguments it +takes, and how it works. + +#### `strgen.sh` + +An `sh` script that will generate C strings that uses only POSIX utilities. This +exists for those situations where a host C99 compiler is not available, and the +environment limits mentioned above in [`strgen.c`][15] don't matter. + +`strgen.sh` takes the same arguments as [`strgen.c`][15], and the arguments mean +the exact same things, so see the comments in [`strgen.c`][15] for more detail +about that, and see the comments in `strgen.sh` for more details about it and +how it works. + +For more information about shell scripts, see [POSIX Shell Scripts][76]. + +### `include/` + +A folder containing the headers. + +The headers are not included among the source code because I like it better that +way.
Also there were folders within `src/` at one point, and I did not want to +see `#include "../some_header.h"` or things like that. + +So all headers are here, even though only one ([`bcl.h`][30]) is meant for end +users (to be installed in `INCLUDEDIR`). + +#### `args.h` + +This file is the API for processing command-line arguments. + +#### `bc.h` + +This header is the API for `bc`-only items. This includes the `bc_main()` +function and the `bc`-specific lexing and parsing items. + +The `bc` parser is perhaps the most sensitive part of the entire codebase. See +the documentation in `bc.h` for more information. + +The code associated with this header is in [`src/bc.c`][40], +[`src/bc_lex.c`][41], and [`src/bc_parse.c`][42]. + +#### `bcl.h` + +This header is the API for the [`bcl`][156] library. + +This header is meant for distribution to end users and contains the API that end +users of [`bcl`][156] can use in their own software. + +This header, because it's the public header, is also the root header. That means +that it has platform-specific fixes for Windows. (If the fixes were not in this +header, the build would fail on Windows.) + +The code associated with this header is in [`src/library.c`][43]. + +#### `dc.h` + +This header is the API for `dc`-only items. This includes the `dc_main()` +function and the `dc`-specific lexing and parsing items. + +The code associated with this header is in [`src/dc.c`][44], +[`src/dc_lex.c`][45], and [`src/dc_parse.c`][46]. + +#### `file.h` + +This header is for `bc`'s internal buffered I/O API. + +For more information about `bc`'s error handling and custom buffered I/O, see +[Error Handling][97] and [Custom I/O][114], along with [`status.h`][176] and the +notes about version [`3.0.0`][32] in the [`NEWS`][32]. + +The code associated with this header is in [`src/file.c`][47]. 
+ +#### `history.h` + +This header is for `bc`'s implementation of command-line editing/history, which +is based on a [UTF-8-aware fork][28] of [`linenoise`][29]. + +For more information, see the [Command-Line History][189] section. + +The code associated with this header is in [`src/history.c`][48]. + +#### `lang.h` + +This header defines the data structures and bytecode used for actual execution +of `bc` and `dc` code. + +Yes, it's misnamed; that's an accident of history where the first things I put +into it all seemed related to the `bc` language. + +The code associated with this header is in [`src/lang.c`][49]. + +#### `lex.h` + +This header defines the common items that both programs need for lexing. + +The code associated with this header is in [`src/lex.c`][50], +[`src/bc_lex.c`][41], and [`src/dc_lex.c`][45]. + +#### `library.h` + +This header defines the things needed for [`bcl`][156] that users should *not* +have access to. In other words, [`bcl.h`][30] is the *public* header for the +library, and this header is the *private* header for the library. + +The code associated with this header is in [`src/library.c`][43]. + +#### `num.h` + +This header is the API for numbers and math. + +The code associated with this header is in [`src/num.c`][39]. + +#### `opt.h` + +This header is the API for parsing command-line arguments. + +It's different from [`args.h`][31] in that [`args.h`][31] is for the main code +to process the command-line arguments into global data *after* they have already +been parsed by `opt.h` into proper tokens. In other words, `opt.h` actually +parses the command-line arguments, and [`args.h`][31] turns that parsed data +into flags (bits), strings, and expressions that will be used later. + +Why are they separate? Because originally, `bc` used `getopt_long()` for +parsing, so [`args.h`][31] was the only one that existed. 
After it was +discovered that `getopt_long()` has different behavior on different platforms, I +adapted a [public-domain option parsing library][34] to do the job instead. And +in doing so, I gave it its own header. + +They could probably be combined, but I don't really care enough at this point. + +The code associated with this header is in [`src/opt.c`][51]. + +#### `parse.h` + +This header defines the common items that both programs need for parsing. + +Note that the parsers don't produce abstract syntax trees (AST's) or any +intermediate representations. They produce bytecode directly. In other words, +they don't have special data structures except what they need to do their job. + +The code associated with this header is in [`src/parse.c`][50], +[`src/bc_parse.c`][42], and [`src/dc_parse.c`][46]. + +#### `program.h` + +This header defines the items needed to manage the data structures in +[`lang.h`][38] as well as any helper functions needed to generate bytecode or +execute it. + +The code associated with this header is in [`src/program.c`][53]. + +#### `rand.h` + +This header defines the API for the [pseudo-random number generator +(PRNG)][179]. + +The PRNG only generates fixed-size integers. The magic of generating random +numbers of arbitrary size is actually given to the code that does math +([`src/num.c`][39]). + +The code associated with this header is in [`src/rand.c`][54]. + +#### `read.h` + +This header defines the API for reading from files and `stdin`. + +Thus, [`file.h`][55] is really for buffered *output*, while this file is for +*input*. There is no buffering needed for `bc`'s inputs. + +The code associated with this header is in [`src/read.c`][56]. + +#### `status.h` + +This header has several things: + +* A list of possible errors that internal `bc` code can use. +* Compiler-specific fixes. +* Platform-specific fixes. +* Macros for `bc`'s [error handling][97]. + +There is no code associated with this header.
+ +#### `vector.h` + +This header defines the API for the vectors (resizable arrays) that are used for +data structures. + +Vectors are what do the heavy lifting in almost all of `bc`'s data structures. +Even the maps of identifiers and arrays use vectors. + +#### `version.h` + +This header defines the version of `bc`. + +There is no code associated with this header. + +#### `vm.h` + +This header defines the API for setting up and running `bc` and `dc`. + +It is so named because I think of it as the "virtual machine" of `bc`, though +that is probably not true as [`program.h`][57] is probably the "virtual machine" +API. Thus, the name is more historical accident. + +The code associated with this header is in [`src/vm.c`][58]. + +### `locales/` + +This folder contains a bunch of `.msg` files and soft links to the real `.msg` +files. This is how locale support is implemented in `bc`. + +The files are in the format required by the [`gencat`][59] POSIX utility. They +all have the same messages, in the same order, with the same numbering, under +the same groups. This is because the locale system expects those messages in +that order. + +The soft links exist because for many locales, they would contain the exact same +information. To prevent duplication, they are simply linked to a master copy. + +The naming format for all files is: + +``` +<language_code>_<country_code>.<encoding>.msg +``` + +This naming format must be followed for all locale files. + +### `manuals/` + +This folder contains the documentation for `bc`, `dc`, and [`bcl`][156], along +with a few other manuals. + +#### `algorithms.md` + +This file explains the mathematical algorithms that are used. + +The hope is that this file will guide people in understanding how the math code +works. + +#### `bc.1.md.in` + +This file is a template for the markdown version of the `bc` manual and +manpages. + +For more information about how the manpages and markdown manuals are generated, +and for why, see [`scripts/manpage.sh`][60] and [Manuals][86]. 
+ +#### `bcl.3` + +This is the manpage for the [`bcl`][156] library. It is generated from +[`bcl.3.md`][61] using [`scripts/manpage.sh`][60]. + +For the reason why I check generated data into the repo, see +[`scripts/manpage.sh`][60] and [Manuals][86]. + +#### `bcl.3.md` + +This is the markdown manual for the [`bcl`][156] library. It is the source for the +generated [`bcl.3`][62] file. + +#### `benchmarks.md` + +This is a document that compares this `bc` to GNU `bc` in various benchmarks. It +was last updated when version [`3.0.0`][32] was released. + +It has very little documentation value, other than showing what compiler options +are useful for performance. + +#### `build.md` + +This is the [build manual][14]. + +This `bc` has a custom build system. The reason for this is +[*portability*][136]. + +If `bc` used an outside build system, that build system would be an external +dependency. Thus, I had to write a build system for `bc` that used nothing but +C99 and POSIX utilities, including barebones [POSIX `make`][74]. + +For more information about the build system, see the [build system][142] +section, the [build manual][14], [`configure.sh`][69], and [`Makefile.in`][70]. + +#### `dc.1.md.in` + +This file is a template for the markdown version of the `dc` manual and +manpages. + +For more information about how the manpages and markdown manuals are generated, +and for why, see [`scripts/manpage.sh`][60] and [Manuals][86]. + +#### `development.md` + +The file you are reading right now. + +#### `header_bcl.txt` + +Used by [`scripts/manpage.sh`][60] to give the [`bcl.3`][62] manpage a proper +header. + +For more information about generating manuals, see [`scripts/manpage.sh`][60] +and [Manuals][86]. + +#### `header_bc.txt` + +Used by [`scripts/manpage.sh`][60] to give the [generated `bc` manpages][79] a +proper header. + +For more information about generating manuals, see [`scripts/manpage.sh`][60] +and [Manuals][86]. 
+ +#### `header_dc.txt` + +Used by [`scripts/manpage.sh`][60] to give the [generated `dc` manpages][80] a +proper header. + +For more information about generating manuals, see [`scripts/manpage.sh`][60] +and [Manuals][86]. + +#### `header.txt` + +Used by [`scripts/manpage.sh`][60] to give all generated manpages a license +header. + +For more information about generating manuals, see [`scripts/manpage.sh`][60] +and [Manuals][86]. + +#### `release.md` + +A checklist that I try to somewhat follow when making a release. + +#### `bc/` + +A folder containing the `bc` manuals. + +Each `bc` manual corresponds to a [build type][81]. See that link for more +details. + +For each manual, there are two copies: the markdown version generated from the +template, and the manpage generated from the markdown version. + +#### `dc/` + +A folder containing the `dc` manuals. + +Each `dc` manual corresponds to a [build type][81]. See that link for more +details. + +For each manual, there are two copies: the markdown version generated from the +template, and the manpage generated from the markdown version. + +### `scripts/` + +This folder contains helper scripts. Most of them are written in pure [POSIX +`sh`][72], but one ([`karatsuba.py`][78]) is written in Python 3. + +For more information about the shell scripts, see [POSIX Shell Scripts][76]. + +#### `afl.py` + +This script is meant to be used as part of the fuzzing workflow. + +It does one of two things: checks for valid crashes, or runs `bc` and/or `dc` +under all of the paths found by [AFL++][125]. + +See [Fuzzing][82] for more information about fuzzing, including this script. + +#### `alloc.sh` + +This script is a quick and dirty script to test whether or not the garbage +collection mechanism of the [`BcNum` caching][96] works. It has been little-used +because it tests something that is not important to correctness. 
+ +#### `benchmark.sh` + +A script making it easy to run benchmarks and to run the executable produced by +[`ministat.c`][223] on them. + +For more information, see the [Benchmarks][144] section. + +#### `bitfuncgen.c` + +A source file for an executable to generate tests for `bc`'s bitwise functions +in [`gen/lib2.bc`][26]. The executable is `scripts/bitfuncgen`, and it is built +with `make bitfuncgen`. It produces the test on `stdout` and the expected +results on `stderr`. This means that to generate tests, use the following +invocation: + +``` +scripts/bitfuncgen > tests/bc/bitfuncs.txt 2> tests/bc/bitfuncs_results.txt +``` + +It calls `abort()` if it runs into an error. + +#### `exec-install.sh` + +This script is the magic behind making sure `dc` is installed properly if it's +a symlink to `bc`. It checks to see if it is a link, and if so, it just creates +a new symlink in the install directory. Of course, it also installs `bc` itself, +or `dc` when it's alone. + +#### `functions.sh` + +This file is a bunch of common functions for most of the POSIX shell scripts. It +is not supposed to be run; instead, it is *sourced* by other POSIX shell +scripts, like so: + +``` +. "$scriptdir/functions.sh" +``` + +or the equivalent, depending on where the sourcing script is. + +For more information about the shell scripts, see [POSIX Shell Scripts][76]. + +#### `fuzz_prep.sh` + +Fuzzing is a regular activity when I am preparing for a release. + +This script handles all the options and such for building a fuzzable binary. +Instead of having to remember a bunch of options, I just put them in this script +and run the script when I want to fuzz. + +For more information about fuzzing, see [Fuzzing][82]. + +#### `karatsuba.py` + +This script has at least one of two major differences from most of the other +scripts: + +* It's written in Python 3. +* It's meant for software packagers. 
+ +For example, [`scripts/afl.py`][94] and [`scripts/randmath.py`][95] are both in +Python 3, but they are not meant for the end user or software packagers and are +not included in source distributions. But this script is. + +This script breaks my rule of only POSIX utilities necessary for package +maintainers, but there's a very good reason for that: it's only meant to be run +*once* when the package is created for the first time, and maybe not even then. + +You see, this script does two things: it tests the Karatsuba implementation at +various settings for `KARATSUBA_LEN`, and it figures out what the optimal +`KARATSUBA_LEN` is for the machine that it is running on. + +Package maintainers can use this script, when creating a package for this `bc`, +to figure out what is optimal for their users. Then they don't have to run it +ever again. So this script only has to run on the packager's machine. + +I tried to write the script in `sh`, by the way, and I finally accepted the +tradeoff of using Python 3 when it became too hard. + +However, I also mentioned that it's for testing Karatsuba with various settings +of `KARATSUBA_LEN`. Package maintainers will want to run the [test suite][124], +right? + +Yes, but this script is not part of the [test suite][124]; it's used for testing +in the [`scripts/release.sh`][83] script, which is for maintainer use only. + +However, there is one snare with `karatsuba.py`: I didn't want the user to have +to install any Python libraries to run it. Keep that in mind if you change it. + +#### `link.sh` + +This script is the magic behind making `dc` a symlink of `bc` when both +calculators are built. + +#### `locale_install.sh` + +This script does what its name says: it installs locales. + +It turns out that this is complicated. + +There is a magic environment variable, `$NLSPATH`, that tells you how and where +you are supposed to install locales. + +Yes, *how*. And where. + +But now is not the place to rant about `$NLSPATH`. 
For more information on +locales and `$NLSPATH`, see [Locales][85]. + +#### `locale_uninstall.sh` + +This script does what its name says: it uninstalls locales. + +This is far less complicated than installing locales. I basically generate a +wildcard path and then list all paths that fit that wildcard. Then I delete each +one of those paths. Easy. + +For more information on locales, see [Locales][85]. + +#### `manpage.sh` + +This script is the one that generates markdown manuals from a template and a +manpage from a markdown manual. + +For more information about generating manuals, see [Manuals][86]. + +#### `ministat.c` + +This is a file copied [from FreeBSD][221] that calculates the standard +statistical numbers, such as mean, average, and median, based on numbers +obtained from a file. + +For more information, see the [FreeBSD ministat(1) manpage][222]. + +This file allows `bc` to build the `scripts/ministat` executable using the +command `make ministat`, and this executable helps programmers evaluate the +results of [benchmarks][144] more accurately. + +#### `package.sh` + +This script is what helps `bc` maintainers cut a release. It does the following: + +1. Creates the appropriate `git` tag. +2. Pushes the `git` tag. +3. Copies the repo to a temp directory. +4. Removes files that should not be included in source distributions. +5. Creates the tarballs. +6. Signs the tarballs. +7. Zips and signs the Windows executables if they exist. +8. Calculates and outputs SHA512 and SHA256 sums for all of the files, + including the signatures. + +This script is for `bc` maintainers to use when cutting a release. It is not +meant for outside use. This means that some non-POSIX utilities can be used, +such as `git` and `gpg`. + +In addition, before using this script, it expects that the folders that Windows +generated when building `bc`, `dc`, and [`bcl`][156], are in the parent +directory of the repo, exactly as Windows generated them. 
If they are not there, +then it will not zip and sign, nor calculate sums of, the Windows executables. + +Because this script creates a tag and pushes it, it should *only* be run *ONCE* +per release. + +#### `radamsa.sh` + +A script to test `bc`'s command-line expression parsing code, which, while +simple, strives to handle as much as possible. + +What this script does is it uses the test cases in [`radamsa.txt`][98] as input +to the [Radamsa fuzzer][99]. + +For more information, see the [Radamsa][128] section. + +#### `radamsa.txt` + +Initial test cases for the [`radamsa.sh`][100] script. + +#### `randmath.py` + +This script generates random math problems and checks that `bc`'s and `dc`'s +output matches the GNU `bc` and `dc`. (For this reason, it is necessary to have +GNU `bc` and `dc` installed before using this script.) + +One snare: be sure that this script is using the GNU `bc` and `dc`, not a +previously-installed version of this `bc` and `dc`. + +If you want to check for memory issues or failing asserts, you can build the +`bc` using `./scripts/fuzz_prep.sh -a`, and then run it under this script. Any +errors or crashes should be caught by the script and given to the user as part +of the "checklist" (see below). + +The basic idea behind this script is that it generates as many math problems as +it can, biasing towards situations that may be likely to have bugs, and testing +each math problem against GNU `bc` or `dc`. + +If GNU `bc` or `dc` fails, it just continues. If this `bc` or `dc` fails, it +stores that problem. If the output mismatches, it also stores the problem. + +Then, when the user sends a `SIGINT`, the script stops testing and goes into +report mode. One-by-one, it will go through the "checklist," the list of failed +problems, and present each problem to the user, as well as whether this `bc` or +`dc` crashed, and its output versus GNU. Then the user can decide to add them as +test cases, which it does automatically to the appropriate test file. 
+ +#### `release_settings.txt` + +A text file of settings combinations that [`release.sh`][83] uses to ensure that +`bc` and `dc` build and work with various default settings. [`release.sh`][83] +simply reads it line by line and uses each line for one build. + +#### `release.sh` + +This script is for `bc` maintainers only. It runs `bc`, `dc`, and [`bcl`][156] +through a gauntlet that is mostly meant to be used in preparation for a release. + +It does the following: + +1. Builds every [build type][81], with every setting combo in + [`release_settings.txt`][93] with both calculators, `bc` alone, and `dc` + alone. +2. Builds every [build type][81], with every setting combo in + [`release_settings.txt`][93] with both calculators, `bc` alone, and `dc` + alone for 32-bit. +3. Does #1 and #2 for Debug, Release, Release with Debug Info, and Min Size + Release builds. +4. Runs the [test suite][124] on every build, if desired. +5. Runs the [test suite][124] under [ASan, UBSan, and MSan][21] for every build + type/setting combo. +6. Runs [`scripts/karatsuba.py`][78] in test mode. +7. Runs the [test suite][124] for both calculators, `bc` alone, and `dc` alone + under [valgrind][20] and errors if there are any memory bugs or memory + leaks. + +#### `safe-install.sh` + +A script copied from [musl][101] to atomically install files. + +#### `test_settings.sh` + +A quick and dirty script to help automate rebuilding while manually testing the +various default settings. + +This script uses [`test_settings.txt`][103] to generate the various settings +combos. + +For more information about settings, see [Settings][102] in the [build +manual][14]. + +#### `test_settings.txt` + +A list of the various settings combos to be used by [`test_settings.sh`][104]. + +### `src/` + +This folder is, obviously, where the actual heart and soul of `bc`, the source +code, is. + +All of the source files are in one folder; this simplifies the build system +immensely. 
+ +There are separate files for `bc` and `dc` specific code ([`bc.c`][40], +[`bc_lex.c`][41], [`bc_parse.c`][42], [`dc.c`][44], [`dc_lex.c`][45], and +[`dc_parse.c`][46]) where possible because it is cleaner to exclude an entire +source file from a build than to have `#if`/`#endif` preprocessor guards. + +That said, it was easier in many cases to use preprocessor macros where both +calculators used much of the same code and data structures, so there is a +liberal sprinkling of them through the code. + +#### `args.c` + +Code for processing command-line arguments. + +The header for this file is [`include/args.h`][31]. + +#### `bc.c` + +The code for the `bc` main function `bc_main()`. + +The header for this file is [`include/bc.h`][106]. + +#### `bc_lex.c` + +The code for lexing that only `bc` needs. + +The headers for this file are [`include/lex.h`][180] and [`include/bc.h`][106]. + +#### `bc_parse.c` + +The code for parsing that only `bc` needs. This code is the most complex and +subtle in the entire codebase. + +The headers for this file are [`include/parse.h`][181] and +[`include/bc.h`][106]. + +#### `data.c` + +Due to [historical accident][23] because of a desire to get my `bc` into +[toybox][16], all of the constant data that `bc` needs is all in one file. This +is that file. + +There is no code in this file, but a lot of the const data has a heavy influence +on code, including the order of data in arrays because that order has to +correspond to the order of other things elsewhere in the codebase. If you change +the order of something in this file, run `make test`, and get errors, you +changed something that depends on the order that you messed up. + +Almost all headers have `extern` references to items in this file. + +#### `dc.c` + +The code for the `dc` main function `dc_main()`. + +The header for this file is [`include/dc.h`][182]. + +#### `dc_lex.c` + +The code for lexing that only `dc` needs. 
+ +The headers for this file are [`include/lex.h`][180] and [`include/dc.h`][182]. + +#### `dc_parse.c` + +The code for parsing that only `dc` needs. + +The headers for this file are [`include/parse.h`][181] and +[`include/dc.h`][182]. + +#### `file.c` + +The code for `bc`'s implementation of buffered I/O. For more information about +why I implemented my own buffered I/O, see [`include/file.h`][55], [Error +Handling][97], and [Custom I/O][114], along with [`status.h`][176] and the notes +about version [`3.0.0`][32] in the [`NEWS`][32]. + +The header for this file is [`include/file.h`][55]. + +#### `history.c` + +The code for `bc`'s implementation of command-line editing/history, which is +based on a [UTF-8-aware fork][28] of [`linenoise`][29]. + +For more information, see the [Command-Line History][189] section. + +The header for this file is [`include/history.h`][36]. + +#### `lang.c` + +The data structures used for actual execution of `bc` and `dc` code. + +While execution is done in [`src/program.c`][53], this file defines functions +for initializing, copying, and freeing the data structures, which is somewhat +orthogonal to actual execution. + +Yes, it's misnamed; that's an accident of history where the first things I put +into it all seemed related to the `bc` language. + +The header for this file is [`include/lang.h`][38]. + +#### `lex.c` + +The code for the common things that both programs need for lexing. + +The header for this file is [`include/lex.h`][180]. + +#### `library.c` + +The code to implement the public API of the `bcl` library. + +The code in this file does a lot to ensure that clients do not have to worry +about internal `bc` details, especially error handling with `setjmp()` and +`longjmp()`. That and encapsulating the handling of numbers are the bulk of what +the code in this file actually does because most of the library is still +implemented in [`src/num.c`][39]. 
+ +The headers for this file are [`include/bcl.h`][30] and +[`include/library.h`][183]. + +#### `main.c` + +The entry point for both programs; this is the `main()` function. + +This file has no headers associated with it. + +#### `num.c` + +The code for all of the arbitrary-precision [numbers][177] and [math][178] in +`bc`. + +The header for this file is [`include/num.h`][184]. + +#### `opt.c` + +The code for parsing command-line options. + +The header for this file is [`include/opt.h`][35]. + +#### `parse.c` + +The code for the common items that both programs need for parsing. + +The header for this file is [`include/parse.h`][181]. + +#### `program.c` + +The code for the actual execution engine for `bc` and `dc` code. + +The header for this file is [`include/program.h`][57]. + +#### `rand.c` + +The code for the [pseudo-random number generator (PRNG)][179] and the special +stack handling it needs. + +The PRNG only generates fixed-size integers. The magic of generating random +numbers of arbitrary size is actually given to the code that does math +([`src/num.c`][39]). + +The header for this file is [`include/rand.h`][37]. + +#### `read.c` + +The code for reading from files and `stdin`. + +The header for this file is [`include/read.h`][185]. + +#### `vector.c` + +The code for [vectors][111], [maps][186], and [slab vectors][187], along with +slabs. + +The header for this file is [`include/vector.h`][174]. + +#### `vm.c` + +The code for setting up and running `bc` and `dc`. + +It is so named because I think of it as the "virtual machine" of `bc`, though +that is probably not true as [`program.h`][57] is probably the "virtual machine" +code. Thus, the name is more historical accident. + +The header for this file is [`include/vm.h`][27]. + +### `tests/` + +This directory contains the entire [test suite][124] and its infrastructure. + +#### `all.sh` + +A convenience script for the `make run_all_tests` target (see the [Group +Tests][141] section for more information). 
+ +#### `all.txt` + +The file with the names of the calculators. This is to make it easier for the +test scripts to know where the standard and other test directories are. + +#### `bcl.c` + +The test for the [`bcl`][156] API. For more information, see the [`bcl` +Test][157] section. + +#### `error.sh` + +The script to run the file-based error tests in `tests/<calculator>/errors/` for +each calculator. For more information, see the [Error Tests][151] section. + +This is a separate script so that each error file can be run separately and in +parallel. + +#### `errors.sh` + +The script to run the line-based error tests in `tests/<calculator>/errors.txt` +for each calculator. For more information, see the [Error Tests][151] section. + +#### `extra_required.txt` + +The file with the list of tests which both calculators have that need the [Extra +Math build option][188]. This exists to make it easy for test scripts to skip +those tests when the [Extra Math build option][188] is disabled. + +#### `history.py` + +The file with all of the history tests. For more information, see the [History +Tests][155] section. + +#### `history.sh` + +The script to integrate [`history.py`][139] into the build system in a portable +way, and to skip it if necessary. + +This script also re-runs the test three times if it fails. This is because +`pexpect` can be flaky at times. + +#### `other.sh` + +The script to run the "other" (miscellaneous) tests for each calculator. For +more information, see the [Other Tests][154] section. + +#### `read.sh` + +The script to run the read tests for each calculator. For more information, see +the [`read()` Tests][153] section. + +#### `script.sed` + +The `sed` script to edit the output of GNU `bc` when generating script tests. +For more information, see the [Script Tests][150] section. + +#### `script.sh` + +The script for running one script test. For more information, see the [Script +Tests][150] section. 
+ +#### `scripts.sh` + +The script to help the `make run_all_tests` target (see the [Group Tests][141] section) +run all of the script tests. + +#### `stdin.sh` + +The script to run the `stdin` tests for each calculator. For more information, +see the [`stdin` Tests][152] section. + +#### `test.sh` + +The script to run one standard test. For more information, see the [Standard +Tests][149] section. + +#### `bc/` + +The standard tests directory for `bc`. For more information, see the [Standard +Tests][149] section. + +##### `all.txt` + +The file to tell the build system and `make run_all_tests` (see the [Group +Tests][141] section) what standard tests to run for `bc`, as well as in what +order. + +This file just lists the test names, one per line. + +##### `errors.txt` + +The initial error test file for `bc`. This file has one test per line. See the +[Error Tests][151] section for more information. + +##### `posix_errors.txt` + +The file of tests for POSIX compatibility for `bc`. This file has one test per +line. For more information, see the [Error Tests][151] section. + +##### `timeconst.sh` + +The script to run the `bc` tests that use the [Linux `timeconst.bc` script][6]. +For more information, see the [Linux `timeconst.bc` Script][191] section. + +##### `errors/` + +The directory with error tests for `bc`, most discovered by AFL++ (see the +[Fuzzing][82] section). There is one test per file. For more information, see +the [Error Tests][151] section. + +##### `scripts/` + +The script tests directory for `bc`. For more information, see the [Script +Tests][150] section. + +###### `all.txt` + +A file to tell the build system and `make run_all_tests` (see the [Group +Tests][141] section) what script tests to run for `bc`, as well as in what +order. + +This file just lists the test names, one per line. + +#### `dc/` + +The standard tests directory for `dc`. For more information, see the [Standard +Tests][149] section. 
+ +##### `all.txt` + +The file to tell the build system and `make run_all_tests` (see the [Group +Tests][141] section) what standard tests to run for `dc`, as well as in what +order. + +This file just lists the test names, one per line. + +##### `errors.txt` + +The initial error test file for `dc`. This file has one test per line. See the +[Error Tests][151] section for more information. + +##### `read_errors.txt` + +The file of error tests for the `?` command (`read()` in `bc`). This file has +one test per line. See the [Error Tests][151] section for more information. + +##### `errors/` + +The directory with error tests for `dc`, most discovered by AFL++ (see the +[Fuzzing][82] section). There is one test per file. For more information, see +the [Error Tests][151] section. + +##### `scripts/` + +The script tests directory for `dc`. For more information, see the [Script +Tests][150] section. + +###### `all.txt` + +The file to tell the build system and `make run_all_tests` (see the [Group +Tests][141] section) what script tests to run for `dc`, as well as in what +order. + +This file just lists the test names, one per line. + +#### `fuzzing/` + +The directory containing the fuzzing infrastructure. For more information, see +the [Fuzzing][82] section. + +##### `bc_afl_continue.yaml` + +The [`tmuxp`][123] config (for use with [`tmux`][122]) for easily restarting a +fuzz run. For more information, see the [Convenience][130] subsection of the +[Fuzzing][82] section. + +##### `bc_afl.yaml` + +The [`tmuxp`][123] config (for use with [`tmux`][122]) for easily starting a +fuzz run. For more information, see the [Convenience][130] subsection of the +[Fuzzing][82] section. + +Be aware that this will delete all previous unsaved fuzzing tests in the output +directories. + +##### `bc_inputs1/` + +The fuzzing input directory for the first third of inputs for `bc`. For more +information, see the [Corpuses][192] subsection of the [Fuzzing][82] section. 
+ +##### `bc_inputs2/` + +The fuzzing input directory for the second third of inputs for `bc`. For more +information, see the [Corpuses][192] subsection of the [Fuzzing][82] section. + +##### `bc_inputs3/` + +The fuzzing input directory for the third third of inputs for `bc`. For more +information, see the [Corpuses][192] subsection of the [Fuzzing][82] section. + +##### `dc_inputs/` + +The fuzzing input directory for the inputs for `dc`. For more information, see +the [Corpuses][192] subsection of the [Fuzzing][82] section. + +### `vs/` + +The directory containing all of the materials needed to build `bc`, `dc`, and +`bcl` on Windows. + +#### `bcl.sln` + +A Visual Studio solution file for [`bcl`][156]. This, along with +[`bcl.vcxproj`][63] and [`bcl.vcxproj.filters`][64] is what makes it possible to +build [`bcl`][156] on Windows. + +#### `bcl.vcxproj` + +A Visual Studio project file for [`bcl`][156]. This, along with [`bcl.sln`][65] +and [`bcl.vcxproj.filters`][64] is what makes it possible to build [`bcl`][156] +on Windows. + +#### `bcl.vcxproj.filters` + +A Visual Studio filters file for [`bcl`][156]. This, along with [`bcl.sln`][65] +and [`bcl.vcxproj`][63] is what makes it possible to build [`bcl`][156] on +Windows. + +#### `bc.sln` + +A Visual Studio solution file for `bc`. This, along with [`bc.vcxproj`][66] +and [`bc.vcxproj.filters`][67] is what makes it possible to build `bc` on +Windows. + +#### `bc.vcxproj` + +A Visual Studio project file for `bc`. This, along with [`bc.sln`][68] and +[`bc.vcxproj.filters`][67] is what makes it possible to build `bc` on Windows. + +#### `bc.vcxproj.filters` + +A Visual Studio filters file for `bc`. This, along with [`bc.sln`][68] and +[`bc.vcxproj`][66] is what makes it possible to build `bc` on Windows. + +#### `tests/` + +A directory of files to run tests on Windows. + +##### `tests_bc.bat` + +A file to run basic `bc` tests on Windows. 
It expects that it will be run from +the directory containing it, and it also expects a `bc.exe` in the same +directory. + +##### `tests_dc.bat` + +A file to run basic `dc` tests on Windows. It expects that it will be run from +the directory containing it, and it also expects a `bc.exe` in the same +directory. + +## Build System + +The build system is described in detail in the [build manual][14], so +maintainers should start there. This section, however, describes some parts of +the build system that only maintainers will care about. + +### Clean Targets + +`bc` has a default `make clean` target that cleans up the build files. However, +because `bc`'s build system can generate many different types of files, there +are other clean targets that may be useful: + +* `make clean_gen` cleans the `gen/strgen` executable generated from + [`gen/strgen.c`][15]. It has no prerequisites. +* `make clean` cleans object files, `*.cat` files (see the [Locales][85] + section), executables, and files generated from text files in [`gen/`][145], + including `gen/strgen` if it was built. So this has a prerequisite on + `make clean_gen` in normal use. +* `make clean_benchmarks` cleans [benchmarks][144], including the `ministat` + executable. It has no prerequisites. +* `make clean_config` cleans the generated `Makefile` and the manuals that + [`configure.sh`][69] copied in preparation for install. It also depends on + `make clean` and `make clean_benchmarks`, so it cleans those items too. This + is the target that [`configure.sh`][69] uses before it does its work. +* `make clean_coverage` cleans the generated coverage files for the [test + suite][124]'s [code coverage][146] capabilities. It has no prerequisites. This + is useful if the code coverage tools are giving errors. +* `make clean_tests` cleans *everything*. It has prerequisites on all previous + clean targets, but it also cleans all of the [generated tests][143]. 
+ +When adding more generated files, you may need to add them to one of these +targets and/or add a target for them especially. + +### Preprocessor Macros + +`bc` and `dc` use *a lot* of preprocessor macros to ensure that each build type: + +* builds, +* works under the [test suite][124], and +* excludes as much code as possible from all builds. + +This section will explain the preprocessor style of `bc` and `dc`, as well as +provide an explanation of the macros used. + +#### Style + +The style of macro use in `bc` is pretty straightforward: I avoid depending on +macro definitions and instead, I set defaults if the macro is not defined and +then test the value of the macro with a plain `#if`. + +(Some examples of setting defaults are in [`include/status.h`][176], just above +the definition of the `BcStatus` enum.) + +In other words, I use `#if` instead of `#ifndef` or `#ifdef`, where possible. + +There are a couple of cases where I went with standard stuff instead. For +example, to test whether I am in debug mode or not, I still use the standard +`#ifndef NDEBUG`. + +#### Standard Macros + +`BC_ENABLED` + +: This macro expands to `1` if `bc` is enabled, `0` if disabled. + +`DC_ENABLED` + +: This macro expands to `1` if `dc` is enabled, `0` if disabled. + +`BUILD_TYPE` + +: The macro expands to the build type, which is one of: `A`, `E`, `H`, `N`, + `EH`, `EN`, `HN`, `EHN`. This build type is used in the help text to direct + the user to the correct markdown manual in the `git.yzena.com` website. + +`EXECPREFIX` + +: This macro expands to the prefix on the executable name. This is used to + allow `bc` and `dc` to skip the prefix when finding out which calculator is + executing. + +`BC_NUM_KARATSUBA_LEN` + +: This macro expands to an integer, which is the length of numbers below which + the Karatsuba multiplication algorithm switches to brute-force + multiplication. 
+ +`BC_ENABLE_EXTRA_MATH` + +: This macro expands to `1` if the [Extra Math build option][188] is enabled, + `0` if disabled. + +`BC_ENABLE_HISTORY` + +: This macro expands to `1` if the [History build option][193] is enabled, `0` + if disabled. + +`BC_ENABLE_NLS` + +: This macro expands to `1` if the [NLS build option][193] (for locales) is + enabled, `0` if disabled. + +`BC_ENABLE_LIBRARY` + +: This macro expands to `1` if the [`bcl` library][156] is enabled, `0` if + disabled. If this is enabled, building the calculators themselves is + disabled, but both `BC_ENABLED` and `DC_ENABLED` must be non-zero. + +`BC_ENABLE_MEMCHECK` + +: This macro expands to `1` if `bc` has been built for use with Valgrind's + [Memcheck][194], `0` otherwise. This ensures that fatal errors still free + all of their memory when exiting. `bc` does not do that normally because + what's the point? + +`BC_ENABLE_AFL` + +: This macro expands to `1` if `bc` has been built for fuzzing with + [AFL++][125], `0` otherwise. See the [Fuzzing][82] section for more + information. + +`BC_DEFAULT_BANNER` + +: This macro expands to the default value for displaying the `bc` banner. + +`BC_DEFAULT_SIGINT_RESET` + +: The macro expands to the default value for whether or not `bc` should reset + on `SIGINT` or quit. + +`BC_DEFAULT_TTY_MODE` + +: The macro expands to the default value for whether or not `bc` should use + TTY mode when it is available. + +`BC_DEFAULT_PROMPT` + +: This macro expands to the default value for whether or not `bc` should use a + prompt when TTY mode is available. + +`DC_DEFAULT_SIGINT_RESET` + +: The macro expands to the default value for whether or not `dc` should reset + on `SIGINT` or quit. + +`DC_DEFAULT_TTY_MODE` + +: The macro expands to the default value for whether or not `dc` should use + TTY mode when it is available. + +`DC_DEFAULT_PROMPT` + +: This macro expands to the default value for whether or not `dc` should use a + prompt when TTY mode is available.
+ +`BC_DEBUG_CODE` + +: If this macro expands to a non-zero integer, then `bc` is built with *a lot* + of extra debugging code. This is never set by the build system and must be + set by the programmer manually. This should never be set in builds given to + end users. For more information, see the [Debugging][134] section. + +## Test Suite + +While the source code may be the heart and soul of `bc`, the test suite is the +arms and legs: it gives `bc` the power to do anything it needs to do. + +The test suite is what allowed `bc` to climb to such high heights of quality. +This even goes for fuzzing because fuzzing depends on the test suite for its +input corpuses. (See the [Fuzzing][82] section.) + +Understanding how the test suite works should be, I think, the first thing that +maintainers learn after learning what `bc` and `dc` should do. This is because +the test suite, properly used, gives confidence that changes have not caused +bugs or regressions. + +That is why I spent the time to make the test suite as easy to use and as fast +as possible. + +To use the test suite (assuming `bc` and/or `dc` are already built), run the +following command: + +``` +make test +``` + +That's it. That's all. + +It will return an error code if the test suite failed. It will also print out +information about the failure. + +If you want the test suite to go fast, then run the following command: + +``` +make -j<cores> test +``` + +Where `<cores>` is the number of cores that your computer has. Of course, this +requires a `make` implementation that supports that option, but most do. (And I +will use this convention throughout the rest of this section.) + +I have even tried, as much as possible, to put longer-running tests near the +beginning of the run so that the entire suite runs as fast as possible. + +However, if you want to be sure which test is failing, then running a bare +`make test` is a great way to do that.
+ +But enough about how you have no excuse not to use the test suite as much as +possible; let's talk about how it works and what you *can* do with it. + +### Standard Tests + +The heavy lifting of testing the math in `bc`, as well as basic scripting, is +done by the "standard tests" for each calculator. + +These tests use the files in the [`tests/bc/`][161] and [`tests/dc/`][162] +directories (except for [`tests/bc/all.txt`][163], [`tests/bc/errors.txt`][164], +[`tests/bc/posix_errors.txt`][165], [`tests/bc/timeconst.sh`][166], +[`tests/dc/all.txt`][167], [`tests/dc/errors.txt`][168], and +[`tests/dc/read_errors.txt`][175]), which are called the "standard test +directories." + +For every test, there is the test file and the results file. The test files have +names of the form `<test>.txt`, where `<test>` is the name of the test, and the +results files have names of the form `<test>_results.txt`. + +If the test file exists but the results file does not, the results for that test +are generated by a GNU-compatible `bc` or `dc`. See the [Generated Tests][143] +section. + +The `all.txt` file in each standard tests directory is what tells the test suite +and [build system][142] what tests there are, and the tests are either run in +that order, or in the case of parallel `make`, that is the order that the +targets are listed as prerequisites of `make test`. + +If the test exists in the `all.txt` file but does not *actually* exist, the test +and its results are generated by a GNU-compatible `bc` or `dc`. See the +[Generated Tests][143] section. + +To add a non-generated standard test, do the following: + +* Add the test file (`<test>.txt` in the standard tests directory). +* Add the results file (`<test>_results.txt` in the standard tests directory). + You can skip this step if just the results file needs to be generated. See the + [Generated Tests][147] section for more information.
+* Add the name of the test to the `all.txt` file in the standard tests + directory, putting it in the order it should be in. If possible, I would put + longer tests near the beginning because they will start running earlier with + parallel `make`. I always keep `decimal` first, though, as a smoke test. + +If you need to add a generated standard test, see the [Generated Tests][147] +section for how to do that. + +Some standard tests need to be skipped in certain cases. That is handled by the +[build system][142]. See the [Integration with the Build System][147] section +for more details. + +In addition to all of the above, the standard test directory is not only the +directory for the standard tests of each calculator, it is also the parent +directory of all other test directories for each calculator. + +#### `bc` Standard Tests + +The list of current (17 July 2021) standard tests for `bc` is below: + +decimal + +: Tests decimal parsing and printing. + +print + +: Tests printing in every base from decimal. This is near the top for + performance of parallel testing. + +parse + +: Tests parsing in any base and outputting in decimal. This is near the top + for performance of parallel testing. + +lib2 + +: Tests the extended math library. This is near the top for performance of + parallel testing. + +print2 + +: Tests printing at the extreme values of `obase`. + +length + +: Tests the `length()` builtin function. + +scale + +: Tests the `scale()` builtin function. + +shift + +: Tests the left (`<<`) and right (`>>`) shift operators. + +add + +: Tests addition. + +subtract + +: Tests subtraction. + +multiply + +: Tests multiplication. + +divide + +: Tests division. + +modulus + +: Tests modulus. + +power + +: Tests power (exponentiation). + +sqrt + +: Tests the `sqrt()` (square root) builtin function. + +trunc + +: Tests the truncation (`$`) operator. + +places + +: Tests the places (`@`) operator. + +vars + +: Tests some usage of variables. 
This one came from [AFL++][125] I think. + +boolean + +: Tests boolean operators. + +comp + +: Tests comparison operators. + +abs + +: Tests the `abs()` builtin function. + +assignments + +: Tests assignment operators, including increment/decrement operators. + +functions + +: Tests functions, specifically function parameters being replaced before they + themselves are used. See the comment in `bc_program_call()` about the last + condition. + +scientific + +: Tests scientific notation. + +engineering + +: Tests engineering notation. + +globals + +: Tests that assigning to globals affects callers. + +strings + +: Tests strings. + +strings2 + +: Tests string allocation in slabs, to ensure slabs work. + +letters + +: Tests single and double letter numbers to ensure they behave differently. + Single-letter numbers should always be set to the same value, regardless of + `ibase`. + +exponent + +: Tests the `e()` function in the math library. + +log + +: Tests the `l()` function in the math library. + +pi + +: Tests that `bc` produces the right value of pi for numbers with varying + `scale` values. + +arctangent + +: Tests the `a()` function in the math library. + +sine + +: Tests the `s()` function in the math library. + +cosine + +: Tests the `c()` function in the math library. + +bessel + +: Tests the `j()` function in the math library. + +arrays + +: Test arrays. + +misc + +: Miscellaneous tests. I named it this because at the time, I struggled to + classify them, but it's really testing multi-line numbers. + +misc1 + +: A miscellaneous test found by [AFL++][125]. + +misc2 + +: A miscellaneous test found by [AFL++][125]. + +misc3 + +: A miscellaneous test found by [AFL++][125]. + +misc4 + +: A miscellaneous test found by [AFL++][125]. + +misc5 + +: A miscellaneous test found by [AFL++][125]. + +misc6 + +: A miscellaneous test found by [AFL++][125]. + +misc7 + +: A miscellaneous test found by [AFL++][125]. + +void + +: Tests void functions.
+ +rand + +: Tests the pseudo-random number generator and its special stack handling. + +recursive_arrays + +: Tested the slab vector undo ability used in `bc_parse_name()` when it + existed. Now used as a stress test. + +divmod + +: Tests divmod. + +modexp + +: Tests modular exponentiation. + +bitfuncs + +: Tests the bitwise functions, `band()`, `bor()`, `bxor()`, `blshift()` and + `brshift()` in [`gen/lib2.bc`][26]. + +leadingzero + +: Tests the leading zero functionality and the `plz*()` and `pnlz*()` + functions in [`gen/lib2.bc`][26]. + +#### `dc` Standard Tests + +The list of current (17 July 2021) standard tests for `dc` is below: + +decimal + +: Tests decimal parsing and printing. + +length + +: Tests the `length()` builtin function, including for strings and arrays. + +stack_len + +: Tests taking the length of the results stack. + +exec_stack_len + +: Tests taking the length of the execution stack. + +add + +: Tests addition. + +subtract + +: Tests subtraction. + +multiply + +: Tests multiplication. + +divide + +: Tests division. + +modulus + +: Tests modulus. + +divmod + +: Tests divmod. + +power + +: Tests power (exponentiation). + +sqrt + +: Tests the `sqrt()` (square root) builtin function. + +modexp + +: Tests modular exponentiation. + +boolean + +: Tests boolean operators. + +negate + +: Tests negation as a command and as part of numbers. + +trunc + +: Tests the truncation (`$`) operator. + +places + +: Tests the places (`@`) operator. + +shift + +: Tests the left (`<<`) and right (`>>`) shift operators. + +abs + +: Tests the `abs()` builtin function. + +scientific + +: Tests scientific notation. + +engineering + +: Tests engineering notation. + +vars + +: Tests some usage of variables. This one came from [AFL++][125] I think. + +misc + +: Miscellaneous tests. I named it this because at the time, I struggled to + classify them. + +strings + +: Tests strings. + +rand + +: Tests the pseudo-random number generator and its special stack handling.
+ +exec_stack + +: Tests the execution stack depth command. + +### Script Tests + +The heavy lifting of testing the scripting of `bc` is done by the "script tests" +for each calculator. + +These tests use the files in the [`tests/bc/scripts/`][169] and +[`tests/dc/scripts/`][170] directories (except for +[`tests/bc/scripts/all.txt`][171] and [`tests/dc/scripts/all.txt`][172]), which +are called the "script test directories." + +To add a script test, do the following: + +* Add the test file (`<test>.bc` or `<test>.dc` in the script tests directory). +* Add the results file (`<test>.txt` in the script tests directory). You can + skip this step if just the results file needs to be generated. See the + [Generated Tests][147] section for more information. +* Add the name of the test to the `all.txt` file in the script tests directory, + putting it in the order it should be in. If possible, I would put longer tests + near the beginning because they will start running earlier with parallel + `make`. + +Some script tests need to be skipped in certain cases. That is handled by the +[build system][142]. See the [Integration with the Build System][147] section +for more details. + +Another unique thing about the script tests, at least for `bc`: they test the +`-g` and `--global-stacks` flags. This means that all of the script tests for +`bc` are written assuming the `-g` flag was given on the command-line. + +There is one extra piece of script tests: [`tests/script.sed`][190]. This `sed` +script is used to remove an incompatibility with GNU `bc`. + +If there is only one more character to print at the end of `BC_LINE_LENGTH`, GNU +`bc` still prints a backslash+newline+digit combo. OpenBSD doesn't, which is +correct according to my reading of the `bc` spec, so my `bc` doesn't either. + +The `sed` script edits numbers that end with just one digit on a line by itself +to put it on the same line as others.
+ +#### `bc` Script Tests + +The list of current (17 July 2021) script tests for `bc` is below: + +print.bc + +: Tests printing even harder than the print standard test. + +multiply.bc + +: Tests multiplication even harder than the multiply standard test. + +divide.bc + +: Tests division even harder than the divide standard test. + +subtract.bc + +: Tests subtraction even harder than the subtract standard test. + +add.bc + +: Tests addition even harder than the add standard test. + +parse.bc + +: Tests parsing even harder than the parse standard test. + +array.bc + +: Tests arrays even harder than the arrays standard test. + +atan.bc + +: Tests arctangent even harder than the arctangent standard test. + +bessel.bc + +: Tests bessel even harder than the bessel standard test. + +functions.bc + +: Tests functions even harder than the functions standard test. + +globals.bc + +: Tests global stacks directly. + +len.bc + +: Tests the `length()` builtin on arrays. + +rand.bc + +: Tests the random number generator in the presence of global stacks. + +references.bc + +: Tests functions with array reference parameters. + +screen.bc + +: A random script provided by an early user that he used to calculate the size + of computer screens. + +strings2.bc + +: Tests escaping in strings. + +ifs.bc + +: Tests proper ending of `if` statements without `else` statements. + +ifs2.bc + +: More tests of proper ending of `if` statements without `else` statements. + +#### `dc` Script Tests + +The list of current (17 July 2021) script tests for `dc` is below: + +prime.dc + +: Tests scripting by generating the first 100,000 primes. + +asciify.dc + +: Tests the asciify command. + +stream.dc + +: Tests the stream command. + +array.dc + +: Tests arrays. + +else.dc + +: Tests else clauses on conditional execution commands. + +factorial.dc + +: Tests scripting with factorial. + +loop.dc + +: Tests scripting by implementing loops. + +quit.dc + +: Tests the quit command in the presence of tail calls.
+ +weird.dc + +: A miscellaneous test. + +### Error Tests + +One of the most useful parts of the `bc` test suite, in my opinion, is the heavy +testing of error conditions. + +Just about every error condition I can think of is tested, along with many +machine-generated (by [AFL++][125]) ones. + +However, because the error tests will often return error codes, they require +different infrastructure from the rest of the test suite, which assumes that +the calculator under test will return successfully. A lot of that infrastructure +is in the [`scripts/functions.sh`][105] script, but it basically allows the +calculator to exit with an error code and then tests that there *was* an error +code. + +Besides returning error codes, error tests also ensure that there is output from +`stderr`. This is to make sure that an error message is always printed. + +The error tests for each calculator are spread through two directories, due to +historical accident. These two directories are the standard test directory (see +the [Standard Tests][149] section) and the `errors/` directory directly +underneath the standard tests directory. + +This split is convenient, however, because the tests in each directory are +treated differently. + +The error tests in the standard test directory, which include `errors.txt` for +both calculators, `posix_errors.txt` for `bc`, and `read_errors.txt` for `dc`, +are run by [`tests/errors.sh`][226]. It reads them line-by-line and shoves the +data through `stdin`. Each line is considered a separate test. For this reason, +there can't be any blank lines in the error files in the standard tests +directory because a blank line causes a successful exit. + +On the other hand, the tests in the `errors/` directory below the standard tests +directory are run by [`tests/error.sh`][227] and are considered to be one test +per file. As such, they are used differently. 
They are shoved into the +calculator through `stdin`, but they are also executed by passing them on the +command-line. + +To add an error test, first figure out which kind you want. + +Is it a simple one-liner, and you don't care if it's tested through a file? + +Then put it in one of the error files in the standard test directory. I would +only put POSIX errors in the `posix_errors.txt` file for `bc`, and only `read()` +errors in the `read_errors.txt` file for `dc`; all others I would put in the +respective `errors.txt` file. + +On the other hand, if you care if the error is run as a file on the +command-line, or the error requires multiple lines to reproduce, then put the +test in the respective `errors/` directory and run the [`configure.sh`][69] +script again. + +After that, you are done; the test suite will automatically pick up the new +test, and you don't have to tell the test suite the expected results. + +### `stdin` Tests + +The `stdin` tests specifically test the lexing and parsing of multi-line +comments and strings. This is important because when reading from `stdin`, the +calculators can only read one line at a time, so partial parses are possible. + +To add `stdin` tests, just add the tests to the `stdin.txt` file in the +respective standard tests directory, and add the expected results in the +`stdin_results.txt` in the respective standard tests directory. + +### `read()` Tests + +The `read()` tests are meant to test the `read()` builtin function, to ensure +that the parsing and execution is correct. + +Each line is one test, as that is the nature of using the `read()` function, so +to add a test, just add it as another line in the `read.txt` file in the +respective standard tests directory, and add its result to the +`read_results.txt` file in the respective standard tests directory. + +### Other Tests + +The "other" tests are just random tests that I could not easily classify under +other types of tests. 
They usually include things like command-line parsing and +environment variable testing. + +To add an other test, it requires adding the programming for it to +[`tests/other.sh`][195] because all of the tests are written specifically in +that script. It would be best to use the infrastructure in +[`scripts/functions.sh`][105]. + +### Linux `timeconst.bc` Script + +One special script that `bc`'s test suite will use is the [Linux `timeconst.bc` +script][6]. + +I made the test suite able to use this script because the reason the +[toybox][16] maintainer wanted my `bc` is because of this script, and I wanted +to be sure that it would run correctly on the script. + +However, it is not part of the distribution, nor is it part of the repository. +The reason for this is because [`timeconst.bc`][6] is under the GPL, while this +repo is under a BSD license. + +If you want `bc` to run tests on [`timeconst.bc`][6], download it and place it +at `tests/bc/scripts/timeconst.bc`. If it is there, the test suite will +automatically run its tests; otherwise, it will skip it. + +### History Tests + +There are automatic tests for history; however, they have dependencies: Python 3 +and [`pexpect`][137]. + +As a result, because I need the [test suite to be portable][138], like the rest +of `bc`, the history tests are carefully guarded with things to ensure that they +are skipped, rather than failing if Python and [`pexpect`][137] are not +installed. For this reason, there is a `sh` script, [`tests/history.sh`][140] +that runs the actual script, [`tests/history.py`][139]. + +I have added as many tests as I could to cover as many lines and branches as +possible. I guess I could have done more, but doing so would have required a lot +of time. + +I have tried to make it as easy as possible to run the history tests. They will +run automatically if you use the `make test_history` command, and they will also +use parallel execution with `make -j test_history`. 
+ +However, the history tests are meant only to be run by maintainers of `bc`; they +are *not* meant to be run by users and packagers. The reason for this is that +they only seem to work reliably on Linux; `pexpect` seems to have issues on +other platforms, especially timeout issues. + +Thus, they are excluded from running with `make test` and [`tests/all.sh`][225]. +However, they can be run from the [`scripts/release.sh`][83] script. + +All of the tests are contained in [`tests/history.py`][139]. The reason for this +is because they are in Python, and I don't have an easy way of including Python +(or at the very least, I am not familiar enough with Python to do that). So they +are all in the same file to make it easier on me. + +Each test is one function in the script. They all take the same number and type +of arguments: + +1. `exe`: the executable to run. +2. `args`: the arguments to pass to the executable. +3. `env`: the environment. + +Each function creates a child process with `pexpect.spawn` and then tests with +that child. Then the function returns the child to the caller, who closes it +and checks its error code against its expected error code. + +Yes, the error code is not a success all the time. This is because of the UTF-8 +tests; `bc` gives a fatal error on any non-ASCII data because ASCII is all `bc` +is required to handle, per the [standard][2]. + +So in [`tests/history.py`][139], there are four main arrays: + +* `bc` test functions, +* `bc` expected error codes. +* `dc` test functions. +* `dc` expected error codes. + +[`tests/history.py`][139] takes an index as an argument; that index is what test +it should run. That index is used to index into the proper test and error code +array. + +If you need to add more history tests, you need to do the following: + +1. Add the function for that test to [`tests/history.py`][139]. +2. Add the function to the proper array of tests. +3. Add the expected error code to the proper array of error codes. +4. 
Add a target for the test to [`Makefile.in`][70]. +5. Add that target as a prerequisite to either `test_bc_history` or + `test_dc_history`. + +You do not need to do anything to add the test to `history_all_tests` (see +[Group Tests][141] below) because the scripts will automatically run all of the +tests properly. + +### Generated Tests + +Some tests are *large*, and as such, it is impractical to check them into `git`. +Instead, the tests depend on the existence of a GNU-compatible `bc` in the +`PATH`, which is then used to generate the tests. + +If [`configure.sh`][69] was run with the `-G` argument, which disables generated +tests, then `make test` and friends will automatically skip generated tests. +This is useful to do on platforms that are not guaranteed to have a +GNU-compatible `bc` installed. + +However, adding a generated test is complicated because you have to figure out +*where* you want to put the file to generate the test. + +For example, `bc`'s test suite will automatically use a GNU-compatible `bc` to +generate a `<test>_results.txt` file in the [standard tests][149] directory +(either `tests/bc/` or `tests/dc/`) if none exists for the `<test>` test. If no +`<test>.txt` file exists in the [standard tests][149] directory, then `bc`'s +test suite will look for a `<test>.bc` or `<test>.dc` file in the [script +tests][150] directory (either `tests/bc/scripts` or `tests/dc/scripts`), and if +that exists, it will use that script to generate the `<test>.txt` file in the +[standard tests][149] directory after which it will generate the +`<test>_results.txt` file in the [standard tests][149] directory. + +So you can choose to either: + +* Have a test in the [standard tests][149] directory without a corresponding + `*_results.txt` file, or +* Have a script in the [script tests][150] directory to generate the + corresponding file in the standard test directory before generating the + corresponding `*_results.txt` file. + +Adding a script has a double benefit: the script itself can be used as a test.
+However, script test results can also be generated. + +If `bc` is asked to run a script test, then if the script does not exist, `bc`'s +test suite returns an error. If it *does* exist, but no corresponding +`.txt` file exists in the [script tests][150] directory, then a +GNU-compatible `bc` is used to generate the `.txt` results file. + +If generated tests are disabled through [`configure.sh`][69], then these tests +are not generated if they do not exist. However, if they *do* exist, then they +are run. This can happen if a `make clean_tests` was not run between a build +that generated tests and a build that will not. + +### Group Tests + +While the test suite has a lot of targets in order to get parallel execution, +there are five targets that allow you to run each section, or all, of the test +suite as one unit: + +* `bc_all_tests` (`bc` tests) +* `timeconst_all_tests` ([Linux `timeconst.bc` script][6] tests) +* `dc_all_tests` (`dc` tests) +* `history_all_tests` (history tests) +* `run_all_tests` (combination of the previous four) + +In addition, there are more fine-grained targets available: + +* `test_bc` runs all `bc` tests (except history tests). +* `test_dc` runs all `dc` tests (except history tests). +* `test_bc_tests` runs all `bc` [standard tests][149]. +* `test_dc_tests` runs all `dc` [standard tests][149]. +* `test_bc_scripts` runs all `bc` [script tests][150]. +* `test_dc_scripts` runs all `dc` [script tests][150]. +* `test_bc_stdin` runs the `bc` [`stdin` tests][152]. +* `test_dc_stdin` runs the `dc` [`stdin` tests][152]. +* `test_bc_read` runs the `bc` [`read()` tests][153]. +* `test_dc_read` runs the `dc` [`read()` tests][153]. +* `test_bc_errors` runs the `bc` [error tests][151]. +* `test_dc_errors` runs the `dc` [error tests][151]. +* `test_bc_other` runs the `bc` [other tests][151]. +* `test_dc_other` runs the `dc` [other tests][151]. +* `timeconst` runs the tests for the [Linux `timeconst.bc` script][6]. 
+* `test_history` runs all history tests. +* `test_bc_history` runs all `bc` history tests. +* `test_dc_history` runs all `dc` history tests. + +All of the above tests are parallelizable. + +### Individual Tests + +In addition to all of the above, individual test targets are available. These +are mostly useful for attempting to fix a singular test failure. + +These tests are: + +* `test_bc_<test>`, where `<test>` is the name of a `bc` [standard test][149]. + The name is the name of the test file without the `.txt` extension. It is the + name printed by the test suite when running the test. +* `test_dc_<test>`, where `<test>` is the name of a `dc` [standard test][149]. + The name is the name of the test file without the `.txt` extension. It is the + name printed by the test suite when running the test. +* `test_bc_script_<test>`, where `<test>` is the name of a `bc` [script + test][150]. The name of the test is the name of the script without the `.bc` + extension. +* `test_dc_script_<test>`, where `<test>` is the name of a `dc` [script + test][150]. The name of the test is the name of the script without the `.dc` + extension. +* `test_bc_history<idx>` runs the `bc` history test with index `<idx>`. +* `test_dc_history<idx>` runs the `dc` history test with index `<idx>`. + +### [`bcl`][156] Test + +When [`bcl`][156] is built, the [build system][142] automatically ensures that +`make test` runs the [`bcl`][156] test instead of the `bc` and `dc` tests. + +There is only one test, and it is built from [`tests/bcl.c`][158]. + +The reason the test is in C is because [`bcl`][156] is a C library; I did not +want to have to write C code *and* POSIX `sh` scripts to run it. + +The reason there is only one test is because most of the code for the library is +tested by virtue of testing `bc` and `dc`; the test needs to only ensure that +the library bindings and plumbing do not interfere with the underlying code. + +However, just because there is only one test does not mean that it doesn't test +more than one thing.
The code actually handles a series of tests, along with +error checking to ensure that nothing went wrong. + +To add a [`bcl`][156] test, just figure out what test you want, figure out where +in [`tests/bcl.c`][158] it would be best to put it, and put it there. Do as +much error checking as possible, and use the `err(BclError)` function. Ensure +that all memory is freed because that test is run through [Valgrind][159] and +[AddressSanitizer][160]. + +### Integration with the Build System + +If it was not obvious by now, the test suite is heavily integrated into the +[build system][142], but the integration goes further than just making the test +suite easy to run from `make` and generating individual and group tests. + +The big problem the test suite has is that some `bc` code, stuff that is +important to test, is only in *some* builds. This includes all of the extra math +extensions, for example. + +So the test suite needs to have some way of turning off the tests that depend on +certain [build types][81] when those [build types][81] are not used. + +This is the reason the test suite is tightly integrated with the [build system][142]: the +[build system][142] knows what [build type][81] was used and can tell the test +suite to turn off the tests that do not apply. + +It does this with arguments to the test scripts that are either a `1` or a `0`, +depending on whether tests of that type should be enabled or not. These +arguments are why I suggest, in the [Test Scripts][148] section, to always use a +`make` target to run the test suite or any individual test. I have added a lot +of targets to make this easy and as fast as possible. + +In addition to all of that, the build system is responsible for selecting the +`bc`/`dc` tests or the [`bcl` test][157]. + +### Output Directories + +During any run of the test suite, the test suite outputs the results of running +various tests to files. These files are usually output to `tests/bc_outputs/` +and `tests/dc_outputs/`.
+ +However, in some cases, it may be necessary to output test results to a +different directory. If that is the case, set the environment variable +`BC_TEST_OUTPUT_DIR` to the name of the directory. + +If that is done, then test results will be written to +`$BC_TEST_OUTPUT_DIR/bc_outputs/` and `$BC_TEST_OUTPUT_DIR/dc_outputs/`. + +### Test Suite Portability + +The test suite is meant to be run by users and packagers as part of their +install process. + +This puts some constraints on the test suite, but the biggest is that the test +suite must be as [portable as `bc` itself][136]. + +This means that the test suite must be implemented in pure POSIX `make`, `sh`, +and C99. + +#### Test Scripts + +To accomplish the portability, the test suite is run by a bunch of `sh` scripts +that have the constraints laid out in [POSIX Shell Scripts][76]. + +However, that means they have some quirks, made worse by the fact that there are +[generated tests][143] and [tests that need to be skipped, but only +sometimes][147]. + +This means that a lot of the scripts take an awkward number and type of +arguments. Some arguments are strings, but most are integers, like +[`scripts/release.sh`][83]. + +It is for this reason that I do not suggest running the test scripts directly. +Instead, always use an appropriate `make` target, which already knows the +correct arguments for the test because of the [integration with the build +system][147]. + +### Test Coverage + +In order to get test coverage information, you need `gcc`, `gcov`, and `gcovr`. + +If you have them, run the following commands: + +``` +CC=gcc ./configure -gO3 -c +make -j<cores> +make coverage +``` + +Note that `make coverage` does not have a `-j<cores>` part; it cannot be run in +parallel. If you try, you will get errors. And note that `CC=gcc` is used. + +After running those commands, you can open your web browser and open the +`index.html` file in the root directory of the repo. From there, you can explore +all of the coverage results.
+ +If you see lines or branches that you think you could hit with a manual +execution, do such manual execution, and then run the following command: + +``` +make coverage_output +``` + +and the coverage output will be updated. + +If you want to rerun `make coverage`, you must do a `make clean` and build +first, like this: + +``` +make clean +make -j +make coverage +``` + +Otherwise, you will get errors. + +If you want to run tests in parallel, you can do this: + +``` +make -j +make -j test +make coverage_output +``` + +and that will generate coverage output correctly. + +### [AddressSanitizer][21] and Friends + +To run the test suite under [AddressSanitizer][21] or any of its friends, use +the following commands: + +``` +CFLAGS="-fsanitize=<sanitizer>" ./configure -gO3 -m +make -j +make -j test +``` + +where `<sanitizer>` is the correct name of the desired sanitizer. There is one +exception to the above: `UndefinedBehaviorSanitizer` should be run on a build +that has zero optimization, so for `UBSan`, use the following commands: + +``` +CFLAGS="-fsanitize=undefined" ./configure -gO0 -m +make -j +make -j test +``` + +### [Valgrind][20] + +To run the test suite under [Valgrind][20], run the following commands: + +``` +./configure -gO3 -v +make -j +make -j test +``` + +It really is that easy. I have directly added infrastructure to the build system +and the test suite to ensure that if [Valgrind][20] detects any memory errors or +any memory leaks at all, it will tell the test suite infrastructure to report an +error and exit accordingly. + +## POSIX Shell Scripts + +There are a lot of shell scripts in this repository, and every single one of them +is written in pure [POSIX `sh`][72]. + +The reason that they are written in [POSIX `sh`][72] is for *portability*: POSIX +systems are only guaranteed to have a barebones implementation of `sh` +available.
+ +There are *many* snares for unwary programmers attempting to modify +[`configure.sh`][69], any of the scripts in this directory, [`strgen.sh`][9], or +any of the scripts in [`tests/`][77]. Here are some of them: + +1. No `bash`-isms. +2. Only POSIX standard utilities are allowed. +3. Only command-line options defined in the POSIX standard for POSIX utilities + are allowed. +4. Only the standardized behavior of POSIX utilities is allowed. +5. Functions return data by *printing* it. Using `return` sets their exit code. + +In other words, the script must only use what is standardized in the [`sh`][72] +and [Shell Command Language][73] standards in POSIX. This is *hard*. It precludes +things like `local` and the `[[ ]]` notation. + +These are *enormous* restrictions and must be tested properly. I put out at +least one release with a change to `configure.sh` that wasn't portable. That was +an embarrassing mistake. + +The lack of `local`, by the way, is why variables in functions are named with +the form: + +``` +_<function_name>_<var_name> +``` + +This is done to prevent any clashes of variable names with already existing +names. And this applies to *all* shell scripts. However, there are a few times +when that naming convention is *not* used; all of them are because those +functions are required to change variables in the global scope. + +### Maintainer-Only Scripts + +If a script is meant to be used by maintainers (of `bc`, not package +maintainers), then rules 2, 3, and 4 don't need to be followed as much because +it is assumed that maintainers will be able to install whatever tools are +necessary to do the job. + +## Manuals + +The manuals for `bc` and `dc` are all generated, and the manpages for `bc`, +`dc`, and `bcl` are also generated. + +Why? + +I don't like the format of manpages, and I am not confident in my ability to +write them. Also, they are not easy to read on the web. + +So that explains why `bcl`'s manpage is generated from its markdown version.
But +why are the markdown versions of the `bc` and `dc` manuals generated? + +Because the content of the manuals needs to change based on the [build +type][81]. For example, if `bc` was built with no history support, it should not +have the **COMMAND LINE HISTORY** section in its manual. If it did, that would +just confuse users. + +So the markdown manuals for `bc` and `dc` are generated from templates +([`manuals/bc.1.md.in`][89] and [`manuals/dc.1.md.in`][90]). And from there, +the manpages are generated from the generated manuals. + +The generated manpage for `bcl` ([`manuals/bcl.3`][62]) is checked into version +control, and the generated markdown manuals and manpages for `bc` +([`manuals/bc`][79]) and `dc` ([`manuals/dc`][80]) are as well. + +This is because generating the manuals and manpages requires a heavy dependency +that only maintainers should care about: [Pandoc][92]. Because users [should not +have to install *any* dependencies][136], the files are generated, checked into +version control, and included in distribution tarballs. + +If you run [`configure.sh`][69], you have an easy way of generating the markdown +manuals and manpages: just run `make manpages`. This target calls +[`scripts/manpage.sh`][60] appropriately for `bc`, `dc`, and `bcl`. + +For more on how generating manuals and manpages works, see +[`scripts/manpage.sh`][60]. + +## Locales + +The locale system of `bc` is enormously complex, but that's because +POSIX-compatible locales are terrible. + +How are they terrible? + +First, `gencat` does not work for cross-compilation. In other words, +it does not generate machine-portable files. There's nothing I can do about +this except warn users. + +Second, the format of `.msg` files is...interesting. Thank goodness it is text +because otherwise, it would be impossible to get them right. + +Third, `.msg` files are not used directly. In other words, `gencat` exists. Why?
+ +Fourth, `$NLSPATH` is an awful way to set where and *how* to install locales. + +Yes, where and *how*. + +Obviously, from its name, it's a path, and that's the where. The *how* is more +complicated. + +It's actually *not* a path, but a path template. It's a format string, and it +can have a few format specifiers. For more information on that, see [this +link][84]. But in essence, those format specifiers configure how each locale is +supposed to be installed. + +With all those problems, why use POSIX locales? Portability, as always. I can't +assume that `gettext` will be available, but I *can* pretty well assume that +POSIX locales will be available. + +The locale system of `bc` includes all files under [`locales/`][85], +[`scripts/locale_install.sh`][87], [`scripts/locale_uninstall.sh`][88], +[`scripts/functions.sh`][105], the `bc_err_*` constants in [`src/data.c`][131], +and the parts of the build system needed to activate it. There is also code in +[`src/vm.c`][58] (in `bc_vm_gettext()`) for loading the current locale. + +If the order of error messages and/or categories is changed, the order of +errors must be changed in the enum, the default error messages and categories in +[`src/data.c`][131], and all of the messages and categories in the `.msg` files +under [`locales/`][85]. + +## Static Analysis + +I do *some* static analysis on `bc`. + +I used to use [Coverity][196], but I stopped using it when it started giving me +too many false positives and also because it had a vulnerability. + +However, I still use the [Clang Static Analyzer][197] through +[`scan-build`][19]. I only use it in debug mode because I have to add some +special code to make it not complain about things that are definitely not a +problem. + +The most frequent example of false positives is where a local is passed to a +function to be initialized. [`scan-build`][19] misses that fact, so I +pre-initialize such locals to prevent the warnings.
+ +To run `scan-build`, do the following: + +``` +make clean +scan-build make +``` + +`scan-build` will print its warnings to `stdout`. + +## Fuzzing + +The quality of this `bc` is directly related to the amount of fuzzing I did. As +such, I spent a lot of work making the fuzzing convenient and fast, though I do +admit that it took me a long time to admit that it did need to be faster. + +First, there were several things which make fuzzing fast: + +* Using [AFL++][125]'s deferred initialization. +* Splitting `bc`'s corpuses. +* Parallel fuzzing. + +Second, there are several things which make fuzzing convenient: + +* Preprepared input corpuses. +* [`scripts/fuzz_prep.sh`][119]. +* `tmux` and `tmuxp` configs. +* [`scripts/afl.py`][94]. + +### Fuzzing Performance + +Fuzzing with [AFL++][125] can be ***SLOW***. Spending the time to make it as +fast as possible is well worth the time. + +However, there is a caveat to the above: it is easy to make [AFL++][125] crash, +be unstable, or be unable to find "paths" (see [AFL++ Quickstart][129]) if the +performance enhancements are done poorly. + +To stop [AFL++][125] from crashing on test cases, and to be stable, these are +the requirements: + +* The state at startup must be *exactly* the same. +* The virtual memory setup at startup must be *exactly* the same. + +The first isn't too hard; it's the second that is difficult. + +`bc` allocates a lot of memory at start. ("A lot" is relative; it's far less +than most programs.) After going through an execution run, however, some of that +memory, while it could be cleared and reset, is in different places because of +vectors. Since vectors reallocate, their allocations are not guaranteed to be in +the same place. + +So to make all three work, I had to set up the deferred initialization and +persistent mode *before* any memory was allocated (except for `vm.jmp_bufs`, +which is probably what caused the stability to drop below 100%). 
However, using +deferred alone let me put the [AFL++][125] initialization further back. This +works because [AFL++][125] sets up a `fork()` server that `fork()`'s `bc` right +at that call. Thus, every run has the exact same virtual memory setup, and each +run can skip all of the setup code. + +I tested `bc` using [AFL++][125]'s deferred initialization, plus persistent +mode, plus shared memory fuzzing. In order to do it safely, with stability above +99%, all of that was actually *slower* than using just deferred initialization +with the initialization *right before* `stdin` was read. And as a bonus, the +stability in that situation is 100%. + +As a result, my [AFL++][125] setup only uses deferred initialization. That's the +`__AFL_INIT()` call. + +(Note: there is one more big item that must be done in order to have 100% +stability: the pseudo-random number generator *must* start with *exactly* the +same seed for every run. This is set up with the `tmux` and `tmuxp` configs that +I talk about below in [Convenience][130]. This seed is set before the +`__AFL_INIT()` call, so setting it has no runtime cost for each run, but without +it, stability would be abysmal.) + +On top of that, while `dc` is plenty fast under fuzzing (because of a faster +parser and less test cases), `bc` can be slow. So I have split the `bc` input +corpus into three parts, and I set fuzzers to run on each individually. This +means that they will duplicate work, but they will also find more stuff. + +On top of all of that, each input corpus (the three `bc` corpuses and the one +`dc` corpus) is set to run with 4 fuzzers. That works out perfectly for two +reasons: first, my machine has 16 cores, and second, the [AFL++][125] docs +recommend 4 parallel fuzzers, at least, to run different "power schedules." + +### Convenience + +The preprepared input corpuses are contained in the +`tests/fuzzing/bc_inputs{1,2,3}/`, and `tests/fuzzing/dc_inputs` directories. 
+There are three `bc` directories and only one `dc` directory because `bc`'s +input corpuses are about three times as large, and `bc` is a larger program; +it's going to need much more fuzzing. + +(They do share code though, so fuzzing all of them still tests a lot of the same +math code.) + +The next feature of convenience is the [`scripts/fuzz_prep.sh`][119] script. It +assumes the existence of `afl-clang-lto` in the `$PATH`, but if that exists, it +automatically configures and builds `bc` with a fuzz-ideal build. + +A fuzz-ideal build has several things: + +* `afl-clang-lto` as the compiler. (See [AFL++ Quickstart][129].) +* Debug mode, to crash as easily as possible. +* Full optimization (including [Link-Time Optimization][126]), for performance. +* [AFL++][125]'s deferred initialization (see [Fuzzing Performance][127] above). +* And `AFL_HARDEN=1` during the build to harden the build. See the [AFL++][125] + documentation for more information. + +There is one big thing that a fuzz-ideal build does *not* have: it does not use +[AFL++][125]'s `libdislocator.so`. This is because `libdislocator.so` crashes if +it fails to allocate memory. I do not want to consider those as crashes because +my `bc` does, in fact, handle them gracefully by exiting with a set error code. +So `libdislocator.so` is not an option. + +However, to add to [`scripts/fuzz_prep.sh`][119] making a fuzz-ideal build, in +`tests/fuzzing/`, there are two `yaml` files: [`tests/fuzzing/bc_afl.yaml`][120] +and [`tests/fuzzing/bc_afl_continue.yaml`][121]. These files are meant to be +used with [`tmux`][122] and [`tmuxp`][123]. While other programmers will have to +adjust the `start_directory` item, once it is adjusted, then using this command: + +``` +tmuxp load tests/fuzzing/bc_afl.yaml +``` + +will start fuzzing. 
+ +In other words, to start fuzzing, the sequence is: + +``` +./scripts/fuzz_prep.sh +tmuxp load tests/fuzzing/bc_afl.yaml +``` + +Doing that will load, in `tmux`, 16 separate instances of [AFL++][125], 12 on +`bc` and 4 on `dc`. The outputs will be put into the +`tests/fuzzing/bc_outputs{1,2,3}/` and `tests/fuzzing/dc_outputs/` directories. + +(Note that loading that config will also delete all unsaved [AFL++][125] output +from the output directories.) + +Sometimes, [AFL++][125] will report crashes when there are none. When crashes +are reported, I always run the following command: + +``` +./scripts/afl.py <dir> +``` + +where `<dir>` is one of `bc1`, `bc2`, `bc3`, or `dc`, depending on which of the +16 instances reported the crash. If it was one of the first four (`bc11` through +`bc14`), I use `bc1`. If it was one of the second four (`bc21` through `bc24`), I +use `bc2`. If it was one of the third four (`bc31` through `bc34`), I use `bc3`. +And if it was `dc`, I use `dc`. + +The [`scripts/afl.py`][94] script will report whether [AFL++][125] correctly +reported a crash or not. If so, it will copy the crashing test case to +`.test.txt` and tell you whether it was from running it as a file or through +`stdin`. + +From there, I personally always investigate the crash and fix it. Then, when the +crash is fixed, I either move `.test.txt` to `tests/{bc,dc}/errors/<idx>.txt` as +an error test (if it produces an error) or I create a new +`tests/{bc,dc}/misc<idx>.txt` test for it and a corresponding results file. (See +[Test Suite][124] for more information about the test suite.) In either case, +`<idx>` is the next number for a file in that particular place. For example, if +the last file in `tests/{bc,dc}/errors/` is `tests/{bc,dc}/errors/18.txt`, I +move `.test.txt` to `tests/{bc,dc}/errors/19.txt`. + +Then I immediately run [`scripts/afl.py`][94] again to find the next crash +because often, [AFL++][125] found multiple test cases that trigger the same +crash.
If it finds another, I repeat the process until it is happy. + +Once it *is* happy, I do the same `fuzz_prep.sh`, `tmuxp load` sequence and +restart fuzzing. Why do I restart instead of continuing? Because with the +changes, the test outputs could be stale and invalid. + +However, there *is* a case where I continue: if [`scripts/afl.py`][94] finds +that every crash reported by [AFL++][125] is invalid. If that's the case, I can +just continue with the command: + +``` +tmuxp load tests/fuzzing/bc_afl_continue.yaml +``` + +(Note: I admit that I usually run [`scripts/afl.py`][94] while the fuzzer is +still running, so often, I don't find a need to continue since there was no +stop. However, the capability is there, if needed.) + +In addition, my fuzzing setup, including the `tmux` and `tmuxp` configs, +automatically set up [AFL++][125] power schedules (see [Fuzzing +Performance][127] above). They also set up the parallel fuzzing such that there +is one fuzzer in each group of 4 that does deterministic fuzzing. It's always +the first one in each group. + +For more information about deterministic fuzzing, see the [AFL++][125] +documentation. + +### Corpuses + +I occasionally add to the input corpuses. These files come from new files in the +[Test Suite][124]. In fact, I use soft links when the files are the same. + +However, when I add new files to an input corpus, I sometimes reduce the size of +the file by removing some redundancies. + +And then, when adding to the `bc` corpuses, I try to add them evenly so that +each corpus will take about the same amount of time to get to a finished state. + +### [AFL++][125] Quickstart + +The way [AFL++][125] works is complicated. + +First, it is the one to invoke the compiler. It leverages the compiler to add +code to the binary to help it know when certain branches are taken. + +Then, when fuzzing, it uses that branch information to generate information +about the "path" that was taken through the binary. 
+ +I don't know what AFL++ counts as a new path, but each new path is added to an +output corpus, and it is later used as a springboard to find new paths. + +This is what makes AFL++ so effective: it's not just blindly thrashing a binary; +it adapts to the binary by leveraging information about paths. + +### Fuzzing Runs + +For doing a fuzzing run, I expect about a week or two where my computer is +basically unusable, except for text editing and light web browsing. + +Yes, it can take two weeks for me to do a full fuzzing run, and that does *not* +include the time needed to find and fix crashes; it only counts the time on the +*last* run, the one that does not find any crashes. This means that the entire +process can take a month or more. + +What I use as an indicator that the fuzzing run is good enough is when the +number of "Pending" paths (see [AFL++ Quickstart][129] above) for all fuzzer +instances, except maybe the deterministic instances, is below 50. And even then, +I try to let deterministic instances get that far as well. + +You can see how many pending paths are left in the "path geometry" section of +the [AFL++][125] dashboard. + +Also, to make [AFL++][125] quit, you need to send it a `SIGINT`, either with +`Ctrl+c` or some other method. It will not quit until you tell it to. + +### Radamsa + +I rarely use [Radamsa][99] instead of [AFL++][125]. In fact, it's only happened +once. + +The reason I use [Radamsa][99] instead of [AFL++][125] is because it is easier +to use with varying command-line arguments, which was needed for testing `bc`'s +command-line expression parsing code, and [AFL++][125] is best when testing +input from `stdin`. + +[`scripts/radamsa.sh`][100] does also do fuzzing on the [AFL++][125] inputs, but +it's not as effective at that, so I don't really use it for that either. + +[`scripts/radamsa.sh`][100] and [Radamsa][99] were only really used once; I have +not had to touch the command-line expression parsing code since. 
+ +### [AddressSanitizer][21] with Fuzzing + +One advantage of using [AFL++][125] is that it saves every test case that +generated a new path (see [AFL++ Quickstart][129] above), and it doesn't delete +them when the user makes it quit. + +Keeping them around is not a good idea, for several reasons: + +* They are frequently large. +* There are a lot of them. +* They go stale; after `bc` is changed, the generated paths may not be valid + anymore. + +However, before they are deleted, they can definitely be leveraged for even +*more* bug squashing by running *all* of the paths through a build of `bc` with +[AddressSanitizer][21]. + +This can easily be done with these five commands: + +``` +./scripts/fuzz_prep.sh -a +./scripts/afl.py --asan bc1 +./scripts/afl.py --asan bc2 +./scripts/afl.py --asan bc3 +./scripts/afl.py --asan dc +``` + +(By the way, the last four commands could be run in separate terminals to do the +processing in parallel.) + +These commands build an [ASan][21]-enabled build of `bc` and `dc` and then they +run `bc` and `dc` on all of the found crashes and path output corpuses. This is +to check that no path or crash has found any memory errors, including memory +leaks. + +Because the output corpuses can contain test cases that generate infinite loops +in `bc` or `dc`, [`scripts/afl.py`][94] has a timeout of 8 seconds, which is far +greater than the timeout that [AFL++][125] uses and should be enough to catch +any crash. + +If [AFL++][125] fails to find crashes *and* [ASan][21] fails to find memory +errors on the outputs of [AFL++][125], that is an excellent indicator of very +few bugs in `bc`, and a release can be made with confidence. + +## Code Concepts + +This section is about concepts that, if understood, will make it easier to +understand the code as it is written. + +The concepts in this section are not found in a single source file, but they are +littered throughout the code. That's why I am writing them all down in a single +place.
+ +### POSIX Mode + +POSIX mode is `bc`-only. + +In fact, POSIX mode is two different modes: Standard Mode and Warning Mode. +These modes are designed to help users write POSIX-compatible `bc` scripts. + +#### Standard Mode + +Standard Mode is activated with the `-s` or `--standard` flags. + +In this mode, `bc` will error if any constructs are used that are not strictly +compatible with the [POSIX `bc` specification][2]. + +#### Warning Mode + +Warning Mode is activated with the `-w` or `--warn` flags. + +In this mode, `bc` will issue warnings, but continue, if any constructs are used +that are not strictly compatible with the [POSIX `bc` specification][2]. + +### Memory Management + +The memory management in `bc` is simple: everything is owned by one thing. + +If something is in a vector, it is owned by that vector. + +If something is contained in a struct, it is owned by that struct with one +exception: structs can be given pointers to other things, but only if those +other things will outlast the struct itself. + +As an example, the `BcParse` struct has a pointer to the one `BcProgram` in +`bc`. This is okay because the program is initialized first and deallocated +last. + +In other words, it's simple: if a field in a struct is a pointer, then unless +that pointer is directly allocated by the struct (like the vector array or the +number limb array), that struct does not own the item at that pointer. +Otherwise, the struct *does* own the item. + +### [Async-Signal-Safe][115] Signal Handling + +`bc` is not the typical Unix utility. Most Unix utilities are I/O bound, but +`bc` is, by and large, CPU-bound. This has several consequences, but the biggest +is that there is no easy way to allow signals to interrupt it. + +This consequence is not obvious, but it comes from the fact that a lot of I/O +operations can be interrupted and return [`EINTR`][198]. 
This makes such I/O +calls natural places for allowing signals to interrupt execution, even when the +signal comes during execution and not during an interruptible I/O call. The way this is +done is setting a flag in the signal handler, which is checked around the time +of the I/O call, when it is convenient. + +Alternatively, I/O bound programs can use the [self-pipe trick][199]. + +Neither of these is possible in `bc` because the execution of math code can +take a long time. If a signal arrives during this long execution time, setting a +flag like an I/O bound application and waiting until the next I/O call could +take seconds, minutes, hours, or even days. (Last I checked, my `bc` takes a +week to calculate a million digits of pi, and it's not slow as far as `bc` +implementations go.) + +Thus, using just the technique of setting the flag will not work for an +interactive calculator. + +Well, it can, but it requires a lot of code and massive inefficiencies. I know +this because that was the original implementation. + +The original implementation set a flag and just exited the signal handler. Then, +on just about every loop header, I had a check for the signal flag. These +checks happened on every iteration of every loop. It was a massive waste because +it was polling, and [polling is evil][200]. + +So for version [3.0.0][32], I expended a lot of effort to change the +implementation. + +In the new system, code *outside* the signal handler sets a flag (`vm.sig_lock`) +to tell the signal handler whether it can use `longjmp()` to stop the current +execution. If so, it does. If not, it sets a flag, which then is used by the +code outside the signal handler that set the `vm.sig_lock` flag. When that code +unsets `vm.sig_lock`, it checks to see if a signal happened, and if so, that +code executes the `longjmp()` and stops the current execution. + +Other than that, the rest of the interrupt-based implementation is best +described in the [Error Handling][97] section.
+ +However, there are rules for signal handlers that I must lay out. + +First, signal handlers can only call [async-signal-safe][115] functions. + +Second, any field set or read by both the signal handler and normal code must be +a `volatile sig_atomic_t`. + +Third, when setting such fields, they must be set to constants and no math can +be done on them. This restriction and the above restriction exist in order to +ensure that the setting of the fields is always atomic with respect to signals. + +These rules exist for *any* code using Unix signal handlers, not just `bc`. + +#### Vectors and Numbers + +Vectors and numbers needed special consideration with the interrupt-based signal +handling. + +When vectors and numbers are about to allocate, or *reallocate* their arrays, +they need to lock signals to ensure that they do not call `malloc()` and friends +and get interrupted by a signal because, as you will see in the [Error +Handling][97] section, `longjmp()` cannot be used in a signal handler if it may +be able to interrupt a non-[async-signal-safe][115] function like `malloc()` and +friends. + +### Asserts + +If you asked me what procedure is used the most in `bc`, I would reply without +hesitation, "`assert()`." + +I use `assert()` everywhere. In fact, it is what made [fuzzing][82] with +[AFL++][125] so effective. [AFL++][125] is incredibly good at finding crashes, +and a failing `assert()` counts as one. + +So while a lot of bad bugs might have corrupted data and *not* caused crashes, +because I put in so many `assert()`'s, they were *turned into* crashing bugs, +and [AFL++][125] found them. + +By far, the most bugs it found this way was in the `bc` parser. (See the [`bc` +Parsing][110] for more information.) And even though I was careful to put +`assert()`'s everywhere, most parser bugs manifested during execution of +bytecode because the virtual machine assumes the bytecode is valid. 
+ +Sidenote: one of those bugs caused an infinite recursion when running the sine +(`s()`) function in the math library, so yes, parser bugs can be *very* weird. + +Anyway, the way I did `assert()`'s was like this: whenever I realized that I +had put assumptions into the code, I would put an `assert()` there to test it +**and** to *document* it. + +Yes, documentation. In fact, by far the best documentation of the code in `bc` +is actually the `assert()`'s. The only time I would not put an `assert()` to +test an assumption is if that assumption was already tested by an `assert()` +earlier. + +As an example, if a function calls another function and passes a pointer that +the caller previously `assert()`'ed was *not* `NULL`, then the callee does not +have to `assert()` it too, unless *also* called by another function that does +not `assert()` that. + +At first glance, it may seem like putting asserts for pointers being non-`NULL` +everywhere would actually be good, but unfortunately, not for fuzzing. Each +`assert()` is a branch, and [AFL++][125] rates its own effectiveness based on +how many branches it covers. If there are too many `assert()`'s, it may think +that it is not being effective and that more fuzzing is needed. + +This means that `assert()`'s show up most often in two places: function +preconditions and function postconditions. + +Function preconditions are `assert()`'s that test conditions relating to the +arguments a function was given. They appear at the top of the function, usually +before anything else (except maybe initializing a local variable). + +Function postconditions are `assert()`'s that test the return values or other +conditions when a function exits. These are at the bottom of a function or just +before a `return` statement. + +The other `assert()`'s cover various miscellaneous assumptions. + +If you change the code, I ***HIGHLY*** suggest that you use `assert()`'s to +document your assumptions. 
And don't remove them when [AFL++][125] gleefully +crashes `bc` and `dc` over and over again. + +### Vectors + +In `bc`, vectors mean resizable arrays, and they are the most fundamental piece +of code in the entire codebase. + +I had previously written a [vector implementation][112], which I used to guide +my decisions, but I wrote a new one so that `bc` would not have a dependency. I +also didn't make it as sophisticated; the one in `bc` is very simple. + +Vectors store some information about the type that they hold: + +* The size (as returned by `sizeof`). +* An enum designating the destructor. + +If the destructor is `BC_DTOR_NONE`, it is counted as the type not having a +destructor. + +But by storing the size, the vector can then allocate `size * cap` bytes, where +`cap` is the capacity. Then, when growing the vector, the `cap` is doubled again +and again until it is bigger than the requested size. + +But to store items, or to push items, or even to return items, the vector has to +figure out where they are, since to it, the array just looks like an array of +bytes. + +It does this by calculating a pointer to the underlying type with +`v + (i * size)`, where `v` is the array of bytes, `i` is the index of the +desired element, and `size` is the size of the underlying type. + +Doing that, vectors can avoid undefined behavior (because `char` pointers can +be cast to any other pointer type), while calculating the exact position of +every element. + +Because it can do that, it can figure out where to push new elements by +calculating `v + (len * size)`, where `len` is the number of items actually in +the vector. + +By the way, `len` is different from `cap`. While `cap` is the amount of storage +*available*, `len` is the number of actual elements in the vector at the present +point in time. + +Growing the vector happens when `len` is equal to `cap` *before* pushing new +items, not after. 
+ +To add a destructor, you need to add an enum item to `BcDtorType` in +[`include/vector.h`][174] and add the actual destructor in the same place as the +enum item in the `bc_vec_dtors[]` array in [`src/data.c`][131]. + +#### Pointer Invalidation + +There is one big danger with the vectors as currently implemented: pointer +invalidation. + +If a piece of code receives a pointer from a vector, then adds an item to the +vector before it finishes using the pointer, that code must then update the +pointer from the vector again. + +This is because any pointer inside the vector is calculated based off of the +array in the vector, and when the vector grows, it can `realloc()` the array, +which may move it in memory. If that is done, any pointer returned by +`bc_vec_item()`, `bc_vec_top()` and `bc_vec_item_rev()` will be invalid. + +This fact was the single most common cause of crashes in the early days of this +`bc`; wherever I have put a comment about pointers becoming invalidated and +updating them with another call to `bc_vec_item()` and friends, *do **NOT** +remove that code!* + +#### Maps + +Maps in `bc` are...not. + +They are really a combination of two vectors. Those combinations are easily +recognized in the source because one vector is named `<name>s` (plural), and the +other is named `<name>_map`. + +There are currently three, all in `BcProgram`: + +* `fns` and `fn_map` (`bc` functions). +* `vars` and `var_map` (variables). +* `arrs` and `arr_map` (arrays). + +They work like this: the `<name>_map` vector holds `BcId`'s, each of which just holds a +string and an index. The string is the name of the item, and the index is the +index of that item in the `<name>s` vector. + +Obviously, I could have just done a linear search for items in the `<name>s` +vector, but that would be slow with a lot of functions/variables/arrays. +Instead, I ensure that whenever an item is inserted into the `<name>_map` +vector, the item is inserted in sorted order.
This means that the `<name>_map` +is always sorted (by the names of the items). + +So when looking up an item in the "map", what is really done is this: + +1. A binary search is carried out on the names in the `<name>_map` vector. +2. When one is found, it returns the index in the `<name>_map` vector where the + item was found. +3. This index is then used to retrieve the `BcId`. +4. The index from the `BcId` is then used to index into the `<name>s` vector, + which returns the *actual* desired item. + +Why were the `<name>s` and `<name>_map` vectors not combined for ease? The +answer is that sometimes, when attempting to insert into the "map", code might +find that something is already there. For example, a function with that name may +already exist, or the variable might already exist. + +If the insert fails, then the name already exists, and the inserting code can +forgo creating a new item to put into the vector. However, if there is no item, +the inserting code must create a new item and insert it. + +If the two vectors were combined together, it would not be possible to separate +the steps such that creating a new item could be avoided if it already exists. + +#### Slabs and Slab Vectors + +`bc` allocates *a lot* of small strings, and small allocations are the toughest +for general-purpose allocators to handle efficiently. + +For that reason, I decided to create a system for allocating small +strings using something that I call a "slab vector" after [slab +allocators][201]. + +These vectors allocate what I call "slabs," which are just an allocation of a +single page with a length to tell the slab how much of the slab is used. + +The vector itself holds slabs, and when the slab vector is asked to allocate a +string, it attempts to do so in the last slab. If that slab cannot do so, it allocates +a new slab and allocates from that.
+ +There is one exception: if a string is going to be bigger than 128 bytes, then +the string is directly allocated, and a slab is created with that pointer and a +length of `SIZE_MAX`, which tells the slab vector that it is a direct +allocation. Then, the last slab is pushed into the next spot and the new special +slab is put into the vacated spot. This ensures that a non-special slab is +always last. + +### Command-Line History + +When I first wrote `bc`, I immediately started using it in order to eat my own +dog food. + +It sucked, and the biggest reason why was because of the lack of command-line +history. + +At first, I just dealt with it, not knowing how command-line history might be +implemented. + +Eventually, I caved and attempted to adapt [`linenoise-mob`][28], which I had +known about for some time. + +It turned out to be easier than I thought; the hardest part was the tedious +renaming of everything to fit the `bc` naming scheme. + +Understanding command-line history in `bc` is really about understanding VT-100 +escape codes, so I would start there. + +Now, the history implementation of `bc` has been adapted far beyond that initial +adaptation to make the command-line history implementation perfect for `bc` +alone, including integrating it into `bc`'s [Custom I/O][114] and making sure +that it does not disturb output that did not end with a newline. + +On top of that, at one point, I attempted to get history to work on Windows. It +barely worked after a lot of work and a lot of portability code, but even with +all of that, it does not have at least one feature: multi-line pasting from the +clipboard. + +### Error Handling + +The error handling on `bc` got an overhaul for version [`3.0.0`][32], and it +became one of the things that taught me the most about C in particular and +programming in general. + +Before then, error handling was manual. Almost all functions returned a +`BcStatus` indicating if an error had occurred. 
This led to a proliferation of +lines like: + +``` +if (BC_ERR(s)) return s; +``` + +In fact, a quick and dirty count of such lines in version `2.7.2` (the last +version before [`3.0.0`][32]) turned up 252 occurrences of that sort of line. + +And that didn't even guarantee that return values were checked *everywhere*. + +But before I can continue, let me back up a bit. + +From the beginning, I decided that I would not do what GNU `bc` does on errors; +it tries to find a point at which it can recover. Instead, I decided that I +would have `bc` reset to a clean slate, which, I believed, would reduce the +number of bugs where an unclean state caused errors with continuing execution. + +So from the beginning, errors would essentially unwind the stack until they got +to a safe place from which to clean the slate, reset, and ask for more input. + +Well, if that weren't enough, `bc` also has to handle [POSIX signals][113]. As +such, it had a signal handler that set a flag. But it could not safely interrupt +execution, so that's all it could do. + +In order to actually respond to the signal, I had to litter checks for the flag +*everywhere* in the code. And I mean *everywhere*. They had to be checked on +every iteration of *every* loop. They had to be checked going into and out of +certain functions. + +It was a mess. + +But fortunately for me, signals did the same thing that errors did: they unwound +the stack to the *same* place. + +Do you see where I am going with this? + +It turns out that what I needed was an [async-signal-safe][115] form of what +programmers call "exceptions" in other languages. + +I knew that [`setjmp()`][116] and [`longjmp()`][117] are used in C to implement +exceptions, so I thought I would learn how to use them. How hard could it be? + +Quite hard, it turns out, especially in the presence of signals. And that's +because there are a few big snares: + +1.
The values of any local variables are not guaranteed to be preserved after a + `longjmp()` back into a function. +2. While `longjmp()` is required to be [async-signal-safe][115], if it is + invoked by a signal handler that interrupted a non-[async-signal-safe][115] + function, then the behavior is undefined. +3. Any mutation that is not guaranteed to be atomic with respect to signals may + be incomplete when a signal arrives. + +Oh boy. + +For number 1, the answer to this is to hide data that must stay changed behind +pointers. Only the *pointers* are considered local, so as long as I didn't do +any modifying pointer arithmetic, pointers and their data would be safe. For +cases where I have local data that must change and stay changed, I needed to +*undo* the `setjmp()`, do the change, and then *redo* the `setjmp()`. + +For number 2 and number 3, `bc` needs some way to tell the signal handler that +it cannot do a `longjmp()`. This is done by "locking" signals with a `volatile +sig_atomic_t`. (For more information, see the [Async-Signal-Safe Signal +Handling][173] section.) For every function that calls a function that is not +async-signal-safe, they first need to use `BC_SIG_LOCK` to lock signals, and +afterward, use `BC_SIG_UNLOCK` to unlock signals. + +Code also needs to do this for all global, non-atomic mutation, which means +modifying any part of the `BcVm` global struct. + +`BC_SIG_UNLOCK` has another requirement: it must check for signals or errors and +jump if necessary. + +On top of all of that, *all* functions with cleanup needed to be able to run +their cleanup. This meant that `longjmp()` could not just jump to the finish; it +had to start what I call a "jump series," using a stack of `jmp_buf`'s +(`jmp_bufs` in `BcVm`). Each `longjmp()` uses the top of the `jmp_bufs` stack to +execute its jump.
Then, if the cleanup code was executed because of a jump, the +cleanup code was responsible for continuing the jump series by popping the +previous item off the stack and using the new top of the stack for a jump. + +In this way, C++-style exceptions were implemented in pure C. Not fun, but it +works. However, the order of operations matters, especially in the macros that +help implement the error handling. + +For example, in `BC_UNSETJMP`, signals are unlocked before checking for signals. +If a signal comes between, that's fine; it will still cause a jump to the right +place. However, disabling the lock after could mean that a signal could come +*after* checking for signals, but before signals were unlocked, causing the +handling of the signal to be delayed. + +#### Custom I/O + +Why did I implement my own buffered I/O for `bc`? Because I use `setjmp()` and +`longjmp()` for error handling (see the [Error Handling][97] section), and the +buffered I/O in `libc` does not interact well with the use of those procedures; +all of the buffered I/O API is basically non-[async-signal-safe][115]. + +Implementing custom buffered I/O had other benefits. First, it allowed me to +tightly integrate history with the I/O code. Second, it allowed me to make +changes to history in order to make it adapt to user prompts. + +### Lexing + +To simplify parsing, both calculators use lexers to turn the text into a more +easily-parsable form. + +While some tokens are only one character long, others require many characters, and +some of those need to store all of the text corresponding to the token for use +by the parsers. Tokens that need to store their corresponding text include, but +are not limited to: + +* Strings. +* Numbers. +* Identifiers. + +For this purpose, the lexer has a [vector][111] named `str` to store the data +for tokens. This data is overwritten if another token is lexed that needs to +store data, so the parsers need to copy the data before calling the lexer again.
+ +Both lexers do some of the same things: + +* Lex identifiers into tokens, storing the identifier in `str`. +* Lex number strings into tokens, storing the string in `str`. +* Lex whitespace. +* Lex comments. + +Other than that, and some common plumbing, the lexers have separate code. + +#### `dc` Lexing + +The `dc` lexer is remarkably simple; in fact, besides [`src/main.c`][205], +[`src/bc.c`][40], and [`src/dc.c`][44], which just contain one function each, +the only file smaller than [`src/dc_lex.c`][45] is [`src/args.c`][206], which +just processes command-line arguments after they are parsed by +[`src/opt.c`][51]. + +For most characters, the `dc` lexer is able to convert directly from the +character to its corresponding token. This happens using `dc_lex_tokens[]` in +[`src/data.c`][131]. + +`dc`'s lexer also has to lex the register name after lexing tokens for commands +that need registers. + +And finally, `dc`'s lexer needs to parse `dc` strings, which is the only part of +the `dc` lexer that is more complex than the `bc` lexer. This is because `dc` +strings need to have a balanced number of brackets. + +#### `bc` Lexing + +The `bc` lexer is fairly simple. It does the following things: + +* Lexes `bc` strings. +* Lexes `bc` identifiers. This is necessary because this is how `bc` keywords + are lexed. After ensuring that an identifier is not a keyword, the `bc` lexer + allows the common identifier function to take over. +* Turns characters and groups of characters into `bc` operator tokens. + +### Parsing + +The difference between parsing `bc` and `dc` code is...vast. The `dc` parser is +simple, while the `bc` parser is the most complex piece of code in the entire +codebase. + +However, they both do some of the same things. + +First, the parsers do *not* use [abstract syntax trees][207]; instead, they +directly generate the bytecode that will be executed by the `BcProgram` code. 
+Even in the case of `bc`, this heavily simplifies the parsing because the +[Shunting-Yard Algorithm][109] is designed to generate [Reverse Polish +Notation][108], which is basically directly executable. + +Second, any extra data that the `BcProgram` needs for execution is stored into +functions (see the [Functions][208] section). These include constants and +strings. + +#### `dc` Parsing + +The parser for `dc`, like its lexer, is remarkably simple. In fact, the easiness +of lexing and parsing [Reverse Polish notation][108] is probably why it was used +for `dc` when it was first created at Bell Labs. + +For most tokens, the `dc` parser is able to convert directly from the token +to its corresponding instruction. This happens using `dc_parse_insts[]` in +[`src/data.c`][131]. + +`dc`'s parser also has to parse the register name for commands that need +registers. This is the most complex part of the `dc` parser; each different +register command needs to be parsed differently because most of them require two +or more instructions to execute properly. + +For example, storing in a register requires a swap instruction and an assignment +instruction. + +Another example is conditional execution instructions; they need to produce the +instruction for the condition, and then they must parse a possible "else" part, +which might not exist. + +##### Existing Commands + +`dc` is based on commands, which are usually one letter. The following table +shows which ASCII characters are already used: + +| Characters | Used? | For...
| +|------------|-------|--------------------------------------------| +| Space | x | Separator | +| `!` | x | Conditional Execution of Registers | +| `"` | x | Bounded Rand Operator | +| `#` | x | Comments | +| `$` | x | Truncation | +| `%` | x | Modulus | +| `&` | | | +| `'` | x | Rand Operator | +| `(` | x | Greater Than Operator | +| `)` | x | Less Than Operator | +| `*` | x | Multiplication | +| `+` | x | Addition | +| `,` | x | Depth of Execution Stack | +| `-` | x | Subtraction | +| `.` | x | Numbers | +| `/` | x | Division | +| `0-9` | x | Numbers | +| `:` | x | Store into Array | +| `;` | x | Load from Array | +| `<` | x | Conditional Execution of Registers | +| `=` | x | Conditional Execution of Registers | +| `>` | x | Conditional Execution of Registers | +| `?` | x | Ask for User Input | +| `@` | x | Places Operator | +| `A-F` | x | Numbers | +| `G` | x | Equal Operator | +| `H` | x | Shift Left | +| `I` | x | Push `ibase` onto Stack | +| `J` | x | Push `seed` onto Stack | +| `K` | x | Push `scale` onto Stack | +| `L` | x | Pop off of Register | +| `M` | x | Boolean And Operator | +| `N` | x | Boolean Not Operator | +| `O` | x | Push `obase` onto Stack | +| `P` | x | Byte Stream Printing | +| `Q` | x | Quit Some Number of Macros | +| `R` | x | Pop Top of Stack | +| `S` | x | Push onto Register | +| `T` | x | Push Max `ibase` onto Stack | +| `U` | x | Push Max `obase` onto Stack | +| `V` | x | Push Max `scale` onto Stack | +| `W` | x | Push Max of `'` Operator | +| `X` | x | Scale of a Number | +| `Y` | x | Length of Array | +| `Z` | x | Number of Significant Digits | +| `[` | x | Strings | +| `\\` | x | Escaping Brackets in Strings | +| `]` | x | Strings | +| `^` | x | Power | +| `_` | x | Negative Numbers and Negation | +| Backtick | | | +| `a` | x | Asciify | +| `b` | x | Absolute Value | +| `c` | x | Clear Stack | +| `d` | x | Duplication of Top of Stack | +| `e` | x | Else in Conditional Execution of Registers | +| `f` | x | Printing the Stack | +| 
`g` | x | Global Settings | +| `h` | x | Shift Right | +| `i` | x | Set `ibase` | +| `j` | x | Set `seed` | +| `k` | x | Set `scale` | +| `l` | x | Load from Register | +| `m` | x | Boolean Or Operator | +| `n` | x | Print and Pop | +| `o` | x | Set `obase` | +| `p` | x | Print with Newline | +| `q` | x | Quit Two Macros | +| `r` | x | Swap Top Two Items | +| `s` | x | Store into Register | +| `t` | | | +| `u` | | | +| `v` | x | Square Root | +| `w` | | | +| `x` | x | Execute String | +| `y` | x | Current Depth of a Register | +| `z` | x | Current Depth of Stack | +| `{` | x | Greater Than or Equal Operator | +| `\|` | x | Modular Exponentiation | +| `}` | x | Less Than or Equal Operator | +| `~` | x | Division and Modulus Combined | + +#### `bc` Parsing + +`bc`'s parser is, by far, the most sensitive piece of code in this software, and +there is a very big reason for that: `bc`'s standard is awful and defined a very +poor language. + +The standard says that either semicolons or newlines can end statements. Trying +to parse the end of a statement when it can either be a newline or a semicolon +is subtle. Doing it in the presence of control flow constructs that do not have +to use braces is even harder. + +And then comes the biggest complication of all: `bc` has to assume that it is +*always* at a REPL (Read-Eval-Print Loop). `bc` is, first and foremost, an +*interactive* utility. + +##### Flags + +All of this means that `bc` has to be able to partially parse something, store +enough data to recreate that state later, and return, making sure to not +execute anything in the meantime. + +*That* is what the flags in [`include/bc.h`][106] are: they are the state that +`bc` is saving for itself. + +It saves them in a stack, by the way, because it's possible to nest +structures, just like any other programming language. Thus, not only does it +have to store state, it needs to do it arbitrarily, and still be able to +come back to it.
+ +So `bc` stores its parser state with flags in a stack. Careful setting of these +flags, along with properly using them and maintaining the flag stack, are what +make `bc` parsing work, but it's complicated. In fact, as I mentioned, the `bc` +parser is the single most subtle, fickle, and sensitive piece of code in the +entire codebase. Only one thing came close once: square root, and that was only +sensitive because I wrote it wrong. This parser is pretty good, and it is +*still* sensitive. And flags are the reason why. + +For more information about what individual flags there are, see the comments in +[`include/bc.h`][106]. + +##### Labels + +`bc`'s language is Turing-complete. That means that code needs the ability to +jump around, specifically to implement control flow like `if` statements and +loops. + +`bc` handles this while parsing with what I called "labels." + +Labels are markers in the bytecode. They are stored in functions alongside the +bytecode, and they are just indices into the bytecode. + +When the `bc` parser creates a label, it pushes an index onto the labels array, +and the index of the label in that array is the index that will be inserted into +the bytecode. + +Then, when a jump happens, the index pulled out of the bytecode is used to index +the labels array, and the label (index) at the index is then used to set the +instruction pointer. + +##### Cond Labels + +"Cond" labels are so-called because they are used by conditionals. + +The key to them is that they come *before* the code that uses them. In other +words, when jumping to a condition, code is jumping *backwards*. + +This means that when a cond label is created, the value that should go there is +well-known. Cond labels are easy. + +However, they are still stored on a stack so that the parser knows what cond +label to use. + +##### Exit Labels + +Exit labels are not so easy. + +"Exit" labels are so-called because they are used by code "exiting" out of `if` +statements or loops. 
+ +The key to them is that they come *after* the code that uses them. In other +words, when jumping to an exit, code is jumping *forwards*. + +But this means that when an exit label is created, the value that should go +there is *not* known. The code that needs it must be parsed and generated first. + +That means that exit labels are created with the index of `SIZE_MAX`, which is +then specifically checked for with an assert in `bc_program_exec()` before using +those indices. + +There should ***NEVER*** be a case when an exit label is not filled in properly +if the parser has no bugs. This is because every `if` statement, every loop, +must have an exit, so the exit must be set. If not, there is a bug. + +Exit labels are also stored on a stack so that the parser knows what exit label +to use. + +##### Expression Parsing + +`bc` has expressions like you might expect in a typical programming language. +This means [infix notation][107]. + +One thing about infix notation is that you can't just generate code straight +from it like you can with [Reverse Polish notation][108]. It requires more work +to shape it into a form that works for execution on a stack machine. + +That extra work is called the [Shunting-Yard algorithm][109], and the form it +translates infix notation into is...[Reverse Polish notation][108]. + +In order to understand the rest of this section, you must understand the +[Shunting-Yard algorithm][109]. Go do that before you read on. + +###### Operator Stack + +In `bc`, the [Shunting-Yard algorithm][109] is implemented with bytecode as the +output and an explicit operator stack (the `ops` field in `BcParse`) as the +operator stack. It stores tokens from `BcLex`. + +However, there is one **HUGE** hangup: multiple expressions can stack. This +means that multiple expressions can be parsed at one time (think an array element +expression in the middle of a larger expression). Because of that, we need to +keep track of where the previous expression ended. 
That's what the `start` parameter +to `bc_parse_operator()` is. + +Parsing multiple expressions on one operator stack only works because +expressions can only *stack*; this means that, if an expression begins before +another ends, it must *also* end before that other expression ends. This +property ensures that operators will never interfere with each other on the +operator stack. + +###### Recursion + +Because expressions can stack, parsing expressions actually requires recursion. +Well, it doesn't *require* it, but the code is much more readable that way. + +This recursion is indirect; the functions that `bc_parse_expr_err()` (the actual +expression parsing function) calls can, in turn, call it. + +###### Expression Flags + +There is one more big thing: not all expressions in `bc` are equal. + +Some expressions have requirements that others don't have. For example, only +array arguments can be arrays (which are technically not expressions, but are +treated as such for parsing), and some operators (in POSIX) are not allowed in +certain places. + +For this reason, functions that are part of the expression parsing +infrastructure in `bc`'s parser usually take a `flags` argument. This is meant +to be passed to children, and somewhere, they will be checked to ensure that the +resulting expression meets its requirements. + +There are also places where the flags are changed. This is because the +requirements change. + +Maintaining the integrity of the requirements flag set is an important part of +the `bc` parser. However, they do not have to be stored on a stack because their +stack is implicit from the recursion that expression parsing uses. + +### Functions + +Functions, in `bc`, are data structures that contain the bytecode and data +produced by the parsers. Functions are what the `BcProgram` program executes. + +#### Main and Read Functions + +There are two functions that always exist, which I call the "main" and "read" +functions.
+ +The "main" function is the function in which any code and data outside other +functions is put. Basically, it is the function where the scripting code ends +up. + +The "read" function is the function that is reset and parsed every time a call +to the `read()` builtin function happens. + +#### `dc` Strings + +In `dc`, strings can be executed, and since there are no actual "functions" in +`dc`, strings are handled as functions. In fact, they are effectively translated +into functions by parsing. + +##### Tail Calls + +Since strings in `dc` are functions, and since `dc` has no native loops, +such loops are implemented in `dc` code using strings with conditional execution +commands at the end of strings. + +When such conditional execution, or even unconditional execution, commands are +the very last commands in a string, then `dc` can perform a [tail call][202]. + +This is done by recording the fact that a tail call happened, which is done by +incrementing an integer on a stack. When a string is executed *without* a tail +call, a new entry is pushed onto the stack with the integer `1`. + +When a string finally quits that followed tail calls, its stack entry is popped, +eliminating all of those tail calls. + +Why perform tail calls? Because otherwise, `dc` would be subject to the same +thing that plagues [functional programming languages][203]: stack overflow. In +`dc`'s case, that would manifest itself as a growing [heap][204], because the +execution stack is stored on the heap, until a fatal allocation failure would +occur. + +#### Execution + +Execution is handled by an interpreter implemented using `BcProgram` and code +in [`src/program.c`][53]. + +The interpreter is a mix between a [stack machine][210] and a [register +machine][211].
It is a stack machine in that operations happen on a stack I call +the "results stack," but it is a register machine in that items on the stack can +be stored to and loaded from "registers" (`dc` terminology), variables (`bc` +terminology), and arrays. + +##### Stacks + +There are two stacks in the interpreter: + +* The "results" stack (as mentioned above). +* The "execution" stack. + +The results stack (the `results` field of the `BcProgram` struct) is the stack +where the results of computations are stored. It is what makes the interpreter +part [stack machine][210]. It is filled with `BcResult`'s. + +The execution stack (the `stack` field of the `BcProgram` struct) is the stack +that tracks the current execution state of the interpreter. It is the presence +of this separate stack that allows the interpreter to implement the machine as a +loop, rather than recursively. It is filled with `BcInstPtr`'s, which are the +"instruction pointers." + +These instruction pointers have three fields, all integers: + +* `func`, the index of the function that is currently executing. +* `idx`, the index of the next bytecode instruction to execute in the function's + bytecode array. +* `len`, which is the length of the results stack when the function started + executing. This is not used by `dc`, but it is used by `bc` because functions + in `bc` should never affect the results stack of their callers. + +With these three fields, and always executing using the instruction pointer at +the top of the execution stack, the interpreter can always keep track of its +execution. + +When a function or a string starts executing, a new `BcInstPtr` is pushed onto +the execution stack for it. This includes if a function was called recursively. +And then, when the function or string returns, its `BcInstPtr` is popped off of +the execution stack. + +##### Bytecode + +Execution of functions is done through bytecode produced directly by the +parsers (see the [Parsing][209] section).
This bytecode is stored in the `code` +[vector][111] of the `BcFunc` struct. + +This is a vector for two reasons: + +* It makes it easier to add bytecode to the vector in the parsers. +* `bc` allows users to redefine functions. + +The reason I can use bytecode is because there are fewer than 256 instructions, +so an `unsigned char` can store all the bytecodes. + +###### Bytecode Indices + +There is one other factor to bytecode: there are instructions that need to +reference strings, constants, variables, or arrays. Bytecode needs some way to +reference those things. + +Fortunately, all of those things can be referenced in the same way: with indices +because all of the items are in vectors. + +So `bc` has a way of encoding an index into bytecode. It does this by, after +pushing the instruction that references anything, pushing a byte set to the +length of the index in bytes, and then pushing the bytes of the index in +little-endian order. + +Then, when the interpreter encounters an instruction that needs one or more +items, it decodes the index or indices there and updates the `idx` field of the +current `BcInstPtr` to point to the byte after the index or indices. + +One more thing: the encoder of the indices only pushes as many bytes as +necessary to encode the index. It stops pushing when the index has no more bytes +with any 1 bits. + +##### Variables + +In `bc`, the vector of variables, `vars` in `BcProgram`, is not a vector of +numbers; it is a vector of vectors of numbers. The first vector is the vector of +variables, the second is the variable stack, and the last level is the actual +number. + +This is because both `bc` and `dc` need variables to be stacks. + +For `dc`, registers are *defined* to be stacks. + +For `bc`, variables as stacks is how function arguments/parameters and function +`auto` variables are implemented.
+ +When a function is called, and a value needs to be used as a function argument, +a copy of the value is pushed onto the stack corresponding to the variable with +the same name as the function's parameter. For `auto` variables, a new number +set to zero is pushed onto each stack corresponding to the `auto` variables. +(Zero is used because the [`bc` spec][2] requires that `auto` variables are set +to zero.) + +It is in this way that the old value of the variable, which may not even be +related to the function parameter or `auto` variable, is preserved while the +variable is used as a function parameter or `auto` variable. + +When the function returns, of course, the stacks of the variables for the +parameters and `auto`'s will have their top item popped, restoring the old value +as it was before the function call. + +##### Arrays + +Like variables, arrays are also implemented as stacks. However, because they are +arrays, there is yet another level; the `arrs` field in `BcProgram` is a vector +of vectors of vectors of numbers. The first level is the vector of +arrays, the second the stack for each array, the third the actual array, and +last the numbers in the array. + +`dc` has no need of this extra stack, but `bc` does because arrays can be +function parameters themselves. + +When arrays are used for function arguments, they are copied with a deep copy; +each item of the source vector is copied. This is because in `bc`, according to +the [`bc` spec][2], all function arguments are passed by value. + +However, array references are possible (see below). + +When arrays are used as `auto`'s, a new vector is pushed with one element; if +more elements are needed, the array is grown automatically, and new elements are +given the value of zero. + +In fact, if *any* array is accessed and does not have an element at that index, +the array is automatically grown to that size, and all new elements are given the +value zero.
This behavior is guaranteed by the [`bc` spec][2]. + +###### Array References + +Array references had to be implemented as vectors themselves because they must +be pushed on the vector stacks, which, as seen above, expect vectors +themselves. + +Thus, references are implemented as vectors on the vector stacks. These +vectors are not vectors of vectors themselves; they are vectors of bytes; in +fact, the fact that they are byte vectors and not vector vectors is how a +reference vector is detected. + +These reference vectors always have the same two things pushed: a byte encoding +(the same way bytecode indices are) of the referenced vector's index in the +`arrs` vector, and a byte encoding of the referenced vector's index in the vector +stack. + +If an item in a referenced vector is needed, then the reference is dereferenced, +and the item is returned. + +If a reference vector is passed to a function that does *not* expect a +reference, the vector is dereferenced and a deep copy is done, in the same way +as vectors are copied for normal array function parameters. + +### Callbacks + +There are many places in `bc` and `dc` where function pointers are used: + +* To implement destructors in vectors. (See the [Vectors][111] section.) +* To select the correct lex and parse functions for `bc` and `dc`. +* To select the correct function to execute unary operators. +* To select the correct function to execute binary operators. +* To calculate the correct number size for binary operators. +* To print a "digit" of a number. +* To seed the pseudo-random number generator. + +And there might be more. + +In every case, they are used for reducing the amount of code. Instead of +`if`/`else` chains, such as: + +``` +if (BC_IS_BC) { + bc_parse_parse(vm.parse); +} +else { + dc_parse_parse(vm.parse); +} +``` + +the code makes a single call through a function pointer. + +The best example of this is `bc_num_binary()`. It is called by every binary +operator. It figures out if it needs to allocate space for a new `BcNum`.
If so, +it allocates the space and then calls the function pointer to the *true* +operation. + +Doing it like that shrunk the code *immensely*. First, instead of every single +binary operator duplicating the allocation code, it only exists in one place. +Second, `bc_num_binary()` itself does not have a massive `if`/`else` chain or a +`switch` statement. + +But perhaps the most important use was for destructors in vectors. + +Most of the data structures in `bc` are stored in vectors. If I hadn't made +destructors available for vectors, then ensuring that `bc` had no memory leaks +would have been nigh impossible. As it is, I check `bc` for memory leaks every +release when I change the code, and I have not released `bc` after version +`1.0.0` with any memory leaks, as far as I can remember anyway. + +### Numbers + +In order to do arbitrary-precision math, as `bc` must do, there must be some way +of representing arbitrary-precision numbers. `BcNum` in [`include/num.h`][184] +is `bc`'s way of doing that. + +(Note: the word ["limb"][214] is used below; it has a specific meaning when +applied to arbitrary-precision numbers. It means one piece of the number. It can +have a single digit, which is what GNU `bc` does, or it can have multiple, which +is what this `bc` does.) + +This struct needs to store several things: + +* The array of limbs of the number. This is the `num` field. +* The location of the decimal point. This is the `rdx` (short for [radix][215]) + field. +* The number of limbs the number has. This is the `len` field. +* Whether the number is negative or not. This is the least significant bit of + the `rdx` field. More on that later. + +In addition, `bc`'s number stores the capacity of the limb array; this is the +`cap` field. + +If the number needs to grow, and the capacity of the number is big enough, the +number is not reallocated; the number of limbs is just added to. 
+ +There is one additional wrinkle: to make the usual operations (binary operators) +fast, the decimal point is *not* allowed to be in the middle of a limb; it must +always be between limbs, after all limbs (integer), or before all limbs (real +between -1 and 1). + +The reason for this is that addition, subtraction, multiplication, and +division expect digits to be lined up on the decimal point. By requiring that it +be between limbs, no extra alignment is needed, and those operations can proceed +without extra overhead. + +This does make some operations, most notably extending, truncating, and +shifting, more expensive, but the overhead is constant, and these operations are +usually cheap compared to the binary operators anyway. + +This also requires something else: `bc` numbers need to know *exactly* how many +decimal places they have after the decimal point. If the decimal point must be +in between limbs, the last decimal place could be in the middle of a limb. The +amount of decimal places in a number is carefully tracked and stored in the +`scale` field, and this number must always coincide with the `rdx` field by the +following formula: + +``` +(scale + (BC_BASE_DIGS - 1)) / BC_BASE_DIGS == rdx >> 1 +``` + +(`BC_BASE_DIGS` is the number of decimal digits stored in one limb. It is 9 on +64-bit systems and 4 on other systems.) + +Yes, `rdx` is shifted; that is because the negative bit is stored in the least +significant bit of the `rdx` field, and the actual radix (amount of limbs after +the decimal/radix point) is stored in the rest of the bits. This is safe because +`BC_BASE_DIGS` is always at least 4, which means `rdx` will always need at least +2 bits less than `scale`. + +In addition to `rdx` always matching `scale`, another invariant is that `rdx` +must always be less than or equal to `len`. (Because `scale` may be greater than +`rdx`, `scale` does not have to be less than or equal to `len`.)
+ +Another invariant is that `len` must always be less than or equal to `cap`, for +obvious reasons. + +The last thing programmers need to know is that the limb array is stored in +little-endian order. This means that the last decimal places are in the limb +stored at index 0, and the most significant digits are stored at index `len-1`. + +This is done to make the most important operations fast. Addition and +subtraction are done from least significant to most significant limbs, which +means they can speed through memory in the way most computers are best at. +Multiplication does the same, sort of, and with division, it matters less. +Comparison does need to go backwards, but that's after exhausting all other +alternatives, including for example, checking the length of the integer portion +of each limb array. + +Finally, here are some possible special situations with numbers and what they +mean: + +* `len == 0`: the number equals 0. +* `len == 0 && scale != 0`: the number equals 0, but it has a `scale` value. + This is the only case where `scale` does not have to coincide with `rdx` + This can happen with division, for example, that sets a specific `scale` for + the result value but may produce 0. +* `(rdx >> 1) < len`: the number is greater than or equal to 1, or less than or + equal to -1. +* `(rdx >> 1) == len`: the number is greater than -1 and less than 1, not + including 0, although this will be true for 0 as well. However, 0 is always + assumed to be represented by `len == 0`. +* `(rdx >> 1) == 0`: the number is an integer. In this case, `scale` must also + equal 0. + +#### Math Style + +When I wrote the math for `bc`, I adopted a certain style that, if known, will +make it easier to understand the code. The style follows these rules: + +* `BcNum` arguments always come before arguments of other types. +* Among the `BcNum` arguments, the operands always come first, and the `BcNum` + where the result(s) will be stored come last. 
+* Error checking is placed first in the function. +* Easy cases are placed next. +* Preparation, such as allocating temporaries, comes next. +* The actual math. +* Cleanup and ensuring invariants. + +While these rules are not hard and fast, using them as a guide will probably +help. + +### Strings as Numbers + +Strings can be assigned to variables. This is a problem because the vectors for +variable stacks expect `BcNum` structs only. + +While I could have made a union, I decided that the complexity of adding an +entirely new type, with destructor and everything, was not worth it. Instead, I +took advantage of the fact that `free()`, when passed a `NULL` pointer, will do +nothing. + +Using that, I made it so `BcNum`'s could store strings instead. This is marked +by the `BcNum` having a `NULL` limb array (`num`) and a `cap` of 0 (which should +*never* happen with a real number, though the other fields could be 0). + +The `BcNum` stores the function that stores the string in the `rdx` field, and +it stores the index of the string in the `scale` field. This is used to actually +load the string if necessary. + +Note that historically, string information was stored in the `loc` field of +the `d` union in a `BcResult`. This was changed recently to standardize; now, +all string information are stored in the `n` field of the `d` union regardless. +This means that all string information is stored in `BcNum`'s. This removes +extra cases. + +Also, if a temp is made with a string, then the result type should still be +`BC_RESULT_STR`, not `BC_RESULT_TEMP`. This is to make it easier to do type +checks. + +### Pseudo-Random Number Generator + +In order to understand this section, I suggest you read the information in the +manpages about the pseudo-random number generator (PRNG) first; that will help +you understand the guarantees it has, which is important because this section +delves into implementation details. 
+ +First, the PRNG I use is seeded; this is because most OS's have an excellent +cryptographically secure PRNG available via command-line, usually +`/dev/urandom`, but the only *seeded* PRNG available is usually `bash`'s +`$RANDOM`, which is essentially a wrapper around C's `rand()`. + +`rand()` is...bad. It is only guaranteed to return 15 bits of random data. +Obviously, getting good random data out of that would be hard with that alone, +but implementations also seem to be poor. + +On top of that, `bc` is an arbitrary-precision calculator; if I made it able to +generate random numbers, I could make it generate random numbers of any size, +and since it would be seeded, results would be reproducible, when wanted. + +So to get that, I needed a seeded PRNG with good characteristics. After scouring +the Internet, I decided on the [PCG PRNG][215], mostly because of [this blog +post][216]. Part of the reason was the behavior of the xoroshiro128+ author, who +hates on PCG and its author, but also because PCG seemed to do better when +tested by independent parties. + +After that decision, I faced a challenge: PCG requires 255 bits of seed: 128 for +the actual seed, and 127 for the "increment." (Melissa O'Neill, the PCG author, +likens the increment to selecting a codebook.) + +I could, of course, put the entire 255 bits into one massive arbitrary-precision +number; `bc` is good at that, after all. But that didn't sit right with me +because it would mean any seed selected by users would have the real portion +ignored, which is stupid in a program like `bc`. + +Instead, I decided to make the integer portion the increment (clamped down to +size), and the real portion the seed. + +In most cases, this would be a bad idea because you cannot, in general, know how +many decimal places you need to represent any number with `n` real digits in +base `b` in another base. 
However, there is an easy way to know how many decimal digits +after the decimal point it takes to represent reals of base 2 in base 10: the +power of two. + +It turns out that, for base 2 represented in base 10, the power of 2 is +*exactly* how many digits are necessary to represent *any* number `n/2^p`, where +`p` is the power of 2. This is because at every halving, the number of decimal +places increases by 1: + +``` +0.5 +0.25 +0.125 +0.0625 +0.03125 +0.015625 +... +``` + +So the algorithm to convert all 255 bits of the seed is as follows: + +1. Convert the increment to a `BcNum`. +2. Convert the seed to a `BcNum`. +3. Divide the seed by `2^128` with a `scale` of 128. (For 32-bit systems, + substitute 64 bits for 128.) +4. Add the two numbers together. + +Likewise, the algorithm to convert from a user-supplied number to a seed is: + +1. Truncate a copy of the number. +2. Subtract the result from #1 from the original number. This gives the real + portion of the number. +3. Clamp the result of #1 to 127 (or 63) bits. This is the increment. +4. Multiply the result of #2 by `2^128`. +5. Truncate the result of #4. This is the seed. + +#### Generating Arbitrary-Precision Numbers + +I wrote a function (`bc_rand_bounded()`) that will return unbiased results with +any bound below the max that PCG can generate. + +To generate an integer of arbitrary size using a bound, `bc` simply uses +`bc_rand_bounded()` to generate numbers with a bound `10^BC_BASE_DIGS` for as +many limbs as needed to satisfy the bigger bound. + +To generate numbers with arbitrary precision after the decimal point, `bc` +merely generates an arbitrary precision integer with the bound `10^p`, where `p` +is the desired number of decimal places, then divides it by `10^p` with a +`scale` of `p`. + +## Debug Code + +Besides building `bc` in debug mode with the `-g` flag to [`configure.sh`][69], +programmers can also add `-DBC_DEBUG_CODE=1` to the `CFLAGS`.
This will enable +the inclusion of *a lot* of extra code to assist with debugging. + +For more information, see all of the code guarded by `#if BC_DEBUG_CODE` in the +[`include/`][212] directory and in the [`src/`][213] directory. + +Yes, all of the code is guarded by `#if` preprocessor statements; this is +because the code should *never* be in a release build, and by making programmers +add this manually (not even an option to [`configure.sh`][69]), it is easier to +ensure that never happens. + +However, that said, the extra debug code is useful; that was why I kept it in. + +## Performance + +While I have put in a lot of effort to make `bc` as fast as possible, there +might be some things you can do to speed it up without changing the code. + +First, you can probably use [profile-guided optimization][217] to optimize even +better, using the test suite to profile. + +Second, I included macros that might help branch placement and prediction: + +* `BC_ERR(e)` +* `BC_UNLIKELY(e)` +* `BC_NO_ERR(e)` +* `BC_LIKELY(e)` + +`BC_ERR` is the same as `BC_UNLIKELY`, and `BC_NO_ERR` is the same as +`BC_LIKELY`; I just added them to also document branches that lead to error +conditions or *away* from error conditions. + +Anyway, if `BC_LIKELY` and `BC_UNLIKELY` are not defined during compilation, +they expand to nothing but the argument they were given. + +They can, however, be defined to `__builtin_expect((e), 1)` and +`__builtin_expect((e), 0)`, respectively, on GCC and Clang for better branch +prediction and placement. (For more information about `__builtin_expect()` see +the [GCC documentation][218].) + +There might be other compilers that can take advantage of that, but I don't know +anything about that. + +Also, as stated in the [build manual][219], link-time optimization is excellent +at optimizing this `bc`. Use it. + +### Benchmarks + +To help programmers improve performance, I have built and assembled +infrastructure to make benchmarking easy. 
+ +First, in order to easily run benchmarks, I created +[`scripts/benchmark.sh`][220]. + +Second, I copied and adapted [`ministat.c`][223] [from FreeBSD][221], to make it +easier to judge whether the results are significant or not. + +Third, I made the `make` clean target `make clean_benchmarks`, to clean +`scripts/ministat` and the generated benchmark files. + +Fourth, I made it so [`scripts/benchmark.sh`][220] outputs the timing and memory +data in a format that is easy for `scripts/ministat` to digest. + +To add a benchmark, add a script in the right directory to generate the +benchmark. Yes, generate. + +All of the benchmarks are generated first, from `.bc` and `.dc` files in the +[`benchmarks/bc/`][91] and [`benchmarks/dc/`][224] directories. This is so that massive +amounts of data can be generated and then pushed through the calculators. + +If you need to benchmark `bc` or `dc` with simple loops, have the generator +files simply print the loop code. + +### Caching of Numbers + +In order to provide some performance boost, `bc` tries to reuse old `BcNum`'s +that have the default capacity (`BC_NUM_DEF_SIZE`). + +It does this by allowing `bc_num_free()` to put the limb array onto a +statically-allocated stack (it's just a global array with a set size). Then, +when a `BcNum` with the default capacity is needed, `bc_num_init()` asks if any +are available. If the answer is yes, the one on top of the stack is returned. +Otherwise, `NULL` is returned, and `bc_num_init()` knows it needs to `malloc()` +a new limb array. + +When the stack is filled, any numbers that `bc` attempts to put on it are just +freed. + +This setup saved a few percent in my testing for version [3.0.0][32], which is +when I added it. + +## `bcl` + +At the request of one of my biggest users, I spent the time to make a build mode +where the number and math code of `bc` could be wrapped into a library, which I +called `bcl`.
+ +This mode is exclusive; `bc` and `dc` themselves are *not* built when building +`bcl`. + +The only things in the `bc` math code that are not included are: + +* Printing newlines (clients do not care about `bc`'s line length restriction). +* `dc`'s stream print. + +Even the [pseudo-random number generator][179] is included, with extra support +for generating real numbers with it. (In `bc`, such support is in +[`lib2.bc`][26].) + +### Signal Handling + +Like signal handling in `bc` proper (see the [Async-Signal-Safe Signal +Handling][173] section), `bcl` has the infrastructure for signal handling. + +This infrastructure is different, however, as `bcl` assumes that clients will +implement their own signal handling. + +So instead of doing signal handling on its own, `bcl` provides the capability to +interrupt executions and return to the clients almost immediately. Like in `bc`, +this is done with `setjmp()` and `longjmp()`, although the jump series is +stopped before returning normally to client code. + +### Contexts + +Contexts were an idea by the same user that requested `bcl`. They are meant to +make it so multiple clients in one program can keep their data separate from +each other. + +### Numbers + +Numbers in `bcl` are literally indices into an encapsulated array of numbers, +hidden in the context. These indices are then passed to clients to refer to +numbers later. + +### Operand Consumption + +Most math functions in `bcl` "consume" their operand arguments; the arguments +are freed, whether or not an error is returned. + +This is to make it easy to implement math code, like this: + +``` +n = bcl_add(bcl_mul(a, b), bcl_div(c, d)); +``` + +If numbers need to be preserved, they can be with `bcl_dup()`: + +``` +n = bcl_add(bcl_mul(bcl_dup(a), bcl_dup(b)), bcl_div(bcl_dup(c), bcl_dup(d))); +``` + +### Errors + +Errors can be encoded in the indices representing numbers, and where necessary, +clients are responsible for checking those errors.
+ +The encoding of errors is this: if an error happens, the value `0-error` is +returned. To decode, do the exact same thing. Thus, any index above +`0-num_errors` is an error. + +If an index that represents an error is passed to a math function, that function +propagates the error to its result and does not perform the math operation. + +All of this is to, once again, make it easy to implement the math code as above. + +However, where possible, errors are returned directly. + +[1]: https://en.wikipedia.org/wiki/Bus_factor +[2]: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/bc.html#top +[3]: https://en.wikipedia.org/wiki/Dc_(Unix) +[4]: https://en.wikipedia.org/wiki/Reverse_Polish_notation +[5]: ./bc/A.1.md#standard-library +[6]: https://github.com/torvalds/linux/blob/master/kernel/time/timeconst.bc +[7]: ./bc/A.1.md#extended-library +[8]: #libbc-2 +[9]: #strgensh +[10]: https://vimeo.com/230142234 +[11]: https://gavinhoward.com/2019/12/values-for-yao/ +[12]: http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1256.pdf +[13]: ./build.md#cross-compiling +[14]: ./build.md +[15]: #strgenc +[16]: http://landley.net/toybox/about.html +[17]: https://www.busybox.net/ +[18]: https://en.wikipedia.org/wiki/Karatsuba_algorithm +[19]: https://clang-analyzer.llvm.org/scan-build.html +[20]: https://www.valgrind.org/ +[21]: https://clang.llvm.org/docs/AddressSanitizer.html +[22]: https://gavinhoward.com/2019/11/finishing-software/ +[23]: #history +[24]: https://clang.llvm.org/docs/ClangFormat.html +[25]: ./algorithms.md +[26]: #lib2bc +[27]: #vmh +[28]: https://github.com/rain-1/linenoise-mob +[29]: https://github.com/antirez/linenoise +[30]: #bclh +[31]: #argsh +[32]: ../NEWS.md#3-0-0 +[33]: ../NEWS.md +[34]: https://github.com/skeeto/optparse +[35]: #opth +[36]: #historyh +[37]: #randh +[38]: #langh +[39]: #numc +[40]: #bcc +[41]: #bc_lexc +[42]: #bc_parsec +[43]: #libraryc +[44]: #dcc +[45]: #dc_lexc +[46]: #dc_parsec +[47]: #filec +[48]: #historyc +[49]: #langc 
+[50]: #lexc +[51]: #optc +[52]: #parsec +[53]: #programc +[54]: #randc +[55]: #fileh +[56]: #readc +[57]: #programh +[58]: #vmc +[59]: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/gencat.html#top +[60]: #manpagesh +[61]: #bcl3md +[62]: #bcl3 +[63]: #bclvcxproj +[64]: #bclvcxprojfilters +[65]: #bclsln +[66]: #bcvcxproj +[67]: #bcvcxprojfilters +[68]: #bcsln +[69]: #configuresh +[70]: #makefilein +[71]: #functionsh +[72]: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/sh.html#top +[73]: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18 +[74]: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/make.html#top +[75]: #versionh +[76]: #posix-shell-scripts +[77]: #tests +[78]: #karatsubapy +[79]: #bc-1 +[80]: #dc-1 +[81]: ./build.md#build-type +[82]: #fuzzing-1 +[83]: #releasesh +[84]: https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html#tag_08_02 +[85]: #locales-1 +[86]: #manuals-1 +[87]: #locale_installsh +[88]: #locale_uninstallsh +[89]: #bc1mdin +[90]: #dc1mdin +[91]: #bc +[92]: https://pandoc.org/ +[93]: #release_settingstxt +[94]: #aflpy +[95]: #randmathpy +[96]: #caching-of-numbers +[97]: #error-handling +[98]: #radamsatxt +[99]: https://gitlab.com/akihe/radamsa +[100]: #radamsash +[101]: https://musl.libc.org/ +[102]: ./build.md#settings +[103]: #test_settingstxt +[104]: #test_settingssh +[105]: #functionssh +[106]: #bch +[107]: https://en.wikipedia.org/wiki/Infix_notation +[108]: https://en.wikipedia.org/wiki/Reverse_Polish_notation +[109]: https://en.wikipedia.org/wiki/Shunting-yard_algorithm +[110]: #bc-parsing +[111]: #vectors +[112]: https://git.yzena.com/Yzena/Yc/src/branch/master/include/yc/vector.h +[113]: https://en.wikipedia.org/wiki/Signal_(IPC) +[114]: #custom-io +[115]: https://pubs.opengroup.org/onlinepubs/9699919799/functions/V2_chap02.html#tag_15_04_03_03 +[116]: https://pubs.opengroup.org/onlinepubs/9699919799/functions/setjmp.html +[117]:
https://pubs.opengroup.org/onlinepubs/9699919799/functions/longjmp.html +[118]: https://www.youtube.com/watch?v=4PaWFYm0kEw +[119]: #fuzz_prepsh +[120]: #bc_aflyaml +[121]: #bc_afl_continueyaml +[122]: https://github.com/tmux/tmux +[123]: https://tmuxp.git-pull.com/ +[124]: #test-suite +[125]: https://aflplus.plus/ +[126]: #link-time-optimization +[127]: #fuzzing-performance +[128]: #radamsa +[129]: #afl-quickstart +[130]: #convenience +[131]: #datac +[132]: https://git.yzena.com/gavin/vim-bc +[133]: https://git.yzena.com/gavin/bc_libs +[134]: #debugging +[135]: #asserts +[136]: #portability +[137]: https://pexpect.readthedocs.io/en/stable/ +[138]: #test-suite-portability +[139]: #historypy +[140]: #historysh +[141]: #group-tests +[142]: #build-system +[143]: #generated-tests +[144]: #benchmarks-1 +[145]: #gen +[146]: #test-coverage +[147]: #integration-with-the-build-system +[148]: #test-scripts +[149]: #standard-tests +[150]: #script-tests +[151]: #error-tests +[152]: #stdin-tests +[153]: #read-tests +[154]: #other-tests +[155]: #history-tests +[156]: #bcl +[157]: #bcl-test +[158]: #bclc +[159]: #valgrind +[160]: #addresssanitizer-and-friends +[161]: #bc-2 +[162]: #dc-2 +[163]: #alltxt-1 +[164]: #errorstxt +[165]: #posix_errorstxt +[166]: #timeconstsh +[167]: #alltxt-3 +[168]: #errorstxt-1 +[169]: #scripts-1 +[170]: #scripts-2 +[171]: #alltxt-2 +[172]: #alltxt-4 +[173]: #async-signal-safe-signal-handling +[174]: #vectorh +[175]: #read_errorstxt +[176]: #statush +[177]: #numbers +[178]: #math-style +[179]: #pseudo-random-number-generator +[180]: #lexh +[181]: #parseh +[182]: #dch +[183]: #libraryh +[184]: #numh +[185]: #readh +[186]: #maps +[187]: #slabs-and-slab-vectors +[188]: ./build.md#extra-math +[189]: #command-line-history +[190]: #scriptsed +[191]: #linux-timeconstbc-script +[192]: #corpuses +[193]: ./build.md#history +[194]: https://www.valgrind.org/docs/manual/mc-manual.html +[195]: #othersh +[196]: https://scan.coverity.com/ +[197]: 
https://clang-analyzer.llvm.org/ +[198]: https://unix.stackexchange.com/questions/253349/eintr-is-there-a-rationale-behind-it +[199]: https://cr.yp.to/docs/selfpipe.html +[200]: https://skarnet.org/cgi-bin/archive.cgi?2:mss:1607:201701:dfblejammjllfkggpcph +[201]: https://slembcke.github.io/2020/10/12/CustomAllocators.html#1-slab-allocator +[202]: https://en.wikipedia.org/wiki/Tail_call +[203]: https://en.wikipedia.org/wiki/Functional_programming_language +[204]: https://en.wikipedia.org/wiki/C_dynamic_memory_allocation +[205]: #mainc +[206]: #argc +[207]: https://en.wikipedia.org/wiki/Abstract_syntax_tree +[208]: #functions +[209]: #parsing +[210]: https://en.wikipedia.org/wiki/Stack_machine +[211]: https://en.wikipedia.org/wiki/Register_machine +[212]: #include +[213]: #src +[214]: https://gmplib.org/manual/Nomenclature-and-Types +[215]: https://en.wikipedia.org/wiki/Radix_point +[216]: #main-and-read-functions +[215]: https://www.pcg-random.org/ +[216]: https://lemire.me/blog/2017/08/22/testing-non-cryptographic-random-number-generators-my-results/ +[217]: https://en.wikipedia.org/wiki/Profile-guided_optimization +[218]: https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html#index-_005f_005fbuiltin_005fexpect +[219]: ./build.md#optimization +[220]: #benchmarksh +[221]: https://cgit.freebsd.org/src/tree/usr.bin/ministat/ministat.c +[222]: https://www.freebsd.org/cgi/man.cgi?query=ministat&apropos=0&sektion=0&manpath=FreeBSD+13.0-RELEASE+and+Ports&arch=default&format=html +[223]: #ministatc +[224]: #dc +[225]: #allsh +[226]: #errorssh +[227]: #errorsh diff --git a/manuals/header.txt b/manuals/header.txt new file mode 100644 index 000000000000..d805e14ad691 --- /dev/null +++ b/manuals/header.txt @@ -0,0 +1,27 @@ +.\" +.\" SPDX-License-Identifier: BSD-2-Clause +.\" +.\" Copyright (c) 2018-2021 Gavin D. Howard and contributors. 
+.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions are met: +.\" +.\" * Redistributions of source code must retain the above copyright notice, +.\" this list of conditions and the following disclaimer. +.\" +.\" * Redistributions in binary form must reproduce the above copyright notice, +.\" this list of conditions and the following disclaimer in the documentation +.\" and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +.\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +.\" LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. +.\" diff --git a/manuals/header_bc.txt b/manuals/header_bc.txt new file mode 100644 index 000000000000..fc2e6bdcb9c5 --- /dev/null +++ b/manuals/header_bc.txt @@ -0,0 +1 @@ +.TH "BC" "1" "June 2021" "Gavin D. Howard" "General Commands Manual" diff --git a/manuals/header_bcl.txt b/manuals/header_bcl.txt new file mode 100644 index 000000000000..4b1c6974f3df --- /dev/null +++ b/manuals/header_bcl.txt @@ -0,0 +1 @@ +.TH "BCL" "3" "June 2021" "Gavin D. 
Howard" "Libraries Manual" diff --git a/manuals/header_dc.txt b/manuals/header_dc.txt new file mode 100644 index 000000000000..aad9e7cea50a --- /dev/null +++ b/manuals/header_dc.txt @@ -0,0 +1 @@ +.TH "DC" "1" "June 2021" "Gavin D. Howard" "General Commands Manual" diff --git a/manuals/release.md b/manuals/release.md new file mode 100644 index 000000000000..aa2de7ca9acb --- /dev/null +++ b/manuals/release.md @@ -0,0 +1,72 @@ +# Release Checklist + +This is the checklist for cutting a release. + +For a lot of these steps, they are only needed if the code that would be +affected was changed. For example, I don't need to run the `scripts/randmath.py` +test if I did not change any of the math code. + +1. Update the README. +2. Update the manuals. +3. Test history manually. +4. Test with POSIX test suite. +5. Run the `scripts/randmath.py` script an excessive amount and add failing + tests to test suite. + * debug + * release + * minrelease +6. Fuzz with AFL. + * reldebug +7. Fix AFL crashes. +8. Find ASan crashes on AFL test cases. +9. Fix ASan crashes. +10. Build on Windows, no errors or warnings. + * Debug/`x64`. + * Debug/`x86`. + * Release{MD,MT}/`x64`. + * Release{MD,MT}/`x86`. +11. Run and pass the `scripts/release.sh` script on my own machine. +12. Run and pass the `scripts/release.sh` script, without generated tests and + sanitizers, on FreeBSD. +13. Run and pass the `scripts/release.sh` script, without generated tests, + sanitizers, and 64-bit, on an ARM server. +14. Run and pass the release script, with no generated tests, no clang, no + sanitizers, and no valgrind, on NetBSD. +15. Run and pass the release script, with no generated tests, no sanitizers, and + no valgrind, on OpenBSD. +16. Run `scan-build make`. +17. Repeat steps 3-16 again and repeat until nothing is found. +18. Update the benchmarks. +19. Update the version and `NEWS.md` and commit. +20. Boot into Windows. +21. Build all release versions of everything. + * Release/`x64` for `bc`. 
+ * Release/`x64` for `dc`. + * Release{MD,MT}/`x64` for `bcl`. + * Release/`x86` for `bc`. + * Release/`x86` for `dc`. + * Release{MD,MT}/`x86` for `bcl`. +22. Put the builds where Linux can access them. +23. Boot back into Linux. +24. Run `make clean_tests`. +25. Run the `scripts/package.sh` script. +26. Upload the custom tarball and Windows builds to Yzena Gitea. +27. Add output from `scripts/package.sh` to Yzena Gitea release notes. +28. Edit Yzena Gitea release notes for the changelog. +29. Upload the custom tarball to GitHub. +30. Add output from `scripts/package.sh` to GitHub release notes. +31. Edit GitHub release notes for the changelog. +32. Notify the following: + * FreeBSD + * Adelie Linux + * Ataraxia Linux + * Sabotage + * xstatic + * OpenBSD + * NetBSD +33. Submit new packages for the following: + * Alpine Linux + * Void Linux + * Gentoo Linux + * Linux from Scratch + * Arch Linux diff --git a/scripts/afl.py b/scripts/afl.py new file mode 100755 index 000000000000..c4312ce84f83 --- /dev/null +++ b/scripts/afl.py @@ -0,0 +1,245 @@ +#! /usr/bin/python3 -B +# +# SPDX-License-Identifier: BSD-2-Clause +# +# Copyright (c) 2018-2021 Gavin D. Howard and contributors. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# + +import os +import sys +import shutil +import subprocess + + +# Print the usage and exit with an error. +def usage(): + print("usage: {} [--asan] dir [results_dir [exe options...]]".format(script)) + print(" The valid values for dir are: 'bc1', 'bc2', 'bc3', and 'dc'.") + sys.exit(1) + + +# Check for a crash. +# @param exebase The calculator that crashed. +# @param out The file to copy the crash file to. +# @param error The error code (negative). +# @param file The crash file. +# @param type The type of run that caused the crash. This is just a string +# that would make sense to the user. +# @param test The contents of the crash file, or which line caused the crash +# for a run through stdin. +def check_crash(exebase, out, error, file, type, test): + if error < 0: + print("\n{} crashed ({}) on {}:\n".format(exebase, -error, type)) + print(" {}".format(test)) + print("\nCopying to \"{}\"".format(out)) + shutil.copy2(file, out) + print("\nexiting...") + sys.exit(error) + + +# Runs a test. This function is used to ensure that if a test times out, it is +# discarded. Otherwise, some tests result in incredibly long runtimes. We need +# to ignore those. +# +# @param cmd The command to run. +# @param exebase The calculator to test. +# @param tout The timeout to use. +# @param indata The data to push through stdin for the test. +# @param out The file to copy the test file to if it causes a crash. +# @param file The test file. 
# @param type     The type of test. This is just a string that would make sense
#                 to the user.
# @param test     The test. It could be an entire file, or just one line.
# @param environ  The environment to run the command under.
def run_test(cmd, exebase, tout, indata, out, file, type, test, environ=None):
    try:
        p = subprocess.run(cmd, timeout=tout, input=indata,
                           stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                           env=environ)
    except subprocess.TimeoutExpired:
        # A test that runs past the timeout is discarded, not reported.
        print("\n {} timed out. Continuing...\n".format(exebase))
        return

    # A negative return code means the process died from a signal.
    check_crash(exebase, out, p.returncode, file, type, test)


# Creates and runs a test. This basically just takes a file, runs it through the
# appropriate calculator as a whole file, then runs it through the calculator
# using stdin.
#
# This function reads the module-level names `exe` (the command list),
# `exebase`, `halt`, and `out`, which the script sets while processing its
# command line.
#
# @param file     The file to test.
# @param tout     The timeout to use.
# @param environ  The environment to run under.
def create_test(file, tout, environ=None):

    print(" {}".format(file))

    # README.txt files are skipped.
    if os.path.basename(file) == "README.txt":
        return

    # Read the file exactly once; these bytes are what gets pushed through
    # stdin in the second run below.
    with open(file, "rb") as f:
        content = f.read()

    print(" Running whole file...")

    # The file is given as an argument, and the halt command is sent on stdin
    # so that the calculator quits after executing the file.
    run_test(exe + [file], exebase, tout, halt.encode(), out, file, "file",
             file, environ)

    print(" Running file through stdin...")

    run_test(exe, exebase, tout, content, out, file,
             "running {} through stdin".format(file), file, environ)


# Get the children of a directory. Hidden entries (names starting with '.') are
# never returned.
# @param dir        The directory to get the children of.
# @param get_files  True if files should be gotten, false if directories should
#                   be gotten.
# @return           The sorted list of the names of the matching children.
def get_children(dir, get_files):
    children = []
    with os.scandir(dir) as it:
        for entry in it:
            if entry.name.startswith('.'):
                continue
            wanted = entry.is_file() if get_files else entry.is_dir()
            if wanted:
                children.append(entry.name)
    children.sort()
    return children


# Returns the correct executable name for the directory under test.
# @param d  The directory under test.
def exe_name(d):
    return "bc" if d in ("bc1", "bc2", "bc3") else "dc"


# Housekeeping. If the script was started without a directory component (for
# example, `python3 afl.py` from inside scripts/), dirname() returns an empty
# string, which would turn every "scriptdir + '/../...'" path below into a path
# under the filesystem root. Fall back to the current directory in that case.
script = sys.argv[0]
scriptdir = os.path.dirname(script) or "."

# Must run this script alone.
if __name__ != "__main__":
    usage()

timeout = 2.5

if len(sys.argv) < 2:
    usage()

# idx is the index of the `dir` argument; every later argument is located
# relative to it so that an optional leading --asan does not shift them.
idx = 1

exedir = sys.argv[idx]

asan = (exedir == "--asan")

# We could possibly run under ASan. See later for what that means.
if asan:
    idx += 1
    if len(sys.argv) < idx + 1:
        usage()
    exedir = sys.argv[idx]

print("exedir: {}".format(exedir))

# Grab the correct directory of AFL++ results.
if len(sys.argv) >= idx + 2:
    resultsdir = sys.argv[idx + 1]
else:
    if exedir == "bc1":
        resultsdir = scriptdir + "/../tests/fuzzing/bc_outputs1"
    elif exedir == "bc2":
        resultsdir = scriptdir + "/../tests/fuzzing/bc_outputs2"
    elif exedir == "bc3":
        resultsdir = scriptdir + "/../tests/fuzzing/bc_outputs3"
    elif exedir == "dc":
        resultsdir = scriptdir + "/../tests/fuzzing/dc_outputs"
    else:
        raise ValueError("exedir must be either bc1, bc2, bc3, or dc")

print("resultsdir: {}".format(resultsdir))

# More command-line processing.
if len(sys.argv) >= idx + 3:
    exe = sys.argv[idx + 2]
else:
    exe = scriptdir + "/../bin/" + exe_name(exedir)

exebase = os.path.basename(exe)


# Use the correct options.
if exebase == "bc":
    halt = "halt\n"
    options = "-lq"
    seed = ["-e", "seed = 1280937142.20981723890730892738902938071028973408912703984712093", "-f-"]
else:
    halt = "q\n"
    options = "-x"
    seed = ["-e", "1280937142.20981723890730892738902938071028973408912703984712093j", "-f-"]

# More command-line processing. Everything after the executable is passed
# through to the calculator as extra options. The options are spliced in as
# individual strings because the command handed to subprocess must be a flat
# list, and they are taken relative to idx so that --asan does not cause the
# executable path itself to be appended as an option. The slice is empty when
# no extra options were given.
exe = [exe, options] + seed + sys.argv[idx + 3:]

out = scriptdir + "/../.test.txt"

print(os.path.realpath(os.getcwd()))

dirs = get_children(resultsdir, False)

# Set the correct ASAN_OPTIONS.
+if asan: + env = os.environ.copy() + env['ASAN_OPTIONS'] = 'abort_on_error=1:allocator_may_return_null=1' + +for d in dirs: + + d = resultsdir + "/" + d + + print(d) + + # Check the crash files. + files = get_children(d + "/crashes/", True) + + for file in files: + file = d + "/crashes/" + file + create_test(file, timeout) + + # If we are running under ASan, we want to check all files. Otherwise, skip. + if not asan: + continue + + # Check all of the test cases found by AFL++. + files = get_children(d + "/queue/", True) + + for file in files: + file = d + "/queue/" + file + create_test(file, timeout * 2, env) + +print("Done") diff --git a/scripts/alloc.sh b/scripts/alloc.sh new file mode 100755 index 000000000000..c5c46febe0b3 --- /dev/null +++ b/scripts/alloc.sh @@ -0,0 +1,84 @@ +#!/bin/sh +# +# SPDX-License-Identifier: BSD-2-Clause +# +# Copyright (c) 2018-2021 Gavin D. Howard and contributors. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# + +# This script is only really useful for running on Linux. It tests the code to +# free temps in order to make an allocation work. In order to see it work, I +# suggest adding code after the following line in src/vm.c: +# +# if (BC_ERR(ptr == NULL)) bc_vm_fatalError(BC_ERR_FATAL_ALLOC_ERR); +# +# The code you should add is the following: +# +# bc_file_printf(&vm.ferr, "If you see this, the code worked.\n"); +# bc_file_flush(&vm.ferr, bc_flush_none); +# +# If you do not see that message printed, the code did not work. Or, in the +# case of some allocators, like jemalloc, the allocator just isn't great with +# turning a bunch of small allocations into a bigger allocation. + +script="$0" +scriptdir=$(dirname "$script") + +export LANG=C + +virtlimit=1000000 + +ulimit -v $virtlimit + +# This script is designed to allocate lots of memory with a lot of caching of +# numbers (the function f() specifically). Then, it's designed to allocate one +# large number and grow it until allocation failure (the function g()). 
+"$scriptdir/../bin/bc" <<*EOF + +define f(i, n) { + if (n == 0) return i; + return f(i + 1, n - 1) +} + +define g(n) { + t = (10^9)^(2^24) + while (n) { + n *= t + print "success\n" + } +} + +iterations=2000000 + +for (l=0; l < 100; l++) { + iterations + j = f(0, iterations$) + iterations += 100000 + print "here\n" + n=10^235929600 + g(n) + print "success\n" + n=0 +} +*EOF diff --git a/scripts/benchmark.sh b/scripts/benchmark.sh new file mode 100755 index 000000000000..35f92452ce78 --- /dev/null +++ b/scripts/benchmark.sh @@ -0,0 +1,159 @@ +#! /bin/sh +# +# SPDX-License-Identifier: BSD-2-Clause +# +# Copyright (c) 2018-2021 Gavin D. Howard and contributors. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. 
#

# This script depends on the GNU time utility, but I am okay with that because
# this script is only for maintainers.

# Print the usage to stderr and exit with an error. Everything goes to stderr
# because the benchmark results themselves are written to stdout.
# @param msg  Optional. An error message to print before the usage.
usage() {
    {
        if [ "$#" -gt 0 ]; then
            printf '%s\n\n' "$1"
        fi
        printf 'usage: %s [-n runs] [-p pause] dir benchmark...\n' "$0"
        printf ' -n runs is how many runs to run the benchmark, default 10.\n'
        printf ' -p pause is how many seconds to pause before running the benchmarks.\n'
        printf '\n'
        printf 'The fields are put in this order:\n'
        printf '1. Elapsed Time\n'
        printf '2. System Time\n'
        printf '3. User Time\n'
        printf '4. Max RSS\n'
        printf '5. Average RSS\n'
        printf '6. Average Total Memory Use\n'
        printf '7. Average Unshared Data\n'
        printf '8. Average Unshared Stack\n'
        printf '9. Average Shared Text\n'
        printf '10. Major Page Faults\n'
        printf '11. Minor Page Faults\n'
        printf '12. Swaps\n'
        printf '13. Involuntary Context Switches\n'
        printf '14. Voluntary Context Switches\n'
        printf '15. Inputs\n'
        printf '16. Outputs\n'
        printf '17. Signals Delivered\n'
    } 1>&2
    exit 1
}

script="$0"
scriptdir=$(dirname "$script")

runs=10
pause=0

# Process command-line arguments.
while getopts "n:p:" opt; do

    case "$opt" in
        n) runs="$OPTARG" ;;
        p) pause="$OPTARG" ;;
        *) usage "Invalid option or missing option argument" ;;
    esac

done

# Drop the options that getopts consumed. This uses shell arithmetic rather
# than bin/bc because the working directory is still the caller's at this
# point, so a relative bin/bc may not exist.
shift "$((OPTIND - 1))"

# Both values are used in numeric tests later, so reject anything that is not
# a non-negative integer now, with a useful message.
case "$runs" in
    '' | *[!0-9]*) usage "runs must be a non-negative integer: $runs" ;;
esac

case "$pause" in
    '' | *[!0-9]*) usage "pause must be a non-negative integer: $pause" ;;
esac

if [ "$#" -lt 2 ]; then
    usage
fi

# Every path below is relative to the root of the repo.
cd "$scriptdir/.." || exit 1

d="$1"
shift

# Create the list of benchmarks from the arguments: all of the remaining
# arguments, joined by single spaces. The arguments are consumed, as before.
benchmarks="$*"
shift "$#"

files=""

# Create the list of files from the benchmarks.
+for b in $benchmarks; do + + f=$(printf "benchmarks/%s/%s.txt" "$d" "$b") + + if [ "$files" = "" ]; then + files="$f" + else + files="$files $f" + fi + +done + +if [ "$d" = "bc" ]; then + opts="-lq" + halt="halt" +else + opts="-x" + halt="q" +fi + +# Generate all of the benchmarks. +for b in $benchmarks; do + + if [ ! -f "./benchmarks/$d/$b.txt" ]; then + printf 'Benchmarking generation of benchmarks/%s/%s.txt...\n' "$d" "$b" >&2 + printf '%s\n' "$halt" | /usr/bin/time -v bin/$d $opts "./benchmarks/$d/$b.$d" \ + > "./benchmarks/$d/$b.txt" + fi +done + +# We use this format to make things easier to use with ministat. +format="%e %S %U %M %t %K %D %p %X %F %R %W %c %w %I %O %k" + +printf 'Benchmarking %s...\n' "$files" >&2 + +if [ "$pause" -gt 0 ]; then + sleep "$pause" +fi + +i=0 + +# Run the benchmarks as many times as told to. +while [ "$i" -lt "$runs" ]; do + + printf '%s\n' "$halt" | /usr/bin/time -f "$format" bin/$d $opts $files 2>&1 > /dev/null + + # Might as well use the existing bc. + i=$(printf '%s + 1\n' "$i" | bin/bc) + +done diff --git a/scripts/bitfuncgen.c b/scripts/bitfuncgen.c new file mode 100644 index 000000000000..8fae531b9286 --- /dev/null +++ b/scripts/bitfuncgen.c @@ -0,0 +1,240 @@ +/* + * ***************************************************************************** + * + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2018-2021 Gavin D. Howard and contributors. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * ***************************************************************************** + * + * A generator for bitwise operations test. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#define NTESTS (100) + +/** + * Abort with an error message. + * @param msg The error message. 
+ */ +void err(const char *msg) { + fprintf(stderr, "%s\n", msg); + abort(); +} + +uint64_t rev(uint64_t a, size_t bits) { + + size_t i; + uint64_t res = 0; + + for (i = 0; i < bits; ++i) { + res <<= 1; + res |= a & 1; + a >>= 1; + } + + return res; +} + +uint64_t mod(uint64_t a, size_t bits) { + + uint64_t mod; + + if (bits < 64) mod = (uint64_t) ((1ULL << bits) - 1); + else mod = UINT64_MAX; + + return a & mod; +} + +uint64_t rol(uint64_t a, uint64_t p, size_t bits) { + + uint64_t res; + + assert(bits <= 64); + + p %= bits; + + if (!p) return a; + + res = (a << p) | (a >> (bits - p)); + + return mod(res, bits); +} + +uint64_t ror(uint64_t a, uint64_t p, size_t bits) { + + uint64_t res; + + assert(bits <= 64); + + p %= bits; + + if (!p) return a; + + res = (a << (bits - p)) | (a >> p); + + return mod(res, bits); +} + +int main(void) { + + uint64_t a = 0, b = 0, t; + size_t i; + + // We attempt to open this or /dev/random to get random data. + int fd = open("/dev/urandom", O_RDONLY); + + if (fd < 0) { + + fd = open("/dev/random", O_RDONLY); + + if (fd < 0) err("cannot open a random number generator"); + } + + // Generate NTESTS tests. + for (i = 0; i < NTESTS; ++i) { + + ssize_t nread; + + // Generate random data for the first operand. + nread = read(fd, (char*) &a, sizeof(uint64_t)); + if (nread != sizeof(uint64_t)) err("I/O error"); + + // Generate random data for the second operand. + nread = read(fd, (char*) &b, sizeof(uint64_t)); + if (nread != sizeof(uint64_t)) err("I/O error"); + + // Output the tests to stdout. 
+ printf("band(%lu, %lu)\n", a, b); + printf("bor(%lu, %lu)\n", a, b); + printf("bxor(%lu, %lu)\n", a, b); + printf("bshl(%lu, %lu)\n", mod(a, 32), mod(b, 5)); + printf("bshr(%lu, %lu)\n", mod(a, 32), mod(b, 5)); + printf("bshl(%lu, %lu)\n", mod(b, 32), mod(a, 5)); + printf("bshr(%lu, %lu)\n", mod(b, 32), mod(a, 5)); + printf("bnot8(%lu)\nbnot8(%lu)\n", a, mod(a, 8)); + printf("bnot16(%lu)\nbnot16(%lu)\n", a, mod(a, 16)); + printf("bnot32(%lu)\nbnot32(%lu)\n", a, mod(a, 32)); + printf("bnot64(%lu)\n", a); + printf("brev8(%lu)\nbrev8(%lu)\n", a, mod(a, 8)); + printf("brev16(%lu)\nbrev16(%lu)\n", a, mod(a, 16)); + printf("brev32(%lu)\nbrev32(%lu)\n", a, mod(a, 32)); + printf("brev64(%lu)\n", a); + printf("brol8(%lu, %lu)\n", a, b); + printf("brol8(%lu, %lu)\n", mod(a, 8), b); + printf("brol8(%lu, %lu)\n", a, mod(b, 8)); + printf("brol8(%lu, %lu)\n", mod(a, 8), mod(b, 8)); + printf("brol16(%lu, %lu)\n", a, b); + printf("brol16(%lu, %lu)\n", mod(a, 16), b); + printf("brol16(%lu, %lu)\n", a, mod(b, 16)); + printf("brol16(%lu, %lu)\n", mod(a, 16), mod(b, 16)); + printf("brol32(%lu, %lu)\n", a, b); + printf("brol32(%lu, %lu)\n", mod(a, 32), b); + printf("brol32(%lu, %lu)\n", a, mod(b, 32)); + printf("brol32(%lu, %lu)\n", mod(a, 32), mod(b, 32)); + printf("brol64(%lu, %lu)\n", a, b); + printf("bror8(%lu, %lu)\n", a, b); + printf("bror8(%lu, %lu)\n", mod(a, 8), b); + printf("bror8(%lu, %lu)\n", a, mod(b, 8)); + printf("bror8(%lu, %lu)\n", mod(a, 8), mod(b, 8)); + printf("bror16(%lu, %lu)\n", a, b); + printf("bror16(%lu, %lu)\n", mod(a, 16), b); + printf("bror16(%lu, %lu)\n", a, mod(b, 16)); + printf("bror16(%lu, %lu)\n", mod(a, 16), mod(b, 16)); + printf("bror32(%lu, %lu)\n", a, b); + printf("bror32(%lu, %lu)\n", mod(a, 32), b); + printf("bror32(%lu, %lu)\n", a, mod(b, 32)); + printf("bror32(%lu, %lu)\n", mod(a, 32), mod(b, 32)); + printf("bror64(%lu, %lu)\n", a, b); + printf("bmod8(%lu)\nbmod8(%lu)\n", a, mod(a, 8)); + printf("bmod16(%lu)\nbmod16(%lu)\n", a, mod(a, 16)); + 
printf("bmod32(%lu)\nbmod32(%lu)\n", a, mod(a, 32)); + printf("bmod64(%lu)\n", a); + + // Output the results to stderr. + fprintf(stderr, "%lu\n", a & b); + fprintf(stderr, "%lu\n", a | b); + fprintf(stderr, "%lu\n", a ^ b); + fprintf(stderr, "%lu\n", mod(a, 32) << mod(b, 5)); + fprintf(stderr, "%lu\n", mod(a, 32) >> mod(b, 5)); + fprintf(stderr, "%lu\n", mod(b, 32) << mod(a, 5)); + fprintf(stderr, "%lu\n", mod(b, 32) >> mod(a, 5)); + t = mod(~a, 8); + fprintf(stderr, "%lu\n%lu\n", t, t); + t = mod(~a, 16); + fprintf(stderr, "%lu\n%lu\n", t, t); + t = mod(~a, 32); + fprintf(stderr, "%lu\n%lu\n", t, t); + fprintf(stderr, "%lu\n", ~a); + t = rev(a, 8); + fprintf(stderr, "%lu\n%lu\n", t, t); + t = rev(a, 16); + fprintf(stderr, "%lu\n%lu\n", t, t); + t = rev(a, 32); + fprintf(stderr, "%lu\n%lu\n", t, t); + t = rev(a, 64); + fprintf(stderr, "%lu\n", t); + fprintf(stderr, "%lu\n", rol(mod(a, 8), mod(b, 8), 8)); + fprintf(stderr, "%lu\n", rol(mod(a, 8), mod(b, 8), 8)); + fprintf(stderr, "%lu\n", rol(mod(a, 8), mod(b, 8), 8)); + fprintf(stderr, "%lu\n", rol(mod(a, 8), mod(b, 8), 8)); + fprintf(stderr, "%lu\n", rol(mod(a, 16), mod(b, 16), 16)); + fprintf(stderr, "%lu\n", rol(mod(a, 16), mod(b, 16), 16)); + fprintf(stderr, "%lu\n", rol(mod(a, 16), mod(b, 16), 16)); + fprintf(stderr, "%lu\n", rol(mod(a, 16), mod(b, 16), 16)); + fprintf(stderr, "%lu\n", rol(mod(a, 32), mod(b, 32), 32)); + fprintf(stderr, "%lu\n", rol(mod(a, 32), mod(b, 32), 32)); + fprintf(stderr, "%lu\n", rol(mod(a, 32), mod(b, 32), 32)); + fprintf(stderr, "%lu\n", rol(mod(a, 32), mod(b, 32), 32)); + fprintf(stderr, "%lu\n", rol(a, b, 64)); + fprintf(stderr, "%lu\n", ror(mod(a, 8), mod(b, 8), 8)); + fprintf(stderr, "%lu\n", ror(mod(a, 8), mod(b, 8), 8)); + fprintf(stderr, "%lu\n", ror(mod(a, 8), mod(b, 8), 8)); + fprintf(stderr, "%lu\n", ror(mod(a, 8), mod(b, 8), 8)); + fprintf(stderr, "%lu\n", ror(mod(a, 16), mod(b, 16), 16)); + fprintf(stderr, "%lu\n", ror(mod(a, 16), mod(b, 16), 16)); + fprintf(stderr, 
"%lu\n", ror(mod(a, 16), mod(b, 16), 16)); + fprintf(stderr, "%lu\n", ror(mod(a, 16), mod(b, 16), 16)); + fprintf(stderr, "%lu\n", ror(mod(a, 32), mod(b, 32), 32)); + fprintf(stderr, "%lu\n", ror(mod(a, 32), mod(b, 32), 32)); + fprintf(stderr, "%lu\n", ror(mod(a, 32), mod(b, 32), 32)); + fprintf(stderr, "%lu\n", ror(mod(a, 32), mod(b, 32), 32)); + fprintf(stderr, "%lu\n", ror(a, b, 64)); + fprintf(stderr, "%lu\n%lu\n", mod(a, 8), mod(a, 8)); + fprintf(stderr, "%lu\n%lu\n", mod(a, 16), mod(a, 16)); + fprintf(stderr, "%lu\n%lu\n", mod(a, 32), mod(a, 32)); + fprintf(stderr, "%lu\n", a); + } + + return 0; +} diff --git a/scripts/fuzz_prep.sh b/scripts/fuzz_prep.sh new file mode 100755 index 000000000000..0441f94e340c --- /dev/null +++ b/scripts/fuzz_prep.sh @@ -0,0 +1,81 @@ +#! /bin/sh +# +# SPDX-License-Identifier: BSD-2-Clause +# +# Copyright (c) 2018-2021 Gavin D. Howard and contributors. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# + +# Print the usage to stderr and exit with an error. +usage() { + printf 'usage: %s [-a] [afl_compiler]\n' "$0" 1>&2 + printf '\n' 1>&2 + printf ' If -a is given, then an ASan ready build is created.\n' 1>&2 + printf ' Otherwise, a normal fuzz build is created.\n' 1>&2 + printf ' The ASan-ready build is for running under\n' 1>&2 + printf ' `scripts/afl.py --asan`, which checks that there were no\n' 1>&2 + printf ' memory errors in any path found by the fuzzer.\n' 1>&2 + printf ' It might also be useful to run scripts/randmath.py on an\n' 1>&2 + printf ' ASan-ready binary.\n' 1>&2 + exit 1 +} + +script="$0" +scriptdir=$(dirname "$script") + +asan=0 + +# Process command-line arguments. The positional parameters must not change +# while getopts is still running, so the shift happens after the loop. +while getopts "a" opt; do + case "$opt" in + a) asan=1 ;; + ?) usage "Invalid option: $opt" ;; + esac +done + +shift $((OPTIND - 1)) # Drop the parsed options; [afl_compiler] is now $1. + +if [ $# -lt 1 ]; then + CC=afl-clang-lto +else + CC="$1" +fi + +# We want this for extra sensitive crashing; export it so child processes see it. +export AFL_HARDEN=1 + +cd "$scriptdir/.." + +set -e + +if [ "$asan" -ne 0 ]; then + CFLAGS="-flto -fsanitize=address" +else + CFLAGS="-flto" +fi + +# We want a debug build because asserts are counted as crashes too. +CC="$CC" CFLAGS="$CFLAGS" ./configure.sh -gO3 -z + +make -j16 diff --git a/scripts/manpage.sh b/scripts/manpage.sh new file mode 100755 index 000000000000..c1429a6ed51f --- /dev/null +++ b/scripts/manpage.sh @@ -0,0 +1,175 @@ +#! /bin/sh +# +# SPDX-License-Identifier: BSD-2-Clause +# +# Copyright (c) 2018-2021 Gavin D. Howard and contributors. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# + +# Print the usage and exit with an error. +usage() { + printf "usage: %s manpage\n" "$0" 1>&2 + exit 1 +} + +# Generate a manpage and print it to a file. +# @param md The markdown manual to generate a manpage for. +# @param out The file to print the manpage to. +gen_manpage() { + + _gen_manpage_md="$1" + shift + + _gen_manpage_out="$1" + shift + + cat "$manualsdir/header.txt" > "$_gen_manpage_out" + cat "$manualsdir/header_${manpage}.txt" >> "$_gen_manpage_out" + + pandoc -f commonmark_x -t man "$_gen_manpage_md" >> "$_gen_manpage_out" +} + +# Generate a manual from a template and print it to a file before generating +# its manpage. +# param args The type of markdown manual to generate. 
This is a string that +# corresponds to build type (see the Build Type section of the +# manuals/build.md manual). +gen_manual() { + + _gen_manual_args="$1" + shift + + # Set up some local variables. $manualsdir and $manpage come from the + # variables outside the function. + _gen_manual_status="$ALL" + _gen_manual_out="$manualsdir/$manpage/$_gen_manual_args.1" + _gen_manual_md="$manualsdir/$manpage/$_gen_manual_args.1.md" + _gen_manual_temp="$manualsdir/temp.1.md" + + # We need to set IFS, so we store it here for restoration later. + _gen_manual_ifs="$IFS" + + # Remove the files that will be generated. + rm -rf "$_gen_manual_out" "$_gen_manual_md" + + # Here is the magic. This loop reads the template line-by-line, and based on + # _gen_manual_status, either prints it to the markdown manual or not. + # + # Here is how the template is set up: it is a normal markdown file except + # that there are sections surrounded by tags that look like this: + # + # {{ <build_type_list> }} + # ... + # {{ end }} + # + # Those tags mean that whatever build types are found in the + # <build_type_list> get to keep that section. Otherwise, skip. + # + # Obviously, the tag itself and its end are not printed to the markdown + # manual. + while IFS= read -r line; do + + # If we have found an end, reset the status. + if [ "$line" = "{{ end }}" ]; then + + # Some error checking. This helps when editing the templates. + if [ "$_gen_manual_status" -eq "$ALL" ]; then + err_exit "{{ end }} tag without corresponding start tag" 2 + fi + + _gen_manual_status="$ALL" + + # We have found a tag that allows our build type to use it. + elif [ "${line#\{\{* $_gen_manual_args *\}\}}" != "$line" ]; then + + # More error checking. We don't want tags nested. + if [ "$_gen_manual_status" -ne "$ALL" ]; then + err_exit "start tag nested in start tag" 3 + fi + + _gen_manual_status="$NOSKIP" + + # We have found a tag that is *not* allowed for our build type. 
+ elif [ "${line#\{\{*\}\}}" != "$line" ]; then + + if [ "$_gen_manual_status" -ne "$ALL" ]; then + err_exit "start tag nested in start tag" 3 + fi + + _gen_manual_status="$SKIP" + + # This is for normal lines. If we are not skipping, print. + else + if [ "$_gen_manual_status" -ne "$SKIP" ]; then + printf '%s\n' "$line" >> "$_gen_manual_temp" + fi + fi + + done < "$manualsdir/${manpage}.1.md.in" + + # Remove multiple blank lines. + uniq "$_gen_manual_temp" "$_gen_manual_md" + + # Remove the temp file. + rm -rf "$_gen_manual_temp" + + # Reset IFS. + IFS="$_gen_manual_ifs" + + # Generate the manpage. + gen_manpage "$_gen_manual_md" "$_gen_manual_out" +} + +set -e + +script="$0" +scriptdir=$(dirname "$script") +manualsdir="$scriptdir/../manuals" + +. "$scriptdir/functions.sh" + +# Constants for use later. If the set of build types is changed, $ARGS must be +# updated. +ARGS="A E H N EH EN HN EHN" +ALL=0 +NOSKIP=1 +SKIP=2 + +# Process command-line arguments. +test "$#" -eq 1 || usage + +manpage="$1" +shift + +if [ "$manpage" != "bcl" ]; then + + # Generate a manual and manpage for each build type. + for a in $ARGS; do + gen_manual "$a" + done + +else + # For bcl, just generate the manpage. + gen_manpage "$manualsdir/${manpage}.3.md" "$manualsdir/${manpage}.3" +fi diff --git a/scripts/ministat.c b/scripts/ministat.c new file mode 100644 index 000000000000..e5b7cd47b3e4 --- /dev/null +++ b/scripts/ministat.c @@ -0,0 +1,675 @@ +/*- + * SPDX-License-Identifier: Beerware + * + * ---------------------------------------------------------------------------- + * "THE BEER-WARE LICENSE" (Revision 42): + * wrote this file. As long as you retain this notice you + * can do whatever you want with this stuff. If we meet some day, and you think + * this stuff is worth it, you can buy me a beer in return. 
Poul-Henning Kamp + * ---------------------------------------------------------------------------- + * + */ + +#ifdef __GNU_LIBRARY__ +#include +#endif // __GNU_LIBRARY__ + +#include + +#ifdef __GNU_LIBRARY__ +#include +#endif // __GNU_LIBRARY__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define NSTUDENT 100 +#define NCONF 6 +static double const studentpct[] = { 80, 90, 95, 98, 99, 99.5 }; +static double const student[NSTUDENT + 1][NCONF] = { +/* inf */ { 1.282, 1.645, 1.960, 2.326, 2.576, 3.090 }, +/* 1. */ { 3.078, 6.314, 12.706, 31.821, 63.657, 318.313 }, +/* 2. */ { 1.886, 2.920, 4.303, 6.965, 9.925, 22.327 }, +/* 3. */ { 1.638, 2.353, 3.182, 4.541, 5.841, 10.215 }, +/* 4. */ { 1.533, 2.132, 2.776, 3.747, 4.604, 7.173 }, +/* 5. */ { 1.476, 2.015, 2.571, 3.365, 4.032, 5.893 }, +/* 6. */ { 1.440, 1.943, 2.447, 3.143, 3.707, 5.208 }, +/* 7. */ { 1.415, 1.895, 2.365, 2.998, 3.499, 4.782 }, +/* 8. */ { 1.397, 1.860, 2.306, 2.896, 3.355, 4.499 }, +/* 9. */ { 1.383, 1.833, 2.262, 2.821, 3.250, 4.296 }, +/* 10. */ { 1.372, 1.812, 2.228, 2.764, 3.169, 4.143 }, +/* 11. */ { 1.363, 1.796, 2.201, 2.718, 3.106, 4.024 }, +/* 12. */ { 1.356, 1.782, 2.179, 2.681, 3.055, 3.929 }, +/* 13. */ { 1.350, 1.771, 2.160, 2.650, 3.012, 3.852 }, +/* 14. */ { 1.345, 1.761, 2.145, 2.624, 2.977, 3.787 }, +/* 15. */ { 1.341, 1.753, 2.131, 2.602, 2.947, 3.733 }, +/* 16. */ { 1.337, 1.746, 2.120, 2.583, 2.921, 3.686 }, +/* 17. */ { 1.333, 1.740, 2.110, 2.567, 2.898, 3.646 }, +/* 18. */ { 1.330, 1.734, 2.101, 2.552, 2.878, 3.610 }, +/* 19. */ { 1.328, 1.729, 2.093, 2.539, 2.861, 3.579 }, +/* 20. */ { 1.325, 1.725, 2.086, 2.528, 2.845, 3.552 }, +/* 21. */ { 1.323, 1.721, 2.080, 2.518, 2.831, 3.527 }, +/* 22. */ { 1.321, 1.717, 2.074, 2.508, 2.819, 3.505 }, +/* 23. */ { 1.319, 1.714, 2.069, 2.500, 2.807, 3.485 }, +/* 24. */ { 1.318, 1.711, 2.064, 2.492, 2.797, 3.467 }, +/* 25. */ { 1.316, 1.708, 2.060, 2.485, 2.787, 3.450 }, +/* 26. 
*/ { 1.315, 1.706, 2.056, 2.479, 2.779, 3.435 }, +/* 27. */ { 1.314, 1.703, 2.052, 2.473, 2.771, 3.421 }, +/* 28. */ { 1.313, 1.701, 2.048, 2.467, 2.763, 3.408 }, +/* 29. */ { 1.311, 1.699, 2.045, 2.462, 2.756, 3.396 }, +/* 30. */ { 1.310, 1.697, 2.042, 2.457, 2.750, 3.385 }, +/* 31. */ { 1.309, 1.696, 2.040, 2.453, 2.744, 3.375 }, +/* 32. */ { 1.309, 1.694, 2.037, 2.449, 2.738, 3.365 }, +/* 33. */ { 1.308, 1.692, 2.035, 2.445, 2.733, 3.356 }, +/* 34. */ { 1.307, 1.691, 2.032, 2.441, 2.728, 3.348 }, +/* 35. */ { 1.306, 1.690, 2.030, 2.438, 2.724, 3.340 }, +/* 36. */ { 1.306, 1.688, 2.028, 2.434, 2.719, 3.333 }, +/* 37. */ { 1.305, 1.687, 2.026, 2.431, 2.715, 3.326 }, +/* 38. */ { 1.304, 1.686, 2.024, 2.429, 2.712, 3.319 }, +/* 39. */ { 1.304, 1.685, 2.023, 2.426, 2.708, 3.313 }, +/* 40. */ { 1.303, 1.684, 2.021, 2.423, 2.704, 3.307 }, +/* 41. */ { 1.303, 1.683, 2.020, 2.421, 2.701, 3.301 }, +/* 42. */ { 1.302, 1.682, 2.018, 2.418, 2.698, 3.296 }, +/* 43. */ { 1.302, 1.681, 2.017, 2.416, 2.695, 3.291 }, +/* 44. */ { 1.301, 1.680, 2.015, 2.414, 2.692, 3.286 }, +/* 45. */ { 1.301, 1.679, 2.014, 2.412, 2.690, 3.281 }, +/* 46. */ { 1.300, 1.679, 2.013, 2.410, 2.687, 3.277 }, +/* 47. */ { 1.300, 1.678, 2.012, 2.408, 2.685, 3.273 }, +/* 48. */ { 1.299, 1.677, 2.011, 2.407, 2.682, 3.269 }, +/* 49. */ { 1.299, 1.677, 2.010, 2.405, 2.680, 3.265 }, +/* 50. */ { 1.299, 1.676, 2.009, 2.403, 2.678, 3.261 }, +/* 51. */ { 1.298, 1.675, 2.008, 2.402, 2.676, 3.258 }, +/* 52. */ { 1.298, 1.675, 2.007, 2.400, 2.674, 3.255 }, +/* 53. */ { 1.298, 1.674, 2.006, 2.399, 2.672, 3.251 }, +/* 54. */ { 1.297, 1.674, 2.005, 2.397, 2.670, 3.248 }, +/* 55. */ { 1.297, 1.673, 2.004, 2.396, 2.668, 3.245 }, +/* 56. */ { 1.297, 1.673, 2.003, 2.395, 2.667, 3.242 }, +/* 57. */ { 1.297, 1.672, 2.002, 2.394, 2.665, 3.239 }, +/* 58. */ { 1.296, 1.672, 2.002, 2.392, 2.663, 3.237 }, +/* 59. */ { 1.296, 1.671, 2.001, 2.391, 2.662, 3.234 }, +/* 60. */ { 1.296, 1.671, 2.000, 2.390, 2.660, 3.232 }, +/* 61. 
*/ { 1.296, 1.670, 2.000, 2.389, 2.659, 3.229 }, +/* 62. */ { 1.295, 1.670, 1.999, 2.388, 2.657, 3.227 }, +/* 63. */ { 1.295, 1.669, 1.998, 2.387, 2.656, 3.225 }, +/* 64. */ { 1.295, 1.669, 1.998, 2.386, 2.655, 3.223 }, +/* 65. */ { 1.295, 1.669, 1.997, 2.385, 2.654, 3.220 }, +/* 66. */ { 1.295, 1.668, 1.997, 2.384, 2.652, 3.218 }, +/* 67. */ { 1.294, 1.668, 1.996, 2.383, 2.651, 3.216 }, +/* 68. */ { 1.294, 1.668, 1.995, 2.382, 2.650, 3.214 }, +/* 69. */ { 1.294, 1.667, 1.995, 2.382, 2.649, 3.213 }, +/* 70. */ { 1.294, 1.667, 1.994, 2.381, 2.648, 3.211 }, +/* 71. */ { 1.294, 1.667, 1.994, 2.380, 2.647, 3.209 }, +/* 72. */ { 1.293, 1.666, 1.993, 2.379, 2.646, 3.207 }, +/* 73. */ { 1.293, 1.666, 1.993, 2.379, 2.645, 3.206 }, +/* 74. */ { 1.293, 1.666, 1.993, 2.378, 2.644, 3.204 }, +/* 75. */ { 1.293, 1.665, 1.992, 2.377, 2.643, 3.202 }, +/* 76. */ { 1.293, 1.665, 1.992, 2.376, 2.642, 3.201 }, +/* 77. */ { 1.293, 1.665, 1.991, 2.376, 2.641, 3.199 }, +/* 78. */ { 1.292, 1.665, 1.991, 2.375, 2.640, 3.198 }, +/* 79. */ { 1.292, 1.664, 1.990, 2.374, 2.640, 3.197 }, +/* 80. */ { 1.292, 1.664, 1.990, 2.374, 2.639, 3.195 }, +/* 81. */ { 1.292, 1.664, 1.990, 2.373, 2.638, 3.194 }, +/* 82. */ { 1.292, 1.664, 1.989, 2.373, 2.637, 3.193 }, +/* 83. */ { 1.292, 1.663, 1.989, 2.372, 2.636, 3.191 }, +/* 84. */ { 1.292, 1.663, 1.989, 2.372, 2.636, 3.190 }, +/* 85. */ { 1.292, 1.663, 1.988, 2.371, 2.635, 3.189 }, +/* 86. */ { 1.291, 1.663, 1.988, 2.370, 2.634, 3.188 }, +/* 87. */ { 1.291, 1.663, 1.988, 2.370, 2.634, 3.187 }, +/* 88. */ { 1.291, 1.662, 1.987, 2.369, 2.633, 3.185 }, +/* 89. */ { 1.291, 1.662, 1.987, 2.369, 2.632, 3.184 }, +/* 90. */ { 1.291, 1.662, 1.987, 2.368, 2.632, 3.183 }, +/* 91. */ { 1.291, 1.662, 1.986, 2.368, 2.631, 3.182 }, +/* 92. */ { 1.291, 1.662, 1.986, 2.368, 2.630, 3.181 }, +/* 93. */ { 1.291, 1.661, 1.986, 2.367, 2.630, 3.180 }, +/* 94. */ { 1.291, 1.661, 1.986, 2.367, 2.629, 3.179 }, +/* 95. */ { 1.291, 1.661, 1.985, 2.366, 2.629, 3.178 }, +/* 96. 
*/ { 1.290, 1.661, 1.985, 2.366, 2.628, 3.177 }, +/* 97. */ { 1.290, 1.661, 1.985, 2.365, 2.627, 3.176 }, +/* 98. */ { 1.290, 1.661, 1.984, 2.365, 2.627, 3.175 }, +/* 99. */ { 1.290, 1.660, 1.984, 2.365, 2.626, 3.175 }, +/* 100. */ { 1.290, 1.660, 1.984, 2.364, 2.626, 3.174 } +}; + +#define MAX_DS 8 +static char symbol[MAX_DS] = { ' ', 'x', '+', '*', '%', '#', '@', 'O' }; + +struct dataset { + char *name; + double *points; + size_t lpoints; + double sy, syy; + size_t n; +}; + +static struct dataset * +NewSet(void) +{ + struct dataset *ds; + + ds = calloc(1, sizeof *ds); + assert(ds != NULL); + ds->lpoints = 100000; + ds->points = calloc(sizeof *ds->points, ds->lpoints); + assert(ds->points != NULL); + ds->syy = NAN; + return(ds); +} + +static void +AddPoint(struct dataset *ds, double a) +{ + double *dp; + + if (ds->n >= ds->lpoints) { + dp = ds->points; + ds->lpoints *= 4; + ds->points = calloc(sizeof *ds->points, ds->lpoints); + assert(ds->points != NULL); + memcpy(ds->points, dp, sizeof *dp * ds->n); + free(dp); + } + ds->points[ds->n++] = a; + ds->sy += a; +} + +static double +Min(const struct dataset *ds) +{ + + return (ds->points[0]); +} + +static double +Max(const struct dataset *ds) +{ + + return (ds->points[ds->n -1]); +} + +static double +Avg(const struct dataset *ds) +{ + + return(ds->sy / ds->n); +} + +static double +Median(const struct dataset *ds) +{ + const size_t m = ds->n / 2; + + if ((ds->n % 2) == 0) + return ((ds->points[m] + (ds->points[m - 1])) / 2); + return (ds->points[m]); +} + +static double +Var(struct dataset *ds) +{ + size_t z; + const double a = Avg(ds); + + if (isnan(ds->syy)) { + ds->syy = 0.0; + for (z = 0; z < ds->n; z++) + ds->syy += (ds->points[z] - a) * (ds->points[z] - a); + } + + return (ds->syy / (ds->n - 1.0)); +} + +static double +Stddev(struct dataset *ds) +{ + + return sqrt(Var(ds)); +} + +static void +VitalsHead(void) +{ + + printf(" N Min Max Median Avg Stddev\n"); +} + +static void +Vitals(struct dataset *ds, int flag) 
+{ + + printf("%c %3zu %13.8g %13.8g %13.8g %13.8g %13.8g", symbol[flag], + ds->n, Min(ds), Max(ds), Median(ds), Avg(ds), Stddev(ds)); + printf("\n"); +} + +static void +Relative(struct dataset *ds, struct dataset *rs, int confidx) +{ + double spool, s, d, e, t; + double re; + size_t z; + + z = ds->n + rs->n - 2; + if (z > NSTUDENT) + t = student[0][confidx]; + else + t = student[z][confidx]; + spool = (ds->n - 1) * Var(ds) + (rs->n - 1) * Var(rs); + spool /= ds->n + rs->n - 2; + spool = sqrt(spool); + s = spool * sqrt(1.0 / ds->n + 1.0 / rs->n); + d = Avg(ds) - Avg(rs); + e = t * s; + + re = (ds->n - 1) * Var(ds) + (rs->n - 1) * Var(rs) * + (Avg(ds) * Avg(ds)) / (Avg(rs) * Avg(rs)); + re *= (ds->n + rs->n) / (ds->n * rs->n * (ds->n + rs->n - 2.0)); + re = t * sqrt(re); + + if (fabs(d) > e) { + printf("Difference at %.1f%% confidence\n", studentpct[confidx]); + printf(" %g +/- %g\n", d, e); + printf(" %g%% +/- %g%%\n", d * 100 / Avg(rs), re * 100 / Avg(rs)); + printf(" (Student's t, pooled s = %g)\n", spool); + } else { + printf("No difference proven at %.1f%% confidence\n", + studentpct[confidx]); + } +} + +struct plot { + double min; + double max; + double span; + int width; + + double x0, dx; + size_t height; + char *data; + char **bar; + int separate_bars; + int num_datasets; +}; + +static struct plot plot; + +static void +SetupPlot(int width, int separate, int num_datasets) +{ + struct plot *pl; + + pl = &plot; + pl->width = width; + pl->height = 0; + pl->data = NULL; + pl->bar = NULL; + pl->separate_bars = separate; + pl->num_datasets = num_datasets; + pl->min = 999e99; + pl->max = -999e99; +} + +static void +AdjPlot(double a) +{ + struct plot *pl; + + pl = &plot; + if (a < pl->min) + pl->min = a; + if (a > pl->max) + pl->max = a; + pl->span = pl->max - pl->min; + pl->dx = pl->span / (pl->width - 1.0); + pl->x0 = pl->min - .5 * pl->dx; +} + +static void +DimPlot(struct dataset *ds) +{ + AdjPlot(Min(ds)); + AdjPlot(Max(ds)); + AdjPlot(Avg(ds) - Stddev(ds)); + 
AdjPlot(Avg(ds) + Stddev(ds)); +} + +static void +PlotSet(struct dataset *ds, int val) +{ + struct plot *pl; + int i, x; + size_t m, j, z; + size_t n; + int bar; + double av, sd; + + pl = &plot; + if (pl->span == 0) + return; + + if (pl->separate_bars) + bar = val-1; + else + bar = 0; + + if (pl->bar == NULL) { + pl->bar = calloc(sizeof(char *), pl->num_datasets); + assert(pl->bar != NULL); + } + + if (pl->bar[bar] == NULL) { + pl->bar[bar] = malloc(pl->width); + assert(pl->bar[bar] != NULL); + memset(pl->bar[bar], 0, pl->width); + } + + m = 1; + i = -1; + j = 0; + /* Set m to max(j) + 1, to allocate required memory */ + for (n = 0; n < ds->n; n++) { + x = (ds->points[n] - pl->x0) / pl->dx; + if (x == i) { + j++; + if (j > m) + m = j; + } else { + j = 1; + i = x; + } + } + m += 1; + if (m > pl->height) { + pl->data = realloc(pl->data, pl->width * m); + assert(pl->data != NULL); + memset(pl->data + pl->height * pl->width, 0, + (m - pl->height) * pl->width); + } + pl->height = m; + i = -1; + for (n = 0; n < ds->n; n++) { + x = (ds->points[n] - pl->x0) / pl->dx; + if (x == i) { + j++; + } else { + j = 1; + i = x; + } + pl->data[j * pl->width + x] |= val; + } + av = Avg(ds); + sd = Stddev(ds); + if (!isnan(sd)) { + x = ((av - sd) - pl->x0) / pl->dx; + m = ((av + sd) - pl->x0) / pl->dx; + pl->bar[bar][m] = '|'; + pl->bar[bar][x] = '|'; + for (z = x + 1; z < m; z++) + if (pl->bar[bar][z] == 0) + pl->bar[bar][z] = '_'; + } + x = (Median(ds) - pl->x0) / pl->dx; + pl->bar[bar][x] = 'M'; + x = (av - pl->x0) / pl->dx; + pl->bar[bar][x] = 'A'; +} + +static void +DumpPlot(void) +{ + struct plot *pl; + int i, j, k; + size_t z; + + pl = &plot; + if (pl->span == 0) { + printf("[no plot, span is zero width]\n"); + return; + } + + putchar('+'); + for (i = 0; i < pl->width; i++) + putchar('-'); + putchar('+'); + putchar('\n'); + for (z = 1; z < pl->height; z++) { + putchar('|'); + for (j = 0; j < pl->width; j++) { + k = pl->data[(pl->height - z) * pl->width + j]; + if (k >= 0 && k < 
MAX_DS) + putchar(symbol[k]); + else + printf("[%02x]", k); + } + putchar('|'); + putchar('\n'); + } + for (i = 0; i < pl->num_datasets; i++) { + if (pl->bar[i] == NULL) + continue; + putchar('|'); + for (j = 0; j < pl->width; j++) { + k = pl->bar[i][j]; + if (k == 0) + k = ' '; + putchar(k); + } + putchar('|'); + putchar('\n'); + } + putchar('+'); + for (i = 0; i < pl->width; i++) + putchar('-'); + putchar('+'); + putchar('\n'); +} + +static int +dbl_cmp(const void *a, const void *b) +{ + const double *aa = a; + const double *bb = b; + + if (*aa < *bb) + return (-1); + else if (*aa > *bb) + return (1); + else + return (0); +} + +static struct dataset * +ReadSet(FILE *f, const char *n, int column, const char *delim) +{ + char buf[BUFSIZ], *p, *t; + struct dataset *s; + double d; + int line; + int i; + + s = NewSet(); + s->name = strdup(n); + assert(s->name != NULL); + line = 0; + while (fgets(buf, sizeof buf, f) != NULL) { + line++; + + i = strlen(buf); + while (i > 0 && isspace(buf[i - 1])) + buf[--i] = '\0'; + for (i = 1, t = strtok(buf, delim); + t != NULL && *t != '#'; + i++, t = strtok(NULL, delim)) { + if (i == column) + break; + } + if (t == NULL || *t == '#') + continue; + + d = strtod(t, &p); + if (p != NULL && *p != '\0') + errx(2, "Invalid data on line %d in %s", line, n); + if (*buf != '\0') + AddPoint(s, d); + } + if (s->n < 3) { + fprintf(stderr, + "Dataset %s must contain at least 3 data points\n", n); + exit (2); + } + qsort(s->points, s->n, sizeof *s->points, dbl_cmp); + return (s); +} + +static void +usage(char const *whine) +{ + int i; + + fprintf(stderr, "%s\n", whine); + fprintf(stderr, + "Usage: ministat [-C column] [-c confidence] [-d delimiter(s)] [-Ans] [-w width] [file [file ...]]\n"); + fprintf(stderr, "\tconfidence = {"); + for (i = 0; i < NCONF; i++) { + fprintf(stderr, "%s%g%%", + i ? ", " : "", + studentpct[i]); + } + fprintf(stderr, "}\n"); + fprintf(stderr, "\t-A : print statistics only. 
suppress the graph.\n"); + fprintf(stderr, "\t-C : column number to extract (starts and defaults to 1)\n"); + fprintf(stderr, "\t-d : delimiter(s) string, default to \" \\t\"\n"); + fprintf(stderr, "\t-n : print summary statistics only, no graph/test\n"); + fprintf(stderr, "\t-s : print avg/median/stddev bars on separate lines\n"); + fprintf(stderr, "\t-w : width of graph/test output (default 74 or terminal width)\n"); + exit (2); +} + +int +main(int argc, char **argv) +{ + const char *setfilenames[MAX_DS - 1]; + struct dataset *ds[MAX_DS - 1]; + FILE *setfiles[MAX_DS - 1]; + int nds; + double a; + const char *delim = " \t"; + char *p; + int c, i, ci; + int column = 1; + int flag_s = 0; + int flag_n = 0; + int termwidth = 74; + int suppress_plot = 0; + + if (isatty(STDOUT_FILENO)) { + struct winsize wsz; + + if ((p = getenv("COLUMNS")) != NULL && *p != '\0') + termwidth = atoi(p); + else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &wsz) != -1 && + wsz.ws_col > 0) + termwidth = wsz.ws_col - 2; + } + + ci = -1; + while ((c = getopt(argc, argv, "AC:c:d:snw:")) != -1) + switch (c) { + case 'A': + suppress_plot = 1; + break; + case 'C': + column = strtol(optarg, &p, 10); + if (p != NULL && *p != '\0') + usage("Invalid column number."); + if (column <= 0) + usage("Column number should be positive."); + break; + case 'c': + a = strtod(optarg, &p); + if (p != NULL && *p != '\0') + usage("Not a floating point number"); + for (i = 0; i < NCONF; i++) + if (a == studentpct[i]) + ci = i; + if (ci == -1) + usage("No support for confidence level"); + break; + case 'd': + if (*optarg == '\0') + usage("Can't use empty delimiter string"); + delim = optarg; + break; + case 'n': + flag_n = 1; + break; + case 's': + flag_s = 1; + break; + case 'w': + termwidth = strtol(optarg, &p, 10); + if (p != NULL && *p != '\0') + usage("Invalid width, not a number."); + if (termwidth < 0) + usage("Unable to move beyond left margin."); + break; + default: + usage("Unknown option"); + break; + } + if (ci 
== -1) + ci = 2; + argc -= optind; + argv += optind; + + if (argc == 0) { + setfilenames[0] = ""; + setfiles[0] = stdin; + nds = 1; + } else { + if (argc > (MAX_DS - 1)) + usage("Too many datasets."); + nds = argc; + for (i = 0; i < nds; i++) { + setfilenames[i] = argv[i]; + if (!strcmp(argv[i], "-")) + setfiles[0] = stdin; + else + setfiles[i] = fopen(argv[i], "r"); + if (setfiles[i] == NULL) + err(2, "Cannot open %s", argv[i]); + } + } + + for (i = 0; i < nds; i++) { + ds[i] = ReadSet(setfiles[i], setfilenames[i], column, delim); + if (setfiles[i] != stdin) + fclose(setfiles[i]); + } + + for (i = 0; i < nds; i++) + printf("%c %s\n", symbol[i+1], ds[i]->name); + + if (!flag_n && !suppress_plot) { + SetupPlot(termwidth, flag_s, nds); + for (i = 0; i < nds; i++) + DimPlot(ds[i]); + for (i = 0; i < nds; i++) + PlotSet(ds[i], i + 1); + DumpPlot(); + } + VitalsHead(); + Vitals(ds[0], 1); + for (i = 1; i < nds; i++) { + Vitals(ds[i], i + 1); + if (!flag_n) + Relative(ds[i], ds[0], ci); + } + exit(0); +} diff --git a/scripts/package.sh b/scripts/package.sh new file mode 100755 index 000000000000..34692f7ab20a --- /dev/null +++ b/scripts/package.sh @@ -0,0 +1,248 @@ +#!/bin/sh +# +# SPDX-License-Identifier: BSD-2-Clause +# +# Copyright (c) 2018-2021 Gavin D. Howard and contributors. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. 
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#

# This script requires some non-POSIX utilities, but that's okay because it's
# really for maintainer use only.
#
# The non-POSIX utilities include:
#
# * git
# * stat
# * tar
# * xz
# * sha512sum
# * sha256sum
# * gpg
# * zip
# * unzip

# Prints the release-notes block for one artifact and (re)creates its detached
# GPG signature.
#
# For the file itself, and then for "<file>.sig", this prints the command line
# and the output of sha512sum, sha256sum and `stat -c '%s %n'`.
#
# @param 1  Path of the artifact to checksum and sign.
shasum() {

	f="$1"
	shift

	# All this fancy stuff takes the sha512 and sha256 sums and signs it. The
	# output after this point is what I usually copy into the release notes.
	# (See manuals/release.md for more information.)
	printf '$ sha512sum %s\n' "$f"
	sha512sum "$f"
	printf '\n'
	printf '$ sha256sum %s\n' "$f"
	sha256sum "$f"
	printf '\n'
	# The format needs its own %s; without one the file name operand was never
	# printed in the echoed command line.
	printf "\$ stat -c '%%s %%n' %s\n" "$f"
	stat -c '%s %n' "$f"

	# gpg refuses to overwrite an existing signature, so drop any old one.
	if [ -f "$f.sig" ]; then
		rm -f "$f.sig"
	fi

	gpg --detach-sig -o "$f.sig" "$f" 2> /dev/null

	printf '\n'
	printf '$ sha512sum %s.sig\n' "$f"
	sha512sum "$f.sig"
	printf '\n'
	printf '$ sha256sum %s.sig\n' "$f"
	sha256sum "$f.sig"
	printf '\n'
	printf "\$ stat -c '%%s %%n' %s\n" "$f.sig"
	stat -c '%s %n' "$f.sig"
}

script="$0"
scriptdir=$(dirname "$script")

repo="$scriptdir/.."
proj="bc"

# Everything below uses paths relative to the repo root and runs `rm -rf`, so
# a failed cd must stop the script.
cd "$repo" || exit 1

if [ ! -f "../vs.zip" ]; then
	printf 'Must have Windows builds!\n'
	exit 1
fi

# We want the absolute path for later.
repo=$(pwd)

# This pulls the version out. If you change the format of include/version.h,
# you may have to change this line.
version=$(grep "VERSION " include/version.h | awk '{ print $3 }')

# An empty version would produce an empty tag name and a bogus package name.
if [ -z "$version" ]; then
	printf 'Could not read the version from include/version.h\n' >&2
	exit 1
fi

tag_msg="Version $version"
projver="${proj}-${version}"

tempdir="/tmp/${projver}"
rm -rf "$tempdir"
mkdir -p "$tempdir" || exit 1

make clean_tests > /dev/null 2> /dev/null

# Delete the tag and recreate it. This is the part of the script that makes it
# so you cannot run it twice on the same version, unless you know what you are
# doing. In fact, you cannot run it again if users have already started to use
# the old version of the tag.
if git rev-parse "$version" > /dev/null 2>&1; then
	git push --delete origin "$version" > /dev/null 2> /dev/null
	git tag --delete "$version" > /dev/null 2> /dev/null
fi

git push > /dev/null 2> /dev/null
# NOTE(review): `tg` is not a stock git subcommand; presumably a maintainer
# alias for (signed) `tag`. With output discarded, a missing alias fails
# silently and no tag is created -- confirm before running elsewhere.
git tg "$version" -m "$tag_msg" > /dev/null 2> /dev/null
git push --tags > /dev/null 2> /dev/null

# This line grabs the names of all of the files in .gitignore that still exist.
ignores=$(git check-ignore * **/*)

cp -r ./* "$tempdir"

# The deletions below are relative to the current directory; never run them
# from inside the real repo.
cd "$tempdir" || exit 1

# Delete all the ignored files.
for i in $ignores; do
	rm -rf "./$i"
done

# This is a list of files that end users (including *software packagers* and
# *distro maintainers*!) do not care about. In particular, they *do* care about
# the testing infrastructure for the regular test suite because distro
# maintainers probably want to ensure the test suite runs. However, they
# probably don't care about fuzzing or other randomized testing. Also, I
# technically can't distribute tests/bc/scripts/timeconst.bc because it's from
# the Linux kernel, which is GPL.
extras=$(cat <<*EOF
.git/
.gitignore
.gitattributes
benchmarks/
manuals/bc.1.md.in
manuals/dc.1.md.in
manuals/benchmarks.md
manuals/development.md
manuals/header_bcl.txt
manuals/header_bc.txt
manuals/header_dc.txt
manuals/header.txt
manuals/release.md
scripts/afl.py
scripts/alloc.sh
scripts/benchmark.sh
scripts/bitfuncgen.c
scripts/fuzz_prep.sh
scripts/manpage.sh
scripts/ministat.c
scripts/package.sh
scripts/radamsa.sh
scripts/radamsa.txt
scripts/randmath.py
scripts/release_settings.txt
scripts/release.sh
scripts/test_settings.sh
scripts/test_settings.txt
tests/bc_outputs/
tests/dc_outputs/
tests/fuzzing/
tests/bc/scripts/timeconst.bc
*EOF
)

for i in $extras; do
	rm -rf "./$i"
done

cd .. || exit 1

parent="$repo/.."

# Cleanup old stuff.
if [ -f "$projver.tar.xz" ]; then
	rm -rf "$projver.tar.xz"
fi

if [ -f "$projver.tar.xz.sig" ]; then
	rm -rf "$projver.tar.xz.sig"
fi

# Tar and compress and move into the parent directory of the repo.
tar cf "$projver.tar" "$projver/"
xz -z -v -9 -e "$projver.tar" > /dev/null 2> /dev/null
mv "$projver.tar.xz" "$parent"

# The Windows clean-up below removes directories by relative name.
cd "$parent" || exit 1

# Clean up old Windows stuff.
if [ -d windows ]; then
	rm -rf windows
fi

# Test the archive that is actually removed (the check used to look at
# windows.zip); otherwise a stale archive survives and zip updates it in place.
if [ -f "$projver-windows.zip" ]; then
	rm -rf "$projver-windows.zip"
fi

# Prepare Windows stuff.
unzip vs.zip > /dev/null
mv vs windows

# Remove unneeded Windows stuff.
+rm -rf windows/*.vcxproj.user +rm -rf windows/src2 +rm -rf windows/tests +rm -rf windows/*.sln +rm -rf windows/*.vcxproj +rm -rf windows/*.vcxproj.filters + +rm -rf windows/bin/{Win32,x64}/{Debug,Release}/*.obj +rm -rf windows/bin/{Win32,x64}/{Debug,Release}/*.iobj +rm -rf windows/bin/{Win32,x64}/{Debug,Release}/bc.exe.recipe +rm -rf windows/bin/{Win32,x64}/{Debug,Release}/bc.ilk +rm -rf windows/bin/{Win32,x64}/{Debug,Release}/bc.log +rm -rf windows/bin/{Win32,x64}/{Debug,Release}/bc.tlog +rm -rf windows/bin/{Win32,x64}/{Debug,Release}/bc.pdb +rm -rf windows/bin/{Win32,x64}/{Debug,Release}/bc.ipdb +rm -rf windows/bin/{Win32,x64}/{Debug,Release}/bc.vcxproj.FileListAbsolute.txt +rm -rf windows/bin/{Win32,x64}/{Debug,Release}/strgen.exe +rm -rf windows/bin/{Win32,x64}/{Debug,Release}/vc142.idb +rm -rf windows/bin/{Win32,x64}/{Debug,Release}/vc142.pdb + +rm -rf windows/lib/{Win32,x64}/{Debug,ReleaseMD,ReleaseMT}/*.obj +rm -rf windows/lib/{Win32,x64}/{Debug,ReleaseMD,ReleaseMT}/bcl.lib.recipe +rm -rf windows/lib/{Win32,x64}/{Debug,ReleaseMD,ReleaseMT}/bcl.log +rm -rf windows/lib/{Win32,x64}/{Debug,ReleaseMD,ReleaseMT}/bcl.tlog +rm -rf windows/lib/{Win32,x64}/{Debug,ReleaseMD,ReleaseMT}/bcl.idb +rm -rf windows/lib/{Win32,x64}/{Debug,ReleaseMD,ReleaseMT}/bcl.pdb +rm -rf windows/lib/{Win32,x64}/{Debug,ReleaseMD,ReleaseMT}/bcl.vcxproj.FileListAbsolute.txt + +# Zip the Windows stuff. +zip -r $projver-windows.zip windows > /dev/null + +printf '\n' +shasum "$projver.tar.xz" +printf '\n' +shasum "$projver-windows.zip" diff --git a/scripts/radamsa.sh b/scripts/radamsa.sh new file mode 100755 index 000000000000..c92923ddadc4 --- /dev/null +++ b/scripts/radamsa.sh @@ -0,0 +1,133 @@ +#! /bin/sh +# +# SPDX-License-Identifier: BSD-2-Clause +# +# Copyright (c) 2018-2021 Gavin D. Howard and contributors. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# + +# This script uses some non-POSIX behavior, but since it's meant for bc +# maintainers only, I can accept that. + +# Get an entry from the file. If an argument exists, it is an index. Get that +# line. Otherwise, get a random line. +getentry() { + + # Figure out if we get a specific or random line. + if [ $# -gt 0 ]; then + entnum="$1" + else + entnum=0 + fi + + # Get data from stdin and figure out how many lines there are. + e=$(cat -) + num=$(printf '%s\n' "$e" | wc -l) + + # Figure out what line we are going to get. Uses bc's own PRNG. + if [ "$entnum" -eq 0 ]; then + rand=$(printf 'irand(%s) + 1\n' "$num" | "$bcdir/bc") + else + rand="$entnum" + fi + + # Get the line. 
+ ent=$(printf '%s\n' "$e" | tail -n +$rand | head -n 1) + + printf '%s\n' "$ent" +} + +script="$0" +dir=$(dirname "$script") + +. "$dir/functions.sh" + +# Command-line processing. +if [ "$#" -lt 1 ]; then + printf 'usage: %s dir\n' "$0" + exit 1 +fi + +d="$1" +shift + +bcdir="$dir/../bin" + +# Figure out the correct input directory. +if [ "$d" = "bc" ]; then + inputs="$dir/../tests/fuzzing/bc_inputs1" + opts="-lq" +elif [ "$d" = "dc" ]; then + inputs="$dir/../test/fuzzing/dc_inputs" + opts="-x" +else + err_exit "wrong type of executable" 1 +fi + +export ASAN_OPTIONS="abort_on_error=1:allocator_may_return_null=1" + +entries=$(cat "$dir/radamsa.txt") + +IFS=$'\n' + +go=1 + +# Infinite loop. +while [ "$go" -ne 0 ]; do + + # If we are running bc, fuzz command-line arguments in BC_ENV_ARGS. + if [ "$d" = "bc" ]; then + + entry=$(cat -- "$dir/radamsa.txt" | getentry) + items=$(printf '%s\n' "$entry" | radamsa -n 10) + + printf '%s\n' "$items" + + for i in `seq 1 10`; do + + item=$(printf '%s\n' "$items" | getentry "$i") + + export BC_ENV_ARGS="$item" + echo 'halt' | "$bcdir/$d" + err=$? + + checkcrash "$d" "$err" "radamsa env args: \"$item\"" + done + + fi + + f=$(ls "$inputs" | getentry) + l=$(cat "$inputs/$f" | wc -l) + ll=$(printf '%s^2\n' "$l" | bc) + + # Fuzz on the AFL++ inputs. + for i in $(seq 1 2); do + data=$(cat "$inputs/$f" | radamsa -n 1) + printf '%s\n' "$data" > "$dir/../.log_${d}_test.txt" + printf '%s\n' "$data" | timeout -s SIGTERM 5 "$bcdir/$d" "$opts" > /dev/null + err=$? 
+ checkcrash "$d" "$err" "radamsa stdin" + done + +done diff --git a/scripts/radamsa.txt b/scripts/radamsa.txt new file mode 100644 index 000000000000..4bf28907bead --- /dev/null +++ b/scripts/radamsa.txt @@ -0,0 +1,17 @@ +-lq '/home/gavin/.bcrc' +-lq "/home/gavin/.bcrc" +-lqg '/home/gavin/bc stuff.bc' +-lqg "/home/gavin/bc stuff.bc" +-lqg '/home/gavin/"bc" stuff.bc' +-lqg "/home/gavin/'bc' stuff.bc" +-lqg '/home/gavin/bc stuff.bc +-lqg "/home/gavin/bc stuff.bc +-lqg '/home/gavin/"bc" stuff.bc +-lqg "/home/gavin/'bc' stuff.bc +--mathlib --expand +--file="/home/gavin/.bcrc" +--file=/home/gavin/.bcrc +--file="/home/gavin/bc stuff.bc" +--file +--expression "4+4" +-e "irand(128)" -f /home/gavin/.bcrc diff --git a/scripts/randmath.py b/scripts/randmath.py new file mode 100755 index 000000000000..896f0e46c97f --- /dev/null +++ b/scripts/randmath.py @@ -0,0 +1,421 @@ +#! /usr/bin/python3 -B +# +# SPDX-License-Identifier: BSD-2-Clause +# +# Copyright (c) 2018-2021 Gavin D. Howard and contributors. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
# IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#

import os, errno
import random
import sys
import subprocess

# I want line length to *not* affect differences between the two, so I set it
# as high as possible.
env = {
    "BC_LINE_LENGTH": "65535",
    "DC_LINE_LENGTH": "65535"
}


# Generate a random integer between 0 and 2^(8 * limit), inclusive.
# @param limit  The size of the upper limit in bytes; callers also pass
#               fractions such as 5 / 8 to get a 5-bit limit.
def gen(limit=4):
    # A fractional limit makes the power a float (e.g. 32.0), and
    # random.randint() rejects non-integer bounds on Python >= 3.12.
    return random.randint(0, int(2 ** (8 * limit)))


# Returns a random boolean for whether a number should be negative or not.
def negative():
    return random.randint(0, 1) == 1


# Returns a random boolean for whether a number should be 0 or not. I decided to
# have it be 0 every 2^4 times since sometimes it is used to make a number less
# than 1.
def zero():
    return random.randint(0, 2 ** (4) - 1) == 0


# Generate a real portion of a number.
# @return  The digits after the decimal point as a string, or "0" for none.
def gen_real():

    # Figure out if we should have a real portion. If so generate it.
    if negative():
        n = str(gen(25))
        length = gen(7 / 8)
        # Left-pad with zeros so the fraction can start with "0" digits.
        if len(n) < length:
            n = ("0" * (length - len(n))) + n
    else:
        n = "0"

    return n


# Generates a number (as a string) based on the parameters.
# @param op     The operation under test.
# @param neg    Whether the number can be negative.
# @param real   Whether the number can be a non-integer.
# @param z      Whether the number can be zero.
# @param limit  The upper limit for the number, as passed to gen().
def num(op, neg, real, z, limit=4):

    # Handle zero first.
    if z:
        z = zero()
    else:
        z = False

    if z:
        # Generate a real portion maybe
        if real:
            n = gen_real()
            if n != "0":
                return "0." + n
        return "0"

    # Figure out if we should be negative.
    if neg:
        neg = negative()

    # Generate the integer portion.
    g = gen(limit)

    # Figure out if we should have a real number. gen_real() flips its own
    # coin to decide whether the fractional part is non-zero.
    if real:
        n = gen_real()
    else:
        n = "0"

    # Generate the string.
    g = str(g)
    if n != "0":
        g = g + "." + n

    # Make sure to use the right negative sign: the modexp tests are run
    # through dc, which spells the sign "_".
    if neg and g != "0":
        if op != modexp:
            g = "-" + g
        else:
            g = "_" + g

    return g


# Add a failed test to the list.
# @param test  The test that failed.
# @param op    The operation for the test.
def add(test, op):
    tests.append(test)
    gen_ops.append(op)


# Compare the output between the two.
# @param exe       The executable under test.
# @param options   The command-line options.
# @param p         The object returned from subprocess.run() for the calculator
#                  under test.
# @param test      The test.
# @param halt      The halt string for the calculator under test.
# @param expected  The expected result.
# @param op        The operation under test.
# @param do_add    If true, add a failing test to the list, otherwise, don't.
def compare(exe, options, p, test, halt, expected, op, do_add=True):

    # Check for error from the calculator under test.
    if p.returncode != 0:

        print(" {} returned an error ({})".format(exe, p.returncode))

        if do_add:
            print(" adding to checklist...")
            add(test, op)

        return

    actual = p.stdout.decode()

    # Check for a difference in output.
    if actual != expected:

        if op >= exponent:

            # This is here because GNU bc, like mine can be flaky on the
            # functions in the math library. This is basically testing if adding
            # 10 to the scale works to make them match. If so, the difference is
            # only because of that.
            indata = "scale += 10; {}; {}".format(test, halt)
            args = [ exe, options ]
            p2 = subprocess.run(args, input=indata.encode(), stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env)
            expected = p2.stdout[:-10].decode()

            if actual == expected:
                print(" failed because of bug in other {}".format(exe))
                print(" continuing...")
                return

        # Do the correct output for the situation.
        if do_add:
            print(" failed; adding to checklist...")
            add(test, op)
        else:
            print(" failed {}".format(test))
            print(" expected:")
            print(" {}".format(expected))
            print(" actual:")
            print(" {}".format(actual))


# Generates a test for op. I made sure that there was no clashing between
# calculators. Each calculator is responsible for certain ops.
# @param op  The operation to test.
# @return    The test, as a line of bc (or dc, for modexp) input.
def gen_test(op):

    # First, figure out how big the scale should be.
    scale = num(op, False, False, True, 5 / 8)

    # Do the right thing for each op. Generate the test based on the format
    # string and the constraints of each op. For example, some ops can't accept
    # 0 in some arguments, and some must have integers in some arguments.
    if op < div:
        s = fmts[op].format(scale, num(op, True, True, True), num(op, True, True, True))
    elif op == div or op == mod:
        s = fmts[op].format(scale, num(op, True, True, True), num(op, True, True, False))
    elif op == power:
        s = fmts[op].format(scale, num(op, True, True, True, 7 / 8), num(op, True, False, True, 6 / 8))
    elif op == modexp:
        s = fmts[op].format(scale, num(op, True, False, True), num(op, True, False, True),
                            num(op, True, False, False))
    elif op == sqrt:
        s = "1"
        while s == "1":
            s = num(op, False, True, True, 1)
        s = fmts[op].format(scale, s)
    else:

        if op == exponent:
            first = num(op, True, True, True, 6 / 8)
        elif op == bessel:
            first = num(op, False, True, True, 6 / 8)
        else:
            first = num(op, True, True, True)

        if op != bessel:
            s = fmts[op].format(scale, first)
        else:
            s = fmts[op].format(scale, first, 6 / 8)

    return s


# Runs a test with number t.
# @param t  The number of the test.
def run_test(t):

    # Randomly select the operation.
    op = random.randrange(bessel + 1)

    # Select the right calculator.
    if op != modexp:
        exe = "bc"
        halt = "halt"
        options = "-lq"
    else:
        exe = "dc"
        halt = "q"
        options = ""

    # Generate the test.
    test = gen_test(op)

    # These don't work very well for some reason.
    if "c(0)" in test or "scale = 4; j(4" in test:
        return

    # Make sure the calculator will halt.
    bcexe = exedir + "/" + exe
    indata = test + "\n" + halt

    print("Test {}: {}".format(t, test))

    # Only bc has options.
    if exe == "bc":
        args = [ exe, options ]
    else:
        args = [ exe ]

    # Run the GNU bc.
    p = subprocess.run(args, input=indata.encode(), stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env)

    output1 = p.stdout.decode()

    # Error checking for GNU.
    if p.returncode != 0 or output1 == "":
        print(" other {} returned an error ({}); continuing...".format(exe, p.returncode))
        return

    if output1 == "\n":
        print(" other {} has a bug; continuing...".format(exe))
        return

    # Don't know why GNU has this problem...
    if output1 == "-0\n":
        output1 = "0\n"
    elif output1 == "-0":
        output1 = "0"

    # Same rule as for the reference run: dc has no options, and an empty
    # string would be handed to it as a file name.
    if exe == "bc":
        args = [ bcexe, options ]
    else:
        args = [ bcexe ]

    # Run this bc/dc and compare.
    p = subprocess.run(args, input=indata.encode(), stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env)
    compare(exe, options, p, test, halt, output1, op)


# This script must be run by itself.
if __name__ != "__main__":
    sys.exit(1)

script = sys.argv[0]

# Resolve the script path first: invoked by bare file name, dirname() would be
# empty and exedir would become "/../bin".
testdir = os.path.dirname(os.path.abspath(script))

exedir = testdir + "/../bin"

# The following are tables used to generate numbers.

# The operations to test.
ops = [ '+', '-', '*', '/', '%', '^', '|' ]

# The functions that can be tested.
funcs = [ "sqrt", "e", "l", "a", "s", "c", "j" ]

# The files (corresponding to the operations with the functions appended) to add
# tests to if they fail.
files = [ "add", "subtract", "multiply", "divide", "modulus", "power", "modexp",
          "sqrt", "exponent", "log", "arctangent", "sine", "cosine", "bessel" ]

# The format strings corresponding to each operation and then each function.
fmts = [ "scale = {}; {} + {}", "scale = {}; {} - {}", "scale = {}; {} * {}",
         "scale = {}; {} / {}", "scale = {}; {} % {}", "scale = {}; {} ^ {}",
         "{}k {} {} {}|pR", "scale = {}; sqrt({})", "scale = {}; e({})",
         "scale = {}; l({})", "scale = {}; a({})", "scale = {}; s({})",
         "scale = {}; c({})", "scale = {}; j({}, {})" ]

# Constants to make some code easier later. Each is an index into files and
# fmts.
div = 3
mod = 4
power = 5
modexp = 6
sqrt = 7
exponent = 8
bessel = 13

# The checklist of possible failures: tests[i] failed for operation gen_ops[i].
gen_ops = []
tests = []

# Infinite loop until the user sends SIGINT.
+try: + i = 0 + while True: + run_test(i) + i = i + 1 +except KeyboardInterrupt: + pass + +# This is where we start processing the checklist of possible failures. Why only +# possible failures? Because some operations, specifically the functions in the +# math library, are not guaranteed to be exactly correct. Because of that, we +# need to present every failed test to the user for a final check before we +# add them as test cases. + +# No items, just exit. +if len(tests) == 0: + print("\nNo items in checklist.") + print("Exiting") + sys.exit(0) + +print("\nGoing through the checklist...\n") + +# Just do some error checking. If this fails here, it's a bug in this script. +if len(tests) != len(gen_ops): + print("Corrupted checklist!") + print("Exiting...") + sys.exit(1) + +# Go through each item in the checklist. +for i in range(0, len(tests)): + + # Yes, there's some code duplication. Sue me. + + print("\n{}".format(tests[i])) + + op = int(gen_ops[i]) + + if op != modexp: + exe = "bc" + halt = "halt" + options = "-lq" + else: + exe = "dc" + halt = "q" + options = "" + + # We want to run the test again to show the user the difference. + indata = tests[i] + "\n" + halt + + args = [ exe, options ] + + p = subprocess.run(args, input=indata.encode(), stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env) + + expected = p.stdout.decode() + + bcexe = exedir + "/" + exe + args = [ bcexe, options ] + + p = subprocess.run(args, input=indata.encode(), stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env) + + compare(exe, options, p, tests[i], halt, expected, op, False) + + # Ask the user to make a decision on the failed test. + answer = input("\nAdd test ({}/{}) to test suite? [y/N]: ".format(i + 1, len(tests))) + + # Quick and dirty answer parsing. + if 'Y' in answer or 'y' in answer: + + print("Yes") + + name = testdir + "/" + exe + "/" + files[op] + + # Write the test to the test file and the expected result to the + # results file. 
+ with open(name + ".txt", "a") as f: + f.write(tests[i] + "\n") + + with open(name + "_results.txt", "a") as f: + f.write(expected) + + else: + print("No") + +print("Done!") diff --git a/scripts/release.sh b/scripts/release.sh new file mode 100755 index 000000000000..12097b1cc8b9 --- /dev/null +++ b/scripts/release.sh @@ -0,0 +1,810 @@ +#! /bin/sh +# +# SPDX-License-Identifier: BSD-2-Clause +# +# Copyright (c) 2018-2021 Gavin D. Howard and contributors. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. 
#

# For OpenBSD, run using the following:
#
#     scripts/release.sh 1 0 1 0 0 0 0 1 0 0 0 0 0 0
#
# For FreeBSD, run using the following:
#
#     scripts/release.sh 1 0 1 0 0 0 0 1 0 1 0 0 0 0
#
# There is one problem with running this script on FreeBSD: it takes overcommit
# to the extreme. This means that some tests that try to create allocation
# failures instead make bc and dc crash. So running this script on FreeBSD does
# not work right now.
#
# For Linux, run two separate ones (in different checkouts), like so:
#
#     scripts/release.sh 1 1 1 0 1 0 0 1 0 1 0 1 0 0
#     cd build; ../scripts/release.sh 1 1 0 1 0 1 0 1 0 1 0 0 1 1
#
# Yes, I usually do sanitizers with Clang and Valgrind with GCC, and I also do
# out-of-source builds with GCC.
#
# To run sanitizers or Valgrind with generated tests, use the following:
#
#     scripts/release.sh 1 1 1 0 1 0 0 1 0 1 0 1 0 0
#     cd build; ../scripts/release.sh 1 1 0 1 0 1 0 1 0 1 0 0 1 1
#
# The reason I run history tests with GCC and not with Clang is because Clang
# already runs slower as a result of running with sanitizers, and the history
# tests are a little sensitive to load on a system.
#
# If this script fails on any platform when starting the Karatsuba test, check
# that Python is installed, especially if the error says something like:
# "karatsuba.py: not found".

# Print the usage and exit with an error. Each parameter should be an integer.
# Non-zero activates, and zero deactivates.
#
# The script name is taken from the global $script. Because this is only ever
# an error path (it always exits with status 1), the text goes to stderr so
# that it never pollutes captured or piped stdout.
usage() {
  {
    printf 'usage: %s [run_tests] [generate_tests] [test_with_clang] [test_with_gcc] \n' "$script"
    printf ' [run_sanitizers] [run_valgrind] [test_settings] [run_64_bit] \n'
    printf ' [run_gen_script] [test_c11] [test_128_bit] [test_computed_goto]\n'
    printf ' [test_karatsuba] [test_history]\n'
  } >&2
  exit 1
}

# Print a header with a message. This is just to make it easy to track progress.
# @param msg The message to print in the header.
header() {

  _header_msg="$1"
  shift

  # The message is data, not a format: it may contain compiler or configure
  # flags, so it is passed through %s instead of being used as the printf
  # format string (where any '%' or '\' in it would be reinterpreted).
  printf '\n*******************\n%s\n*******************\n\n' "$_header_msg"
}

# Easy way to call make.
# @param ... All arguments are passed through to make unchanged.
do_make() {
  # No reason to do 64 except to see if I actually can overload my system. :)
  # Well, also that it might actually improve throughput as other jobs can run
  # while some are waiting.
  make -j64 "$@"
}

# Run configure.sh.
#
# Reads the globals gen_tests (adds -G when it is 0), clang_flags and gcc_flags
# (prepended to CFLAGS for the matching compiler) and real (the directory that
# holds configure.sh). The output of configure.sh is discarded.
# @param CFLAGS           The CFLAGS.
# @param CC               The C compiler.
# @param configure_flags  The flags for configure.sh itself.
# @param GEN_HOST         The setting for GEN_HOST.
# @param LONG_BIT         The setting for LONG_BIT.
configure() {

  _configure_CFLAGS="$1"
  shift

  _configure_CC="$1"
  shift

  _configure_configure_flags="$1"
  shift

  _configure_GEN_HOST="$1"
  shift

  _configure_LONG_BIT="$1"
  shift

  # Make sure to not generate tests if necessary.
  if [ "$gen_tests" -eq 0 ]; then
    _configure_configure_flags="-G $_configure_configure_flags"
  fi

  # Choose the right extra flags.
  if [ "$_configure_CC" = "clang" ]; then
    _configure_CFLAGS="$clang_flags $_configure_CFLAGS"
  elif [ "$_configure_CC" = "gcc" ]; then
    _configure_CFLAGS="$gcc_flags $_configure_CFLAGS"
  fi

  # Build the whole header with one constant format so that none of the
  # values is ever interpreted as a format string.
  _configure_header=$(printf 'Running configure.sh %s ...\n CC="%s"\n CFLAGS="%s"\n LONG_BIT=%s\n GEN_HOST=%s' \
    "$_configure_configure_flags" "$_configure_CC" "$_configure_CFLAGS" \
    "$_configure_LONG_BIT" "$_configure_GEN_HOST")

  header "$_configure_header"

  # $_configure_configure_flags is deliberately unquoted: it holds several
  # space-separated options that must reach configure.sh as separate words.
  # shellcheck disable=SC2086
  CFLAGS="$_configure_CFLAGS" CC="$_configure_CC" GEN_HOST="$_configure_GEN_HOST" \
    LONG_BIT="$_configure_LONG_BIT" "$real/configure.sh" $_configure_configure_flags > /dev/null
}

# Build with make. This function also captures and outputs any warnings if they
# exist because as far as I am concerned, warnings are not acceptable for
# release.
#
# Everything make writes to stderr is captured in ./.test.txt. If that file is
# not empty, it is printed and the script exits with status 1. If make failed
# without writing anything to stderr, the script exits with make's status.
# @param CFLAGS           The CFLAGS.
# @param CC               The C compiler.
# @param configure_flags  The flags for configure.sh itself.
# @param GEN_HOST         The setting for GEN_HOST.
# @param LONG_BIT         The setting for LONG_BIT.
build() {

  _build_CFLAGS="$1"
  shift

  _build_CC="$1"
  shift

  _build_configure_flags="$1"
  shift

  _build_GEN_HOST="$1"
  shift

  _build_LONG_BIT="$1"
  shift

  configure "$_build_CFLAGS" "$_build_CC" "$_build_configure_flags" "$_build_GEN_HOST" "$_build_LONG_BIT"

  _build_header=$(printf 'Building...\n CC=%s\n CFLAGS="%s"\n LONG_BIT=%s\n GEN_HOST=%s' \
    "$_build_CC" "$_build_CFLAGS" "$_build_LONG_BIT" "$_build_GEN_HOST")

  header "$_build_header"

  # Capture and print warnings. The status of make is recorded instead of
  # letting `set -e` abort right here; otherwise a failing build (for example
  # a warning promoted to an error) would exit before the captured compiler
  # output is ever shown.
  _build_status=0
  do_make > /dev/null 2> "./.test.txt" || _build_status="$?"

  if [ -s "./.test.txt" ]; then
    printf '%s generated warning(s):\n' "$_build_CC"
    printf '\n'
    cat "./.test.txt"
    exit 1
  fi

  # make failed without saying anything on stderr; still a failure.
  if [ "$_build_status" -ne 0 ]; then
    exit "$_build_status"
  fi
}

# Run tests with make.
#
# With arguments, they are used as the make targets. Without arguments, the
# "test" target is run, followed by "test_history" if the global test_history
# is non-zero.
# @param ... Optional make targets to run instead of the defaults.
runtest() {

  header "Running tests"

  if [ "$#" -gt 0 ]; then
    do_make "$@"
  else

    do_make test

    if [ "$test_history" -ne 0 ]; then
      do_make test_history
    fi
  fi
}

# Builds and runs tests with both calculators, then bc only, then dc only. If
# run_tests is false, then it just does the builds.
# @param CFLAGS The CFLAGS.
# @param CC The C compiler.
# @param configure_flags The flags for configure.sh itself.
# @param GEN_HOST The setting for GEN_HOST.
# @param LONG_BIT The setting for LONG_BIT.
# @param run_tests Whether to run tests or not.
runconfigtests() {

  _runconfigtests_CFLAGS="$1"
  shift

  _runconfigtests_CC="$1"
  shift

  _runconfigtests_configure_flags="$1"
  shift

  _runconfigtests_GEN_HOST="$1"
  shift

  _runconfigtests_LONG_BIT="$1"
  shift

  _runconfigtests_run_tests="$1"
  shift

  if [ "$_runconfigtests_run_tests" -ne 0 ]; then
    _runconfigtests_action='Running tests with configure flags'
  else
    _runconfigtests_action='Building with configure flags'
  fi

  # Report this function's own CC and CFLAGS parameters. One constant format
  # is used so that no flag value is ever interpreted as a printf format.
  _runconfigtests_header=$(printf '%s "%s" ...\n CC=%s\n CFLAGS="%s"\n LONG_BIT=%s\n GEN_HOST=%s' \
    "$_runconfigtests_action" "$_runconfigtests_configure_flags" \
    "$_runconfigtests_CC" "$_runconfigtests_CFLAGS" \
    "$_runconfigtests_LONG_BIT" "$_runconfigtests_GEN_HOST")

  header "$_runconfigtests_header"

  # Three passes: the given flags as-is (both calculators), then with -b
  # appended, then with -d appended. Each pass is cleaned before the next.
  for _runconfigtests_extra in "" " -b" " -d"; do

    build "$_runconfigtests_CFLAGS" "$_runconfigtests_CC" \
      "$_runconfigtests_configure_flags$_runconfigtests_extra" \
      "$_runconfigtests_GEN_HOST" "$_runconfigtests_LONG_BIT"

    if [ "$_runconfigtests_run_tests" -ne 0 ]; then
      runtest
    fi

    do_make clean
  done
}

# Builds and runs tests with runconfigtests(), but also does 64-bit, 32-bit, and
# 128-bit rand, if requested. It also does it with the gen script (strgen.sh) if
# requested. If run_tests is false, it just does the builds.
# @param CFLAGS The CFLAGS.
# @param CC The C compiler.
# @param configure_flags The flags for configure.sh itself.
# @param run_tests Whether to run tests or not.
#
# Reads the globals run_64_bit, test_128_bit and run_gen_script to decide
# which of the runconfigtests() passes are performed.
runconfigseries() {

  _runconfigseries_CFLAGS="$1"
  _runconfigseries_CC="$2"
  _runconfigseries_configure_flags="$3"
  _runconfigseries_run_tests="$4"
  shift 4

  if [ "$run_64_bit" -ne 0 ]; then

    # 64-bit with the given CFLAGS, only when 128-bit testing is requested.
    if [ "$test_128_bit" -ne 0 ]; then
      _runconfigseries_pass "$_runconfigseries_CFLAGS" 1 64
    fi

    # 64-bit with GEN_HOST=0 (the gen script).
    if [ "$run_gen_script" -ne 0 ]; then
      _runconfigseries_pass "$_runconfigseries_CFLAGS" 0 64
    fi

    # 64-bit with BC_RAND_BUILTIN turned off; always done for 64-bit.
    _runconfigseries_pass "$_runconfigseries_CFLAGS -DBC_RAND_BUILTIN=0" 1 64

  fi

  # 32-bit is always done.
  _runconfigseries_pass "$_runconfigseries_CFLAGS" 1 32

  if [ "$run_gen_script" -ne 0 ]; then
    _runconfigseries_pass "$_runconfigseries_CFLAGS" 0 32
  fi
}

# Private helper for runconfigseries(): runs one runconfigtests() pass with
# the compiler, configure flags and run_tests value saved by runconfigseries().
# @param CFLAGS The CFLAGS for this pass.
# @param GEN_HOST The setting for GEN_HOST.
# @param LONG_BIT The setting for LONG_BIT.
_runconfigseries_pass() {
  runconfigtests "$1" "$_runconfigseries_CC" \
    "$_runconfigseries_configure_flags" "$2" "$3" "$_runconfigseries_run_tests"
}

# Builds and runs tests with each setting combo running runconfigseries(). If
# run_tests is false, it just does the builds.
# @param CFLAGS The CFLAGS.
# @param CC The C compiler.
# @param configure_flags The flags for configure.sh itself.
# @param run_tests Whether to run tests or not.
runsettingsseries() {

  _runsettingsseries_CFLAGS="$1"
  shift

  _runsettingsseries_CC="$1"
  shift

  _runsettingsseries_configure_flags="$1"
  shift

  _runsettingsseries_run_tests="$1"
  shift

  if [ "$test_settings" -ne 0 ]; then

    # One series per line of release_settings.txt; each line is a set of extra
    # configure flags. `-r` keeps backslashes literal, and the `|| [ -n ... ]`
    # guard still processes a last line that has no trailing newline.
    #
    # Note that the redirection applies to the whole loop, so commands run in
    # the loop body see the settings file as their standard input.
    while read -r _runsettingsseries_s || [ -n "$_runsettingsseries_s" ]; do
      runconfigseries "$_runsettingsseries_CFLAGS" "$_runsettingsseries_CC" \
        "$_runsettingsseries_configure_flags $_runsettingsseries_s" \
        "$_runsettingsseries_run_tests"
    done < "$scriptdir/release_settings.txt"

  else
    runconfigseries "$_runsettingsseries_CFLAGS" "$_runsettingsseries_CC" \
      "$_runsettingsseries_configure_flags" "$_runsettingsseries_run_tests"
  fi
}

# Builds and runs tests with each build type running runsettingsseries(). If
# run_tests is false, it just does the builds.
#
# The first run uses the configure flags as given; the following runs append
# each of -E, -H, -N, -EH, -EN, -HN and -EHN in turn.
# @param CFLAGS The CFLAGS.
# @param CC The C compiler.
# @param configure_flags The flags for configure.sh itself.
# @param run_tests Whether to run tests or not.
runtestseries() {

  _runtestseries_CFLAGS="$1"
  shift

  _runtestseries_CC="$1"
  shift

  _runtestseries_configure_flags="$1"
  shift

  _runtestseries_run_tests="$1"
  shift

  _runtestseries_flags="E H N EH EN HN EHN"

  runsettingsseries "$_runtestseries_CFLAGS" "$_runtestseries_CC" \
    "$_runtestseries_configure_flags" "$_runtestseries_run_tests"

  # Deliberately unquoted: the list is split into one flag set per word.
  for _runtestseries_f in $_runtestseries_flags; do
    runsettingsseries "$_runtestseries_CFLAGS" "$_runtestseries_CC" \
      "$_runtestseries_configure_flags -$_runtestseries_f" "$_runtestseries_run_tests"
  done
}

# Builds and runs the tests for bcl. If run_tests is false, it just does the
# builds.
# @param CFLAGS The CFLAGS.
# @param CC The C compiler.
# @param configure_flags The flags for configure.sh itself.
# @param run_tests Whether to run tests or not.
runlibtests() {

  _runlibtests_CFLAGS="$1"
  _runlibtests_CC="$2"
  # -a is always appended to the configure flags for the library build.
  _runlibtests_configure_flags="$3 -a"
  _runlibtests_run_tests="$4"
  shift 4

  # Build (and optionally test) with LONG_BIT=64 first, then LONG_BIT=32.
  for _runlibtests_bits in 64 32; do

    build "$_runlibtests_CFLAGS" "$_runlibtests_CC" "$_runlibtests_configure_flags" 1 "$_runlibtests_bits"

    if [ "$_runlibtests_run_tests" -ne 0 ]; then
      runtest test
    fi
  done
}

# Builds and runs tests under C99, then C11, if requested, using
# runtestseries(). If run_tests is false, it just does the builds.
# Whether C11 is requested is read from the global test_c11.
# @param CFLAGS The CFLAGS.
# @param CC The C compiler.
# @param configure_flags The flags for configure.sh itself.
# @param run_tests Whether to run tests or not.
runtests() {

  _runtests_CFLAGS="$1"
  _runtests_CC="$2"
  _runtests_configure_flags="$3"
  _runtests_run_tests="$4"
  shift 4

  # C99 is always tested; C11 is added on request.
  _runtests_stds="c99"

  if [ "$test_c11" -ne 0 ]; then
    _runtests_stds="$_runtests_stds c11"
  fi

  # Deliberately unquoted: one iteration per standard in the list.
  for _runtests_std in $_runtests_stds; do
    runtestseries "-std=$_runtests_std $_runtests_CFLAGS" "$_runtests_CC" \
      "$_runtests_configure_flags" "$_runtests_run_tests"
  done
}

# Runs the karatsuba tests by making the karatsuba_test target.
karatsuba() {

  header "Running Karatsuba tests"
  do_make karatsuba_test
}

# Builds and runs under valgrind. It runs both, bc only, then dc only.
vg() {

  header "Running valgrind"

  if [ "$run_64_bit" -ne 0 ]; then
    _vg_bits=64
  else
    _vg_bits=32
  fi

  # Four valgrind (-v) configurations, always with gcc: the default build,
  # then with b, d and a added to the flag set. Each one is built, tested
  # and then has its configuration cleaned before the next.
  for _vg_flags in "-O3 -gv" "-O3 -gvb" "-O3 -gvd" "-O3 -gva"; do

    build "$debug -std=c99" "gcc" "$_vg_flags" "1" "$_vg_bits"
    runtest test

    do_make clean_config
  done
}

# Builds the debug series and runs the tests if run_tests allows. If sanitizers
# are enabled, it also does UBSan.
#
# UBSan is only used when the compiler is clang and the global run_sanitizers
# is non-zero. The common debug CFLAGS come from the global $debug.
# @param CC The C compiler.
# @param run_tests Whether to run tests or not.
debug() {

  _debug_CC="$1"
  shift

  _debug_run_tests="$1"
  shift

  # Two separate tests joined with && replace the obsolescent `[ a -a b ]`
  # form. The decision is made once and reused for both series.
  if [ "$_debug_CC" = "clang" ] && [ "$run_sanitizers" -ne 0 ]; then
    _debug_CFLAGS="$debug -fsanitize=undefined"
    _debug_flags="-gm"
  else
    _debug_CFLAGS="$debug"
    _debug_flags="-g"
  fi

  runtests "$_debug_CFLAGS" "$_debug_CC" "$_debug_flags" "$_debug_run_tests"

  runlibtests "$_debug_CFLAGS" "$_debug_CC" "$_debug_flags" "$_debug_run_tests"
}

# Builds the release series and runs the test if run_tests allows.
# The common release CFLAGS come from the global $release.
# @param CC The C compiler.
# @param run_tests Whether to run tests or not.
release() {

  _release_CC="$1"
  shift

  _release_run_tests="$1"
  shift

  runtests "$release" "$_release_CC" "-O3" "$_release_run_tests"

  runlibtests "$release" "$_release_CC" "-O3" "$_release_run_tests"
}

# Builds the release debug series and runs the test if run_tests allows. If
# sanitizers are enabled, it also does ASan and MSan.
# @param CC The C compiler.
# @param run_tests Whether to run tests or not.
reldebug() {

  _reldebug_CC="$1"
  shift

  _reldebug_run_tests="$1"
  shift

  # Two separate tests joined with && replace the obsolescent `[ a -a b ]`
  # form. Sanitizers are only used with clang and when the global
  # run_sanitizers is non-zero.
  if [ "$_reldebug_CC" = "clang" ] && [ "$run_sanitizers" -ne 0 ]; then

    # First the bc/dc series, then the library series; each one under ASan
    # and then under MSan.
    for _reldebug_runner in runtests runlibtests; do
      "$_reldebug_runner" "$debug -fsanitize=address" "$_reldebug_CC" "-mgO3" "$_reldebug_run_tests"
      "$_reldebug_runner" "$debug -fsanitize=memory" "$_reldebug_CC" "-mgO3" "$_reldebug_run_tests"
    done

  else
    runtests "$debug" "$_reldebug_CC" "-gO3" "$_reldebug_run_tests"
    runlibtests "$debug" "$_reldebug_CC" "-gO3" "$_reldebug_run_tests"
  fi
}

# Builds the min size release series and runs the test if run_tests allows.
# The common release CFLAGS come from the global $release.
# @param CC The C compiler.
# @param run_tests Whether to run tests or not.
minsize() {

  _minsize_CC="$1"
  shift

  _minsize_run_tests="$1"
  shift

  runtests "$release" "$_minsize_CC" "-Os" "$_minsize_run_tests"

  runlibtests "$release" "$_minsize_CC" "-Os" "$_minsize_run_tests"
}

# Builds all sets: debug, release, release debug, and min size, and runs the
# tests if run_tests allows.
# @param CC The C compiler.
# @param run_tests Whether to run tests or not.
build_set() {

  _build_set_CC="$1"
  shift

  _build_set_run_tests="$1"
  shift

  debug "$_build_set_CC" "$_build_set_run_tests"
  release "$_build_set_CC" "$_build_set_run_tests"
  reldebug "$_build_set_CC" "$_build_set_run_tests"
  minsize "$_build_set_CC" "$_build_set_run_tests"
}

# Set some strict warning flags. Clang's -Weverything can be way too strict, so
# we actually have to turn off some things.
clang_flags="-Weverything -Wno-padded -Wno-switch-enum -Wno-format-nonliteral"
clang_flags="$clang_flags -Wno-cast-align -Wno-missing-noreturn -Wno-disabled-macro-expansion"
clang_flags="$clang_flags -Wno-unreachable-code -Wno-unreachable-code-return"
clang_flags="$clang_flags -Wno-implicit-fallthrough -Wno-unused-macros -Wno-gnu-label-as-value"
# -Wno-undef is here because Clang seems to think BC_C11 is undefined, when it's defined.
clang_flags="$clang_flags -Wno-undef"
gcc_flags="-Wno-maybe-uninitialized -Wno-clobbered"

# Common CFLAGS.
cflags="-Wall -Wextra -Werror -pedantic -Wno-conditional-uninitialized"

# Common debug and release flags.
debug="$cflags -fno-omit-frame-pointer"
release="$cflags -DNDEBUG"

set -e

script="$0"
scriptdir=$(dirname "$script")

real=$(realpath "$scriptdir/../")

# The positional parameters are consumed one at a time, in the order given in
# usage(). For each one, ${1-default} yields the argument if it was given and
# the default otherwise, and the argument is shifted away only if it exists.

# Whether to run tests.
run_tests="${1-1}"
[ "$#" -eq 0 ] || shift

# Whether to generate tests. On platforms like OpenBSD, there is no GNU bc to
# generate tests, so this must be off.
gen_tests="${1-1}"
[ "$#" -eq 0 ] || shift

# Whether to test with clang.
test_with_clang="${1-1}"
[ "$#" -eq 0 ] || shift

# Whether to test with gcc.
test_with_gcc="${1-1}"
[ "$#" -eq 0 ] || shift

# Whether to test with sanitizers.
run_sanitizers="${1-1}"
[ "$#" -eq 0 ] || shift

# Whether to test with valgrind.
run_valgrind="${1-1}"
[ "$#" -eq 0 ] || shift

# Whether to test all settings combos.
test_settings="${1-1}"
[ "$#" -eq 0 ] || shift

# Whether to test 64-bit in addition to 32-bit.
run_64_bit="${1-1}"
[ "$#" -eq 0 ] || shift

# Whether to test with strgen.sh in addition to strgen.c.
run_gen_script="${1-0}"
[ "$#" -eq 0 ] || shift

# Whether to test on C11 in addition to C99.
test_c11="${1-0}"
[ "$#" -eq 0 ] || shift

# Whether to test 128-bit integers in addition to no 128-bit integers.
test_128_bit="${1-0}"
[ "$#" -eq 0 ] || shift

# Whether to test with computed goto or not.
test_computed_goto="${1-0}"
[ "$#" -eq 0 ] || shift

# Whether to run the Karatsuba tests or not.
test_karatsuba="${1-1}"
[ "$#" -eq 0 ] || shift

# Whether to test history or not.
test_history="${1-0}"
[ "$#" -eq 0 ] || shift

if [ "$run_64_bit" -ne 0 ]; then
  bits=64
else
  bits=32
fi

if [ "$test_computed_goto" -eq 0 ]; then
  clang_flags="-DBC_NO_COMPUTED_GOTO $clang_flags"
  gcc_flags="-DBC_NO_COMPUTED_GOTO $gcc_flags"
fi

# Setup a default compiler.
if [ "$test_with_clang" -ne 0 ]; then
  defcc="clang"
elif [ "$test_with_gcc" -ne 0 ]; then
  defcc="gcc"
else
  defcc="c99"
fi

export ASAN_OPTIONS="abort_on_error=1,allocator_may_return_null=1:strict_string_checks=1:detect_stack_use_after_return=1:check_initialization_order=1:strict_init_order=1:detect_invalid_pointer_pairs=2"
export UBSAN_OPTIONS="print_stack_trace=1,silence_unsigned_overflow=1"

build "$debug -std=c99" "$defcc" "-g" "1" "$bits"

header "Running math library under --standard"

# Make sure the math library is POSIX compliant.
printf 'quit\n' | bin/bc -ls

do_make clean_tests

# Run the clang build sets.
if [ "$test_with_clang" -ne 0 ]; then
  build_set "clang" "$run_tests"
fi

# Run the gcc build sets.
if [ "$test_with_gcc" -ne 0 ]; then
  build_set "gcc" "$run_tests"
fi

if [ "$run_tests" -ne 0 ]; then

  build "$release" "$defcc" "-O3" "1" "$bits"

  # Run karatsuba.
  if [ "$test_karatsuba" -ne 0 ]; then
    karatsuba
  fi

  # Valgrind. It is only run when gcc is also being tested. Two tests joined
  # with && replace the obsolescent `[ a -a b ]` form.
  if [ "$run_valgrind" -ne 0 ] && [ "$test_with_gcc" -ne 0 ]; then
    vg
  fi

  printf '\n'
  printf 'Tests successful.\n'

  # I just assume that I am going to be fuzzing when I am done.
  header "Building for AFL++..."

  "$scriptdir/fuzz_prep.sh"

  printf '\n'
  printf 'Ready for scripts/randmath.py and for fuzzing.\n'
  printf '\n'
  printf 'Run scripts/randmath.py if you changed any math code.\n'
  printf '\n'
  printf 'Then if there are no problems, run the fuzzer.\n'
  printf '\n'
  printf 'Then run `scripts/fuzz_prep.sh -a`.\n'
  printf '\n'
  printf 'Then run `scripts/afl.py --asan`.\n'

fi
diff --git a/scripts/release_settings.txt b/scripts/release_settings.txt new file mode 100644 index 000000000000..1cf572347241 --- /dev/null +++ b/scripts/release_settings.txt @@ -0,0 +1,16 @@ +-sbc.sigint_reset -sdc.sigint_reset -sbc.tty_mode -sdc.tty_mode -sbc.history -sdc.history -sbc.prompt -sdc.prompt +-Sbc.sigint_reset -Sdc.sigint_reset -sbc.tty_mode -sdc.tty_mode -sbc.history -sdc.history -sbc.prompt -sdc.prompt +-sbc.sigint_reset -sdc.sigint_reset -Sbc.tty_mode -Sdc.tty_mode -sbc.history -sdc.history -sbc.prompt -sdc.prompt +-sbc.sigint_reset -sdc.sigint_reset -sbc.tty_mode -sdc.tty_mode -Sbc.history -Sdc.history -sbc.prompt -sdc.prompt +-sbc.sigint_reset -sdc.sigint_reset -sbc.tty_mode -sdc.tty_mode -sbc.history -sdc.history -Sbc.prompt -Sdc.prompt +-Sbc.sigint_reset -Sdc.sigint_reset -Sbc.tty_mode -Sdc.tty_mode -sbc.history -sdc.history -sbc.prompt -sdc.prompt +-Sbc.sigint_reset -Sdc.sigint_reset -sbc.tty_mode -sdc.tty_mode -Sbc.history -Sdc.history -sbc.prompt -sdc.prompt +-Sbc.sigint_reset -Sdc.sigint_reset -sbc.tty_mode -sdc.tty_mode -sbc.history -sdc.history -Sbc.prompt -Sdc.prompt +-sbc.sigint_reset -sdc.sigint_reset -Sbc.tty_mode -Sdc.tty_mode -Sbc.history -Sdc.history -sbc.prompt -sdc.prompt +-sbc.sigint_reset -sdc.sigint_reset -Sbc.tty_mode -Sdc.tty_mode -sbc.history -sdc.history -Sbc.prompt -Sdc.prompt +-sbc.sigint_reset -sdc.sigint_reset
-sbc.tty_mode -sdc.tty_mode -Sbc.history -Sdc.history -Sbc.prompt -Sdc.prompt +-Sbc.sigint_reset -Sdc.sigint_reset -Sbc.tty_mode -Sdc.tty_mode -Sbc.history -Sdc.history -sbc.prompt -sdc.prompt +-Sbc.sigint_reset -Sdc.sigint_reset -Sbc.tty_mode -Sdc.tty_mode -sbc.history -sdc.history -Sbc.prompt -Sdc.prompt +-Sbc.sigint_reset -Sdc.sigint_reset -sbc.tty_mode -sdc.tty_mode -Sbc.history -Sdc.history -Sbc.prompt -Sdc.prompt +-sbc.sigint_reset -sdc.sigint_reset -Sbc.tty_mode -Sdc.tty_mode -Sbc.history -Sdc.history -Sbc.prompt -Sdc.prompt +-Sbc.sigint_reset -Sdc.sigint_reset -Sbc.tty_mode -Sdc.tty_mode -Sbc.history -Sdc.history -Sbc.prompt -Sdc.prompt diff --git a/scripts/test_settings.sh b/scripts/test_settings.sh new file mode 100755 index 000000000000..563dbf0e58f3 --- /dev/null +++ b/scripts/test_settings.sh @@ -0,0 +1,77 @@ +#! /bin/sh +# +# SPDX-License-Identifier: BSD-2-Clause +# +# Copyright (c) 2018-2021 Gavin D. Howard and contributors. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +#

# This script's argument is a number, which is the index of the setting set
# that is under test. This script is for maintainers only.
#
# The procedure is this: run the script with:
#
#     ./scripts/test_settings.sh 1
#
# Then run bc and dc to ensure their stuff is correct. Then run this script
# again with:
#
#     ./scripts/test_settings.sh 2
#
# And repeat. You can also test various environment variable sets with them.

# Print the usage and exit with an error.
usage() {
  printf 'usage: %s index\n' "$0" 1>&2
  exit 1
}

script="$0"

# Resolve the script directory to an absolute path before changing directory;
# a relative path would no longer point at the settings file after the cd
# below when the script is started from anywhere but the repository root.
scriptdir=$(CDPATH= cd -- "$(dirname "$script")" && pwd) || exit 1

cd "$scriptdir/.." || exit 1

test "$#" -eq 1 || usage

target="$1"
shift

line=0

# This loop just loops until it gets to the right line. Quick and dirty.
# `-r` keeps backslashes literal, and the `|| [ -n ... ]` guard still handles
# a last line without a trailing newline.
while read -r s || [ -n "$s" ]; do

  # Shell arithmetic; no need to spawn bc just to count lines.
  line=$((line + 1))

  if [ "$line" -eq "$target" ]; then

    # Configure, build, and exit. $s is deliberately unquoted: it holds
    # several space-separated options for configure.sh.
    # shellcheck disable=SC2086
    ./configure.sh -O3 $s

    make -j16 > /dev/null

    exit
  fi

done < "$scriptdir/test_settings.txt"

# Falling out of the loop means the requested index is not in the file.
printf '%s: no setting set with index %s\n' "$script" "$target" 1>&2
exit 1
diff --git a/scripts/test_settings.txt b/scripts/test_settings.txt new file mode 100644 index 000000000000..e6dd8ac92929 --- /dev/null +++ b/scripts/test_settings.txt @@ -0,0 +1,93 @@ +-sbc.sigint_reset -sdc.sigint_reset -sbc.tty_mode -sdc.tty_mode -sbc.history -sdc.history -sbc.prompt -sdc.prompt +-Sbc.sigint_reset -sdc.sigint_reset -sbc.tty_mode -sdc.tty_mode -sbc.history -sdc.history -sbc.prompt -sdc.prompt +-sbc.sigint_reset -Sdc.sigint_reset -sbc.tty_mode -sdc.tty_mode -sbc.history -sdc.history -sbc.prompt -sdc.prompt +-sbc.sigint_reset -sdc.sigint_reset -Sbc.tty_mode -sdc.tty_mode -sbc.history -sdc.history -sbc.prompt -sdc.prompt +-sbc.sigint_reset -sdc.sigint_reset -sbc.tty_mode -Sdc.tty_mode -sbc.history -sdc.history -sbc.prompt -sdc.prompt +-sbc.sigint_reset -sdc.sigint_reset -sbc.tty_mode -sdc.tty_mode -Sbc.history -sdc.history -sbc.prompt -sdc.prompt +-sbc.sigint_reset -sdc.sigint_reset -sbc.tty_mode -sdc.tty_mode -sbc.history -Sdc.history -sbc.prompt -sdc.prompt +-sbc.sigint_reset -sdc.sigint_reset -sbc.tty_mode -sdc.tty_mode -sbc.history -sdc.history -Sbc.prompt -sdc.prompt +-sbc.sigint_reset -sdc.sigint_reset -sbc.tty_mode -sdc.tty_mode -sbc.history -sdc.history -sbc.prompt -Sdc.prompt +-Sbc.sigint_reset -Sdc.sigint_reset -sbc.tty_mode -sdc.tty_mode -sbc.history -sdc.history -sbc.prompt -sdc.prompt +-Sbc.sigint_reset -sdc.sigint_reset -Sbc.tty_mode -sdc.tty_mode -sbc.history -sdc.history -sbc.prompt -sdc.prompt +-Sbc.sigint_reset -sdc.sigint_reset -sbc.tty_mode -Sdc.tty_mode -sbc.history -sdc.history -sbc.prompt -sdc.prompt +-Sbc.sigint_reset -sdc.sigint_reset -sbc.tty_mode -sdc.tty_mode -Sbc.history -sdc.history -sbc.prompt -sdc.prompt +-Sbc.sigint_reset -sdc.sigint_reset -sbc.tty_mode -sdc.tty_mode -sbc.history -Sdc.history -sbc.prompt -sdc.prompt +-Sbc.sigint_reset -sdc.sigint_reset -sbc.tty_mode -sdc.tty_mode -sbc.history -sdc.history
-Sbc.prompt -sdc.prompt +-Sbc.sigint_reset -sdc.sigint_reset -sbc.tty_mode -sdc.tty_mode -sbc.history -sdc.history -sbc.prompt -Sdc.prompt +-sbc.sigint_reset -Sdc.sigint_reset -Sbc.tty_mode -sdc.tty_mode -sbc.history -sdc.history -sbc.prompt -sdc.prompt +-sbc.sigint_reset -Sdc.sigint_reset -sbc.tty_mode -Sdc.tty_mode -sbc.history -sdc.history -sbc.prompt -sdc.prompt +-sbc.sigint_reset -Sdc.sigint_reset -sbc.tty_mode -sdc.tty_mode -Sbc.history -sdc.history -sbc.prompt -sdc.prompt +-sbc.sigint_reset -Sdc.sigint_reset -sbc.tty_mode -sdc.tty_mode -sbc.history -Sdc.history -sbc.prompt -sdc.prompt +-sbc.sigint_reset -Sdc.sigint_reset -sbc.tty_mode -sdc.tty_mode -sbc.history -sdc.history -Sbc.prompt -sdc.prompt +-sbc.sigint_reset -Sdc.sigint_reset -sbc.tty_mode -sdc.tty_mode -sbc.history -sdc.history -sbc.prompt -Sdc.prompt +-sbc.sigint_reset -sdc.sigint_reset -Sbc.tty_mode -Sdc.tty_mode -sbc.history -sdc.history -sbc.prompt -sdc.prompt +-sbc.sigint_reset -sdc.sigint_reset -Sbc.tty_mode -sdc.tty_mode -Sbc.history -sdc.history -sbc.prompt -sdc.prompt +-sbc.sigint_reset -sdc.sigint_reset -Sbc.tty_mode -sdc.tty_mode -sbc.history -Sdc.history -sbc.prompt -sdc.prompt +-sbc.sigint_reset -sdc.sigint_reset -Sbc.tty_mode -sdc.tty_mode -sbc.history -sdc.history -Sbc.prompt -sdc.prompt +-sbc.sigint_reset -sdc.sigint_reset -Sbc.tty_mode -sdc.tty_mode -sbc.history -sdc.history -sbc.prompt -Sdc.prompt +-sbc.sigint_reset -sdc.sigint_reset -sbc.tty_mode -Sdc.tty_mode -Sbc.history -sdc.history -sbc.prompt -sdc.prompt +-sbc.sigint_reset -sdc.sigint_reset -sbc.tty_mode -Sdc.tty_mode -sbc.history -Sdc.history -sbc.prompt -sdc.prompt +-sbc.sigint_reset -sdc.sigint_reset -sbc.tty_mode -Sdc.tty_mode -sbc.history -sdc.history -Sbc.prompt -sdc.prompt +-sbc.sigint_reset -sdc.sigint_reset -sbc.tty_mode -Sdc.tty_mode -sbc.history -sdc.history -sbc.prompt -Sdc.prompt +-sbc.sigint_reset -sdc.sigint_reset -sbc.tty_mode -sdc.tty_mode -Sbc.history -Sdc.history -sbc.prompt -sdc.prompt +-sbc.sigint_reset 
-sdc.sigint_reset -sbc.tty_mode -sdc.tty_mode -Sbc.history -sdc.history -Sbc.prompt -sdc.prompt +-sbc.sigint_reset -sdc.sigint_reset -sbc.tty_mode -sdc.tty_mode -Sbc.history -sdc.history -sbc.prompt -Sdc.prompt +-sbc.sigint_reset -sdc.sigint_reset -sbc.tty_mode -sdc.tty_mode -sbc.history -Sdc.history -Sbc.prompt -sdc.prompt +-sbc.sigint_reset -sdc.sigint_reset -sbc.tty_mode -sdc.tty_mode -sbc.history -Sdc.history -sbc.prompt -Sdc.prompt +-sbc.sigint_reset -sdc.sigint_reset -sbc.tty_mode -sdc.tty_mode -sbc.history -sdc.history -Sbc.prompt -Sdc.prompt +-Sbc.sigint_reset -Sdc.sigint_reset -Sbc.tty_mode -sdc.tty_mode -sbc.history -sdc.history -sbc.prompt -sdc.prompt +-Sbc.sigint_reset -Sdc.sigint_reset -sbc.tty_mode -Sdc.tty_mode -sbc.history -sdc.history -sbc.prompt -sdc.prompt +-Sbc.sigint_reset -Sdc.sigint_reset -sbc.tty_mode -sdc.tty_mode -Sbc.history -sdc.history -sbc.prompt -sdc.prompt +-Sbc.sigint_reset -Sdc.sigint_reset -sbc.tty_mode -sdc.tty_mode -sbc.history -Sdc.history -sbc.prompt -sdc.prompt +-Sbc.sigint_reset -Sdc.sigint_reset -sbc.tty_mode -sdc.tty_mode -sbc.history -sdc.history -Sbc.prompt -sdc.prompt +-Sbc.sigint_reset -Sdc.sigint_reset -sbc.tty_mode -sdc.tty_mode -sbc.history -sdc.history -sbc.prompt -Sdc.prompt +-sbc.sigint_reset -Sdc.sigint_reset -Sbc.tty_mode -Sdc.tty_mode -sbc.history -sdc.history -sbc.prompt -sdc.prompt +-sbc.sigint_reset -Sdc.sigint_reset -Sbc.tty_mode -sdc.tty_mode -Sbc.history -sdc.history -sbc.prompt -sdc.prompt +-sbc.sigint_reset -Sdc.sigint_reset -Sbc.tty_mode -sdc.tty_mode -sbc.history -Sdc.history -sbc.prompt -sdc.prompt +-sbc.sigint_reset -Sdc.sigint_reset -Sbc.tty_mode -sdc.tty_mode -sbc.history -sdc.history -Sbc.prompt -sdc.prompt +-sbc.sigint_reset -Sdc.sigint_reset -Sbc.tty_mode -sdc.tty_mode -sbc.history -sdc.history -sbc.prompt -Sdc.prompt +-sbc.sigint_reset -sdc.sigint_reset -Sbc.tty_mode -Sdc.tty_mode -Sbc.history -sdc.history -sbc.prompt -sdc.prompt +-sbc.sigint_reset -sdc.sigint_reset -Sbc.tty_mode 
-Sdc.tty_mode -sbc.history -Sdc.history -sbc.prompt -sdc.prompt +-sbc.sigint_reset -sdc.sigint_reset -Sbc.tty_mode -Sdc.tty_mode -sbc.history -sdc.history -Sbc.prompt -sdc.prompt +-sbc.sigint_reset -sdc.sigint_reset -Sbc.tty_mode -Sdc.tty_mode -sbc.history -sdc.history -sbc.prompt -Sdc.prompt +-sbc.sigint_reset -sdc.sigint_reset -sbc.tty_mode -Sdc.tty_mode -Sbc.history -Sdc.history -sbc.prompt -sdc.prompt +-sbc.sigint_reset -sdc.sigint_reset -sbc.tty_mode -Sdc.tty_mode -Sbc.history -sdc.history -Sbc.prompt -sdc.prompt +-sbc.sigint_reset -sdc.sigint_reset -sbc.tty_mode -Sdc.tty_mode -Sbc.history -sdc.history -sbc.prompt -Sdc.prompt +-sbc.sigint_reset -sdc.sigint_reset -sbc.tty_mode -sdc.tty_mode -Sbc.history -Sdc.history -Sbc.prompt -sdc.prompt +-sbc.sigint_reset -sdc.sigint_reset -sbc.tty_mode -sdc.tty_mode -Sbc.history -Sdc.history -sbc.prompt -Sdc.prompt +-sbc.sigint_reset -sdc.sigint_reset -sbc.tty_mode -sdc.tty_mode -sbc.history -Sdc.history -Sbc.prompt -Sdc.prompt +-Sbc.sigint_reset -Sdc.sigint_reset -Sbc.tty_mode -Sdc.tty_mode -sbc.history -sdc.history -sbc.prompt -sdc.prompt +-Sbc.sigint_reset -Sdc.sigint_reset -Sbc.tty_mode -sdc.tty_mode -Sbc.history -sdc.history -sbc.prompt -sdc.prompt +-Sbc.sigint_reset -Sdc.sigint_reset -Sbc.tty_mode -sdc.tty_mode -sbc.history -Sdc.history -sbc.prompt -sdc.prompt +-Sbc.sigint_reset -Sdc.sigint_reset -Sbc.tty_mode -sdc.tty_mode -sbc.history -sdc.history -Sbc.prompt -sdc.prompt +-Sbc.sigint_reset -Sdc.sigint_reset -Sbc.tty_mode -sdc.tty_mode -sbc.history -sdc.history -sbc.prompt -Sdc.prompt +-sbc.sigint_reset -Sdc.sigint_reset -Sbc.tty_mode -Sdc.tty_mode -Sbc.history -sdc.history -sbc.prompt -sdc.prompt +-sbc.sigint_reset -Sdc.sigint_reset -Sbc.tty_mode -Sdc.tty_mode -sbc.history -Sdc.history -sbc.prompt -sdc.prompt +-sbc.sigint_reset -Sdc.sigint_reset -Sbc.tty_mode -Sdc.tty_mode -sbc.history -sdc.history -Sbc.prompt -sdc.prompt +-sbc.sigint_reset -Sdc.sigint_reset -Sbc.tty_mode -Sdc.tty_mode -sbc.history -sdc.history 
-sbc.prompt -Sdc.prompt +-sbc.sigint_reset -sdc.sigint_reset -Sbc.tty_mode -Sdc.tty_mode -Sbc.history -Sdc.history -sbc.prompt -sdc.prompt +-sbc.sigint_reset -sdc.sigint_reset -Sbc.tty_mode -Sdc.tty_mode -Sbc.history -sdc.history -Sbc.prompt -sdc.prompt +-sbc.sigint_reset -sdc.sigint_reset -Sbc.tty_mode -Sdc.tty_mode -Sbc.history -sdc.history -sbc.prompt -Sdc.prompt +-sbc.sigint_reset -sdc.sigint_reset -sbc.tty_mode -Sdc.tty_mode -Sbc.history -Sdc.history -Sbc.prompt -sdc.prompt +-sbc.sigint_reset -sdc.sigint_reset -sbc.tty_mode -Sdc.tty_mode -Sbc.history -Sdc.history -sbc.prompt -Sdc.prompt +-sbc.sigint_reset -sdc.sigint_reset -sbc.tty_mode -sdc.tty_mode -Sbc.history -Sdc.history -Sbc.prompt -Sdc.prompt +-Sbc.sigint_reset -Sdc.sigint_reset -Sbc.tty_mode -Sdc.tty_mode -Sbc.history -sdc.history -sbc.prompt -sdc.prompt +-Sbc.sigint_reset -Sdc.sigint_reset -Sbc.tty_mode -Sdc.tty_mode -sbc.history -Sdc.history -sbc.prompt -sdc.prompt +-Sbc.sigint_reset -Sdc.sigint_reset -Sbc.tty_mode -Sdc.tty_mode -sbc.history -sdc.history -Sbc.prompt -sdc.prompt +-Sbc.sigint_reset -Sdc.sigint_reset -Sbc.tty_mode -Sdc.tty_mode -sbc.history -sdc.history -sbc.prompt -Sdc.prompt +-sbc.sigint_reset -Sdc.sigint_reset -Sbc.tty_mode -Sdc.tty_mode -Sbc.history -Sdc.history -sbc.prompt -sdc.prompt +-sbc.sigint_reset -Sdc.sigint_reset -Sbc.tty_mode -Sdc.tty_mode -Sbc.history -sdc.history -Sbc.prompt -sdc.prompt +-sbc.sigint_reset -Sdc.sigint_reset -Sbc.tty_mode -Sdc.tty_mode -Sbc.history -sdc.history -sbc.prompt -Sdc.prompt +-sbc.sigint_reset -sdc.sigint_reset -Sbc.tty_mode -Sdc.tty_mode -Sbc.history -Sdc.history -Sbc.prompt -sdc.prompt +-sbc.sigint_reset -sdc.sigint_reset -Sbc.tty_mode -Sdc.tty_mode -Sbc.history -Sdc.history -sbc.prompt -Sdc.prompt +-sbc.sigint_reset -sdc.sigint_reset -sbc.tty_mode -Sdc.tty_mode -Sbc.history -Sdc.history -Sbc.prompt -Sdc.prompt +-Sbc.sigint_reset -Sdc.sigint_reset -Sbc.tty_mode -Sdc.tty_mode -Sbc.history -Sdc.history -sbc.prompt -sdc.prompt +-Sbc.sigint_reset 
-Sdc.sigint_reset -Sbc.tty_mode -Sdc.tty_mode -Sbc.history -sdc.history -Sbc.prompt -sdc.prompt +-Sbc.sigint_reset -Sdc.sigint_reset -Sbc.tty_mode -Sdc.tty_mode -Sbc.history -sdc.history -sbc.prompt -Sdc.prompt +-sbc.sigint_reset -Sdc.sigint_reset -Sbc.tty_mode -Sdc.tty_mode -Sbc.history -Sdc.history -Sbc.prompt -sdc.prompt +-sbc.sigint_reset -Sdc.sigint_reset -Sbc.tty_mode -Sdc.tty_mode -Sbc.history -Sdc.history -sbc.prompt -Sdc.prompt +-sbc.sigint_reset -sdc.sigint_reset -Sbc.tty_mode -Sdc.tty_mode -Sbc.history -Sdc.history -Sbc.prompt -Sdc.prompt +-Sbc.sigint_reset -Sdc.sigint_reset -Sbc.tty_mode -Sdc.tty_mode -Sbc.history -Sdc.history -Sbc.prompt -sdc.prompt +-Sbc.sigint_reset -Sdc.sigint_reset -Sbc.tty_mode -Sdc.tty_mode -Sbc.history -Sdc.history -sbc.prompt -Sdc.prompt +-sbc.sigint_reset -Sdc.sigint_reset -Sbc.tty_mode -Sdc.tty_mode -Sbc.history -Sdc.history -Sbc.prompt -Sdc.prompt +-Sbc.sigint_reset -Sdc.sigint_reset -Sbc.tty_mode -Sdc.tty_mode -Sbc.history -Sdc.history -Sbc.prompt -Sdc.prompt diff --git a/src/bc_lex.c b/src/bc_lex.c index bd03d169ee06..4ab17c824b71 100644 --- a/src/bc_lex.c +++ b/src/bc_lex.c @@ -1,481 +1,481 @@ /* * ***************************************************************************** * * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2018-2021 Gavin D. Howard and contributors. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * * Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * * Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * ***************************************************************************** * * The lexer for bc. * */ #if BC_ENABLED #include #include #include #include #include /** * Lexes an identifier, which may be a keyword. * @param l The lexer. */ static void bc_lex_identifier(BcLex *l) { // We already passed the first character, so we need to be sure to include // it. const char *buf = l->buf + l->i - 1; size_t i; // This loop is simply checking for keywords. for (i = 0; i < bc_lex_kws_len; ++i) { const BcLexKeyword *kw = bc_lex_kws + i; size_t n = BC_LEX_KW_LEN(kw); if (!strncmp(buf, kw->name, n) && !isalnum(buf[n]) && buf[n] != '_') { // If the keyword has been redefined, and redefinition is allowed // (it is not allowed for builtin libraries), break out of the loop // and use it as a name. This depends on the argument parser to // ensure that only non-POSIX keywords get redefined. if (!vm.no_redefine && vm.redefined_kws[i]) break; l->t = BC_LEX_KW_AUTO + (BcLexType) i; // Warn or error, as appropriate for the mode, if the keyword is not // in the POSIX standard. if (!BC_LEX_KW_POSIX(kw)) bc_lex_verr(l, BC_ERR_POSIX_KW, kw->name); // We minus 1 because the index has already been incremented. 
l->i += n - 1; // Already have the token; bail. return; } } // If not a keyword, parse the name. bc_lex_name(l); // POSIX doesn't allow identifiers that are more than one character, so we // might have to warn or error here too. if (BC_ERR(l->str.len - 1 > 1)) bc_lex_verr(l, BC_ERR_POSIX_NAME_LEN, l->str.v); } /** * Parses a bc string. This is separate from dc strings because dc strings need * to be balanced. * @param l The lexer. */ static void bc_lex_string(BcLex *l) { // We need to keep track of newlines to increment them properly. size_t len, nlines, i; const char *buf; char c; bool got_more; l->t = BC_LEX_STR; do { nlines = 0; buf = l->buf; got_more = false; assert(!vm.is_stdin || buf == vm.buffer.v); // Fortunately for us, bc doesn't escape quotes. Instead, the equivalent // is '\q', which makes this loop simpler. for (i = l->i; (c = buf[i]) && c != '"'; ++i) nlines += (c == '\n'); - if (BC_ERR(c == '\0') && !vm.eof && l->is_stdin) + if (BC_ERR(c == '\0') && !vm.eof && (l->is_stdin || l->is_exprs)) got_more = bc_lex_readLine(l); } while (got_more && c != '"'); // If the string did not end properly, barf. if (c != '"') { l->i = i; bc_lex_err(l, BC_ERR_PARSE_STRING); } // Set the temp string to the parsed string. len = i - l->i; bc_vec_string(&l->str, len, l->buf + l->i); l->i = i + 1; l->line += nlines; } /** * This function takes a lexed operator and checks to see if it's the assignment * version, setting the token appropriately. * @param l The lexer. * @param with The token to assign if it is an assignment operator. * @param without The token to assign if it is not an assignment operator. */ static void bc_lex_assign(BcLex *l, BcLexType with, BcLexType without) { if (l->buf[l->i] == '=') { l->i += 1; l->t = with; } else l->t = without; } void bc_lex_token(BcLex *l) { // We increment here. This means that all lexing needs to take that into // account, such as when parsing an identifier. If we don't, the first // character of every identifier would be missing. 
char c = l->buf[l->i++], c2; BC_SIG_ASSERT_LOCKED; // This is the workhorse of the lexer. switch (c) { case '\0': case '\n': case '\t': case '\v': case '\f': case '\r': case ' ': { bc_lex_commonTokens(l, c); break; } case '!': { // Even though it's not an assignment, we can use this. bc_lex_assign(l, BC_LEX_OP_REL_NE, BC_LEX_OP_BOOL_NOT); // POSIX doesn't allow boolean not. if (l->t == BC_LEX_OP_BOOL_NOT) bc_lex_verr(l, BC_ERR_POSIX_BOOL, "!"); break; } case '"': { bc_lex_string(l); break; } case '#': { // POSIX does not allow line comments. bc_lex_err(l, BC_ERR_POSIX_COMMENT); bc_lex_lineComment(l); break; } case '%': { bc_lex_assign(l, BC_LEX_OP_ASSIGN_MODULUS, BC_LEX_OP_MODULUS); break; } case '&': { c2 = l->buf[l->i]; // Either we have boolean and or an error. And boolean and is not // allowed by POSIX. if (BC_NO_ERR(c2 == '&')) { bc_lex_verr(l, BC_ERR_POSIX_BOOL, "&&"); l->i += 1; l->t = BC_LEX_OP_BOOL_AND; } else bc_lex_invalidChar(l, c); break; } #if BC_ENABLE_EXTRA_MATH case '$': { l->t = BC_LEX_OP_TRUNC; break; } case '@': { bc_lex_assign(l, BC_LEX_OP_ASSIGN_PLACES, BC_LEX_OP_PLACES); break; } #endif // BC_ENABLE_EXTRA_MATH case '(': case ')': { l->t = (BcLexType) (c - '(' + BC_LEX_LPAREN); break; } case '*': { bc_lex_assign(l, BC_LEX_OP_ASSIGN_MULTIPLY, BC_LEX_OP_MULTIPLY); break; } case '+': { c2 = l->buf[l->i]; // Have to check for increment first. if (c2 == '+') { l->i += 1; l->t = BC_LEX_OP_INC; } else bc_lex_assign(l, BC_LEX_OP_ASSIGN_PLUS, BC_LEX_OP_PLUS); break; } case ',': { l->t = BC_LEX_COMMA; break; } case '-': { c2 = l->buf[l->i]; // Have to check for decrement first. if (c2 == '-') { l->i += 1; l->t = BC_LEX_OP_DEC; } else bc_lex_assign(l, BC_LEX_OP_ASSIGN_MINUS, BC_LEX_OP_MINUS); break; } case '.': { c2 = l->buf[l->i]; // If it's alone, it's an alias for last. 
if (BC_LEX_NUM_CHAR(c2, true, false)) bc_lex_number(l, c); else { l->t = BC_LEX_KW_LAST; bc_lex_err(l, BC_ERR_POSIX_DOT); } break; } case '/': { c2 = l->buf[l->i]; if (c2 =='*') bc_lex_comment(l); else bc_lex_assign(l, BC_LEX_OP_ASSIGN_DIVIDE, BC_LEX_OP_DIVIDE); break; } case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': // Apparently, GNU bc (and maybe others) allows any uppercase letter as // a number. When single digits, they act like the ones above. When // multi-digit, any letter above the input base is automatically set to // the biggest allowable digit in the input base. case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': { bc_lex_number(l, c); break; } case ';': { l->t = BC_LEX_SCOLON; break; } case '<': { #if BC_ENABLE_EXTRA_MATH c2 = l->buf[l->i]; // Check for shift. if (c2 == '<') { l->i += 1; bc_lex_assign(l, BC_LEX_OP_ASSIGN_LSHIFT, BC_LEX_OP_LSHIFT); break; } #endif // BC_ENABLE_EXTRA_MATH bc_lex_assign(l, BC_LEX_OP_REL_LE, BC_LEX_OP_REL_LT); break; } case '=': { bc_lex_assign(l, BC_LEX_OP_REL_EQ, BC_LEX_OP_ASSIGN); break; } case '>': { #if BC_ENABLE_EXTRA_MATH c2 = l->buf[l->i]; // Check for shift. if (c2 == '>') { l->i += 1; bc_lex_assign(l, BC_LEX_OP_ASSIGN_RSHIFT, BC_LEX_OP_RSHIFT); break; } #endif // BC_ENABLE_EXTRA_MATH bc_lex_assign(l, BC_LEX_OP_REL_GE, BC_LEX_OP_REL_GT); break; } case '[': case ']': { l->t = (BcLexType) (c - '[' + BC_LEX_LBRACKET); break; } case '\\': { // In bc, a backslash+newline is whitespace. 
if (BC_NO_ERR(l->buf[l->i] == '\n')) { l->i += 1; l->t = BC_LEX_WHITESPACE; } else bc_lex_invalidChar(l, c); break; } case '^': { bc_lex_assign(l, BC_LEX_OP_ASSIGN_POWER, BC_LEX_OP_POWER); break; } case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': { bc_lex_identifier(l); break; } case '{': case '}': { l->t = (BcLexType) (c - '{' + BC_LEX_LBRACE); break; } case '|': { c2 = l->buf[l->i]; // Once again, boolean or is not allowed by POSIX. if (BC_NO_ERR(c2 == '|')) { bc_lex_verr(l, BC_ERR_POSIX_BOOL, "||"); l->i += 1; l->t = BC_LEX_OP_BOOL_OR; } else bc_lex_invalidChar(l, c); break; } default: { bc_lex_invalidChar(l, c); } } } #endif // BC_ENABLED diff --git a/src/dc_lex.c b/src/dc_lex.c index 576d50943f25..7f2f5dedc484 100644 --- a/src/dc_lex.c +++ b/src/dc_lex.c @@ -1,278 +1,279 @@ /* * ***************************************************************************** * * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2018-2021 Gavin D. Howard and contributors. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * * Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * * Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * ***************************************************************************** * * The lexer for dc. * */ #if DC_ENABLED #include #include #include bool dc_lex_negCommand(BcLex *l) { char c = l->buf[l->i]; return !BC_LEX_NUM_CHAR(c, false, false); } /** * Processes a dc command that needs a register. This is where the * extended-register extension is implemented. * @param l The lexer. */ static void dc_lex_register(BcLex *l) { // If extended register is enabled and the character is whitespace... if (DC_X && isspace(l->buf[l->i - 1])) { char c; // Eat the whitespace. bc_lex_whitespace(l); c = l->buf[l->i]; // Check for a letter or underscore. if (BC_ERR(!isalpha(c) && c != '_')) bc_lex_verr(l, BC_ERR_PARSE_CHAR, c); // Parse a normal identifier. l->i += 1; bc_lex_name(l); } else { // I don't allow newlines because newlines are used for controlling when // execution happens, and allowing newlines would just be complex. if (BC_ERR(l->buf[l->i - 1] == '\n')) bc_lex_verr(l, BC_ERR_PARSE_CHAR, l->buf[l->i - 1]); // Set the lexer string and token. bc_vec_popAll(&l->str); bc_vec_pushByte(&l->str, (uchar) l->buf[l->i - 1]); bc_vec_pushByte(&l->str, '\0'); l->t = BC_LEX_NAME; } } /** * Parses a dc string. Since dc's strings need to check for balanced brackets, * we can't just parse bc and dc strings with different start and end * characters. Oh, and dc strings need to check for escaped brackets. * @param l The lexer. 
*/ static void dc_lex_string(BcLex *l) { size_t depth, nls, i; char c; bool got_more; // Set the token and clear the string. l->t = BC_LEX_STR; bc_vec_popAll(&l->str); do { depth = 1; nls = 0; got_more = false; assert(!l->is_stdin || l->buf == vm.buffer.v); // This is the meat. As long as we don't run into the NUL byte, and we // have "depth", which means we haven't completely balanced brackets // yet, we continue eating the string. for (i = l->i; (c = l->buf[i]) && depth; ++i) { // Check for escaped brackets and set the depths as appropriate. if (c == '\\') { c = l->buf[++i]; if (!c) break; } else { depth += (c == '['); depth -= (c == ']'); } // We want to adjust the line in the lexer as necessary. nls += (c == '\n'); if (depth) bc_vec_push(&l->str, &c); } if (BC_ERR(c == '\0' && depth)) { - if (!vm.eof && l->is_stdin) got_more = bc_lex_readLine(l); + if (!vm.eof && (l->is_stdin || l->is_exprs)) + got_more = bc_lex_readLine(l); if (got_more) bc_vec_popAll(&l->str); } } while (got_more && depth); // Obviously, if we didn't balance, that's an error. if (BC_ERR(c == '\0' && depth)) { l->i = i; bc_lex_err(l, BC_ERR_PARSE_STRING); } bc_vec_pushByte(&l->str, '\0'); l->i = i; l->line += nls; } /** * Lexes a dc token. This is the dc implementation of BcLexNext. * @param l The lexer. */ void dc_lex_token(BcLex *l) { char c = l->buf[l->i++], c2; size_t i; BC_SIG_ASSERT_LOCKED; // If the last token was a command that needs a register, we need to parse a // register, so do so. for (i = 0; i < dc_lex_regs_len; ++i) { // If the token is a register token, take care of it and return. if (l->last == dc_lex_regs[i]) { dc_lex_register(l); return; } } // These lines are for tokens that easily correspond to one character. We // just set the token. if (c >= '"' && c <= '~' && (l->t = dc_lex_tokens[(c - '"')]) != BC_LEX_INVALID) { return; } // This is the workhorse of the lexer when more complicated things are // needed. 
switch (c) { case '\0': case '\n': case '\t': case '\v': case '\f': case '\r': case ' ': { bc_lex_commonTokens(l, c); break; } // We don't have the ! command, so we always expect certain things // after the exclamation point. case '!': { c2 = l->buf[l->i]; if (c2 == '=') l->t = BC_LEX_OP_REL_NE; else if (c2 == '<') l->t = BC_LEX_OP_REL_LE; else if (c2 == '>') l->t = BC_LEX_OP_REL_GE; else bc_lex_invalidChar(l, c); l->i += 1; break; } case '#': { bc_lex_lineComment(l); break; } case '.': { c2 = l->buf[l->i]; // If the character after is a number, this dot is part of a number. // Otherwise, it's the BSD dot (equivalent to last). if (BC_NO_ERR(BC_LEX_NUM_CHAR(c2, true, false))) bc_lex_number(l, c); else bc_lex_invalidChar(l, c); break; } case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': { bc_lex_number(l, c); break; } case 'g': { c2 = l->buf[l->i]; if (c2 == 'l') l->t = BC_LEX_KW_LINE_LENGTH; else if (c2 == 'z') l->t = BC_LEX_KW_LEADING_ZERO; else bc_lex_invalidChar(l, c2); l->i += 1; break; } case '[': { dc_lex_string(l); break; } default: { bc_lex_invalidChar(l, c); } } } #endif // DC_ENABLED diff --git a/src/lex.c b/src/lex.c index 51e9f31bfa11..3b84734efc55 100644 --- a/src/lex.c +++ b/src/lex.c @@ -1,326 +1,335 @@ /* * ***************************************************************************** * * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2018-2021 Gavin D. Howard and contributors. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * * Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. 
* * * Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * ***************************************************************************** * * Common code for the lexers. * */ #include #include #include #include #include #include #include void bc_lex_invalidChar(BcLex *l, char c) { l->t = BC_LEX_INVALID; bc_lex_verr(l, BC_ERR_PARSE_CHAR, c); } void bc_lex_lineComment(BcLex *l) { l->t = BC_LEX_WHITESPACE; while (l->i < l->len && l->buf[l->i] != '\n') l->i += 1; } void bc_lex_comment(BcLex *l) { size_t i, nlines = 0; const char *buf; bool end = false, got_more; char c; l->i += 1; l->t = BC_LEX_WHITESPACE; // This loop is complex because it might need to request more data from // stdin if the comment is not ended. This loop is taken until the comment // is finished or we have EOF. do { buf = l->buf; got_more = false; // If we are in stdin mode, the buffer must be the one used for stdin. assert(!vm.is_stdin || buf == vm.buffer.v); // Find the end of the comment. 
for (i = l->i; !end; i += !end) { // While we don't have an asterisk, eat, but increment nlines. for (; (c = buf[i]) && c != '*'; ++i) nlines += (c == '\n'); // If this is true, we need to request more data. if (BC_ERR(!c || buf[i + 1] == '\0')) { - // Read more. - if (!vm.eof && l->is_stdin) got_more = bc_lex_readLine(l); + // Read more, if possible. + if (!vm.eof && (l->is_stdin || l->is_exprs)) + got_more = bc_lex_readLine(l); break; } // If this turns true, we found the end. Yay! end = (buf[i + 1] == '/'); } } while (got_more && !end); // If we didn't find the end, barf. if (!end) { l->i = i; bc_lex_err(l, BC_ERR_PARSE_COMMENT); } l->i = i + 2; l->line += nlines; } void bc_lex_whitespace(BcLex *l) { char c; l->t = BC_LEX_WHITESPACE; // Eat. We don't eat newlines because they can be special. for (c = l->buf[l->i]; c != '\n' && isspace(c); c = l->buf[++l->i]); } void bc_lex_commonTokens(BcLex *l, char c) { if (!c) l->t = BC_LEX_EOF; else if (c == '\n') l->t = BC_LEX_NLINE; else bc_lex_whitespace(l); } /** * Parses a number. * @param l The lexer. * @param start The start character. * @param int_only Whether this function should only look for an integer. This * is used to implement the exponent of scientific notation. */ static size_t bc_lex_num(BcLex *l, char start, bool int_only) { const char *buf = l->buf + l->i; size_t i; char c; bool last_pt, pt = (start == '.'); // This loop looks complex. It is not. It is asking if the character is not // a nul byte and it if it a valid num character based on what we have found // thus far, or whether it is a backslash followed by a newline. I can do // i+1 on the buffer because the buffer must have a nul byte. for (i = 0; (c = buf[i]) && (BC_LEX_NUM_CHAR(c, pt, int_only) || (c == '\\' && buf[i + 1] == '\n')); ++i) { // I don't need to test that the next character is a newline because // the loop condition above ensures that. if (c == '\\') { i += 2; // Make sure to eat whitespace at the beginning of the line. 
while(isspace(buf[i]) && buf[i] != '\n') i += 1; c = buf[i]; // If the next character is not a number character, bail. if (!BC_LEX_NUM_CHAR(c, pt, int_only)) break; } // Did we find the radix point? last_pt = (c == '.'); // If we did, and we already have one, then break because it's not part // of this number. if (pt && last_pt) break; // Set whether we have found a radix point. pt = pt || last_pt; bc_vec_push(&l->str, &c); } return i; } void bc_lex_number(BcLex *l, char start) { l->t = BC_LEX_NUMBER; // Make sure the string is clear. bc_vec_popAll(&l->str); bc_vec_push(&l->str, &start); // Parse the number. l->i += bc_lex_num(l, start, false); #if BC_ENABLE_EXTRA_MATH { char c = l->buf[l->i]; // Do we have a number in scientific notation? if (c == 'e') { #if BC_ENABLED // Barf for POSIX. if (BC_IS_POSIX) bc_lex_err(l, BC_ERR_POSIX_EXP_NUM); #endif // BC_ENABLED // Push the e. bc_vec_push(&l->str, &c); l->i += 1; c = l->buf[l->i]; // Check for negative specifically because bc_lex_num() does not. if (c == BC_LEX_NEG_CHAR) { bc_vec_push(&l->str, &c); l->i += 1; c = l->buf[l->i]; } // We must have a number character, so barf if not. if (BC_ERR(!BC_LEX_NUM_CHAR(c, false, true))) bc_lex_verr(l, BC_ERR_PARSE_CHAR, c); // Parse the exponent. l->i += bc_lex_num(l, 0, true); } } #endif // BC_ENABLE_EXTRA_MATH bc_vec_pushByte(&l->str, '\0'); } void bc_lex_name(BcLex *l) { size_t i = 0; const char *buf = l->buf + l->i - 1; char c = buf[i]; l->t = BC_LEX_NAME; // Should be obvious. It's looking for valid characters. while ((c >= 'a' && c <= 'z') || isdigit(c) || c == '_') c = buf[++i]; // Set the string to the identifier. bc_vec_string(&l->str, i, buf); // Increment the index. We minus 1 because it has already been incremented. 
l->i += i - 1; } void bc_lex_init(BcLex *l) { BC_SIG_ASSERT_LOCKED; assert(l != NULL); bc_vec_init(&l->str, sizeof(char), BC_DTOR_NONE); } void bc_lex_free(BcLex *l) { BC_SIG_ASSERT_LOCKED; assert(l != NULL); bc_vec_free(&l->str); } void bc_lex_file(BcLex *l, const char *file) { assert(l != NULL && file != NULL); l->line = 1; vm.file = file; } void bc_lex_next(BcLex *l) { BC_SIG_ASSERT_LOCKED; assert(l != NULL); l->last = l->t; // If this wasn't here, the line number would be off. l->line += (l->i != 0 && l->buf[l->i - 1] == '\n'); // If the last token was EOF, someone called this one too many times. if (BC_ERR(l->last == BC_LEX_EOF)) bc_lex_err(l, BC_ERR_PARSE_EOF); l->t = BC_LEX_EOF; // We are done if this is true. if (l->i == l->len) return; // Loop until failure or we don't have whitespace. This // is so the parser doesn't get inundated with whitespace. do { vm.next(l); } while (l->t == BC_LEX_WHITESPACE); } /** * Updates the buffer and len so that they are not invalidated when the stdin * buffer grows. * @param l The lexer. * @param text The text. * @param len The length of the text. */ static void bc_lex_fixText(BcLex *l, const char *text, size_t len) { l->buf = text; l->len = len; } bool bc_lex_readLine(BcLex *l) { bool good; // These are reversed because they should be already locked, but // bc_vm_readLine() needs them to be unlocked. BC_SIG_UNLOCK; - good = bc_vm_readLine(false); + // Make sure we read from the appropriate place. 
+ if (l->is_stdin) good = bc_vm_readLine(false); + else { + assert(l->is_exprs); + good = bc_vm_readBuf(false); + } BC_SIG_LOCK; bc_lex_fixText(l, vm.buffer.v, vm.buffer.len - 1); return good; } -void bc_lex_text(BcLex *l, const char *text, bool is_stdin) { +void bc_lex_text(BcLex *l, const char *text, bool is_stdin, bool is_exprs) { BC_SIG_ASSERT_LOCKED; assert(l != NULL && text != NULL); bc_lex_fixText(l, text, strlen(text)); l->i = 0; l->t = l->last = BC_LEX_INVALID; l->is_stdin = is_stdin; + l->is_exprs = is_exprs; + + assert(!l->is_stdin || !l->is_exprs); bc_lex_next(l); } diff --git a/src/parse.c b/src/parse.c index 7fdfa31df4ac..ba139f61ba46 100644 --- a/src/parse.c +++ b/src/parse.c @@ -1,249 +1,249 @@ /* * ***************************************************************************** * * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2018-2021 Gavin D. Howard and contributors. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * * Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * * Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * ***************************************************************************** * * Code common to the parsers. * */ #include #include #include #include #include #include #include #include void bc_parse_updateFunc(BcParse *p, size_t fidx) { p->fidx = fidx; p->func = bc_vec_item(&p->prog->fns, fidx); } inline void bc_parse_pushName(const BcParse *p, char *name, bool var) { bc_parse_pushIndex(p, bc_program_search(p->prog, name, var)); } /** * Updates the function, then pushes the instruction and the index. This is a * convenience function. * @param p The parser. * @param inst The instruction to push. * @param idx The index to push. */ static void bc_parse_update(BcParse *p, uchar inst, size_t idx) { bc_parse_updateFunc(p, p->fidx); bc_parse_push(p, inst); bc_parse_pushIndex(p, idx); } void bc_parse_addString(BcParse *p) { size_t idx; idx = bc_program_addString(p->prog, p->l.str.v, p->fidx); // Push the string info. bc_parse_update(p, BC_INST_STR, p->fidx); bc_parse_pushIndex(p, idx); } static void bc_parse_addNum(BcParse *p, const char *string) { BcVec *consts = &p->func->consts; size_t idx; BcConst *c; BcVec *slabs; BC_SIG_ASSERT_LOCKED; // Special case 0. if (bc_parse_zero[0] == string[0] && bc_parse_zero[1] == string[1]) { bc_parse_push(p, BC_INST_ZERO); return; } // Special case 1. if (bc_parse_one[0] == string[0] && bc_parse_one[1] == string[1]) { bc_parse_push(p, BC_INST_ONE); return; } // Get the index. 
idx = consts->len; // Get the right slab. slabs = p->fidx == BC_PROG_MAIN || p->fidx == BC_PROG_READ ? &vm.main_const_slab : &vm.other_slabs; // Push an empty constant. c = bc_vec_pushEmpty(consts); // Set the fields. c->val = bc_slabvec_strdup(slabs, string); c->base = BC_NUM_BIGDIG_MAX; // We need this to be able to tell that the number has not been allocated. bc_num_clear(&c->num); bc_parse_update(p, BC_INST_NUM, idx); } void bc_parse_number(BcParse *p) { #if BC_ENABLE_EXTRA_MATH char *exp = strchr(p->l.str.v, 'e'); size_t idx = SIZE_MAX; // Do we have a number in scientific notation? If so, add a nul byte where // the e is. if (exp != NULL) { idx = ((size_t) (exp - p->l.str.v)); *exp = 0; } #endif // BC_ENABLE_EXTRA_MATH bc_parse_addNum(p, p->l.str.v); #if BC_ENABLE_EXTRA_MATH // If we have a number in scientific notation... if (exp != NULL) { bool neg; // Figure out if the exponent is negative. neg = (*((char*) bc_vec_item(&p->l.str, idx + 1)) == BC_LEX_NEG_CHAR); // Add the number and instruction. bc_parse_addNum(p, bc_vec_item(&p->l.str, idx + 1 + neg)); bc_parse_push(p, BC_INST_LSHIFT + neg); } #endif // BC_ENABLE_EXTRA_MATH } -void bc_parse_text(BcParse *p, const char *text, bool is_stdin) { +void bc_parse_text(BcParse *p, const char *text, bool is_stdin, bool is_exprs) { BC_SIG_LOCK; // Make sure the pointer isn't invalidated. p->func = bc_vec_item(&p->prog->fns, p->fidx); - bc_lex_text(&p->l, text, is_stdin); + bc_lex_text(&p->l, text, is_stdin, is_exprs); BC_SIG_UNLOCK; } void bc_parse_reset(BcParse *p) { BC_SIG_ASSERT_LOCKED; // Reset the function if it isn't main and switch to main. if (p->fidx != BC_PROG_MAIN) { bc_func_reset(p->func); bc_parse_updateFunc(p, BC_PROG_MAIN); } // Reset the lexer. p->l.i = p->l.len; p->l.t = BC_LEX_EOF; #if BC_ENABLED if (BC_IS_BC) { // Get rid of the bc parser state. 
p->auto_part = false; bc_vec_npop(&p->flags, p->flags.len - 1); bc_vec_popAll(&p->exits); bc_vec_popAll(&p->conds); bc_vec_popAll(&p->ops); } #endif // BC_ENABLED // Reset the program. This might clear the error. bc_program_reset(p->prog); // Jump if there is an error. if (BC_ERR(vm.status)) BC_JMP; } #ifndef NDEBUG void bc_parse_free(BcParse *p) { BC_SIG_ASSERT_LOCKED; assert(p != NULL); #if BC_ENABLED if (BC_IS_BC) { bc_vec_free(&p->flags); bc_vec_free(&p->exits); bc_vec_free(&p->conds); bc_vec_free(&p->ops); bc_vec_free(&p->buf); } #endif // BC_ENABLED bc_lex_free(&p->l); } #endif // NDEBUG void bc_parse_init(BcParse *p, BcProgram *prog, size_t func) { #if BC_ENABLED uint16_t flag = 0; #endif // BC_ENABLED BC_SIG_ASSERT_LOCKED; assert(p != NULL && prog != NULL); #if BC_ENABLED if (BC_IS_BC) { // We always want at least one flag set on the flags stack. bc_vec_init(&p->flags, sizeof(uint16_t), BC_DTOR_NONE); bc_vec_push(&p->flags, &flag); bc_vec_init(&p->exits, sizeof(BcInstPtr), BC_DTOR_NONE); bc_vec_init(&p->conds, sizeof(size_t), BC_DTOR_NONE); bc_vec_init(&p->ops, sizeof(BcLexType), BC_DTOR_NONE); bc_vec_init(&p->buf, sizeof(char), BC_DTOR_NONE); p->auto_part = false; } #endif // BC_ENABLED bc_lex_init(&p->l); // Set up the function. p->prog = prog; bc_parse_updateFunc(p, func); } diff --git a/src/program.c b/src/program.c index 8ec1a011a26d..7c38ef547a82 100644 --- a/src/program.c +++ b/src/program.c @@ -1,3347 +1,3347 @@ /* * ***************************************************************************** * * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2018-2021 Gavin D. Howard and contributors. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * * Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. 
* * * Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * ***************************************************************************** * * Code to execute bc programs. * */ #include #include #include #include #include #include #include #include #include #include /** * Quickly sets the const and strs vector pointers in the program. This is a * convenience function. * @param p The program. * @param f The new function. */ static inline void bc_program_setVecs(BcProgram *p, BcFunc *f) { BC_SIG_ASSERT_LOCKED; p->consts = &f->consts; p->strs = &f->strs; } /** * Does a type check for something that expects a number. * @param r The result that will be checked. * @param n The result's number. */ static inline void bc_program_type_num(BcResult *r, BcNum *n) { #if BC_ENABLED // This should have already been taken care of. assert(r->t != BC_RESULT_VOID); #endif // BC_ENABLED if (BC_ERR(!BC_PROG_NUM(r, n))) bc_err(BC_ERR_EXEC_TYPE); } #if BC_ENABLED /** * Does a type check. * @param r The result to check. * @param t The type that the result should be. 
*/ static void bc_program_type_match(BcResult *r, BcType t) { if (BC_ERR((r->t != BC_RESULT_ARRAY) != (!t))) bc_err(BC_ERR_EXEC_TYPE); } #endif // BC_ENABLED /** * Pulls an index out of a bytecode vector and updates the index into the vector * to point to the spot after the index. For more details on bytecode indices, * see the development manual (manuals/development.md#bytecode-indices). * @param code The bytecode vector. * @param bgn An in/out parameter; the index into the vector that will be * updated. * @return The index at @a bgn in the bytecode vector. */ static size_t bc_program_index(const char *restrict code, size_t *restrict bgn) { uchar amt = (uchar) code[(*bgn)++], i = 0; size_t res = 0; for (; i < amt; ++i, ++(*bgn)) { size_t temp = ((size_t) ((int) (uchar) code[*bgn]) & UCHAR_MAX); res |= (temp << (i * CHAR_BIT)); } return res; } /** * Returns a string from a result and its number. * @param p The program. * @param n The number tied to the result. * @return The string corresponding to the result and number. */ static char* bc_program_string(BcProgram *p, const BcNum *n) { BcFunc *f = bc_vec_item(&p->fns, n->rdx); return *((char**) bc_vec_item(&f->strs, n->scale)); } #if BC_ENABLED /** * Prepares the globals for a function call. This is only called when global * stacks are on because it pushes a copy of the current globals onto each of * their respective stacks. * @param p The program. */ static void bc_program_prepGlobals(BcProgram *p) { size_t i; for (i = 0; i < BC_PROG_GLOBALS_LEN; ++i) bc_vec_push(p->globals_v + i, p->globals + i); #if BC_ENABLE_EXTRA_MATH bc_rand_push(&p->rng); #endif // BC_ENABLE_EXTRA_MATH } /** * Pops globals stacks on returning from a function, or in the case of reset, * pops all but one item on each global stack. * @param p The program. * @param reset True if all but one item on each stack should be popped, false * otherwise. 
*/ static void bc_program_popGlobals(BcProgram *p, bool reset) { size_t i; BC_SIG_ASSERT_LOCKED; for (i = 0; i < BC_PROG_GLOBALS_LEN; ++i) { BcVec *v = p->globals_v + i; bc_vec_npop(v, reset ? v->len - 1 : 1); p->globals[i] = BC_PROG_GLOBAL(v); } #if BC_ENABLE_EXTRA_MATH bc_rand_pop(&p->rng, reset); #endif // BC_ENABLE_EXTRA_MATH } /** * Derefeneces an array reference and returns a pointer to the real array. * @param p The program. * @param vec The reference vector. * @return A pointer to the desired array. */ static BcVec* bc_program_dereference(const BcProgram *p, BcVec *vec) { BcVec *v; size_t vidx, nidx, i = 0; // We want to be sure we have a reference vector. assert(vec->size == sizeof(uchar)); // Get the index of the vector in arrs, then the index of the original // referenced vector. vidx = bc_program_index(vec->v, &i); nidx = bc_program_index(vec->v, &i); v = bc_vec_item(bc_vec_item(&p->arrs, vidx), nidx); // We want to be sure we do *not* have a reference vector. assert(v->size != sizeof(uchar)); return v; } #endif // BC_ENABLED /** * Creates a BcNum from a BcBigDig and pushes onto the results stack. This is a * convenience function. * @param p The program. * @param dig The BcBigDig to push onto the results stack. * @param type The type that the pushed result should be. */ static void bc_program_pushBigdig(BcProgram *p, BcBigDig dig, BcResultType type) { BcResult res; res.t = type; BC_SIG_LOCK; bc_num_createFromBigdig(&res.d.n, dig); bc_vec_push(&p->results, &res); BC_SIG_UNLOCK; } size_t bc_program_addString(BcProgram *p, const char *str, size_t fidx) { BcFunc *f; char **str_ptr; BcVec *slabs; BC_SIG_ASSERT_LOCKED; // Push an empty string on the proper vector. f = bc_vec_item(&p->fns, fidx); str_ptr = bc_vec_pushEmpty(&f->strs); // Figure out which slab vector to use. slabs = fidx == BC_PROG_MAIN || fidx == BC_PROG_READ ? 
&vm.main_slabs : &vm.other_slabs; *str_ptr = bc_slabvec_strdup(slabs, str); return f->strs.len - 1; } size_t bc_program_search(BcProgram *p, const char *id, bool var) { BcVec *v, *map; size_t i; BC_SIG_ASSERT_LOCKED; // Grab the right vector and map. v = var ? &p->vars : &p->arrs; map = var ? &p->var_map : &p->arr_map; // We do an insert because the variable might not exist yet. This is because // the parser calls this function. If the insert succeeds, we create a stack // for the variable/array. But regardless, bc_map_insert() gives us the // index of the item in i. if (bc_map_insert(map, id, v->len, &i)) { BcVec *temp = bc_vec_pushEmpty(v); bc_array_init(temp, var); } return ((BcId*) bc_vec_item(map, i))->idx; } /** * Returns the correct variable or array stack for the type. * @param p The program. * @param idx The index of the variable or array in the variable or array * vector. * @param type The type of vector to return. * @return A pointer to the variable or array stack. */ static inline BcVec* bc_program_vec(const BcProgram *p, size_t idx, BcType type) { const BcVec *v = (type == BC_TYPE_VAR) ? &p->vars : &p->arrs; return bc_vec_item(v, idx); } /** * Returns a pointer to the BcNum corresponding to the result. There is one * case, however, where this returns a pointer to a BcVec: if the type of the * result is array. In that case, the pointer is casted to a pointer to BcNum, * but is never used. The function that calls this expecting an array casts the * pointer back. This function is called a lot and needs to be as fast as * possible. * @param p The program. * @param r The result whose number will be returned. * @return The BcNum corresponding to the result. */ static BcNum* bc_program_num(BcProgram *p, BcResult *r) { BcNum *n; #ifdef _WIN32 // Windows made it an error to not initialize this, so shut it up. 
// I don't want to do this on other platforms because this procedure // is one of the most heavily-used, and eliminating the initialization // is a performance win. n = NULL; #endif // _WIN32 switch (r->t) { case BC_RESULT_STR: case BC_RESULT_TEMP: case BC_RESULT_IBASE: case BC_RESULT_SCALE: case BC_RESULT_OBASE: #if BC_ENABLE_EXTRA_MATH case BC_RESULT_SEED: #endif // BC_ENABLE_EXTRA_MATH { n = &r->d.n; break; } case BC_RESULT_VAR: case BC_RESULT_ARRAY: case BC_RESULT_ARRAY_ELEM: { BcVec *v; BcType type = (r->t == BC_RESULT_VAR) ? BC_TYPE_VAR : BC_TYPE_ARRAY; // Get the correct variable or array vector. v = bc_program_vec(p, r->d.loc.loc, type); // Surprisingly enough, the hard case is *not* returning an array; // it's returning an array element. This is because we have to dig // deeper to get *to* the element. That's what the code inside this // if statement does. if (r->t == BC_RESULT_ARRAY_ELEM) { size_t idx = r->d.loc.idx; v = bc_vec_top(v); #if BC_ENABLED // If this is true, we have a reference vector, so dereference // it. The reason we don't need to worry about it for returning // a straight array is because we only care about references // when we access elements of an array that is a reference. That // is this code, so in essence, this line takes care of arrays // as well. if (v->size == sizeof(uchar)) v = bc_program_dereference(p, v); #endif // BC_ENABLED // We want to be sure we got a valid array of numbers. assert(v->size == sizeof(BcNum)); // The bc spec says that if an element is accessed that does not // exist, it should be preinitialized to 0. Well, if we access // an element *way* out there, we have to preinitialize all // elements between the current last element and the actual // accessed element. if (v->len <= idx) { BC_SIG_LOCK; bc_array_expand(v, bc_vm_growSize(idx, 1)); BC_SIG_UNLOCK; } n = bc_vec_item(v, idx); } // This is either a number (for a var) or an array (for an array). // Because bc_vec_top() returns a void*, we don't need to cast. 
else n = bc_vec_top(v); break; } case BC_RESULT_ZERO: { n = &vm.zero; break; } case BC_RESULT_ONE: { n = &vm.one; break; } #if BC_ENABLED // We should never get here; this is taken care of earlier because a // result is expected. case BC_RESULT_VOID: #ifndef NDEBUG { abort(); } #endif // NDEBUG // Fallthrough case BC_RESULT_LAST: { n = &p->last; break; } #endif // BC_ENABLED } return n; } /** * Prepares an operand for use. * @param p The program. * @param r An out parameter; this is set to the pointer to the result that * we care about. * @param n An out parameter; this is set to the pointer to the number that * we care about. * @param idx The index of the result from the top of the results stack. */ static void bc_program_operand(BcProgram *p, BcResult **r, BcNum **n, size_t idx) { *r = bc_vec_item_rev(&p->results, idx); #if BC_ENABLED if (BC_ERR((*r)->t == BC_RESULT_VOID)) bc_err(BC_ERR_EXEC_VOID_VAL); #endif // BC_ENABLED *n = bc_program_num(p, *r); } /** * Prepares the operands of a binary operator. * @param p The program. * @param l An out parameter; this is set to the pointer to the result for * the left operand. * @param ln An out parameter; this is set to the pointer to the number for * the left operand. * @param r An out parameter; this is set to the pointer to the result for * the right operand. * @param rn An out parameter; this is set to the pointer to the number for * the right operand. * @param idx The starting index where the operands are in the results stack, * starting from the top. */ static void bc_program_binPrep(BcProgram *p, BcResult **l, BcNum **ln, BcResult **r, BcNum **rn, size_t idx) { BcResultType lt; assert(p != NULL && l != NULL && ln != NULL && r != NULL && rn != NULL); #ifndef BC_PROG_NO_STACK_CHECK // Check the stack for dc. if (BC_IS_DC) { if (BC_ERR(!BC_PROG_STACK(&p->results, idx + 2))) bc_err(BC_ERR_EXEC_STACK); } #endif // BC_PROG_NO_STACK_CHECK assert(BC_PROG_STACK(&p->results, idx + 2)); // Get the operands. 
bc_program_operand(p, l, ln, idx + 1); bc_program_operand(p, r, rn, idx); lt = (*l)->t; #if BC_ENABLED // bc_program_operand() checked these for us. assert(lt != BC_RESULT_VOID && (*r)->t != BC_RESULT_VOID); #endif // BC_ENABLED // We run this again under these conditions in case any vector has been // reallocated out from under the BcNums or arrays we had. In other words, // this is to fix pointer invalidation. if (lt == (*r)->t && (lt == BC_RESULT_VAR || lt == BC_RESULT_ARRAY_ELEM)) *ln = bc_program_num(p, *l); if (BC_ERR(lt == BC_RESULT_STR)) bc_err(BC_ERR_EXEC_TYPE); } /** * Prepares the operands of a binary operator and type checks them. This is * separate from bc_program_binPrep() because some places want this, others want * bc_program_binPrep(). * @param p The program. * @param l An out parameter; this is set to the pointer to the result for * the left operand. * @param ln An out parameter; this is set to the pointer to the number for * the left operand. * @param r An out parameter; this is set to the pointer to the result for * the right operand. * @param rn An out parameter; this is set to the pointer to the number for * the right operand. * @param idx The starting index where the operands are in the results stack, * starting from the top. */ static void bc_program_binOpPrep(BcProgram *p, BcResult **l, BcNum **ln, BcResult **r, BcNum **rn, size_t idx) { bc_program_binPrep(p, l, ln, r, rn, idx); bc_program_type_num(*l, *ln); bc_program_type_num(*r, *rn); } /** * Prepares the operands of an assignment operator. * @param p The program. * @param l An out parameter; this is set to the pointer to the result for the * left operand. * @param ln An out parameter; this is set to the pointer to the number for the * left operand. * @param r An out parameter; this is set to the pointer to the result for the * right operand. * @param rn An out parameter; this is set to the pointer to the number for the * right operand. 
*/ static void bc_program_assignPrep(BcProgram *p, BcResult **l, BcNum **ln, BcResult **r, BcNum **rn) { BcResultType lt, min; // This is the min non-allowable result type. dc allows strings. min = BC_RESULT_TEMP - ((unsigned int) (BC_IS_BC)); // Prepare the operands. bc_program_binPrep(p, l, ln, r, rn, 0); lt = (*l)->t; // Typecheck the left. if (BC_ERR(lt >= min && lt <= BC_RESULT_ONE)) bc_err(BC_ERR_EXEC_TYPE); // Strings can be assigned to variables. We are already good if we are // assigning a string. bool good = ((*r)->t == BC_RESULT_STR && lt <= BC_RESULT_ARRAY_ELEM); assert(BC_PROG_STR(*rn) || (*r)->t != BC_RESULT_STR); // If not, type check for a number. if (!good) bc_program_type_num(*r, *rn); } /** * Prepares a single operand and type checks it. This is separate from * bc_program_operand() because different places want one or the other. * @param p The program. * @param r An out parameter; this is set to the pointer to the result that * we care about. * @param n An out parameter; this is set to the pointer to the number that * we care about. * @param idx The index of the result from the top of the results stack. */ static void bc_program_prep(BcProgram *p, BcResult **r, BcNum **n, size_t idx) { assert(p != NULL && r != NULL && n != NULL); #ifndef BC_PROG_NO_STACK_CHECK // Check the stack for dc. if (BC_IS_DC) { if (BC_ERR(!BC_PROG_STACK(&p->results, idx + 1))) bc_err(BC_ERR_EXEC_STACK); } #endif // BC_PROG_NO_STACK_CHECK assert(BC_PROG_STACK(&p->results, idx + 1)); bc_program_operand(p, r, n, idx); // dc does not allow strings in this case. bc_program_type_num(*r, *n); } /** * Prepares and returns a clean result for the result of an operation. * @param p The program. * @return A clean result. */ static BcResult* bc_program_prepResult(BcProgram *p) { BcResult *res = bc_vec_pushEmpty(&p->results); bc_result_clear(res); return res; } /** * Prepares a constant for use. 
This parses the constant into a number and then * pushes that number onto the results stack. * @param p The program. * @param code The bytecode vector that we will pull the index of the constant * from. * @param bgn An in/out parameter; marks the start of the index in the * bytecode vector and will be updated to point to after the index. */ static void bc_program_const(BcProgram *p, const char *code, size_t *bgn) { // I lied. I actually push the result first. I can do this because the // result will be popped on error. I also get the constant itself. BcResult *r = bc_program_prepResult(p); BcConst *c = bc_vec_item(p->consts, bc_program_index(code, bgn)); BcBigDig base = BC_PROG_IBASE(p); // Only reparse if the base changed. if (c->base != base) { // Allocate if we haven't yet. if (c->num.num == NULL) { BC_SIG_LOCK; bc_num_init(&c->num, BC_NUM_RDX(strlen(c->val))); BC_SIG_UNLOCK; } // bc_num_parse() should only do operations that cannot fail. bc_num_parse(&c->num, c->val, base); c->base = base; } BC_SIG_LOCK; bc_num_createCopy(&r->d.n, &c->num); BC_SIG_UNLOCK; } /** * Executes a binary operator operation. * @param p The program. * @param inst The instruction corresponding to the binary operator to execute. */ static void bc_program_op(BcProgram *p, uchar inst) { BcResult *opd1, *opd2, *res; BcNum *n1, *n2; size_t idx = inst - BC_INST_POWER; res = bc_program_prepResult(p); bc_program_binOpPrep(p, &opd1, &n1, &opd2, &n2, 1); BC_SIG_LOCK; // Initialize the number with enough space, using the correct // BcNumBinaryOpReq function. This looks weird because it is executing an // item of an array. Rest assured that item is a function. bc_num_init(&res->d.n, bc_program_opReqs[idx](n1, n2, BC_PROG_SCALE(p))); BC_SIG_UNLOCK; assert(BC_NUM_RDX_VALID(n1)); assert(BC_NUM_RDX_VALID(n2)); // Run the operation. This also executes an item of an array. bc_program_ops[idx](n1, n2, &res->d.n, BC_PROG_SCALE(p)); bc_program_retire(p, 1, 2); } /** * Executes a read() or ? command. 
* @param p The program. */ static void bc_program_read(BcProgram *p) { BcStatus s; BcInstPtr ip; size_t i; const char* file; bool is_stdin; BcFunc *f = bc_vec_item(&p->fns, BC_PROG_READ); // If we are already executing a read, that is an error. So look for a read // and barf. for (i = 0; i < p->stack.len; ++i) { BcInstPtr *ip_ptr = bc_vec_item(&p->stack, i); if (ip_ptr->func == BC_PROG_READ) bc_err(BC_ERR_EXEC_REC_READ); } BC_SIG_LOCK; // Save the filename because we are going to overwrite it. file = vm.file; is_stdin = vm.is_stdin; // It is a parse error if there needs to be more than one line, so we unset // this to tell the lexer to not request more. We set it back later. vm.is_stdin = false; if (!BC_PARSE_IS_INITED(&vm.read_prs, p)) { // We need to parse, but we don't want to use the existing parser // because it has state it needs to keep. (It could have a partial parse // state.) So we create a new parser. This parser is in the BcVm struct // so that it is not local, which means that a longjmp() could change // it. bc_parse_init(&vm.read_prs, p, BC_PROG_READ); // We need a separate input buffer; that's why it is also in the BcVm // struct. bc_vec_init(&vm.read_buf, sizeof(char), BC_DTOR_NONE); } // This needs to be updated because the parser could have been used // somewhere else else bc_parse_updateFunc(&vm.read_prs, BC_PROG_READ); BC_SETJMP_LOCKED(exec_err); BC_SIG_UNLOCK; // Set up the lexer and the read function. bc_lex_file(&vm.read_prs.l, bc_program_stdin_name); bc_vec_popAll(&f->code); // Read a line. if (!BC_R) s = bc_read_line(&vm.read_buf, ""); else s = bc_read_line(&vm.read_buf, BC_IS_BC ? "read> " : "?> "); // We should *not* have run into EOF. if (s == BC_STATUS_EOF) bc_err(BC_ERR_EXEC_READ_EXPR); - // Parse *one* expression. - bc_parse_text(&vm.read_prs, vm.read_buf.v, false); + // Parse *one* expression, so is_stdin should be false. 
+ bc_parse_text(&vm.read_prs, vm.read_buf.v, false, false); BC_SIG_LOCK; vm.expr(&vm.read_prs, BC_PARSE_NOREAD | BC_PARSE_NEEDVAL); BC_SIG_UNLOCK; // We *must* have a valid expression. A semicolon cannot end an expression, // although EOF can. if (BC_ERR(vm.read_prs.l.t != BC_LEX_NLINE && vm.read_prs.l.t != BC_LEX_EOF)) { bc_err(BC_ERR_EXEC_READ_EXPR); } #if BC_ENABLED // Push on the globals stack if necessary. if (BC_G) bc_program_prepGlobals(p); #endif // BC_ENABLED // Set up a new BcInstPtr. ip.func = BC_PROG_READ; ip.idx = 0; ip.len = p->results.len; // Update this pointer, just in case. f = bc_vec_item(&p->fns, BC_PROG_READ); // We want a return instruction to simplify things. bc_vec_pushByte(&f->code, vm.read_ret); // This lock is here to make sure dc's tail calls are the same length. BC_SIG_LOCK; bc_vec_push(&p->stack, &ip); #if DC_ENABLED // We need a new tail call entry for dc. if (BC_IS_DC) { size_t temp = 0; bc_vec_push(&p->tail_calls, &temp); } #endif // DC_ENABLED exec_err: BC_SIG_MAYLOCK; vm.is_stdin = is_stdin; vm.file = file; BC_LONGJMP_CONT; } #if BC_ENABLE_EXTRA_MATH /** * Execute a rand(). * @param p The program. */ static void bc_program_rand(BcProgram *p) { BcRand rand = bc_rand_int(&p->rng); bc_program_pushBigdig(p, (BcBigDig) rand, BC_RESULT_TEMP); #ifndef NDEBUG // This is just to ensure that the generated number is correct. I also use // braces because I declare every local at the top of the scope. { BcResult *r = bc_vec_top(&p->results); assert(BC_NUM_RDX_VALID_NP(r->d.n)); } #endif // NDEBUG } #endif // BC_ENABLE_EXTRA_MATH /** * Prints a series of characters, without escapes. * @param str The string (series of characters). */ static void bc_program_printChars(const char *str) { const char *nl; size_t len = vm.nchars + strlen(str); sig_atomic_t lock; BC_SIG_TRYLOCK(lock); bc_file_puts(&vm.fout, bc_flush_save, str); // We need to update the number of characters, so we find the last newline // and set the characters accordingly. 
nl = strrchr(str, '\n'); if (nl != NULL) len = strlen(nl + 1); vm.nchars = len > UINT16_MAX ? UINT16_MAX : (uint16_t) len; BC_SIG_TRYUNLOCK(lock); } /** * Prints a string with escapes. * @param str The string. */ static void bc_program_printString(const char *restrict str) { size_t i, len = strlen(str); #if DC_ENABLED // This is to ensure a nul byte is printed for dc's stream operation. if (!len && BC_IS_DC) { bc_vm_putchar('\0', bc_flush_save); return; } #endif // DC_ENABLED // Loop over the characters, processing escapes and printing the rest. for (i = 0; i < len; ++i) { int c = str[i]; // If we have an escape... if (c == '\\' && i != len - 1) { const char *ptr; // Get the escape character and its companion. c = str[++i]; ptr = strchr(bc_program_esc_chars, c); // If we have a companion character... if (ptr != NULL) { // We need to specially handle a newline. if (c == 'n') { BC_SIG_LOCK; vm.nchars = UINT16_MAX; BC_SIG_UNLOCK; } // Grab the actual character. c = bc_program_esc_seqs[(size_t) (ptr - bc_program_esc_chars)]; } else { // Just print the backslash if there is no companion character. // The following character will be printed later after the outer // if statement. bc_vm_putchar('\\', bc_flush_save); } } bc_vm_putchar(c, bc_flush_save); } } /** * Executes a print. This function handles all printing except streaming. * @param p The program. * @param inst The instruction for the type of print we are doing. * @param idx The index of the result that we are printing. */ static void bc_program_print(BcProgram *p, uchar inst, size_t idx) { BcResult *r; char *str; BcNum *n; bool pop = (inst != BC_INST_PRINT); assert(p != NULL); #ifndef BC_PROG_NO_STACK_CHECK if (BC_IS_DC) { if (BC_ERR(!BC_PROG_STACK(&p->results, idx + 1))) bc_err(BC_ERR_EXEC_STACK); } #endif // BC_PROG_NO_STACK_CHECK assert(BC_PROG_STACK(&p->results, idx + 1)); r = bc_vec_item_rev(&p->results, idx); #if BC_ENABLED // If we have a void value, that's not necessarily an error. 
It is if pop is // true because that means that we are executing a print statement, but // attempting to do a print on a lone void value is allowed because that's // exactly how we want void values used. if (r->t == BC_RESULT_VOID) { if (BC_ERR(pop)) bc_err(BC_ERR_EXEC_VOID_VAL); bc_vec_pop(&p->results); return; } #endif // BC_ENABLED n = bc_program_num(p, r); // If we have a number... if (BC_PROG_NUM(r, n)) { #if BC_ENABLED assert(inst != BC_INST_PRINT_STR); #endif // BC_ENABLED // Print the number. bc_num_print(n, BC_PROG_OBASE(p), !pop); #if BC_ENABLED // Need to store the number in last. if (BC_IS_BC) bc_num_copy(&p->last, n); #endif // BC_ENABLED } else { // We want to flush any stuff in the stdout buffer first. bc_file_flush(&vm.fout, bc_flush_save); str = bc_program_string(p, n); #if BC_ENABLED if (inst == BC_INST_PRINT_STR) bc_program_printChars(str); else #endif // BC_ENABLED { bc_program_printString(str); // Need to print a newline only in this case. if (inst == BC_INST_PRINT) bc_vm_putchar('\n', bc_flush_err); } } // bc always pops. if (BC_IS_BC || pop) bc_vec_pop(&p->results); } void bc_program_negate(BcResult *r, BcNum *n) { bc_num_copy(&r->d.n, n); if (BC_NUM_NONZERO(&r->d.n)) BC_NUM_NEG_TGL_NP(r->d.n); } void bc_program_not(BcResult *r, BcNum *n) { if (!bc_num_cmpZero(n)) bc_num_one(&r->d.n); } #if BC_ENABLE_EXTRA_MATH void bc_program_trunc(BcResult *r, BcNum *n) { bc_num_copy(&r->d.n, n); bc_num_truncate(&r->d.n, n->scale); } #endif // BC_ENABLE_EXTRA_MATH /** * Runs a unary operation. * @param p The program. * @param inst The unary operation. */ static void bc_program_unary(BcProgram *p, uchar inst) { BcResult *res, *ptr; BcNum *num; res = bc_program_prepResult(p); bc_program_prep(p, &ptr, &num, 1); BC_SIG_LOCK; bc_num_init(&res->d.n, num->len); BC_SIG_UNLOCK; // This calls a function that is in an array. bc_program_unarys[inst - BC_INST_NEG](res, num); bc_program_retire(p, 1, 1); } /** * Executes a logical operator. * @param p The program. 
* @param inst  The operator.
 */
static void bc_program_logical(BcProgram *p, uchar inst) {

	BcResult *opd1, *opd2, *res;
	BcNum *n1, *n2;
	bool cond = 0;
	ssize_t cmp;

	res = bc_program_prepResult(p);

	// All logical operators (except boolean not, which is taken care of by
	// bc_program_unary()), are binary operators.
	bc_program_binOpPrep(p, &opd1, &n1, &opd2, &n2, 1);

	// Boolean and and or are not short circuiting: both operands have already
	// been evaluated by the time we get here. They are implemented this way
	// because it is much easier.
	if (inst == BC_INST_BOOL_AND)
		cond = (bc_num_cmpZero(n1) && bc_num_cmpZero(n2));
	else if (inst == BC_INST_BOOL_OR)
		cond = (bc_num_cmpZero(n1) || bc_num_cmpZero(n2));
	else {

		// We have a relational operator, so do a comparison.
		cmp = bc_num_cmp(n1, n2);

		switch (inst) {

			case BC_INST_REL_EQ:
			{
				cond = (cmp == 0);
				break;
			}

			case BC_INST_REL_LE:
			{
				cond = (cmp <= 0);
				break;
			}

			case BC_INST_REL_GE:
			{
				cond = (cmp >= 0);
				break;
			}

			case BC_INST_REL_NE:
			{
				cond = (cmp != 0);
				break;
			}

			case BC_INST_REL_LT:
			{
				cond = (cmp < 0);
				break;
			}

			case BC_INST_REL_GT:
			{
				cond = (cmp > 0);
				break;
			}
#ifndef NDEBUG
			default:
			{
				// There is a bug if we get here.
				abort();
			}
#endif // NDEBUG
		}
	}

	BC_SIG_LOCK;

	// The result is initialized first; it is only set to one below if the
	// condition held.
	bc_num_init(&res->d.n, BC_NUM_DEF_SIZE);

	BC_SIG_UNLOCK;

	if (cond) bc_num_one(&res->d.n);

	bc_program_retire(p, 1, 2);
}

/**
 * Assigns a string to a variable.
 * @param p     The program.
 * @param num   The location of the string as a BcNum.
 * @param v     The stack for the variable.
 * @param push  Whether to push the string or not. To push means to move the
 *              string from the results stack and push it onto the variable
 *              stack.
 */
static void bc_program_assignStr(BcProgram *p, BcNum *num, BcVec *v, bool push)
{
	BcNum *n;

	// When not pushing, two results are consumed (1 + !push == 2); when
	// pushing, only one is.
	assert(BC_PROG_STACK(&p->results, 1 + !push));
	assert(num != NULL && num->num == NULL && num->cap == 0);

	// If we are not pushing onto the variable stack, we need to replace the
	// top of the variable stack.
	if (!push) bc_vec_pop(v);

	bc_vec_npop(&p->results, 1 + !push);

	n = bc_vec_pushEmpty(v);

	// We can just copy because the num should not have allocated anything.
	memcpy(n, num, sizeof(BcNum));
}

/**
 * Copies a value to a variable. This is used for storing in dc as well as to
 * set function parameters to arguments in bc.
 * @param p     The program.
 * @param idx   The index of the variable or array to copy to.
 * @param t     The type to copy to. This could be a variable or an array.
 * @param last  Whether to grab the last item on the variable stack or not (for
 *              bc function parameters). This is important because if a new
 *              value has been pushed to the variable already, we need to grab
 *              the value pushed before. This happens when you have a parameter
 *              named something like "x", and a variable "x" is passed to
 *              another parameter.
 */
static void bc_program_copyToVar(BcProgram *p, size_t idx, BcType t, bool last)
{
	BcResult *ptr = NULL, r;
	BcVec *vec;
	BcNum *n = NULL;
	bool var = (t == BC_TYPE_VAR);

#if DC_ENABLED
	// Check the stack for dc.
	if (BC_IS_DC) {
		if (BC_ERR(!BC_PROG_STACK(&p->results, 1))) bc_err(BC_ERR_EXEC_STACK);
	}
#endif // DC_ENABLED

	assert(BC_PROG_STACK(&p->results, 1));

	bc_program_operand(p, &ptr, &n, 0);

#if BC_ENABLED
	// Get the variable for a bc function call.
	if (BC_IS_BC)
	{
		// Type match the result.
		bc_program_type_match(ptr, t);

		// Get the variable or array, taking care to get the real item. We take
		// care of last with arrays later.
		if (!last && var)
			n = bc_vec_item_rev(bc_program_vec(p, ptr->d.loc.loc, t), 1);
	}
#endif // BC_ENABLED

	vec = bc_program_vec(p, idx, t);

	// We can shortcut in dc if it's assigning a string by using
	// bc_program_assignStr().
	if (ptr->t == BC_RESULT_STR) {

		assert(BC_PROG_STR(n));

		// A string can only be copied to a variable, not to an array.
		if (BC_ERR(!var)) bc_err(BC_ERR_EXEC_TYPE);

		bc_program_assignStr(p, n, vec, true);

		return;
	}

	BC_SIG_LOCK;

	// Just create and copy for a normal variable.
	if (var) {
		if (BC_PROG_STR(n)) memcpy(&r.d.n, n, sizeof(BcNum));
		else bc_num_createCopy(&r.d.n, n);
	}
	else {

		// If we get here, we are handling an array. This is one place we need
		// to cast the number from bc_program_num() to a vector.
		BcVec *v = (BcVec*) n, *rv = &r.d.v;

#if BC_ENABLED

		if (BC_IS_BC) {

			BcVec *parent;
			bool ref, ref_size;

			// We need to figure out if the parameter is a reference or not and
			// construct the reference vector, if necessary. So this gets the
			// parent stack for the array.
			parent = bc_program_vec(p, ptr->d.loc.loc, t);
			assert(parent != NULL);

			// This takes care of last for arrays. Mostly. (!last is 1 here, so
			// this grabs the item one below the top of the parent stack.)
			if (!last) v = bc_vec_item_rev(parent, !last);
			assert(v != NULL);

			// True if we are using a reference.
			ref = (v->size == sizeof(BcNum) && t == BC_TYPE_REF);

			// True if we already have a reference vector. This is slightly
			// (okay, a lot; it just doesn't look that way) different from
			// above. The above means that we need to construct a reference
			// vector, whereas this means that we have one and we might have to
			// *dereference* it.
			ref_size = (v->size == sizeof(uchar));

			// If we *should* have a reference.
			if (ref || (ref_size && t == BC_TYPE_REF)) {

				// Create a new reference vector.
				bc_vec_init(rv, sizeof(uchar), BC_DTOR_NONE);

				// If this is true, then we need to construct a reference.
				if (ref) {

					assert(parent->len >= (size_t) (!last + 1));

					// Make sure the pointer was not invalidated.
					vec = bc_program_vec(p, idx, t);

					// Push the indices onto the reference vector. This takes
					// care of last; it ensures the reference goes to the right
					// place.
					bc_vec_pushIndex(rv, ptr->d.loc.loc);
					bc_vec_pushIndex(rv, parent->len - !last - 1);
				}
				// If we get here, we are copying a ref to a ref. Just push a
				// copy of all of the bytes.
				else bc_vec_npush(rv, v->len * sizeof(uchar), v->v);

				// Push the reference vector onto the array stack and pop the
				// source.
				bc_vec_push(vec, &r.d);
				bc_vec_pop(&p->results);

				// We need to return early to avoid executing code that we must
				// not touch.
				BC_SIG_UNLOCK;
				return;
			}
			// If we get here, we have a reference, but we need an array, so
			// dereference the array.
			else if (ref_size && t != BC_TYPE_REF)
				v = bc_program_dereference(p, v);
		}
#endif // BC_ENABLED

		// If we get here, we need to copy the array because in bc, all
		// arguments are passed by value. Yes, this is expensive.
		bc_array_init(rv, true);
		bc_array_copy(rv, v);
	}

	// Push the vector onto the array stack and pop the source.
	bc_vec_push(vec, &r.d);
	bc_vec_pop(&p->results);

	BC_SIG_UNLOCK;
}

/**
 * Executes an assignment operator.
 * @param p     The program.
 * @param inst  The assignment operator to execute.
 */
static void bc_program_assign(BcProgram *p, uchar inst) {

	// The local use_val is true when the assigned value needs to be copied.
	BcResult *left, *right, res;
	BcNum *l, *r;
	bool ob, sc, use_val = BC_INST_USE_VAL(inst);

	bc_program_assignPrep(p, &left, &l, &right, &r);

	// Assigning to a string should be impossible simply because of the parse.
	assert(left->t != BC_RESULT_STR);

	// If we are assigning a string...
	if (right->t == BC_RESULT_STR) {

		assert(BC_PROG_STR(r));

#if BC_ENABLED
		// Only plain assignment of a string is allowed; math assignment
		// operators with a string on the right are a type error.
		if (inst != BC_INST_ASSIGN && inst != BC_INST_ASSIGN_NO_VAL)
			bc_err(BC_ERR_EXEC_TYPE);
#endif // BC_ENABLED

		// If we are assigning to an array element...
		if (left->t == BC_RESULT_ARRAY_ELEM) {

			BC_SIG_LOCK;

			// We need to free the number and clear it.
			bc_num_free(l);

			memcpy(l, r, sizeof(BcNum));

			// Now we can pop the results.
			bc_vec_npop(&p->results, 2);

			BC_SIG_UNLOCK;
		}
		else {

			// If we get here, we are assigning to a variable, which we can use
			// bc_program_assignStr() for.
			BcVec *v = bc_program_vec(p, left->d.loc.loc, BC_TYPE_VAR);
			bc_program_assignStr(p, r, v, false);
		}

#if BC_ENABLED

		// If this is true, the value is going to be used again, so we want to
		// push a temporary with the string.
		if (inst == BC_INST_ASSIGN) {
			res.t = BC_RESULT_STR;
			memcpy(&res.d.n, r, sizeof(BcNum));
			bc_vec_push(&p->results, &res);
		}

#endif // BC_ENABLED

		// By using bc_program_assignStr(), we short-circuited this, so return.
		return;
	}

	// If we have a normal assignment operator, not a math one...
	if (BC_INST_IS_ASSIGN(inst)) {

		// Assigning to a variable that has a string here is fine because there
		// is no math done on it.

		// BC_RESULT_TEMP, BC_RESULT_IBASE, BC_RESULT_OBASE, BC_RESULT_SCALE,
		// and BC_RESULT_SEED all have temporary copies. Because that's the
		// case, we can free the left and just move the value over. We set the
		// type of right to BC_RESULT_ZERO in order to prevent it from being
		// freed. We also don't have to worry about BC_RESULT_STR because it's
		// taken care of above.
		if (right->t == BC_RESULT_TEMP || right->t >= BC_RESULT_IBASE) {

			BC_SIG_LOCK;

			bc_num_free(l);
			memcpy(l, r, sizeof(BcNum));
			right->t = BC_RESULT_ZERO;

			BC_SIG_UNLOCK;
		}
		// Copy over.
		else bc_num_copy(l, r);
	}
#if BC_ENABLED
	else {

		// If we get here, we are doing a math assignment (+=, -=, etc.). So
		// we need to prepare for a binary operator.
		BcBigDig scale = BC_PROG_SCALE(p);

		// At this point, the left side could still be a string because it could
		// be a variable that has the string. If that's the case, we have a type
		// error.
		if (BC_PROG_STR(l)) bc_err(BC_ERR_EXEC_TYPE);

		// Get the right type of assignment operator, whether val is used or
		// NO_VAL for performance.
		if (!use_val)
			inst -= (BC_INST_ASSIGN_POWER_NO_VAL - BC_INST_ASSIGN_POWER);

		assert(BC_NUM_RDX_VALID(l));
		assert(BC_NUM_RDX_VALID(r));

		// Run the actual operation. We do not need to worry about reallocating
		// l because bc_num_binary() does that behind the scenes for us.
		bc_program_ops[inst - BC_INST_ASSIGN_POWER](l, r, l, scale);
	}
#endif // BC_ENABLED

	ob = (left->t == BC_RESULT_OBASE);
	sc = (left->t == BC_RESULT_SCALE);

	// The globals need special handling, especially the non-seed ones. The
	// first part of the if statement handles them.
	if (ob || sc || left->t == BC_RESULT_IBASE) {

		BcVec *v;
		BcBigDig *ptr, *ptr_t, val, max, min;

		// Get the actual value.
		val = bc_num_bigdig(l);

		// Scale needs handling separate from ibase and obase.
		if (sc) {

			// Set the min and max.
			min = 0;
			max = vm.maxes[BC_PROG_GLOBALS_SCALE];

			// Get a pointer to the stack and to the current value.
			v = p->globals_v + BC_PROG_GLOBALS_SCALE;
			ptr_t = p->globals + BC_PROG_GLOBALS_SCALE;
		}
		else {

			// Set the min and max. With extra math enabled, obase may go down
			// to 0 in dc and in non-POSIX bc.
			min = BC_NUM_MIN_BASE;
			if (BC_ENABLE_EXTRA_MATH && ob && (BC_IS_DC || !BC_IS_POSIX))
				min = 0;
			max = vm.maxes[ob + BC_PROG_GLOBALS_IBASE];

			// Get a pointer to the stack and to the current value. Adding ob
			// (0 or 1) selects between the ibase and obase slots.
			v = p->globals_v + BC_PROG_GLOBALS_IBASE + ob;
			ptr_t = p->globals + BC_PROG_GLOBALS_IBASE + ob;
		}

		// Check for error.
		if (BC_ERR(val > max || val < min)) {

			// This grabs the right error.
			BcErr e = left->t - BC_RESULT_IBASE + BC_ERR_EXEC_IBASE;

			bc_verr(e, min, max);
		}

		// Set the top of the stack and the actual global value.
		ptr = bc_vec_top(v);
		*ptr = val;
		*ptr_t = val;
	}
#if BC_ENABLE_EXTRA_MATH
	// To assign to seed, let bc_num_rng() do its magic.
	else if (left->t == BC_RESULT_SEED) bc_num_rng(l, &p->rng);
#endif // BC_ENABLE_EXTRA_MATH

	BC_SIG_LOCK;

	// If we needed to use the value, then we need to copy it. Otherwise, we can
	// pop indiscriminately. Oh, and the copy should be a BC_RESULT_TEMP.
	if (use_val) {
		bc_num_createCopy(&res.d.n, l);
		res.t = BC_RESULT_TEMP;
		bc_vec_npop(&p->results, 2);
		bc_vec_push(&p->results, &res);
	}
	else bc_vec_npop(&p->results, 2);

	BC_SIG_UNLOCK;
}

/**
 * Pushes a variable's value onto the results stack.
 * @param p     The program.
 * @param code  The bytecode vector to pull the variable's index out of.
 * @param bgn   An in/out parameter; the start of the index in the bytecode
 *              vector, and will be updated to point after the index on return.
 * @param pop   True if the variable's value should be popped off its stack.
 *              This is only used in dc.
* @param copy True if the variable's value should be copied to the results * stack. This is only used in dc. */ static void bc_program_pushVar(BcProgram *p, const char *restrict code, size_t *restrict bgn, bool pop, bool copy) { BcResult r; size_t idx = bc_program_index(code, bgn); // Set the result appropriately. r.t = BC_RESULT_VAR; r.d.loc.loc = idx; #if DC_ENABLED // If this condition is true, then we have the hard case, where we have to // adjust dc registers. if (BC_IS_DC && (pop || copy)) { // Get the stack for the variable and the number at the top. BcVec *v = bc_program_vec(p, idx, BC_TYPE_VAR); BcNum *num = bc_vec_top(v); // Ensure there are enough elements on the stack. if (BC_ERR(!BC_PROG_STACK(v, 2 - copy))) { const char *name = bc_map_name(&p->var_map, idx); bc_verr(BC_ERR_EXEC_STACK_REGISTER, name); } assert(BC_PROG_STACK(v, 2 - copy)); // If the top of the stack is actually a number... if (!BC_PROG_STR(num)) { BC_SIG_LOCK; // Create a copy to go onto the results stack as appropriate. r.t = BC_RESULT_TEMP; bc_num_createCopy(&r.d.n, num); // If we are not actually copying, we need to do a replace, so pop. if (!copy) bc_vec_pop(v); bc_vec_push(&p->results, &r); BC_SIG_UNLOCK; return; } else { // Set the string result. We can just memcpy because all of the // fields in the num should be cleared. memcpy(&r.d.n, num, sizeof(BcNum)); r.t = BC_RESULT_STR; } // If we are not actually copying, we need to do a replace, so pop. if (!copy) bc_vec_pop(v); } #endif // DC_ENABLED bc_vec_push(&p->results, &r); } /** * Pushes an array or an array element onto the results stack. * @param p The program. * @param code The bytecode vector to pull the variable's index out of. * @param bgn An in/out parameter; the start of the index in the bytecode * vector, and will be updated to point after the index on return. * @param inst The instruction; whether to push an array or an array element. 
*/ static void bc_program_pushArray(BcProgram *p, const char *restrict code, size_t *restrict bgn, uchar inst) { BcResult r, *operand; BcNum *num; BcBigDig temp; // Get the index of the array. r.d.loc.loc = bc_program_index(code, bgn); // Doing an array is easy; just set the result type and finish. if (inst == BC_INST_ARRAY) { r.t = BC_RESULT_ARRAY; bc_vec_push(&p->results, &r); return; } // Grab the top element of the results stack for the array index. bc_program_prep(p, &operand, &num, 0); temp = bc_num_bigdig(num); // Set the result. r.t = BC_RESULT_ARRAY_ELEM; r.d.loc.idx = (size_t) temp; BC_SIG_LOCK; // Pop the index and push the element. bc_vec_pop(&p->results); bc_vec_push(&p->results, &r); BC_SIG_UNLOCK; } #if BC_ENABLED /** * Executes an increment or decrement operator. This only handles postfix * inc/dec because the parser translates prefix inc/dec into an assignment where * the value is used. * @param p The program. * @param inst The instruction; whether to do an increment or decrement. */ static void bc_program_incdec(BcProgram *p, uchar inst) { BcResult *ptr, res, copy; BcNum *num; uchar inst2; bc_program_prep(p, &ptr, &num, 0); BC_SIG_LOCK; // We need a copy from *before* the operation. copy.t = BC_RESULT_TEMP; bc_num_createCopy(©.d.n, num); BC_SETJMP_LOCKED(exit); BC_SIG_UNLOCK; // Create the proper assignment. res.t = BC_RESULT_ONE; inst2 = BC_INST_ASSIGN_PLUS_NO_VAL + (inst & 0x01); bc_vec_push(&p->results, &res); bc_program_assign(p, inst2); BC_SIG_LOCK; bc_vec_push(&p->results, ©); BC_UNSETJMP; BC_SIG_UNLOCK; // No need to free the copy here because we pushed it onto the stack. return; exit: BC_SIG_MAYLOCK; bc_num_free(©.d.n); BC_LONGJMP_CONT; } /** * Executes a function call for bc. * @param p The program. * @param code The bytecode vector to pull the number of arguments and the * function index out of. * @param bgn An in/out parameter; the start of the indices in the bytecode * vector, and will be updated to point after the indices on * return. 
*/
static void bc_program_call(BcProgram *p, const char *restrict code,
                            size_t *restrict bgn)
{
	BcInstPtr ip;
	size_t i, nargs;
	BcFunc *f;
	BcVec *v;
	BcAuto *a;
	BcResult *arg;

	// Pull the number of arguments out of the bytecode vector.
	nargs = bc_program_index(code, bgn);

	// Set up instruction pointer.
	ip.idx = 0;
	ip.func = bc_program_index(code, bgn);
	f = bc_vec_item(&p->fns, ip.func);

	// Error checking: the function must have code (i.e., be defined), and the
	// number of arguments must match the number of parameters.
	if (BC_ERR(!f->code.len)) bc_verr(BC_ERR_EXEC_UNDEF_FUNC, f->name);
	if (BC_ERR(nargs != f->nparams))
		bc_verr(BC_ERR_EXEC_PARAMS, f->nparams, nargs);

	// Set the length of the results stack. We discount the arguments, of
	// course.
	ip.len = p->results.len - nargs;

	assert(BC_PROG_STACK(&p->results, nargs));

	// Prepare the globals' stacks.
	if (BC_G) bc_program_prepGlobals(p);

	// Push the arguments onto the stacks of their respective parameters. The
	// arguments are taken from the top of the results stack, so the parameters
	// are walked in reverse (nargs - 1 - i).
	for (i = 0; i < nargs; ++i) {

		size_t j;
		bool last = true;

		arg = bc_vec_top(&p->results);
		if (BC_ERR(arg->t == BC_RESULT_VOID)) bc_err(BC_ERR_EXEC_VOID_VAL);

		// Get the corresponding parameter.
		a = bc_vec_item(&f->autos, nargs - 1 - i);

		// If I have already pushed to a var, I need to make sure I
		// get the previous version, not the already pushed one. This condition
		// must be true for that to even be possible.
		if (arg->t == BC_RESULT_VAR || arg->t == BC_RESULT_ARRAY) {

			// Loop through all of the previous parameters.
			for (j = 0; j < i && last; ++j) {

				BcAuto *aptr = bc_vec_item(&f->autos, nargs - 1 - j);

				// This condition is true if there is a previous parameter with
				// the same name *and* type because variables and arrays do not
				// interfere with each other.
				last = (arg->d.loc.loc != aptr->idx ||
				        (!aptr->type) != (arg->t == BC_RESULT_VAR));
			}
		}

		// Actually push the value onto the parameter's stack.
		bc_program_copyToVar(p, a->idx, a->type, last);
	}

	BC_SIG_LOCK;

	// Push zeroes onto the stacks of the auto variables. The loop continues
	// from i == nargs, so only the autos after the parameters are handled.
	for (; i < f->autos.len; ++i) {

		// Get the auto and its stack.
		a = bc_vec_item(&f->autos, i);
		v = bc_program_vec(p, a->idx, a->type);

		// If a variable, just push a 0; otherwise, push an array.
		if (a->type == BC_TYPE_VAR) {
			BcNum *n = bc_vec_pushEmpty(v);
			bc_num_init(n, BC_NUM_DEF_SIZE);
		}
		else {

			BcVec *v2;

			assert(a->type == BC_TYPE_ARRAY);

			v2 = bc_vec_pushEmpty(v);
			bc_array_init(v2, true);
		}
	}

	// Push the instruction pointer onto the execution stack.
	bc_vec_push(&p->stack, &ip);

	BC_SIG_UNLOCK;
}

/**
 * Executes a return instruction.
 * @param p     The program.
 * @param inst  The return instruction. bc can return void, and we need to know
 *              if it is.
 */
static void bc_program_return(BcProgram *p, uchar inst) {

	BcResult *res;
	BcFunc *f;
	BcInstPtr *ip;
	size_t i, nresults;

	// Get the instruction pointer.
	ip = bc_vec_top(&p->stack);

	// Get the difference between the actual number of results and the number of
	// results the caller expects.
	nresults = p->results.len - ip->len;

	// If this isn't true, there was a missing call somewhere.
	assert(BC_PROG_STACK(&p->stack, 2));

	// If this isn't true, the parser screwed up by giving us no value when we
	// expected one, or giving us a value when we expected none.
	assert(BC_PROG_STACK(&p->results, ip->len + (inst == BC_INST_RET)));

	// Get the function we are returning from.
	f = bc_vec_item(&p->fns, ip->func);

	res = bc_program_prepResult(p);

	// If we are returning normally...
	if (inst == BC_INST_RET) {

		BcNum *num;
		BcResult *operand;

		// Prepare and copy the return value.
		bc_program_operand(p, &operand, &num, 1);

		if (BC_PROG_STR(num)) {

			// We need to set this because otherwise, it will be a
			// BC_RESULT_TEMP, and BC_RESULT_TEMP needs an actual number to make
			// it easier to do type checking.
			res->t = BC_RESULT_STR;

			memcpy(&res->d.n, num, sizeof(BcNum));
		}
		else {

			// The matching unlock is the BC_SIG_MAYUNLOCK below.
			BC_SIG_LOCK;

			bc_num_createCopy(&res->d.n, num);
		}
	}
	// Void is easy; set the result.
	else if (inst == BC_INST_RET_VOID) res->t = BC_RESULT_VOID;
	else {

		// The matching unlock is the BC_SIG_MAYUNLOCK below.
		BC_SIG_LOCK;

		// If we get here, the instruction is for returning a zero, so do that.
		bc_num_init(&res->d.n, BC_NUM_DEF_SIZE);
	}

	// Only some of the branches above took the lock, hence MAYUNLOCK.
	BC_SIG_MAYUNLOCK;

	// We need to pop items off of the stacks of arguments and autos as well.
	for (i = 0; i < f->autos.len; ++i) {

		BcAuto *a = bc_vec_item(&f->autos, i);
		BcVec *v = bc_program_vec(p, a->idx, a->type);

		bc_vec_pop(v);
	}

	BC_SIG_LOCK;

	// When we retire, pop all of the unused results.
	bc_program_retire(p, 1, nresults);

	// Pop the globals, if necessary.
	if (BC_G) bc_program_popGlobals(p, false);

	// Pop the stack. This is what causes the function to actually "return."
	bc_vec_pop(&p->stack);

	BC_SIG_UNLOCK;
}
#endif // BC_ENABLED

/**
 * Executes a builtin function.
 * @param p     The program.
 * @param inst  The builtin to execute.
 */
static void bc_program_builtin(BcProgram *p, uchar inst) {

	BcResult *opd, *res;
	BcNum *num;
	bool len = (inst == BC_INST_LENGTH);

	// Ensure we have a valid builtin.
#if BC_ENABLE_EXTRA_MATH
	assert(inst >= BC_INST_LENGTH && inst <= BC_INST_IRAND);
#else // BC_ENABLE_EXTRA_MATH
	assert(inst >= BC_INST_LENGTH && inst <= BC_INST_ABS);
#endif // BC_ENABLE_EXTRA_MATH

#ifndef BC_PROG_NO_STACK_CHECK
	// Check stack for dc.
	if (BC_IS_DC && BC_ERR(!BC_PROG_STACK(&p->results, 1)))
		bc_err(BC_ERR_EXEC_STACK);
#endif // BC_PROG_NO_STACK_CHECK

	assert(BC_PROG_STACK(&p->results, 1));

	res = bc_program_prepResult(p);

	bc_program_operand(p, &opd, &num, 1);

	assert(num != NULL);

	// We need to ensure that strings and arrays aren't passed to most builtins.
	// The scale function can take strings in dc.
	if (!len && (inst != BC_INST_SCALE_FUNC || BC_IS_BC))
		bc_program_type_num(opd, num);

	// Square root is easy.
	if (inst == BC_INST_SQRT) bc_num_sqrt(num, &res->d.n, BC_PROG_SCALE(p));

	// Absolute value is easy: copy the operand and clear the sign.
	else if (inst == BC_INST_ABS) {

		BC_SIG_LOCK;

		bc_num_createCopy(&res->d.n, num);

		BC_SIG_UNLOCK;

		BC_NUM_NEG_CLR_NP(res->d.n);
	}
#if BC_ENABLE_EXTRA_MATH
	// irand() is easy.
	else if (inst == BC_INST_IRAND) {

		BC_SIG_LOCK;

		bc_num_init(&res->d.n, num->len - BC_NUM_RDX_VAL(num));

		BC_SIG_UNLOCK;

		bc_num_irand(num, &res->d.n, &p->rng);
	}
#endif // BC_ENABLE_EXTRA_MATH

	// Everything else is...not easy.
	else {

		BcBigDig val = 0;

		// Well, scale() is easy, but length() is not.
		if (len) {

			// If we are bc and we have an array...
			if (opd->t == BC_RESULT_ARRAY) {

				// Yes, this is one place where we need to cast the number from
				// bc_program_num() to a vector.
				BcVec *v = (BcVec*) num;

#if BC_ENABLED
				// Dereference the array, if necessary.
				if (BC_IS_BC && v->size == sizeof(uchar))
					v = bc_program_dereference(p, v);
#endif // BC_ENABLED

				assert(v->size == sizeof(BcNum));

				val = (BcBigDig) v->len;
			}
			else {

				// If the item is a string...
				if (!BC_PROG_NUM(opd, num)) {

					char *str;

					// Get the string, then get the length.
					str = bc_program_string(p, num);
					val = (BcBigDig) strlen(str);
				}
				else
				{
					// Calculate the length of the number.
					val = (BcBigDig) bc_num_len(num);
				}
			}
		}

		// Like I said; scale() is actually easy. It just also needs the integer
		// conversion that length() does. If neither branch runs (dc's scale
		// function given a string), val stays at its initial 0.
		else if (BC_IS_BC || BC_PROG_NUM(opd, num))
			val = (BcBigDig) bc_num_scale(num);

		BC_SIG_LOCK;

		// Create the result.
		bc_num_createFromBigdig(&res->d.n, val);

		BC_SIG_UNLOCK;
	}

	bc_program_retire(p, 1, 1);
}

/**
 * Executes a divmod.
 * @param p  The program.
 */
static void bc_program_divmod(BcProgram *p) {

	BcResult *opd1, *opd2, *res, *res2;
	BcNum *n1, *n2;
	size_t req;

	// We grow first to avoid pointer invalidation.
	bc_vec_grow(&p->results, 2);

	// We don't need to update the pointer because
	// the capacity is enough due to the line above.
	res2 = bc_program_prepResult(p);
	res = bc_program_prepResult(p);

	// Prepare the operands.
	bc_program_binOpPrep(p, &opd1, &n1, &opd2, &n2, 2);

	req = bc_num_mulReq(n1, n2, BC_PROG_SCALE(p));

	BC_SIG_LOCK;

	// Initialize the results.
	bc_num_init(&res->d.n, req);
	bc_num_init(&res2->d.n, req);

	BC_SIG_UNLOCK;

	// Execute.
	bc_num_divmod(n1, n2, &res2->d.n, &res->d.n, BC_PROG_SCALE(p));

	bc_program_retire(p, 2, 2);
}

/**
 * Executes modular exponentiation.
 * @param p  The program.
 */
static void bc_program_modexp(BcProgram *p) {

	BcResult *r1, *r2, *r3, *res;
	BcNum *n1, *n2, *n3;

#if DC_ENABLED

	// Check the stack.
	if (BC_IS_DC && BC_ERR(!BC_PROG_STACK(&p->results, 3)))
		bc_err(BC_ERR_EXEC_STACK);

#endif // DC_ENABLED

	assert(BC_PROG_STACK(&p->results, 3));

	res = bc_program_prepResult(p);

	// Get the first operand and typecheck.
	bc_program_operand(p, &r1, &n1, 3);
	bc_program_type_num(r1, n1);

	// Get the last two operands.
	bc_program_binOpPrep(p, &r2, &n2, &r3, &n3, 1);

	// Make sure that the values have their pointers updated, if necessary.
	// Only array elements are possible because this is dc.
	if (r1->t == BC_RESULT_ARRAY_ELEM && (r1->t == r2->t || r1->t == r3->t))
		n1 = bc_program_num(p, r1);

	BC_SIG_LOCK;

	bc_num_init(&res->d.n, n3->len);

	BC_SIG_UNLOCK;

	bc_num_modexp(n1, n2, n3, &res->d.n);

	bc_program_retire(p, 1, 3);
}

/**
 * Asciifies a number for dc. This is a helper for bc_program_asciify().
 * @param p  The program.
 * @param n  The number to asciify.
 * @return   The number's truncated absolute value modulo p->strmb, as a uchar.
 */
static uchar bc_program_asciifyNum(BcProgram *p, BcNum *n) {

	BcNum num;
	BcBigDig val;

#ifndef NDEBUG
	// This is entirely to satisfy a useless scan-build error.
	val = 0;
#endif // NDEBUG

	// Clear first so that the free at num_err is safe even if the copy below
	// never happens.
	bc_num_clear(&num);

	BC_SETJMP(num_err);

	BC_SIG_LOCK;

	bc_num_createCopy(&num, n);

	BC_SIG_UNLOCK;

	// We want to clear the scale and sign for easy mod later.
	bc_num_truncate(&num, num.scale);
	BC_NUM_NEG_CLR_NP(num);

	// This is guaranteed to not have a divide by 0
	// because strmb is equal to 256.
	bc_num_mod(&num, &p->strmb, &num, 0);

	// This is also guaranteed to not error because num is in the range
	// [0, UCHAR_MAX], which is definitely in range for a BcBigDig. And
	// it is not negative.
	val = bc_num_bigdig2(&num);

num_err:
	BC_SIG_MAYLOCK;
	bc_num_free(&num);
	BC_LONGJMP_CONT;
	return (uchar) val;
}

/**
 * Executes the "asciify" command in dc.
* @param p The program. * @param fidx The index of the current function. */ static void bc_program_asciify(BcProgram *p, size_t fidx) { BcResult *r, res; BcNum *n; char str[2], *str2; uchar c; size_t idx; // Check the stack. if (BC_ERR(!BC_PROG_STACK(&p->results, 1))) bc_err(BC_ERR_EXEC_STACK); assert(BC_PROG_STACK(&p->results, 1)); // Get the top of the results stack. bc_program_operand(p, &r, &n, 0); assert(n != NULL); // Asciify. if (BC_PROG_NUM(r, n)) c = bc_program_asciifyNum(p, n); else { // Get the string itself, then the first character. str2 = bc_program_string(p, n); c = (uchar) str2[0]; } // Fill the resulting string. str[0] = (char) c; str[1] = '\0'; // Add the string to the data structures. BC_SIG_LOCK; idx = bc_program_addString(p, str, fidx); BC_SIG_UNLOCK; // Set the result res.t = BC_RESULT_STR; bc_num_clear(&res.d.n); res.d.n.rdx = fidx; res.d.n.scale = idx; // Pop and push. bc_vec_pop(&p->results); bc_vec_push(&p->results, &res); } /** * Streams a number or a string to stdout. * @param p The program. */ static void bc_program_printStream(BcProgram *p) { BcResult *r; BcNum *n; // Check the stack. if (BC_ERR(!BC_PROG_STACK(&p->results, 1))) bc_err(BC_ERR_EXEC_STACK); assert(BC_PROG_STACK(&p->results, 1)); // Get the top of the results stack. bc_program_operand(p, &r, &n, 0); assert(n != NULL); // Stream appropriately. if (BC_PROG_NUM(r, n)) bc_num_stream(n); else bc_program_printChars(bc_program_string(p, n)); // Pop the operand. bc_vec_pop(&p->results); } #if DC_ENABLED /** * Gets the length of a register in dc and pushes it onto the results stack. * @param p The program. * @param code The bytecode vector to pull the register's index out of. * @param bgn An in/out parameter; the start of the index in the bytecode * vector, and will be updated to point after the index on return. 
*/ static void bc_program_regStackLen(BcProgram *p, const char *restrict code, size_t *restrict bgn) { size_t idx = bc_program_index(code, bgn); BcVec *v = bc_program_vec(p, idx, BC_TYPE_VAR); bc_program_pushBigdig(p, (BcBigDig) v->len, BC_RESULT_TEMP); } /** * Pushes the length of the results stack onto the results stack. * @param p The program. */ static void bc_program_stackLen(BcProgram *p) { bc_program_pushBigdig(p, (BcBigDig) p->results.len, BC_RESULT_TEMP); } /** * Pops a certain number of elements off the execution stack. * @param p The program. * @param inst The instruction to tell us how many. There is one to pop up to * 2, and one to pop the amount equal to the number at the top of * the results stack. */ static void bc_program_nquit(BcProgram *p, uchar inst) { BcResult *opnd; BcNum *num; BcBigDig val; size_t i; // Ensure that the tail calls stack is correct. assert(p->stack.len == p->tail_calls.len); // Get the number of executions to pop. if (inst == BC_INST_QUIT) val = 2; else { bc_program_prep(p, &opnd, &num, 0); val = bc_num_bigdig(num); bc_vec_pop(&p->results); } // Loop over the tail call stack and adjust the quit value appropriately. for (i = 0; val && i < p->tail_calls.len; ++i) { // Get the number of tail calls for this one. size_t calls = *((size_t*) bc_vec_item_rev(&p->tail_calls, i)) + 1; // Adjust the value. if (calls >= val) val = 0; else val -= (BcBigDig) calls; } // If we don't have enough executions, just quit. if (i == p->stack.len) { vm.status = BC_STATUS_QUIT; BC_JMP; } else { // We can always pop the last item we reached on the tail call stack // because these are for tail calls. That means that any executions that // we would not have quit in that position on the stack would have quit // anyway. BC_SIG_LOCK; bc_vec_npop(&p->stack, i); bc_vec_npop(&p->tail_calls, i); BC_SIG_UNLOCK; } } /** * Pushes the depth of the execution stack onto the stack. * @param p The program. 
*/ static void bc_program_execStackLen(BcProgram *p) { size_t i, amt, len = p->tail_calls.len; /* Each entry counts once, plus the number of tail calls recorded in it. */ amt = len; for (i = 0; i < len; ++i) amt += *((size_t*) bc_vec_item(&p->tail_calls, i)); bc_program_pushBigdig(p, (BcBigDig) amt, BC_RESULT_TEMP); } /** * Executes a string as dc code. Unconditionally, the string is the item on top * of the results stack, and a non-string there is silently left alone. * Conditionally, the string is the top of the "then" or "else" register, chosen * by whether the operand has a nonzero length; if the "else" register is needed * but absent, the operand is popped and nothing runs. The string is parsed only * if its function has no bytecode yet, and a tail call replaces the current * execution stack entry instead of adding a new one. * @param p The program. * @param code The bytecode vector to pull the register's index out of. * @param bgn An in/out parameter; the start of the index in the bytecode * vector, and will be updated to point after the index on return. * @param cond True if the execution is conditional, false otherwise. * @param len The number of bytes in the bytecode vector. */ static void bc_program_execStr(BcProgram *p, const char *restrict code, size_t *restrict bgn, bool cond, size_t len) { BcResult *r; char *str; BcFunc *f; BcInstPtr ip; size_t fidx; BcNum *n; assert(p->stack.len == p->tail_calls.len); // Check the stack. if (BC_ERR(!BC_PROG_STACK(&p->results, 1))) bc_err(BC_ERR_EXEC_STACK); assert(BC_PROG_STACK(&p->results, 1)); // Get the operand. bc_program_operand(p, &r, &n, 0); // If execution is conditional... if (cond) { bool exec; size_t idx, then_idx, else_idx; // Get the index of the "then" var and "else" var. then_idx = bc_program_index(code, bgn); else_idx = bc_program_index(code, bgn); // Figure out if we should execute. exec = (r->d.n.len != 0); idx = exec ? then_idx : else_idx; BC_SIG_LOCK; BC_SETJMP_LOCKED(exit); // If we are supposed to execute, execute. If else_idx == SIZE_MAX, that // means there was no else clause, so if execute is false and else does // not exist, we don't execute. The goto skips all of the setup for the // execution.
if (exec || (else_idx != SIZE_MAX)) n = bc_vec_top(bc_program_vec(p, idx, BC_TYPE_VAR)); else goto exit; if (BC_ERR(!BC_PROG_STR(n))) bc_err(BC_ERR_EXEC_TYPE); BC_UNSETJMP; BC_SIG_UNLOCK; } else { // In non-conditional situations, only the top of stack can be executed, // and in those cases, variables are not allowed to be "on the stack"; // they are only put on the stack to be assigned to. assert(r->t != BC_RESULT_VAR); if (r->t != BC_RESULT_STR) return; } assert(BC_PROG_STR(n)); // Get the string. str = bc_program_string(p, n); // Get the function index and function. BC_SIG_LOCK; fidx = bc_program_insertFunc(p, str); BC_SIG_UNLOCK; f = bc_vec_item(&p->fns, fidx); // If the function has not been parsed yet... if (!f->code.len) { BC_SIG_LOCK; if (!BC_PARSE_IS_INITED(&vm.read_prs, p)) { bc_parse_init(&vm.read_prs, p, fidx); // Initialize this too because bc_vm_shutdown() expects them to be // initialized together. bc_vec_init(&vm.read_buf, sizeof(char), BC_DTOR_NONE); } // This needs to be updated because the parser could have been used // somewhere else. else bc_parse_updateFunc(&vm.read_prs, fidx); bc_lex_file(&vm.read_prs.l, vm.file); BC_SETJMP_LOCKED(err); BC_SIG_UNLOCK; // Parse. - bc_parse_text(&vm.read_prs, str, false); + bc_parse_text(&vm.read_prs, str, false, false); BC_SIG_LOCK; vm.expr(&vm.read_prs, BC_PARSE_NOCALL); BC_UNSETJMP; // We can just assert this here because // dc should parse everything until EOF. assert(vm.read_prs.l.t == BC_LEX_EOF); BC_SIG_UNLOCK; } // Set the instruction pointer. ip.idx = 0; ip.len = p->results.len; ip.func = fidx; BC_SIG_LOCK; // Pop the operand. bc_vec_pop(&p->results); // Tail call processing. This condition means that there is more on the // execution stack, and we are at the end of the bytecode vector, and the // last instruction is just a BC_INST_POP_EXEC, which would return.
if (p->stack.len > 1 && *bgn == len - 1 && code[*bgn] == BC_INST_POP_EXEC) { size_t *call_ptr = bc_vec_top(&p->tail_calls); // Add one to the tail call count. *call_ptr += 1; // Pop the execution stack before pushing the new instruction pointer // on. bc_vec_pop(&p->stack); } // If not a tail call, just push a new tail call count, which starts at // zero because ip.idx was just set to 0. else bc_vec_push(&p->tail_calls, &ip.idx); // Push the new function onto the execution stack and return. bc_vec_push(&p->stack, &ip); BC_SIG_UNLOCK; return; err: BC_SIG_MAYLOCK; f = bc_vec_item(&p->fns, fidx); // Make sure to erase the bytecode vector so dc knows it is not parsed. bc_vec_popAll(&f->code); exit: bc_vec_pop(&p->results); BC_LONGJMP_CONT; } /** * Prints every item on the results stack, one per line. * @param p The program. */ static void bc_program_printStack(BcProgram *p) { size_t idx; for (idx = 0; idx < p->results.len; ++idx) bc_program_print(p, BC_INST_PRINT, idx); } #endif // DC_ENABLED /** * Pushes the value of a global onto the results stack. * @param p The program. * @param inst Which global to push, as an instruction. */ static void bc_program_pushGlobal(BcProgram *p, uchar inst) { BcResultType t; // Make sure the instruction is valid. assert(inst >= BC_INST_IBASE && inst <= BC_INST_SCALE); // Map the instruction onto the matching result type and push the global. t = inst - BC_INST_IBASE + BC_RESULT_IBASE; bc_program_pushBigdig(p, p->globals[inst - BC_INST_IBASE], t); } /** * Pushes the value of a global setting onto the stack. * @param p The program. * @param inst Which global setting to push, as an instruction. */ static void bc_program_globalSetting(BcProgram *p, uchar inst) { BcBigDig val; // Make sure the instruction is valid. assert(inst >= BC_INST_LINE_LENGTH && inst <= BC_INST_LEADING_ZERO); if (inst == BC_INST_LINE_LENGTH) val = (BcBigDig) vm.line_len; #if BC_ENABLED else if (inst == BC_INST_GLOBAL_STACKS) val = (BC_G != 0); #endif // BC_ENABLED else val = (BC_Z != 0); // Push the setting's value as a temporary result.
bc_program_pushBigdig(p, val, BC_RESULT_TEMP); } #if BC_ENABLE_EXTRA_MATH /** * Pushes the value of seed on the stack. * @param p The program. */ static void bc_program_pushSeed(BcProgram *p) { BcResult *res; res = bc_program_prepResult(p); res->t = BC_RESULT_SEED; BC_SIG_LOCK; // We need 2*BC_RAND_NUM_SIZE because of the size of the state. bc_num_init(&res->d.n, 2 * BC_RAND_NUM_SIZE); BC_SIG_UNLOCK; bc_num_createFromRNG(&res->d.n, &p->rng); } #endif // BC_ENABLE_EXTRA_MATH /** * Adds a function to the fns array. The function's ID must have already been * inserted into the map. * @param p The program. * @param id_ptr The ID of the function as inserted into the map. */ static void bc_program_addFunc(BcProgram *p, BcId *id_ptr) { BcInstPtr *ip; BcFunc *f; BC_SIG_ASSERT_LOCKED; // Push and init. f = bc_vec_pushEmpty(&p->fns); bc_func_init(f, id_ptr->name); // This is to make sure pointers are updated if the array was moved. if (p->stack.len) { ip = bc_vec_top(&p->stack); bc_program_setVecs(p, (BcFunc*) bc_vec_item(&p->fns, ip->func)); } } /** * Inserts a function with the given name into the function map and returns its * index in the fns array. A new name gets a freshly added function; in bc, an * existing function is reset. Signals must be locked by the caller. * @param p The program. * @param name The name of the function. * @return The index of the function in the fns array. */ size_t bc_program_insertFunc(BcProgram *p, const char *name) { BcId *id_ptr; bool new; size_t idx; BC_SIG_ASSERT_LOCKED; assert(p != NULL && name != NULL); // Insert into the map and get the resulting ID. new = bc_map_insert(&p->fn_map, name, p->fns.len, &idx); id_ptr = (BcId*) bc_vec_item(&p->fn_map, idx); // From here on, idx is the index stored in the ID, not the slot in the map. idx = id_ptr->idx; // If the function is new... if (new) { // Add the function to the fns array. bc_program_addFunc(p, id_ptr); } #if BC_ENABLED // bc has to reset the function because it's about to be redefined. else if (BC_IS_BC) { BcFunc *func = bc_vec_item(&p->fns, idx); bc_func_reset(func); } #endif // BC_ENABLED return idx; } #ifndef NDEBUG /** * Frees the vectors, maps and numbers owned by the program. This is only * compiled when NDEBUG is not defined. Signals must be locked by the caller. * @param p The program to free. */ void bc_program_free(BcProgram *p) { size_t i; BC_SIG_ASSERT_LOCKED; assert(p != NULL); // Free the globals stacks.
for (i = 0; i < BC_PROG_GLOBALS_LEN; ++i) bc_vec_free(p->globals_v + i); bc_vec_free(&p->fns); bc_vec_free(&p->fn_map); bc_vec_free(&p->vars); bc_vec_free(&p->var_map); bc_vec_free(&p->arrs); bc_vec_free(&p->arr_map); bc_vec_free(&p->results); bc_vec_free(&p->stack); #if BC_ENABLED if (BC_IS_BC) bc_num_free(&p->last); #endif // BC_ENABLED #if BC_ENABLE_EXTRA_MATH bc_rand_free(&p->rng); #endif // BC_ENABLE_EXTRA_MATH #if DC_ENABLED if (BC_IS_DC) bc_vec_free(&p->tail_calls); #endif // DC_ENABLED } #endif // NDEBUG /** * Initializes a program: the globals and their stacks, the function, variable * and array tables, the results stack, and an execution stack holding one * zeroed instruction pointer. Signals must be locked by the caller. * @param p The program to initialize. */ void bc_program_init(BcProgram *p) { BcInstPtr ip; size_t i; BC_SIG_ASSERT_LOCKED; assert(p != NULL); // We want this clear. memset(&ip, 0, sizeof(BcInstPtr)); // Set up the globals stacks and the current values: scale starts at 0 and // the other globals start at BC_BASE. for (i = 0; i < BC_PROG_GLOBALS_LEN; ++i) { BcBigDig val = i == BC_PROG_GLOBALS_SCALE ? 0 : BC_BASE; bc_vec_init(p->globals_v + i, sizeof(BcBigDig), BC_DTOR_NONE); bc_vec_push(p->globals_v + i, &val); p->globals[i] = val; } #if DC_ENABLED // dc-only setup. if (BC_IS_DC) { bc_vec_init(&p->tail_calls, sizeof(size_t), BC_DTOR_NONE); // We want an item for the main function on the tail call stack. i = 0; bc_vec_push(&p->tail_calls, &i); } #endif // DC_ENABLED bc_num_setup(&p->strmb, p->strmb_num, BC_NUM_BIGDIG_LOG10); bc_num_bigdig2num(&p->strmb, BC_NUM_STREAM_BASE); #if BC_ENABLE_EXTRA_MATH // We need to seed rand() with srand() just in case /dev/urandom and // /dev/random are not available.
srand((unsigned int) time(NULL)); bc_rand_init(&p->rng); #endif // BC_ENABLE_EXTRA_MATH #if BC_ENABLED if (BC_IS_BC) bc_num_init(&p->last, BC_NUM_DEF_SIZE); #endif // BC_ENABLED // The functions vector only gets a destructor when NDEBUG is not defined. #ifndef NDEBUG bc_vec_init(&p->fns, sizeof(BcFunc), BC_DTOR_FUNC); #else // NDEBUG bc_vec_init(&p->fns, sizeof(BcFunc), BC_DTOR_NONE); #endif // NDEBUG bc_map_init(&p->fn_map); bc_program_insertFunc(p, bc_func_main); bc_program_insertFunc(p, bc_func_read); bc_vec_init(&p->vars, sizeof(BcVec), BC_DTOR_VEC); bc_map_init(&p->var_map); bc_vec_init(&p->arrs, sizeof(BcVec), BC_DTOR_VEC); bc_map_init(&p->arr_map); bc_vec_init(&p->results, sizeof(BcResult), BC_DTOR_RESULT); // Push the first instruction pointer onto the execution stack. bc_vec_init(&p->stack, sizeof(BcInstPtr), BC_DTOR_NONE); bc_vec_push(&p->stack, &ip); // Make sure the pointers are properly set up. bc_program_setVecs(p, (BcFunc*) bc_vec_item(&p->fns, BC_PROG_MAIN)); assert(p->consts != NULL && p->strs != NULL); } /** * Resets the program to a clean state: pops the execution stack down to one * entry, clears the results stack and the main function's bytecode, and zeroes * the remaining instruction pointer. If vm.sig is set, the ready message is * written and vm.sig is cleared. Signals must be locked by the caller. * @param p The program to reset. */ void bc_program_reset(BcProgram *p) { BcFunc *f; BcInstPtr *ip; BC_SIG_ASSERT_LOCKED; // Pop all but one execution stack entry, and pop all results. bc_vec_npop(&p->stack, p->stack.len - 1); bc_vec_popAll(&p->results); #if BC_ENABLED // Clear the globals' stacks. if (BC_G) bc_program_popGlobals(p, true); #endif // BC_ENABLED // Clear the bytecode vector of the main function. f = bc_vec_item(&p->fns, BC_PROG_MAIN); bc_vec_npop(&f->code, f->code.len); // Reset the instruction pointer. ip = bc_vec_top(&p->stack); bc_program_setVecs(p, f); memset(ip, 0, sizeof(BcInstPtr)); // Write the ready message for a signal, and clear the signal.
if (vm.sig) { bc_file_write(&vm.fout, bc_flush_none, bc_program_ready_msg, bc_program_ready_msg_len); bc_file_flush(&vm.fout, bc_flush_err); vm.sig = 0; } } /** * Executes bytecode, starting from the instruction pointer on top of the * execution stack. Without computed goto, this is a loop around a switch that * runs until the current function's bytecode is exhausted. With computed goto, * there is no loop: BC_INST_INVALID is appended to the main function's bytecode * and its handler returns. Signals must not be locked when this is called. * @param p The program. */ void bc_program_exec(BcProgram *p) { size_t idx; BcResult r, *ptr; BcInstPtr *ip; BcFunc *func; char *code; bool cond = false; uchar inst; #if BC_ENABLED BcNum *num; #endif // BC_ENABLED #if !BC_HAS_COMPUTED_GOTO #ifndef NDEBUG size_t jmp_bufs_len; #endif // NDEBUG #endif // !BC_HAS_COMPUTED_GOTO #if BC_HAS_COMPUTED_GOTO BC_PROG_LBLS; BC_PROG_LBLS_ASSERT; // BC_INST_INVALID is a marker for the end so that we don't have to have an // execution loop. func = (BcFunc*) bc_vec_item(&p->fns, BC_PROG_MAIN); bc_vec_pushByte(&func->code, BC_INST_INVALID); #endif // BC_HAS_COMPUTED_GOTO ip = bc_vec_top(&p->stack); func = (BcFunc*) bc_vec_item(&p->fns, ip->func); code = func->code.v; // Ensure the pointers are correct. BC_SIG_LOCK; bc_program_setVecs(p, func); BC_SIG_UNLOCK; #if !BC_HAS_COMPUTED_GOTO #ifndef NDEBUG jmp_bufs_len = vm.jmp_bufs.len; #endif // NDEBUG // This loop is the heart of the execution engine. It *is* the engine. For // computed goto, it is ignored. while (ip->idx < func->code.len) #endif // !BC_HAS_COMPUTED_GOTO { BC_SIG_ASSERT_NOT_LOCKED; #if BC_HAS_COMPUTED_GOTO BC_PROG_JUMP(inst, code, ip); #else // BC_HAS_COMPUTED_GOTO // Get the next instruction and increment the index. inst = (uchar) code[(ip->idx)++]; #endif // BC_HAS_COMPUTED_GOTO #if BC_DEBUG_CODE bc_file_printf(&vm.ferr, "inst: %s\n", bc_inst_names[inst]); bc_file_flush(&vm.ferr, bc_flush_none); #endif // BC_DEBUG_CODE #if !BC_HAS_COMPUTED_GOTO switch (inst) #endif // !BC_HAS_COMPUTED_GOTO { #if BC_ENABLED // This just sets up the condition for the unconditional jump below, // which checks the condition, if necessary. BC_PROG_LBL(BC_INST_JUMP_ZERO): { bc_program_prep(p, &ptr, &num, 0); cond = !bc_num_cmpZero(num); bc_vec_pop(&p->results); BC_PROG_DIRECT_JUMP(BC_INST_JUMP) } // Fallthrough.
BC_PROG_FALLTHROUGH BC_PROG_LBL(BC_INST_JUMP): { idx = bc_program_index(code, &ip->idx); // If a jump is required (it is unconditional or the condition held)... if (inst == BC_INST_JUMP || cond) { // Get the address to jump to. size_t *addr = bc_vec_item(&func->labels, idx); // If this fails, then the parser failed to set up the // labels correctly. assert(*addr != SIZE_MAX); // Set the new address. ip->idx = *addr; } BC_PROG_JUMP(inst, code, ip); } BC_PROG_LBL(BC_INST_CALL): { assert(BC_IS_BC); bc_program_call(p, code, &ip->idx); // Because we changed the execution stack and where we are // executing, we have to update all of this. BC_SIG_LOCK; ip = bc_vec_top(&p->stack); func = bc_vec_item(&p->fns, ip->func); code = func->code.v; bc_program_setVecs(p, func); BC_SIG_UNLOCK; BC_PROG_JUMP(inst, code, ip); } BC_PROG_LBL(BC_INST_INC): BC_PROG_LBL(BC_INST_DEC): { bc_program_incdec(p, inst); BC_PROG_JUMP(inst, code, ip); } BC_PROG_LBL(BC_INST_HALT): { vm.status = BC_STATUS_QUIT; // Just jump out. The jump series will take care of everything. BC_JMP; BC_PROG_JUMP(inst, code, ip); } BC_PROG_LBL(BC_INST_RET): BC_PROG_LBL(BC_INST_RET0): BC_PROG_LBL(BC_INST_RET_VOID): { bc_program_return(p, inst); // Because we changed the execution stack and where we are // executing, we have to update all of this. BC_SIG_LOCK; ip = bc_vec_top(&p->stack); func = bc_vec_item(&p->fns, ip->func); code = func->code.v; bc_program_setVecs(p, func); BC_SIG_UNLOCK; BC_PROG_JUMP(inst, code, ip); } #endif // BC_ENABLED BC_PROG_LBL(BC_INST_BOOL_OR): BC_PROG_LBL(BC_INST_BOOL_AND): BC_PROG_LBL(BC_INST_REL_EQ): BC_PROG_LBL(BC_INST_REL_LE): BC_PROG_LBL(BC_INST_REL_GE): BC_PROG_LBL(BC_INST_REL_NE): BC_PROG_LBL(BC_INST_REL_LT): BC_PROG_LBL(BC_INST_REL_GT): { bc_program_logical(p, inst); BC_PROG_JUMP(inst, code, ip); } BC_PROG_LBL(BC_INST_READ): { // We want to flush output before // this in case there is a prompt.
bc_file_flush(&vm.fout, bc_flush_save); bc_program_read(p); // Because we changed the execution stack and where we are // executing, we have to update all of this. BC_SIG_LOCK; ip = bc_vec_top(&p->stack); func = bc_vec_item(&p->fns, ip->func); code = func->code.v; bc_program_setVecs(p, func); BC_SIG_UNLOCK; BC_PROG_JUMP(inst, code, ip); } #if BC_ENABLE_EXTRA_MATH BC_PROG_LBL(BC_INST_RAND): { bc_program_rand(p); BC_PROG_JUMP(inst, code, ip); } #endif // BC_ENABLE_EXTRA_MATH BC_PROG_LBL(BC_INST_MAXIBASE): BC_PROG_LBL(BC_INST_MAXOBASE): BC_PROG_LBL(BC_INST_MAXSCALE): #if BC_ENABLE_EXTRA_MATH BC_PROG_LBL(BC_INST_MAXRAND): #endif // BC_ENABLE_EXTRA_MATH { // This assumes vm.maxes is laid out in the same order as these // instructions. BcBigDig dig = vm.maxes[inst - BC_INST_MAXIBASE]; bc_program_pushBigdig(p, dig, BC_RESULT_TEMP); BC_PROG_JUMP(inst, code, ip); } BC_PROG_LBL(BC_INST_LINE_LENGTH): #if BC_ENABLED BC_PROG_LBL(BC_INST_GLOBAL_STACKS): #endif // BC_ENABLED BC_PROG_LBL(BC_INST_LEADING_ZERO): { bc_program_globalSetting(p, inst); BC_PROG_JUMP(inst, code, ip); } BC_PROG_LBL(BC_INST_VAR): { bc_program_pushVar(p, code, &ip->idx, false, false); BC_PROG_JUMP(inst, code, ip); } BC_PROG_LBL(BC_INST_ARRAY_ELEM): BC_PROG_LBL(BC_INST_ARRAY): { bc_program_pushArray(p, code, &ip->idx, inst); BC_PROG_JUMP(inst, code, ip); } BC_PROG_LBL(BC_INST_IBASE): BC_PROG_LBL(BC_INST_SCALE): BC_PROG_LBL(BC_INST_OBASE): { bc_program_pushGlobal(p, inst); BC_PROG_JUMP(inst, code, ip); } #if BC_ENABLE_EXTRA_MATH BC_PROG_LBL(BC_INST_SEED): { bc_program_pushSeed(p); BC_PROG_JUMP(inst, code, ip); } #endif // BC_ENABLE_EXTRA_MATH BC_PROG_LBL(BC_INST_LENGTH): BC_PROG_LBL(BC_INST_SCALE_FUNC): BC_PROG_LBL(BC_INST_SQRT): BC_PROG_LBL(BC_INST_ABS): #if BC_ENABLE_EXTRA_MATH BC_PROG_LBL(BC_INST_IRAND): #endif // BC_ENABLE_EXTRA_MATH { bc_program_builtin(p, inst); BC_PROG_JUMP(inst, code, ip); } BC_PROG_LBL(BC_INST_ASCIIFY): { bc_program_asciify(p, ip->func); // Because we changed the execution stack and where we are // executing, we have to update all of this.
BC_SIG_LOCK; ip = bc_vec_top(&p->stack); func = bc_vec_item(&p->fns, ip->func); code = func->code.v; bc_program_setVecs(p, func); BC_SIG_UNLOCK; BC_PROG_JUMP(inst, code, ip); } BC_PROG_LBL(BC_INST_NUM): { bc_program_const(p, code, &ip->idx); BC_PROG_JUMP(inst, code, ip); } BC_PROG_LBL(BC_INST_ZERO): BC_PROG_LBL(BC_INST_ONE): #if BC_ENABLED BC_PROG_LBL(BC_INST_LAST): #endif // BC_ENABLED { // This assumes the result types are laid out in the same order as // these instructions. r.t = BC_RESULT_ZERO + (inst - BC_INST_ZERO); bc_vec_push(&p->results, &r); BC_PROG_JUMP(inst, code, ip); } BC_PROG_LBL(BC_INST_PRINT): BC_PROG_LBL(BC_INST_PRINT_POP): #if BC_ENABLED BC_PROG_LBL(BC_INST_PRINT_STR): #endif // BC_ENABLED { bc_program_print(p, inst, 0); // We want to flush right away to save the output for history, // if history must preserve it when taking input. bc_file_flush(&vm.fout, bc_flush_save); BC_PROG_JUMP(inst, code, ip); } BC_PROG_LBL(BC_INST_STR): { // Set up the result and push. The two indices read from the // bytecode are stored in the num's rdx and scale fields. r.t = BC_RESULT_STR; bc_num_clear(&r.d.n); r.d.n.rdx = bc_program_index(code, &ip->idx); r.d.n.scale = bc_program_index(code, &ip->idx); bc_vec_push(&p->results, &r); BC_PROG_JUMP(inst, code, ip); } BC_PROG_LBL(BC_INST_POWER): BC_PROG_LBL(BC_INST_MULTIPLY): BC_PROG_LBL(BC_INST_DIVIDE): BC_PROG_LBL(BC_INST_MODULUS): BC_PROG_LBL(BC_INST_PLUS): BC_PROG_LBL(BC_INST_MINUS): #if BC_ENABLE_EXTRA_MATH BC_PROG_LBL(BC_INST_PLACES): BC_PROG_LBL(BC_INST_LSHIFT): BC_PROG_LBL(BC_INST_RSHIFT): #endif // BC_ENABLE_EXTRA_MATH { bc_program_op(p, inst); BC_PROG_JUMP(inst, code, ip); } BC_PROG_LBL(BC_INST_NEG): BC_PROG_LBL(BC_INST_BOOL_NOT): #if BC_ENABLE_EXTRA_MATH BC_PROG_LBL(BC_INST_TRUNC): #endif // BC_ENABLE_EXTRA_MATH { bc_program_unary(p, inst); BC_PROG_JUMP(inst, code, ip); } #if BC_ENABLED BC_PROG_LBL(BC_INST_ASSIGN_POWER): BC_PROG_LBL(BC_INST_ASSIGN_MULTIPLY): BC_PROG_LBL(BC_INST_ASSIGN_DIVIDE): BC_PROG_LBL(BC_INST_ASSIGN_MODULUS): BC_PROG_LBL(BC_INST_ASSIGN_PLUS): BC_PROG_LBL(BC_INST_ASSIGN_MINUS): #if BC_ENABLE_EXTRA_MATH BC_PROG_LBL(BC_INST_ASSIGN_PLACES):
BC_PROG_LBL(BC_INST_ASSIGN_LSHIFT): BC_PROG_LBL(BC_INST_ASSIGN_RSHIFT): #endif // BC_ENABLE_EXTRA_MATH BC_PROG_LBL(BC_INST_ASSIGN): BC_PROG_LBL(BC_INST_ASSIGN_POWER_NO_VAL): BC_PROG_LBL(BC_INST_ASSIGN_MULTIPLY_NO_VAL): BC_PROG_LBL(BC_INST_ASSIGN_DIVIDE_NO_VAL): BC_PROG_LBL(BC_INST_ASSIGN_MODULUS_NO_VAL): BC_PROG_LBL(BC_INST_ASSIGN_PLUS_NO_VAL): BC_PROG_LBL(BC_INST_ASSIGN_MINUS_NO_VAL): #if BC_ENABLE_EXTRA_MATH BC_PROG_LBL(BC_INST_ASSIGN_PLACES_NO_VAL): BC_PROG_LBL(BC_INST_ASSIGN_LSHIFT_NO_VAL): BC_PROG_LBL(BC_INST_ASSIGN_RSHIFT_NO_VAL): #endif // BC_ENABLE_EXTRA_MATH #endif // BC_ENABLED BC_PROG_LBL(BC_INST_ASSIGN_NO_VAL): { bc_program_assign(p, inst); BC_PROG_JUMP(inst, code, ip); } BC_PROG_LBL(BC_INST_POP): { #ifndef BC_PROG_NO_STACK_CHECK // dc must do a stack check, but bc does not. if (BC_IS_DC) { if (BC_ERR(!BC_PROG_STACK(&p->results, 1))) bc_err(BC_ERR_EXEC_STACK); } #endif // BC_PROG_NO_STACK_CHECK assert(BC_PROG_STACK(&p->results, 1)); bc_vec_pop(&p->results); BC_PROG_JUMP(inst, code, ip); } BC_PROG_LBL(BC_INST_SWAP): { BcResult *ptr2; // Check the stack. if (BC_ERR(!BC_PROG_STACK(&p->results, 2))) bc_err(BC_ERR_EXEC_STACK); assert(BC_PROG_STACK(&p->results, 2)); // Get the two items. ptr = bc_vec_item_rev(&p->results, 0); ptr2 = bc_vec_item_rev(&p->results, 1); // Swap. It's just easiest to do it this way. memcpy(&r, ptr, sizeof(BcResult)); memcpy(ptr, ptr2, sizeof(BcResult)); memcpy(ptr2, &r, sizeof(BcResult)); BC_PROG_JUMP(inst, code, ip); } BC_PROG_LBL(BC_INST_MODEXP): { bc_program_modexp(p); BC_PROG_JUMP(inst, code, ip); } BC_PROG_LBL(BC_INST_DIVMOD): { bc_program_divmod(p); BC_PROG_JUMP(inst, code, ip); } BC_PROG_LBL(BC_INST_PRINT_STREAM): { bc_program_printStream(p); BC_PROG_JUMP(inst, code, ip); } #if DC_ENABLED BC_PROG_LBL(BC_INST_POP_EXEC): { // If this fails, the dc parser got something wrong. assert(BC_PROG_STACK(&p->stack, 2)); // Pop the execution stack and tail call stack.
bc_vec_pop(&p->stack); bc_vec_pop(&p->tail_calls); // Because we changed the execution stack and where we are // executing, we have to update all of this. BC_SIG_LOCK; ip = bc_vec_top(&p->stack); func = bc_vec_item(&p->fns, ip->func); code = func->code.v; bc_program_setVecs(p, func); BC_SIG_UNLOCK; BC_PROG_JUMP(inst, code, ip); } BC_PROG_LBL(BC_INST_EXECUTE): BC_PROG_LBL(BC_INST_EXEC_COND): { cond = (inst == BC_INST_EXEC_COND); bc_program_execStr(p, code, &ip->idx, cond, func->code.len); // Because we changed the execution stack and where we are // executing, we have to update all of this. BC_SIG_LOCK; ip = bc_vec_top(&p->stack); func = bc_vec_item(&p->fns, ip->func); code = func->code.v; bc_program_setVecs(p, func); BC_SIG_UNLOCK; BC_PROG_JUMP(inst, code, ip); } BC_PROG_LBL(BC_INST_PRINT_STACK): { bc_program_printStack(p); BC_PROG_JUMP(inst, code, ip); } BC_PROG_LBL(BC_INST_CLEAR_STACK): { bc_vec_popAll(&p->results); BC_PROG_JUMP(inst, code, ip); } BC_PROG_LBL(BC_INST_REG_STACK_LEN): { bc_program_regStackLen(p, code, &ip->idx); BC_PROG_JUMP(inst, code, ip); } BC_PROG_LBL(BC_INST_STACK_LEN): { bc_program_stackLen(p); BC_PROG_JUMP(inst, code, ip); } BC_PROG_LBL(BC_INST_DUPLICATE): { // Check the stack. if (BC_ERR(!BC_PROG_STACK(&p->results, 1))) bc_err(BC_ERR_EXEC_STACK); assert(BC_PROG_STACK(&p->results, 1)); // Get the top of the stack. ptr = bc_vec_top(&p->results); BC_SIG_LOCK; // Copy and push.
bc_result_copy(&r, ptr); bc_vec_push(&p->results, &r); BC_SIG_UNLOCK; BC_PROG_JUMP(inst, code, ip); } BC_PROG_LBL(BC_INST_LOAD): BC_PROG_LBL(BC_INST_PUSH_VAR): { bool copy = (inst == BC_INST_LOAD); bc_program_pushVar(p, code, &ip->idx, true, copy); BC_PROG_JUMP(inst, code, ip); } BC_PROG_LBL(BC_INST_PUSH_TO_VAR): { idx = bc_program_index(code, &ip->idx); bc_program_copyToVar(p, idx, BC_TYPE_VAR, true); BC_PROG_JUMP(inst, code, ip); } BC_PROG_LBL(BC_INST_QUIT): BC_PROG_LBL(BC_INST_NQUIT): { bc_program_nquit(p, inst); // Because we changed the execution stack and where we are // executing, we have to update all of this. BC_SIG_LOCK; ip = bc_vec_top(&p->stack); func = bc_vec_item(&p->fns, ip->func); code = func->code.v; bc_program_setVecs(p, func); BC_SIG_UNLOCK; BC_PROG_JUMP(inst, code, ip); } BC_PROG_LBL(BC_INST_EXEC_STACK_LEN): { bc_program_execStackLen(p); BC_PROG_JUMP(inst, code, ip); } #endif // DC_ENABLED #if BC_HAS_COMPUTED_GOTO BC_PROG_LBL(BC_INST_INVALID): { return; } #else // BC_HAS_COMPUTED_GOTO default: { BC_UNREACHABLE #ifndef NDEBUG abort(); #endif // NDEBUG } #endif // BC_HAS_COMPUTED_GOTO } #if !BC_HAS_COMPUTED_GOTO #ifndef NDEBUG // This is to allow me to use a debugger to see the last instruction, // which will point to which function was the problem. But it's also a // good smoke test for error handling changes.
assert(jmp_bufs_len == vm.jmp_bufs.len); #endif // NDEBUG #endif // !BC_HAS_COMPUTED_GOTO } } #if BC_DEBUG_CODE #if BC_ENABLED && DC_ENABLED /** * Prints the results stack between two banner lines. * @param p The program. */ void bc_program_printStackDebug(BcProgram *p) { bc_file_puts(&vm.fout, bc_flush_err, "-------------- Stack ----------\n"); bc_program_printStack(p); bc_file_puts(&vm.fout, bc_flush_err, "-------------- Stack End ------\n"); } /** * Prints an index stored in bytecode as a count byte followed by that many * bytes, least significant byte first. * NOTE(review): the loop stops at the first zero byte without consuming the rest * of the counted bytes; confirm an encoded index can never hold an interior * zero byte, or bgn is left pointing into the index. * @param code The bytecode. * @param bgn An in/out parameter; the offset of the index, advanced past the * bytes that were read. */ static void bc_program_printIndex(const char *restrict code, size_t *restrict bgn) { uchar byte, i, bytes = (uchar) code[(*bgn)++]; ulong val = 0; for (byte = 1, i = 0; byte && i < bytes; ++i) { byte = (uchar) code[(*bgn)++]; if (byte) val |= ((ulong) byte) << (CHAR_BIT * i); } bc_vm_printf(" (%lu) ", val); } /** * Prints the string whose index is stored in the bytecode at bgn. * @param p The program. * @param code The bytecode. * @param bgn An in/out parameter; the offset of the index, advanced past it. */ static void bc_program_printStr(const BcProgram *p, const char *restrict code, size_t *restrict bgn) { size_t idx = bc_program_index(code, bgn); char *s; s = *((char**) bc_vec_item(p->strs, idx)); bc_vm_printf(" (\"%s\") ", s); } /** * Prints one instruction and any operands it is known to carry, then a newline. * NOTE(review): bc_program_exec() reads two indices after BC_INST_STR, but * bc_program_printStr() consumes only one; confirm the listing stays in sync. * @param p The program. * @param code The bytecode. * @param bgn An in/out parameter; the offset of the instruction, advanced past * what was printed. */ void bc_program_printInst(const BcProgram *p, const char *restrict code, size_t *restrict bgn) { uchar inst = (uchar) code[(*bgn)++]; bc_vm_printf("Inst[%zu]: %s [%lu]; ", *bgn - 1, bc_inst_names[inst], (unsigned long) inst); if (inst == BC_INST_VAR || inst == BC_INST_ARRAY_ELEM || inst == BC_INST_ARRAY) { bc_program_printIndex(code, bgn); } else if (inst == BC_INST_STR) bc_program_printStr(p, code, bgn); else if (inst == BC_INST_NUM) { size_t idx = bc_program_index(code, bgn); BcConst *c = bc_vec_item(p->consts, idx); bc_vm_printf("(%s)", c->val); } else if (inst == BC_INST_CALL || (inst > BC_INST_STR && inst <= BC_INST_JUMP_ZERO)) { bc_program_printIndex(code, bgn); if (inst == BC_INST_CALL) bc_program_printIndex(code, bgn); } bc_vm_putchar('\n', bc_flush_err); } /** * Prints the bytecode of every function in the program. * @param p The program. */ void bc_program_code(const BcProgram* p) { BcFunc *f; char *code; BcInstPtr ip; size_t i; for (i = 0; i < p->fns.len; ++i) { ip.idx = ip.len = 0; ip.func = i; f = bc_vec_item(&p->fns, ip.func); code = f->code.v; bc_vm_printf("func[%zu]:\n", ip.func); while (ip.idx < f->code.len) bc_program_printInst(p, code,
&ip.idx); bc_file_puts(&vm.fout, bc_flush_err, "\n\n"); } } #endif // BC_ENABLED && DC_ENABLED #endif // BC_DEBUG_CODE diff --git a/src/vm.c b/src/vm.c index ef2257644f52..da8f0e1f4416 100644 --- a/src/vm.c +++ b/src/vm.c @@ -1,1461 +1,1488 @@ /* * ***************************************************************************** * * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2018-2021 Gavin D. Howard and contributors. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * * Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * * Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * ***************************************************************************** * * Code common to all of bc and dc. 
* */ #include #include #include #include #include #include #include #ifndef _WIN32 #include #include #include #else // _WIN32 #define WIN32_LEAN_AND_MEAN #include #include #endif // _WIN32 #include #include #include #include #include #include // The actual globals. static BcDig* temps_buf[BC_VM_MAX_TEMPS]; char output_bufs[BC_VM_BUF_SIZE]; BcVm vm; /** * Jumps, with siglongjmp(), to the jmp_buf on top of vm.jmp_bufs. If * vm.sig_pop is already set, the current top is popped first; otherwise * vm.sig_pop is set. Aborts if there is no jmp_buf at all. In debug code, the * parameter f is printed after "Longjmp: " before jumping. */ #if BC_DEBUG_CODE BC_NORETURN void bc_vm_jmp(const char* f) { #else // BC_DEBUG_CODE BC_NORETURN void bc_vm_jmp(void) { #endif // BC_DEBUG_CODE assert(BC_SIG_EXC); BC_SIG_MAYLOCK; #if BC_DEBUG_CODE bc_file_puts(&vm.ferr, bc_flush_none, "Longjmp: "); bc_file_puts(&vm.ferr, bc_flush_none, f); bc_file_putchar(&vm.ferr, bc_flush_none, '\n'); bc_file_flush(&vm.ferr, bc_flush_none); #endif // BC_DEBUG_CODE #ifndef NDEBUG assert(vm.jmp_bufs.len - (size_t) vm.sig_pop); #endif // NDEBUG if (vm.jmp_bufs.len == 0) abort(); if (vm.sig_pop) bc_vec_pop(&vm.jmp_bufs); else vm.sig_pop = 1; siglongjmp(*((sigjmp_buf*) bc_vec_top(&vm.jmp_bufs)), 1); } #if !BC_ENABLE_LIBRARY /** * Handles signals. This is the signal handler. * @param sig The signal to handle. */ static void bc_vm_sig(int sig) { // There is already a signal in flight, or we are already quitting. Unless // this is a SIGINT with BC_I set, make sure we quit. if (vm.status == (sig_atomic_t) BC_STATUS_QUIT || vm.sig) { if (!BC_I || sig != SIGINT) vm.status = BC_STATUS_QUIT; return; } // Only reset under these conditions; otherwise, quit. if (sig == SIGINT && BC_SIGINT && BC_I) { int err = errno; // Write the message, preserving errno around the write() call. if (write(STDOUT_FILENO, vm.sigmsg, vm.siglen) != (ssize_t) vm.siglen) vm.status = BC_STATUS_ERROR_FATAL; else vm.sig = 1; errno = err; } else vm.status = BC_STATUS_QUIT; assert(vm.jmp_bufs.len); // Only jump if signals are not locked. The jump will happen by whoever // unlocks signals. if (!vm.sig_lock) BC_JMP; } /** * Sets up signal handling by installing bc_vm_sig() as the handler.
*/ static void bc_vm_sigaction(void) { #ifndef _WIN32 struct sigaction sa; sigemptyset(&sa.sa_mask); sa.sa_handler = bc_vm_sig; sa.sa_flags = SA_NODEFER; sigaction(SIGTERM, &sa, NULL); sigaction(SIGQUIT, &sa, NULL); sigaction(SIGINT, &sa, NULL); #if BC_ENABLE_HISTORY if (BC_TTY) sigaction(SIGHUP, &sa, NULL); #endif // BC_ENABLE_HISTORY #else // _WIN32 signal(SIGTERM, bc_vm_sig); signal(SIGINT, bc_vm_sig); #endif // _WIN32 } /** * Prints the name, version and copyright banner and, if help is not NULL, the * help text. The help text is used as a printf-style format that is filled in * with the name, version, build type and the compiled-in defaults. Signals * must be locked by the caller. * @param help The help format string, or NULL to print only the banner. */ void bc_vm_info(const char* const help) { BC_SIG_ASSERT_LOCKED; // Print the banner. bc_file_puts(&vm.fout, bc_flush_none, vm.name); bc_file_putchar(&vm.fout, bc_flush_none, ' '); bc_file_puts(&vm.fout, bc_flush_none, BC_VERSION); bc_file_putchar(&vm.fout, bc_flush_none, '\n'); bc_file_puts(&vm.fout, bc_flush_none, bc_copyright); // Print the help, if there is any. if (help) { bc_file_putchar(&vm.fout, bc_flush_none, '\n'); #if BC_ENABLED if (BC_IS_BC) { const char* const banner = BC_DEFAULT_BANNER ? "to" : "to not"; const char* const sigint = BC_DEFAULT_SIGINT_RESET ? "to reset" : "to exit"; const char* const tty = BC_DEFAULT_TTY_MODE ? "enabled" : "disabled"; const char* const prompt = BC_DEFAULT_PROMPT ? "enabled" : "disabled"; const char* const expr = BC_DEFAULT_EXPR_EXIT ? "to exit" : "to not exit"; bc_file_printf(&vm.fout, help, vm.name, vm.name, BC_VERSION, BC_BUILD_TYPE, banner, sigint, tty, prompt, expr); } #endif // BC_ENABLED #if DC_ENABLED if (BC_IS_DC) { const char* const sigint = DC_DEFAULT_SIGINT_RESET ? "to reset" : "to exit"; const char* const tty = DC_DEFAULT_TTY_MODE ? "enabled" : "disabled"; const char* const prompt = DC_DEFAULT_PROMPT ? "enabled" : "disabled"; const char* const expr = DC_DEFAULT_EXPR_EXIT ? "to exit" : "to not exit"; bc_file_printf(&vm.fout, help, vm.name, vm.name, BC_VERSION, BC_BUILD_TYPE, sigint, tty, prompt, expr); } #endif // DC_ENABLED } // Flush.
bc_file_flush(&vm.fout, bc_flush_none); } #endif // !BC_ENABLE_LIBRARY /** * Raises a fatal error through bc_err(). Outside of library and memcheck * builds, this function is marked BC_NORETURN and aborts if bc_err() returns. * @param e The error. */ #if !BC_ENABLE_LIBRARY && !BC_ENABLE_MEMCHECK BC_NORETURN #endif // !BC_ENABLE_LIBRARY && !BC_ENABLE_MEMCHECK void bc_vm_fatalError(BcErr e) { bc_err(e); #if !BC_ENABLE_LIBRARY && !BC_ENABLE_MEMCHECK BC_UNREACHABLE abort(); #endif // !BC_ENABLE_LIBRARY && !BC_ENABLE_MEMCHECK } #if BC_ENABLE_LIBRARY /** * Library version of the error handler. Errors up to * BC_ERR_MATH_DIVIDE_BY_ZERO are translated into the matching BclError; for * anything else, this aborts if vm.abrt is set and otherwise records a fatal * BclError. It then jumps. * @param e The error. */ void bc_vm_handleError(BcErr e) { assert(e < BC_ERR_NELEMS); assert(!vm.sig_pop); BC_SIG_LOCK; // If we have a normal error... if (e <= BC_ERR_MATH_DIVIDE_BY_ZERO) { // Set the error. vm.err = (BclError) (e - BC_ERR_MATH_NEGATIVE + BCL_ERROR_MATH_NEGATIVE); } // Abort if we should. else if (vm.abrt) abort(); else if (e == BC_ERR_FATAL_ALLOC_ERR) vm.err = BCL_ERROR_FATAL_ALLOC_ERR; else vm.err = BCL_ERROR_FATAL_UNKNOWN_ERR; BC_JMP; } #else // BC_ENABLE_LIBRARY /** * Prints an error or warning to vm.ferr after flushing vm.fout, followed by the * file and line for parse errors or the current function for runtime errors. * POSIX errors are treated as errors, warnings or nothing depending on BC_S * and BC_W. Afterwards, vm.status is set and, if it is nonzero, this jumps; * outside of memcheck builds, fatal errors exit instead. * @param e The error. * @param line The line number for parse errors, or 0 for runtime errors. * @param ... The arguments for the error's format string. */ void bc_vm_handleError(BcErr e, size_t line, ...) { BcStatus s; va_list args; uchar id = bc_err_ids[e]; const char* err_type = vm.err_ids[id]; sig_atomic_t lock; // NOTE(review): e has already been used to index bc_err_ids above, so this // assert only fires after any out-of-bounds read has happened. assert(e < BC_ERR_NELEMS); assert(!vm.sig_pop); #if BC_ENABLED // Figure out if the POSIX error should be an error, a warning, or nothing. if (!BC_S && e >= BC_ERR_POSIX_START) { if (BC_W) { // Make sure to not return an error: UCHAR_MAX + 1 wraps to a // status of 0 when the status is computed below. id = UCHAR_MAX; err_type = vm.err_ids[BC_ERR_IDX_WARN]; } else return; } #endif // BC_ENABLED BC_SIG_TRYLOCK(lock); // Make sure all of stdout is written first. s = bc_file_flushErr(&vm.fout, bc_flush_err); // Just jump out if the flush failed; there's nothing we can do. if (BC_ERR(s == BC_STATUS_ERROR_FATAL)) { vm.status = (sig_atomic_t) s; BC_JMP; } // Print the error message. va_start(args, line); bc_file_putchar(&vm.ferr, bc_flush_none, '\n'); bc_file_puts(&vm.ferr, bc_flush_none, err_type); bc_file_putchar(&vm.ferr, bc_flush_none, ' '); bc_file_vprintf(&vm.ferr, vm.err_msgs[e], args); va_end(args); // Print the extra information if we have it. if (BC_NO_ERR(vm.file != NULL)) { // This is the condition for parsing vs runtime.
// If line is not 0, it is parsing. if (line) { bc_file_puts(&vm.ferr, bc_flush_none, "\n "); bc_file_puts(&vm.ferr, bc_flush_none, vm.file); bc_file_printf(&vm.ferr, bc_err_line, line); } else { BcInstPtr *ip = bc_vec_item_rev(&vm.prog.stack, 0); BcFunc *f = bc_vec_item(&vm.prog.fns, ip->func); bc_file_puts(&vm.ferr, bc_flush_none, "\n "); bc_file_puts(&vm.ferr, bc_flush_none, vm.func_header); bc_file_putchar(&vm.ferr, bc_flush_none, ' '); bc_file_puts(&vm.ferr, bc_flush_none, f->name); #if BC_ENABLED if (BC_IS_BC && ip->func != BC_PROG_MAIN && ip->func != BC_PROG_READ) { bc_file_puts(&vm.ferr, bc_flush_none, "()"); } #endif // BC_ENABLED } } bc_file_puts(&vm.ferr, bc_flush_none, "\n\n"); s = bc_file_flushErr(&vm.ferr, bc_flush_err); #if !BC_ENABLE_MEMCHECK // Because this function is called by a BC_NORETURN function when fatal // errors happen, we need to make sure to exit on fatal errors. This will // be faster anyway. This function *cannot jump when a fatal error occurs!* if (BC_ERR(id == BC_ERR_IDX_FATAL || s == BC_STATUS_ERROR_FATAL)) exit(bc_vm_atexit((int) BC_STATUS_ERROR_FATAL)); #else // !BC_ENABLE_MEMCHECK if (BC_ERR(s == BC_STATUS_ERROR_FATAL)) vm.status = (sig_atomic_t) s; else #endif // !BC_ENABLE_MEMCHECK { vm.status = (sig_atomic_t) (uchar) (id + 1); } // Only jump if there is an error; a warning sets a status of 0. if (BC_ERR(vm.status)) BC_JMP; BC_SIG_TRYUNLOCK(lock); } /** * Gets the value of an environment variable. On Windows, the value comes from * _dupenv_s(), so the result should be handed to bc_vm_getenvFree() when it is * no longer needed. * @param var The name of the environment variable. * @return The value; callers treat NULL as "no value". */ char* bc_vm_getenv(const char* var) { char* ret; #ifndef _WIN32 ret = getenv(var); #else // _WIN32 _dupenv_s(&ret, NULL, var); #endif // _WIN32 return ret; } /** * Releases a value returned by bc_vm_getenv(). This only frees on Windows. * @param val The value to release. */ void bc_vm_getenvFree(char* val) { BC_UNUSED(val); #ifdef _WIN32 free(val); #endif // _WIN32 } /** * Sets a flag from an environment variable and the default. If the variable * is set, its value is parsed with strtoul() and a nonzero result sets the * flag while zero clears it. * @param var The environment variable. * @param def The default, used when the variable is not set; nonzero sets the * flag and zero clears it. * @param flag The flag to set. */ static void bc_vm_setenvFlag(const char* const var, int def, uint16_t flag) { // Get the value. char* val = bc_vm_getenv(var); // If there is no value... if (val == NULL) { // Set the default.
if (def) vm.flags |= flag; else vm.flags &= ~(flag); } // Parse the value. else if (strtoul(val, NULL, 0)) vm.flags |= flag; else vm.flags &= ~(flag); bc_vm_getenvFree(val); } /** * Parses the arguments in {B,D}C_ENV_ARGS. * @param env_args_name The environment variable to use. */ static void bc_vm_envArgs(const char* const env_args_name) { char *env_args = bc_vm_getenv(env_args_name), *buf, *start; char instr = '\0'; BC_SIG_ASSERT_LOCKED; if (env_args == NULL) return; // Windows already allocates, so we don't need to. #ifndef _WIN32 start = buf = vm.env_args_buffer = bc_vm_strdup(env_args); #else // _WIN32 start = buf = vm.env_args_buffer = env_args; #endif // _WIN32 assert(buf != NULL); // Create two buffers for parsing. These need to stay throughout the entire // execution of bc, unfortunately, because of filenames that might be in // there. bc_vec_init(&vm.env_args, sizeof(char*), BC_DTOR_NONE); bc_vec_push(&vm.env_args, &env_args_name); // While we haven't reached the end of the args... while (*buf) { // If we don't have whitespace... if (!isspace(*buf)) { // If we have the start of a string... if (*buf == '"' || *buf == '\'') { // Set stuff appropriately. instr = *buf; buf += 1; // Check for the empty string. if (*buf == instr) { instr = '\0'; buf += 1; continue; } } // Push the pointer to the args buffer. bc_vec_push(&vm.env_args, &buf); // Parse the string. while (*buf && ((!instr && !isspace(*buf)) || (instr && *buf != instr))) { buf += 1; } // If we did find the end of the string... if (*buf) { if (instr) instr = '\0'; // Reset stuff. *buf = '\0'; buf += 1; start = buf; } else if (instr) bc_error(BC_ERR_FATAL_OPTION, 0, start); } // If we have whitespace, eat it. else buf += 1; } // Make sure to push a NULL pointer at the end. buf = NULL; bc_vec_push(&vm.env_args, &buf); // Parse the arguments. bc_args((int) vm.env_args.len - 1, bc_vec_item(&vm.env_args, 0), false); } /** * Gets the {B,D}C_LINE_LENGTH.
* @param var The environment variable to pull it from. * @return The line length. */ static size_t bc_vm_envLen(const char *var) { char *lenv = bc_vm_getenv(var); size_t i, len = BC_NUM_PRINT_WIDTH; int num; // Return the default with none. if (lenv == NULL) return len; len = strlen(lenv); // Figure out if it's a number. for (num = 1, i = 0; num && i < len; ++i) num = isdigit(lenv[i]); // If it is a number... if (num) { // Parse it and clamp it if needed. len = (size_t) atoi(lenv) - 1; if (len == 1 || len >= UINT16_MAX) len = BC_NUM_PRINT_WIDTH; } // Set the default. else len = BC_NUM_PRINT_WIDTH; bc_vm_getenvFree(lenv); return len; } #endif // BC_ENABLE_LIBRARY void bc_vm_shutdown(void) { BC_SIG_ASSERT_LOCKED; #if BC_ENABLE_NLS if (vm.catalog != BC_VM_INVALID_CATALOG) catclose(vm.catalog); #endif // BC_ENABLE_NLS #if BC_ENABLE_HISTORY // This must always run to ensure that the terminal is back to normal, i.e., // has raw mode disabled. if (BC_TTY) bc_history_free(&vm.history); #endif // BC_ENABLE_HISTORY #ifndef NDEBUG #if !BC_ENABLE_LIBRARY bc_vec_free(&vm.env_args); free(vm.env_args_buffer); bc_vec_free(&vm.files); bc_vec_free(&vm.exprs); if (BC_PARSE_IS_INITED(&vm.read_prs, &vm.prog)) { bc_vec_free(&vm.read_buf); bc_parse_free(&vm.read_prs); } bc_parse_free(&vm.prs); bc_program_free(&vm.prog); bc_slabvec_free(&vm.other_slabs); bc_slabvec_free(&vm.main_slabs); bc_slabvec_free(&vm.main_const_slab); #endif // !BC_ENABLE_LIBRARY bc_vm_freeTemps(); #endif // NDEBUG #if !BC_ENABLE_LIBRARY // We always want to flush. bc_file_free(&vm.fout); bc_file_free(&vm.ferr); #endif // !BC_ENABLE_LIBRARY } void bc_vm_addTemp(BcDig *num) { BC_SIG_ASSERT_LOCKED; // If we don't have room, just free. if (vm.temps_len == BC_VM_MAX_TEMPS) free(num); else { // Add to the buffer and length. 
temps_buf[vm.temps_len] = num; vm.temps_len += 1; } } BcDig* bc_vm_takeTemp(void) { BC_SIG_ASSERT_LOCKED; if (!vm.temps_len) return NULL; vm.temps_len -= 1; return temps_buf[vm.temps_len]; } void bc_vm_freeTemps(void) { size_t i; BC_SIG_ASSERT_LOCKED; if (!vm.temps_len) return; // Free them all... for (i = 0; i < vm.temps_len; ++i) free(temps_buf[i]); vm.temps_len = 0; } inline size_t bc_vm_arraySize(size_t n, size_t size) { size_t res = n * size; if (BC_ERR(BC_VM_MUL_OVERFLOW(n, size, res))) bc_vm_fatalError(BC_ERR_FATAL_ALLOC_ERR); return res; } inline size_t bc_vm_growSize(size_t a, size_t b) { size_t res = a + b; if (BC_ERR(res >= SIZE_MAX || res < a)) bc_vm_fatalError(BC_ERR_FATAL_ALLOC_ERR); return res; } void* bc_vm_malloc(size_t n) { void* ptr; BC_SIG_ASSERT_LOCKED; ptr = malloc(n); if (BC_ERR(ptr == NULL)) { bc_vm_freeTemps(); ptr = malloc(n); if (BC_ERR(ptr == NULL)) bc_vm_fatalError(BC_ERR_FATAL_ALLOC_ERR); } return ptr; } void* bc_vm_realloc(void *ptr, size_t n) { void* temp; BC_SIG_ASSERT_LOCKED; temp = realloc(ptr, n); if (BC_ERR(temp == NULL)) { bc_vm_freeTemps(); temp = realloc(ptr, n); if (BC_ERR(temp == NULL)) bc_vm_fatalError(BC_ERR_FATAL_ALLOC_ERR); } return temp; } char* bc_vm_strdup(const char *str) { char *s; BC_SIG_ASSERT_LOCKED; s = strdup(str); if (BC_ERR(s == NULL)) { bc_vm_freeTemps(); s = strdup(str); if (BC_ERR(s == NULL)) bc_vm_fatalError(BC_ERR_FATAL_ALLOC_ERR); } return s; } #if !BC_ENABLE_LIBRARY void bc_vm_printf(const char *fmt, ...) { va_list args; sig_atomic_t lock; BC_SIG_TRYLOCK(lock); va_start(args, fmt); bc_file_vprintf(&vm.fout, fmt, args); va_end(args); vm.nchars = 0; BC_SIG_TRYUNLOCK(lock); } #endif // !BC_ENABLE_LIBRARY void bc_vm_putchar(int c, BcFlushType type) { #if BC_ENABLE_LIBRARY bc_vec_pushByte(&vm.out, (uchar) c); #else // BC_ENABLE_LIBRARY bc_file_putchar(&vm.fout, type, (uchar) c); vm.nchars = (c == '\n' ? 
0 : vm.nchars + 1); #endif // BC_ENABLE_LIBRARY } #if !BC_ENABLE_LIBRARY #ifdef __OpenBSD__ /** * Aborts with a message. This should never be called because I have carefully * made sure that the calls to pledge() and unveil() are correct, but it's here * just in case. * @param msg The message to print. */ BC_NORETURN static void bc_abortm(const char* msg) { bc_file_puts(&vm.ferr, bc_flush_none, msg); bc_file_puts(&vm.ferr, bc_flush_none, "; this is a bug"); bc_file_flush(&vm.ferr, bc_flush_none); abort(); } void bc_pledge(const char *promises, const char* execpromises) { int r = pledge(promises, execpromises); if (r) bc_abortm("pledge() failed"); } #if BC_ENABLE_EXTRA_MATH /** * A convenience and portability function for OpenBSD's unveil(). * @param path The path. * @param permissions The permissions for the path. */ static void bc_unveil(const char *path, const char *permissions) { int r = unveil(path, permissions); if (r) bc_abortm("unveil() failed"); } #endif // BC_ENABLE_EXTRA_MATH #else // __OpenBSD__ void bc_pledge(const char *promises, const char *execpromises) { BC_UNUSED(promises); BC_UNUSED(execpromises); } #if BC_ENABLE_EXTRA_MATH static void bc_unveil(const char *path, const char *permissions) { BC_UNUSED(path); BC_UNUSED(permissions); } #endif // BC_ENABLE_EXTRA_MATH #endif // __OpenBSD__ /** * Cleans unneeded variables, arrays, functions, strings, and constants when * done executing a line of stdin. This is to prevent memory usage growing * without bound. This is an idea from busybox. */ static void bc_vm_clean(void) { BcVec *fns = &vm.prog.fns; BcFunc *f = bc_vec_item(fns, BC_PROG_MAIN); BcInstPtr *ip = bc_vec_item(&vm.prog.stack, 0); bool good = ((vm.status && vm.status != BC_STATUS_QUIT) || vm.sig); BC_SIG_ASSERT_LOCKED; // If all is good, go ahead and reset. if (good) bc_program_reset(&vm.prog); #if BC_ENABLED // bc has this extra condition. If it is not satisfied, it is in the middle of // a parse.
if (good && BC_IS_BC) good = !BC_PARSE_NO_EXEC(&vm.prs); #endif // BC_ENABLED #if DC_ENABLED // For dc, it is safe only when all of the results on the results stack are // safe, which means that they are temporaries or other things that don't // need strings or constants. if (BC_IS_DC) { size_t i; good = true; for (i = 0; good && i < vm.prog.results.len; ++i) { BcResult *r = (BcResult*) bc_vec_item(&vm.prog.results, i); good = BC_VM_SAFE_RESULT(r); } } #endif // DC_ENABLED // If this condition is true, we can get rid of strings, // constants, and code. if (good && vm.prog.stack.len == 1 && ip->idx == f->code.len) { #if BC_ENABLED if (BC_IS_BC) { bc_vec_popAll(&f->labels); bc_vec_popAll(&f->strs); bc_vec_popAll(&f->consts); // I can't clear out the other_slabs because it has functions, // consts, strings, vars, and arrays. It has strings from *other* // functions, specifically. bc_slabvec_clear(&vm.main_const_slab); bc_slabvec_clear(&vm.main_slabs); } #endif // BC_ENABLED #if DC_ENABLED // Note to self: you cannot delete strings and functions. Deal with it. if (BC_IS_DC) { bc_vec_popAll(vm.prog.consts); bc_slabvec_clear(&vm.main_const_slab); } #endif // DC_ENABLED bc_vec_popAll(&f->code); ip->idx = 0; } } /** * Process a bunch of text. * @param text The text to process. * @param is_stdin True if the text came from stdin, false otherwise. + * @param is_exprs True if the text is from command-line expressions, false + * otherwise. */ -static void bc_vm_process(const char *text, bool is_stdin) { +static void bc_vm_process(const char *text, bool is_stdin, bool is_exprs) { // Set up the parser. - bc_parse_text(&vm.prs, text, is_stdin); + bc_parse_text(&vm.prs, text, is_stdin, is_exprs); do { BC_SIG_LOCK; #if BC_ENABLED // If the first token is the keyword define, then we need to do this // specially because bc thinks it may not be able to parse. if (vm.prs.l.t == BC_LEX_KW_DEFINE) vm.parse(&vm.prs); #endif // BC_ENABLED // Parse it all. 
while (BC_PARSE_CAN_PARSE(vm.prs)) vm.parse(&vm.prs); BC_SIG_UNLOCK; // Execute if possible. - if(BC_IS_DC || !BC_PARSE_NO_EXEC(&vm.prs)) bc_program_exec(&vm.prog); + if (BC_IS_DC || !BC_PARSE_NO_EXEC(&vm.prs)) bc_program_exec(&vm.prog); assert(BC_IS_DC || vm.prog.results.len == 0); // Flush in interactive mode. if (BC_I) bc_file_flush(&vm.fout, bc_flush_save); } while (vm.prs.l.t != BC_LEX_EOF); } #if BC_ENABLED /** * Ends a series of if statements. This is to ensure that full parses happen * when a file finishes or stdin has no more data. Without this, bc thinks that * it cannot parse any further. But if we reach the end of a file or stdin has * no more data, we know we can add an empty else clause. */ static void bc_vm_endif(void) { bc_parse_endif(&vm.prs); bc_program_exec(&vm.prog); } #endif // BC_ENABLED /** * Processes a file. * @param file The filename. */ static void bc_vm_file(const char *file) { char *data = NULL; assert(!vm.sig_pop); // Set up the lexer. bc_lex_file(&vm.prs.l, file); BC_SIG_LOCK; // Read the file. data = bc_read_file(file); assert(data != NULL); BC_SETJMP_LOCKED(err); BC_SIG_UNLOCK; // Process it. - bc_vm_process(data, false); + bc_vm_process(data, false, false); #if BC_ENABLED // Make sure to end any open if statements. if (BC_IS_BC) bc_vm_endif(); #endif // BC_ENABLED err: BC_SIG_MAYLOCK; // Cleanup. free(data); bc_vm_clean(); // bc_program_reset(), called by bc_vm_clean(), resets the status. // We want it to clear the sig_pop variable in case it was set. if (vm.status == (sig_atomic_t) BC_STATUS_SUCCESS) BC_LONGJMP_STOP; BC_LONGJMP_CONT; } bool bc_vm_readLine(bool clear) { BcStatus s; bool good; BC_SIG_ASSERT_NOT_LOCKED; // Clear the buffer if desired. if (clear) bc_vec_empty(&vm.buffer); // Empty the line buffer. bc_vec_empty(&vm.line_buf); if (vm.eof) return false; do { // bc_read_line() must always return either BC_STATUS_SUCCESS or // BC_STATUS_EOF. Everything else, it and whatever it calls, must jump // out instead. 
s = bc_read_line(&vm.line_buf, ">>> "); vm.eof = (s == BC_STATUS_EOF); } while (!(s) && !vm.eof && vm.line_buf.len < 1); good = (vm.line_buf.len > 1); // Concat if we found something. if (good) bc_vec_concat(&vm.buffer, vm.line_buf.v); return good; } /** * Processes text from stdin. */ static void bc_vm_stdin(void) { bool clear = true; vm.is_stdin = true; // Set up the lexer. bc_lex_file(&vm.prs.l, bc_program_stdin_name); - // These are global so that the dc lexer can access them, but they are tied - // to this function, really. Well, this and bc_vm_readLine(). These are the - // reason that we have vm.is_stdin to tell the dc lexer if we are reading - // from stdin. Well, both lexers care. And the reason they care is so that - // if a comment or a string goes across multiple lines, the lexer can - // request more data from stdin until the comment or string is ended. + // These are global so that the lexers can access them, but they are + // allocated and freed in this function because they should only be used for + // stdin and expressions (they are used in bc_vm_exprs() as well). So they + // are tied to this function, really. Well, this and bc_vm_readLine(). These + // are the reasons that we have vm.is_stdin to tell the lexers if we are + // reading from stdin. Well, both lexers care. And the reason they care is + // so that if a comment or a string goes across multiple lines, the lexer + // can request more data from stdin until the comment or string is ended. BC_SIG_LOCK; bc_vec_init(&vm.buffer, sizeof(uchar), BC_DTOR_NONE); bc_vec_init(&vm.line_buf, sizeof(uchar), BC_DTOR_NONE); BC_SETJMP_LOCKED(err); BC_SIG_UNLOCK; // This label exists because errors can cause jumps to end up at the err label // below. If that happens, and the error should be cleared and execution // continue, then we need to jump back. restart: // While we still read data from stdin. 
while (bc_vm_readLine(clear)) { size_t len = vm.buffer.len - 1; const char *str = vm.buffer.v; // We don't want to clear the buffer when the line ends with a backslash // because a backslash newline is special in bc. clear = (len < 2 || str[len - 2] != '\\' || str[len - 1] != '\n'); if (!clear) continue; // Process the data. - bc_vm_process(vm.buffer.v, true); + bc_vm_process(vm.buffer.v, true, false); if (vm.eof) break; else { BC_SIG_LOCK; bc_vm_clean(); BC_SIG_UNLOCK; } } #if BC_ENABLED // End the if statements. if (BC_IS_BC) bc_vm_endif(); #endif // BC_ENABLED err: + BC_SIG_MAYLOCK; // Cleanup. bc_vm_clean(); #if !BC_ENABLE_MEMCHECK assert(vm.status != BC_STATUS_ERROR_FATAL); vm.status = vm.status == BC_STATUS_QUIT || !BC_I ? vm.status : BC_STATUS_SUCCESS; #else // !BC_ENABLE_MEMCHECK vm.status = vm.status == BC_STATUS_ERROR_FATAL || vm.status == BC_STATUS_QUIT || !BC_I ? vm.status : BC_STATUS_SUCCESS; #endif // !BC_ENABLE_MEMCHECK if (!vm.status && !vm.eof) { bc_vec_empty(&vm.buffer); BC_LONGJMP_STOP; BC_SIG_UNLOCK; goto restart; } #ifndef NDEBUG - // Since these are tied to this function, free them here. + // Since these are tied to this function, free them here. We only free in + // debug mode because stdin is always the last thing read. bc_vec_free(&vm.line_buf); bc_vec_free(&vm.buffer); #endif // NDEBUG BC_LONGJMP_CONT; } +bool bc_vm_readBuf(bool clear) { + + size_t len = vm.exprs.len - 1; + bool more; + + BC_SIG_ASSERT_NOT_LOCKED; + + // Clear the buffer if desired. + if (clear) bc_vec_empty(&vm.buffer); + + // We want to pop the nul byte off because that's what bc_read_buf() + // expects. + bc_vec_pop(&vm.buffer); + + // Read one line of expressions. + more = bc_read_buf(&vm.buffer, vm.exprs.v, &len); + bc_vec_pushByte(&vm.buffer, '\0'); + + return more; +} + +static void bc_vm_exprs(void) { + + bool clear = true; + + // Prepare the lexer. 
+ bc_lex_file(&vm.prs.l, bc_program_exprs_name); + + // We initialize this so that the lexer can access it in the case that it + // needs more data for expressions, such as for a multiline string or + // comment. See the comment on the allocation of vm.buffer above in + // bc_vm_stdin() for more information. + BC_SIG_LOCK; + bc_vec_init(&vm.buffer, sizeof(uchar), BC_DTOR_NONE); + BC_SETJMP_LOCKED(err); + BC_SIG_UNLOCK; + + while (bc_vm_readBuf(clear)) { + + size_t len = vm.buffer.len - 1; + const char *str = vm.buffer.v; + + // We don't want to clear the buffer when the line ends with a backslash + // because a backslash newline is special in bc. + clear = (len < 2 || str[len - 2] != '\\' || str[len - 1] != '\n'); + if (!clear) continue; + + // Process the data. + bc_vm_process(vm.buffer.v, false, true); + } + + // If we were not supposed to clear, then we should process everything. This + // makes sure that errors get reported. + if (!clear) bc_vm_process(vm.buffer.v, false, true); + +err: + + BC_SIG_MAYLOCK; + + // Cleanup. + bc_vm_clean(); + + // Since this is tied to this function, free it here. We always free it here + // because bc_vm_stdin() may or may not use it later. + bc_vec_free(&vm.buffer); + + BC_LONGJMP_CONT; +} + #if BC_ENABLED /** * Loads a math library. * @param name The name of the library. * @param text The text of the source code. */ static void bc_vm_load(const char *name, const char *text) { bc_lex_file(&vm.prs.l, name); - bc_parse_text(&vm.prs, text, false); + bc_parse_text(&vm.prs, text, false, false); BC_SIG_LOCK; while (vm.prs.l.t != BC_LEX_EOF) vm.parse(&vm.prs); BC_SIG_UNLOCK; } #endif // BC_ENABLED /** * Loads the default error messages. */ static void bc_vm_defaultMsgs(void) { size_t i; vm.func_header = bc_err_func_header; // Load the error categories. for (i = 0; i < BC_ERR_IDX_NELEMS + BC_ENABLED; ++i) vm.err_ids[i] = bc_errs[i]; // Load the error messages. 
for (i = 0; i < BC_ERR_NELEMS; ++i) vm.err_msgs[i] = bc_err_msgs[i]; } /** * Loads the error messages for the locale. If NLS is disabled, this just loads * the default messages. */ static void bc_vm_gettext(void) { #if BC_ENABLE_NLS uchar id = 0; int set = 1, msg = 1; size_t i; // If no locale, load the defaults. if (vm.locale == NULL) { vm.catalog = BC_VM_INVALID_CATALOG; bc_vm_defaultMsgs(); return; } vm.catalog = catopen(BC_MAINEXEC, NL_CAT_LOCALE); // If no catalog, load the defaults. if (vm.catalog == BC_VM_INVALID_CATALOG) { bc_vm_defaultMsgs(); return; } // Load the function header. vm.func_header = catgets(vm.catalog, set, msg, bc_err_func_header); // Load the error categories. for (set += 1; msg <= BC_ERR_IDX_NELEMS + BC_ENABLED; ++msg) vm.err_ids[msg - 1] = catgets(vm.catalog, set, msg, bc_errs[msg - 1]); i = 0; id = bc_err_ids[i]; // Load the error messages. In order to understand this loop, you must know // the order of messages and categories in the enum and in the locale files. for (set = id + 3, msg = 1; i < BC_ERR_NELEMS; ++i, ++msg) { if (id != bc_err_ids[i]) { msg = 1; id = bc_err_ids[i]; set = id + 3; } vm.err_msgs[i] = catgets(vm.catalog, set, msg, bc_err_msgs[i]); } #else // BC_ENABLE_NLS bc_vm_defaultMsgs(); #endif // BC_ENABLE_NLS } /** * Starts execution. Really, this is a function of historical accident; it could * probably be combined with bc_vm_boot(), but I don't care enough. Really, this * function starts when execution of bc or dc source code starts. */ static void bc_vm_exec(void) { size_t i; bool has_file = false; - BcVec buf; #if BC_ENABLED // Load the math libraries. if (BC_IS_BC && (vm.flags & BC_FLAG_L)) { // Can't allow redefinitions in the builtin library. vm.no_redefine = true; bc_vm_load(bc_lib_name, bc_lib); #if BC_ENABLE_EXTRA_MATH if (!BC_IS_POSIX) bc_vm_load(bc_lib2_name, bc_lib2); #endif // BC_ENABLE_EXTRA_MATH // Make sure to clear this. vm.no_redefine = false; // Execute to ensure that all is hunky dory. 
Without this, scale can be // set improperly. bc_program_exec(&vm.prog); } #endif // BC_ENABLED // If there are expressions to execute... if (vm.exprs.len) { - size_t len = vm.exprs.len - 1; - bool more; - - BC_SIG_LOCK; - - // Create this as a buffer for reading into. - bc_vec_init(&buf, sizeof(uchar), BC_DTOR_NONE); - -#ifndef NDEBUG - BC_SETJMP_LOCKED(err); -#endif // NDEBUG - - BC_SIG_UNLOCK; - - // Prepare the lexer. - bc_lex_file(&vm.prs.l, bc_program_exprs_name); - - // Process the expressions one at a time. - do { - - more = bc_read_buf(&buf, vm.exprs.v, &len); - bc_vec_pushByte(&buf, '\0'); - bc_vm_process(buf.v, false); - - bc_vec_popAll(&buf); - - } while (more); - - BC_SIG_LOCK; - - bc_vec_free(&buf); - -#ifndef NDEBUG - BC_UNSETJMP; -#endif // NDEBUG - - BC_SIG_UNLOCK; + // Process the expressions. + bc_vm_exprs(); // Sometimes, executing expressions means we need to quit. if (!vm.no_exprs && vm.exit_exprs && BC_EXPR_EXIT) return; } // Process files. for (i = 0; i < vm.files.len; ++i) { char *path = *((char**) bc_vec_item(&vm.files, i)); if (!strcmp(path, "")) continue; has_file = true; bc_vm_file(path); } #if BC_ENABLE_EXTRA_MATH // These are needed for the pseudo-random number generator. bc_unveil("/dev/urandom", "r"); bc_unveil("/dev/random", "r"); bc_unveil(NULL, NULL); #endif // BC_ENABLE_EXTRA_MATH #if BC_ENABLE_HISTORY // We need to keep tty if history is enabled, and we need to keep rpath for // the times when we read from /dev/urandom. if (BC_TTY && !vm.history.badTerm) bc_pledge(bc_pledge_end_history, NULL); else #endif // BC_ENABLE_HISTORY { bc_pledge(bc_pledge_end, NULL); } #if BC_ENABLE_AFL // This is the thing that makes fuzzing with AFL++ so fast. If you move this // back, you won't cause any problems, but fuzzing will slow down. If you // move this forward, you won't fuzz anything because you will be skipping // the reading from stdin. __AFL_INIT(); #endif // BC_ENABLE_AFL // Execute from stdin. bc always does. 
if (BC_IS_BC || !has_file) bc_vm_stdin(); - -// These are all protected by ifndef NDEBUG because if these are needed, bc is -// going to exit anyway, and I see no reason to include this code in a release -// build when the OS is going to free all of the resources anyway. -#ifndef NDEBUG - return; - -err: - BC_SIG_MAYLOCK; - bc_vec_free(&buf); - BC_LONGJMP_CONT; -#endif // NDEBUG } void bc_vm_boot(int argc, char *argv[]) { int ttyin, ttyout, ttyerr; bool tty; const char* const env_len = BC_IS_BC ? "BC_LINE_LENGTH" : "DC_LINE_LENGTH"; const char* const env_args = BC_IS_BC ? "BC_ENV_ARGS" : "DC_ENV_ARGS"; const char* const env_exit = BC_IS_BC ? "BC_EXPR_EXIT" : "DC_EXPR_EXIT"; int env_exit_def = BC_IS_BC ? BC_DEFAULT_EXPR_EXIT : DC_DEFAULT_EXPR_EXIT; // We need to know which of stdin, stdout, and stderr are tty's. ttyin = isatty(STDIN_FILENO); ttyout = isatty(STDOUT_FILENO); ttyerr = isatty(STDERR_FILENO); tty = (ttyin != 0 && ttyout != 0 && ttyerr != 0); vm.flags |= ttyin ? BC_FLAG_TTYIN : 0; vm.flags |= tty ? BC_FLAG_TTY : 0; vm.flags |= ttyin && ttyout ? BC_FLAG_I : 0; // Set up signals. bc_vm_sigaction(); // Initialize some vm stuff. This is separate to make things easier for the // library. bc_vm_init(); // Explicitly set this in case NULL isn't all zeroes. vm.file = NULL; // Set the error messages. bc_vm_gettext(); // Initialize the output file buffers. They each take portions of the global // buffer. stdout gets more because it will probably have more data. bc_file_init(&vm.ferr, STDERR_FILENO, output_bufs + BC_VM_STDOUT_BUF_SIZE, BC_VM_STDERR_BUF_SIZE); bc_file_init(&vm.fout, STDOUT_FILENO, output_bufs, BC_VM_STDOUT_BUF_SIZE); // Set the input buffer to the rest of the global buffer. vm.buf = output_bufs + BC_VM_STDOUT_BUF_SIZE + BC_VM_STDERR_BUF_SIZE; // Set the line length by environment variable. 
vm.line_len = (uint16_t) bc_vm_envLen(env_len); bc_vm_setenvFlag(env_exit, env_exit_def, BC_FLAG_EXPR_EXIT); // Clear the files and expressions vectors, just in case. This marks them as // *not* allocated. bc_vec_clear(&vm.files); bc_vec_clear(&vm.exprs); #if !BC_ENABLE_LIBRARY // Initialize the slab vectors. bc_slabvec_init(&vm.main_const_slab); bc_slabvec_init(&vm.main_slabs); bc_slabvec_init(&vm.other_slabs); #endif // !BC_ENABLE_LIBRARY // Initialize the program and main parser. These have to be in this order // because the program has to be initialized first, since a pointer to it is // passed to the parser. bc_program_init(&vm.prog); bc_parse_init(&vm.prs, &vm.prog, BC_PROG_MAIN); // Set defaults. vm.flags |= BC_TTY ? BC_FLAG_P | BC_FLAG_R : 0; vm.flags |= BC_I ? BC_FLAG_Q : 0; #if BC_ENABLED if (BC_IS_BC) { // bc checks this environment variable to see if it should run in // standard mode. char* var = bc_vm_getenv("POSIXLY_CORRECT"); vm.flags |= BC_FLAG_S * (var != NULL); bc_vm_getenvFree(var); // Set whether we print the banner or not. if (BC_I) bc_vm_setenvFlag("BC_BANNER", BC_DEFAULT_BANNER, BC_FLAG_Q); } #endif // BC_ENABLED // Are we in TTY mode? if (BC_TTY) { const char* const env_tty = BC_IS_BC ? "BC_TTY_MODE" : "DC_TTY_MODE"; int env_tty_def = BC_IS_BC ? BC_DEFAULT_TTY_MODE : DC_DEFAULT_TTY_MODE; const char* const env_prompt = BC_IS_BC ? "BC_PROMPT" : "DC_PROMPT"; int env_prompt_def = BC_IS_BC ? BC_DEFAULT_PROMPT : DC_DEFAULT_PROMPT; // Set flags for TTY mode and prompt. bc_vm_setenvFlag(env_tty, env_tty_def, BC_FLAG_TTY); bc_vm_setenvFlag(env_prompt, tty ? env_prompt_def : 0, BC_FLAG_P); #if BC_ENABLE_HISTORY // If TTY mode is used, activate history. if (BC_TTY) bc_history_init(&vm.history); #endif // BC_ENABLE_HISTORY } // Process environment and command-line arguments. bc_vm_envArgs(env_args); bc_args(argc, argv, true); // If we are in interactive mode... if (BC_I) { const char* const env_sigint = BC_IS_BC ? 
"BC_SIGINT_RESET" : "DC_SIGINT_RESET"; int env_sigint_def = BC_IS_BC ? BC_DEFAULT_SIGINT_RESET : DC_DEFAULT_SIGINT_RESET; // Set whether we reset on SIGINT or not. bc_vm_setenvFlag(env_sigint, env_sigint_def, BC_FLAG_SIGINT); } #if BC_ENABLED // Disable global stacks in POSIX mode. if (BC_IS_POSIX) vm.flags &= ~(BC_FLAG_G); // Print the banner if allowed. We have to be in bc, in interactive mode, // and not be quieted by command-line option or environment variable. if (BC_IS_BC && BC_I && (vm.flags & BC_FLAG_Q)) { bc_vm_info(NULL); bc_file_putchar(&vm.fout, bc_flush_none, '\n'); bc_file_flush(&vm.fout, bc_flush_none); } #endif // BC_ENABLED BC_SIG_UNLOCK; // Start executing. bc_vm_exec(); } #endif // !BC_ENABLE_LIBRARY void bc_vm_init(void) { BC_SIG_ASSERT_LOCKED; #if !BC_ENABLE_LIBRARY // Set up the constant zero. bc_num_setup(&vm.zero, vm.zero_num, BC_VM_ONE_CAP); #endif // !BC_ENABLE_LIBRARY // Set up more constant BcNum's. bc_num_setup(&vm.one, vm.one_num, BC_VM_ONE_CAP); bc_num_one(&vm.one); // Set up more constant BcNum's. memcpy(vm.max_num, bc_num_bigdigMax, bc_num_bigdigMax_size * sizeof(BcDig)); memcpy(vm.max2_num, bc_num_bigdigMax2, bc_num_bigdigMax2_size * sizeof(BcDig)); bc_num_setup(&vm.max, vm.max_num, BC_NUM_BIGDIG_LOG10); bc_num_setup(&vm.max2, vm.max2_num, BC_NUM_BIGDIG_LOG10); vm.max.len = bc_num_bigdigMax_size; vm.max2.len = bc_num_bigdigMax2_size; // Set up the maxes for the globals. vm.maxes[BC_PROG_GLOBALS_IBASE] = BC_NUM_MAX_POSIX_IBASE; vm.maxes[BC_PROG_GLOBALS_OBASE] = BC_MAX_OBASE; vm.maxes[BC_PROG_GLOBALS_SCALE] = BC_MAX_SCALE; #if BC_ENABLE_EXTRA_MATH vm.maxes[BC_PROG_MAX_RAND] = ((BcRand) 0) - 1; #endif // BC_ENABLE_EXTRA_MATH #if BC_ENABLED #if !BC_ENABLE_LIBRARY // bc has a higher max ibase when it's not in POSIX mode. 
if (BC_IS_BC && !BC_IS_POSIX) #endif // !BC_ENABLE_LIBRARY { vm.maxes[BC_PROG_GLOBALS_IBASE] = BC_NUM_MAX_IBASE; } #endif // BC_ENABLED } #if BC_ENABLE_LIBRARY void bc_vm_atexit(void) { bc_vm_shutdown(); #ifndef NDEBUG bc_vec_free(&vm.jmp_bufs); #endif // NDEBUG } #else // BC_ENABLE_LIBRARY int bc_vm_atexit(int status) { // Set the status correctly. int s = BC_STATUS_IS_ERROR(status) ? status : BC_STATUS_SUCCESS; bc_vm_shutdown(); #ifndef NDEBUG bc_vec_free(&vm.jmp_bufs); #endif // NDEBUG return s; } #endif // BC_ENABLE_LIBRARY diff --git a/tests/fuzzing/bc_afl.yaml b/tests/fuzzing/bc_afl.yaml new file mode 100644 index 000000000000..7d13bff95824 --- /dev/null +++ b/tests/fuzzing/bc_afl.yaml @@ -0,0 +1,125 @@ +session_name: bc_afl + +windows: + - window_name: control + layout: even-horizontal + panes: + - shell_command: + - echo core | doas tee /proc/sys/kernel/core_pattern + - echo performance | doas tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor + - rm -rf tests/fuzzing/bc_outputs1/ + - rm -rf tests/fuzzing/bc_outputs2/ + - rm -rf tests/fuzzing/bc_outputs3/ + - rm -rf tests/fuzzing/dc_outputs/ + - window_name: bc11 + layout: even-horizontal + panes: + - shell_command: + - export AFL_AUTORESUME=1 + - sleep 15 + - afl-fuzz -i tests/fuzzing/bc_inputs1 -o tests/fuzzing/bc_outputs1 -p exploit -D -M bc11 bin/bc -lq -e "seed = 1280937142.20981723890730892738902938071028973408912703984712093" -f- + - window_name: bc12 + layout: even-horizontal + panes: + - shell_command: + - export AFL_AUTORESUME=1 + - sleep 16 + - afl-fuzz -i tests/fuzzing/bc_inputs1 -o tests/fuzzing/bc_outputs1 -p coe -S bc12 bin/bc -lq -e "seed = 1280937142.20981723890730892738902938071028973408912703984712093" -f- + - window_name: bc13 + layout: even-horizontal + panes: + - shell_command: + - export AFL_AUTORESUME=1 + - sleep 17 + - afl-fuzz -i tests/fuzzing/bc_inputs1 -o tests/fuzzing/bc_outputs1 -p fast -S bc13 bin/bc -lq -e "seed = 
1280937142.20981723890730892738902938071028973408912703984712093" -f- + - window_name: bc14 + layout: even-horizontal + panes: + - shell_command: + - export AFL_AUTORESUME=1 + - sleep 18 + - afl-fuzz -i tests/fuzzing/bc_inputs1 -o tests/fuzzing/bc_outputs1 -p explore -S bc14 bin/bc -lq -e "seed = 1280937142.20981723890730892738902938071028973408912703984712093" -f- + - window_name: bc21 + layout: even-horizontal + panes: + - shell_command: + - export AFL_AUTORESUME=1 + - sleep 19 + - afl-fuzz -i tests/fuzzing/bc_inputs2 -o tests/fuzzing/bc_outputs2 -p exploit -D -M bc21 bin/bc -lq -e "seed = 1280937142.20981723890730892738902938071028973408912703984712093" -f- + - window_name: bc22 + layout: even-horizontal + panes: + - shell_command: + - export AFL_AUTORESUME=1 + - sleep 20 + - afl-fuzz -i tests/fuzzing/bc_inputs2 -o tests/fuzzing/bc_outputs2 -p coe -S bc22 bin/bc -lq -e "seed = 1280937142.20981723890730892738902938071028973408912703984712093" -f- + - window_name: bc23 + layout: even-horizontal + panes: + - shell_command: + - export AFL_AUTORESUME=1 + - sleep 21 + - afl-fuzz -i tests/fuzzing/bc_inputs2 -o tests/fuzzing/bc_outputs2 -p fast -S bc23 bin/bc -lq -e "seed = 1280937142.20981723890730892738902938071028973408912703984712093" -f- + - window_name: bc24 + layout: even-horizontal + panes: + - shell_command: + - export AFL_AUTORESUME=1 + - sleep 22 + - afl-fuzz -i tests/fuzzing/bc_inputs2 -o tests/fuzzing/bc_outputs2 -p explore -S bc24 bin/bc -lq -e "seed = 1280937142.20981723890730892738902938071028973408912703984712093" -f- + - window_name: bc31 + layout: even-horizontal + panes: + - shell_command: + - export AFL_AUTORESUME=1 + - sleep 23 + - afl-fuzz -i tests/fuzzing/bc_inputs3 -o tests/fuzzing/bc_outputs3 -p exploit -D -M bc31 bin/bc -lq -e "seed = 1280937142.20981723890730892738902938071028973408912703984712093" -f- + - window_name: bc32 + layout: even-horizontal + panes: + - shell_command: + - export AFL_AUTORESUME=1 + - sleep 24 + - afl-fuzz -i 
tests/fuzzing/bc_inputs3 -o tests/fuzzing/bc_outputs3 -p coe -S bc32 bin/bc -lq -e "seed = 1280937142.20981723890730892738902938071028973408912703984712093" -f- + - window_name: bc33 + layout: even-horizontal + panes: + - shell_command: + - export AFL_AUTORESUME=1 + - sleep 25 + - afl-fuzz -i tests/fuzzing/bc_inputs3 -o tests/fuzzing/bc_outputs3 -p fast -S bc33 bin/bc -lq -e "seed = 1280937142.20981723890730892738902938071028973408912703984712093" -f- + - window_name: bc34 + layout: even-horizontal + panes: + - shell_command: + - export AFL_AUTORESUME=1 + - sleep 26 + - afl-fuzz -i tests/fuzzing/bc_inputs3 -o tests/fuzzing/bc_outputs3 -p explore -S bc34 bin/bc -lq -e "seed = 1280937142.20981723890730892738902938071028973408912703984712093" -f- + - window_name: dc11 + layout: even-horizontal + panes: + - shell_command: + - export AFL_AUTORESUME=1 + - sleep 27 + - afl-fuzz -i tests/fuzzing/dc_inputs -o tests/fuzzing/dc_outputs -p exploit -D -M dc11 bin/dc -x -e "1280937142.20981723890730892738902938071028973408912703984712093j" -f- + - window_name: dc12 + layout: even-horizontal + panes: + - shell_command: + - export AFL_AUTORESUME=1 + - sleep 28 + - afl-fuzz -i tests/fuzzing/dc_inputs -o tests/fuzzing/dc_outputs -p coe -S dc12 bin/dc -x -e "1280937142.20981723890730892738902938071028973408912703984712093j" -f- + - window_name: dc13 + layout: even-horizontal + panes: + - shell_command: + - export AFL_AUTORESUME=1 + - sleep 29 + - afl-fuzz -i tests/fuzzing/dc_inputs -o tests/fuzzing/dc_outputs -p fast -S dc13 bin/dc -x -e "1280937142.20981723890730892738902938071028973408912703984712093j" -f- + - window_name: dc14 + layout: even-horizontal + panes: + - shell_command: + - export AFL_AUTORESUME=1 + - sleep 30 + - afl-fuzz -i tests/fuzzing/dc_inputs -o tests/fuzzing/dc_outputs -p explore -S dc14 bin/dc -x -e "1280937142.20981723890730892738902938071028973408912703984712093j" -f- diff --git a/tests/fuzzing/bc_afl_continue.yaml b/tests/fuzzing/bc_afl_continue.yaml new file 
mode 100644 index 000000000000..486984bdaef5 --- /dev/null +++ b/tests/fuzzing/bc_afl_continue.yaml @@ -0,0 +1,122 @@ +session_name: bc_afl_continue +start_directory: ./ + +windows: + - window_name: control + layout: even-horizontal + panes: + - shell_command: + - echo core | doas tee /proc/sys/kernel/core_pattern + - echo performance | doas tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor + - window_name: bc11 + layout: even-horizontal + panes: + - shell_command: + - export AFL_AUTORESUME=1 + - sleep 4 + - afl-fuzz -i- -o tests/fuzzing/bc_outputs1 -p exploit -D -M bc11 bin/bc -lq -e "seed = 1280937142.20981723890730892738902938071028973408912703984712093" -f- + - window_name: bc12 + layout: even-horizontal + panes: + - shell_command: + - export AFL_AUTORESUME=1 + - sleep 5 + - afl-fuzz -i- -o tests/fuzzing/bc_outputs1 -p coe -S bc12 bin/bc -lq -e "seed = 1280937142.20981723890730892738902938071028973408912703984712093" -f- + - window_name: bc13 + layout: even-horizontal + panes: + - shell_command: + - export AFL_AUTORESUME=1 + - sleep 6 + - afl-fuzz -i- -o tests/fuzzing/bc_outputs1 -p fast -S bc13 bin/bc -lq -e "seed = 1280937142.20981723890730892738902938071028973408912703984712093" -f- + - window_name: bc14 + layout: even-horizontal + panes: + - shell_command: + - export AFL_AUTORESUME=1 + - sleep 7 + - afl-fuzz -i- -o tests/fuzzing/bc_outputs1 -p explore -S bc14 bin/bc -lq -e "seed = 1280937142.20981723890730892738902938071028973408912703984712093" -f- + - window_name: bc21 + layout: even-horizontal + panes: + - shell_command: + - export AFL_AUTORESUME=1 + - sleep 8 + - afl-fuzz -i- -o tests/fuzzing/bc_outputs2 -p exploit -D -M bc21 bin/bc -lq -e "seed = 1280937142.20981723890730892738902938071028973408912703984712093" -f- + - window_name: bc22 + layout: even-horizontal + panes: + - shell_command: + - export AFL_AUTORESUME=1 + - sleep 9 + - afl-fuzz -i- -o tests/fuzzing/bc_outputs2 -p coe -S bc22 bin/bc -lq -e "seed = 
1280937142.20981723890730892738902938071028973408912703984712093" -f- + - window_name: bc23 + layout: even-horizontal + panes: + - shell_command: + - export AFL_AUTORESUME=1 + - sleep 10 + - afl-fuzz -i- -o tests/fuzzing/bc_outputs2 -p fast -S bc23 bin/bc -lq -e "seed = 1280937142.20981723890730892738902938071028973408912703984712093" -f- + - window_name: bc24 + layout: even-horizontal + panes: + - shell_command: + - export AFL_AUTORESUME=1 + - sleep 11 + - afl-fuzz -i- -o tests/fuzzing/bc_outputs2 -p explore -S bc24 bin/bc -lq -e "seed = 1280937142.20981723890730892738902938071028973408912703984712093" -f- + - window_name: bc31 + layout: even-horizontal + panes: + - shell_command: + - export AFL_AUTORESUME=1 + - sleep 12 + - afl-fuzz -i- -o tests/fuzzing/bc_outputs3 -p exploit -D -M bc31 bin/bc -lq -e "seed = 1280937142.20981723890730892738902938071028973408912703984712093" -f- + - window_name: bc32 + layout: even-horizontal + panes: + - shell_command: + - export AFL_AUTORESUME=1 + - sleep 13 + - afl-fuzz -i- -o tests/fuzzing/bc_outputs3 -p coe -S bc32 bin/bc -lq -e "seed = 1280937142.20981723890730892738902938071028973408912703984712093" -f- + - window_name: bc33 + layout: even-horizontal + panes: + - shell_command: + - export AFL_AUTORESUME=1 + - sleep 14 + - afl-fuzz -i- -o tests/fuzzing/bc_outputs3 -p fast -S bc33 bin/bc -lq -e "seed = 1280937142.20981723890730892738902938071028973408912703984712093" -f- + - window_name: bc34 + layout: even-horizontal + panes: + - shell_command: + - export AFL_AUTORESUME=1 + - sleep 15 + - afl-fuzz -i- -o tests/fuzzing/bc_outputs3 -p explore -S bc34 bin/bc -lq -e "seed = 1280937142.20981723890730892738902938071028973408912703984712093" -f- + - window_name: dc11 + layout: even-horizontal + panes: + - shell_command: + - export AFL_AUTORESUME=1 + - sleep 16 + - afl-fuzz -i- -o tests/fuzzing/dc_outputs -p exploit -D -M dc11 bin/dc -x -e "1280937142.20981723890730892738902938071028973408912703984712093j" -f- + - window_name: dc12 + 
layout: even-horizontal + panes: + - shell_command: + - export AFL_AUTORESUME=1 + - sleep 17 + - afl-fuzz -i- -o tests/fuzzing/dc_outputs -p coe -S dc12 bin/dc -x -e "1280937142.20981723890730892738902938071028973408912703984712093j" -f- + - window_name: dc13 + layout: even-horizontal + panes: + - shell_command: + - export AFL_AUTORESUME=1 + - sleep 18 + - afl-fuzz -i- -o tests/fuzzing/dc_outputs -p fast -S dc13 bin/dc -x -e "1280937142.20981723890730892738902938071028973408912703984712093j" -f- + - window_name: dc14 + layout: even-horizontal + panes: + - shell_command: + - export AFL_AUTORESUME=1 + - sleep 19 + - afl-fuzz -i- -o tests/fuzzing/dc_outputs -p explore -S dc14 bin/dc -x -e "1280937142.20981723890730892738902938071028973408912703984712093j" -f- diff --git a/tests/fuzzing/bc_inputs1/array.bc b/tests/fuzzing/bc_inputs1/array.bc new file mode 100644 index 000000000000..dac232804914 --- /dev/null +++ b/tests/fuzzing/bc_inputs1/array.bc @@ -0,0 +1,60 @@ +#! /usr/bin/bc -q + +define z(a[]) { + for (i = 0; i < l; ++i) { + a[i] + } +} + +define x(a[]) { + + # Test for separate vars and arrays. 
+ auto a + + for (a = 0; a < l; ++a) { + a[a] = -a + } + + z(a[]) +} + +define g(x[], y[]) { + return x[0] - y[0] +} + +define h(y[], x[]) { + return g(x[], y[]) +} + +define m(*x[], *y[]) { + return x[0] / y[0] +} + +define n(*y[], *x[]) { + return m(x[], y[]) +} + +for (i = 0; i < 101; ++i) { + a[i] = i +} + +a[104] = 204 + +l = length(a[]) + +for (i = 0; i <= l; ++i) { + a[i] +} + +z(a[]) +x(a[]) +z(a[]) +l + +x[0] = 5 +y[0] = 4 + +h(x[], y[]) +n(x[], y[]) + +halt diff --git a/tests/fuzzing/bc_inputs1/decimal.txt b/tests/fuzzing/bc_inputs1/decimal.txt new file mode 100644 index 000000000000..b79da99e3dd2 --- /dev/null +++ b/tests/fuzzing/bc_inputs1/decimal.txt @@ -0,0 +1,30 @@ +0 +0.0 +000000000000000000000000.00000000000000000000000 +000000000000000000000000000135482346782356 +000000000000000000000000002 +1 +1023468723275435238491972521917846 +4343472432431705867392073517038270398027352709027389273920739037937960379637893607893607893670530278200795207952702873892786172916728961783907893607418973587857386079679267926737520730925372983782793652793 +-1 +-18586 +-31378682943772818461924738352952347258 +-823945628745673589495067238723986520375698237620834674509627345273096287563846592384526349872634895763257893467523987578690283762897568459072348758071071087813501875908127359018715023841710239872301387278 +.123521346523546 +0.1245923756273856 +-.1024678456387 +-0.8735863475634587 +4.0 +-6.0 +234237468293576.000000000000000000000000000000 +23987623568943567.00000000000000000005677834650000000000000 +23856934568940675.000000000000000435676782300000000000000456784 +77567648698496.000000000000000000587674750000000000458563800000000000000 +2348672354968723.2374823546000000000003256987394502346892435623870000000034578 +-2354768.000000000000000000000000000000000000 +-96739874567.000000000347683456 +-3764568345.000000000004573845000000347683460 +-356784356.934568495770004586495678300000000 
+74325437345273852773827101738273127312738521733017537073520735207307570358738257390761276072160719802671980267018728630178.7082681027680521760217867841276127681270867827821768173178207830710978017738178678012767377058785378278207385237085237803278203782037237582795870 +-756752732785273851273728537852738257837283678965738527385272983678372867327835672967385278372637862738627836279863782673862783670.71738178361738718367186378610738617836781603760178367018603760178107735278372832783728367826738627836278378260736270367362073867097307925 +9812734012837410982345719208345712908357412903587192048571920458712.23957182459817249058172945781 diff --git a/tests/fuzzing/bc_inputs1/functions.bc b/tests/fuzzing/bc_inputs1/functions.bc new file mode 100644 index 000000000000..80d6d1623d8d --- /dev/null +++ b/tests/fuzzing/bc_inputs1/functions.bc @@ -0,0 +1,7 @@ +e(0.5) + +define e(x) { + return x +} + +e(0.5) diff --git a/tests/fuzzing/bc_inputs1/len.bc b/tests/fuzzing/bc_inputs1/len.bc new file mode 100644 index 000000000000..ec931f2386a5 --- /dev/null +++ b/tests/fuzzing/bc_inputs1/len.bc @@ -0,0 +1,48 @@ +define fast_gcd(a, b) { + + if (a == b) return a; + if (a > b) return fast_gcd(a - b, b) + + return fast_gcd(a, b - a); +} + +define void r_reduce(*r[]) { + + auto g,s; + + if (length(r[]) != 2) sqrt(-1); + if (scale(r[0])) 2^r[0]; + if (scale(r[1])) 2^r[1]; + + if (r[0] >= 0 && r[1] >= 0) g = fast_gcd(r[0], r[1]); + else g = gcd(r[0], r[1]); + + s = scale; + scale = 0; + + r[0] /= g; + r[1] /= g; + + scale = s; +} + +define void r_init(*r[], a, b) { + r[0] = a; + r[1] = b; + r_reduce(r[]); +} + +define void r_initi(*r[], i, a, b) { + + length(r[]); + + r[0] = i * b + a; + r[1] = b; + + length(r[]); + + r_reduce(r[]); +} + +length(a[]) +r_initi(a[], 5, 63, 94); diff --git a/tests/fuzzing/bc_inputs1/lib10.txt b/tests/fuzzing/bc_inputs1/lib10.txt new file mode 100644 index 000000000000..7aa3fda19cc7 --- /dev/null +++ b/tests/fuzzing/bc_inputs1/lib10.txt @@ -0,0 +1,4 @@ +l10(0) 
+l10(99) +l10(100) +l10(-100) diff --git a/tests/fuzzing/bc_inputs1/lib12.txt b/tests/fuzzing/bc_inputs1/lib12.txt new file mode 100644 index 000000000000..7d70e1ccdd5e --- /dev/null +++ b/tests/fuzzing/bc_inputs1/lib12.txt @@ -0,0 +1 @@ +uint(0) diff --git a/tests/fuzzing/bc_inputs1/lib2.txt b/tests/fuzzing/bc_inputs1/lib2.txt new file mode 100644 index 000000000000..f345bd1669cb --- /dev/null +++ b/tests/fuzzing/bc_inputs1/lib2.txt @@ -0,0 +1,15 @@ +r(0, 0) +r(0, 1) +r(0, 100) +r(1, 0) +r(1, 3) +r(1.4, 0) +r(1.5, 0) +r(34.45, 2) +r(64.1223, 4) +r(-1, 0) +r(-1, 3) +r(-1.4, 0) +r(-1.5, 0) +r(-34.45, 2) +r(-64.1223, 4) diff --git a/tests/fuzzing/bc_inputs1/lib3.txt b/tests/fuzzing/bc_inputs1/lib3.txt new file mode 100644 index 000000000000..1da42385ea44 --- /dev/null +++ b/tests/fuzzing/bc_inputs1/lib3.txt @@ -0,0 +1,6 @@ +f(0) +f(1) +f(2) +f(3) +f(4) +f(5) diff --git a/tests/fuzzing/bc_inputs1/lib6.txt b/tests/fuzzing/bc_inputs1/lib6.txt new file mode 100644 index 000000000000..260e159f9fb6 --- /dev/null +++ b/tests/fuzzing/bc_inputs1/lib6.txt @@ -0,0 +1,5 @@ +pi(5) +p=pi(scale) +r2d(-p) +d2r(180) +d2r(-180) diff --git a/tests/fuzzing/bc_inputs2/bitfuncs.txt b/tests/fuzzing/bc_inputs2/bitfuncs.txt new file mode 100644 index 000000000000..e0703a715c08 --- /dev/null +++ b/tests/fuzzing/bc_inputs2/bitfuncs.txt @@ -0,0 +1,42 @@ +band(13946233938940740889, 12028823668264674112) +bor(13946233938940740889, 12028823668264674112) +bxor(13946233938940740889, 12028823668264674112) +bshl(2366588185, 0) +bshr(2366588185, 0) +bshl(347743040, 25) +bshr(347743040, 25) +bnot8(13946233938940740889) +bnot8(25) +bnot16(13946233938940740889) +bnot16(17689) +bnot32(13946233938940740889) +bnot32(2366588185) +bnot64(13946233938940740889) +brev8(13946233938940740889) +brev8(25) +brev16(13946233938940740889) +brev16(17689) +brev32(13946233938940740889) +brev32(2366588185) +brev64(13946233938940740889) +brol8(13946233938940740889, 12028823668264674112) +brol8(25, 64) 
+brol16(13946233938940740889, 12028823668264674112) +brol16(17689, 9024) +brol32(13946233938940740889, 12028823668264674112) +brol32(2366588185, 347743040) +brol64(13946233938940740889, 12028823668264674112) +bror8(13946233938940740889, 12028823668264674112) +bror8(25, 64) +bror16(13946233938940740889, 12028823668264674112) +bror16(17689, 9024) +bror32(13946233938940740889, 12028823668264674112) +bror32(2366588185, 347743040) +bror64(13946233938940740889, 12028823668264674112) +bmod8(13946233938940740889) +bmod8(25) +bmod16(13946233938940740889) +bmod16(17689) +bmod32(13946233938940740889) +bmod32(2366588185) +bmod64(13946233938940740889) diff --git a/tests/fuzzing/bc_inputs2/lib15.txt b/tests/fuzzing/bc_inputs2/lib15.txt new file mode 100644 index 000000000000..13be33145ba3 --- /dev/null +++ b/tests/fuzzing/bc_inputs2/lib15.txt @@ -0,0 +1 @@ +uint(1) diff --git a/tests/fuzzing/bc_inputs2/lib21.txt b/tests/fuzzing/bc_inputs2/lib21.txt new file mode 100644 index 000000000000..82693695945d --- /dev/null +++ b/tests/fuzzing/bc_inputs2/lib21.txt @@ -0,0 +1 @@ +int(1) diff --git a/tests/fuzzing/bc_inputs2/misc3.txt b/tests/fuzzing/bc_inputs2/misc3.txt new file mode 100644 index 000000000000..7aad374c4ef6 --- /dev/null +++ b/tests/fuzzing/bc_inputs2/misc3.txt @@ -0,0 +1,12 @@ +for (i = 0; i < A; ++i) +{print "n" +if(1)if(1){3 +} +if(0)if(1){3 +} +else 4 +if(0){if(1){3 +}} +else 5 +{i} +} diff --git a/tests/fuzzing/bc_inputs2/modulus.txt b/tests/fuzzing/bc_inputs2/modulus.txt new file mode 100644 index 000000000000..049cd7dbd73c --- /dev/null +++ b/tests/fuzzing/bc_inputs2/modulus.txt @@ -0,0 +1,27 @@ +1 % 1 +2 % 1 +16 % 4 +17 % 4 +3496723859067234 % 298375462837546928347623059375486 +-1 % 1 +-2 % 1 +-1274852934765 % 2387628935486273546 +1 % -1 +2 % -1 +2 % -3 +16 % 5 +89237423 % -237856923854 +-1 % -1 +-2 % -1 +-2 % -2 +-2 % -3 +-13 % -7 +-14 % -7 +-15 % -7 +-127849612 % -23712347682193 +scale = 0 +1 % 1 +2 % 1 +scale = 0; -899510228 % -2448300078.40314 +scale = 0; 
-7424863 % -207.2609738667 +scale = 0; 3769798918 % 0.6 diff --git a/tests/fuzzing/bc_inputs2/references.bc b/tests/fuzzing/bc_inputs2/references.bc new file mode 100644 index 000000000000..8188f17aa017 --- /dev/null +++ b/tests/fuzzing/bc_inputs2/references.bc @@ -0,0 +1,408 @@ +#! /usr/bin/bc -q + +define printarray(a[], len) { + + auto i + + for (i = 0; i < len; ++i) { + a[i] + } +} + +define a2(a[], len) { + + auto i + + for (i = 0; i < len; ++i) { + a[i] = a[i] * a[i] + } + + printarray(a[], len) +} + +define a4(a__[], len) { + + auto i + + for (i = 0; i < len; ++i) { + a__[i] = a__[i] * a__[i] + } + + printarray(a__[], len) +} + +define a6(*a__[], len) { + + auto i + + for (i = 0; i < len; ++i) { + a__[i] = a__[i] * a__[i] + } + + printarray(a__[], len) +} + +define a1(*a[], len) { + + auto i + + for (i = 0; i < len; ++i) { + a[i] = i + } + + a2(a[], len) + + printarray(a[], len) +} + +define a3(*a__[], len) { + + auto i + + for (i = 0; i < len; ++i) { + a__[i] = i + } + + a4(a__[], len) + + printarray(a__[], len) +} + +define a5(*a__[], len) { + + auto i + + for (i = 0; i < len; ++i) { + a__[i] = i + } + + a2(a__[], len) + + printarray(a__[], len) +} + +define a7(*a__[], len) { + + auto i + + for (i = 0; i < len; ++i) { + a__[i] = i + } + + a6(a__[], len) + + printarray(a__[], len) +} + +len = 16 + +a1(a[], len) +printarray(a[], len) +a3(a[], len) +printarray(a[], len) +a5(a[], len) +printarray(a[], len) +a7(a[], len) +printarray(a[], len) + +a1(b[], len) +printarray(b[], len) +a3(b[], len) +printarray(b[], len) +a5(b[], len) +printarray(b[], len) +a7(b[], len) +printarray(b[], len) + +a1[0] = 0 +a2[0] = 0 +a3[0] = 0 +a4[0] = 0 +a5[0] = 0 +a6[0] = 0 +a7[0] = 0 +a8[0] = 0 +a9[0] = 0 +a10[0] = 0 +a11[0] = 0 +a12[0] = 0 +a13[0] = 0 +a14[0] = 0 +a15[0] = 0 +a16[0] = 0 +a17[0] = 0 +a18[0] = 0 +a19[0] = 0 +a20[0] = 0 +a21[0] = 0 +a22[0] = 0 +a23[0] = 0 +a24[0] = 0 +a25[0] = 0 +a26[0] = 0 +a27[0] = 0 +a28[0] = 0 +a29[0] = 0 +a30[0] = 0 +a31[0] = 0 +a32[0] = 0 
+a33[0] = 0 +a34[0] = 0 +a35[0] = 0 +a36[0] = 0 +a37[0] = 0 +a38[0] = 0 +a39[0] = 0 +a40[0] = 0 +a41[0] = 0 +a42[0] = 0 +a43[0] = 0 +a44[0] = 0 +a45[0] = 0 +a46[0] = 0 +a47[0] = 0 +a48[0] = 0 +a49[0] = 0 +a50[0] = 0 +a51[0] = 0 +a52[0] = 0 +a53[0] = 0 +a54[0] = 0 +a55[0] = 0 +a56[0] = 0 +a57[0] = 0 +a58[0] = 0 +a59[0] = 0 +a60[0] = 0 +a61[0] = 0 +a62[0] = 0 +a63[0] = 0 +a64[0] = 0 +a65[0] = 0 +a66[0] = 0 +a67[0] = 0 +a68[0] = 0 +a69[0] = 0 +a70[0] = 0 +a71[0] = 0 +a72[0] = 0 +a73[0] = 0 +a74[0] = 0 +a75[0] = 0 +a76[0] = 0 +a77[0] = 0 +a78[0] = 0 +a79[0] = 0 +a80[0] = 0 +a81[0] = 0 +a82[0] = 0 +a83[0] = 0 +a84[0] = 0 +a85[0] = 0 +a86[0] = 0 +a87[0] = 0 +a88[0] = 0 +a89[0] = 0 +a90[0] = 0 +a91[0] = 0 +a92[0] = 0 +a93[0] = 0 +a94[0] = 0 +a95[0] = 0 +a96[0] = 0 +a97[0] = 0 +a98[0] = 0 +a99[0] = 0 +a100[0] = 0 +a101[0] = 0 +a102[0] = 0 +a103[0] = 0 +a104[0] = 0 +a105[0] = 0 +a106[0] = 0 +a107[0] = 0 +a108[0] = 0 +a109[0] = 0 +a110[0] = 0 +a111[0] = 0 +a112[0] = 0 +a113[0] = 0 +a114[0] = 0 +a115[0] = 0 +a116[0] = 0 +a117[0] = 0 +a118[0] = 0 +a119[0] = 0 +a120[0] = 0 +a121[0] = 0 +a122[0] = 0 +a123[0] = 0 +a124[0] = 0 +a125[0] = 0 +a126[0] = 0 +a127[0] = 0 +a128[0] = 0 +a129[0] = 0 +a130[0] = 0 +a131[0] = 0 +a132[0] = 0 +a133[0] = 0 +a134[0] = 0 +a135[0] = 0 +a136[0] = 0 +a137[0] = 0 +a138[0] = 0 +a139[0] = 0 +a140[0] = 0 +a141[0] = 0 +a142[0] = 0 +a143[0] = 0 +a144[0] = 0 +a145[0] = 0 +a146[0] = 0 +a147[0] = 0 +a148[0] = 0 +a149[0] = 0 +a150[0] = 0 +a151[0] = 0 +a152[0] = 0 +a153[0] = 0 +a154[0] = 0 +a155[0] = 0 +a156[0] = 0 +a157[0] = 0 +a158[0] = 0 +a159[0] = 0 +a160[0] = 0 +a161[0] = 0 +a162[0] = 0 +a163[0] = 0 +a164[0] = 0 +a165[0] = 0 +a166[0] = 0 +a167[0] = 0 +a168[0] = 0 +a169[0] = 0 +a170[0] = 0 +a171[0] = 0 +a172[0] = 0 +a173[0] = 0 +a174[0] = 0 +a175[0] = 0 +a176[0] = 0 +a177[0] = 0 +a178[0] = 0 +a179[0] = 0 +a180[0] = 0 +a181[0] = 0 +a182[0] = 0 +a183[0] = 0 +a184[0] = 0 +a185[0] = 0 +a186[0] = 0 +a187[0] = 0 +a188[0] = 0 +a189[0] = 0 +a190[0] = 0 +a191[0] = 0 
+a192[0] = 0 +a193[0] = 0 +a194[0] = 0 +a195[0] = 0 +a196[0] = 0 +a197[0] = 0 +a198[0] = 0 +a199[0] = 0 +a200[0] = 0 +a201[0] = 0 +a202[0] = 0 +a203[0] = 0 +a204[0] = 0 +a205[0] = 0 +a206[0] = 0 +a207[0] = 0 +a208[0] = 0 +a209[0] = 0 +a210[0] = 0 +a211[0] = 0 +a212[0] = 0 +a213[0] = 0 +a214[0] = 0 +a215[0] = 0 +a216[0] = 0 +a217[0] = 0 +a218[0] = 0 +a219[0] = 0 +a220[0] = 0 +a221[0] = 0 +a222[0] = 0 +a223[0] = 0 +a224[0] = 0 +a225[0] = 0 +a226[0] = 0 +a227[0] = 0 +a228[0] = 0 +a229[0] = 0 +a230[0] = 0 +a231[0] = 0 +a232[0] = 0 +a233[0] = 0 +a234[0] = 0 +a235[0] = 0 +a236[0] = 0 +a237[0] = 0 +a238[0] = 0 +a239[0] = 0 +a240[0] = 0 +a241[0] = 0 +a242[0] = 0 +a243[0] = 0 +a244[0] = 0 +a245[0] = 0 +a246[0] = 0 +a247[0] = 0 +a248[0] = 0 +a249[0] = 0 +a250[0] = 0 +a251[0] = 0 +a252[0] = 0 +a253[0] = 0 +a254[0] = 0 +a255[0] = 0 +a256[0] = 0 + +a1(a253[], len) +printarray(a253[], len) +a3(a253[], len) +printarray(a253[], len) +a5(a253[], len) +printarray(a253[], len) +a7(a253[], len) +printarray(a253[], len) + +a1(a254[], len) +printarray(a254[], len) +a3(a254[], len) +printarray(a254[], len) +a5(a254[], len) +printarray(a254[], len) +a7(a254[], len) +printarray(a254[], len) + +a1(a255[], len) +printarray(a255[], len) +a3(a255[], len) +printarray(a255[], len) +a5(a255[], len) +printarray(a255[], len) +a7(a255[], len) +printarray(a255[], len) + +a1(a256[], len) +printarray(a256[], len) +a3(a256[], len) +printarray(a256[], len) +a5(a256[], len) +printarray(a256[], len) +a7(a256[], len) +printarray(a256[], len) diff --git a/tests/fuzzing/bc_inputs3/02.txt b/tests/fuzzing/bc_inputs3/02.txt new file mode 100644 index 000000000000..8cf0f3e6fec1 --- /dev/null +++ b/tests/fuzzing/bc_inputs3/02.txt @@ -0,0 +1 @@ +obase^= 20-f-b-4^-f-4-4^-f-4^-d diff --git a/tests/fuzzing/bc_inputs3/03.txt b/tests/fuzzing/bc_inputs3/03.txt new file mode 100644 index 000000000000..00e645cea896 --- /dev/null +++ b/tests/fuzzing/bc_inputs3/03.txt @@ -0,0 +1,2 @@ +for (i = 0; ; ) +for (i = 0; 
;(p(s(ssqrtt()-p())))000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000#000 diff --git a/tests/fuzzing/bc_inputs3/06.txt b/tests/fuzzing/bc_inputs3/06.txt new file mode 100644 index 000000000000..29fe6be37021 --- /dev/null +++ b/tests/fuzzing/bc_inputs3/06.txt @@ -0,0 +1 @@ +while (i == 0) { diff --git a/tests/fuzzing/bc_inputs3/07.txt b/tests/fuzzing/bc_inputs3/07.txt new file mode 100644 index 000000000000..e899d8547868 --- /dev/null +++ b/tests/fuzzing/bc_inputs3/07.txt @@ -0,0 +1,8 @@ +for(q=F;i<=020; ++i) #00 +{print "0" +if(6)if(6){3 + } +{pr0n} +"" } +{pr0n} +{print "" "" diff --git a/tests/fuzzing/bc_inputs3/10.txt b/tests/fuzzing/bc_inputs3/10.txt new file mode 100644 index 000000000000..23fb8689f598 --- /dev/null +++ b/tests/fuzzing/bc_inputs3/10.txt @@ -0,0 +1 @@ +d000$++ diff --git a/tests/fuzzing/bc_inputs3/12.txt b/tests/fuzzing/bc_inputs3/12.txt new file mode 100644 index 000000000000..143bb71deeda --- /dev/null +++ b/tests/fuzzing/bc_inputs3/12.txt @@ -0,0 +1,2 @@ +for (v ;!j -90-90; ++i) +a= ibase ++;0 diff --git a/tests/fuzzing/bc_inputs3/16.txt b/tests/fuzzing/bc_inputs3/16.txt new file mode 100644 index 000000000000..977569bda330 --- /dev/null +++ b/tests/fuzzing/bc_inputs3/16.txt @@ -0,0 +1 @@ +"0 \ No newline at end of file diff --git a/tests/fuzzing/bc_inputs3/trunc.txt b/tests/fuzzing/bc_inputs3/trunc.txt new file mode 100644 index 000000000000..364bb224a2e3 --- /dev/null +++ b/tests/fuzzing/bc_inputs3/trunc.txt @@ -0,0 +1,15 @@ +0$ +1$ +2$ +0.8249167203486$ +1.28937150237$ +2.0$ +28937.92837605126$ +2890.000000000$ +-1$ +-1.128973$ +-9812387.28910273$ +x = 83.298 +x$ +x = -1893.19 +(x)$ diff --git a/tests/fuzzing/dc_inputs/01.txt b/tests/fuzzing/dc_inputs/01.txt new file mode 100644 index 
000000000000..9622de95a241 --- /dev/null +++ b/tests/fuzzing/dc_inputs/01.txt @@ -0,0 +1,2 @@ +[[000000000 +00000] diff --git a/tests/fuzzing/dc_inputs/02.txt b/tests/fuzzing/dc_inputs/02.txt new file mode 100644 index 000000000000..79565935cf23 --- /dev/null +++ b/tests/fuzzing/dc_inputs/02.txt @@ -0,0 +1,5 @@ +0 R +2 1 +1 0+pRpp +30.x +[00000000]ip1+pR diff --git a/tests/fuzzing/dc_inputs/03.txt b/tests/fuzzing/dc_inputs/03.txt new file mode 100644 index 000000000000..ecede2e05629 --- /dev/null +++ b/tests/fuzzing/dc_inputs/03.txt @@ -0,0 +1,2 @@ +0 lip1-si0l0+200sx_9lq+pR 30.x +[li100L0dp1+s+sX10lM<0]sL0sJlLx diff --git a/tests/fuzzing/dc_inputs/04.txt b/tests/fuzzing/dc_inputs/04.txt new file mode 100644 index 000000000000..209f50c16d52 --- /dev/null +++ b/tests/fuzzing/dc_inputs/04.txt @@ -0,0 +1,9 @@ +zp100000000.000004p1+pR +0 1 1+kpR +1 1+pR +1 0IpR +2 9+iR +037 483+pR +999 999+pR +237467456283846vpR +.0000000ddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddsdddddddddddddddddddddddddddddddddddddddddddddddddddddsdddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd/ddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddfddddddddddddddddddddddddddddddddddddddddddddddddddddddcdddddddddddd9000000000000000000 diff --git a/tests/fuzzing/dc_inputs/05.txt b/tests/fuzzing/dc_inputs/05.txt new file mode 100644 index 000000000000..bf2ca982ed2b --- /dev/null +++ b/tests/fuzzing/dc_inputs/05.txt @@ -0,0 +1,3 @@ +04000000000000300sx_9000.00000syzpRlxlq+pR +30.x +[li1000000sxL0LLLL900000.00000sLLL]sL0s0lLx diff --git a/tests/fuzzing/dc_inputs/06.txt b/tests/fuzzing/dc_inputs/06.txt new file 
mode 100644 index 000000000000..eff417eb55b4 --- /dev/null +++ b/tests/fuzzing/dc_inputs/06.txt @@ -0,0 +1 @@ +00Q;pd60 p d9S06+00I;pd60Q2 0^pR diff --git a/tests/fuzzing/dc_inputs/07.txt b/tests/fuzzing/dc_inputs/07.txt new file mode 100644 index 000000000000..8a09152faf58 --- /dev/null +++ b/tests/fuzzing/dc_inputs/07.txt @@ -0,0 +1,3 @@ +1 0 1|dR +1 [li0L]SL10sildR +1 [li0L]sL10|Lx diff --git a/tests/fuzzing/dc_inputs/08.txt b/tests/fuzzing/dc_inputs/08.txt new file mode 100644 index 000000000000..156de2f536b6 --- /dev/null +++ b/tests/fuzzing/dc_inputs/08.txt @@ -0,0 +1 @@ +0 2+p[lip1-si0li!=0^di>0]S098sil0x diff --git a/tests/fuzzing/dc_inputs/09.txt b/tests/fuzzing/dc_inputs/09.txt new file mode 100644 index 000000000000..ffc3a08908d4 --- /dev/null +++ b/tests/fuzzing/dc_inputs/09.txt @@ -0,0 +1,9 @@ +#00000 +0sm[Nx]0s0[]0s0x[]zs0x[]0s0[]zs0c +0s0[Nx]0s0[]zs0x[]zs0x[Nx]0s0[]zs0#000000000 +0s0[Nx]0s_[]zs0x[li]zs^x[l0000000]0sm[]zs0x[liNx]zs0x[li;0lilix] +x[liN]zsWx[liN]zs0x[li;0lilix] +x[liNzs0x#000000000 +*sm[Nx]0sm[]zs0x[li]zs0x[Nx]0sm[]zsdc +0sm[Nx]0sm[]zs0x[li]zs0x[Nx]0sm[]x]zsxx#000000000 +*s0[Nx]0sm[]0s00[00]zs0x[Nx]0sm[]z0dc diff --git a/tests/fuzzing/dc_inputs/10.txt b/tests/fuzzing/dc_inputs/10.txt new file mode 100644 index 000000000000..0fade4bb899f --- /dev/null +++ b/tests/fuzzing/dc_inputs/10.txt @@ -0,0 +1,11 @@ +#0000 +0sm[Nx]0sm[]0s0x[li]0s0x[Nfvfff[]0sm[]zs0x[li]zs0x[Nx]0sm[]zs0c +0sm[Nx]0sm[]x[li]zs0x[Nx]0sm[]zs0c +0sm[Nx]0sm[]zs0x[li]zs0x[Nx]0sm[]zs0c +0sm[Nx]0sm[]zs0x[li]zs0x[Nx]0sm[]zs0#000000000 +0sm[Nx]0s0[]zs0x[li]zs0x[0000000000]0sm[]zs0x[liNx]zs0x[li;0l0l0x] +x[liNx]zs0x#000000000 +0sm[Nx]0sm[]zs0x[li]zs0x[000]0sm[]x[li]zs0x[Nx]0sm[]zs0c +0sm[Nx]0sm[]zs0x[li]zs0x[Nx]0sm[]zs0#0000 +0sm[Nx]0sm[]zs0x[li]zs0x[Nx]0sm[]zs0#000000000 +0sm[Nx]0s_@]zs0x[li]s^x[0000000000]0s0[]zsW[0000]zsxx[000000000] diff --git a/tests/fuzzing/dc_inputs/11.txt b/tests/fuzzing/dc_inputs/11.txt new file mode 100644 index 000000000000..73bbc7d88f1c --- /dev/null 
+++ b/tests/fuzzing/dc_inputs/11.txt @@ -0,0 +1,4 @@ +#00000000 +[[00000]aa]sM[lip1-si0li>0eM]s010sil0x +[[0000]00]sM[]s010sil0x +[R]sM[lip=000]s0;0 diff --git a/tests/fuzzing/dc_inputs/12.txt b/tests/fuzzing/dc_inputs/12.txt new file mode 100644 index 000000000000..aff25e7381cd --- /dev/null +++ b/tests/fuzzing/dc_inputs/12.txt @@ -0,0 +1,2 @@ +1;09R0si[lii000000000000000000000000000000000000000000]li1000 2346dvdddd;ddddddddddddd?-sdddddddddd0+dd0 1+pR +0dvdddd;ddSddddddddddd 0si[lid1+sil0sili10li?-s0]dsxx[00000000]li1000 2000dvddddddddddddddddddddddddd0 0+ddd 1+pR diff --git a/tests/fuzzing/dc_inputs/13.txt b/tests/fuzzing/dc_inputs/13.txt new file mode 100644 index 000000000000..e6af9463e38c --- /dev/null +++ b/tests/fuzzing/dc_inputs/13.txt @@ -0,0 +1,7 @@ +zp10[00000000\00]pppppppppppppppR +_1 _1(pR +_1 _2(pR +2 1{pR +_1 1{pR 990+pR +2000000 300000300000000+pR +2070000000aaaaaaaaaaaaaaxaaaaaaaaaaaaR diff --git a/tests/fuzzing/dc_inputs/14.txt b/tests/fuzzing/dc_inputs/14.txt new file mode 100644 index 000000000000..741f3bfd7704 --- /dev/null +++ b/tests/fuzzing/dc_inputs/14.txt @@ -0,0 +1,7 @@ +0bpR +1bpR +.200000bpR +100000.0000600bpR +_10bpR +_.1000000bpR +_30000.00bpR: \ No newline at end of file diff --git a/tests/fuzzing/dc_inputs/15.txt b/tests/fuzzing/dc_inputs/15.txt new file mode 100644 index 000000000000..828e8204a2aa --- /dev/null +++ b/tests/fuzzing/dc_inputs/15.txt @@ -0,0 +1,11 @@ +0bpax1bpR +1bpR +.30b900pR +_10bp/90 +_30_.1/10bp30_.1/90 +_300.1/90/90 +_30_.1/90 +_30000.1/90 +90 +_30000.1/90 +70.000 70u diff --git a/tests/fuzzing/dc_inputs/16.txt b/tests/fuzzing/dc_inputs/16.txt new file mode 100644 index 000000000000..b021dd66d7ff --- /dev/null +++ b/tests/fuzzing/dc_inputs/16.txt @@ -0,0 +1 @@ +0 0;^dddddRps0R@s016dddRRd^2ddRZ(b-P;;$p;;;;;;9;;;;;;$ppppppppppppp30 diff --git a/tests/fuzzing/dc_inputs/17.txt b/tests/fuzzing/dc_inputs/17.txt new file mode 100644 index 000000000000..fb46510fc3e3 --- /dev/null +++ b/tests/fuzzing/dc_inputs/17.txt 
@@ -0,0 +1,20 @@ +0 1(pR +1;;;;;pR +1 +0 18d[0000000 +000000000000000000000000000000]sM[liv1-si0li!<0pR +_1 0{pR +_1 _1{pR +_1 f0070000000000.0000000000000_10000000000006.00000000000005~pRpR +_23745860900000.070000000000000 _0.20542357869124050~pRpR +_3000000000000000.0000000700000006002 _7000000000000005000000000.000F000000000000003~pRpR20000000 300000000003.00000000000000030~pRpR +_30000000000000000000000000 +1 0.001.00000000030 +1 0.000000000000000000000000000000000000000000000000000000000000000000002x30000000000000000000000000000000000000$80000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000<0800000000000000008000000000000000000000000000000000000000000000000000000000000000000000000000000000.000F000000000000003~pRpR20000000 300000000003.00000000000000030~pRpR +_30000000000000.00000000000 +1 0.070.70000000000 +1 0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000008000$80000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000<080000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000S8800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000Z800000000000 +_10pR +=390NpR +_2000 +40000 5000C 2i>0eM]s01@sil0x diff --git a/tests/fuzzing/dc_inputs/18.txt b/tests/fuzzing/dc_inputs/18.txt new file mode 100644 index 000000000000..ed2659d097c2 --- /dev/null +++ b/tests/fuzzing/dc_inputs/18.txt @@ -0,0 +1,3 @@ +1oVVf[li;WORli1S0Zli1;rORli1dH|2li@d-NliO+rK28729@9547628O745/pR +_29307546189299999999999999999999999999999999999995 0.00000000000000000000000000009999999999999999999+99$9999999999.999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999995 
0.0000000000000000000000000000000000R0000000000000000000000000000000000000+0000000000000000-0000000000000000000005+pR +99999999999999999999999999999999999999999999999999999999999.999999999999999999999.99999999999999999999999999999999999999999900000000000000000000000R0000000000000000000000000000000000000+0000000000000000-0000000000000000000005+pR999999999999999999999999999999999999999999 0.00000000000000000000000000000000000000000000000000000000000C0020P00000000000000000000000000000000000007fli1+7fli1+si;d7dli1+si;0=Rls1d:0li +i100>x]dsxx[0000000]dsxx[p000]l010000000 00000000000pR diff --git a/tests/fuzzing/dc_inputs/20.txt b/tests/fuzzing/dc_inputs/20.txt new file mode 100644 index 000000000000..aa7752a85c67 --- /dev/null +++ b/tests/fuzzing/dc_inputs/20.txt @@ -0,0 +1,3 @@ +#0000 +I20PPrP PPPP PPs0daP1:0pR + PPP1d:0pRR diff --git a/tests/fuzzing/dc_inputs/21.txt b/tests/fuzzing/dc_inputs/21.txt new file mode 100644 index 000000000000..01707d8256b6 --- /dev/null +++ b/tests/fuzzing/dc_inputs/21.txt @@ -0,0 +1,5 @@ +0bpR +1bpR +.200000bpR +100000.0000600bpR +S09bpR diff --git a/tests/fuzzing/dc_inputs/22.txt b/tests/fuzzing/dc_inputs/22.txt new file mode 100644 index 000000000000..898184649926 --- /dev/null +++ b/tests/fuzzing/dc_inputs/22.txt @@ -0,0 +1,36 @@ +[0000000000000]ZpR +[He00 +[0000000\00000000]00 +[0000000\00000000]000 +[0000000\00000000]00 +[0000] +00000] +[28pR] +[27pR] +[26pR] +[25pR] +[24pR] +[23pR] +[22pR] +[21pR] +[20pR] +[19pR] +[18pR] +[17pR] +[16pR] +[15pR] +[14pR] +[13pR] +[12pR] +[11pR] +[10pR] +[9pR] +[8pR] +[7pR] +[6pR] +[5pR] +[4pR] +[3pR] +[2pR] +[1pR] +[xz0x]dsxx0sx0s0 +1 2 +s0[l0d:0l01;0d:0l01;0pRl01+s0l010>x]dsxx0sx0s0 +1 2 + diff --git a/tests/fuzzing/dc_inputs/26.txt b/tests/fuzzing/dc_inputs/26.txt new file mode 100644 index 000000000000..731d3969ac3b --- /dev/null +++ b/tests/fuzzing/dc_inputs/26.txt @@ -0,0 +1,155 @@ +0bpR +1bp0 +.20bpR +100000.0000005bpR +_10bpR +_.1000[l0;0;rpRl01+s0l010>x]dsxx0sx0s0 +1 2+p+p +3+p +4+p +5+p +6+p 
+7+p +8+p +9+p +10+p +11+p +12+p +13+p +14+p +15+p +16+p +17+p +18+p +19+p +20+p +21+0+p +71+o +70+p +70+p +70+p +70+p +22+p +20+p +20+p +20+p +20+p +20+p +20+p +20+p +30+p +30+p +30+p +30+p +30+p +30+p +30+p +30+p +30+p +30+p +40+1+p +40+p +40+p +40+p +40+p +40+p +40+p +40+p +40+p +50+p +50+p +50+p +50+p +50+p +50+p +50+p +50+p +50+p +50+p +60+p +60+p +60bpR +1bp0 +.20bpR +100000.0070000bpR +_10bpR +_.1000[l0;0;rpRl01+s0l010>x]dsxx0sx0s0 +1 2+p+p +3+p +4+p +5+p +6+p +7+p +8+p +9+p +10+p +11+p +12+p +13+p +14+p +15+p +16+p +17+p +18+p +19+p +20+p +21+0+p +71+o +70+p +70+p +70+p +70+p +22+p +20+p +20+p +20+p +20+p +20+p +20+p +20+p +30+p +30+p +30+p +30+p +30+p +30+p +30+p +30+p +30+p +30+p +40+1+p +40+p +40+p +40+p +40+p +40+p +40+p +40+p +40+p +50+p +50+p +50+p +50+p +50+p +50+p +50+p +50+p +50+p +50+p +60+p +60+p +60+p +60+p +60+p +60+p +60+p +60 +70+p +70+p +70+p +70+p +70+p +70+p +70+p +80+p + diff --git a/tests/fuzzing/dc_inputs/27.txt b/tests/fuzzing/dc_inputs/27.txt new file mode 100644 index 000000000000..69745b952afc --- /dev/null +++ b/tests/fuzzing/dc_inputs/27.txt @@ -0,0 +1,2 @@ +"0000000\ + diff --git a/tests/fuzzing/dc_inputs/28.txt b/tests/fuzzing/dc_inputs/28.txt new file mode 100644 index 000000000000..fe81732b3e38 --- /dev/null +++ b/tests/fuzzing/dc_inputs/28.txt @@ -0,0 +1 @@ +10 4%0:i[000] 1:b 0;0 p 1;b0:b [000] 1:b 0;b p 1;b~b 0;b p 0;b~~~0k diff --git a/tests/fuzzing/dc_inputs/29.txt b/tests/fuzzing/dc_inputs/29.txt new file mode 100644 index 000000000000..886eb8ee0a8e --- /dev/null +++ b/tests/fuzzing/dc_inputs/29.txt @@ -0,0 +1,13 @@ +10 4%0:b [200] 1:b 0;b 1;b X + 2000 1%p0 +3460:b [200] 1:b 0;b p bp0 +.2 1%pR +6 4%pR +10 4%0:b [200] 1:b 0;b p 1;b X + 20000 1%pR +b 0;b p 0;b2 +1bpb [200] 1:u 0;b p 1;b X + 2 +[000] 0:b [200] 0:b 0;b p S0b p +[s0]XpR + diff --git a/tests/fuzzing/dc_inputs/30.txt b/tests/fuzzing/dc_inputs/30.txt new file mode 100644 index 000000000000..e072e71617d8 --- /dev/null +++ b/tests/fuzzing/dc_inputs/30.txt @@ -0,0 
+1 @@ +0;0[]0:b;bs0l0x;0 diff --git a/tests/fuzzing/dc_inputs/abs.txt b/tests/fuzzing/dc_inputs/abs.txt new file mode 100644 index 000000000000..9907dfc6679d --- /dev/null +++ b/tests/fuzzing/dc_inputs/abs.txt @@ -0,0 +1,7 @@ +0bpR +1bpR +.218933bpR +138963.9873645bpR +_19bpR +_.1298376bpR +_3892173.289375bpR diff --git a/tests/fuzzing/dc_inputs/add.txt b/tests/fuzzing/dc_inputs/add.txt new file mode 100644 index 000000000000..42da2f1f309c --- /dev/null +++ b/tests/fuzzing/dc_inputs/add.txt @@ -0,0 +1,33 @@ +0 0+pR +0 0 0++pR +0 1+pR +0 1 1++pR +1 1+pR +1 0+pR +2 5+pR +237 483+pR +999 999+pR +2374623 324869356734856+pR +2378639084586723980562 23468729367839+pR +37298367203972395108367910823465293084561329084561390845613409516734503870691837451 785621394067928346918023476190834672198467134908618723249671349062187346898241093486139046139084613490817356023871869102746182749061872609129847+pR +1.1 0+pR +0 1.1+pR +457283.731284923576 37842934672834.3874629385672354+pR +1.0 0.1+pR +3746289134067138046 0.138375863945672398456712389456273486293+pR +_1 _1+pR +_4 _15+pR +_1346782 _1287904651762468913476+pR +99999999999999999999999999999999999999999999999999999999999.999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001+pR +99999999999999999999999999999999999999999999999999999999999.999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999995 0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000005+pR +99999999999999999999999999999999999999999999999999999999999.999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 
0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001+pR +99999999999999999999999999999999999999999999999999999999999.999999999999999999999999999999999999999999999999999999999899999999999999999999999999999999999999999999999999999999999999 0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001+pR +99999999999999999999999999999999999989999999999999999999999.999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001+pR +_1889985797 2012747315+pR +0 _14338.391079082+pR +_2422297 1.3134942556+pR +_1289374 1289374.2893417 0.238971 28937.28971+++pR +1289374 1289374.2893417 _0.238971 28937.28971+++pR +1289374 1289374.2893417 0.238971 _28937.28971+++pR +1289374 1289374.2893417 _0.238971 _28937.28971+++pR +1289374 _1289374.2893417 _0.238971 _28937.28971+++pR diff --git a/tests/fuzzing/dc_inputs/array.dc b/tests/fuzzing/dc_inputs/array.dc new file mode 100644 index 000000000000..970f29a68768 --- /dev/null +++ b/tests/fuzzing/dc_inputs/array.dc @@ -0,0 +1,2 @@ +#! 
/usr/bin/dc +0si[lid:rli1+sili100>x]dsxx[li;rpRli1-sili100!>x]li1+[li;rpRli1+sili100>x] diff --git a/tests/fuzzing/dc_inputs/boolean.txt b/tests/fuzzing/dc_inputs/boolean.txt new file mode 100644 index 000000000000..815100f0d085 --- /dev/null +++ b/tests/fuzzing/dc_inputs/boolean.txt @@ -0,0 +1,80 @@ +0 1(pR +1 1(pR +2 1(pR +_1 1(pR +_1 0(pR +_1 _1(pR +_1 _2(pR +0 1{pR +1 1{pR +2 1{pR +_1 1{pR +_1 0{pR +_1 _1{pR +_1 _2{pR +0 1)pR +1 1)pR +2 1)pR +_1 1)pR +_1 0)pR +_1 _1)pR +_1 _2)pR +0 1}pR +1 1}pR +2 1}pR +_1 1}pR +_1 0}pR +_1 _1}pR +_1 _2}pR +0 0GpR +0 1GpR +1 0GpR +_1 _1GpR +0 _1GpR +_1 0GpR +1 1GpR +238 2GpR +0NpR +1NpR +_1NpR +2398NpR +_2983.2389NpR +0 0MpR +1 0MpR +0 1MpR +1 1MpR +128973240 0MpR +0 2893712MpR +1982 28937MpR +_2938 0MpR +0 _1023.298037MpR +0.283917 0MpR +2389 _1208.28937MpR +0 289.289372MpR +_298.29387 0MpR +_2983.28973 82937MpR +0 _2938.320837MpR +_2089.2308 0MpR +_0.2893 _2938.28973MpR +0.00000 1892MpR +1289.023 .0000MpR +0 0mpR +1 0mpR +0 1mpR +1 1mpR +128973240 0mpR +0 2893712mpR +1982 28937mpR +_2938 0mpR +0 _1023.298037mpR +0.283917 0mpR +2389 _1208.28937mpR +0 289.289372mpR +_298.29387 0mpR +_2983.28973 82937mpR +0 _2938.320837mpR +_2089.2308 0mpR +_0.2893 _2938.28973mpR +0.00000 1892mpR +1289.023 .0000mpR +0.0000 .00000mpR diff --git a/tests/fuzzing/dc_inputs/decimal.txt b/tests/fuzzing/dc_inputs/decimal.txt new file mode 100644 index 000000000000..fdc628c857e0 --- /dev/null +++ b/tests/fuzzing/dc_inputs/decimal.txt @@ -0,0 +1,41 @@ +0pR +0.0pR +.0000pR +000000000000000000000000.00000000000000000000000pR +000000000000000000000000000135482346782356pR +000000000000000000000000002pR +1pR +11pR +123pR +7505pR +1023468723275435238491972521917846pR +4343472432431705867392073517038270398027352709027389273920739037937960379637893607893607893670530278200795207952702873892786172916728961783907893607418973587857386079679267926737520730925372983782793652793pR +_1pR +_203pR +_57pR +_18586pR +_31378682943772818461924738352952347258pR 
+_823945628745673589495067238723986520375698237620834674509627345273096287563846592384526349872634895763257893467523987578690283762897568459072348758071071087813501875908127359018715023841710239872301387278pR +.123521346523546pR +0.1245923756273856pR +_.1024678456387pR +_0.8735863475634587pR +4.0pR +_6.0pR +234237468293576.000000000000000000000000000000pR +23987623568943567.00000000000000000005677834650000000000000pR +23856934568940675.000000000000000435676782300000000000000456784pR +77567648698496.000000000000000000587674750000000000458563800000000000000pR +2348672354968723.2374823546000000000003256987394502346892435623870000000034578pR +_2354768.000000000000000000000000000000000000pR +_96739874567.000000000347683456pR +_3764568345.000000000004573845000000347683460pR +_356784356.934568495770004586495678300000000pR +74325437345273852773827101738273127312738521733017537073520735207307570358738257390761276072160719802671980267018728630178.7082681027680521760217867841276127681270867827821768173178207830710978017738178678012767377058785378278207385237085237803278203782037237582795870pR +_756752732785273851273728537852738257837283678965738527385272983678372867327835672967385278372637862738627836279863782673862783670.71738178361738718367186378610738617836781603760178367018603760178107735278372832783728367826738627836278378260736270367362073867097307925pR +9812734012837410982345719208345712908357412903587192048571920458712.23957182459817249058172945781pR +2893.982.28937pRpR +198273\ +.192837pR +1892.238907\ +.3982739pRpR diff --git a/tests/fuzzing/dc_inputs/divide.txt b/tests/fuzzing/dc_inputs/divide.txt new file mode 100644 index 000000000000..38b874e9f175 --- /dev/null +++ b/tests/fuzzing/dc_inputs/divide.txt @@ -0,0 +1,33 @@ +20k +0 1/pR +0 321566/pR +0 0.3984567238456/pR +1 1/pR +1 1287469297356/pR +1 0.2395672438567234/pR +1 237586239856.0293596728392360/pR +1249687284356 3027949207835207/pR +378617298617396719 35748521/pR +9348576237845624358 0.9857829375461/pR 
+35768293846193284 2374568947.045762839567823/pR +_78987234567812345 876542837618936/pR +_356789237555535468 0.3375273860984786903/pR +_5203475364850390 435742903748307.70869378534043296404530458/pR +_0.37861723347576903 7385770896/pR +_0.399454682043962 0.34824389304/pR +_0.6920414523873204 356489645223.76076045304879030/pR +_35872917389671.7573280963748 73924708/pR +_78375896314.4836709876983 0.78356798637817/pR +_2374123896417.143789621437581 347821469423789.1473856783960/pR +_896729350238549726 _34976289345762/pR +_2374568293458762348596 _0.8792370647234987679/pR +_237584692306721845726038 _21783910782374529637.978102738746189024761/pR +_0.23457980123576298375682 _1375486293874612/pR +_0.173897061862478951264 _0.8179327486017634987516298745/pR +_0.9186739823576829347586 _0.235678293458756239846/pR +_0.9375896183746982374568 _13784962873546.0928729395476283745/pR +_2930754618923467.12323745862937465 _734869238465/pR +_23745861923467.874675129834675 _0.23542357869124756/pR +_3878923750692883.7238596702834756902 _7384192674957215364986723.9738461923487621983/pR +1 0.00000000000000000000000000000000000000000002346728372937352457354204563027/pR +239854711289345712 2891374 182 .2893 ///pR diff --git a/tests/fuzzing/dc_inputs/divmod.txt b/tests/fuzzing/dc_inputs/divmod.txt new file mode 100644 index 000000000000..1633203ff99f --- /dev/null +++ b/tests/fuzzing/dc_inputs/divmod.txt @@ -0,0 +1,64 @@ +20k +0 1~pRpR +0 321566~pRpR +0 0.3984567238456~pRpR +1 1~pRpR +1 1287469297356~pRpR +1 0.2395672438567234~pRpR +1 237586239856.0293596728392360~pRpR +1249687284356 3027949207835207~pRpR +378617298617396719 35748521~pRpR +9348576237845624358 0.9857829375461~pRpR +35768293846193284 2374568947.045762839567823~pRpR +_78987234567812345 876542837618936~pRpR +_356789237555535468 0.3375273860984786903~pRpR +_5203475364850390 435742903748307.70869378534043296404530458~pRpR +_0.37861723347576903 7385770896~pRpR +_0.399454682043962 0.34824389304~pRpR +_0.6920414523873204 
356489645223.76076045304879030~pRpR +_35872917389671.7573280963748 73924708~pRpR +_78375896314.4836709876983 0.78356798637817~pRpR +_2374123896417.143789621437581 347821469423789.1473856783960~pRpR +_896729350238549726 _34976289345762~pRpR +_2374568293458762348596 _0.8792370647234987679~pRpR +_237584692306721845726038 _21783910782374529637.978102738746189024761~pRpR +_0.23457980123576298375682 _1375486293874612~pRpR +_0.173897061862478951264 _0.8179327486017634987516298745~pRpR +_0.9186739823576829347586 _0.235678293458756239846~pRpR +_0.9375896183746982374568 _13784962873546.0928729395476283745~pRpR +_2930754618923467.12323745862937465 _734869238465~pRpR +_23745861923467.874675129834675 _0.23542357869124756~pRpR +_3878923750692883.7238596702834756902 _7384192674957215364986723.9738461923487621983~pRpR +1 0.00000000000000000000000000000000000000000002346728372937352457354204563027~pRpR +0k +0 1~pRpR +0 321566~pRpR +0 0.3984567238456~pRpR +1 1~pRpR +1 1287469297356~pRpR +1 0.2395672438567234~pRpR +1 237586239856.0293596728392360~pRpR +1249687284356 3027949207835207~pRpR +378617298617396719 35748521~pRpR +9348576237845624358 0.9857829375461~pRpR +35768293846193284 2374568947.045762839567823~pRpR +_78987234567812345 876542837618936~pRpR +_356789237555535468 0.3375273860984786903~pRpR +_5203475364850390 435742903748307.70869378534043296404530458~pRpR +_0.37861723347576903 7385770896~pRpR +_0.399454682043962 0.34824389304~pRpR +_0.6920414523873204 356489645223.76076045304879030~pRpR +_35872917389671.7573280963748 73924708~pRpR +_78375896314.4836709876983 0.78356798637817~pRpR +_2374123896417.143789621437581 347821469423789.1473856783960~pRpR +_896729350238549726 _34976289345762~pRpR +_2374568293458762348596 _0.8792370647234987679~pRpR +_237584692306721845726038 _21783910782374529637.978102738746189024761~pRpR +_0.23457980123576298375682 _1375486293874612~pRpR +_0.173897061862478951264 _0.8179327486017634987516298745~pRpR +_0.9186739823576829347586 
_0.235678293458756239846~pRpR +_0.9375896183746982374568 _13784962873546.0928729395476283745~pRpR +_2930754618923467.12323745862937465 _734869238465~pRpR +_23745861923467.874675129834675 _0.23542357869124756~pRpR +_3878923750692883.7238596702834756902 _7384192674957215364986723.9738461923487621983~pRpR +1 0.00000000000000000000000000000000000000000002346728372937352457354204563027~pRpR diff --git a/tests/fuzzing/dc_inputs/else.dc b/tests/fuzzing/dc_inputs/else.dc new file mode 100644 index 000000000000..84deb8754e9f --- /dev/null +++ b/tests/fuzzing/dc_inputs/else.dc @@ -0,0 +1,4 @@ +#! /usr/bin/dc +[[Done!]pR]sM[lip1-si0li>LeM]sL10silLx +[[Done!]pR]sM[lip1-si0li!L]sL10silLx +[lip1+si10lix]dsxxli1-si[li;rpRli1-sili0!>x]dsxxli1+si[li;rpRli1+sili10>x]dsxx0sx0si +1 2+p +[foo] +0 +1+p +2+p +3+p +4+p +5+p +6+p +7+p +8+p +9+p +10+p +11+p +12+p +13+p +14+p +15+p +16+p +17+p +18+p +19+p +20+p +21+p +22+p +23+p +24+p +25+p +26+p +27+p +28+p +29+p +30+p +31+p +32+p +33+p +34+p +35+p +36+p +37+p +38+p +39+p +40+p +41+p +42+p +43+p +44+p +45+p +46+p +47+p +48+p +49+p +50+p +51+p +52+p +53+p +54+p +55+p +56+p +57+p +58+p +59+p +60+p +61+p +62+p +63+p +64+p +65+p +66+p +67+p +68+p +69+p +70+p +71+p +72+p +73+p +74+p +75+p +76+p +77+p +78+p +79+p +80+p +81+p +82+p +83+p +84+p +85+p +86+p +87+p +88+p +89+p +90+p +91+p +92+p +93+p +94+p +95+p +96+p +97+p +98+p +99+p +100+p +101+p +102+p +103+p +104+p +105+p +106+p +107+p +108+p +109+p +110+p +111+p +112+p +113+p +114+p +115+p +116+p +117+p +118+p +119+p +120+p +121+p +122+p +123+p +124+p +125+p +126+p +127+p +128+p +129+p +130+p +131+p +132+p +133+p +134+p +135+p +136+p +137+p +138+p +139+p +140+p +141+p +142+p +143+p +144+p +145+p +146+p +147+p +148+p +149+p +150+p +151+p +152+p +153+p +154+p +155+p +156+p +157+p +158+p +159+p +160+p +161+p +162+p +163+p +164+p +165+p +166+p +167+p +168+p +169+p +170+p +171+p +172+p +173+p +174+p +175+p +176+p +177+p +178+p +179+p +180+p +181+p +182+p +183+p +184+p +185+p +186+p +187+p +188+p 
+189+p +190+p +191+p +192+p +193+p +194+p +195+p +196+p +197+p +198+p +199+p +200+p +p diff --git a/tests/fuzzing/dc_inputs/stream.dc b/tests/fuzzing/dc_inputs/stream.dc new file mode 100644 index 000000000000..5c61e7c931f9 --- /dev/null +++ b/tests/fuzzing/dc_inputs/stream.dc @@ -0,0 +1,2 @@ +#! /usr/bin/dc +0si[liPlid1+sili4096>x]ddsxPx diff --git a/tests/fuzzing/dc_inputs/strings.txt b/tests/fuzzing/dc_inputs/strings.txt new file mode 100644 index 000000000000..460976abbd9e --- /dev/null +++ b/tests/fuzzing/dc_inputs/strings.txt @@ -0,0 +1,51 @@ +[Hello, World!]ZpR +[Hello, World!]pR +[Hello, \[ World!]ZpR +[Hello, \[ World!]pR +[Hello, \] World!]ZpR +[Hello, \] World!]pR +[30pR] +[29pR] +[28pR] +[27pR] +[26pR] +[25pR] +[24pR] +[23pR] +[22pR] +[21pR] +[20pR] +[19pR] +[18pR] +[17pR] +[16pR] +[15pR] +[14pR] +[13pR] +[12pR] +[11pR] +[10pR] +[9pR] +[8pR] +[7pR] +[6pR] +[5pR] +[4pR] +[3pR] +[2pR] +[1pR] +[xz0 /dev/null 2>&1 checktest_retcode "$d" "$?" "quit" # bc has two halt or quit commands, so test the second as well. if [ "$d" = bc ]; then printf '%s\n' "quit" | "$exe" "$@" > /dev/null 2>&1 checktest_retcode "$d" "$?" quit two=$("$exe" "$@" -e 1+1 -e quit) checktest_retcode "$d" "$?" quit if [ "$two" != "2" ]; then err_exit "$d failed test quit" 1 fi fi printf 'pass\n' base=$(basename "$exe") printf 'Running %s environment var tests...' "$d" if [ "$d" = "bc" ]; then export BC_ENV_ARGS=" '-l' '' -q" printf 's(.02893)\n' | "$exe" "$@" > /dev/null checktest_retcode "$d" "$?" "environment var" "$exe" "$@" -e 4 > /dev/null err="$?" checktest_retcode "$d" "$?" "environment var" printf 'pass\n' printf 'Running keyword redefinition test...' unset BC_ENV_ARGS redefine_res="$outputdir/bc_outputs/redefine.txt" redefine_out="$outputdir/bc_outputs/redefine_results.txt" outdir=$(dirname "$easter_out") if [ ! 
-d "$outdir" ]; then mkdir -p "$outdir" fi printf '5\n0\n' > "$redefine_res" "$exe" "$@" --redefine=print -e 'define print(x) { x }' -e 'print(5)' > "$redefine_out" + err="$?" checktest "$d" "$err" "keyword redefinition" "$redefine_res" "$redefine_out" "$exe" "$@" -r "abs" -r "else" -e 'abs = 5;else = 0' -e 'abs;else' > "$redefine_out" + err="$?" checktest "$d" "$err" "keyword redefinition" "$redefine_res" "$redefine_out" if [ "$extra_math" -ne 0 ]; then "$exe" "$@" -lr abs -e "perm(5, 1)" -e "0" > "$redefine_out" + err="$?" checktest "$d" "$err" "keyword not redefined in builtin library" "$redefine_res" "$redefine_out" fi "$exe" "$@" -r "break" -e 'define break(x) { x }' 2> "$redefine_out" err="$?" checkerrtest "$d" "$err" "keyword redefinition error" "$redefine_out" "$d" "$exe" "$@" -e 'define read(x) { x }' 2> "$redefine_out" err="$?" checkerrtest "$d" "$err" "Keyword redefinition error without BC_REDEFINE_KEYWORDS" "$redefine_out" "$d" printf 'pass\n' + printf 'Running multiline comment expression file test...' + + multiline_expr_res="" + multiline_expr_out="$outputdir/bc_outputs/multiline_expr_results.txt" + + # tests/bc/misc1.txt happens to have a multiline comment in it. + "$exe" "$@" -f "$testdir/bc/misc1.txt" > "$multiline_expr_out" + err="$?" + + checktest "$d" "$err" "multiline comment in expression file" "$testdir/bc/misc1_results.txt" \ + "$multiline_expr_out" + + printf 'pass\n' + printf 'Running multiline comment expression file error test...' + + "$exe" "$@" -f "$testdir/bc/errors/05.txt" 2> "$multiline_expr_out" + err="$?" + + checkerrtest "$d" "$err" "multiline comment in expression file error" \ + "$multiline_expr_out" "$d" + + printf 'pass\n' + printf 'Running multiline string expression file test...' + + # tests/bc/strings.txt happens to have a multiline string in it. + "$exe" "$@" -f "$testdir/bc/strings.txt" > "$multiline_expr_out" + err="$?" 
+ + checktest "$d" "$err" "multiline string in expression file" "$testdir/bc/strings_results.txt" \ + "$multiline_expr_out" + + printf 'pass\n' + printf 'Running multiline string expression file error test...' + + "$exe" "$@" -f "$testdir/bc/errors/16.txt" 2> "$multiline_expr_out" + err="$?" + + checkerrtest "$d" "$err" "multiline string in expression file with backslash error" \ + "$multiline_expr_out" "$d" + + "$exe" "$@" -f "$testdir/bc/errors/04.txt" 2> "$multiline_expr_out" + err="$?" + + checkerrtest "$d" "$err" "multiline string in expression file error" \ + "$multiline_expr_out" "$d" + + printf 'pass\n' else export DC_ENV_ARGS="'-x'" export DC_EXPR_EXIT="1" printf '4s stuff\n' | "$exe" "$@" > /dev/null checktest_retcode "$d" "$?" "environment var" "$exe" "$@" -e 4pR > /dev/null checktest_retcode "$d" "$?" "environment var" printf 'pass\n' set +e # dc has an extra test for a case that someone found running this easter.dc # script. It went into an infinite loop, so we want to check that we did not # regress. printf 'three\n' | cut -c1-3 > /dev/null err=$? if [ "$err" -eq 0 ]; then printf 'Running dc Easter script...' easter_res="$outputdir/dc_outputs/easter.txt" easter_out="$outputdir/dc_outputs/easter_results.txt" outdir=$(dirname "$easter_out") if [ ! -d "$outdir" ]; then mkdir -p "$outdir" fi printf '4 April 2021\n' > "$easter_res" "$testdir/dc/scripts/easter.sh" "$exe" 2021 "$@" | cut -c1-12 > "$easter_out" err="$?" checktest "$d" "$err" "Easter script" "$easter_res" "$easter_out" printf 'pass\n' fi fi out1="$outputdir/${d}_outputs/${d}_other.txt" out2="$outputdir/${d}_outputs/${d}_other_test.txt" printf 'Running %s line length tests...' "$d" printf '%s\n' "$numres" > "$out1" export "$line_var"=80 printf '%s\n' "$num" | "$exe" "$@" > "$out2" checktest "$d" "$?" "line length" "$out1" "$out2" printf '%s\n' "$num70" > "$out1" export "$line_var"=2147483647 printf '%s\n' "$num" | "$exe" "$@" > "$out2" checktest "$d" "$?" 
"line length 2" "$out1" "$out2" printf '%s\n' "$num2" > "$out1" export "$line_var"=62 printf '%s\n' "$num" | "$exe" "$@" -L > "$out2" checktest "$d" "$?" "line length 3" "$out1" "$out2" printf '0\n' > "$out1" printf '%s\n' "$lltest" | "$exe" "$@" -L > "$out2" checktest "$d" "$?" "line length 3" "$out1" "$out2" printf 'pass\n' printf '%s\n' "$numres" > "$out1" export "$line_var"=2147483647 printf 'Running %s arg tests...' "$d" f="$testdir/$d/add.txt" exprs=$(cat "$f") results=$(cat "$testdir/$d/add_results.txt") printf '%s\n%s\n%s\n%s\n' "$results" "$results" "$results" "$results" > "$out1" "$exe" "$@" -e "$exprs" -f "$f" --expression "$exprs" --file "$f" -e "$halt" > "$out2" checktest "$d" "$?" "arg" "$out1" "$out2" printf '%s\n' "$halt" | "$exe" "$@" -- "$f" "$f" "$f" "$f" > "$out2" checktest "$d" "$?" "arg" "$out1" "$out2" if [ "$d" = "bc" ]; then printf '%s\n' "$halt" | "$exe" "$@" -i > /dev/null 2>&1 fi printf '%s\n' "$halt" | "$exe" "$@" -h > /dev/null checktest_retcode "$d" "$?" "arg" printf '%s\n' "$halt" | "$exe" "$@" -P > /dev/null checktest_retcode "$d" "$?" "arg" printf '%s\n' "$halt" | "$exe" "$@" -R > /dev/null checktest_retcode "$d" "$?" "arg" printf '%s\n' "$halt" | "$exe" "$@" -v > /dev/null checktest_retcode "$d" "$?" "arg" printf '%s\n' "$halt" | "$exe" "$@" -V > /dev/null checktest_retcode "$d" "$?" "arg" out=$(printf '0.1\n-0.1\n1.1\n-1.1\n0.1\n-0.1\n') printf '%s\n' "$out" > "$out1" if [ "$d" = "bc" ]; then data=$(printf '0.1\n-0.1\n1.1\n-1.1\n.1\n-.1\n') else data=$(printf '0.1pR\n_0.1pR\n1.1pR\n_1.1pR\n.1pR\n_.1pR\n') fi printf '%s\n' "$data" | "$exe" "$@" -z > "$out2" checktest "$d" "$?" "leading zero" "$out1" "$out2" if [ "$d" = "bc" ] && [ "$extra_math" -ne 0 ]; then printf '%s\n' "$halt" | "$exe" "$@" -lz "$testdir/bc/leadingzero.txt" > "$out2" checktest "$d" "$?" 
"leading zero script" "$testdir/bc/leadingzero_results.txt" "$out2" fi "$exe" "$@" -f "saotehasotnehasthistohntnsahxstnhalcrgxgrlpyasxtsaosysxsatnhoy.txt" > /dev/null 2> "$out2" err="$?" checkerrtest "$d" "$err" "invalid file argument" "$out2" "$d" "$exe" "$@" "-$opt" -e "$exprs" > /dev/null 2> "$out2" err="$?" checkerrtest "$d" "$err" "invalid option argument" "$out2" "$d" "$exe" "$@" "--$lopt" -e "$exprs" > /dev/null 2> "$out2" err="$?" checkerrtest "$d" "$err" "invalid long option argument" "$out2" "$d" "$exe" "$@" "-u" -e "$exprs" > /dev/null 2> "$out2" err="$?" checkerrtest "$d" "$err" "unrecognized option argument" "$out2" "$d" "$exe" "$@" "--uniform" -e "$exprs" > /dev/null 2> "$out2" err="$?" checkerrtest "$d" "$err" "unrecognized long option argument" "$out2" "$d" "$exe" "$@" -f > /dev/null 2> "$out2" err="$?" checkerrtest "$d" "$err" "missing required argument to short option" "$out2" "$d" "$exe" "$@" --file > /dev/null 2> "$out2" err="$?" checkerrtest "$d" "$err" "missing required argument to long option" "$out2" "$d" "$exe" "$@" --version=5 > /dev/null 2> "$out2" err="$?" checkerrtest "$d" "$err" "given argument to long option with no argument" "$out2" "$d" "$exe" "$@" -: > /dev/null 2> "$out2" err="$?" checkerrtest "$d" "$err" "colon short option" "$out2" "$d" "$exe" "$@" --: > /dev/null 2> "$out2" err="$?" checkerrtest "$d" "$err" "colon long option" "$out2" "$d" printf 'pass\n' printf 'Running %s directory test...' "$d" "$exe" "$@" "$testdir" > /dev/null 2> "$out2" err="$?" checkerrtest "$d" "$err" "directory" "$out2" "$d" printf 'pass\n' printf 'Running %s binary file test...' "$d" bin="/bin/sh" "$exe" "$@" "$bin" > /dev/null 2> "$out2" err="$?" checkerrtest "$d" "$err" "binary file" "$out2" "$d" printf 'pass\n' printf 'Running %s binary stdin test...' "$d" cat "$bin" | "$exe" "$@" > /dev/null 2> "$out2" err="$?" checkerrtest "$d" "$err" "binary stdin" "$out2" "$d" printf 'pass\n' if [ "$d" = "bc" ]; then printf 'Running %s limits tests...' 
"$d" printf 'limits\n' | "$exe" "$@" > "$out2" /dev/null 2>&1 checktest_retcode "$d" "$?" "limits" if [ ! -s "$out2" ]; then err_exit "$d did not produce output on the limits test" 1 fi exec printf 'pass\n' fi