diff --git a/contrib/expat/Changes b/contrib/expat/Changes index 40127e1b76f7..95f697b39a48 100644 --- a/contrib/expat/Changes +++ b/contrib/expat/Changes @@ -1,1127 +1,1161 @@ NOTE: We are looking for help with a few things: https://github.com/libexpat/libexpat/labels/help%20wanted If you can help, please get in touch. Thanks! +Release 2.4.7 Fri March 4 2022 + Bug fixes: + #572 #577 Relax fix to CVE-2022-25236 (introduced with release 2.4.5) + with regard to all valid URI characters (RFC 3986), + i.e. the following set (excluding whitespace): + ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz + 0123456789 % -._~ :/?#[]@ !$&'()*+,;= + + Other changes: + #555 #570 #581 CMake|Windows: Store Expat version in the DLL + #577 Document consequences of namespace separator choices not just + in doc/reference.html but also in header + #577 Document Expat's lack of validation of namespace URIs against + RFC 3986, and that the XML 1.0r4 specification doesn't + require Expat to validate namespace URIs, and that Expat + may do more in that regard in future releases. + If you find need for strict RFC 3986 URI validation on + application level today, https://uriparser.github.io/ may + be of interest. + #579 Fix documentation of XML_EndDoctypeDeclHandler in + #575 Document that a call to XML_FreeContentModel can be done at + a later time from outside the element declaration handler + #574 Make hardcoded namespace URIs easier to find in code + #573 Update documentation on use of XML_POOR_ENTOPY on Solaris + #569 #571 tests: Resolve use of macros NAN and INFINITY for GNU G++ + 4.8.2 on Solaris. + #578 #580 Version info bumped from 9:6:8 to 9:7:8; + see https://verbump.de/ for what these numbers do + + Special thanks to: + Jeffrey Walton + Johnny Jazeix + Thijs Schreijer + Release 2.4.6 Sun February 20 2022 Bug fixes: #566 Fix a regression introduced by the fix for CVE-2022-25313 in release 2.4.5 that affects applications that (1) call function XML_SetElementDeclHandler and (2) are parsing XML that contains nested element declarations (e.g. ""). Other changes: #567 #568 Version info bumped from 9:5:8 to 9:6:8; see https://verbump.de/ for what these numbers do Special thanks to: Matt Sergeant Samanta Navarro Sergei Trofimovich and NixOS Perl XML::Parser Release 2.4.5 Fri February 18 2022 Security fixes: #562 CVE-2022-25235 -- Passing malformed 2- and 3-byte UTF-8 sequences (e.g. from start tag names) to the XML processing application on top of Expat can cause arbitrary damage (e.g. code execution) depending on how invalid UTF-8 is handled inside the XML processor; validation was not their job but Expat's. Exploits with code execution are known to exist. #561 CVE-2022-25236 -- Passing (one or more) namespace separator characters in "xmlns[:prefix]" attribute values made Expat send malformed tag names to the XML processor on top of Expat which can cause arbitrary damage (e.g. code execution) depending on such unexpectable cases are handled inside the XML processor; validation was not their job but Expat's. Exploits with code execution are known to exist. #558 CVE-2022-25313 -- Fix stack exhaustion in doctype parsing that could be triggered by e.g. a 2 megabytes file with a large number of opening braces. Expected impact is denial of service or potentially arbitrary code execution. #560 CVE-2022-25314 -- Fix integer overflow in function copyString; only affects the encoding name parameter at parser creation time which is often hardcoded (rather than user input), takes a value in the gigabytes to trigger, and a 64-bit machine. Expected impact is denial of service. #559 CVE-2022-25315 -- Fix integer overflow in function storeRawNames; needs input in the gigabytes and a 64-bit machine. Expected impact is denial of service or potentially arbitrary code execution. Other changes: #557 #564 Version info bumped from 9:4:8 to 9:5:8; see https://verbump.de/ for what these numbers do Special thanks to: Ivan Fratric Samanta Navarro and Google Project Zero JetBrains Release 2.4.4 Sun January 30 2022 Security fixes: #550 CVE-2022-23852 -- Fix signed integer overflow (undefined behavior) in function XML_GetBuffer (that is also called by function XML_Parse internally) for when XML_CONTEXT_BYTES is defined to >0 (which is both common and default). Impact is denial of service or more. #551 CVE-2022-23990 -- Fix unsigned integer overflow in function doProlog triggered by large content in element type declarations when there is an element declaration handler present (from a prior call to XML_SetElementDeclHandler). Impact is denial of service or more. Bug fixes: #544 #545 xmlwf: Fix a memory leak on output file opening error Other changes: #546 Autotools: Fix broken CMake support under Cygwin #554 Windows: Add missing files to the installer to fix compilation with CMake from installed sources #552 #554 Version info bumped from 9:3:8 to 9:4:8; see https://verbump.de/ for what these numbers do Special thanks to: Carlo Bramini hwt0415 Roland Illig Samanta Navarro and Clang LeakSan and the Clang team Release 2.4.3 Sun January 16 2022 Security fixes: #531 #534 CVE-2021-45960 -- Fix issues with left shifts by >=29 places resulting in a) realloc acting as free b) realloc allocating too few bytes c) undefined behavior depending on architecture and precise value for XML documents with >=2^27+1 prefixed attributes on a single XML tag a la "" where XML_ParserCreateNS is used to create the parser (which needs argument "-n" when running xmlwf). Impact is denial of service, or more. #532 #538 CVE-2021-46143 (ZDI-CAN-16157) -- Fix integer overflow on variable m_groupSize in function doProlog leading to realloc acting as free. Impact is denial of service or more. #539 CVE-2022-22822 to CVE-2022-22827 -- Prevent integer overflows near memory allocation at multiple places. Mitre assigned a dedicated CVE for each involved internal C function: - CVE-2022-22822 for function addBinding - CVE-2022-22823 for function build_model - CVE-2022-22824 for function defineAttribute - CVE-2022-22825 for function lookup - CVE-2022-22826 for function nextScaffoldPart - CVE-2022-22827 for function storeAtts Impact is denial of service or more. Other changes: #535 CMake: Make call to file(GENERATE [..]) work for CMake <3.19 #541 Autotools|CMake: MinGW: Make run.sh(.in) work for Cygwin and MSYS2 by not going through Wine on these platforms #527 #528 Address compiler warnings #533 #543 Version info bumped from 9:2:8 to 9:3:8; see https://verbump.de/ for what these numbers do Infrastructure: #536 CI: Check for realistic minimum CMake version #529 #539 CI: Cover compilation with -m32 #529 CI: Store coverage reports as artifacts for download #528 CI: Upgrade Clang from 11 to 13 Special thanks to: An anonymous whitehat Christopher Degawa J. Peter Mugaas Tyson Smith and GCC Farm Project Trend Micro Zero Day Initiative Release 2.4.2 Sun December 19 2021 Other changes: #509 #510 Link againgst libm for function "isnan" #513 #514 Include expat_config.h as early as possible #498 Autotools: Include files with release archives: - buildconf.sh - fuzz/*.c #507 #519 Autotools: Sync CMake templates #495 #524 CMake: MinGW: Fix pkg-config section "Libs" for - non-release build types (e.g. -DCMAKE_BUILD_TYPE=Debug) - multi-config CMake generators (e.g. Ninja Multi-Config) #502 #503 docs: Document that function XML_GetBuffer may return NULL when asking for a buffer of 0 (zero) bytes size #522 #523 docs: Fix return value docs for both XML_SetBillionLaughsAttackProtection* functions #525 #526 Version info bumped from 9:1:8 to 9:2:8; see https://verbump.de/ for what these numbers do Special thanks to: Dong-hee Na Joergen Ibsen Kai Pastor Release 2.4.1 Sun May 23 2021 Bug fixes: #488 #490 Autotools: Fix installed header expat_config.h for multilib systems; regression introduced in 2.4.0 by pull request #486 Other changes: #491 #492 Version info bumped from 9:0:8 to 9:1:8; see https://verbump.de/ for what these numbers do Special thanks to: Gentoo's QA check "multilib_check_headers" Release 2.4.0 Sun May 23 2021 Security fixes: #34 #466 #484 CVE-2013-0340/CWE-776 -- Protect against billion laughs attacks (denial-of-service; flavors targeting CPU time or RAM or both, leveraging general entities or parameter entities or both) by tracking and limiting the input amplification factor ( := ( + ) / ). By conservative default, amplification up to a factor of 100.0 is tolerated and rejection only starts after 8 MiB of output bytes (= + ) have been processed. The fix adds the following to the API: - A new error code XML_ERROR_AMPLIFICATION_LIMIT_BREACH to signals this specific condition. - Two new API functions .. - XML_SetBillionLaughsAttackProtectionMaximumAmplification and - XML_SetBillionLaughsAttackProtectionActivationThreshold .. to further tighten billion laughs protection parameters when desired. Please see file "doc/reference.html" for details. If you ever need to increase the defaults for non-attack XML payload, please file a bug report with libexpat. - Two new XML_FEATURE_* constants .. - that can be queried using the XML_GetFeatureList function, and - that are shown in "xmlwf -v" output. - Two new environment variable switches .. - EXPAT_ACCOUNTING_DEBUG=(0|1|2|3) and - EXPAT_ENTITY_DEBUG=(0|1) .. for runtime debugging of accounting and entity processing. Specific behavior of these values may change in the future. - Two new command line arguments "-a FACTOR" and "-b BYTES" for xmlwf to further tighten billion laughs protection parameters when desired. If you ever need to increase the defaults for non-attack XML payload, please file a bug report with libexpat. Bug fixes: #332 #470 For (non-default) compilation with -DEXPAT_MIN_SIZE=ON (CMake) or CPPFLAGS=-DXML_MIN_SIZE (GNU Autotools): Fix segfault for UTF-16 payloads containing CDATA sections. #485 #486 Autotools: Fix generated CMake files for non-64bit and non-Linux platforms (e.g. macOS and MinGW in particular) that were introduced with release 2.3.0 Other changes: #468 #469 xmlwf: Improve help output and the xmlwf man page #463 xmlwf: Improve maintainability through some refactoring #477 xmlwf: Fix man page DocBook validity #458 #459 CMake: Support absolute paths for both CMAKE_INSTALL_LIBDIR and CMAKE_INSTALL_INCLUDEDIR #471 #481 CMake: Add support for standard variable BUILD_SHARED_LIBS #457 Unexpose symbol _INTERNAL_trim_to_complete_utf8_characters #467 Resolve macro HAVE_EXPAT_CONFIG_H #472 Delete unused legacy helper file "conftools/PrintPath" #473 #483 Improve attribution #464 #465 #477 doc/reference.html: Fix XHTML validity #475 #478 doc/reference.html: Replace the 90s look by OK.css #479 Version info bumped from 8:0:7 to 9:0:8 due to addition of new symbols and error codes; see https://verbump.de/ for what these numbers do Infrastructure: #456 CI: Enable periodic runs #457 CI: Start covering the list of exported symbols #474 CI: Isolate coverage task #476 #482 CI: Adapt to breaking changes in image "ubuntu-18.04" #477 CI: Cover well-formedness and DocBook/XHTML validity of doc/reference.html and doc/xmlwf.xml Special thanks to: Dimitry Andric Eero Helenius Nick Wellnhofer Rhodri James Tomas Korbar Yury Gribov and Clang LeakSan JetBrains OSS-Fuzz Release 2.3.0 Thu March 25 2021 Bug fixes: #438 When calling XML_ParseBuffer without a prior successful call to XML_GetBuffer as a user, no longer trigger undefined behavior (by adding an integer to a NULL pointer) but rather return XML_STATUS_ERROR and set the error code to (new) code XML_ERROR_NO_BUFFER. Found by UBSan (UndefinedBehaviorSanitizer) of Clang 11 (but not Clang 9). #444 xmlwf: Exit status 2 was used for both: - malformed input files (documented) and - invalid command-line arguments (undocumented). The case of invalid command-line arguments now has its own exit status 4, resolving the ambiguity. Other changes: #439 xmlwf: Add argument -k to allow continuing after non-fatal errors #439 xmlwf: Add section about exit status to the -h help output #422 #426 #447 Windows: Drop support for Visual Studio <=14.0/2015 #434 Windows: CMake: Detect unsupported Visual Studio at configure time (rather than at compile time) #382 #428 testrunner: Make verbose mode (argument "-v") report about passed tests, and make default mode report about failures, as well. #442 CMake: Call "enable_language(CXX)" prior to tinkering with CMAKE_CXX_* variables #448 Document use of libexpat from a CMake-based project #451 Autotools: Install CMake files as generated by CMake 3.19.6 so that users with "find_package(expat [..] CONFIG [..])" are served on distributions that are *not* using the CMake build system inside for libexpat packaging #436 #437 Autotools: Drop obsolescent macro AC_HEADER_STDC #450 #452 Autotools: Resolve use of obsolete macro AC_CONFIG_HEADER #441 Address compiler warnings #443 Version info bumped from 7:12:6 to 8:0:7 due to addition of error code XML_ERROR_NO_BUFFER (see https://verbump.de/ for what these numbers do) Infrastructure: #435 #446 Replace Travis CI by GitHub Actions Special thanks to: Alexander Richardson Oleksandr Popovych Thomas Beutlich Tim Bray and Clang LeakSan, Clang 11 UBSan and the Clang team Release 2.2.10 Sat October 3 2020 Bug fixes: #390 #395 #398 Fix undefined behavior during parsing caused by pointer arithmetic with NULL pointers #404 #405 Fix reading uninitialized variable during parsing #406 xmlwf: Add missing check for malloc NULL return Other changes: #396 Windows: Drop support for Visual Studio <=8.0/2005 #409 Windows: Add missing file "Changes" to the installer to fix compilation with CMake from installed sources #403 xmlwf: Document exit codes in xmlwf manpage and exit with code 3 (rather than code 1) for output errors when used with "-d DIRECTORY" #356 #359 MinGW: Provide declaration of rand_s for mingwrt <5.3.0 #383 #392 Autotools: Use -Werror while configure tests the compiler for supported compile flags to avoid false positives #383 #393 #394 Autotools: Improve handling of user (C|CPP|CXX|LD)FLAGS, e.g. ensure that they have the last word over flags added while running ./configure #360 CMake: Create libexpatw.{dll,so} and expatw.pc (with emphasis on suffix "w") with -DEXPAT_CHAR_TYPE=(ushort|wchar_t) #360 CMake: Detect and deny unsupported build combinations involving -DEXPAT_CHAR_TYPE=(ushort|wchar_t) #360 CMake: Install pre-compiled shipped xmlwf.1 manpage in case of -DEXPAT_BUILD_DOCS=OFF #375 #380 #419 CMake: Fix use of Expat by means of add_subdirectory #407 #408 CMake: Keep expat target name constant at "expat" (i.e. refrain from using the target name to control build artifact filenames) #385 CMake: Fix compilation with -DEXPAT_SHARED_LIBS=OFF for Windows CMake: Expose man page compilation as target "xmlwf-manpage" #413 #414 CMake: Introduce option EXPAT_BUILD_PKGCONFIG to control generation of pkg-config file "expat.pc" #424 CMake: Add minimalistic support for building binary packages with CMake target "package"; based on CPack #366 CMake: Add option -DEXPAT_OSSFUZZ_BUILD=(ON|OFF) with default OFF to build fuzzer code against OSS-Fuzz and related environment variable LIB_FUZZING_ENGINE #354 Fix testsuite for -DEXPAT_DTD=OFF and -DEXPAT_NS=OFF, each #354 #355 .. #356 #412 Address compiler warnings #368 #369 Address pngcheck warnings with doc/*.png images #425 Version info bumped from 7:11:6 to 7:12:6 Special thanks to: asavah Ben Wagner Bhargava Shastry Frank Landgraf Jeffrey Walton Joe Orton Kleber Tarcísio Ma Lin Maciej Sroczyński Mohammed Khajapasha Vadim Zeitlin and Cppcheck 2.0 and the Cppcheck team Release 2.2.9 Wed September 25 2019 Other changes: examples: Drop executable bits from elements.c #349 Windows: Change the name of the Windows DLLs from expat*.dll to libexpat*.dll once more (regression from 2.2.8, first fixed in 1.95.3, issue #61 on SourceForge today, was issue #432456 back then); needs a fix due case-insensitive file systems on Windows and the fact that Perl's XML::Parser::Expat compiles into Expat.dll. #347 Windows: Only define _CRT_RAND_S if not defined Version info bumped from 7:10:6 to 7:11:6 Special thanks to: Ben Wagner Release 2.2.8 Fri September 13 2019 Security fixes: #317 #318 CVE-2019-15903 -- Fix heap overflow triggered by XML_GetCurrentLineNumber (or XML_GetCurrentColumnNumber), and deny internal entities closing the doctype; fixed in commit c20b758c332d9a13afbbb276d30db1d183a85d43 Bug fixes: #240 Fix cases where XML_StopParser did not have any effect when called from inside of an end element handler #341 xmlwf: Fix exit code for operation without "-d DIRECTORY"; previously, only "-d DIRECTORY" would give you a proper exit code: # xmlwf -d . <<<'' 2>/dev/null ; echo $? 2 # xmlwf <<<'' 2>/dev/null ; echo $? 0 Now both cases return exit code 2. Other changes: #299 #302 Windows: Replace LoadLibrary hack to access unofficial API function SystemFunction036 (RtlGenRandom) by using official API function rand_s (needs WinXP+) #325 Windows: Drop support for Visual Studio <=7.1/2003 and document supported compilers in README.md #286 Windows: Remove COM code from xmlwf; in case it turns out needed later, there will be a dedicated repository below https://github.com/libexpat/ for that code #322 Windows: Remove explicit MSVC solution and project files. You can generate Visual Studio solution files through CMake, e.g.: cmake -G"Visual Studio 15 2017" . #338 xmlwf: Make "xmlwf -h" help output more friendly #339 examples: Improve elements.c #244 #264 Autotools: Add argument --enable-xml-attr-info #239 #301 Autotools: Add arguments --with-getrandom --without-getrandom --with-sys-getrandom --without-sys-getrandom #312 #343 Autotools: Fix linking issues with "./configure LD=clang" Autotools: Fix "make run-xmltest" for out-of-source builds #329 #336 CMake: Pull all options from Expat <=2.2.7 into namespace prefix EXPAT_ with the exception of DOCBOOK_TO_MAN: - BUILD_doc -> EXPAT_BUILD_DOCS (plural) - BUILD_examples -> EXPAT_BUILD_EXAMPLES - BUILD_shared -> EXPAT_SHARED_LIBS - BUILD_tests -> EXPAT_BUILD_TESTS - BUILD_tools -> EXPAT_BUILD_TOOLS - DOCBOOK_TO_MAN -> DOCBOOK_TO_MAN (unchanged) - INSTALL -> EXPAT_ENABLE_INSTALL - MSVC_USE_STATIC_CRT -> EXPAT_MSVC_STATIC_CRT - USE_libbsd -> EXPAT_WITH_LIBBSD - WARNINGS_AS_ERRORS -> EXPAT_WARNINGS_AS_ERRORS - XML_CONTEXT_BYTES -> EXPAT_CONTEXT_BYTES - XML_DEV_URANDOM -> EXPAT_DEV_URANDOM - XML_DTD -> EXPAT_DTD - XML_NS -> EXPAT_NS - XML_UNICODE -> EXPAT_CHAR_TYPE=ushort (!) - XML_UNICODE_WCHAR_T -> EXPAT_CHAR_TYPE=wchar_t (!) #244 #264 CMake: Add argument -DEXPAT_ATTR_INFO=(ON|OFF), default OFF #326 CMake: Add argument -DEXPAT_LARGE_SIZE=(ON|OFF), default OFF #328 CMake: Add argument -DEXPAT_MIN_SIZE=(ON|OFF), default OFF #239 #277 CMake: Add arguments -DEXPAT_WITH_GETRANDOM=(ON|OFF|AUTO), default AUTO -DEXPAT_WITH_SYS_GETRANDOM=(ON|OFF|AUTO), default AUTO #326 CMake: Install expat_config.h to include directory #326 CMake: Generate and install configuration files for future find_package(expat [..] CONFIG [..]) CMake: Now produces a summary of applied configuration CMake: Require C++ compiler only when tests are enabled #330 CMake: Fix compilation for 16bit character types, i.e. ex -DXML_UNICODE=ON (and ex -DXML_UNICODE_WCHAR_T=ON) #265 CMake: Fix linking with MinGW #330 CMake: Add full support for MinGW; to enable, use -DCMAKE_TOOLCHAIN_FILE=[expat]/cmake/mingw-toolchain.cmake #330 CMake: Port "make run-xmltest" from GNU Autotools to CMake #316 CMake: Windows: Make binary postfix match MSVC Old: expat[d].lib New: expat[w][d][MD|MT].lib CMake: Migrate files from Windows to Unix line endings #308 CMake: Integrate OSS-Fuzz fuzzers, option -DEXPAT_BUILD_FUZZERS=(ON|OFF), default OFF #14 Drop an OpenVMS support leftover #235 #268 .. #270 #310 .. #313 #331 #333 Address compiler warnings #282 #283 .. #284 #285 Address cppcheck warnings #294 #295 Address Clang Static Analyzer warnings #24 #293 Mass-apply clang-format 9 (and ensure conformance during CI) Version info bumped from 7:9:6 to 7:10:6 Special thanks to: David Loffredo Joonun Jang Kishore Kunche Marco Maggi Mitch Phillips Mohammed Khajapasha Rolf Ade xantares Zhongyuan Zhou Release 2.2.7 Wed June 19 2019 Security fixes: #186 #262 CVE-2018-20843 -- Fix extraction of namespace prefixes from XML names; XML names with multiple colons could end up in the wrong namespace, and take a high amount of RAM and CPU resources while processing, opening the door to use for denial-of-service attacks Other changes: #195 #197 Autotools/CMake: Utilize -fvisibility=hidden to stop exporting non-API symbols #227 Autotools: Add --without-examples and --without-tests #228 Autotools: Modernize configure.ac #245 #246 Autotools: Fix check for -fvisibility=hidden for Clang #247 #248 Autotools: Fix compilation for lack of docbook2x-man #236 #258 Autotools: Produce .tar.{gz,lz,xz} release archives #212 CMake: Make libdir of pkgconfig expat.pc support multilib #158 #263 CMake: Build man page in PROJECT_BINARY_DIR not _SOURCE_DIR #219 Remove fallback to bcopy, assume that memmove(3) exists #257 Use portable "/usr/bin/env bash" shebang (e.g. for OpenBSD) #243 Windows: Fix syntax of .def module definition files Version info bumped from 7:8:6 to 7:9:6 Special thanks to: Benjamin Peterson Caolán McNamara Hanno Böck KangLin Kishore Kunche Marco Maggi Rhodri James Sebastian Dröge userwithuid Yury Gribov Release 2.2.6 Sun August 12 2018 Bug fixes: #170 #206 Avoid doing arithmetic with NULL pointers in XML_GetBuffer #204 #205 Fix 2.2.5 regression with suspend-resume while parsing a document like '' Other changes: #165 #168 Autotools: Fix docbook-related configure syntax error #166 Autotools: Avoid grep option `-q` for Solaris #167 Autotools: Support ./configure DOCBOOK_TO_MAN="xmlto man --skip-validation" #159 #167 Autotools: Support DOCBOOK_TO_MAN command which produces xmlwf.1 rather than XMLWF.1; also covers case insensitive file systems #181 Autotools: Drop -rpath option passed to libtool #188 Autotools: Detect and deny SGML docbook2man as ours is XML #188 Autotools/CMake: Support command db2x_docbook2man as well #174 CMake: Introduce option WARNINGS_AS_ERRORS, defaults to OFF #184 #185 CMake: Introduce option MSVC_USE_STATIC_CRT, defaults to OFF #207 #208 CMake: Introduce option XML_UNICODE and XML_UNICODE_WCHAR_T, both defaulting to OFF #175 CMake: Prefer check_symbol_exists over check_function_exists #176 CMake: Create the same pkg-config file as with GNU Autotools #178 #179 CMake: Use GNUInstallDirs module to set proper defaults for install directories #208 CMake: Utilize expat_config.h.cmake for XML_DEV_URANDOM #180 Windows: Fix compilation of test suite for Visual Studio 2008 #131 #173 #202 Address compiler warnings #187 #190 #200 Fix miscellaneous typos Version info bumped from 7:7:6 to 7:8:6 Special thanks to: Anton Maklakov Benjamin Peterson Brad King Franek Korta Frank Rast Joe Orton luzpaz Pedro Vicente Rainer Jung Rhodri James Rolf Ade Rolf Eike Beer Thomas Beutlich Tomasz Kłoczko Release 2.2.5 Tue October 31 2017 Bug fixes: #8 If the parser runs out of memory, make sure its internal state reflects the memory it actually has, not the memory it wanted to have. #11 The default handler wasn't being called when it should for a SYSTEM or PUBLIC doctype if an entity declaration handler was registered. #137 #138 Fix a case of mistakenly reported parsing success where XML_StopParser was called from an element handler #162 Function XML_ErrorString was returning NULL rather than a message for code XML_ERROR_INVALID_ARGUMENT introduced with release 2.2.1 Other changes: #106 xmlwf: Add argument -N adding notation declarations #75 #106 Test suite: Resolve expected failure cases where xmlwf output was incomplete #127 Windows: Fix test suite compilation #126 #127 Windows: Fix compilation for Visual Studio 2012 Windows: Upgrade shipped project files to Visual Studio 2017 #33 #132 tests: Mass-fix compilation for XML_UNICODE_WCHAR_T #129 examples: Fix compilation for XML_UNICODE_WCHAR_T #130 benchmark: Fix compilation for XML_UNICODE_WCHAR_T #144 xmlwf: Fix compilation for XML_UNICODE_WCHAR_T; still needs Windows or MinGW for 2-byte wchar_t #9 Address two Clang Static Analyzer false positives #59 Resolve troublesome macros hiding parser struct membership and dereferencing that pointer #6 Resolve superfluous internal malloc/realloc switch #153 #155 Improve docbook2x-man detection #160 Undefine NDEBUG in the test suite (rather than rejecting it) #161 Address compiler warnings Version info bumped from 7:6:6 to 7:7:6 Special thanks to: Benbuck Nason Hans Wennborg José Gutiérrez de la Concha Pedro Monreal Gonzalez Rhodri James Rolf Ade Stephen Groat and Core Infrastructure Initiative Release 2.2.4 Sat August 19 2017 Bug fixes: #115 Fix copying of partial characters for UTF-8 input Other changes: #109 Fix "make check" for non-x86 architectures that default to unsigned type char (-128..127 rather than 0..255) #109 coverage.sh: Cover -funsigned-char Autotools: Introduce --without-xmlwf argument #65 Autotools: Replace handwritten Makefile with GNU Automake #43 CMake: Auto-detect high quality entropy extractors, add new option USE_libbsd=ON to use arc4random_buf of libbsd #74 CMake: Add -fno-strict-aliasing only where supported #114 CMake: Always honor manually set BUILD_* options #114 CMake: Compile man page if docbook2x-man is available, only #117 Include file tests/xmltest.log.expected in source tarball (required for "make run-xmltest") #117 Include (existing) Visual Studio 2013 files in source tarball Improve test suite error output #111 Fix some typos in documentation Version info bumped from 7:5:6 to 7:6:6 Special thanks to: Jakub Wilk Joe Orton Lin Tian Rolf Eike Beer Release 2.2.3 Wed August 2 2017 Security fixes: #82 CVE-2017-11742 -- Windows: Fix DLL hijacking vulnerability using Steve Holme's LoadLibrary wrapper for/of cURL Bug fixes: #85 Fix a dangling pointer issue related to realloc Other changes: Increase code coverage #91 Linux: Allow getrandom to fail if nonblocking pool has not yet been initialized and read /dev/urandom then, instead. This is in line with what recent Python does. #81 Pre-10.7/Lion macOS: Support entropy from arc4random #86 Check that a UTF-16 encoding in an XML declaration has the right endianness #4 #5 #7 Recover correctly when some reallocations fail Repair "./configure && make" for systems without any provider of high quality entropy and try reading /dev/urandom on those Ensure that user-defined character encodings have converter functions when they are needed Fix mis-leading description of argument -c in xmlwf.1 Rely on macro HAVE_ARC4RANDOM_BUF (rather than __CloudABI__) for CloudABI #100 Fix use of SIPHASH_MAIN in siphash.h #23 Test suite: Fix memory leaks Version info bumped from 7:4:6 to 7:5:6 Special thanks to: Chanho Park Joe Orton Pascal Cuoq Rhodri James Simon McVittie Vadim Zeitlin Viktor Szakats and Core Infrastructure Initiative Release 2.2.2 Wed July 12 2017 Security fixes: #43 Protect against compilation without any source of high quality entropy enabled, e.g. with CMake build system; commit ff0207e6076e9828e536b8d9cd45c9c92069b895 #60 Windows with _UNICODE: Unintended use of LoadLibraryW with a non-wide string resulted in failure to load advapi32.dll and degradation in quality of used entropy when compiled with _UNICODE for Windows; you can launch existing binaries with EXPAT_ENTROPY_DEBUG=1 in the environment to inspect the quality of entropy used during runtime; commits * 95b95032f907ef1cd17ee7a9a1768010a825d61d * 73a5a2e9c081f49f2d775cf7ced864158b68dc80 [MOX-006] Fix non-NULL parser parameter validation in XML_Parse; resulted in NULL dereference, previously; commit ac256dafdffc9622ab0dc2c62fcecb0dfcfa71fe Bug fixes: #69 Fix improper use of unsigned long long integer literals Other changes: #73 Start requiring a C99 compiler #49 Fix "==" Bashism in configure script #50 Fix too eager getrandom detection for Debian GNU/kFreeBSD #52 and macOS #51 Address lack of stdint.h in Visual Studio 2003 to 2008 #58 Address compile warnings #68 Fix "./buildconf.sh && ./configure" for some versions of Dash for /bin/sh #72 CMake: Ease use of Expat in context of a parent project with multiple CMakeLists.txt files #72 CMake: Resolve mistaken executable permissions #76 Address compile warning with -DNDEBUG (not recommended!) #77 Address compile warning about macro redefinition Special thanks to: Alexander Bluhm Ben Boeckel Cătălin Răceanu Kerin Millar László Böszörményi S. P. Zeidler Segev Finer Václav Slavík Victor Stinner Viktor Szakats and Radically Open Security Release 2.2.1 Sat June 17 2017 Security fixes: CVE-2017-9233 -- External entity infinite loop DoS Details: https://libexpat.github.io/doc/cve-2017-9233/ Commit c4bf96bb51dd2a1b0e185374362ee136fe2c9d7f [MOX-002] CVE-2016-9063 -- Detect integer overflow; commit d4f735b88d9932bd5039df2335eefdd0723dbe20 (Fixed version of existing downstream patches!) (SF.net) #539 Fix regression from fix to CVE-2016-0718 cutting off longer tag names; commits * 896b6c1fd3b842f377d1b62135dccf0a579cf65d * af507cef2c93cb8d40062a0abe43a4f4e9158fb2 #16 * 0dbbf43fdb20f593ddf4fa1ff67288000dd4a7fd #25 More integer overflow detection (function poolGrow); commits * 810b74e4703dcfdd8f404e3cb177d44684775143 * 44178553f3539ce69d34abee77a05e879a7982ac [MOX-002] Detect overflow from len=INT_MAX call to XML_Parse; commits * 4be2cb5afcc018d996f34bbbce6374b7befad47f * 7e5b71b748491b6e459e5c9a1d090820f94544d8 [MOX-005] #30 Use high quality entropy for hash initialization: * arc4random_buf on BSD, systems with libbsd (when configured with --with-libbsd), CloudABI * RtlGenRandom on Windows XP / Server 2003 and later * getrandom on Linux 3.17+ In a way, that's still part of CVE-2016-5300. https://github.com/libexpat/libexpat/pull/30/commits [MOX-005] For the low quality entropy extraction fallback code, the parser instance address can no longer leak, commit 04ad658bd3079dd15cb60fc67087900f0ff4b083 [MOX-003] Prevent use of uninitialised variable; commit [MOX-004] a4dc944f37b664a3ca7199c624a98ee37babdb4b Add missing parameter validation to public API functions and dedicated error code XML_ERROR_INVALID_ARGUMENT: [MOX-006] * NULL checks; commits * d37f74b2b7149a3a95a680c4c4cd2a451a51d60a (merge/many) * 9ed727064b675b7180c98cb3d4f75efba6966681 * 6a747c837c50114dfa413994e07c0ba477be4534 * Negative length (XML_Parse); commit [MOX-002] 70db8d2538a10f4c022655d6895e4c3e78692e7f [MOX-001] #35 Change hash algorithm to William Ahern's version of SipHash to go further with fixing CVE-2012-0876. https://github.com/libexpat/libexpat/pull/39/commits Bug fixes: #32 Fix sharing of hash salt across parsers; relevant where XML_ExternalEntityParserCreate is called prior to XML_Parse, in particular (e.g. FBReader) #28 xmlwf: Auto-disable use of memory-mapping (and parsing as a single chunk) for files larger than ~1 GB (2^30 bytes) rather than failing with error "out of memory" #3 Fix double free after malloc failure in DTD code; commit 7ae9c3d3af433cd4defe95234eae7dc8ed15637f #17 Fix memory leak on parser error for unbound XML attribute prefix with new namespaces defined in the same tag; found by Google's OSS-Fuzz; commits * 16f87daae5a16132e479e4f71862128c7a915c73 * b47dbc9745932c160893d433220e462bd605f8cd xmlwf on Windows: Add missing calls to CloseHandle New features: #30 Introduced environment switch EXPAT_ENTROPY_DEBUG=1 for runtime debugging of entropy extraction Other changes: Increase code coverage #33 Reject use of XML_UNICODE_WCHAR_T with sizeof(wchar_t) != 2; XML_UNICODE_WCHAR_T was never meant to be used outside of Windows; 4-byte wchar_t is common on Linux (SF.net) #538 Start using -fno-strict-aliasing (SF.net) #540 Support compilation against cloudlibc of CloudABI Allow MinGW cross-compilation (SF.net) #534 CMake: Introduce option "BUILD_doc" (enabled by default) to bypass compilation of the xmlwf.1 man page (SF.net) pr2 CMake: Introduce option "INSTALL" (enabled by default) to bypass installation of expat files CMake: Fix ninja support Autotools: Add parameters --enable-xml-context [COUNT] and --disable-xml-context; default of context of 1024 bytes enabled unchanged #14 Drop AmigaOS 4.x code and includes #14 Drop ancient build systems: * Borland C++ Builder * OpenVMS * Open Watcom * Visual Studio 6.0 * Pre-X Mac OS (MPW Makefile) If you happen to rely on some of these, please get in touch for joining with maintenance. #10 Move from WIN32 to _WIN32 #13 Fix "make run-xmltest" order instability Address compile warnings Bump version info from 7:2:6 to 7:3:6 Add AUTHORS file Infrastructure: #1 Migrate from SourceForge to GitHub (except downloads): https://github.com/libexpat/ #1 Re-create http://libexpat.org/ project website Start utilizing Travis CI Special thanks to: Andy Wang Don Lewis Ed Schouten Karl Waclawek Pascal Cuoq Rhodri James Sergei Nikulov Tobias Taschner Viktor Szakats and Core Infrastructure Initiative Mozilla Foundation (MOSS Track 3: Secure Open Source) Radically Open Security Release 2.2.0 Tue June 21 2016 Security fixes: #537 CVE-2016-0718 -- Fix crash on malformed input CVE-2016-4472 -- Improve insufficient fix to CVE-2015-1283 / CVE-2015-2716 introduced with Expat 2.1.1 #499 CVE-2016-5300 -- Use more entropy for hash initialization than the original fix to CVE-2012-0876 #519 CVE-2012-6702 -- Resolve troublesome internal call to srand that was introduced with Expat 2.1.0 when addressing CVE-2012-0876 (issue #496) Bug fixes: Fix uninitialized reads of size 1 (e.g. in little2_updatePosition) Fix detection of UTF-8 character boundaries Other changes: #532 Fix compilation for Visual Studio 2010 (keyword "C99") Autotools: Resolve use of "$<" to better support bmake Autotools: Add QA script "qa.sh" (and make target "qa") Autotools: Respect CXXFLAGS if given Autotools: Fix "make run-xmltest" Autotools: Have "make run-xmltest" check for expected output p90 CMake: Fix static build (BUILD_shared=OFF) on Windows #536 CMake: Add soversion, support -DNO_SONAME=yes to bypass #323 CMake: Add suffix "d" to differentiate debug from release CMake: Define WIN32 with CMake on Windows Annotate memory allocators for GCC Address all currently known compile warnings Make sure that API symbols remain visible despite -fvisibility=hidden Remove executable flag from source files Resolve COMPILED_FROM_DSP in favor of WIN32 Special thanks to: Björn Lindahl Christian Heimes Cristian Rodríguez Daniel Krügler Gustavo Grieco Karl Waclawek László Böszörményi Marco Grassi Pascal Cuoq Sergei Nikulov Thomas Beutlich Warren Young Yann Droneaud Release 2.1.1 Sat March 12 2016 Security fixes: #582: CVE-2015-1283 - Multiple integer overflows in XML_GetBuffer Bug fixes: #502: Fix potential null pointer dereference #520: Symbol XML_SetHashSalt was not exported Output of "xmlwf -h" was incomplete Other changes: #503: Document behavior of calling XML_SetHashSalt with salt 0 Minor improvements to man page xmlwf(1) Improvements to the experimental CMake build system libtool now invoked with --verbose Release 2.1.0 Sat March 24 2012 - Security fixes: #2958794: CVE-2012-1148 - Memory leak in poolGrow. #2895533: CVE-2012-1147 - Resource leak in readfilemap.c. #3496608: CVE-2012-0876 - Hash DOS attack. #2894085: CVE-2009-3560 - Buffer over-read and crash in big2_toUtf8(). #1990430: CVE-2009-3720 - Parser crash with special UTF-8 sequences. - Bug Fixes: #1742315: Harmful XML_ParserCreateNS suggestion. #1785430: Expat build fails on linux-amd64 with gcc version>=4.1 -O3. #1983953, 2517952, 2517962, 2649838: Build modifications using autoreconf instead of buildconf.sh. #2815947, #2884086: OBJEXT and EXEEXT support while building. #2517938: xmlwf should return non-zero exit status if not well-formed. #2517946: Wrong statement about XMLDecl in xmlwf.1 and xmlwf.sgml. #2855609: Dangling positionPtr after error. #2990652: CMake support. #3010819: UNEXPECTED_STATE with a trailing "%" in entity value. #3206497: Uninitialized memory returned from XML_Parse. #3287849: make check fails on mingw-w64. - Patches: #1749198: pkg-config support. #3010222: Fix for bug #3010819. #3312568: CMake support. #3446384: Report byte offsets for attr names and values. - New Features / API changes: Added new API member XML_SetHashSalt() that allows setting an initial value (salt) for hash calculations. This is part of the fix for bug #3496608 to randomize hash parameters. When compiled with XML_ATTR_INFO defined, adds new API member XML_GetAttributeInfo() that allows retrieving the byte offsets for attribute names and values (patch #3446384). Added CMake build system. See bug #2990652 and patch #3312568. Added run-benchmark target to Makefile.in - relies on testdata module present in the same relative location as in the repository. Release 2.0.1 Tue June 5 2007 - Fixed bugs #1515266, #1515600: The character data handler's calling of XML_StopParser() was not handled properly; if the parser was stopped and the handler set to NULL, the parser would segfault. - Fixed bug #1690883: Expat failed on EBCDIC systems as it assumed some character constants to be ASCII encoded. - Minor cleanups of the test harness. - Fixed xmlwf bug #1513566: "out of memory" error on file size zero. - Fixed outline.c bug #1543233: missing a final XML_ParserFree() call. - Fixes and improvements for Windows platform: bugs #1409451, #1476160, #1548182, #1602769, #1717322. - Build fixes for various platforms: HP-UX, Tru64, Solaris 9: patch #1437840, bug #1196180. All Unix: #1554618 (refreshed config.sub/config.guess). #1490371, #1613457: support both, DESTDIR and INSTALL_ROOT, without relying on GNU-Make specific features. #1647805: Patched configure.in to work better with Intel compiler. - Fixes to Makefile.in to have make check work correctly: bugs #1408143, #1535603, #1536684. - Added Open Watcom support: patch #1523242. Release 2.0.0 Wed Jan 11 2006 - We no longer use the "check" library for C unit testing; we always use the (partial) internal implementation of the API. - Report XML_NS setting via XML_GetFeatureList(). - Fixed headers for use from C++. - XML_GetCurrentLineNumber() and XML_GetCurrentColumnNumber() now return unsigned integers. - Added XML_LARGE_SIZE switch to enable 64-bit integers for byte indexes and line/column numbers. - Updated to use libtool 1.5.22 (the most recent). - Added support for AmigaOS. - Some mostly minor bug fixes. SF issues include: #1006708, #1021776, #1023646, #1114960, #1156398, #1221160, #1271642. Release 1.95.8 Fri Jul 23 2004 - Major new feature: suspend/resume. Handlers can now request that a parse be suspended for later resumption or aborted altogether. See "Temporarily Stopping Parsing" in the documentation for more details. - Some mostly minor bug fixes, but compilation should no longer generate warnings on most platforms. SF issues include: #827319, #840173, #846309, #888329, #896188, #923913, #928113, #961698, #985192. Release 1.95.7 Mon Oct 20 2003 - Fixed enum XML_Status issue (reported on SourceForge many times), so compilers that are properly picky will be happy. - Introduced an XMLCALL macro to control the calling convention used by the Expat API; this macro should be used to annotate prototypes and definitions of callback implementations in code compiled with a calling convention other than the default convention for the host platform. - Improved ability to build without the configure-generated expat_config.h header. This is useful for applications which embed Expat rather than linking in the library. - Fixed a variety of bugs: see SF issues #458907, #609603, #676844, #679754, #692878, #692964, #695401, #699323, #699487, #820946. - Improved hash table lookups. - Added more regression tests and improved documentation. Release 1.95.6 Tue Jan 28 2003 - Added XML_FreeContentModel(). - Added XML_MemMalloc(), XML_MemRealloc(), XML_MemFree(). - Fixed a variety of bugs: see SF issues #615606, #616863, #618199, #653180, #673791. - Enhanced the regression test suite. - Man page improvements: includes SF issue #632146. Release 1.95.5 Fri Sep 6 2002 - Added XML_UseForeignDTD() for improved SAX2 support. - Added XML_GetFeatureList(). - Defined XML_Bool type and the values XML_TRUE and XML_FALSE. - Use an incomplete struct instead of a void* for the parser (may not retain). - Fixed UTF-8 decoding bug that caused legal UTF-8 to be rejected. - Finally fixed bug where default handler would report DTD events that were already handled by another handler. Initial patch contributed by Darryl Miles. - Removed unnecessary DllMain() function that caused static linking into a DLL to be difficult. - Added VC++ projects for building static libraries. - Reduced line-length for all source code and headers to be no longer than 80 characters, to help with AS/400 support. - Reduced memory copying during parsing (SF patch #600964). - Fixed a variety of bugs: see SF issues #580793, #434664, #483514, #580503, #581069, #584041, #584183, #584832, #585537, #596555, #596678, #598352, #598944, #599715, #600479, #600971. Release 1.95.4 Fri Jul 12 2002 - Added support for VMS, contributed by Craig Berry. See vms/README.vms for more information. - Added Mac OS (classic) support, with a makefile for MPW, contributed by Thomas Wegner and Daryle Walker. - Added Borland C++ Builder 5 / BCC 5.5 support, contributed by Patrick McConnell (SF patch #538032). - Fixed a variety of bugs: see SF issues #441449, #563184, #564342, #566334, #566901, #569461, #570263, #575168, #579196. - Made skippedEntityHandler conform to SAX2 (see source comment) - Re-implemented WFC: Entity Declared from XML 1.0 spec and added a new error "entity declared in parameter entity": see SF bug report #569461 and SF patch #578161 - Re-implemented section 5.1 from XML 1.0 spec: see SF bug report #570263 and SF patch #578161 Release 1.95.3 Mon Jun 3 2002 - Added a project to the MSVC workspace to create a wchar_t version of the library; the DLLs are named libexpatw.dll. - Changed the name of the Windows DLLs from expat.dll to libexpat.dll; this fixes SF bug #432456. - Added the XML_ParserReset() API function. - Fixed XML_SetReturnNSTriplet() to work for element names. - Made the XML_UNICODE builds usable (thanks, Karl!). - Allow xmlwf to read from standard input. - Install a man page for xmlwf on Unix systems. - Fixed many bugs; see SF bug reports #231864, #461380, #464837, #466885, #469226, #477667, #484419, #487840, #494749, #496505, #547350. Other bugs which we can't test as easily may also have been fixed, especially in the area of build support. Release 1.95.2 Fri Jul 27 2001 - More changes to make MSVC happy with the build; add a single workspace to support both the library and xmlwf application. - Added a Windows installer for Windows users; includes xmlwf.exe. - Added compile-time constants that can be used to determine the Expat version - Removed a lot of GNU-specific dependencies to aide portability among the various Unix flavors. - Fix the UTF-8 BOM bug. - Cleaned up warning messages for several compilers. - Added the -Wall, -Wstrict-prototypes options for GCC. Release 1.95.1 Sun Oct 22 15:11:36 EDT 2000 - Changes to get expat to build under Microsoft compiler - Removed all aborts and instead return an UNEXPECTED_STATE error. - Fixed a bug where a stray '%' in an entity value would cause an abort. - Defined XML_SetEndNamespaceDeclHandler. Thanks to Darryl Miles for finding this oversight. - Changed default patterns in lib/Makefile.in to fit non-GNU makes Thanks to robin@unrated.net for reporting and providing an account to test on. - The reference had the wrong label for XML_SetStartNamespaceDecl. Reported by an anonymous user. Release 1.95.0 Fri Sep 29 2000 - XML_ParserCreate_MM Allows you to set a memory management suite to replace the standard malloc,realloc, and free. - XML_SetReturnNSTriplet If you turn this feature on when namespace processing is in effect, then qualified, prefixed element and attribute names are returned as "uri|name|prefix" where '|' is whatever separator character is used in namespace processing. - Merged in features from perl-expat o XML_SetElementDeclHandler o XML_SetAttlistDeclHandler o XML_SetXmlDeclHandler o XML_SetEntityDeclHandler o StartDoctypeDeclHandler takes 3 additional parameters: sysid, pubid, has_internal_subset o Many paired handler setters (like XML_SetElementHandler) now have corresponding individual handler setters o XML_GetInputContext for getting the input context of the current parse position. - Added reference material - Packaged into a distribution that builds a sharable library diff --git a/contrib/expat/Makefile.am b/contrib/expat/Makefile.am index e6e7971ec8b5..37ae3738edd3 100644 --- a/contrib/expat/Makefile.am +++ b/contrib/expat/Makefile.am @@ -1,173 +1,175 @@ # # __ __ _ # ___\ \/ /_ __ __ _| |_ # / _ \\ /| '_ \ / _` | __| # | __// \| |_) | (_| | |_ # \___/_/\_\ .__/ \__,_|\__| # |_| XML parser # # Copyright (c) 2017-2021 Sebastian Pipping # Copyright (c) 2018 KangLin +# Copyright (c) 2022 Johnny Jazeix # Licensed under the MIT license: # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the # "Software"), to deal in the Software without restriction, including # without limitation the rights to use, copy, modify, merge, publish, # distribute, sublicense, and/or sell copies of the Software, and to permit # persons to whom the Software is furnished to do so, subject to the # following conditions: # # The above copyright notice and this permission notice shall be included # in all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN # NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR # OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE # USE OR OTHER DEALINGS IN THE SOFTWARE. AUTOMAKE_OPTIONS = \ dist-bzip2 \ dist-lzip \ dist-xz \ foreign \ subdir-objects ACLOCAL_AMFLAGS = -I m4 LIBTOOLFLAGS = --verbose SUBDIRS = lib # lib goes first to build first if WITH_EXAMPLES SUBDIRS += examples endif if WITH_TESTS SUBDIRS += tests endif if WITH_XMLWF SUBDIRS += xmlwf doc endif pkgconfig_DATA = expat.pc pkgconfigdir = $(libdir)/pkgconfig dist_cmake_DATA = \ cmake/autotools/expat.cmake nodist_cmake_DATA = \ cmake/autotools/expat-config-version.cmake \ cmake/autotools/expat-noconfig.cmake \ cmake/expat-config.cmake cmakedir = $(libdir)/cmake/expat-@PACKAGE_VERSION@ _EXTRA_DIST_CMAKE = \ cmake/autotools/expat-noconfig__linux.cmake.in \ cmake/autotools/expat-noconfig__macos.cmake.in \ cmake/autotools/expat-noconfig__windows.cmake.in \ cmake/autotools/expat-package-init.cmake \ cmake/mingw-toolchain.cmake \ \ CMakeLists.txt \ CMake.README \ ConfigureChecks.cmake \ expat.pc.cmake \ expat_config.h.cmake _EXTRA_DIST_WINDOWS = \ win32/build_expat_iss.bat \ win32/expat.iss \ win32/MANIFEST.txt \ - win32/README.txt + win32/README.txt \ + win32/version.rc EXTRA_DIST = \ $(_EXTRA_DIST_CMAKE) \ $(_EXTRA_DIST_WINDOWS) \ \ conftools/expat.m4 \ conftools/get-version.sh \ \ fuzz/xml_parsebuffer_fuzzer.c \ fuzz/xml_parse_fuzzer.c \ \ xmlwf/xmlwf_helpgen.py \ xmlwf/xmlwf_helpgen.sh \ \ buildconf.sh \ Changes \ README.md \ \ fix-xmltest-log.sh \ test-driver-wrapper.sh .PHONY: buildlib buildlib: @echo 'ERROR: Running "make buildlib LIBRARY=libexpatw.la"' >&2 @echo 'ERROR: is no longer supported. INSTEAD please:' >&2 @echo 'ERROR:' >&2 @echo 'ERROR: * Mass-patch Makefile.am, e.g.' >&2 @echo 'ERROR: # find -name Makefile.am -exec sed \' >&2 @echo 'ERROR: -e "s,libexpat\.la,libexpatw.la," \' >&2 @echo 'ERROR: -e "s,libexpat_la,libexpatw_la," \' >&2 @echo 'ERROR: -i {} +' >&2 @echo 'ERROR:' >&2 @echo 'ERROR: * Run automake to re-generate Makefile.in files' >&2 @echo 'ERROR:' >&2 @echo 'ERROR: * Use "./configure --without-xmlwf" and/or' >&2 @echo 'ERROR: "make -C lib all install" to bypass compilation' >&2 @echo 'ERROR: of xmlwf (e.g. with -DXML_UNICODE)' >&2 @echo 'ERROR:' >&2 @false .PHONY: run-benchmark run-benchmark: $(MAKE) -C tests/benchmark ./run.sh tests/benchmark/benchmark@EXEEXT@ -n $(top_srcdir)/../testdata/largefiles/recset.xml 65535 3 .PHONY: download-xmlts-zip download-xmlts-zip: if test "$(XMLTS_ZIP)" = ""; then \ wget --output-document=tests/xmlts.zip \ https://www.w3.org/XML/Test/xmlts20080827.zip; \ else \ cp $(XMLTS_ZIP) tests/xmlts.zip; \ fi tests/xmlts.zip: $(MAKE) download-xmlts-zip .PHONY: extract-xmlts-zip extract-xmlts-zip: tests/xmlts.zip [ -f $(builddir)/tests/xmlts.zip ] || $(MAKE) download-xmlts-zip # vpath workaround cd tests && unzip -q xmlts.zip tests/xmlconf: tests/xmlts.zip $(MAKE) extract-xmlts-zip .PHONY: run-xmltest run-xmltest: tests/xmlconf if WITH_XMLWF [ -d $(builddir)/tests/xmlconf ] || $(MAKE) extract-xmlts-zip # vpath workaround $(MAKE) -C lib $(MAKE) -C xmlwf $(srcdir)/tests/xmltest.sh "$(abs_builddir)/run.sh $(abs_builddir)/xmlwf/xmlwf@EXEEXT@" 2>&1 | tee $(builddir)/tests/xmltest.log $(srcdir)/fix-xmltest-log.sh $(builddir)/tests/xmltest.log diff -u $(srcdir)/tests/xmltest.log.expected $(builddir)/tests/xmltest.log else @echo 'ERROR: xmlwf is needed for "make run-xmltest".' >&2 @echo 'ERROR: Please re-configure without --without-xmlwf.' >&2 @false endif .PHONY: qa qa: QA_COMPILER=clang QA_SANITIZER=address ./qa.sh QA_COMPILER=clang QA_SANITIZER=memory ./qa.sh QA_COMPILER=clang QA_SANITIZER=undefined ./qa.sh QA_COMPILER=gcc QA_PROCESSOR=gcov ./qa.sh diff --git a/contrib/expat/Makefile.in b/contrib/expat/Makefile.in index 7c6551fca2cf..ea8c72e80ea3 100644 --- a/contrib/expat/Makefile.in +++ b/contrib/expat/Makefile.in @@ -1,1134 +1,1136 @@ # Makefile.in generated by automake 1.16.5 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2021 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # # __ __ _ # ___\ \/ /_ __ __ _| |_ # / _ \\ /| '_ \ / _` | __| # | __// \| |_) | (_| | |_ # \___/_/\_\ .__/ \__,_|\__| # |_| XML parser # # Copyright (c) 2017-2021 Sebastian Pipping # Copyright (c) 2018 KangLin +# Copyright (c) 2022 Johnny Jazeix # Licensed under the MIT license: # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the # "Software"), to deal in the Software without restriction, including # without limitation the rights to use, copy, modify, merge, publish, # distribute, sublicense, and/or sell copies of the Software, and to permit # persons to whom the Software is furnished to do so, subject to the # following conditions: # # The above copyright notice and this permission notice shall be included # in all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN # NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR # OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE # USE OR OTHER DEALINGS IN THE SOFTWARE. VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibdir = $(libdir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ @WITH_EXAMPLES_TRUE@am__append_1 = examples @WITH_TESTS_TRUE@am__append_2 = tests @WITH_XMLWF_TRUE@am__append_3 = xmlwf doc subdir = . ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/acinclude.m4 \ $(top_srcdir)/conftools/ax-require-defined.m4 \ $(top_srcdir)/conftools/ax-check-compile-flag.m4 \ $(top_srcdir)/conftools/ax-check-link-flag.m4 \ $(top_srcdir)/conftools/ax-append-flag.m4 \ $(top_srcdir)/conftools/ax-append-compile-flags.m4 \ $(top_srcdir)/conftools/ax-append-link-flags.m4 \ $(top_srcdir)/conftools/expatcfg-compiler-supports-visibility.m4 \ $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(top_srcdir)/configure \ $(am__configure_deps) $(dist_cmake_DATA) $(am__DIST_COMMON) am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \ configure.lineno config.status.lineno mkinstalldirs = $(install_sh) -d CONFIG_HEADER = expat_config.h CONFIG_CLEAN_FILES = expat.pc cmake/expat-config.cmake \ cmake/autotools/expat-config-version.cmake \ cmake/autotools/expat-noconfig.cmake run.sh CONFIG_CLEAN_VPATH_FILES = AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = SOURCES = DIST_SOURCES = RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ ctags-recursive dvi-recursive html-recursive info-recursive \ install-data-recursive install-dvi-recursive \ install-exec-recursive install-html-recursive \ install-info-recursive install-pdf-recursive \ install-ps-recursive install-recursive installcheck-recursive \ installdirs-recursive pdf-recursive ps-recursive \ tags-recursive uninstall-recursive am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } am__installdirs = "$(DESTDIR)$(cmakedir)" "$(DESTDIR)$(cmakedir)" \ "$(DESTDIR)$(pkgconfigdir)" DATA = $(dist_cmake_DATA) $(nodist_cmake_DATA) $(pkgconfig_DATA) RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ distclean-recursive maintainer-clean-recursive am__recursive_targets = \ $(RECURSIVE_TARGETS) \ $(RECURSIVE_CLEAN_TARGETS) \ $(am__extra_recursive_targets) AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ cscope distdir distdir-am dist dist-all distcheck am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) \ expat_config.h.in # Read a list of newline-separated strings from the standard input, # and print each of them once, without duplicates. Input order is # *not* preserved. am__uniquify_input = $(AWK) '\ BEGIN { nonempty = 0; } \ { items[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in items) print i; }; } \ ' # Make sure the list of sources is unique. This is necessary because, # e.g., the same source file might be shared among _SOURCES variables # for different programs/libraries. am__define_uniq_tagged_files = \ list='$(am__tagged_files)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | $(am__uniquify_input)` DIST_SUBDIRS = lib examples tests xmlwf doc am__DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/expat.pc.in \ $(srcdir)/expat_config.h.in $(srcdir)/run.sh.in \ $(top_srcdir)/cmake/autotools/expat-config-version.cmake.in \ $(top_srcdir)/cmake/expat-config.cmake.in \ $(top_srcdir)/conftools/ar-lib $(top_srcdir)/conftools/compile \ $(top_srcdir)/conftools/config.guess \ $(top_srcdir)/conftools/config.sub \ $(top_srcdir)/conftools/install-sh \ $(top_srcdir)/conftools/ltmain.sh \ $(top_srcdir)/conftools/missing AUTHORS COPYING README.md \ conftools/ar-lib conftools/compile conftools/config.guess \ conftools/config.sub conftools/depcomp conftools/install-sh \ conftools/ltmain.sh conftools/missing DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) distdir = $(PACKAGE)-$(VERSION) top_distdir = $(distdir) am__remove_distdir = \ if test -d "$(distdir)"; then \ find "$(distdir)" -type d ! -perm -200 -exec chmod u+w {} ';' \ && rm -rf "$(distdir)" \ || { sleep 5 && rm -rf "$(distdir)"; }; \ else :; fi am__post_remove_distdir = $(am__remove_distdir) am__relativize = \ dir0=`pwd`; \ sed_first='s,^\([^/]*\)/.*$$,\1,'; \ sed_rest='s,^[^/]*/*,,'; \ sed_last='s,^.*/\([^/]*\)$$,\1,'; \ sed_butlast='s,/*[^/]*$$,,'; \ while test -n "$$dir1"; do \ first=`echo "$$dir1" | sed -e "$$sed_first"`; \ if test "$$first" != "."; then \ if test "$$first" = ".."; then \ dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ else \ first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ if test "$$first2" = "$$first"; then \ dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ else \ dir2="../$$dir2"; \ fi; \ dir0="$$dir0"/"$$first"; \ fi; \ fi; \ dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ done; \ reldir="$$dir2" DIST_ARCHIVES = $(distdir).tar.gz $(distdir).tar.bz2 $(distdir).tar.lz \ $(distdir).tar.xz GZIP_ENV = --best DIST_TARGETS = dist-lzip dist-xz dist-bzip2 dist-gzip # Exists only to be overridden by the user if desired. AM_DISTCHECK_DVI_TARGET = dvi distuninstallcheck_listfiles = find . -type f -print am__distuninstallcheck_listfiles = $(distuninstallcheck_listfiles) \ | sed 's|^\./|$(prefix)/|' | grep -v '$(infodir)/dir$$' distcleancheck_listfiles = find . -type f -print ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ AM_CFLAGS = @AM_CFLAGS@ AM_CPPFLAGS = @AM_CPPFLAGS@ AM_CXXFLAGS = @AM_CXXFLAGS@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ AM_LDFLAGS = @AM_LDFLAGS@ AR = @AR@ AS = @AS@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ CMAKE_SHARED_LIBRARY_PREFIX = @CMAKE_SHARED_LIBRARY_PREFIX@ CPPFLAGS = @CPPFLAGS@ CSCOPE = @CSCOPE@ CTAGS = @CTAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DLLTOOL = @DLLTOOL@ DOCBOOK_TO_MAN = @DOCBOOK_TO_MAN@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ ETAGS = @ETAGS@ EXEEXT = @EXEEXT@ EXPAT_ATTR_INFO = @EXPAT_ATTR_INFO@ EXPAT_CHAR_TYPE = @EXPAT_CHAR_TYPE@ EXPAT_CONTEXT_BYTES = @EXPAT_CONTEXT_BYTES@ EXPAT_DTD = @EXPAT_DTD@ EXPAT_LARGE_SIZE = @EXPAT_LARGE_SIZE@ EXPAT_MIN_SIZE = @EXPAT_MIN_SIZE@ EXPAT_NS = @EXPAT_NS@ FGREP = @FGREP@ FILEMAP = @FILEMAP@ GREP = @GREP@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBAGE = @LIBAGE@ LIBCURRENT = @LIBCURRENT@ LIBDIR_BASENAME = @LIBDIR_BASENAME@ LIBM = @LIBM@ LIBOBJS = @LIBOBJS@ LIBREVISION = @LIBREVISION@ LIBS = @LIBS@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ NM = @NM@ NMEDIT = @NMEDIT@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PATH_SEPARATOR = @PATH_SEPARATOR@ RANLIB = @RANLIB@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SO_MAJOR = @SO_MAJOR@ SO_MINOR = @SO_MINOR@ SO_PATCH = @SO_PATCH@ STRIP = @STRIP@ VERSION = @VERSION@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ ac_cv_sizeof_void_p = @ac_cv_sizeof_void_p@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ dvidir = @dvidir@ exec_prefix = @exec_prefix@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target_alias = @target_alias@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AUTOMAKE_OPTIONS = \ dist-bzip2 \ dist-lzip \ dist-xz \ foreign \ subdir-objects ACLOCAL_AMFLAGS = -I m4 LIBTOOLFLAGS = --verbose SUBDIRS = lib $(am__append_1) $(am__append_2) $(am__append_3) pkgconfig_DATA = expat.pc pkgconfigdir = $(libdir)/pkgconfig dist_cmake_DATA = \ cmake/autotools/expat.cmake nodist_cmake_DATA = \ cmake/autotools/expat-config-version.cmake \ cmake/autotools/expat-noconfig.cmake \ cmake/expat-config.cmake cmakedir = $(libdir)/cmake/expat-@PACKAGE_VERSION@ _EXTRA_DIST_CMAKE = \ cmake/autotools/expat-noconfig__linux.cmake.in \ cmake/autotools/expat-noconfig__macos.cmake.in \ cmake/autotools/expat-noconfig__windows.cmake.in \ cmake/autotools/expat-package-init.cmake \ cmake/mingw-toolchain.cmake \ \ CMakeLists.txt \ CMake.README \ ConfigureChecks.cmake \ expat.pc.cmake \ expat_config.h.cmake _EXTRA_DIST_WINDOWS = \ win32/build_expat_iss.bat \ win32/expat.iss \ win32/MANIFEST.txt \ - win32/README.txt + win32/README.txt \ + win32/version.rc EXTRA_DIST = \ $(_EXTRA_DIST_CMAKE) \ $(_EXTRA_DIST_WINDOWS) \ \ conftools/expat.m4 \ conftools/get-version.sh \ \ fuzz/xml_parsebuffer_fuzzer.c \ fuzz/xml_parse_fuzzer.c \ \ xmlwf/xmlwf_helpgen.py \ xmlwf/xmlwf_helpgen.sh \ \ buildconf.sh \ Changes \ README.md \ \ fix-xmltest-log.sh \ test-driver-wrapper.sh all: expat_config.h $(MAKE) $(AM_MAKEFLAGS) all-recursive .SUFFIXES: am--refresh: Makefile @: $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ echo ' cd $(srcdir) && $(AUTOMAKE) --foreign'; \ $(am__cd) $(srcdir) && $(AUTOMAKE) --foreign \ && exit 0; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ echo ' $(SHELL) ./config.status'; \ $(SHELL) ./config.status;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__maybe_remake_depfiles);; \ esac; $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) $(SHELL) ./config.status --recheck $(top_srcdir)/configure: $(am__configure_deps) $(am__cd) $(srcdir) && $(AUTOCONF) $(ACLOCAL_M4): $(am__aclocal_m4_deps) $(am__cd) $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS) $(am__aclocal_m4_deps): expat_config.h: stamp-h1 @test -f $@ || rm -f stamp-h1 @test -f $@ || $(MAKE) $(AM_MAKEFLAGS) stamp-h1 stamp-h1: $(srcdir)/expat_config.h.in $(top_builddir)/config.status @rm -f stamp-h1 cd $(top_builddir) && $(SHELL) ./config.status expat_config.h $(srcdir)/expat_config.h.in: $(am__configure_deps) ($(am__cd) $(top_srcdir) && $(AUTOHEADER)) rm -f stamp-h1 touch $@ distclean-hdr: -rm -f expat_config.h stamp-h1 expat.pc: $(top_builddir)/config.status $(srcdir)/expat.pc.in cd $(top_builddir) && $(SHELL) ./config.status $@ cmake/expat-config.cmake: $(top_builddir)/config.status $(top_srcdir)/cmake/expat-config.cmake.in cd $(top_builddir) && $(SHELL) ./config.status $@ cmake/autotools/expat-config-version.cmake: $(top_builddir)/config.status $(top_srcdir)/cmake/autotools/expat-config-version.cmake.in cd $(top_builddir) && $(SHELL) ./config.status $@ cmake/autotools/expat-noconfig.cmake: $(top_builddir)/config.status cd $(top_builddir) && $(SHELL) ./config.status $@ run.sh: $(top_builddir)/config.status $(srcdir)/run.sh.in cd $(top_builddir) && $(SHELL) ./config.status $@ mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs distclean-libtool: -rm -f libtool config.lt install-dist_cmakeDATA: $(dist_cmake_DATA) @$(NORMAL_INSTALL) @list='$(dist_cmake_DATA)'; test -n "$(cmakedir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(cmakedir)'"; \ $(MKDIR_P) "$(DESTDIR)$(cmakedir)" || exit 1; \ fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; \ done | $(am__base_list) | \ while read files; do \ echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(cmakedir)'"; \ $(INSTALL_DATA) $$files "$(DESTDIR)$(cmakedir)" || exit $$?; \ done uninstall-dist_cmakeDATA: @$(NORMAL_UNINSTALL) @list='$(dist_cmake_DATA)'; test -n "$(cmakedir)" || list=; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ dir='$(DESTDIR)$(cmakedir)'; $(am__uninstall_files_from_dir) install-nodist_cmakeDATA: $(nodist_cmake_DATA) @$(NORMAL_INSTALL) @list='$(nodist_cmake_DATA)'; test -n "$(cmakedir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(cmakedir)'"; \ $(MKDIR_P) "$(DESTDIR)$(cmakedir)" || exit 1; \ fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; \ done | $(am__base_list) | \ while read files; do \ echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(cmakedir)'"; \ $(INSTALL_DATA) $$files "$(DESTDIR)$(cmakedir)" || exit $$?; \ done uninstall-nodist_cmakeDATA: @$(NORMAL_UNINSTALL) @list='$(nodist_cmake_DATA)'; test -n "$(cmakedir)" || list=; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ dir='$(DESTDIR)$(cmakedir)'; $(am__uninstall_files_from_dir) install-pkgconfigDATA: $(pkgconfig_DATA) @$(NORMAL_INSTALL) @list='$(pkgconfig_DATA)'; test -n "$(pkgconfigdir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(pkgconfigdir)'"; \ $(MKDIR_P) "$(DESTDIR)$(pkgconfigdir)" || exit 1; \ fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; \ done | $(am__base_list) | \ while read files; do \ echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(pkgconfigdir)'"; \ $(INSTALL_DATA) $$files "$(DESTDIR)$(pkgconfigdir)" || exit $$?; \ done uninstall-pkgconfigDATA: @$(NORMAL_UNINSTALL) @list='$(pkgconfig_DATA)'; test -n "$(pkgconfigdir)" || list=; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ dir='$(DESTDIR)$(pkgconfigdir)'; $(am__uninstall_files_from_dir) # This directory's subdirectories are mostly independent; you can cd # into them and run 'make' without going through this Makefile. # To change the values of 'make' variables: instead of editing Makefiles, # (1) if the variable is set in 'config.status', edit 'config.status' # (which will cause the Makefiles to be regenerated when you run 'make'); # (2) otherwise, pass the desired values on the 'make' command line. $(am__recursive_targets): @fail=; \ if $(am__make_keepgoing); then \ failcom='fail=yes'; \ else \ failcom='exit 1'; \ fi; \ dot_seen=no; \ target=`echo $@ | sed s/-recursive//`; \ case "$@" in \ distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ *) list='$(SUBDIRS)' ;; \ esac; \ for subdir in $$list; do \ echo "Making $$target in $$subdir"; \ if test "$$subdir" = "."; then \ dot_seen=yes; \ local_target="$$target-am"; \ else \ local_target="$$target"; \ fi; \ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ || eval $$failcom; \ done; \ if test "$$dot_seen" = "no"; then \ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ fi; test -z "$$fail" ID: $(am__tagged_files) $(am__define_uniq_tagged_files); mkid -fID $$unique tags: tags-recursive TAGS: tags tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) set x; \ here=`pwd`; \ if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ include_option=--etags-include; \ empty_fix=.; \ else \ include_option=--include; \ empty_fix=; \ fi; \ list='$(SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ test ! -f $$subdir/TAGS || \ set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ fi; \ done; \ $(am__define_uniq_tagged_files); \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: ctags-recursive CTAGS: ctags ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) $(am__define_uniq_tagged_files); \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" cscope: cscope.files test ! -s cscope.files \ || $(CSCOPE) -b -q $(AM_CSCOPEFLAGS) $(CSCOPEFLAGS) -i cscope.files $(CSCOPE_ARGS) clean-cscope: -rm -f cscope.files cscope.files: clean-cscope cscopelist cscopelist: cscopelist-recursive cscopelist-am: $(am__tagged_files) list='$(am__tagged_files)'; \ case "$(srcdir)" in \ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ *) sdir=$(subdir)/$(srcdir) ;; \ esac; \ for i in $$list; do \ if test -f "$$i"; then \ echo "$(subdir)/$$i"; \ else \ echo "$$sdir/$$i"; \ fi; \ done >> $(top_builddir)/cscope.files distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags -rm -f cscope.out cscope.in.out cscope.po.out cscope.files distdir: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) distdir-am distdir-am: $(DISTFILES) $(am__remove_distdir) test -d "$(distdir)" || mkdir "$(distdir)" @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ $(am__make_dryrun) \ || test -d "$(distdir)/$$subdir" \ || $(MKDIR_P) "$(distdir)/$$subdir" \ || exit 1; \ dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ $(am__relativize); \ new_distdir=$$reldir; \ dir1=$$subdir; dir2="$(top_distdir)"; \ $(am__relativize); \ new_top_distdir=$$reldir; \ echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ ($(am__cd) $$subdir && \ $(MAKE) $(AM_MAKEFLAGS) \ top_distdir="$$new_top_distdir" \ distdir="$$new_distdir" \ am__remove_distdir=: \ am__skip_length_check=: \ am__skip_mode_fix=: \ distdir) \ || exit 1; \ fi; \ done -test -n "$(am__skip_mode_fix)" \ || find "$(distdir)" -type d ! -perm -755 \ -exec chmod u+rwx,go+rx {} \; -o \ ! -type d ! -perm -444 -links 1 -exec chmod a+r {} \; -o \ ! -type d ! -perm -400 -exec chmod a+r {} \; -o \ ! -type d ! -perm -444 -exec $(install_sh) -c -m a+r {} {} \; \ || chmod -R a+r "$(distdir)" dist-gzip: distdir tardir=$(distdir) && $(am__tar) | eval GZIP= gzip $(GZIP_ENV) -c >$(distdir).tar.gz $(am__post_remove_distdir) dist-bzip2: distdir tardir=$(distdir) && $(am__tar) | BZIP2=$${BZIP2--9} bzip2 -c >$(distdir).tar.bz2 $(am__post_remove_distdir) dist-lzip: distdir tardir=$(distdir) && $(am__tar) | lzip -c $${LZIP_OPT--9} >$(distdir).tar.lz $(am__post_remove_distdir) dist-xz: distdir tardir=$(distdir) && $(am__tar) | XZ_OPT=$${XZ_OPT--e} xz -c >$(distdir).tar.xz $(am__post_remove_distdir) dist-zstd: distdir tardir=$(distdir) && $(am__tar) | zstd -c $${ZSTD_CLEVEL-$${ZSTD_OPT--19}} >$(distdir).tar.zst $(am__post_remove_distdir) dist-tarZ: distdir @echo WARNING: "Support for distribution archives compressed with" \ "legacy program 'compress' is deprecated." >&2 @echo WARNING: "It will be removed altogether in Automake 2.0" >&2 tardir=$(distdir) && $(am__tar) | compress -c >$(distdir).tar.Z $(am__post_remove_distdir) dist-shar: distdir @echo WARNING: "Support for shar distribution archives is" \ "deprecated." >&2 @echo WARNING: "It will be removed altogether in Automake 2.0" >&2 shar $(distdir) | eval GZIP= gzip $(GZIP_ENV) -c >$(distdir).shar.gz $(am__post_remove_distdir) dist-zip: distdir -rm -f $(distdir).zip zip -rq $(distdir).zip $(distdir) $(am__post_remove_distdir) dist dist-all: $(MAKE) $(AM_MAKEFLAGS) $(DIST_TARGETS) am__post_remove_distdir='@:' $(am__post_remove_distdir) # This target untars the dist file and tries a VPATH configuration. Then # it guarantees that the distribution is self-contained by making another # tarfile. distcheck: dist case '$(DIST_ARCHIVES)' in \ *.tar.gz*) \ eval GZIP= gzip $(GZIP_ENV) -dc $(distdir).tar.gz | $(am__untar) ;;\ *.tar.bz2*) \ bzip2 -dc $(distdir).tar.bz2 | $(am__untar) ;;\ *.tar.lz*) \ lzip -dc $(distdir).tar.lz | $(am__untar) ;;\ *.tar.xz*) \ xz -dc $(distdir).tar.xz | $(am__untar) ;;\ *.tar.Z*) \ uncompress -c $(distdir).tar.Z | $(am__untar) ;;\ *.shar.gz*) \ eval GZIP= gzip $(GZIP_ENV) -dc $(distdir).shar.gz | unshar ;;\ *.zip*) \ unzip $(distdir).zip ;;\ *.tar.zst*) \ zstd -dc $(distdir).tar.zst | $(am__untar) ;;\ esac chmod -R a-w $(distdir) chmod u+w $(distdir) mkdir $(distdir)/_build $(distdir)/_build/sub $(distdir)/_inst chmod a-w $(distdir) test -d $(distdir)/_build || exit 0; \ dc_install_base=`$(am__cd) $(distdir)/_inst && pwd | sed -e 's,^[^:\\/]:[\\/],/,'` \ && dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \ && am__cwd=`pwd` \ && $(am__cd) $(distdir)/_build/sub \ && ../../configure \ $(AM_DISTCHECK_CONFIGURE_FLAGS) \ $(DISTCHECK_CONFIGURE_FLAGS) \ --srcdir=../.. --prefix="$$dc_install_base" \ && $(MAKE) $(AM_MAKEFLAGS) \ && $(MAKE) $(AM_MAKEFLAGS) $(AM_DISTCHECK_DVI_TARGET) \ && $(MAKE) $(AM_MAKEFLAGS) check \ && $(MAKE) $(AM_MAKEFLAGS) install \ && $(MAKE) $(AM_MAKEFLAGS) installcheck \ && $(MAKE) $(AM_MAKEFLAGS) uninstall \ && $(MAKE) $(AM_MAKEFLAGS) distuninstallcheck_dir="$$dc_install_base" \ distuninstallcheck \ && chmod -R a-w "$$dc_install_base" \ && ({ \ (cd ../.. && umask 077 && mkdir "$$dc_destdir") \ && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" install \ && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" uninstall \ && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" \ distuninstallcheck_dir="$$dc_destdir" distuninstallcheck; \ } || { rm -rf "$$dc_destdir"; exit 1; }) \ && rm -rf "$$dc_destdir" \ && $(MAKE) $(AM_MAKEFLAGS) dist \ && rm -rf $(DIST_ARCHIVES) \ && $(MAKE) $(AM_MAKEFLAGS) distcleancheck \ && cd "$$am__cwd" \ || exit 1 $(am__post_remove_distdir) @(echo "$(distdir) archives ready for distribution: "; \ list='$(DIST_ARCHIVES)'; for i in $$list; do echo $$i; done) | \ sed -e 1h -e 1s/./=/g -e 1p -e 1x -e '$$p' -e '$$x' distuninstallcheck: @test -n '$(distuninstallcheck_dir)' || { \ echo 'ERROR: trying to run $@ with an empty' \ '$$(distuninstallcheck_dir)' >&2; \ exit 1; \ }; \ $(am__cd) '$(distuninstallcheck_dir)' || { \ echo 'ERROR: cannot chdir into $(distuninstallcheck_dir)' >&2; \ exit 1; \ }; \ test `$(am__distuninstallcheck_listfiles) | wc -l` -eq 0 \ || { echo "ERROR: files left after uninstall:" ; \ if test -n "$(DESTDIR)"; then \ echo " (check DESTDIR support)"; \ fi ; \ $(distuninstallcheck_listfiles) ; \ exit 1; } >&2 distcleancheck: distclean @if test '$(srcdir)' = . ; then \ echo "ERROR: distcleancheck can only run from a VPATH build" ; \ exit 1 ; \ fi @test `$(distcleancheck_listfiles) | wc -l` -eq 0 \ || { echo "ERROR: files left in build directory after distclean:" ; \ $(distcleancheck_listfiles) ; \ exit 1; } >&2 check-am: all-am check: check-recursive all-am: Makefile $(DATA) expat_config.h installdirs: installdirs-recursive installdirs-am: for dir in "$(DESTDIR)$(cmakedir)" "$(DESTDIR)$(cmakedir)" "$(DESTDIR)$(pkgconfigdir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-recursive install-exec: install-exec-recursive install-data: install-data-recursive uninstall: uninstall-recursive install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-recursive install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." clean: clean-recursive clean-am: clean-generic clean-libtool mostlyclean-am distclean: distclean-recursive -rm -f $(am__CONFIG_DISTCLEAN_FILES) -rm -f Makefile distclean-am: clean-am distclean-generic distclean-hdr \ distclean-libtool distclean-tags dvi: dvi-recursive dvi-am: html: html-recursive html-am: info: info-recursive info-am: install-data-am: install-dist_cmakeDATA install-nodist_cmakeDATA \ install-pkgconfigDATA install-dvi: install-dvi-recursive install-dvi-am: install-exec-am: install-html: install-html-recursive install-html-am: install-info: install-info-recursive install-info-am: install-man: install-pdf: install-pdf-recursive install-pdf-am: install-ps: install-ps-recursive install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-recursive -rm -f $(am__CONFIG_DISTCLEAN_FILES) -rm -rf $(top_srcdir)/autom4te.cache -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-recursive mostlyclean-am: mostlyclean-generic mostlyclean-libtool pdf: pdf-recursive pdf-am: ps: ps-recursive ps-am: uninstall-am: uninstall-dist_cmakeDATA uninstall-nodist_cmakeDATA \ uninstall-pkgconfigDATA .MAKE: $(am__recursive_targets) all install-am install-strip .PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am \ am--refresh check check-am clean clean-cscope clean-generic \ clean-libtool cscope cscopelist-am ctags ctags-am dist \ dist-all dist-bzip2 dist-gzip dist-lzip dist-shar dist-tarZ \ dist-xz dist-zip dist-zstd distcheck distclean \ distclean-generic distclean-hdr distclean-libtool \ distclean-tags distcleancheck distdir distuninstallcheck dvi \ dvi-am html html-am info info-am install install-am \ install-data install-data-am install-dist_cmakeDATA \ install-dvi install-dvi-am install-exec install-exec-am \ install-html install-html-am install-info install-info-am \ install-man install-nodist_cmakeDATA install-pdf \ install-pdf-am install-pkgconfigDATA install-ps install-ps-am \ install-strip installcheck installcheck-am installdirs \ installdirs-am maintainer-clean maintainer-clean-generic \ mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \ ps ps-am tags tags-am uninstall uninstall-am \ uninstall-dist_cmakeDATA uninstall-nodist_cmakeDATA \ uninstall-pkgconfigDATA .PRECIOUS: Makefile .PHONY: buildlib buildlib: @echo 'ERROR: Running "make buildlib LIBRARY=libexpatw.la"' >&2 @echo 'ERROR: is no longer supported. INSTEAD please:' >&2 @echo 'ERROR:' >&2 @echo 'ERROR: * Mass-patch Makefile.am, e.g.' >&2 @echo 'ERROR: # find -name Makefile.am -exec sed \' >&2 @echo 'ERROR: -e "s,libexpat\.la,libexpatw.la," \' >&2 @echo 'ERROR: -e "s,libexpat_la,libexpatw_la," \' >&2 @echo 'ERROR: -i {} +' >&2 @echo 'ERROR:' >&2 @echo 'ERROR: * Run automake to re-generate Makefile.in files' >&2 @echo 'ERROR:' >&2 @echo 'ERROR: * Use "./configure --without-xmlwf" and/or' >&2 @echo 'ERROR: "make -C lib all install" to bypass compilation' >&2 @echo 'ERROR: of xmlwf (e.g. with -DXML_UNICODE)' >&2 @echo 'ERROR:' >&2 @false .PHONY: run-benchmark run-benchmark: $(MAKE) -C tests/benchmark ./run.sh tests/benchmark/benchmark@EXEEXT@ -n $(top_srcdir)/../testdata/largefiles/recset.xml 65535 3 .PHONY: download-xmlts-zip download-xmlts-zip: if test "$(XMLTS_ZIP)" = ""; then \ wget --output-document=tests/xmlts.zip \ https://www.w3.org/XML/Test/xmlts20080827.zip; \ else \ cp $(XMLTS_ZIP) tests/xmlts.zip; \ fi tests/xmlts.zip: $(MAKE) download-xmlts-zip .PHONY: extract-xmlts-zip extract-xmlts-zip: tests/xmlts.zip [ -f $(builddir)/tests/xmlts.zip ] || $(MAKE) download-xmlts-zip # vpath workaround cd tests && unzip -q xmlts.zip tests/xmlconf: tests/xmlts.zip $(MAKE) extract-xmlts-zip .PHONY: run-xmltest run-xmltest: tests/xmlconf @WITH_XMLWF_TRUE@ [ -d $(builddir)/tests/xmlconf ] || $(MAKE) extract-xmlts-zip # vpath workaround @WITH_XMLWF_TRUE@ $(MAKE) -C lib @WITH_XMLWF_TRUE@ $(MAKE) -C xmlwf @WITH_XMLWF_TRUE@ $(srcdir)/tests/xmltest.sh "$(abs_builddir)/run.sh $(abs_builddir)/xmlwf/xmlwf@EXEEXT@" 2>&1 | tee $(builddir)/tests/xmltest.log @WITH_XMLWF_TRUE@ $(srcdir)/fix-xmltest-log.sh $(builddir)/tests/xmltest.log @WITH_XMLWF_TRUE@ diff -u $(srcdir)/tests/xmltest.log.expected $(builddir)/tests/xmltest.log @WITH_XMLWF_FALSE@ @echo 'ERROR: xmlwf is needed for "make run-xmltest".' >&2 @WITH_XMLWF_FALSE@ @echo 'ERROR: Please re-configure without --without-xmlwf.' >&2 @WITH_XMLWF_FALSE@ @false .PHONY: qa qa: QA_COMPILER=clang QA_SANITIZER=address ./qa.sh QA_COMPILER=clang QA_SANITIZER=memory ./qa.sh QA_COMPILER=clang QA_SANITIZER=undefined ./qa.sh QA_COMPILER=gcc QA_PROCESSOR=gcov ./qa.sh # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: diff --git a/contrib/expat/README.md b/contrib/expat/README.md index 959c4a6e94a7..6bfbf130dbf3 100644 --- a/contrib/expat/README.md +++ b/contrib/expat/README.md @@ -1,269 +1,269 @@ [![Run Linux Travis CI tasks](https://github.com/libexpat/libexpat/actions/workflows/linux.yml/badge.svg)](https://github.com/libexpat/libexpat/actions/workflows/linux.yml) [![AppVeyor Build Status](https://ci.appveyor.com/api/projects/status/github/libexpat/libexpat?svg=true)](https://ci.appveyor.com/project/libexpat/libexpat) [![Packaging status](https://repology.org/badge/tiny-repos/expat.svg)](https://repology.org/metapackage/expat/versions) [![Downloads SourceForge](https://img.shields.io/sourceforge/dt/expat?label=Downloads%20SourceForge)](https://sourceforge.net/projects/expat/files/) [![Downloads GitHub](https://img.shields.io/github/downloads/libexpat/libexpat/total?label=Downloads%20GitHub)](https://github.com/libexpat/libexpat/releases) -# Expat, Release 2.4.6 +# Expat, Release 2.4.7 This is Expat, a C library for parsing XML, started by [James Clark](https://en.wikipedia.org/wiki/James_Clark_%28programmer%29) in 1997. Expat is a stream-oriented XML parser. This means that you register handlers with the parser before starting the parse. These handlers are called when the parser discovers the associated structures in the document being parsed. A start tag is an example of the kind of structures for which you may register handlers. Expat supports the following compilers: - GNU GCC >=4.5 - LLVM Clang >=3.5 - Microsoft Visual Studio >=15.0/2017 (rolling `${today} minus 5 years`) Windows users can use the [`expat-win32bin-*.*.*.{exe,zip}` download](https://github.com/libexpat/libexpat/releases), which includes both pre-compiled libraries and executables, and source code for developers. Expat is [free software](https://www.gnu.org/philosophy/free-sw.en.html). You may copy, distribute, and modify it under the terms of the License contained in the file [`COPYING`](https://github.com/libexpat/libexpat/blob/master/expat/COPYING) distributed with this package. This license is the same as the MIT/X Consortium license. ## Using libexpat in your CMake-Based Project There are two ways of using libexpat with CMake: ### a) Module Mode This approach leverages CMake's own [module `FindEXPAT`](https://cmake.org/cmake/help/latest/module/FindEXPAT.html). Notice the *uppercase* `EXPAT` in the following example: ```cmake cmake_minimum_required(VERSION 3.0) # or 3.10, see below project(hello VERSION 1.0.0) find_package(EXPAT 2.2.8 MODULE REQUIRED) add_executable(hello hello.c ) # a) for CMake >=3.10 (see CMake's FindEXPAT docs) target_link_libraries(hello PUBLIC EXPAT::EXPAT) # b) for CMake >=3.0 target_include_directories(hello PRIVATE ${EXPAT_INCLUDE_DIRS}) target_link_libraries(hello PUBLIC ${EXPAT_LIBRARIES}) ``` ### b) Config Mode This approach requires files from… - libexpat >=2.2.8 where packaging uses the CMake build system or - libexpat >=2.3.0 where packaging uses the GNU Autotools build system on Linux or - libexpat >=2.4.0 where packaging uses the GNU Autotools build system on macOS or MinGW. Notice the *lowercase* `expat` in the following example: ```cmake cmake_minimum_required(VERSION 3.0) project(hello VERSION 1.0.0) find_package(expat 2.2.8 CONFIG REQUIRED char dtd ns) add_executable(hello hello.c ) target_link_libraries(hello PUBLIC expat::expat) ``` ## Building from a Git Clone If you are building Expat from a check-out from the [Git repository](https://github.com/libexpat/libexpat/), you need to run a script that generates the configure script using the GNU autoconf and libtool tools. To do this, you need to have autoconf 2.58 or newer. Run the script like this: ```console ./buildconf.sh ``` Once this has been done, follow the same instructions as for building from a source distribution. ## Building from a Source Distribution ### a) Building with the configure script (i.e. GNU Autotools) To build Expat from a source distribution, you first run the configuration shell script in the top level distribution directory: ```console ./configure ``` There are many options which you may provide to configure (which you can discover by running configure with the `--help` option). But the one of most interest is the one that sets the installation directory. By default, the configure script will set things up to install libexpat into `/usr/local/lib`, `expat.h` into `/usr/local/include`, and `xmlwf` into `/usr/local/bin`. If, for example, you'd prefer to install into `/home/me/mystuff/lib`, `/home/me/mystuff/include`, and `/home/me/mystuff/bin`, you can tell `configure` about that with: ```console ./configure --prefix=/home/me/mystuff ``` Another interesting option is to enable 64-bit integer support for line and column numbers and the over-all byte index: ```console ./configure CPPFLAGS=-DXML_LARGE_SIZE ``` However, such a modification would be a breaking change to the ABI and is therefore not recommended for general use — e.g. as part of a Linux distribution — but rather for builds with special requirements. After running the configure script, the `make` command will build things and `make install` will install things into their proper location. Have a look at the `Makefile` to learn about additional `make` options. Note that you need to have write permission into the directories into which things will be installed. If you are interested in building Expat to provide document information in UTF-16 encoding rather than the default UTF-8, follow these instructions (after having run `make distclean`). Please note that we configure with `--without-xmlwf` as xmlwf does not support this mode of compilation (yet): 1. Mass-patch `Makefile.am` files to use `libexpatw.la` for a library name:
`find -name Makefile.am -exec sed -e 's,libexpat\.la,libexpatw.la,' -e 's,libexpat_la,libexpatw_la,' -i {} +` 1. Run `automake` to re-write `Makefile.in` files:
`automake` 1. For UTF-16 output as unsigned short (and version/error strings as char), run:
`./configure CPPFLAGS=-DXML_UNICODE --without-xmlwf`
For UTF-16 output as `wchar_t` (incl. version/error strings), run:
`./configure CFLAGS="-g -O2 -fshort-wchar" CPPFLAGS=-DXML_UNICODE_WCHAR_T --without-xmlwf`
Note: The latter requires libc compiled with `-fshort-wchar`, as well. 1. Run `make` (which excludes xmlwf). 1. Run `make install` (again, excludes xmlwf). Using `DESTDIR` is supported. It works as follows: ```console make install DESTDIR=/path/to/image ``` overrides the in-makefile set `DESTDIR`, because variable-setting priority is 1. commandline 1. in-makefile 1. environment Note: This only applies to the Expat library itself, building UTF-16 versions of xmlwf and the tests is currently not supported. When using Expat with a project using autoconf for configuration, you can use the probing macro in `conftools/expat.m4` to determine how to include Expat. See the comments at the top of that file for more information. A reference manual is available in the file `doc/reference.html` in this distribution. ### b) Building with CMake The CMake build system is still *experimental* and may replace the primary build system based on GNU Autotools at some point when it is ready. #### Available Options For an idea of the available (non-advanced) options for building with CMake: ```console # rm -f CMakeCache.txt ; cmake -D_EXPAT_HELP=ON -LH . | grep -B1 ':.*=' | sed 's,^--$,,' // Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel ... CMAKE_BUILD_TYPE:STRING= // Install path prefix, prepended onto install directories. CMAKE_INSTALL_PREFIX:PATH=/usr/local // Path to a program. DOCBOOK_TO_MAN:FILEPATH=/usr/bin/docbook2x-man // build man page for xmlwf EXPAT_BUILD_DOCS:BOOL=ON // build the examples for expat library EXPAT_BUILD_EXAMPLES:BOOL=ON // build fuzzers for the expat library EXPAT_BUILD_FUZZERS:BOOL=OFF // build pkg-config file EXPAT_BUILD_PKGCONFIG:BOOL=ON // build the tests for expat library EXPAT_BUILD_TESTS:BOOL=ON // build the xmlwf tool for expat library EXPAT_BUILD_TOOLS:BOOL=ON // Character type to use (char|ushort|wchar_t) [default=char] EXPAT_CHAR_TYPE:STRING=char // install expat files in cmake install target EXPAT_ENABLE_INSTALL:BOOL=ON // Use /MT flag (static CRT) when compiling in MSVC EXPAT_MSVC_STATIC_CRT:BOOL=OFF // build fuzzers via ossfuzz for the expat library EXPAT_OSSFUZZ_BUILD:BOOL=OFF // build a shared expat library EXPAT_SHARED_LIBS:BOOL=ON // Treat all compiler warnings as errors EXPAT_WARNINGS_AS_ERRORS:BOOL=OFF // Make use of getrandom function (ON|OFF|AUTO) [default=AUTO] EXPAT_WITH_GETRANDOM:STRING=AUTO // utilize libbsd (for arc4random_buf) EXPAT_WITH_LIBBSD:BOOL=OFF // Make use of syscall SYS_getrandom (ON|OFF|AUTO) [default=AUTO] EXPAT_WITH_SYS_GETRANDOM:STRING=AUTO ``` diff --git a/contrib/expat/configure.ac b/contrib/expat/configure.ac index 5175487bb4b3..7a7f013febc9 100644 --- a/contrib/expat/configure.ac +++ b/contrib/expat/configure.ac @@ -1,437 +1,437 @@ dnl configuration script for expat dnl Process this file with autoconf to produce a configure script. dnl __ __ _ dnl ___\ \/ /_ __ __ _| |_ dnl / _ \\ /| '_ \ / _` | __| dnl | __// \| |_) | (_| | |_ dnl \___/_/\_\ .__/ \__,_|\__| dnl |_| XML parser dnl dnl Copyright (c) 2000 Clark Cooper dnl Copyright (c) 2000-2005 Fred L. Drake, Jr. dnl Copyright (c) 2001-2003 Greg Stein dnl Copyright (c) 2006-2012 Karl Waclawek dnl Copyright (c) 2016-2022 Sebastian Pipping dnl Copyright (c) 2017 S. P. Zeidler dnl Copyright (c) 2017 Stephen Groat dnl Copyright (c) 2017-2020 Joe Orton dnl Copyright (c) 2018 Yury Gribov dnl Copyright (c) 2018 Benjamin Peterson dnl Copyright (c) 2018 Marco Maggi dnl Copyright (c) 2018 KangLin dnl Copyright (c) 2019 Mohammed Khajapasha dnl Copyright (c) 2019 Kishore Kunche dnl Copyright (c) 2020 Jeffrey Walton dnl Licensed under the MIT license: dnl dnl Permission is hereby granted, free of charge, to any person obtaining dnl a copy of this software and associated documentation files (the dnl "Software"), to deal in the Software without restriction, including dnl without limitation the rights to use, copy, modify, merge, publish, dnl distribute, sublicense, and/or sell copies of the Software, and to permit dnl persons to whom the Software is furnished to do so, subject to the dnl following conditions: dnl dnl The above copyright notice and this permission notice shall be included dnl in all copies or substantial portions of the Software. dnl dnl THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, dnl EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF dnl MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN dnl NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, dnl DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR dnl OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE dnl USE OR OTHER DEALINGS IN THE SOFTWARE. dnl Ensure that Expat is configured with autoconf 2.69 or newer. AC_PREREQ([2.69]) dnl Get the version number of Expat, using m4's esyscmd() command to run dnl the command at m4-generation time. This allows us to create an m4 dnl symbol holding the correct version number. AC_INIT requires the dnl version number at m4-time, rather than when ./configure is run, so dnl all this must happen as part of m4, not as part of the shell code dnl contained in ./configure. dnl dnl NOTE: esyscmd() is a GNU M4 extension. Thus, we wrap it in an appropriate dnl test. I believe this test will work, but I don't have a place with non- dnl GNU M4 to test it right now. m4_define([expat_version], m4_ifdef([__gnu__], [esyscmd(conftools/get-version.sh lib/expat.h)], [2.2.x])) AC_INIT([expat], expat_version, [expat-bugs@libexpat.org]) m4_undefine([expat_version]) AC_CONFIG_SRCDIR([Makefile.in]) AC_CONFIG_AUX_DIR([conftools]) AC_CONFIG_MACRO_DIR([m4]) AC_CANONICAL_HOST AM_INIT_AUTOMAKE dnl dnl Increment LIBREVISION if source code has changed at all dnl dnl If the API has changed, increment LIBCURRENT and set LIBREVISION to 0 dnl dnl If the API changes compatibly (i.e. simply adding a new function dnl without changing or removing earlier interfaces), then increment LIBAGE. dnl dnl If the API changes incompatibly set LIBAGE back to 0 dnl LIBCURRENT=9 # sync -LIBREVISION=6 # with +LIBREVISION=7 # with LIBAGE=8 # CMakeLists.txt! AC_CONFIG_HEADERS([expat_config.h]) AM_PROG_AR AC_PROG_INSTALL AC_PROG_LN_S AC_PROG_MAKE_SET LT_PREREQ([2.4]) LT_INIT([win32-dll]) AC_SUBST(LIBCURRENT) AC_SUBST(LIBREVISION) AC_SUBST(LIBAGE) AC_LANG([C]) AC_PROG_CC_C99 AS_IF([test "$GCC" = yes], [AX_APPEND_COMPILE_FLAGS([-Wall -Wextra], [AM_CFLAGS]) dnl Be careful about adding the -fexceptions option; some versions of dnl GCC don't support it and it causes extra warnings that are only dnl distracting; avoid. AX_APPEND_COMPILE_FLAGS([-fexceptions], [AM_CFLAGS]) AX_APPEND_COMPILE_FLAGS([-fno-strict-aliasing -Wmissing-prototypes -Wstrict-prototypes], [AM_CFLAGS]) AX_APPEND_COMPILE_FLAGS([-pedantic -Wduplicated-cond -Wduplicated-branches -Wlogical-op], [AM_CFLAGS]) AX_APPEND_COMPILE_FLAGS([-Wrestrict -Wnull-dereference -Wjump-misses-init -Wdouble-promotion], [AM_CFLAGS]) AX_APPEND_COMPILE_FLAGS([-Wshadow -Wformat=2 -Wno-pedantic-ms-format -Wmisleading-indentation], [AM_CFLAGS])]) AC_LANG_PUSH([C++]) AC_PROG_CXX AS_IF([test "$GCC" = yes], [AX_APPEND_COMPILE_FLAGS([-Wall -Wextra], [AM_CXXFLAGS]) dnl Be careful about adding the -fexceptions option; some versions of dnl GCC don't support it and it causes extra warnings that are only dnl distracting; avoid. AX_APPEND_COMPILE_FLAGS([-fexceptions], [AM_CXXFLAGS]) AX_APPEND_COMPILE_FLAGS([-fno-strict-aliasing], [AM_CXXFLAGS])]) AC_LANG_POP([C++]) AS_IF([test "$GCC" = yes], [AX_APPEND_LINK_FLAGS([-fno-strict-aliasing],[AM_LDFLAGS])]) dnl patching ${archive_cmds} to affect generation of file "libtool" to fix linking with clang (issue #312) AS_CASE(["$LD"],[*clang*], [AS_CASE(["${host_os}"], [*linux*],[archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib'])]) EXPATCFG_COMPILER_SUPPORTS_VISIBILITY([ AX_APPEND_FLAG([-fvisibility=hidden], [AM_CFLAGS]) AX_APPEND_FLAG([-DXML_ENABLE_VISIBILITY=1], [AM_CPPFLAGS])]) dnl Checks for typedefs, structures, and compiler characteristics. dnl We define BYTEORDER to 1234 when the platform is little endian; it dnl defines it to 4321 when the platform is big endian. We also define dnl WORDS_BIGENDIAN to 1 when the platform is big endian. dnl dnl A long time ago (early 2000 years) AC_C_BIGENDIAN was considered dnl wrong when cross compiling, now (2018, GNU Autoconf 2.69) we assume dnl it is fine. AC_C_BIGENDIAN([AC_DEFINE([WORDS_BIGENDIAN], 1) AS_VAR_SET([BYTEORDER], 4321)], [AS_VAR_SET([BYTEORDER], 1234)]) AC_DEFINE_UNQUOTED([BYTEORDER], $BYTEORDER, [1234 = LILENDIAN, 4321 = BIGENDIAN]) AC_C_CONST AC_TYPE_SIZE_T AC_ARG_WITH([xmlwf], [AS_HELP_STRING([--without-xmlwf], [do not build xmlwf])], [], [with_xmlwf=yes]) AM_CONDITIONAL([WITH_XMLWF], [test x${with_xmlwf} = xyes]) AC_ARG_WITH([examples], [AS_HELP_STRING([--without-examples], [do not build examples @<:@default=included@:>@])], [], [with_examples=yes]) AM_CONDITIONAL([WITH_EXAMPLES], [test x${with_examples} = xyes]) AC_ARG_WITH([tests], [AS_HELP_STRING([--without-tests], [do not build tests @<:@default=included@:>@])], [], [with_tests=yes]) AM_CONDITIONAL([WITH_TESTS], [test x${with_tests} = xyes]) AS_VAR_SET([EXPATCFG_ON_MINGW],[no]) AS_CASE("${host_os}", [mingw*], [AS_VAR_SET([EXPATCFG_ON_MINGW],[yes]) AC_MSG_NOTICE([detected OS: MinGW])]) AM_CONDITIONAL([MINGW], [test x${EXPATCFG_ON_MINGW} = xyes]) dnl Note: Prefix "_INTERNAL_" here means exclusive use inside of file configure.ac AM_CONDITIONAL([UNICODE], [echo -- "${CPPFLAGS}${CFLAGS}" | ${FGREP} XML_UNICODE >/dev/null]) AM_CONDITIONAL([_INTERNAL_UNICODE_WCHAR_T], [echo -- "${CPPFLAGS}${CFLAGS}" | ${FGREP} XML_UNICODE_WCHAR_T >/dev/null]) AM_CONDITIONAL([_INTERNAL_MIN_SIZE], [echo -- "${CPPFLAGS}${CFLAGS}" | ${FGREP} XML_MIN_SIZE >/dev/null]) AM_CONDITIONAL([_INTERNAL_LARGE_SIZE], [echo -- "${CPPFLAGS}${CFLAGS}" | ${FGREP} XML_LARGE_SIZE >/dev/null]) LT_LIB_M AC_ARG_WITH([libbsd], [AS_HELP_STRING([--with-libbsd], [utilize libbsd (for arc4random_buf)])], [], [with_libbsd=no]) AS_IF([test "x${with_libbsd}" != xno], [AC_CHECK_LIB([bsd], [arc4random_buf], [], [AS_IF([test "x${with_libbsd}" = xyes], [AC_MSG_ERROR([Enforced use of libbsd cannot be satisfied.])])])]) AC_MSG_CHECKING([for arc4random_buf (BSD or libbsd)]) AC_LINK_IFELSE([AC_LANG_SOURCE([ #include /* for arc4random_buf on BSD, for NULL */ #if defined(HAVE_LIBBSD) # include #endif int main() { arc4random_buf(NULL, 0U); return 0; } ])], [AC_DEFINE([HAVE_ARC4RANDOM_BUF], [1], [Define to 1 if you have the `arc4random_buf' function.]) AC_MSG_RESULT([yes])], [AC_MSG_RESULT([no]) AC_MSG_CHECKING([for arc4random (BSD, macOS or libbsd)]) AC_LINK_IFELSE([AC_LANG_SOURCE([ #if defined(HAVE_LIBBSD) # include #else # include #endif int main() { arc4random(); return 0; } ])], [AC_DEFINE([HAVE_ARC4RANDOM], [1], [Define to 1 if you have the `arc4random' function.]) AC_MSG_RESULT([yes])], [AC_MSG_RESULT([no])])]) AC_ARG_WITH([getrandom], [AS_HELP_STRING([--with-getrandom], [enforce the use of getrandom function in the system @<:@default=check@:>@]) AS_HELP_STRING([--without-getrandom], [skip auto detect of getrandom @<:@default=check@:>@])], [], [with_getrandom=check]) AS_IF([test "x$with_getrandom" != xno], [AC_MSG_CHECKING([for getrandom (Linux 3.17+, glibc 2.25+)]) AC_LINK_IFELSE([AC_LANG_SOURCE([ #include /* for NULL */ #include int main() { return getrandom(NULL, 0U, 0U); } ])], [AC_DEFINE([HAVE_GETRANDOM], [1], [Define to 1 if you have the `getrandom' function.]) AC_MSG_RESULT([yes])], [AC_MSG_RESULT([no]) AS_IF([test "x$with_getrandom" = xyes], [AC_MSG_ERROR([enforced the use of getrandom --with-getrandom, but not detected])])])]) AC_ARG_WITH([sys_getrandom], [AS_HELP_STRING([--with-sys-getrandom], [enforce the use of syscall SYS_getrandom function in the system @<:@default=check@:>@]) AS_HELP_STRING([--without-sys-getrandom], [skip auto detect of syscall SYS_getrandom @<:@default=check@:>@])], [], [with_sys_getrandom=check]) AS_IF([test "x$with_sys_getrandom" != xno], [AC_MSG_CHECKING([for syscall SYS_getrandom (Linux 3.17+)]) AC_LINK_IFELSE([AC_LANG_SOURCE([ #include /* for NULL */ #include /* for syscall */ #include /* for SYS_getrandom */ int main() { syscall(SYS_getrandom, NULL, 0, 0); return 0; } ])], [AC_DEFINE([HAVE_SYSCALL_GETRANDOM], [1], [Define to 1 if you have `syscall' and `SYS_getrandom'.]) AC_MSG_RESULT([yes])], [AC_MSG_RESULT([no]) AS_IF([test "x$with_sys_getrandom" = xyes], [AC_MSG_ERROR([enforced the use of syscall SYS_getrandom --with-sys-getrandom, but not detected])])])]) dnl Only needed for xmlwf: AC_CHECK_HEADERS(fcntl.h unistd.h) AC_TYPE_OFF_T AC_FUNC_MMAP AS_IF([test "$ac_cv_func_mmap_fixed_mapped" = "yes"], [AS_VAR_SET(FILEMAP,unixfilemap)], [AS_VAR_SET(FILEMAP,readfilemap)]) AC_SUBST(FILEMAP) dnl Some basic configuration: AC_DEFINE([XML_NS], 1, [Define to make XML Namespaces functionality available.]) AC_DEFINE([XML_DTD], 1, [Define to make parameter entity parsing functionality available.]) AC_DEFINE([XML_DEV_URANDOM], 1, [Define to include code reading entropy from `/dev/urandom'.]) AC_ARG_ENABLE([xml-attr-info], [AS_HELP_STRING([--enable-xml-attr-info], [Enable retrieving the byte offsets for attribute names and values @<:@default=no@:>@])], [], [enable_xml_attr_info=no]) AS_IF([test "x${enable_xml_attr_info}" = "xyes"], [AC_DEFINE([XML_ATTR_INFO], 1, [Define to allow retrieving the byte offsets for attribute names and values.])]) AC_ARG_ENABLE([xml-context], AS_HELP_STRING([--enable-xml-context @<:@COUNT@:>@], [Retain context around the current parse point; default is enabled and a size of 1024 bytes]) AS_HELP_STRING([--disable-xml-context], [Do not retain context around the current parse point]), [enable_xml_context=${enableval}]) AS_IF([test "x${enable_xml_context}" != "xno"], [AS_IF([test "x${enable_xml_context}" = "xyes" \ -o "x${enable_xml_context}" = "x"], [AS_VAR_SET(enable_xml_context,1024)]) AC_DEFINE_UNQUOTED([XML_CONTEXT_BYTES], [${enable_xml_context}], [Define to specify how much context to retain around the current parse point.])]) AC_ARG_WITH([docbook], [AS_HELP_STRING([--with-docbook], [enforce XML to man page compilation @<:@default=check@:>@]) AS_HELP_STRING([--without-docbook], [skip XML to man page compilation @<:@default=check@:>@])], [], [with_docbook=check]) AC_ARG_VAR([DOCBOOK_TO_MAN], [docbook2x-man command]) AS_IF([test "x$with_docbook" != xno], [AC_CHECK_PROGS([DOCBOOK_TO_MAN], [docbook2x-man db2x_docbook2man docbook2man docbook-to-man])]) AS_IF([test "x${DOCBOOK_TO_MAN}" = x -a "x$with_docbook" = xyes], [AC_MSG_ERROR([Required program 'docbook2x-man' not found.])]) AS_IF([test "x${DOCBOOK_TO_MAN}" != x -a "x$with_docbook" != xno], [AS_IF([${DOCBOOK_TO_MAN} --help | grep -i -q -F sgmlbase], [AC_MSG_ERROR([Your local ${DOCBOOK_TO_MAN} was found to work with SGML rather than XML. Please install docbook2X and use variable DOCBOOK_TO_MAN to point configure to command docbook2x-man of docbook2X. Or use DOCBOOK_TO_MAN="xmlto man --skip-validation" if you have xmlto around. You can also configure using --without-docbook if you can do without a man page for xmlwf.])])]) AM_CONDITIONAL(WITH_DOCBOOK, [test "x${DOCBOOK_TO_MAN}" != x]) dnl Configure CMake file templates dnl NOTE: The *_TRUE variables read here are Automake conditionals dnl that are either set to "" when enabled or to "#" when disabled dnl (because they are used to dynamically comment out certain things) AS_IF([test "x${enable_xml_attr_info}" = xyes], [EXPAT_ATTR_INFO=ON], [EXPAT_ATTR_INFO=OFF]) EXPAT_DTD=ON AS_IF([test "x${_INTERNAL_LARGE_SIZE_TRUE}" = x], [EXPAT_LARGE_SIZE=ON], [EXPAT_LARGE_SIZE=OFF]) AS_IF([test "x${_INTERNAL_MIN_SIZE_TRUE}" = x], [EXPAT_MIN_SIZE=ON], [EXPAT_MIN_SIZE=OFF]) EXPAT_NS=ON AS_IF([test "x${enable_xml_context}" != xno], [EXPAT_CONTEXT_BYTES=${enable_xml_context}], [EXPAT_CONTEXT_BYTES=OFF]) AS_IF([test "x${UNICODE_TRUE}" = x], [AS_IF( [test "x${_INTERNAL_UNICODE_WCHAR_T_TRUE}" = x], [EXPAT_CHAR_TYPE=wchar_t], [EXPAT_CHAR_TYPE=ushort])], [EXPAT_CHAR_TYPE=char]) PACKAGE_INIT="${srcdir}"/cmake/autotools/expat-package-init.cmake LIBDIR_BASENAME="$(basename "${libdir}")" SO_MAJOR="$(expr "${LIBCURRENT}" - "${LIBAGE}")" SO_MINOR="${LIBAGE}" SO_PATCH="${LIBREVISION}" AC_CHECK_SIZEOF([void *]) # sets ac_cv_sizeof_void_p AC_SUBST([EXPAT_ATTR_INFO]) AC_SUBST([EXPAT_DTD]) AC_SUBST([EXPAT_LARGE_SIZE]) AC_SUBST([EXPAT_MIN_SIZE]) AC_SUBST([EXPAT_NS]) AC_SUBST([EXPAT_CONTEXT_BYTES]) AC_SUBST([EXPAT_CHAR_TYPE]) AC_SUBST_FILE([PACKAGE_INIT]) AC_SUBST([LIBDIR_BASENAME]) AC_SUBST([SO_MAJOR]) AC_SUBST([SO_MINOR]) AC_SUBST([SO_PATCH]) AC_SUBST([ac_cv_sizeof_void_p]) dnl write the Automake flags we set AC_SUBST([AM_CPPFLAGS]) AC_SUBST([AM_CFLAGS]) AC_SUBST([AM_CXXFLAGS]) AC_SUBST([AM_LDFLAGS]) dnl Emulate the use of CMAKE_SHARED_LIBRARY_PREFIX under CMake AC_MSG_CHECKING([for shared library name prefix]) AS_CASE("${host_os}", [cygwin*], [CMAKE_SHARED_LIBRARY_PREFIX=cyg], [CMAKE_SHARED_LIBRARY_PREFIX=lib]) AC_MSG_RESULT([${CMAKE_SHARED_LIBRARY_PREFIX}]) AC_SUBST([CMAKE_SHARED_LIBRARY_PREFIX]) AS_CASE("${host_os}", [darwin*], [CMAKE_NOCONFIG_SOURCE=cmake/autotools/expat-noconfig__macos.cmake.in], [mingw*|cygwin*], [CMAKE_NOCONFIG_SOURCE=cmake/autotools/expat-noconfig__windows.cmake.in], [CMAKE_NOCONFIG_SOURCE=cmake/autotools/expat-noconfig__linux.cmake.in]) AC_CONFIG_FILES([Makefile] [expat.pc] [cmake/expat-config.cmake] [cmake/autotools/expat-config-version.cmake] [cmake/autotools/expat-noconfig.cmake:${CMAKE_NOCONFIG_SOURCE}] [doc/Makefile] [examples/Makefile] [lib/Makefile] [tests/Makefile] [tests/benchmark/Makefile] [xmlwf/Makefile]) AC_CONFIG_FILES([run.sh], [chmod +x run.sh]) AC_OUTPUT AC_MSG_NOTICE([ Automake flags (can be overridden by user flags): [AM_CPPFLAGS]: ${AM_CPPFLAGS} [AM_CFLAGS]: ${AM_CFLAGS} [AM_CXXFLAGS]: ${AM_CXXFLAGS} [AM_LDFLAGS]: ${AM_LDFLAGS} User flags (override Automake flags on conflict): CPPFLAGS: ${CPPFLAGS} CFLAGS: ${CFLAGS} CXXFLAGS: ${CXXFLAGS} LDFLAGS: ${LDFLAGS}]) diff --git a/contrib/expat/doc/reference.html b/contrib/expat/doc/reference.html index 26db5a635479..87ace02d456e 100644 --- a/contrib/expat/doc/reference.html +++ b/contrib/expat/doc/reference.html @@ -1,2596 +1,2606 @@ Expat XML Parser

The Expat XML Parser - Release 2.4.6 + Release 2.4.7

Expat is a library, written in C, for parsing XML documents. It's the underlying XML parser for the open source Mozilla project, Perl's XML::Parser, Python's xml.parsers.expat, and other open-source XML parsers.

This library is the creation of James Clark, who's also given us groff (an nroff look-alike), Jade (an implementation of ISO's DSSSL stylesheet language for SGML), XP (a Java XML parser package), XT (a Java XSL engine). James was also the technical lead on the XML Working Group at W3C that produced the XML specification.

This is free software, licensed under the MIT/X Consortium license. You may download it from the Expat home page.

The bulk of this document was originally commissioned as an article by XML.com. They graciously allowed Clark Cooper to retain copyright and to distribute it with Expat. This version has been substantially extended to include documentation on features which have been added since the original article was published, and additional information on using the original interface.


Table of Contents


Overview

Expat is a stream-oriented parser. You register callback (or handler) functions with the parser and then start feeding it the document. As the parser recognizes parts of the document, it will call the appropriate handler for that part (if you've registered one.) The document is fed to the parser in pieces, so you can start parsing before you have all the document. This also allows you to parse really huge documents that won't fit into memory.

Expat can be intimidating due to the many kinds of handlers and options you can set. But you only need to learn four functions in order to do 90% of what you'll want to do with it:

XML_ParserCreate
Create a new parser object.
XML_SetElementHandler
Set handlers for start and end tags.
XML_SetCharacterDataHandler
Set handler for text.
XML_Parse
Pass a buffer full of document to the parser

These functions and others are described in the reference part of this document. The reference section also describes in detail the parameters passed to the different types of handlers.

Let's look at a very simple example program that only uses 3 of the above functions (it doesn't need to set a character handler.) The program outline.c prints an element outline, indenting child elements to distinguish them from the parent element that contains them. The start handler does all the work. It prints two indenting spaces for every level of ancestor elements, then it prints the element and attribute information. Finally it increments the global Depth variable.

 int Depth;
 
 void XMLCALL
 start(void *data, const char *el, const char **attr) {
   int i;
 
   for (i = 0; i < Depth; i++)
     printf("  ");
 
   printf("%s", el);
 
   for (i = 0; attr[i]; i += 2) {
     printf(" %s='%s'", attr[i], attr[i + 1]);
   }
 
   printf("\n");
   Depth++;
 }  /* End of start handler */
 

The end tag simply does the bookkeeping work of decrementing Depth.

 void XMLCALL
 end(void *data, const char *el) {
   Depth--;
 }  /* End of end handler */
 

Note the XMLCALL annotation used for the callbacks. This is used to ensure that the Expat and the callbacks are using the same calling convention in case the compiler options used for Expat itself and the client code are different. Expat tries not to care what the default calling convention is, though it may require that it be compiled with a default convention of "cdecl" on some platforms. For code which uses Expat, however, the calling convention is specified by the XMLCALL annotation on most platforms; callbacks should be defined using this annotation.

The XMLCALL annotation was added in Expat 1.95.7, but existing working Expat applications don't need to add it (since they are already using the "cdecl" calling convention, or they wouldn't be working). The annotation is only needed if the default calling convention may be something other than "cdecl". To use the annotation safely with older versions of Expat, you can conditionally define it after including Expat's header file:

 #include <expat.h>
 
 #ifndef XMLCALL
 #if defined(_MSC_EXTENSIONS) && !defined(__BEOS__) && !defined(__CYGWIN__)
 #define XMLCALL __cdecl
 #elif defined(__GNUC__)
 #define XMLCALL __attribute__((cdecl))
 #else
 #define XMLCALL
 #endif
 #endif
 

After creating the parser, the main program just has the job of shoveling the document to the parser so that it can do its work.


Building and Installing Expat

The Expat distribution comes as a compressed (with GNU gzip) tar file. You may download the latest version from Source Forge. After unpacking this, cd into the directory. Then follow either the Win32 directions or Unix directions below.

Building under Win32

If you're using the GNU compiler under cygwin, follow the Unix directions in the next section. Otherwise if you have Microsoft's Developer Studio installed, you can use CMake to generate a .sln file, e.g. cmake -G"Visual Studio 15 2017" -DCMAKE_BUILD_TYPE=RelWithDebInfo . , and build Expat using msbuild /m expat.sln after.

Alternatively, you may download the Win32 binary package that contains the "expat.h" include file and a pre-built DLL.

Building under Unix (or GNU)

First you'll need to run the configure shell script in order to configure the Makefiles and headers for your system.

If you're happy with all the defaults that configure picks for you, and you have permission on your system to install into /usr/local, you can install Expat with this sequence of commands:

 ./configure
 make
 make install
 

There are some options that you can provide to this script, but the only one we'll mention here is the --prefix option. You can find out all the options available by running configure with just the --help option.

By default, the configure script sets things up so that the library gets installed in /usr/local/lib and the associated header file in /usr/local/include. But if you were to give the option, --prefix=/home/me/mystuff, then the library and header would get installed in /home/me/mystuff/lib and /home/me/mystuff/include respectively.

Configuring Expat Using the Pre-Processor

Expat's feature set can be configured using a small number of pre-processor definitions. The definition of this symbols does not affect the set of entry points for Expat, only the behavior of the API and the definition of character types in the case of XML_UNICODE_WCHAR_T. The symbols are:

XML_DTD
Include support for using and reporting DTD-based content. If this is defined, default attribute values from an external DTD subset are reported and attribute value normalization occurs based on the type of attributes defined in the external subset. Without this, Expat has a smaller memory footprint and can be faster, but will not load external entities or process conditional sections. This does not affect the set of functions available in the API.
XML_NS
When defined, support for the Namespaces in XML specification is included.
XML_UNICODE
When defined, character data reported to the application is encoded in UTF-16 using wide characters of the type XML_Char. This is implied if XML_UNICODE_WCHAR_T is defined.
XML_UNICODE_WCHAR_T
If defined, causes the XML_Char character type to be defined using the wchar_t type; otherwise, unsigned short is used. Defining this implies XML_UNICODE.
XML_LARGE_SIZE
If defined, causes the XML_Size and XML_Index integer types to be at least 64 bits in size. This is intended to support processing of very large input streams, where the return values of XML_GetCurrentByteIndex, XML_GetCurrentLineNumber and XML_GetCurrentColumnNumber could overflow. It may not be supported by all compilers, and is turned off by default.
XML_CONTEXT_BYTES
The number of input bytes of markup context which the parser will ensure are available for reporting via XML_GetInputContext. This is normally set to 1024, and must be set to a positive integer. If this is not defined, the input context will not be available and XML_GetInputContext will always report NULL. Without this, Expat has a smaller memory footprint and can be faster.
XML_STATIC
On Windows, this should be set if Expat is going to be linked statically with the code that calls it; this is required to get all the right MSVC magic annotations correct. This is ignored on other platforms.
XML_ATTR_INFO
If defined, makes the additional function XML_GetAttributeInfo available for reporting attribute byte offsets.

Using Expat

Compiling and Linking Against Expat

Unless you installed Expat in a location not expected by your compiler and linker, all you have to do to use Expat in your programs is to include the Expat header (#include <expat.h>) in your files that make calls to it and to tell the linker that it needs to link against the Expat library. On Unix systems, this would usually be done with the -lexpat argument. Otherwise, you'll need to tell the compiler where to look for the Expat header and the linker where to find the Expat library. You may also need to take steps to tell the operating system where to find this library at run time.

On a Unix-based system, here's what a Makefile might look like when Expat is installed in a standard location:

 CC=cc
 LDFLAGS=
 LIBS= -lexpat
 xmlapp: xmlapp.o
         $(CC) $(LDFLAGS) -o xmlapp xmlapp.o $(LIBS)
 

If you installed Expat in, say, /home/me/mystuff, then the Makefile would look like this:

 CC=cc
 CFLAGS= -I/home/me/mystuff/include
 LDFLAGS=
 LIBS= -L/home/me/mystuff/lib -lexpat
 xmlapp: xmlapp.o
         $(CC) $(LDFLAGS) -o xmlapp xmlapp.o $(LIBS)
 

You'd also have to set the environment variable LD_LIBRARY_PATH to /home/me/mystuff/lib (or to ${LD_LIBRARY_PATH}:/home/me/mystuff/lib if LD_LIBRARY_PATH already has some directories in it) in order to run your application.

Expat Basics

As we saw in the example in the overview, the first step in parsing an XML document with Expat is to create a parser object. There are three functions in the Expat API for creating a parser object. However, only two of these (XML_ParserCreate and XML_ParserCreateNS) can be used for constructing a parser for a top-level document. The object returned by these functions is an opaque pointer (i.e. "expat.h" declares it as void *) to data with further internal structure. In order to free the memory associated with this object you must call XML_ParserFree. Note that if you have provided any user data that gets stored in the parser, then your application is responsible for freeing it prior to calling XML_ParserFree.

The objects returned by the parser creation functions are good for parsing only one XML document or external parsed entity. If your application needs to parse many XML documents, then it needs to create a parser object for each one. The best way to deal with this is to create a higher level object that contains all the default initialization you want for your parser objects.

Walking through a document hierarchy with a stream oriented parser will require a good stack mechanism in order to keep track of current context. For instance, to answer the simple question, "What element does this text belong to?" requires a stack, since the parser may have descended into other elements that are children of the current one and has encountered this text on the way out.

The things you're likely to want to keep on a stack are the currently opened element and it's attributes. You push this information onto the stack in the start handler and you pop it off in the end handler.

For some tasks, it is sufficient to just keep information on what the depth of the stack is (or would be if you had one.) The outline program shown above presents one example. Another such task would be skipping over a complete element. When you see the start tag for the element you want to skip, you set a skip flag and record the depth at which the element started. When the end tag handler encounters the same depth, the skipped element has ended and the flag may be cleared. If you follow the convention that the root element starts at 1, then you can use the same variable for skip flag and skip depth.

 void
 init_info(Parseinfo *info) {
   info->skip = 0;
   info->depth = 1;
   /* Other initializations here */
 }  /* End of init_info */
 
 void XMLCALL
 rawstart(void *data, const char *el, const char **attr) {
   Parseinfo *inf = (Parseinfo *) data;
 
   if (! inf->skip) {
     if (should_skip(inf, el, attr)) {
       inf->skip = inf->depth;
     }
     else
       start(inf, el, attr);     /* This does rest of start handling */
   }
 
   inf->depth++;
 }  /* End of rawstart */
 
 void XMLCALL
 rawend(void *data, const char *el) {
   Parseinfo *inf = (Parseinfo *) data;
 
   inf->depth--;
 
   if (! inf->skip)
     end(inf, el);              /* This does rest of end handling */
 
   if (inf->skip == inf->depth)
     inf->skip = 0;
 }  /* End rawend */
 

Notice in the above example the difference in how depth is manipulated in the start and end handlers. The end tag handler should be the mirror image of the start tag handler. This is necessary to properly model containment. Since, in the start tag handler, we incremented depth after the main body of start tag code, then in the end handler, we need to manipulate it before the main body. If we'd decided to increment it first thing in the start handler, then we'd have had to decrement it last thing in the end handler.

Communicating between handlers

In order to be able to pass information between different handlers without using globals, you'll need to define a data structure to hold the shared variables. You can then tell Expat (with the XML_SetUserData function) to pass a pointer to this structure to the handlers. This is the first argument received by most handlers. In the reference section, an argument to a callback function is named userData and have type void * if the user data is passed; it will have the type XML_Parser if the parser itself is passed. When the parser is passed, the user data may be retrieved using XML_GetUserData.

One common case where multiple calls to a single handler may need to communicate using an application data structure is the case when content passed to the character data handler (set by XML_SetCharacterDataHandler) needs to be accumulated. A common first-time mistake with any of the event-oriented interfaces to an XML parser is to expect all the text contained in an element to be reported by a single call to the character data handler. Expat, like many other XML parsers, reports such data as a sequence of calls; there's no way to know when the end of the sequence is reached until a different callback is made. A buffer referenced by the user data structure proves both an effective and convenient place to accumulate character data.

XML Version

Expat is an XML 1.0 parser, and as such never complains based on the value of the version pseudo-attribute in the XML declaration, if present.

If an application needs to check the version number (to support alternate processing), it should use the XML_SetXmlDeclHandler function to set a handler that uses the information in the XML declaration to determine what to do. This example shows how to check that only a version number of "1.0" is accepted:

 static int wrong_version;
 static XML_Parser parser;
 
 static void XMLCALL
 xmldecl_handler(void            *userData,
                 const XML_Char  *version,
                 const XML_Char  *encoding,
                 int              standalone)
 {
   static const XML_Char Version_1_0[] = {'1', '.', '0', 0};
 
   int i;
 
   for (i = 0; i < (sizeof(Version_1_0) / sizeof(Version_1_0[0])); ++i) {
     if (version[i] != Version_1_0[i]) {
       wrong_version = 1;
       /* also clear all other handlers: */
       XML_SetCharacterDataHandler(parser, NULL);
       ...
       return;
     }
   }
   ...
 }
 

Namespace Processing

When the parser is created using the XML_ParserCreateNS, function, Expat performs namespace processing. Under namespace processing, Expat consumes xmlns and xmlns:... attributes, which declare namespaces for the scope of the element in which they occur. This means that your start handler will not see these attributes. Your application can still be informed of these declarations by setting namespace declaration handlers with XML_SetNamespaceDeclHandler.

Element type and attribute names that belong to a given namespace are passed to the appropriate handler in expanded form. By default this expanded form is a concatenation of the namespace URI, the separator character (which is the 2nd argument to XML_ParserCreateNS), and the local name (i.e. the part after the colon). Names with undeclared prefixes are not well-formed when namespace processing is enabled, and will trigger an error. Unprefixed attribute names are never expanded, and unprefixed element names are only expanded when they are in the scope of a default namespace.

However if XML_SetReturnNSTriplet has been called with a non-zero do_nst parameter, then the expanded form for names with an explicit prefix is a concatenation of: URI, separator, local name, separator, prefix.

You can set handlers for the start of a namespace declaration and for the end of a scope of a declaration with the XML_SetNamespaceDeclHandler function. The StartNamespaceDeclHandler is called prior to the start tag handler and the EndNamespaceDeclHandler is called after the corresponding end tag that ends the namespace's scope. The namespace start handler gets passed the prefix and URI for the namespace. For a default namespace declaration (xmlns='...'), the prefix will be null. The URI will be null for the case where the default namespace is being unset. The namespace end handler just gets the prefix for the closing scope.

These handlers are called for each declaration. So if, for instance, a start tag had three namespace declarations, then the StartNamespaceDeclHandler would be called three times before the start tag handler is called, once for each declaration.

Character Encodings

While XML is based on Unicode, and every XML processor is required to recognized UTF-8 and UTF-16 (1 and 2 byte encodings of Unicode), other encodings may be declared in XML documents or entities. For the main document, an XML declaration may contain an encoding declaration:

 <?xml version="1.0" encoding="ISO-8859-2"?>
 

External parsed entities may begin with a text declaration, which looks like an XML declaration with just an encoding declaration:

 <?xml encoding="Big5"?>
 

With Expat, you may also specify an encoding at the time of creating a parser. This is useful when the encoding information may come from a source outside the document itself (like a higher level protocol.)

There are four built-in encodings in Expat:

  • UTF-8
  • UTF-16
  • ISO-8859-1
  • US-ASCII

Anything else discovered in an encoding declaration or in the protocol encoding specified in the parser constructor, triggers a call to the UnknownEncodingHandler. This handler gets passed the encoding name and a pointer to an XML_Encoding data structure. Your handler must fill in this structure and return XML_STATUS_OK if it knows how to deal with the encoding. Otherwise the handler should return XML_STATUS_ERROR. The handler also gets passed a pointer to an optional application data structure that you may indicate when you set the handler.

Expat places restrictions on character encodings that it can support by filling in the XML_Encoding structure. include file:

  1. Every ASCII character that can appear in a well-formed XML document must be represented by a single byte, and that byte must correspond to it's ASCII encoding (except for the characters $@\^'{}~)
  2. Characters must be encoded in 4 bytes or less.
  3. All characters encoded must have Unicode scalar values less than or equal to 65535 (0xFFFF)This does not apply to the built-in support for UTF-16 and UTF-8
  4. No character may be encoded by more that one distinct sequence of bytes

XML_Encoding contains an array of integers that correspond to the 1st byte of an encoding sequence. If the value in the array for a byte is zero or positive, then the byte is a single byte encoding that encodes the Unicode scalar value contained in the array. A -1 in this array indicates a malformed byte. If the value is -2, -3, or -4, then the byte is the beginning of a 2, 3, or 4 byte sequence respectively. Multi-byte sequences are sent to the convert function pointed at in the XML_Encoding structure. This function should return the Unicode scalar value for the sequence or -1 if the sequence is malformed.

One pitfall that novice Expat users are likely to fall into is that although Expat may accept input in various encodings, the strings that it passes to the handlers are always encoded in UTF-8 or UTF-16 (depending on how Expat was compiled). Your application is responsible for any translation of these strings into other encodings.

Handling External Entity References

Expat does not read or parse external entities directly. Note that any external DTD is a special case of an external entity. If you've set no ExternalEntityRefHandler, then external entity references are silently ignored. Otherwise, it calls your handler with the information needed to read and parse the external entity.

Your handler isn't actually responsible for parsing the entity, but it is responsible for creating a subsidiary parser with XML_ExternalEntityParserCreate that will do the job. This returns an instance of XML_Parser that has handlers and other data structures initialized from the parent parser. You may then use XML_Parse or XML_ParseBuffer calls against this parser. Since external entities my refer to other external entities, your handler should be prepared to be called recursively.

Parsing DTDs

In order to parse parameter entities, before starting the parse, you must call XML_SetParamEntityParsing with one of the following arguments:

XML_PARAM_ENTITY_PARSING_NEVER
Don't parse parameter entities or the external subset
XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE
Parse parameter entities and the external subset unless standalone was set to "yes" in the XML declaration.
XML_PARAM_ENTITY_PARSING_ALWAYS
Always parse parameter entities and the external subset

In order to read an external DTD, you also have to set an external entity reference handler as described above.

Temporarily Stopping Parsing

Expat 1.95.8 introduces a new feature: its now possible to stop parsing temporarily from within a handler function, even if more data has already been passed into the parser. Applications for this include

  • Supporting the XInclude specification.
  • Delaying further processing until additional information is available from some other source.
  • Adjusting processor load as task priorities shift within an application.
  • Stopping parsing completely (simply free or reset the parser instead of resuming in the outer parsing loop). This can be useful if an application-domain error is found in the XML being parsed or if the result of the parse is determined not to be useful after all.

To take advantage of this feature, the main parsing loop of an application needs to support this specifically. It cannot be supported with a parsing loop compatible with Expat 1.95.7 or earlier (though existing loops will continue to work without supporting the stop/resume feature).

An application that uses this feature for a single parser will have the rough structure (in pseudo-code):

 fd = open_input()
 p = create_parser()
 
 if parse_xml(p, fd) {
   /* suspended */
 
   int suspended = 1;
 
   while (suspended) {
     do_something_else()
     if ready_to_resume() {
       suspended = continue_parsing(p, fd);
     }
   }
 }
 

An application that may resume any of several parsers based on input (either from the XML being parsed or some other source) will certainly have more interesting control structures.

This C function could be used for the parse_xml function mentioned in the pseudo-code above:

 #define BUFF_SIZE 10240
 
 /* Parse a document from the open file descriptor 'fd' until the parse
    is complete (the document has been completely parsed, or there's
    been an error), or the parse is stopped.  Return non-zero when
    the parse is merely suspended.
 */
 int
 parse_xml(XML_Parser p, int fd)
 {
   for (;;) {
     int last_chunk;
     int bytes_read;
     enum XML_Status status;
 
     void *buff = XML_GetBuffer(p, BUFF_SIZE);
     if (buff == NULL) {
       /* handle error... */
       return 0;
     }
     bytes_read = read(fd, buff, BUFF_SIZE);
     if (bytes_read < 0) {
       /* handle error... */
       return 0;
     }
     status = XML_ParseBuffer(p, bytes_read, bytes_read == 0);
     switch (status) {
       case XML_STATUS_ERROR:
         /* handle error... */
         return 0;
       case XML_STATUS_SUSPENDED:
         return 1;
     }
     if (bytes_read == 0)
       return 0;
   }
 }
 

The corresponding continue_parsing function is somewhat simpler, since it only need deal with the return code from XML_ResumeParser; it can delegate the input handling to the parse_xml function:

 /* Continue parsing a document which had been suspended.  The 'p' and
    'fd' arguments are the same as passed to parse_xml().  Return
    non-zero when the parse is suspended.
 */
 int
 continue_parsing(XML_Parser p, int fd)
 {
   enum XML_Status status = XML_ResumeParser(p);
   switch (status) {
     case XML_STATUS_ERROR:
       /* handle error... */
       return 0;
     case XML_ERROR_NOT_SUSPENDED:
       /* handle error... */
       return 0;.
     case XML_STATUS_SUSPENDED:
       return 1;
   }
   return parse_xml(p, fd);
 }
 

Now that we've seen what a mess the top-level parsing loop can become, what have we gained? Very simply, we can now use the XML_StopParser function to stop parsing, without having to go to great lengths to avoid additional processing that we're expecting to ignore. As a bonus, we get to stop parsing temporarily, and come back to it when we're ready.

To stop parsing from a handler function, use the XML_StopParser function. This function takes two arguments; the parser being stopped and a flag indicating whether the parse can be resumed in the future.


Expat Reference

Parser Creation

XML_ParserCreate

 XML_Parser XMLCALL
 XML_ParserCreate(const XML_Char *encoding);
 
Construct a new parser. If encoding is non-null, it specifies a character encoding to use for the document. This overrides the document encoding declaration. There are four built-in encodings:
  • US-ASCII
  • UTF-8
  • UTF-16
  • ISO-8859-1
Any other value will invoke a call to the UnknownEncodingHandler.

XML_ParserCreateNS

 XML_Parser XMLCALL
 XML_ParserCreateNS(const XML_Char *encoding,
                    XML_Char sep);
 
Constructs a new parser that has namespace processing in effect. Namespace expanded element names and attribute names are returned as a concatenation of the namespace URI, sep, and the local part of the name. This means that you should pick a character for sep that can't be part of an URI. Since Expat does not check namespace URIs for conformance, the only safe choice for a namespace separator is a character that is illegal in XML. For instance, '\xFF' is not legal in UTF-8, and '\xFFFF' is not legal in UTF-16. There is a special case when sep is the null character '\0': the namespace URI and the local part will be concatenated without any separator - this is intended to support RDF processors. It is a programming error to use the null separator with namespace triplets.
+

Note: +Expat does not validate namespace URIs (beyond encoding) +against RFC 3986 today (and is not required to do so with regard to +the XML 1.0 namespaces specification) but it may start doing that +in future releases. Before that, an application using Expat must +be ready to receive namespace URIs containing non-URI characters. +

+

XML_ParserCreate_MM

 XML_Parser XMLCALL
 XML_ParserCreate_MM(const XML_Char *encoding,
                     const XML_Memory_Handling_Suite *ms,
 		    const XML_Char *sep);
 
 typedef struct {
   void *(XMLCALL *malloc_fcn)(size_t size);
   void *(XMLCALL *realloc_fcn)(void *ptr, size_t size);
   void (XMLCALL *free_fcn)(void *ptr);
 } XML_Memory_Handling_Suite;
 

Construct a new parser using the suite of memory handling functions specified in ms. If ms is NULL, then use the standard set of memory management functions. If sep is non NULL, then namespace processing is enabled in the created parser and the character pointed at by sep is used as the separator between the namespace URI and the local part of the name.

XML_ExternalEntityParserCreate

 XML_Parser XMLCALL
 XML_ExternalEntityParserCreate(XML_Parser p,
                                const XML_Char *context,
                                const XML_Char *encoding);
 
Construct a new XML_Parser object for parsing an external general entity. Context is the context argument passed in a call to a ExternalEntityRefHandler. Other state information such as handlers, user data, namespace processing is inherited from the parser passed as the 1st argument. So you shouldn't need to call any of the behavior changing functions on this parser (unless you want it to act differently than the parent parser).

XML_ParserFree

 void XMLCALL
 XML_ParserFree(XML_Parser p);
 
Free memory used by the parser. Your application is responsible for freeing any memory associated with user data.

XML_ParserReset

 XML_Bool XMLCALL
 XML_ParserReset(XML_Parser p,
                 const XML_Char *encoding);
 
Clean up the memory structures maintained by the parser so that it may be used again. After this has been called, parser is ready to start parsing a new document. All handlers are cleared from the parser, except for the unknownEncodingHandler. The parser's external state is re-initialized except for the values of ns and ns_triplets. This function may not be used on a parser created using XML_ExternalEntityParserCreate; it will return XML_FALSE in that case. Returns XML_TRUE on success. Your application is responsible for dealing with any memory associated with user data.

Parsing

To state the obvious: the three parsing functions XML_Parse, XML_ParseBuffer and XML_GetBuffer must not be called from within a handler unless they operate on a separate parser instance, that is, one that did not call the handler. For example, it is OK to call the parsing functions from within an XML_ExternalEntityRefHandler, if they apply to the parser created by XML_ExternalEntityParserCreate.

Note: The len argument passed to these functions should be considerably less than the maximum value for an integer, as it could create an integer overflow situation if the added lengths of a buffer and the unprocessed portion of the previous buffer exceed the maximum integer value. Input data at the end of a buffer will remain unprocessed if it is part of an XML token for which the end is not part of that buffer.

XML_Parse

 enum XML_Status XMLCALL
 XML_Parse(XML_Parser p,
           const char *s,
           int len,
           int isFinal);
 
 enum XML_Status {
   XML_STATUS_ERROR = 0,
   XML_STATUS_OK = 1
 };
 
Parse some more of the document. The string s is a buffer containing part (or perhaps all) of the document. The number of bytes of s that are part of the document is indicated by len. This means that s doesn't have to be null terminated. It also means that if len is larger than the number of bytes in the block of memory that s points at, then a memory fault is likely. The isFinal parameter informs the parser that this is the last piece of the document. Frequently, the last piece is empty (i.e. len is zero.) If a parse error occurred, it returns XML_STATUS_ERROR. Otherwise it returns XML_STATUS_OK value.

XML_ParseBuffer

 enum XML_Status XMLCALL
 XML_ParseBuffer(XML_Parser p,
                 int len,
                 int isFinal);
 
This is just like XML_Parse, except in this case Expat provides the buffer. By obtaining the buffer from Expat with the XML_GetBuffer function, the application can avoid double copying of the input.

XML_GetBuffer

 void * XMLCALL
 XML_GetBuffer(XML_Parser p,
               int len);
 
Obtain a buffer of size len to read a piece of the document into. A NULL value is returned if Expat can't allocate enough memory for this buffer. A NULL value may also be returned if len is zero. This has to be called prior to every call to XML_ParseBuffer. A typical use would look like this:
 for (;;) {
   int bytes_read;
   void *buff = XML_GetBuffer(p, BUFF_SIZE);
   if (buff == NULL) {
     /* handle error */
   }
 
   bytes_read = read(docfd, buff, BUFF_SIZE);
   if (bytes_read < 0) {
     /* handle error */
   }
 
   if (! XML_ParseBuffer(p, bytes_read, bytes_read == 0)) {
     /* handle parse error */
   }
 
   if (bytes_read == 0)
     break;
 }
 

XML_StopParser

 enum XML_Status XMLCALL
 XML_StopParser(XML_Parser p,
                XML_Bool resumable);
 

Stops parsing, causing XML_Parse or XML_ParseBuffer to return. Must be called from within a call-back handler, except when aborting (when resumable is XML_FALSE) an already suspended parser. Some call-backs may still follow because they would otherwise get lost, including

  • the end element handler for empty elements when stopped in the start element handler,
  • the end namespace declaration handler when stopped in the end element handler,
  • the character data handler when stopped in the character data handler while making multiple call-backs on a contiguous chunk of characters,

and possibly others.

This can be called from most handlers, including DTD related call-backs, except when parsing an external parameter entity and resumable is XML_TRUE. Returns XML_STATUS_OK when successful, XML_STATUS_ERROR otherwise. The possible error codes are:

XML_ERROR_SUSPENDED
when suspending an already suspended parser.
XML_ERROR_FINISHED
when the parser has already finished.
XML_ERROR_SUSPEND_PE
when suspending while parsing an external PE.

Since the stop/resume feature requires application support in the outer parsing loop, it is an error to call this function for a parser not being handled appropriately; see Temporarily Stopping Parsing for more information.

When resumable is XML_TRUE then parsing is suspended, that is, XML_Parse and XML_ParseBuffer return XML_STATUS_SUSPENDED. Otherwise, parsing is aborted, that is, XML_Parse and XML_ParseBuffer return XML_STATUS_ERROR with error code XML_ERROR_ABORTED.

Note: This will be applied to the current parser instance only, that is, if there is a parent parser then it will continue parsing when the external entity reference handler returns. It is up to the implementation of that handler to call XML_StopParser on the parent parser (recursively), if one wants to stop parsing altogether.

When suspended, parsing can be resumed by calling XML_ResumeParser.

New in Expat 1.95.8.

XML_ResumeParser

 enum XML_Status XMLCALL
 XML_ResumeParser(XML_Parser p);
 

Resumes parsing after it has been suspended with XML_StopParser. Must not be called from within a handler call-back. Returns same status codes as XML_Parse or XML_ParseBuffer. An additional error code, XML_ERROR_NOT_SUSPENDED, will be returned if the parser was not currently suspended.

Note: This must be called on the most deeply nested child parser instance first, and on its parent parser only after the child parser has finished, to be applied recursively until the document entity's parser is restarted. That is, the parent parser will not resume by itself and it is up to the application to call XML_ResumeParser on it at the appropriate moment.

New in Expat 1.95.8.

XML_GetParsingStatus

 void XMLCALL
 XML_GetParsingStatus(XML_Parser p,
                      XML_ParsingStatus *status);
 
 enum XML_Parsing {
   XML_INITIALIZED,
   XML_PARSING,
   XML_FINISHED,
   XML_SUSPENDED
 };
 
 typedef struct {
   enum XML_Parsing parsing;
   XML_Bool finalBuffer;
 } XML_ParsingStatus;
 

Returns status of parser with respect to being initialized, parsing, finished, or suspended, and whether the final buffer is being processed. The status parameter must not be NULL.

New in Expat 1.95.8.

Handler Setting

Although handlers are typically set prior to parsing and left alone, an application may choose to set or change the handler for a parsing event while the parse is in progress. For instance, your application may choose to ignore all text not descended from a para element. One way it could do this is to set the character handler when a para start tag is seen, and unset it for the corresponding end tag.

A handler may be unset by providing a NULL pointer to the appropriate handler setter. None of the handler setting functions have a return value.

Your handlers will be receiving strings in arrays of type XML_Char. This type is conditionally defined in expat.h as either char, wchar_t or unsigned short. The former implies UTF-8 encoding, the latter two imply UTF-16 encoding. Note that you'll receive them in this form independent of the original encoding of the document.

XML_SetStartElementHandler

 void XMLCALL
 XML_SetStartElementHandler(XML_Parser p,
                            XML_StartElementHandler start);
 
 typedef void
 (XMLCALL *XML_StartElementHandler)(void *userData,
                                    const XML_Char *name,
                                    const XML_Char **atts);
 

Set handler for start (and empty) tags. Attributes are passed to the start handler as a pointer to a vector of char pointers. Each attribute seen in a start (or empty) tag occupies 2 consecutive places in this vector: the attribute name followed by the attribute value. These pairs are terminated by a null pointer.

Note that an empty tag generates a call to both start and end handlers (in that order).

XML_SetEndElementHandler

 void XMLCALL
 XML_SetEndElementHandler(XML_Parser p,
                          XML_EndElementHandler);
 
 typedef void
 (XMLCALL *XML_EndElementHandler)(void *userData,
                                  const XML_Char *name);
 

Set handler for end (and empty) tags. As noted above, an empty tag generates a call to both start and end handlers.

XML_SetElementHandler

 void XMLCALL
 XML_SetElementHandler(XML_Parser p,
                       XML_StartElementHandler start,
                       XML_EndElementHandler end);
 

Set handlers for start and end tags with one call.

XML_SetCharacterDataHandler

 void XMLCALL
 XML_SetCharacterDataHandler(XML_Parser p,
                             XML_CharacterDataHandler charhndl)
 
 typedef void
 (XMLCALL *XML_CharacterDataHandler)(void *userData,
                                     const XML_Char *s,
                                     int len);
 

Set a text handler. The string your handler receives is NOT null-terminated. You have to use the length argument to deal with the end of the string. A single block of contiguous text free of markup may still result in a sequence of calls to this handler. In other words, if you're searching for a pattern in the text, it may be split across calls to this handler. Note: Setting this handler to NULL may NOT immediately terminate call-backs if the parser is currently processing such a single block of contiguous markup-free text, as the parser will continue calling back until the end of the block is reached.

XML_SetProcessingInstructionHandler

 void XMLCALL
 XML_SetProcessingInstructionHandler(XML_Parser p,
                                     XML_ProcessingInstructionHandler proc)
 
 typedef void
 (XMLCALL *XML_ProcessingInstructionHandler)(void *userData,
                                             const XML_Char *target,
                                             const XML_Char *data);
 
 

Set a handler for processing instructions. The target is the first word in the processing instruction. The data is the rest of the characters in it after skipping all whitespace after the initial word.

XML_SetCommentHandler

 void XMLCALL
 XML_SetCommentHandler(XML_Parser p,
                       XML_CommentHandler cmnt)
 
 typedef void
 (XMLCALL *XML_CommentHandler)(void *userData,
                               const XML_Char *data);
 

Set a handler for comments. The data is all text inside the comment delimiters.

XML_SetStartCdataSectionHandler

 void XMLCALL
 XML_SetStartCdataSectionHandler(XML_Parser p,
                                 XML_StartCdataSectionHandler start);
 
 typedef void
 (XMLCALL *XML_StartCdataSectionHandler)(void *userData);
 

Set a handler that gets called at the beginning of a CDATA section.

XML_SetEndCdataSectionHandler

 void XMLCALL
 XML_SetEndCdataSectionHandler(XML_Parser p,
                               XML_EndCdataSectionHandler end);
 
 typedef void
 (XMLCALL *XML_EndCdataSectionHandler)(void *userData);
 

Set a handler that gets called at the end of a CDATA section.

XML_SetCdataSectionHandler

 void XMLCALL
 XML_SetCdataSectionHandler(XML_Parser p,
                            XML_StartCdataSectionHandler start,
                            XML_EndCdataSectionHandler end)
 

Sets both CDATA section handlers with one call.

XML_SetDefaultHandler

 void XMLCALL
 XML_SetDefaultHandler(XML_Parser p,
                       XML_DefaultHandler hndl)
 
 typedef void
 (XMLCALL *XML_DefaultHandler)(void *userData,
                               const XML_Char *s,
                               int len);
 

Sets a handler for any characters in the document which wouldn't otherwise be handled. This includes both data for which no handlers can be set (like some kinds of DTD declarations) and data which could be reported but which currently has no handler set. The characters are passed exactly as they were present in the XML document except that they will be encoded in UTF-8 or UTF-16. Line boundaries are not normalized. Note that a byte order mark character is not passed to the default handler. There are no guarantees about how characters are divided between calls to the default handler: for example, a comment might be split between multiple calls. Setting the handler with this call has the side effect of turning off expansion of references to internally defined general entities. Instead these references are passed to the default handler.

See also XML_DefaultCurrent.

XML_SetDefaultHandlerExpand

 void XMLCALL
 XML_SetDefaultHandlerExpand(XML_Parser p,
                             XML_DefaultHandler hndl)
 
 typedef void
 (XMLCALL *XML_DefaultHandler)(void *userData,
                               const XML_Char *s,
                               int len);
 

This sets a default handler, but doesn't inhibit the expansion of internal entity references. The entity reference will not be passed to the default handler.

See also XML_DefaultCurrent.

XML_SetExternalEntityRefHandler

 void XMLCALL
 XML_SetExternalEntityRefHandler(XML_Parser p,
                                 XML_ExternalEntityRefHandler hndl)
 
 typedef int
 (XMLCALL *XML_ExternalEntityRefHandler)(XML_Parser p,
                                         const XML_Char *context,
                                         const XML_Char *base,
                                         const XML_Char *systemId,
                                         const XML_Char *publicId);
 

Set an external entity reference handler. This handler is also called for processing an external DTD subset if parameter entity parsing is in effect. (See XML_SetParamEntityParsing.)

The context parameter specifies the parsing context in the format expected by the context argument to XML_ExternalEntityParserCreate. code is valid only until the handler returns, so if the referenced entity is to be parsed later, it must be copied. context is NULL only when the entity is a parameter entity, which is how one can differentiate between general and parameter entities.

The base parameter is the base to use for relative system identifiers. It is set by XML_SetBase and may be NULL. The publicId parameter is the public id given in the entity declaration and may be NULL. systemId is the system identifier specified in the entity declaration and is never NULL.

There are a couple of ways in which this handler differs from others. First, this handler returns a status indicator (an integer). XML_STATUS_OK should be returned for successful handling of the external entity reference. Returning XML_STATUS_ERROR indicates failure, and causes the calling parser to return an XML_ERROR_EXTERNAL_ENTITY_HANDLING error.

Second, instead of having the user data as its first argument, it receives the parser that encountered the entity reference. This, along with the context parameter, may be used as arguments to a call to XML_ExternalEntityParserCreate. Using the returned parser, the body of the external entity can be recursively parsed.

Since this handler may be called recursively, it should not be saving information into global or static variables.

XML_SetExternalEntityRefHandlerArg

 void XMLCALL
 XML_SetExternalEntityRefHandlerArg(XML_Parser p,
                                    void *arg)
 

Set the argument passed to the ExternalEntityRefHandler. If arg is not NULL, it is the new value passed to the handler set using XML_SetExternalEntityRefHandler; if arg is NULL, the argument passed to the handler function will be the parser object itself.

Note: The type of arg and the type of the first argument to the ExternalEntityRefHandler do not match. This function takes a void * to be passed to the handler, while the handler accepts an XML_Parser. This is a historical accident, but will not be corrected before Expat 2.0 (at the earliest) to avoid causing compiler warnings for code that's known to work with this API. It is the responsibility of the application code to know the actual type of the argument passed to the handler and to manage it properly.

XML_SetSkippedEntityHandler

 void XMLCALL
 XML_SetSkippedEntityHandler(XML_Parser p,
                             XML_SkippedEntityHandler handler)
 
 typedef void
 (XMLCALL *XML_SkippedEntityHandler)(void *userData,
                                     const XML_Char *entityName,
                                     int is_parameter_entity);
 

Set a skipped entity handler. This is called in two situations:

  1. An entity reference is encountered for which no declaration has been read and this is not an error.
  2. An internal entity reference is read, but not expanded, because XML_SetDefaultHandler has been called.

The is_parameter_entity argument will be non-zero for a parameter entity and zero for a general entity.

Note: Skipped parameter entities in declarations and skipped general entities in attribute values cannot be reported, because the event would be out of sync with the reporting of the declarations or attribute values

XML_SetUnknownEncodingHandler

 void XMLCALL
 XML_SetUnknownEncodingHandler(XML_Parser p,
                               XML_UnknownEncodingHandler enchandler,
 			      void *encodingHandlerData)
 
 typedef int
 (XMLCALL *XML_UnknownEncodingHandler)(void *encodingHandlerData,
                                       const XML_Char *name,
                                       XML_Encoding *info);
 
 typedef struct {
   int map[256];
   void *data;
   int (XMLCALL *convert)(void *data, const char *s);
   void (XMLCALL *release)(void *data);
 } XML_Encoding;
 

Set a handler to deal with encodings other than the built in set. This should be done before XML_Parse or XML_ParseBuffer have been called on the given parser.

If the handler knows how to deal with an encoding with the given name, it should fill in the info data structure and return XML_STATUS_OK. Otherwise it should return XML_STATUS_ERROR. The handler will be called at most once per parsed (external) entity. The optional application data pointer encodingHandlerData will be passed back to the handler.

The map array contains information for every possible leading byte in a byte sequence. If the corresponding value is >= 0, then it's a single byte sequence and the byte encodes that Unicode value. If the value is -1, then that byte is invalid as the initial byte in a sequence. If the value is -n, where n is an integer > 1, then n is the number of bytes in the sequence and the actual conversion is accomplished by a call to the function pointed at by convert. This function may return -1 if the sequence itself is invalid. The convert pointer may be null if there are only single byte codes. The data parameter passed to the convert function is the data pointer from XML_Encoding. The string s is NOT null-terminated and points at the sequence of bytes to be converted.

The function pointed at by release is called by the parser when it is finished with the encoding. It may be NULL.

XML_SetStartNamespaceDeclHandler

 void XMLCALL
 XML_SetStartNamespaceDeclHandler(XML_Parser p,
 			         XML_StartNamespaceDeclHandler start);
 
 typedef void
 (XMLCALL *XML_StartNamespaceDeclHandler)(void *userData,
                                          const XML_Char *prefix,
                                          const XML_Char *uri);
 

Set a handler to be called when a namespace is declared. Namespace declarations occur inside start tags. But the namespace declaration start handler is called before the start tag handler for each namespace declared in that start tag.

XML_SetEndNamespaceDeclHandler

 void XMLCALL
 XML_SetEndNamespaceDeclHandler(XML_Parser p,
 			       XML_EndNamespaceDeclHandler end);
 
 typedef void
 (XMLCALL *XML_EndNamespaceDeclHandler)(void *userData,
                                        const XML_Char *prefix);
 

Set a handler to be called when leaving the scope of a namespace declaration. This will be called, for each namespace declaration, after the handler for the end tag of the element in which the namespace was declared.

XML_SetNamespaceDeclHandler

 void XMLCALL
 XML_SetNamespaceDeclHandler(XML_Parser p,
                             XML_StartNamespaceDeclHandler start,
                             XML_EndNamespaceDeclHandler end)
 

Sets both namespace declaration handlers with a single call.

XML_SetXmlDeclHandler

 void XMLCALL
 XML_SetXmlDeclHandler(XML_Parser p,
 		      XML_XmlDeclHandler xmldecl);
 
 typedef void
 (XMLCALL *XML_XmlDeclHandler)(void            *userData,
                               const XML_Char  *version,
                               const XML_Char  *encoding,
                               int             standalone);
 

Sets a handler that is called for XML declarations and also for text declarations discovered in external entities. The way to distinguish is that the version parameter will be NULL for text declarations. The encoding parameter may be NULL for an XML declaration. The standalone argument will contain -1, 0, or 1 indicating respectively that there was no standalone parameter in the declaration, that it was given as no, or that it was given as yes.

XML_SetStartDoctypeDeclHandler

 void XMLCALL
 XML_SetStartDoctypeDeclHandler(XML_Parser p,
 			       XML_StartDoctypeDeclHandler start);
 
 typedef void
 (XMLCALL *XML_StartDoctypeDeclHandler)(void           *userData,
                                        const XML_Char *doctypeName,
                                        const XML_Char *sysid,
                                        const XML_Char *pubid,
                                        int            has_internal_subset);
 

Set a handler that is called at the start of a DOCTYPE declaration, before any external or internal subset is parsed. Both sysid and pubid may be NULL. The has_internal_subset will be non-zero if the DOCTYPE declaration has an internal subset.

XML_SetEndDoctypeDeclHandler

 void XMLCALL
 XML_SetEndDoctypeDeclHandler(XML_Parser p,
 			     XML_EndDoctypeDeclHandler end);
 
 typedef void
 (XMLCALL *XML_EndDoctypeDeclHandler)(void *userData);
 

Set a handler that is called at the end of a DOCTYPE declaration, after parsing any external subset.

XML_SetDoctypeDeclHandler

 void XMLCALL
 XML_SetDoctypeDeclHandler(XML_Parser p,
 			  XML_StartDoctypeDeclHandler start,
 			  XML_EndDoctypeDeclHandler end);
 

Set both doctype handlers with one call.

XML_SetElementDeclHandler

 void XMLCALL
 XML_SetElementDeclHandler(XML_Parser p,
 			  XML_ElementDeclHandler eldecl);
 
 typedef void
 (XMLCALL *XML_ElementDeclHandler)(void *userData,
                                   const XML_Char *name,
                                   XML_Content *model);
 
 enum XML_Content_Type {
   XML_CTYPE_EMPTY = 1,
   XML_CTYPE_ANY,
   XML_CTYPE_MIXED,
   XML_CTYPE_NAME,
   XML_CTYPE_CHOICE,
   XML_CTYPE_SEQ
 };
 
 enum XML_Content_Quant {
   XML_CQUANT_NONE,
   XML_CQUANT_OPT,
   XML_CQUANT_REP,
   XML_CQUANT_PLUS
 };
 
 typedef struct XML_cp XML_Content;
 
 struct XML_cp {
   enum XML_Content_Type		type;
   enum XML_Content_Quant	quant;
   const XML_Char *		name;
   unsigned int			numchildren;
   XML_Content *			children;
 };
 

Sets a handler for element declarations in a DTD. The handler gets called with the name of the element in the declaration and a pointer -to a structure that contains the element model. It is the -application's responsibility to free this data structure using -XML_FreeContentModel.

+to a structure that contains the element model. It's the user code's +responsibility to free model when finished with it. See +XML_FreeContentModel. +There is no need to free the model from the handler, it can be kept +around and freed at a later stage.

The model argument is the root of a tree of XML_Content nodes. If type equals XML_CTYPE_EMPTY or XML_CTYPE_ANY, then quant will be XML_CQUANT_NONE, and the other fields will be zero or NULL. If type is XML_CTYPE_MIXED, then quant will be XML_CQUANT_NONE or XML_CQUANT_REP and numchildren will contain the number of elements that are allowed to be mixed in and children points to an array of XML_Content structures that will all have type XML_CTYPE_NAME with no quantification. Only the root node can be type XML_CTYPE_EMPTY, XML_CTYPE_ANY, or XML_CTYPE_MIXED.

For type XML_CTYPE_NAME, the name field points to the name and the numchildren and children fields will be zero and NULL. The quant field will indicate any quantifiers placed on the name.

Types XML_CTYPE_CHOICE and XML_CTYPE_SEQ indicate a choice or sequence respectively. The numchildren field indicates how many nodes in the choice or sequence and children points to the nodes.

XML_SetAttlistDeclHandler

 void XMLCALL
 XML_SetAttlistDeclHandler(XML_Parser p,
                           XML_AttlistDeclHandler attdecl);
 
 typedef void
 (XMLCALL *XML_AttlistDeclHandler)(void           *userData,
                                   const XML_Char *elname,
                                   const XML_Char *attname,
                                   const XML_Char *att_type,
                                   const XML_Char *dflt,
                                   int            isrequired);
 

Set a handler for attlist declarations in the DTD. This handler is called for each attribute. So a single attlist declaration with multiple attributes declared will generate multiple calls to this handler. The elname parameter returns the name of the element for which the attribute is being declared. The attribute name is in the attname parameter. The attribute type is in the att_type parameter. It is the string representing the type in the declaration with whitespace removed.

The dflt parameter holds the default value. It will be NULL in the case of "#IMPLIED" or "#REQUIRED" attributes. You can distinguish these two cases by checking the isrequired parameter, which will be true in the case of "#REQUIRED" attributes. Attributes which are "#FIXED" will have also have a true isrequired, but they will have the non-NULL fixed value in the dflt parameter.

XML_SetEntityDeclHandler

 void XMLCALL
 XML_SetEntityDeclHandler(XML_Parser p,
 			 XML_EntityDeclHandler handler);
 
 typedef void
 (XMLCALL *XML_EntityDeclHandler)(void           *userData,
                                  const XML_Char *entityName,
                                  int            is_parameter_entity,
                                  const XML_Char *value,
                                  int            value_length, 
                                  const XML_Char *base,
                                  const XML_Char *systemId,
                                  const XML_Char *publicId,
                                  const XML_Char *notationName);
 

Sets a handler that will be called for all entity declarations. The is_parameter_entity argument will be non-zero in the case of parameter entities and zero otherwise.

For internal entities (<!ENTITY foo "bar">), value will be non-NULL and systemId, publicId, and notationName will all be NULL. The value string is not NULL terminated; the length is provided in the value_length parameter. Do not use value_length to test for internal entities, since it is legal to have zero-length values. Instead check for whether or not value is NULL.

The notationName argument will have a non-NULL value only for unparsed entity declarations.

XML_SetUnparsedEntityDeclHandler

 void XMLCALL
 XML_SetUnparsedEntityDeclHandler(XML_Parser p,
                                  XML_UnparsedEntityDeclHandler h)
 
 typedef void
 (XMLCALL *XML_UnparsedEntityDeclHandler)(void *userData,
                                          const XML_Char *entityName, 
                                          const XML_Char *base,
                                          const XML_Char *systemId,
                                          const XML_Char *publicId,
                                          const XML_Char *notationName);
 

Set a handler that receives declarations of unparsed entities. These are entity declarations that have a notation (NDATA) field:

 <!ENTITY logo SYSTEM "images/logo.gif" NDATA gif>
 

This handler is obsolete and is provided for backwards compatibility. Use instead XML_SetEntityDeclHandler.

XML_SetNotationDeclHandler

 void XMLCALL
 XML_SetNotationDeclHandler(XML_Parser p,
                            XML_NotationDeclHandler h)
 
 typedef void
 (XMLCALL *XML_NotationDeclHandler)(void *userData, 
                                    const XML_Char *notationName,
                                    const XML_Char *base,
                                    const XML_Char *systemId,
                                    const XML_Char *publicId);
 

Set a handler that receives notation declarations.

XML_SetNotStandaloneHandler

 void XMLCALL
 XML_SetNotStandaloneHandler(XML_Parser p,
                             XML_NotStandaloneHandler h)
 
 typedef int 
 (XMLCALL *XML_NotStandaloneHandler)(void *userData);
 

Set a handler that is called if the document is not "standalone". This happens when there is an external subset or a reference to a parameter entity, but does not have standalone set to "yes" in an XML declaration. If this handler returns XML_STATUS_ERROR, then the parser will throw an XML_ERROR_NOT_STANDALONE error.

Parse position and error reporting functions

These are the functions you'll want to call when the parse functions return XML_STATUS_ERROR (a parse error has occurred), although the position reporting functions are useful outside of errors. The position reported is the byte position (in the original document or entity encoding) of the first of the sequence of characters that generated the current event (or the error that caused the parse functions to return XML_STATUS_ERROR.) The exceptions are callbacks triggered by declarations in the document prologue, in which case they exact position reported is somewhere in the relevant markup, but not necessarily as meaningful as for other events.

The position reporting functions are accurate only outside of the DTD. In other words, they usually return bogus information when called from within a DTD declaration handler.

XML_GetErrorCode

 enum XML_Error XMLCALL
 XML_GetErrorCode(XML_Parser p);
 
Return what type of error has occurred.

XML_ErrorString

 const XML_LChar * XMLCALL
 XML_ErrorString(enum XML_Error code);
 
Return a string describing the error corresponding to code. The code should be one of the enums that can be returned from XML_GetErrorCode.

XML_GetCurrentByteIndex

 XML_Index XMLCALL
 XML_GetCurrentByteIndex(XML_Parser p);
 
Return the byte offset of the position. This always corresponds to the values returned by XML_GetCurrentLineNumber and XML_GetCurrentColumnNumber.

XML_GetCurrentLineNumber

 XML_Size XMLCALL
 XML_GetCurrentLineNumber(XML_Parser p);
 
Return the line number of the position. The first line is reported as 1.

XML_GetCurrentColumnNumber

 XML_Size XMLCALL
 XML_GetCurrentColumnNumber(XML_Parser p);
 
Return the offset, from the beginning of the current line, of the position.

XML_GetCurrentByteCount

 int XMLCALL
 XML_GetCurrentByteCount(XML_Parser p);
 
Return the number of bytes in the current event. Returns 0 if the event is inside a reference to an internal entity and for the end-tag event for empty element tags (the later can be used to distinguish empty-element tags from empty elements using separate start and end tags).

XML_GetInputContext

 const char * XMLCALL
 XML_GetInputContext(XML_Parser p,
                     int *offset,
                     int *size);
 

Returns the parser's input buffer, sets the integer pointed at by offset to the offset within this buffer of the current parse position, and set the integer pointed at by size to the size of the returned buffer.

This should only be called from within a handler during an active parse and the returned buffer should only be referred to from within the handler that made the call. This input buffer contains the untranslated bytes of the input.

Only a limited amount of context is kept, so if the event triggering a call spans over a very large amount of input, the actual parse position may be before the beginning of the buffer.

If XML_CONTEXT_BYTES is not defined, this will always return NULL.

Billion Laughs Attack Protection

The functions in this section configure the built-in protection against various forms of billion laughs attacks.

XML_SetBillionLaughsAttackProtectionMaximumAmplification

 /* Added in Expat 2.4.0. */
 XML_Bool XMLCALL
 XML_SetBillionLaughsAttackProtectionMaximumAmplification(XML_Parser p,
                                                          float maximumAmplificationFactor);
 

Sets the maximum tolerated amplification factor for protection against billion laughs attacks (default: 100.0) of parser p to maximumAmplificationFactor, and returns XML_TRUE upon success and XML_FALSE upon error.

The amplification factor is calculated as ..
     amplification := (direct + indirect) / direct
   
.. while parsing, whereas direct is the number of bytes read from the primary document in parsing and indirect is the number of bytes added by expanding entities and reading of external DTD files, combined.

For a call to XML_SetBillionLaughsAttackProtectionMaximumAmplification to succeed:

  • parser p must be a non-NULL root parser (without any parent parsers) and
  • maximumAmplificationFactor must be non-NaN and greater than or equal to 1.0.

Note: If you ever need to increase this value for non-attack payload, please file a bug report.

Note: Peak amplifications of factor 15,000 for the entire payload and of factor 30,000 in the middle of parsing have been observed with small benign files in practice. So if you do reduce the maximum allowed amplification, please make sure that the activation threshold is still big enough to not end up with undesired false positives (i.e. benign files being rejected).

XML_SetBillionLaughsAttackProtectionActivationThreshold

 /* Added in Expat 2.4.0. */
 XML_Bool XMLCALL
 XML_SetBillionLaughsAttackProtectionActivationThreshold(XML_Parser p,
                                                         unsigned long long activationThresholdBytes);
 

Sets number of output bytes (including amplification from entity expansion and reading DTD files) needed to activate protection against billion laughs attacks (default: 8 MiB) of parser p to activationThresholdBytes, and returns XML_TRUE upon success and XML_FALSE upon error.

For a call to XML_SetBillionLaughsAttackProtectionActivationThreshold to succeed:

  • parser p must be a non-NULL root parser (without any parent parsers).

Note: If you ever need to increase this value for non-attack payload, please file a bug report.

Note: Activation thresholds below 4 MiB are known to break support for DITA 1.3 payload and are hence not recommended.

Miscellaneous functions

The functions in this section either obtain state information from the parser or can be used to dynamically set parser options.

XML_SetUserData

 void XMLCALL
 XML_SetUserData(XML_Parser p,
                 void *userData);
 
This sets the user data pointer that gets passed to handlers. It overwrites any previous value for this pointer. Note that the application is responsible for freeing the memory associated with userData when it is finished with the parser. So if you call this when there's already a pointer there, and you haven't freed the memory associated with it, then you've probably just leaked memory.

XML_GetUserData

 void * XMLCALL
 XML_GetUserData(XML_Parser p);
 
This returns the user data pointer that gets passed to handlers. It is actually implemented as a macro.

XML_UseParserAsHandlerArg

 void XMLCALL
 XML_UseParserAsHandlerArg(XML_Parser p);
 
After this is called, handlers receive the parser in their userData arguments. The user data can still be obtained using the XML_GetUserData function.

XML_SetBase

 enum XML_Status XMLCALL
 XML_SetBase(XML_Parser p,
             const XML_Char *base);
 
Set the base to be used for resolving relative URIs in system identifiers. The return value is XML_STATUS_ERROR if there's no memory to store base, otherwise it's XML_STATUS_OK.

XML_GetBase

 const XML_Char * XMLCALL
 XML_GetBase(XML_Parser p);
 
Return the base for resolving relative URIs.

XML_GetSpecifiedAttributeCount

 int XMLCALL
 XML_GetSpecifiedAttributeCount(XML_Parser p);
 
When attributes are reported to the start handler in the atts vector, attributes that were explicitly set in the element occur before any attributes that receive their value from default information in an ATTLIST declaration. This function returns the number of attributes that were explicitly set times two, thus giving the offset in the atts array passed to the start tag handler of the first attribute set due to defaults. It supplies information for the last call to a start handler. If called inside a start handler, then that means the current call.

XML_GetIdAttributeIndex

 int XMLCALL
 XML_GetIdAttributeIndex(XML_Parser p);
 
Returns the index of the ID attribute passed in the atts array in the last call to XML_StartElementHandler, or -1 if there is no ID attribute. If called inside a start handler, then that means the current call.

XML_GetAttributeInfo

 const XML_AttrInfo * XMLCALL
 XML_GetAttributeInfo(XML_Parser parser);
 
 typedef struct {
   XML_Index  nameStart;  /* Offset to beginning of the attribute name. */
   XML_Index  nameEnd;    /* Offset after the attribute name's last byte. */
   XML_Index  valueStart; /* Offset to beginning of the attribute value. */
   XML_Index  valueEnd;   /* Offset after the attribute value's last byte. */
 } XML_AttrInfo;
 
Returns an array of XML_AttrInfo structures for the attribute/value pairs passed in the last call to the XML_StartElementHandler that were specified in the start-tag rather than defaulted. Each attribute/value pair counts as 1; thus the number of entries in the array is XML_GetSpecifiedAttributeCount(parser) / 2.

XML_SetEncoding

 enum XML_Status XMLCALL
 XML_SetEncoding(XML_Parser p,
                 const XML_Char *encoding);
 
Set the encoding to be used by the parser. It is equivalent to passing a non-null encoding argument to the parser creation functions. It must not be called after XML_Parse or XML_ParseBuffer have been called on the given parser. Returns XML_STATUS_OK on success or XML_STATUS_ERROR on error.

XML_SetParamEntityParsing

 int XMLCALL
 XML_SetParamEntityParsing(XML_Parser p,
                           enum XML_ParamEntityParsing code);
 
This enables parsing of parameter entities, including the external parameter entity that is the external DTD subset, according to code. The choices for code are:
  • XML_PARAM_ENTITY_PARSING_NEVER
  • XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE
  • XML_PARAM_ENTITY_PARSING_ALWAYS
Note: If XML_SetParamEntityParsing is called after XML_Parse or XML_ParseBuffer, then it has no effect and will always return 0.

XML_SetHashSalt

 int XMLCALL
 XML_SetHashSalt(XML_Parser p,
                 unsigned long hash_salt);
 
Sets the hash salt to use for internal hash calculations. Helps in preventing DoS attacks based on predicting hash function behavior. In order to have an effect this must be called before parsing has started. Returns 1 if successful, 0 when called after XML_Parse or XML_ParseBuffer.

Note: This call is optional, as the parser will auto-generate a new random salt value if no value has been set at the start of parsing.

Note: One should not call XML_SetHashSalt with a hash salt value of 0, as this value is used as sentinel value to indicate that XML_SetHashSalt has not been called. Consequently such a call will have no effect, even if it returns 1.

XML_UseForeignDTD

 enum XML_Error XMLCALL
 XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD);
 

This function allows an application to provide an external subset for the document type declaration for documents which do not specify an external subset of their own. For documents which specify an external subset in their DOCTYPE declaration, the application-provided subset will be ignored. If the document does not contain a DOCTYPE declaration at all and useDTD is true, the application-provided subset will be parsed, but the startDoctypeDeclHandler and endDoctypeDeclHandler functions, if set, will not be called. The setting of parameter entity parsing, controlled using XML_SetParamEntityParsing, will be honored.

The application-provided external subset is read by calling the external entity reference handler set via XML_SetExternalEntityRefHandler with both publicId and systemId set to NULL.

If this function is called after parsing has begun, it returns XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING and ignores useDTD. If called when Expat has been compiled without DTD support, it returns XML_ERROR_FEATURE_REQUIRES_XML_DTD. Otherwise, it returns XML_ERROR_NONE.

Note: For the purpose of checking WFC: Entity Declared, passing useDTD == XML_TRUE will make the parser behave as if the document had a DTD with an external subset. This holds true even if the external entity reference handler returns without action.

XML_SetReturnNSTriplet

 void XMLCALL
 XML_SetReturnNSTriplet(XML_Parser parser,
                        int        do_nst);
 

This function only has an effect when using a parser created with XML_ParserCreateNS, i.e. when namespace processing is in effect. The do_nst sets whether or not prefixes are returned with names qualified with a namespace prefix. If this function is called with do_nst non-zero, then afterwards namespace qualified names (that is qualified with a prefix as opposed to belonging to a default namespace) are returned as a triplet with the three parts separated by the namespace separator specified when the parser was created. The order of returned parts is URI, local name, and prefix.

If do_nst is zero, then namespaces are reported in the default manner, URI then local_name separated by the namespace separator.

XML_DefaultCurrent

 void XMLCALL
 XML_DefaultCurrent(XML_Parser parser);
 
This can be called within a handler for a start element, end element, processing instruction or character data. It causes the corresponding markup to be passed to the default handler set by XML_SetDefaultHandler or XML_SetDefaultHandlerExpand. It does nothing if there is not a default handler.

XML_ExpatVersion

 XML_LChar * XMLCALL
 XML_ExpatVersion();
 
Return the library version as a string (e.g. "expat_1.95.1").

XML_ExpatVersionInfo

 struct XML_Expat_Version XMLCALL
 XML_ExpatVersionInfo();
 
 typedef struct {
   int major;
   int minor;
   int micro;
 } XML_Expat_Version;
 
Return the library version information as a structure. Some macros are also defined that support compile-time tests of the library version:
  • XML_MAJOR_VERSION
  • XML_MINOR_VERSION
  • XML_MICRO_VERSION
Testing these constants is currently the best way to determine if particular parts of the Expat API are available.

XML_GetFeatureList

 const XML_Feature * XMLCALL
 XML_GetFeatureList();
 
 enum XML_FeatureEnum {
   XML_FEATURE_END = 0,
   XML_FEATURE_UNICODE,
   XML_FEATURE_UNICODE_WCHAR_T,
   XML_FEATURE_DTD,
   XML_FEATURE_CONTEXT_BYTES,
   XML_FEATURE_MIN_SIZE,
   XML_FEATURE_SIZEOF_XML_CHAR,
   XML_FEATURE_SIZEOF_XML_LCHAR,
   XML_FEATURE_NS,
   XML_FEATURE_LARGE_SIZE
 };
 
 typedef struct {
   enum XML_FeatureEnum  feature;
   XML_LChar            *name;
   long int              value;
 } XML_Feature;
 

Returns a list of "feature" records, providing details on how Expat was configured at compile time. Most applications should not need to worry about this, but this information is otherwise not available from Expat. This function allows code that does need to check these features to do so at runtime.

The return value is an array of XML_Feature, terminated by a record with a feature of XML_FEATURE_END and name of NULL, identifying the feature-test macros Expat was compiled with. Since an application that requires this kind of information needs to determine the type of character the name points to, records for the XML_FEATURE_SIZEOF_XML_CHAR and XML_FEATURE_SIZEOF_XML_LCHAR will be located at the beginning of the list, followed by XML_FEATURE_UNICODE and XML_FEATURE_UNICODE_WCHAR_T, if they are present at all.

Some features have an associated value. If there isn't an associated value, the value field is set to 0. At this time, the following features have been defined to have values:

XML_FEATURE_SIZEOF_XML_CHAR
The number of bytes occupied by one XML_Char character.
XML_FEATURE_SIZEOF_XML_LCHAR
The number of bytes occupied by one XML_LChar character.
XML_FEATURE_CONTEXT_BYTES
The maximum number of characters of context which can be reported by XML_GetInputContext.

XML_FreeContentModel

 void XMLCALL
 XML_FreeContentModel(XML_Parser parser, XML_Content *model);
 
Function to deallocate the model argument passed to the XML_ElementDeclHandler callback set using XML_ElementDeclHandler. This function should not be used for any other purpose.

The following functions allow external code to share the memory allocator an XML_Parser has been configured to use. This is especially useful for third-party libraries that interact with a parser object created by application code, or heavily layered applications. This can be essential when using dynamically loaded libraries which use different C standard libraries (this can happen on Windows, at least).

XML_MemMalloc

 void * XMLCALL
 XML_MemMalloc(XML_Parser parser, size_t size);
 
Allocate size bytes of memory using the allocator the parser object has been configured to use. Returns a pointer to the memory or NULL on failure. Memory allocated in this way must be freed using XML_MemFree.

XML_MemRealloc

 void * XMLCALL
 XML_MemRealloc(XML_Parser parser, void *ptr, size_t size);
 
Allocate size bytes of memory using the allocator the parser object has been configured to use. ptr must point to a block of memory allocated by XML_MemMalloc or XML_MemRealloc, or be NULL. This function tries to expand the block pointed to by ptr if possible. Returns a pointer to the memory or NULL on failure. On success, the original block has either been expanded or freed. On failure, the original block has not been freed; the caller is responsible for freeing the original block. Memory allocated in this way must be freed using XML_MemFree.

XML_MemFree

 void XMLCALL
 XML_MemFree(XML_Parser parser, void *ptr);
 
Free a block of memory pointed to by ptr. The block must have been allocated by XML_MemMalloc or XML_MemRealloc, or be NULL.

diff --git a/contrib/expat/doc/xmlwf.1 b/contrib/expat/doc/xmlwf.1 index f931d63d4e1d..aa024e0abed5 100644 --- a/contrib/expat/doc/xmlwf.1 +++ b/contrib/expat/doc/xmlwf.1 @@ -1,324 +1,324 @@ '\" -*- coding: us-ascii -*- .if \n(.g .ds T< \\FC .if \n(.g .ds T> \\F[\n[.fam]] .de URL \\$2 \(la\\$1\(ra\\$3 .. .if \n(.g .mso www.tmac -.TH XMLWF 1 "February 20, 2022" "" "" +.TH XMLWF 1 "March 4, 2022" "" "" .SH NAME xmlwf \- Determines if an XML document is well-formed .SH SYNOPSIS 'nh .fi .ad l \fBxmlwf\fR \kx .if (\nx>(\n(.l/2)) .nr x (\n(.l/5) 'in \n(.iu+\nxu [\fIOPTIONS\fR] [\fIFILE\fR ...] 'in \n(.iu-\nxu .ad b 'hy 'nh .fi .ad l \fBxmlwf\fR \kx .if (\nx>(\n(.l/2)) .nr x (\n(.l/5) 'in \n(.iu+\nxu \fB-h\fR 'in \n(.iu-\nxu .ad b 'hy 'nh .fi .ad l \fBxmlwf\fR \kx .if (\nx>(\n(.l/2)) .nr x (\n(.l/5) 'in \n(.iu+\nxu \fB-v\fR 'in \n(.iu-\nxu .ad b 'hy .SH DESCRIPTION \fBxmlwf\fR uses the Expat library to determine if an XML document is well-formed. It is non-validating. .PP If you do not specify any files on the command-line, and you have a recent version of \fBxmlwf\fR, the input file will be read from standard input. .SH "WELL-FORMED DOCUMENTS" A well-formed document must adhere to the following rules: .TP 0.2i \(bu The file begins with an XML declaration. For instance, \*(T<\*(T>. \fINOTE\fR: \fBxmlwf\fR does not currently check for a valid XML declaration. .TP 0.2i \(bu Every start tag is either empty () or has a corresponding end tag. .TP 0.2i \(bu There is exactly one root element. This element must contain all other elements in the document. Only comments, white space, and processing instructions may come after the close of the root element. .TP 0.2i \(bu All elements nest properly. .TP 0.2i \(bu All attribute values are enclosed in quotes (either single or double). .PP If the document has a DTD, and it strictly complies with that DTD, then the document is also considered \fIvalid\fR. \fBxmlwf\fR is a non-validating parser -- it does not check the DTD. However, it does support external entities (see the \*(T<\fB\-x\fR\*(T> option). .SH OPTIONS When an option includes an argument, you may specify the argument either separately ("\*(T<\fB\-d\fR\*(T> \fIoutput\fR") or concatenated with the option ("\*(T<\fB\-d\fR\*(T>\fIoutput\fR"). \fBxmlwf\fR supports both. .TP \*(T<\fB\-a\fR\*(T> \fIfactor\fR Sets the maximum tolerated amplification factor for protection against billion laughs attacks (default: 100.0). The amplification factor is calculated as .. .nf amplification := (direct + indirect) / direct .fi \&.. while parsing, whereas is the number of bytes read from the primary document in parsing and is the number of bytes added by expanding entities and reading of external DTD files, combined. \fINOTE\fR: If you ever need to increase this value for non-attack payload, please file a bug report. .TP \*(T<\fB\-b\fR\*(T> \fIbytes\fR Sets the number of output bytes (including amplification) needed to activate protection against billion laughs attacks (default: 8 MiB). This can be thought of as an "activation threshold". \fINOTE\fR: If you ever need to increase this value for non-attack payload, please file a bug report. .TP \*(T<\fB\-c\fR\*(T> If the input file is well-formed and \fBxmlwf\fR doesn't encounter any errors, the input file is simply copied to the output directory unchanged. This implies no namespaces (turns off \*(T<\fB\-n\fR\*(T>) and requires \*(T<\fB\-d\fR\*(T> to specify an output directory. .TP \*(T<\fB\-d\fR\*(T> \fIoutput-dir\fR Specifies a directory to contain transformed representations of the input files. By default, \*(T<\fB\-d\fR\*(T> outputs a canonical representation (described below). You can select different output formats using \*(T<\fB\-c\fR\*(T>, \*(T<\fB\-m\fR\*(T> and \*(T<\fB\-N\fR\*(T>. The output filenames will be exactly the same as the input filenames or "STDIN" if the input is coming from standard input. Therefore, you must be careful that the output file does not go into the same directory as the input file. Otherwise, \fBxmlwf\fR will delete the input file before it generates the output file (just like running \*(T file\*(T> in most shells). Two structurally equivalent XML documents have a byte-for-byte identical canonical XML representation. Note that ignorable white space is considered significant and is treated equivalently to data. More on canonical XML can be found at http://www.jclark.com/xml/canonxml.html . .TP \*(T<\fB\-e\fR\*(T> \fIencoding\fR Specifies the character encoding for the document, overriding any document encoding declaration. \fBxmlwf\fR supports four built-in encodings: \*(T, \*(T, \*(T, and \*(T. Also see the \*(T<\fB\-w\fR\*(T> option. .TP \*(T<\fB\-k\fR\*(T> When processing multiple files, \fBxmlwf\fR by default halts after the the first file with an error. This tells \fBxmlwf\fR to report the error but to keep processing. This can be useful, for example, when testing a filter that converts many files to XML and you want to quickly find out which conversions failed. .TP \*(T<\fB\-m\fR\*(T> Outputs some strange sort of XML file that completely describes the input file, including character positions. Requires \*(T<\fB\-d\fR\*(T> to specify an output file. .TP \*(T<\fB\-n\fR\*(T> Turns on namespace processing. (describe namespaces) \*(T<\fB\-c\fR\*(T> disables namespaces. .TP \*(T<\fB\-N\fR\*(T> Adds a doctype and notation declarations to canonical XML output. This matches the example output used by the formal XML test cases. Requires \*(T<\fB\-d\fR\*(T> to specify an output file. .TP \*(T<\fB\-p\fR\*(T> Tells \fBxmlwf\fR to process external DTDs and parameter entities. Normally \fBxmlwf\fR never parses parameter entities. \*(T<\fB\-p\fR\*(T> tells it to always parse them. \*(T<\fB\-p\fR\*(T> implies \*(T<\fB\-x\fR\*(T>. .TP \*(T<\fB\-r\fR\*(T> Normally \fBxmlwf\fR memory-maps the XML file before parsing; this can result in faster parsing on many platforms. \*(T<\fB\-r\fR\*(T> turns off memory-mapping and uses normal file IO calls instead. Of course, memory-mapping is automatically turned off when reading from standard input. Use of memory-mapping can cause some platforms to report substantially higher memory usage for \fBxmlwf\fR, but this appears to be a matter of the operating system reporting memory in a strange way; there is not a leak in \fBxmlwf\fR. .TP \*(T<\fB\-s\fR\*(T> Prints an error if the document is not standalone. A document is standalone if it has no external subset and no references to parameter entities. .TP \*(T<\fB\-t\fR\*(T> Turns on timings. This tells Expat to parse the entire file, but not perform any processing. This gives a fairly accurate idea of the raw speed of Expat itself without client overhead. \*(T<\fB\-t\fR\*(T> turns off most of the output options (\*(T<\fB\-d\fR\*(T>, \*(T<\fB\-m\fR\*(T>, \*(T<\fB\-c\fR\*(T>, ...). .TP \*(T<\fB\-v\fR\*(T> Prints the version of the Expat library being used, including some information on the compile-time configuration of the library, and then exits. .TP \*(T<\fB\-w\fR\*(T> Enables support for Windows code pages. Normally, \fBxmlwf\fR will throw an error if it runs across an encoding that it is not equipped to handle itself. With \*(T<\fB\-w\fR\*(T>, \fBxmlwf\fR will try to use a Windows code page. See also \*(T<\fB\-e\fR\*(T>. .TP \*(T<\fB\-x\fR\*(T> Turns on parsing external entities. Non-validating parsers are not required to resolve external entities, or even expand entities at all. Expat always expands internal entities (?), but external entity parsing must be enabled explicitly. External entities are simply entities that obtain their data from outside the XML file currently being parsed. This is an example of an internal entity: .nf .fi And here are some examples of external entities: .nf (parsed) (unparsed) .fi .TP \*(T<\fB\-\-\fR\*(T> (Two hyphens.) Terminates the list of options. This is only needed if a filename starts with a hyphen. For example: .nf xmlwf \-\- \-myfile.xml .fi will run \fBxmlwf\fR on the file \*(T<\fI\-myfile.xml\fR\*(T>. .PP Older versions of \fBxmlwf\fR do not support reading from standard input. .SH OUTPUT \fBxmlwf\fR outputs nothing for files which are problem-free. If any input file is not well-formed, or if the output for any input file cannot be opened, \fBxmlwf\fR prints a single line describing the problem to standard output. .PP If the \*(T<\fB\-k\fR\*(T> option is not provided, \fBxmlwf\fR halts upon encountering a well-formedness or output-file error. If \*(T<\fB\-k\fR\*(T> is provided, \fBxmlwf\fR continues processing the remaining input files, describing problems found with any of them. .SH "EXIT STATUS" For option \*(T<\fB\-v\fR\*(T> or \*(T<\fB\-h\fR\*(T>, \fBxmlwf\fR always exits with status code 0. For other cases, the following exit status codes are returned: .TP \*(T<\fB0\fR\*(T> The input files are well-formed and the output (if requested) was written successfully. .TP \*(T<\fB1\fR\*(T> An internal error occurred. .TP \*(T<\fB2\fR\*(T> One or more input files were not well-formed or could not be parsed. .TP \*(T<\fB3\fR\*(T> If using the \*(T<\fB\-d\fR\*(T> option, an error occurred opening an output file. .TP \*(T<\fB4\fR\*(T> There was a command-line argument error in how \fBxmlwf\fR was invoked. .SH BUGS The errors should go to standard error, not standard output. .PP There should be a way to get \*(T<\fB\-d\fR\*(T> to send its output to standard output rather than forcing the user to send it to a file. .PP I have no idea why anyone would want to use the \*(T<\fB\-d\fR\*(T>, \*(T<\fB\-c\fR\*(T>, and \*(T<\fB\-m\fR\*(T> options. If someone could explain it to me, I'd like to add this information to this manpage. .SH "SEE ALSO" .nf The Expat home page: https://libexpat.github.io/ The W3 XML 1.0 specification (fourth edition): https://www.w3.org/TR/2006/REC\-xml\-20060816/ Billion laughs attack: https://en.wikipedia.org/wiki/Billion_laughs_attack .fi .SH AUTHOR This manual page was originally written by Scott Bronson <\*(T> in December 2001 for the Debian GNU/Linux system (but may be used by others). Permission is granted to copy, distribute and/or modify this document under the terms of the GNU Free Documentation License, Version 1.1. diff --git a/contrib/expat/doc/xmlwf.xml b/contrib/expat/doc/xmlwf.xml index 79ed58569ea9..8b43a11ef3a8 100644 --- a/contrib/expat/doc/xmlwf.xml +++ b/contrib/expat/doc/xmlwf.xml @@ -1,538 +1,538 @@ Scott"> Bronson"> - February 20, 2022"> + March 4, 2022"> 1"> bronson@rinspin.com"> XMLWF"> Debian GNU/Linux"> GNU"> ]>
&dhemail;
&dhfirstname; &dhsurname; 2001 &dhusername; &dhdate;
&dhucpackage; &dhsection; &dhpackage; Determines if an XML document is well-formed &dhpackage; OPTIONS FILE ... &dhpackage; &dhpackage; DESCRIPTION &dhpackage; uses the Expat library to determine if an XML document is well-formed. It is non-validating. If you do not specify any files on the command-line, and you have a recent version of &dhpackage;, the input file will be read from standard input. WELL-FORMED DOCUMENTS A well-formed document must adhere to the following rules: The file begins with an XML declaration. For instance, <?xml version="1.0" standalone="yes"?>. NOTE: &dhpackage; does not currently check for a valid XML declaration. Every start tag is either empty (<tag/>) or has a corresponding end tag. There is exactly one root element. This element must contain all other elements in the document. Only comments, white space, and processing instructions may come after the close of the root element. All elements nest properly. All attribute values are enclosed in quotes (either single or double). If the document has a DTD, and it strictly complies with that DTD, then the document is also considered valid. &dhpackage; is a non-validating parser -- it does not check the DTD. However, it does support external entities (see the option). OPTIONS When an option includes an argument, you may specify the argument either separately (" output") or concatenated with the option ("output"). &dhpackage; supports both. factor Sets the maximum tolerated amplification factor for protection against billion laughs attacks (default: 100.0). The amplification factor is calculated as .. amplification := (direct + indirect) / direct .. while parsing, whereas <direct> is the number of bytes read from the primary document in parsing and <indirect> is the number of bytes added by expanding entities and reading of external DTD files, combined. NOTE: If you ever need to increase this value for non-attack payload, please file a bug report. bytes Sets the number of output bytes (including amplification) needed to activate protection against billion laughs attacks (default: 8 MiB). This can be thought of as an "activation threshold". NOTE: If you ever need to increase this value for non-attack payload, please file a bug report. If the input file is well-formed and &dhpackage; doesn't encounter any errors, the input file is simply copied to the output directory unchanged. This implies no namespaces (turns off ) and requires to specify an output directory. output-dir Specifies a directory to contain transformed representations of the input files. By default, outputs a canonical representation (described below). You can select different output formats using , and . The output filenames will be exactly the same as the input filenames or "STDIN" if the input is coming from standard input. Therefore, you must be careful that the output file does not go into the same directory as the input file. Otherwise, &dhpackage; will delete the input file before it generates the output file (just like running cat < file > file in most shells). Two structurally equivalent XML documents have a byte-for-byte identical canonical XML representation. Note that ignorable white space is considered significant and is treated equivalently to data. More on canonical XML can be found at http://www.jclark.com/xml/canonxml.html . encoding Specifies the character encoding for the document, overriding any document encoding declaration. &dhpackage; supports four built-in encodings: US-ASCII, UTF-8, UTF-16, and ISO-8859-1. Also see the option. When processing multiple files, &dhpackage; by default halts after the the first file with an error. This tells &dhpackage; to report the error but to keep processing. This can be useful, for example, when testing a filter that converts many files to XML and you want to quickly find out which conversions failed. Outputs some strange sort of XML file that completely describes the input file, including character positions. Requires to specify an output file. Turns on namespace processing. (describe namespaces) disables namespaces. Adds a doctype and notation declarations to canonical XML output. This matches the example output used by the formal XML test cases. Requires to specify an output file. Tells &dhpackage; to process external DTDs and parameter entities. Normally &dhpackage; never parses parameter entities. tells it to always parse them. implies . Normally &dhpackage; memory-maps the XML file before parsing; this can result in faster parsing on many platforms. turns off memory-mapping and uses normal file IO calls instead. Of course, memory-mapping is automatically turned off when reading from standard input. Use of memory-mapping can cause some platforms to report substantially higher memory usage for &dhpackage;, but this appears to be a matter of the operating system reporting memory in a strange way; there is not a leak in &dhpackage;. Prints an error if the document is not standalone. A document is standalone if it has no external subset and no references to parameter entities. Turns on timings. This tells Expat to parse the entire file, but not perform any processing. This gives a fairly accurate idea of the raw speed of Expat itself without client overhead. turns off most of the output options (, , , ...). Prints the version of the Expat library being used, including some information on the compile-time configuration of the library, and then exits. Enables support for Windows code pages. Normally, &dhpackage; will throw an error if it runs across an encoding that it is not equipped to handle itself. With , &dhpackage; will try to use a Windows code page. See also . Turns on parsing external entities. Non-validating parsers are not required to resolve external entities, or even expand entities at all. Expat always expands internal entities (?), but external entity parsing must be enabled explicitly. External entities are simply entities that obtain their data from outside the XML file currently being parsed. This is an example of an internal entity: <!ENTITY vers '1.0.2'> And here are some examples of external entities: <!ENTITY header SYSTEM "header-&vers;.xml"> (parsed) <!ENTITY logo SYSTEM "logo.png" PNG> (unparsed) (Two hyphens.) Terminates the list of options. This is only needed if a filename starts with a hyphen. For example: &dhpackage; -- -myfile.xml will run &dhpackage; on the file -myfile.xml. Older versions of &dhpackage; do not support reading from standard input. OUTPUT &dhpackage; outputs nothing for files which are problem-free. If any input file is not well-formed, or if the output for any input file cannot be opened, &dhpackage; prints a single line describing the problem to standard output. If the option is not provided, &dhpackage; halts upon encountering a well-formedness or output-file error. If is provided, &dhpackage; continues processing the remaining input files, describing problems found with any of them. EXIT STATUS For option or , &dhpackage; always exits with status code 0. For other cases, the following exit status codes are returned: The input files are well-formed and the output (if requested) was written successfully. An internal error occurred. One or more input files were not well-formed or could not be parsed. If using the option, an error occurred opening an output file. There was a command-line argument error in how &dhpackage; was invoked. BUGS The errors should go to standard error, not standard output. There should be a way to get to send its output to standard output rather than forcing the user to send it to a file. I have no idea why anyone would want to use the , , and options. If someone could explain it to me, I'd like to add this information to this manpage. SEE ALSO The Expat home page: https://libexpat.github.io/ The W3 XML 1.0 specification (fourth edition): https://www.w3.org/TR/2006/REC-xml-20060816/ Billion laughs attack: https://en.wikipedia.org/wiki/Billion_laughs_attack AUTHOR This manual page was originally written by &dhusername; &dhemail; in December 2001 for the &debian; system (but may be used by others). Permission is granted to copy, distribute and/or modify this document under the terms of the GNU Free Documentation License, Version 1.1.
diff --git a/contrib/expat/lib/expat.h b/contrib/expat/lib/expat.h index 46a0e1bcd22d..c9214f64070a 100644 --- a/contrib/expat/lib/expat.h +++ b/contrib/expat/lib/expat.h @@ -1,1050 +1,1064 @@ /* __ __ _ ___\ \/ /_ __ __ _| |_ / _ \\ /| '_ \ / _` | __| | __// \| |_) | (_| | |_ \___/_/\_\ .__/ \__,_|\__| |_| XML parser Copyright (c) 1997-2000 Thai Open Source Software Center Ltd Copyright (c) 2000 Clark Cooper Copyright (c) 2000-2005 Fred L. Drake, Jr. Copyright (c) 2001-2002 Greg Stein Copyright (c) 2002-2016 Karl Waclawek Copyright (c) 2016-2022 Sebastian Pipping Copyright (c) 2016 Cristian Rodríguez Copyright (c) 2016 Thomas Beutlich Copyright (c) 2017 Rhodri James + Copyright (c) 2022 Thijs Schreijer Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef Expat_INCLUDED #define Expat_INCLUDED 1 #include #include "expat_external.h" #ifdef __cplusplus extern "C" { #endif struct XML_ParserStruct; typedef struct XML_ParserStruct *XML_Parser; typedef unsigned char XML_Bool; #define XML_TRUE ((XML_Bool)1) #define XML_FALSE ((XML_Bool)0) /* The XML_Status enum gives the possible return values for several API functions. The preprocessor #defines are included so this stanza can be added to code that still needs to support older versions of Expat 1.95.x: #ifndef XML_STATUS_OK #define XML_STATUS_OK 1 #define XML_STATUS_ERROR 0 #endif Otherwise, the #define hackery is quite ugly and would have been dropped. */ enum XML_Status { XML_STATUS_ERROR = 0, #define XML_STATUS_ERROR XML_STATUS_ERROR XML_STATUS_OK = 1, #define XML_STATUS_OK XML_STATUS_OK XML_STATUS_SUSPENDED = 2 #define XML_STATUS_SUSPENDED XML_STATUS_SUSPENDED }; enum XML_Error { XML_ERROR_NONE, XML_ERROR_NO_MEMORY, XML_ERROR_SYNTAX, XML_ERROR_NO_ELEMENTS, XML_ERROR_INVALID_TOKEN, XML_ERROR_UNCLOSED_TOKEN, XML_ERROR_PARTIAL_CHAR, XML_ERROR_TAG_MISMATCH, XML_ERROR_DUPLICATE_ATTRIBUTE, XML_ERROR_JUNK_AFTER_DOC_ELEMENT, XML_ERROR_PARAM_ENTITY_REF, XML_ERROR_UNDEFINED_ENTITY, XML_ERROR_RECURSIVE_ENTITY_REF, XML_ERROR_ASYNC_ENTITY, XML_ERROR_BAD_CHAR_REF, XML_ERROR_BINARY_ENTITY_REF, XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF, XML_ERROR_MISPLACED_XML_PI, XML_ERROR_UNKNOWN_ENCODING, XML_ERROR_INCORRECT_ENCODING, XML_ERROR_UNCLOSED_CDATA_SECTION, XML_ERROR_EXTERNAL_ENTITY_HANDLING, XML_ERROR_NOT_STANDALONE, XML_ERROR_UNEXPECTED_STATE, XML_ERROR_ENTITY_DECLARED_IN_PE, XML_ERROR_FEATURE_REQUIRES_XML_DTD, XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING, /* Added in 1.95.7. */ XML_ERROR_UNBOUND_PREFIX, /* Added in 1.95.8. */ XML_ERROR_UNDECLARING_PREFIX, XML_ERROR_INCOMPLETE_PE, XML_ERROR_XML_DECL, XML_ERROR_TEXT_DECL, XML_ERROR_PUBLICID, XML_ERROR_SUSPENDED, XML_ERROR_NOT_SUSPENDED, XML_ERROR_ABORTED, XML_ERROR_FINISHED, XML_ERROR_SUSPEND_PE, /* Added in 2.0. */ XML_ERROR_RESERVED_PREFIX_XML, XML_ERROR_RESERVED_PREFIX_XMLNS, XML_ERROR_RESERVED_NAMESPACE_URI, /* Added in 2.2.1. */ XML_ERROR_INVALID_ARGUMENT, /* Added in 2.3.0. */ XML_ERROR_NO_BUFFER, /* Added in 2.4.0. */ XML_ERROR_AMPLIFICATION_LIMIT_BREACH }; enum XML_Content_Type { XML_CTYPE_EMPTY = 1, XML_CTYPE_ANY, XML_CTYPE_MIXED, XML_CTYPE_NAME, XML_CTYPE_CHOICE, XML_CTYPE_SEQ }; enum XML_Content_Quant { XML_CQUANT_NONE, XML_CQUANT_OPT, XML_CQUANT_REP, XML_CQUANT_PLUS }; /* If type == XML_CTYPE_EMPTY or XML_CTYPE_ANY, then quant will be XML_CQUANT_NONE, and the other fields will be zero or NULL. If type == XML_CTYPE_MIXED, then quant will be NONE or REP and numchildren will contain number of elements that may be mixed in and children point to an array of XML_Content cells that will be all of XML_CTYPE_NAME type with no quantification. If type == XML_CTYPE_NAME, then the name points to the name, and the numchildren field will be zero and children will be NULL. The quant fields indicates any quantifiers placed on the name. CHOICE and SEQ will have name NULL, the number of children in numchildren and children will point, recursively, to an array of XML_Content cells. The EMPTY, ANY, and MIXED types will only occur at top level. */ typedef struct XML_cp XML_Content; struct XML_cp { enum XML_Content_Type type; enum XML_Content_Quant quant; XML_Char *name; unsigned int numchildren; XML_Content *children; }; /* This is called for an element declaration. See above for - description of the model argument. It's the caller's responsibility - to free model when finished with it. + description of the model argument. It's the user code's responsibility + to free model when finished with it. See XML_FreeContentModel. + There is no need to free the model from the handler, it can be kept + around and freed at a later stage. */ typedef void(XMLCALL *XML_ElementDeclHandler)(void *userData, const XML_Char *name, XML_Content *model); XMLPARSEAPI(void) XML_SetElementDeclHandler(XML_Parser parser, XML_ElementDeclHandler eldecl); /* The Attlist declaration handler is called for *each* attribute. So a single Attlist declaration with multiple attributes declared will generate multiple calls to this handler. The "default" parameter may be NULL in the case of the "#IMPLIED" or "#REQUIRED" keyword. The "isrequired" parameter will be true and the default value will be NULL in the case of "#REQUIRED". If "isrequired" is true and default is non-NULL, then this is a "#FIXED" default. */ typedef void(XMLCALL *XML_AttlistDeclHandler)( void *userData, const XML_Char *elname, const XML_Char *attname, const XML_Char *att_type, const XML_Char *dflt, int isrequired); XMLPARSEAPI(void) XML_SetAttlistDeclHandler(XML_Parser parser, XML_AttlistDeclHandler attdecl); /* The XML declaration handler is called for *both* XML declarations and text declarations. The way to distinguish is that the version parameter will be NULL for text declarations. The encoding parameter may be NULL for XML declarations. The standalone parameter will be -1, 0, or 1 indicating respectively that there was no standalone parameter in the declaration, that it was given as no, or that it was given as yes. */ typedef void(XMLCALL *XML_XmlDeclHandler)(void *userData, const XML_Char *version, const XML_Char *encoding, int standalone); XMLPARSEAPI(void) XML_SetXmlDeclHandler(XML_Parser parser, XML_XmlDeclHandler xmldecl); typedef struct { void *(*malloc_fcn)(size_t size); void *(*realloc_fcn)(void *ptr, size_t size); void (*free_fcn)(void *ptr); } XML_Memory_Handling_Suite; /* Constructs a new parser; encoding is the encoding specified by the external protocol or NULL if there is none specified. */ XMLPARSEAPI(XML_Parser) XML_ParserCreate(const XML_Char *encoding); /* Constructs a new parser and namespace processor. Element type names and attribute names that belong to a namespace will be expanded; unprefixed attribute names are never expanded; unprefixed element type names are expanded only if there is a default namespace. The expanded name is the concatenation of the namespace URI, the namespace separator character, and the local part of the name. If the namespace separator is '\0' then the namespace URI and the local part will be concatenated without any separator. It is a programming error to use the separator '\0' with namespace triplets (see XML_SetReturnNSTriplet). + If a namespace separator is chosen that can be part of a URI or + part of an XML name, splitting an expanded name back into its + 1, 2 or 3 original parts on application level in the element handler + may end up vulnerable, so these are advised against; sane choices for + a namespace separator are e.g. '\n' (line feed) and '|' (pipe). + + Note that Expat does not validate namespace URIs (beyond encoding) + against RFC 3986 today (and is not required to do so with regard to + the XML 1.0 namespaces specification) but it may start doing that + in future releases. Before that, an application using Expat must + be ready to receive namespace URIs containing non-URI characters. */ XMLPARSEAPI(XML_Parser) XML_ParserCreateNS(const XML_Char *encoding, XML_Char namespaceSeparator); /* Constructs a new parser using the memory management suite referred to by memsuite. If memsuite is NULL, then use the standard library memory suite. If namespaceSeparator is non-NULL it creates a parser with namespace processing as described above. The character pointed at will serve as the namespace separator. All further memory operations used for the created parser will come from the given suite. */ XMLPARSEAPI(XML_Parser) XML_ParserCreate_MM(const XML_Char *encoding, const XML_Memory_Handling_Suite *memsuite, const XML_Char *namespaceSeparator); /* Prepare a parser object to be re-used. This is particularly valuable when memory allocation overhead is disproportionately high, such as when a large number of small documnents need to be parsed. All handlers are cleared from the parser, except for the unknownEncodingHandler. The parser's external state is re-initialized except for the values of ns and ns_triplets. Added in Expat 1.95.3. */ XMLPARSEAPI(XML_Bool) XML_ParserReset(XML_Parser parser, const XML_Char *encoding); /* atts is array of name/value pairs, terminated by 0; names and values are 0 terminated. */ typedef void(XMLCALL *XML_StartElementHandler)(void *userData, const XML_Char *name, const XML_Char **atts); typedef void(XMLCALL *XML_EndElementHandler)(void *userData, const XML_Char *name); /* s is not 0 terminated. */ typedef void(XMLCALL *XML_CharacterDataHandler)(void *userData, const XML_Char *s, int len); /* target and data are 0 terminated */ typedef void(XMLCALL *XML_ProcessingInstructionHandler)(void *userData, const XML_Char *target, const XML_Char *data); /* data is 0 terminated */ typedef void(XMLCALL *XML_CommentHandler)(void *userData, const XML_Char *data); typedef void(XMLCALL *XML_StartCdataSectionHandler)(void *userData); typedef void(XMLCALL *XML_EndCdataSectionHandler)(void *userData); /* This is called for any characters in the XML document for which there is no applicable handler. This includes both characters that are part of markup which is of a kind that is not reported (comments, markup declarations), or characters that are part of a construct which could be reported but for which no handler has been supplied. The characters are passed exactly as they were in the XML document except that they will be encoded in UTF-8 or UTF-16. Line boundaries are not normalized. Note that a byte order mark character is not passed to the default handler. There are no guarantees about how characters are divided between calls to the default handler: for example, a comment might be split between multiple calls. */ typedef void(XMLCALL *XML_DefaultHandler)(void *userData, const XML_Char *s, int len); /* This is called for the start of the DOCTYPE declaration, before any DTD or internal subset is parsed. */ typedef void(XMLCALL *XML_StartDoctypeDeclHandler)(void *userData, const XML_Char *doctypeName, const XML_Char *sysid, const XML_Char *pubid, int has_internal_subset); -/* This is called for the start of the DOCTYPE declaration when the +/* This is called for the end of the DOCTYPE declaration when the closing > is encountered, but after processing any external subset. */ typedef void(XMLCALL *XML_EndDoctypeDeclHandler)(void *userData); /* This is called for entity declarations. The is_parameter_entity argument will be non-zero if the entity is a parameter entity, zero otherwise. For internal entities (), value will be non-NULL and systemId, publicID, and notationName will be NULL. The value string is NOT null-terminated; the length is provided in the value_length argument. Since it is legal to have zero-length values, do not use this argument to test for internal entities. For external entities, value will be NULL and systemId will be non-NULL. The publicId argument will be NULL unless a public identifier was provided. The notationName argument will have a non-NULL value only for unparsed entity declarations. Note that is_parameter_entity can't be changed to XML_Bool, since that would break binary compatibility. */ typedef void(XMLCALL *XML_EntityDeclHandler)( void *userData, const XML_Char *entityName, int is_parameter_entity, const XML_Char *value, int value_length, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId, const XML_Char *notationName); XMLPARSEAPI(void) XML_SetEntityDeclHandler(XML_Parser parser, XML_EntityDeclHandler handler); /* OBSOLETE -- OBSOLETE -- OBSOLETE This handler has been superseded by the EntityDeclHandler above. It is provided here for backward compatibility. This is called for a declaration of an unparsed (NDATA) entity. The base argument is whatever was set by XML_SetBase. The entityName, systemId and notationName arguments will never be NULL. The other arguments may be. */ typedef void(XMLCALL *XML_UnparsedEntityDeclHandler)( void *userData, const XML_Char *entityName, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId, const XML_Char *notationName); /* This is called for a declaration of notation. The base argument is whatever was set by XML_SetBase. The notationName will never be NULL. The other arguments can be. */ typedef void(XMLCALL *XML_NotationDeclHandler)(void *userData, const XML_Char *notationName, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId); /* When namespace processing is enabled, these are called once for each namespace declaration. The call to the start and end element handlers occur between the calls to the start and end namespace declaration handlers. For an xmlns attribute, prefix will be NULL. For an xmlns="" attribute, uri will be NULL. */ typedef void(XMLCALL *XML_StartNamespaceDeclHandler)(void *userData, const XML_Char *prefix, const XML_Char *uri); typedef void(XMLCALL *XML_EndNamespaceDeclHandler)(void *userData, const XML_Char *prefix); /* This is called if the document is not standalone, that is, it has an external subset or a reference to a parameter entity, but does not have standalone="yes". If this handler returns XML_STATUS_ERROR, then processing will not continue, and the parser will return a XML_ERROR_NOT_STANDALONE error. If parameter entity parsing is enabled, then in addition to the conditions above this handler will only be called if the referenced entity was actually read. */ typedef int(XMLCALL *XML_NotStandaloneHandler)(void *userData); /* This is called for a reference to an external parsed general entity. The referenced entity is not automatically parsed. The application can parse it immediately or later using XML_ExternalEntityParserCreate. The parser argument is the parser parsing the entity containing the reference; it can be passed as the parser argument to XML_ExternalEntityParserCreate. The systemId argument is the system identifier as specified in the entity declaration; it will not be NULL. The base argument is the system identifier that should be used as the base for resolving systemId if systemId was relative; this is set by XML_SetBase; it may be NULL. The publicId argument is the public identifier as specified in the entity declaration, or NULL if none was specified; the whitespace in the public identifier will have been normalized as required by the XML spec. The context argument specifies the parsing context in the format expected by the context argument to XML_ExternalEntityParserCreate; context is valid only until the handler returns, so if the referenced entity is to be parsed later, it must be copied. context is NULL only when the entity is a parameter entity. The handler should return XML_STATUS_ERROR if processing should not continue because of a fatal error in the handling of the external entity. In this case the calling parser will return an XML_ERROR_EXTERNAL_ENTITY_HANDLING error. Note that unlike other handlers the first argument is the parser, not userData. */ typedef int(XMLCALL *XML_ExternalEntityRefHandler)(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId); /* This is called in two situations: 1) An entity reference is encountered for which no declaration has been read *and* this is not an error. 2) An internal entity reference is read, but not expanded, because XML_SetDefaultHandler has been called. Note: skipped parameter entities in declarations and skipped general entities in attribute values cannot be reported, because the event would be out of sync with the reporting of the declarations or attribute values */ typedef void(XMLCALL *XML_SkippedEntityHandler)(void *userData, const XML_Char *entityName, int is_parameter_entity); /* This structure is filled in by the XML_UnknownEncodingHandler to provide information to the parser about encodings that are unknown to the parser. The map[b] member gives information about byte sequences whose first byte is b. If map[b] is c where c is >= 0, then b by itself encodes the Unicode scalar value c. If map[b] is -1, then the byte sequence is malformed. If map[b] is -n, where n >= 2, then b is the first byte of an n-byte sequence that encodes a single Unicode scalar value. The data member will be passed as the first argument to the convert function. The convert function is used to convert multibyte sequences; s will point to a n-byte sequence where map[(unsigned char)*s] == -n. The convert function must return the Unicode scalar value represented by this byte sequence or -1 if the byte sequence is malformed. The convert function may be NULL if the encoding is a single-byte encoding, that is if map[b] >= -1 for all bytes b. When the parser is finished with the encoding, then if release is not NULL, it will call release passing it the data member; once release has been called, the convert function will not be called again. Expat places certain restrictions on the encodings that are supported using this mechanism. 1. Every ASCII character that can appear in a well-formed XML document, other than the characters $@\^`{}~ must be represented by a single byte, and that byte must be the same byte that represents that character in ASCII. 2. No character may require more than 4 bytes to encode. 3. All characters encoded must have Unicode scalar values <= 0xFFFF, (i.e., characters that would be encoded by surrogates in UTF-16 are not allowed). Note that this restriction doesn't apply to the built-in support for UTF-8 and UTF-16. 4. No Unicode character may be encoded by more than one distinct sequence of bytes. */ typedef struct { int map[256]; void *data; int(XMLCALL *convert)(void *data, const char *s); void(XMLCALL *release)(void *data); } XML_Encoding; /* This is called for an encoding that is unknown to the parser. The encodingHandlerData argument is that which was passed as the second argument to XML_SetUnknownEncodingHandler. The name argument gives the name of the encoding as specified in the encoding declaration. If the callback can provide information about the encoding, it must fill in the XML_Encoding structure, and return XML_STATUS_OK. Otherwise it must return XML_STATUS_ERROR. If info does not describe a suitable encoding, then the parser will return an XML_ERROR_UNKNOWN_ENCODING error. */ typedef int(XMLCALL *XML_UnknownEncodingHandler)(void *encodingHandlerData, const XML_Char *name, XML_Encoding *info); XMLPARSEAPI(void) XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start, XML_EndElementHandler end); XMLPARSEAPI(void) XML_SetStartElementHandler(XML_Parser parser, XML_StartElementHandler handler); XMLPARSEAPI(void) XML_SetEndElementHandler(XML_Parser parser, XML_EndElementHandler handler); XMLPARSEAPI(void) XML_SetCharacterDataHandler(XML_Parser parser, XML_CharacterDataHandler handler); XMLPARSEAPI(void) XML_SetProcessingInstructionHandler(XML_Parser parser, XML_ProcessingInstructionHandler handler); XMLPARSEAPI(void) XML_SetCommentHandler(XML_Parser parser, XML_CommentHandler handler); XMLPARSEAPI(void) XML_SetCdataSectionHandler(XML_Parser parser, XML_StartCdataSectionHandler start, XML_EndCdataSectionHandler end); XMLPARSEAPI(void) XML_SetStartCdataSectionHandler(XML_Parser parser, XML_StartCdataSectionHandler start); XMLPARSEAPI(void) XML_SetEndCdataSectionHandler(XML_Parser parser, XML_EndCdataSectionHandler end); /* This sets the default handler and also inhibits expansion of internal entities. These entity references will be passed to the default handler, or to the skipped entity handler, if one is set. */ XMLPARSEAPI(void) XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler handler); /* This sets the default handler but does not inhibit expansion of internal entities. The entity reference will not be passed to the default handler. */ XMLPARSEAPI(void) XML_SetDefaultHandlerExpand(XML_Parser parser, XML_DefaultHandler handler); XMLPARSEAPI(void) XML_SetDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start, XML_EndDoctypeDeclHandler end); XMLPARSEAPI(void) XML_SetStartDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start); XMLPARSEAPI(void) XML_SetEndDoctypeDeclHandler(XML_Parser parser, XML_EndDoctypeDeclHandler end); XMLPARSEAPI(void) XML_SetUnparsedEntityDeclHandler(XML_Parser parser, XML_UnparsedEntityDeclHandler handler); XMLPARSEAPI(void) XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler handler); XMLPARSEAPI(void) XML_SetNamespaceDeclHandler(XML_Parser parser, XML_StartNamespaceDeclHandler start, XML_EndNamespaceDeclHandler end); XMLPARSEAPI(void) XML_SetStartNamespaceDeclHandler(XML_Parser parser, XML_StartNamespaceDeclHandler start); XMLPARSEAPI(void) XML_SetEndNamespaceDeclHandler(XML_Parser parser, XML_EndNamespaceDeclHandler end); XMLPARSEAPI(void) XML_SetNotStandaloneHandler(XML_Parser parser, XML_NotStandaloneHandler handler); XMLPARSEAPI(void) XML_SetExternalEntityRefHandler(XML_Parser parser, XML_ExternalEntityRefHandler handler); /* If a non-NULL value for arg is specified here, then it will be passed as the first argument to the external entity ref handler instead of the parser object. */ XMLPARSEAPI(void) XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg); XMLPARSEAPI(void) XML_SetSkippedEntityHandler(XML_Parser parser, XML_SkippedEntityHandler handler); XMLPARSEAPI(void) XML_SetUnknownEncodingHandler(XML_Parser parser, XML_UnknownEncodingHandler handler, void *encodingHandlerData); /* This can be called within a handler for a start element, end element, processing instruction or character data. It causes the corresponding markup to be passed to the default handler. */ XMLPARSEAPI(void) XML_DefaultCurrent(XML_Parser parser); /* If do_nst is non-zero, and namespace processing is in effect, and a name has a prefix (i.e. an explicit namespace qualifier) then that name is returned as a triplet in a single string separated by the separator character specified when the parser was created: URI + sep + local_name + sep + prefix. If do_nst is zero, then namespace information is returned in the default manner (URI + sep + local_name) whether or not the name has a prefix. Note: Calling XML_SetReturnNSTriplet after XML_Parse or XML_ParseBuffer has no effect. */ XMLPARSEAPI(void) XML_SetReturnNSTriplet(XML_Parser parser, int do_nst); /* This value is passed as the userData argument to callbacks. */ XMLPARSEAPI(void) XML_SetUserData(XML_Parser parser, void *userData); /* Returns the last value set by XML_SetUserData or NULL. */ #define XML_GetUserData(parser) (*(void **)(parser)) /* This is equivalent to supplying an encoding argument to XML_ParserCreate. On success XML_SetEncoding returns non-zero, zero otherwise. Note: Calling XML_SetEncoding after XML_Parse or XML_ParseBuffer has no effect and returns XML_STATUS_ERROR. */ XMLPARSEAPI(enum XML_Status) XML_SetEncoding(XML_Parser parser, const XML_Char *encoding); /* If this function is called, then the parser will be passed as the first argument to callbacks instead of userData. The userData will still be accessible using XML_GetUserData. */ XMLPARSEAPI(void) XML_UseParserAsHandlerArg(XML_Parser parser); /* If useDTD == XML_TRUE is passed to this function, then the parser will assume that there is an external subset, even if none is specified in the document. In such a case the parser will call the externalEntityRefHandler with a value of NULL for the systemId argument (the publicId and context arguments will be NULL as well). Note: For the purpose of checking WFC: Entity Declared, passing useDTD == XML_TRUE will make the parser behave as if the document had a DTD with an external subset. Note: If this function is called, then this must be done before the first call to XML_Parse or XML_ParseBuffer, since it will have no effect after that. Returns XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING. Note: If the document does not have a DOCTYPE declaration at all, then startDoctypeDeclHandler and endDoctypeDeclHandler will not be called, despite an external subset being parsed. Note: If XML_DTD is not defined when Expat is compiled, returns XML_ERROR_FEATURE_REQUIRES_XML_DTD. Note: If parser == NULL, returns XML_ERROR_INVALID_ARGUMENT. */ XMLPARSEAPI(enum XML_Error) XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD); /* Sets the base to be used for resolving relative URIs in system identifiers in declarations. Resolving relative identifiers is left to the application: this value will be passed through as the base argument to the XML_ExternalEntityRefHandler, XML_NotationDeclHandler and XML_UnparsedEntityDeclHandler. The base argument will be copied. Returns XML_STATUS_ERROR if out of memory, XML_STATUS_OK otherwise. */ XMLPARSEAPI(enum XML_Status) XML_SetBase(XML_Parser parser, const XML_Char *base); XMLPARSEAPI(const XML_Char *) XML_GetBase(XML_Parser parser); /* Returns the number of the attribute/value pairs passed in last call to the XML_StartElementHandler that were specified in the start-tag rather than defaulted. Each attribute/value pair counts as 2; thus this corresponds to an index into the atts array passed to the XML_StartElementHandler. Returns -1 if parser == NULL. */ XMLPARSEAPI(int) XML_GetSpecifiedAttributeCount(XML_Parser parser); /* Returns the index of the ID attribute passed in the last call to XML_StartElementHandler, or -1 if there is no ID attribute or parser == NULL. Each attribute/value pair counts as 2; thus this corresponds to an index into the atts array passed to the XML_StartElementHandler. */ XMLPARSEAPI(int) XML_GetIdAttributeIndex(XML_Parser parser); #ifdef XML_ATTR_INFO /* Source file byte offsets for the start and end of attribute names and values. The value indices are exclusive of surrounding quotes; thus in a UTF-8 source file an attribute value of "blah" will yield: info->valueEnd - info->valueStart = 4 bytes. */ typedef struct { XML_Index nameStart; /* Offset to beginning of the attribute name. */ XML_Index nameEnd; /* Offset after the attribute name's last byte. */ XML_Index valueStart; /* Offset to beginning of the attribute value. */ XML_Index valueEnd; /* Offset after the attribute value's last byte. */ } XML_AttrInfo; /* Returns an array of XML_AttrInfo structures for the attribute/value pairs passed in last call to the XML_StartElementHandler that were specified in the start-tag rather than defaulted. Each attribute/value pair counts as 1; thus the number of entries in the array is XML_GetSpecifiedAttributeCount(parser) / 2. */ XMLPARSEAPI(const XML_AttrInfo *) XML_GetAttributeInfo(XML_Parser parser); #endif /* Parses some input. Returns XML_STATUS_ERROR if a fatal error is detected. The last call to XML_Parse must have isFinal true; len may be zero for this call (or any other). Though the return values for these functions has always been described as a Boolean value, the implementation, at least for the 1.95.x series, has always returned exactly one of the XML_Status values. */ XMLPARSEAPI(enum XML_Status) XML_Parse(XML_Parser parser, const char *s, int len, int isFinal); XMLPARSEAPI(void *) XML_GetBuffer(XML_Parser parser, int len); XMLPARSEAPI(enum XML_Status) XML_ParseBuffer(XML_Parser parser, int len, int isFinal); /* Stops parsing, causing XML_Parse() or XML_ParseBuffer() to return. Must be called from within a call-back handler, except when aborting (resumable = 0) an already suspended parser. Some call-backs may still follow because they would otherwise get lost. Examples: - endElementHandler() for empty elements when stopped in startElementHandler(), - endNameSpaceDeclHandler() when stopped in endElementHandler(), and possibly others. Can be called from most handlers, including DTD related call-backs, except when parsing an external parameter entity and resumable != 0. Returns XML_STATUS_OK when successful, XML_STATUS_ERROR otherwise. Possible error codes: - XML_ERROR_SUSPENDED: when suspending an already suspended parser. - XML_ERROR_FINISHED: when the parser has already finished. - XML_ERROR_SUSPEND_PE: when suspending while parsing an external PE. When resumable != 0 (true) then parsing is suspended, that is, XML_Parse() and XML_ParseBuffer() return XML_STATUS_SUSPENDED. Otherwise, parsing is aborted, that is, XML_Parse() and XML_ParseBuffer() return XML_STATUS_ERROR with error code XML_ERROR_ABORTED. *Note*: This will be applied to the current parser instance only, that is, if there is a parent parser then it will continue parsing when the externalEntityRefHandler() returns. It is up to the implementation of the externalEntityRefHandler() to call XML_StopParser() on the parent parser (recursively), if one wants to stop parsing altogether. When suspended, parsing can be resumed by calling XML_ResumeParser(). */ XMLPARSEAPI(enum XML_Status) XML_StopParser(XML_Parser parser, XML_Bool resumable); /* Resumes parsing after it has been suspended with XML_StopParser(). Must not be called from within a handler call-back. Returns same status codes as XML_Parse() or XML_ParseBuffer(). Additional error code XML_ERROR_NOT_SUSPENDED possible. *Note*: This must be called on the most deeply nested child parser instance first, and on its parent parser only after the child parser has finished, to be applied recursively until the document entity's parser is restarted. That is, the parent parser will not resume by itself and it is up to the application to call XML_ResumeParser() on it at the appropriate moment. */ XMLPARSEAPI(enum XML_Status) XML_ResumeParser(XML_Parser parser); enum XML_Parsing { XML_INITIALIZED, XML_PARSING, XML_FINISHED, XML_SUSPENDED }; typedef struct { enum XML_Parsing parsing; XML_Bool finalBuffer; } XML_ParsingStatus; /* Returns status of parser with respect to being initialized, parsing, finished, or suspended and processing the final buffer. XXX XML_Parse() and XML_ParseBuffer() should return XML_ParsingStatus, XXX with XML_FINISHED_OK or XML_FINISHED_ERROR replacing XML_FINISHED */ XMLPARSEAPI(void) XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status); /* Creates an XML_Parser object that can parse an external general entity; context is a '\0'-terminated string specifying the parse context; encoding is a '\0'-terminated string giving the name of the externally specified encoding, or NULL if there is no externally specified encoding. The context string consists of a sequence of tokens separated by formfeeds (\f); a token consisting of a name specifies that the general entity of the name is open; a token of the form prefix=uri specifies the namespace for a particular prefix; a token of the form =uri specifies the default namespace. This can be called at any point after the first call to an ExternalEntityRefHandler so longer as the parser has not yet been freed. The new parser is completely independent and may safely be used in a separate thread. The handlers and userData are initialized from the parser argument. Returns NULL if out of memory. Otherwise returns a new XML_Parser object. */ XMLPARSEAPI(XML_Parser) XML_ExternalEntityParserCreate(XML_Parser parser, const XML_Char *context, const XML_Char *encoding); enum XML_ParamEntityParsing { XML_PARAM_ENTITY_PARSING_NEVER, XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE, XML_PARAM_ENTITY_PARSING_ALWAYS }; /* Controls parsing of parameter entities (including the external DTD subset). If parsing of parameter entities is enabled, then references to external parameter entities (including the external DTD subset) will be passed to the handler set with XML_SetExternalEntityRefHandler. The context passed will be 0. Unlike external general entities, external parameter entities can only be parsed synchronously. If the external parameter entity is to be parsed, it must be parsed during the call to the external entity ref handler: the complete sequence of XML_ExternalEntityParserCreate, XML_Parse/XML_ParseBuffer and XML_ParserFree calls must be made during this call. After XML_ExternalEntityParserCreate has been called to create the parser for the external parameter entity (context must be 0 for this call), it is illegal to make any calls on the old parser until XML_ParserFree has been called on the newly created parser. If the library has been compiled without support for parameter entity parsing (ie without XML_DTD being defined), then XML_SetParamEntityParsing will return 0 if parsing of parameter entities is requested; otherwise it will return non-zero. Note: If XML_SetParamEntityParsing is called after XML_Parse or XML_ParseBuffer, then it has no effect and will always return 0. Note: If parser == NULL, the function will do nothing and return 0. */ XMLPARSEAPI(int) XML_SetParamEntityParsing(XML_Parser parser, enum XML_ParamEntityParsing parsing); /* Sets the hash salt to use for internal hash calculations. Helps in preventing DoS attacks based on predicting hash function behavior. This must be called before parsing is started. Returns 1 if successful, 0 when called after parsing has started. Note: If parser == NULL, the function will do nothing and return 0. */ XMLPARSEAPI(int) XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt); /* If XML_Parse or XML_ParseBuffer have returned XML_STATUS_ERROR, then XML_GetErrorCode returns information about the error. */ XMLPARSEAPI(enum XML_Error) XML_GetErrorCode(XML_Parser parser); /* These functions return information about the current parse location. They may be called from any callback called to report some parse event; in this case the location is the location of the first of the sequence of characters that generated the event. When called from callbacks generated by declarations in the document prologue, the location identified isn't as neatly defined, but will be within the relevant markup. When called outside of the callback functions, the position indicated will be just past the last parse event (regardless of whether there was an associated callback). They may also be called after returning from a call to XML_Parse or XML_ParseBuffer. If the return value is XML_STATUS_ERROR then the location is the location of the character at which the error was detected; otherwise the location is the location of the last parse event, as described above. Note: XML_GetCurrentLineNumber and XML_GetCurrentColumnNumber return 0 to indicate an error. Note: XML_GetCurrentByteIndex returns -1 to indicate an error. */ XMLPARSEAPI(XML_Size) XML_GetCurrentLineNumber(XML_Parser parser); XMLPARSEAPI(XML_Size) XML_GetCurrentColumnNumber(XML_Parser parser); XMLPARSEAPI(XML_Index) XML_GetCurrentByteIndex(XML_Parser parser); /* Return the number of bytes in the current event. Returns 0 if the event is in an internal entity. */ XMLPARSEAPI(int) XML_GetCurrentByteCount(XML_Parser parser); /* If XML_CONTEXT_BYTES is defined, returns the input buffer, sets the integer pointed to by offset to the offset within this buffer of the current parse position, and sets the integer pointed to by size to the size of this buffer (the number of input bytes). Otherwise returns a NULL pointer. Also returns a NULL pointer if a parse isn't active. NOTE: The character pointer returned should not be used outside the handler that makes the call. */ XMLPARSEAPI(const char *) XML_GetInputContext(XML_Parser parser, int *offset, int *size); /* For backwards compatibility with previous versions. */ #define XML_GetErrorLineNumber XML_GetCurrentLineNumber #define XML_GetErrorColumnNumber XML_GetCurrentColumnNumber #define XML_GetErrorByteIndex XML_GetCurrentByteIndex /* Frees the content model passed to the element declaration handler */ XMLPARSEAPI(void) XML_FreeContentModel(XML_Parser parser, XML_Content *model); /* Exposing the memory handling functions used in Expat */ XMLPARSEAPI(void *) XML_ATTR_MALLOC XML_ATTR_ALLOC_SIZE(2) XML_MemMalloc(XML_Parser parser, size_t size); XMLPARSEAPI(void *) XML_ATTR_ALLOC_SIZE(3) XML_MemRealloc(XML_Parser parser, void *ptr, size_t size); XMLPARSEAPI(void) XML_MemFree(XML_Parser parser, void *ptr); /* Frees memory used by the parser. */ XMLPARSEAPI(void) XML_ParserFree(XML_Parser parser); /* Returns a string describing the error. */ XMLPARSEAPI(const XML_LChar *) XML_ErrorString(enum XML_Error code); /* Return a string containing the version number of this expat */ XMLPARSEAPI(const XML_LChar *) XML_ExpatVersion(void); typedef struct { int major; int minor; int micro; } XML_Expat_Version; /* Return an XML_Expat_Version structure containing numeric version number information for this version of expat. */ XMLPARSEAPI(XML_Expat_Version) XML_ExpatVersionInfo(void); /* Added in Expat 1.95.5. */ enum XML_FeatureEnum { XML_FEATURE_END = 0, XML_FEATURE_UNICODE, XML_FEATURE_UNICODE_WCHAR_T, XML_FEATURE_DTD, XML_FEATURE_CONTEXT_BYTES, XML_FEATURE_MIN_SIZE, XML_FEATURE_SIZEOF_XML_CHAR, XML_FEATURE_SIZEOF_XML_LCHAR, XML_FEATURE_NS, XML_FEATURE_LARGE_SIZE, XML_FEATURE_ATTR_INFO, /* Added in Expat 2.4.0. */ XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT, XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT /* Additional features must be added to the end of this enum. */ }; typedef struct { enum XML_FeatureEnum feature; const XML_LChar *name; long int value; } XML_Feature; XMLPARSEAPI(const XML_Feature *) XML_GetFeatureList(void); #ifdef XML_DTD /* Added in Expat 2.4.0. */ XMLPARSEAPI(XML_Bool) XML_SetBillionLaughsAttackProtectionMaximumAmplification( XML_Parser parser, float maximumAmplificationFactor); /* Added in Expat 2.4.0. */ XMLPARSEAPI(XML_Bool) XML_SetBillionLaughsAttackProtectionActivationThreshold( XML_Parser parser, unsigned long long activationThresholdBytes); #endif /* Expat follows the semantic versioning convention. See http://semver.org. */ #define XML_MAJOR_VERSION 2 #define XML_MINOR_VERSION 4 -#define XML_MICRO_VERSION 6 +#define XML_MICRO_VERSION 7 #ifdef __cplusplus } #endif #endif /* not Expat_INCLUDED */ diff --git a/contrib/expat/lib/xmlparse.c b/contrib/expat/lib/xmlparse.c index 7db28d07acbc..05216d997b07 100644 --- a/contrib/expat/lib/xmlparse.c +++ b/contrib/expat/lib/xmlparse.c @@ -1,8255 +1,8382 @@ -/* a30d2613dcfdef81475a9d1a349134d2d42722172fdaa7d5bb12ed2aa74b9596 (2.4.6+) +/* fcb1a62fefa945567301146eb98e3ad3413e823a41c4378e84e8b6b6f308d824 (2.4.7+) __ __ _ ___\ \/ /_ __ __ _| |_ / _ \\ /| '_ \ / _` | __| | __// \| |_) | (_| | |_ \___/_/\_\ .__/ \__,_|\__| |_| XML parser Copyright (c) 1997-2000 Thai Open Source Software Center Ltd Copyright (c) 2000 Clark Cooper Copyright (c) 2000-2006 Fred L. Drake, Jr. Copyright (c) 2001-2002 Greg Stein Copyright (c) 2002-2016 Karl Waclawek Copyright (c) 2005-2009 Steven Solie Copyright (c) 2016 Eric Rahm Copyright (c) 2016-2022 Sebastian Pipping Copyright (c) 2016 Gaurav Copyright (c) 2016 Thomas Beutlich Copyright (c) 2016 Gustavo Grieco Copyright (c) 2016 Pascal Cuoq Copyright (c) 2016 Ed Schouten Copyright (c) 2017-2018 Rhodri James Copyright (c) 2017 Václav Slavík Copyright (c) 2017 Viktor Szakats Copyright (c) 2017 Chanho Park Copyright (c) 2017 Rolf Eike Beer Copyright (c) 2017 Hans Wennborg Copyright (c) 2018 Anton Maklakov Copyright (c) 2018 Benjamin Peterson Copyright (c) 2018 Marco Maggi Copyright (c) 2018 Mariusz Zaborski Copyright (c) 2019 David Loffredo Copyright (c) 2019-2020 Ben Wagner Copyright (c) 2019 Vadim Zeitlin Copyright (c) 2021 Dong-hee Na Copyright (c) 2022 Samanta Navarro + Copyright (c) 2022 Jeffrey Walton Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #define XML_BUILDING_EXPAT 1 #include #if ! defined(_GNU_SOURCE) # define _GNU_SOURCE 1 /* syscall prototype */ #endif #ifdef _WIN32 /* force stdlib to define rand_s() */ # if ! defined(_CRT_RAND_S) # define _CRT_RAND_S # endif #endif #include #include /* memset(), memcpy() */ #include #include /* UINT_MAX */ #include /* fprintf */ #include /* getenv, rand_s */ #include /* uintptr_t */ #include /* isnan */ #ifdef _WIN32 # define getpid GetCurrentProcessId #else # include /* gettimeofday() */ # include /* getpid() */ # include /* getpid() */ # include /* O_RDONLY */ # include #endif #ifdef _WIN32 # include "winconfig.h" #endif #include "ascii.h" #include "expat.h" #include "siphash.h" #if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) # if defined(HAVE_GETRANDOM) # include /* getrandom */ # else # include /* syscall */ # include /* SYS_getrandom */ # endif # if ! defined(GRND_NONBLOCK) # define GRND_NONBLOCK 0x0001 # endif /* defined(GRND_NONBLOCK) */ #endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */ #if defined(HAVE_LIBBSD) \ && (defined(HAVE_ARC4RANDOM_BUF) || defined(HAVE_ARC4RANDOM)) # include #endif #if defined(_WIN32) && ! defined(LOAD_LIBRARY_SEARCH_SYSTEM32) # define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800 #endif #if ! defined(HAVE_GETRANDOM) && ! defined(HAVE_SYSCALL_GETRANDOM) \ && ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) \ && ! defined(XML_DEV_URANDOM) && ! defined(_WIN32) \ && ! defined(XML_POOR_ENTROPY) # error You do not have support for any sources of high quality entropy \ enabled. For end user security, that is probably not what you want. \ \ Your options include: \ * Linux >=3.17 + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \ * Linux >=3.17 + glibc (including <2.25) (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \ * BSD / macOS >=10.7 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \ * BSD / macOS (including <10.7) (arc4random): HAVE_ARC4RANDOM, \ * libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \ * libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \ - * Linux (including <3.17) / BSD / macOS (including <10.7) (/dev/urandom): XML_DEV_URANDOM, \ + * Linux (including <3.17) / BSD / macOS (including <10.7) / Solaris >=8 (/dev/urandom): XML_DEV_URANDOM, \ * Windows >=Vista (rand_s): _WIN32. \ \ If insist on not using any of these, bypass this error by defining \ XML_POOR_ENTROPY; you have been warned. \ \ If you have reasons to patch this detection code away or need changes \ to the build system, please open a bug. Thank you! #endif #ifdef XML_UNICODE # define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX # define XmlConvert XmlUtf16Convert # define XmlGetInternalEncoding XmlGetUtf16InternalEncoding # define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS # define XmlEncode XmlUtf16Encode # define MUST_CONVERT(enc, s) (! (enc)->isUtf16 || (((uintptr_t)(s)) & 1)) typedef unsigned short ICHAR; #else # define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX # define XmlConvert XmlUtf8Convert # define XmlGetInternalEncoding XmlGetUtf8InternalEncoding # define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS # define XmlEncode XmlUtf8Encode # define MUST_CONVERT(enc, s) (! (enc)->isUtf8) typedef char ICHAR; #endif #ifndef XML_NS # define XmlInitEncodingNS XmlInitEncoding # define XmlInitUnknownEncodingNS XmlInitUnknownEncoding # undef XmlGetInternalEncodingNS # define XmlGetInternalEncodingNS XmlGetInternalEncoding # define XmlParseXmlDeclNS XmlParseXmlDecl #endif #ifdef XML_UNICODE # ifdef XML_UNICODE_WCHAR_T # define XML_T(x) (const wchar_t) x # define XML_L(x) L##x # else # define XML_T(x) (const unsigned short)x # define XML_L(x) x # endif #else # define XML_T(x) x # define XML_L(x) x #endif /* Round up n to be a multiple of sz, where sz is a power of 2. */ #define ROUND_UP(n, sz) (((n) + ((sz)-1)) & ~((sz)-1)) /* Do safe (NULL-aware) pointer arithmetic */ #define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0) #include "internal.h" #include "xmltok.h" #include "xmlrole.h" typedef const XML_Char *KEY; typedef struct { KEY name; } NAMED; typedef struct { NAMED **v; unsigned char power; size_t size; size_t used; const XML_Memory_Handling_Suite *mem; } HASH_TABLE; static size_t keylen(KEY s); static void copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key); /* For probing (after a collision) we need a step size relative prime to the hash table size, which is a power of 2. We use double-hashing, since we can calculate a second hash value cheaply by taking those bits of the first hash value that were discarded (masked out) when the table index was calculated: index = hash & mask, where mask = table->size - 1. We limit the maximum step size to table->size / 4 (mask >> 2) and make it odd, since odd numbers are always relative prime to a power of 2. */ #define SECOND_HASH(hash, mask, power) \ ((((hash) & ~(mask)) >> ((power)-1)) & ((mask) >> 2)) #define PROBE_STEP(hash, mask, power) \ ((unsigned char)((SECOND_HASH(hash, mask, power)) | 1)) typedef struct { NAMED **p; NAMED **end; } HASH_TABLE_ITER; #define INIT_TAG_BUF_SIZE 32 /* must be a multiple of sizeof(XML_Char) */ #define INIT_DATA_BUF_SIZE 1024 #define INIT_ATTS_SIZE 16 #define INIT_ATTS_VERSION 0xFFFFFFFF #define INIT_BLOCK_SIZE 1024 #define INIT_BUFFER_SIZE 1024 #define EXPAND_SPARE 24 typedef struct binding { struct prefix *prefix; struct binding *nextTagBinding; struct binding *prevPrefixBinding; const struct attribute_id *attId; XML_Char *uri; int uriLen; int uriAlloc; } BINDING; typedef struct prefix { const XML_Char *name; BINDING *binding; } PREFIX; typedef struct { const XML_Char *str; const XML_Char *localPart; const XML_Char *prefix; int strLen; int uriLen; int prefixLen; } TAG_NAME; /* TAG represents an open element. The name of the element is stored in both the document and API encodings. The memory buffer 'buf' is a separately-allocated memory area which stores the name. During the XML_Parse()/ XMLParseBuffer() when the element is open, the memory for the 'raw' version of the name (in the document encoding) is shared with the document buffer. If the element is open across calls to XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to contain the 'raw' name as well. A parser re-uses these structures, maintaining a list of allocated TAG objects in a free list. */ typedef struct tag { struct tag *parent; /* parent of this element */ const char *rawName; /* tagName in the original encoding */ int rawNameLength; TAG_NAME name; /* tagName in the API encoding */ char *buf; /* buffer for name components */ char *bufEnd; /* end of the buffer */ BINDING *bindings; } TAG; typedef struct { const XML_Char *name; const XML_Char *textPtr; int textLen; /* length in XML_Chars */ int processed; /* # of processed bytes - when suspended */ const XML_Char *systemId; const XML_Char *base; const XML_Char *publicId; const XML_Char *notation; XML_Bool open; XML_Bool is_param; XML_Bool is_internal; /* true if declared in internal subset outside PE */ } ENTITY; typedef struct { enum XML_Content_Type type; enum XML_Content_Quant quant; const XML_Char *name; int firstchild; int lastchild; int childcnt; int nextsib; } CONTENT_SCAFFOLD; #define INIT_SCAFFOLD_ELEMENTS 32 typedef struct block { struct block *next; int size; XML_Char s[1]; } BLOCK; typedef struct { BLOCK *blocks; BLOCK *freeBlocks; const XML_Char *end; XML_Char *ptr; XML_Char *start; const XML_Memory_Handling_Suite *mem; } STRING_POOL; /* The XML_Char before the name is used to determine whether an attribute has been specified. */ typedef struct attribute_id { XML_Char *name; PREFIX *prefix; XML_Bool maybeTokenized; XML_Bool xmlns; } ATTRIBUTE_ID; typedef struct { const ATTRIBUTE_ID *id; XML_Bool isCdata; const XML_Char *value; } DEFAULT_ATTRIBUTE; typedef struct { unsigned long version; unsigned long hash; const XML_Char *uriName; } NS_ATT; typedef struct { const XML_Char *name; PREFIX *prefix; const ATTRIBUTE_ID *idAtt; int nDefaultAtts; int allocDefaultAtts; DEFAULT_ATTRIBUTE *defaultAtts; } ELEMENT_TYPE; typedef struct { HASH_TABLE generalEntities; HASH_TABLE elementTypes; HASH_TABLE attributeIds; HASH_TABLE prefixes; STRING_POOL pool; STRING_POOL entityValuePool; /* false once a parameter entity reference has been skipped */ XML_Bool keepProcessing; /* true once an internal or external PE reference has been encountered; this includes the reference to an external subset */ XML_Bool hasParamEntityRefs; XML_Bool standalone; #ifdef XML_DTD /* indicates if external PE has been read */ XML_Bool paramEntityRead; HASH_TABLE paramEntities; #endif /* XML_DTD */ PREFIX defaultPrefix; /* === scaffolding for building content model === */ XML_Bool in_eldecl; CONTENT_SCAFFOLD *scaffold; unsigned contentStringLen; unsigned scaffSize; unsigned scaffCount; int scaffLevel; int *scaffIndex; } DTD; typedef struct open_internal_entity { const char *internalEventPtr; const char *internalEventEndPtr; struct open_internal_entity *next; ENTITY *entity; int startTagLevel; XML_Bool betweenDecl; /* WFC: PE Between Declarations */ } OPEN_INTERNAL_ENTITY; enum XML_Account { XML_ACCOUNT_DIRECT, /* bytes directly passed to the Expat parser */ XML_ACCOUNT_ENTITY_EXPANSION, /* intermediate bytes produced during entity expansion */ XML_ACCOUNT_NONE /* i.e. do not account, was accounted already */ }; #ifdef XML_DTD typedef unsigned long long XmlBigCount; typedef struct accounting { XmlBigCount countBytesDirect; XmlBigCount countBytesIndirect; int debugLevel; float maximumAmplificationFactor; // >=1.0 unsigned long long activationThresholdBytes; } ACCOUNTING; typedef struct entity_stats { unsigned int countEverOpened; unsigned int currentDepth; unsigned int maximumDepthSeen; int debugLevel; } ENTITY_STATS; #endif /* XML_DTD */ typedef enum XML_Error PTRCALL Processor(XML_Parser parser, const char *start, const char *end, const char **endPtr); static Processor prologProcessor; static Processor prologInitProcessor; static Processor contentProcessor; static Processor cdataSectionProcessor; #ifdef XML_DTD static Processor ignoreSectionProcessor; static Processor externalParEntProcessor; static Processor externalParEntInitProcessor; static Processor entityValueProcessor; static Processor entityValueInitProcessor; #endif /* XML_DTD */ static Processor epilogProcessor; static Processor errorProcessor; static Processor externalEntityInitProcessor; static Processor externalEntityInitProcessor2; static Processor externalEntityInitProcessor3; static Processor externalEntityContentProcessor; static Processor internalEntityProcessor; static enum XML_Error handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName); static enum XML_Error processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *s, const char *next); static enum XML_Error initializeEncoding(XML_Parser parser); static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, int tok, const char *next, const char **nextPtr, XML_Bool haveMore, XML_Bool allowClosingDoctype, enum XML_Account account); static enum XML_Error processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl); static enum XML_Error doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, const char *start, const char *end, const char **endPtr, XML_Bool haveMore, enum XML_Account account); static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *, const char **startPtr, const char *end, const char **nextPtr, XML_Bool haveMore, enum XML_Account account); #ifdef XML_DTD static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING *, const char **startPtr, const char *end, const char **nextPtr, XML_Bool haveMore); #endif /* XML_DTD */ static void freeBindings(XML_Parser parser, BINDING *bindings); static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *, const char *s, TAG_NAME *tagNamePtr, BINDING **bindingsPtr, enum XML_Account account); static enum XML_Error addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, const XML_Char *uri, BINDING **bindingsPtr); static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, XML_Bool isCdata, XML_Bool isId, const XML_Char *dfltValue, XML_Parser parser); static enum XML_Error storeAttributeValue(XML_Parser parser, const ENCODING *, XML_Bool isCdata, const char *, const char *, STRING_POOL *, enum XML_Account account); static enum XML_Error appendAttributeValue(XML_Parser parser, const ENCODING *, XML_Bool isCdata, const char *, const char *, STRING_POOL *, enum XML_Account account); static ATTRIBUTE_ID *getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, const char *end); static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *); static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc, const char *start, const char *end, enum XML_Account account); static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char *start, const char *end); static int reportComment(XML_Parser parser, const ENCODING *enc, const char *start, const char *end); static void reportDefault(XML_Parser parser, const ENCODING *enc, const char *start, const char *end); static const XML_Char *getContext(XML_Parser parser); static XML_Bool setContext(XML_Parser parser, const XML_Char *context); static void FASTCALL normalizePublicId(XML_Char *s); static DTD *dtdCreate(const XML_Memory_Handling_Suite *ms); /* do not call if m_parentParser != NULL */ static void dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms); static void dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms); static int dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms); static int copyEntityTable(XML_Parser oldParser, HASH_TABLE *, STRING_POOL *, const HASH_TABLE *); static NAMED *lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize); static void FASTCALL hashTableInit(HASH_TABLE *, const XML_Memory_Handling_Suite *ms); static void FASTCALL hashTableClear(HASH_TABLE *); static void FASTCALL hashTableDestroy(HASH_TABLE *); static void FASTCALL hashTableIterInit(HASH_TABLE_ITER *, const HASH_TABLE *); static NAMED *FASTCALL hashTableIterNext(HASH_TABLE_ITER *); static void FASTCALL poolInit(STRING_POOL *, const XML_Memory_Handling_Suite *ms); static void FASTCALL poolClear(STRING_POOL *); static void FASTCALL poolDestroy(STRING_POOL *); static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc, const char *ptr, const char *end); static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc, const char *ptr, const char *end); static XML_Bool FASTCALL poolGrow(STRING_POOL *pool); static const XML_Char *FASTCALL poolCopyString(STRING_POOL *pool, const XML_Char *s); static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n); static const XML_Char *FASTCALL poolAppendString(STRING_POOL *pool, const XML_Char *s); static int FASTCALL nextScaffoldPart(XML_Parser parser); static XML_Content *build_model(XML_Parser parser); static ELEMENT_TYPE *getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr, const char *end); static XML_Char *copyString(const XML_Char *s, const XML_Memory_Handling_Suite *memsuite); static unsigned long generate_hash_secret_salt(XML_Parser parser); static XML_Bool startParsing(XML_Parser parser); static XML_Parser parserCreate(const XML_Char *encodingName, const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep, DTD *dtd); static void parserInit(XML_Parser parser, const XML_Char *encodingName); #ifdef XML_DTD static float accountingGetCurrentAmplification(XML_Parser rootParser); static void accountingReportStats(XML_Parser originParser, const char *epilog); static void accountingOnAbort(XML_Parser originParser); static void accountingReportDiff(XML_Parser rootParser, unsigned int levelsAwayFromRootParser, const char *before, const char *after, ptrdiff_t bytesMore, int source_line, enum XML_Account account); static XML_Bool accountingDiffTolerated(XML_Parser originParser, int tok, const char *before, const char *after, int source_line, enum XML_Account account); static void entityTrackingReportStats(XML_Parser parser, ENTITY *entity, const char *action, int sourceLine); static void entityTrackingOnOpen(XML_Parser parser, ENTITY *entity, int sourceLine); static void entityTrackingOnClose(XML_Parser parser, ENTITY *entity, int sourceLine); static XML_Parser getRootParserOf(XML_Parser parser, unsigned int *outLevelDiff); #endif /* XML_DTD */ static unsigned long getDebugLevel(const char *variableName, unsigned long defaultDebugLevel); #define poolStart(pool) ((pool)->start) #define poolEnd(pool) ((pool)->ptr) #define poolLength(pool) ((pool)->ptr - (pool)->start) #define poolChop(pool) ((void)--(pool->ptr)) #define poolLastChar(pool) (((pool)->ptr)[-1]) #define poolDiscard(pool) ((pool)->ptr = (pool)->start) #define poolFinish(pool) ((pool)->start = (pool)->ptr) #define poolAppendChar(pool, c) \ (((pool)->ptr == (pool)->end && ! poolGrow(pool)) \ ? 0 \ : ((*((pool)->ptr)++ = c), 1)) struct XML_ParserStruct { /* The first member must be m_userData so that the XML_GetUserData macro works. */ void *m_userData; void *m_handlerArg; char *m_buffer; const XML_Memory_Handling_Suite m_mem; /* first character to be parsed */ const char *m_bufferPtr; /* past last character to be parsed */ char *m_bufferEnd; /* allocated end of m_buffer */ const char *m_bufferLim; XML_Index m_parseEndByteIndex; const char *m_parseEndPtr; XML_Char *m_dataBuf; XML_Char *m_dataBufEnd; XML_StartElementHandler m_startElementHandler; XML_EndElementHandler m_endElementHandler; XML_CharacterDataHandler m_characterDataHandler; XML_ProcessingInstructionHandler m_processingInstructionHandler; XML_CommentHandler m_commentHandler; XML_StartCdataSectionHandler m_startCdataSectionHandler; XML_EndCdataSectionHandler m_endCdataSectionHandler; XML_DefaultHandler m_defaultHandler; XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler; XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler; XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler; XML_NotationDeclHandler m_notationDeclHandler; XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler; XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler; XML_NotStandaloneHandler m_notStandaloneHandler; XML_ExternalEntityRefHandler m_externalEntityRefHandler; XML_Parser m_externalEntityRefHandlerArg; XML_SkippedEntityHandler m_skippedEntityHandler; XML_UnknownEncodingHandler m_unknownEncodingHandler; XML_ElementDeclHandler m_elementDeclHandler; XML_AttlistDeclHandler m_attlistDeclHandler; XML_EntityDeclHandler m_entityDeclHandler; XML_XmlDeclHandler m_xmlDeclHandler; const ENCODING *m_encoding; INIT_ENCODING m_initEncoding; const ENCODING *m_internalEncoding; const XML_Char *m_protocolEncodingName; XML_Bool m_ns; XML_Bool m_ns_triplets; void *m_unknownEncodingMem; void *m_unknownEncodingData; void *m_unknownEncodingHandlerData; void(XMLCALL *m_unknownEncodingRelease)(void *); PROLOG_STATE m_prologState; Processor *m_processor; enum XML_Error m_errorCode; const char *m_eventPtr; const char *m_eventEndPtr; const char *m_positionPtr; OPEN_INTERNAL_ENTITY *m_openInternalEntities; OPEN_INTERNAL_ENTITY *m_freeInternalEntities; XML_Bool m_defaultExpandInternalEntities; int m_tagLevel; ENTITY *m_declEntity; const XML_Char *m_doctypeName; const XML_Char *m_doctypeSysid; const XML_Char *m_doctypePubid; const XML_Char *m_declAttributeType; const XML_Char *m_declNotationName; const XML_Char *m_declNotationPublicId; ELEMENT_TYPE *m_declElementType; ATTRIBUTE_ID *m_declAttributeId; XML_Bool m_declAttributeIsCdata; XML_Bool m_declAttributeIsId; DTD *m_dtd; const XML_Char *m_curBase; TAG *m_tagStack; TAG *m_freeTagList; BINDING *m_inheritedBindings; BINDING *m_freeBindingList; int m_attsSize; int m_nSpecifiedAtts; int m_idAttIndex; ATTRIBUTE *m_atts; NS_ATT *m_nsAtts; unsigned long m_nsAttsVersion; unsigned char m_nsAttsPower; #ifdef XML_ATTR_INFO XML_AttrInfo *m_attInfo; #endif POSITION m_position; STRING_POOL m_tempPool; STRING_POOL m_temp2Pool; char *m_groupConnector; unsigned int m_groupSize; XML_Char m_namespaceSeparator; XML_Parser m_parentParser; XML_ParsingStatus m_parsingStatus; #ifdef XML_DTD XML_Bool m_isParamEntity; XML_Bool m_useForeignDTD; enum XML_ParamEntityParsing m_paramEntityParsing; #endif unsigned long m_hash_secret_salt; #ifdef XML_DTD ACCOUNTING m_accounting; ENTITY_STATS m_entity_stats; #endif }; #define MALLOC(parser, s) (parser->m_mem.malloc_fcn((s))) #define REALLOC(parser, p, s) (parser->m_mem.realloc_fcn((p), (s))) #define FREE(parser, p) (parser->m_mem.free_fcn((p))) XML_Parser XMLCALL XML_ParserCreate(const XML_Char *encodingName) { return XML_ParserCreate_MM(encodingName, NULL, NULL); } XML_Parser XMLCALL XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) { XML_Char tmp[2] = {nsSep, 0}; return XML_ParserCreate_MM(encodingName, NULL, tmp); } +// "xml=http://www.w3.org/XML/1998/namespace" static const XML_Char implicitContext[] = {ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L, ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9, ASCII_8, ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c, ASCII_e, '\0'}; /* To avoid warnings about unused functions: */ #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) # if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) /* Obtain entropy on Linux 3.17+ */ static int writeRandomBytes_getrandom_nonblock(void *target, size_t count) { int success = 0; /* full count bytes written? */ size_t bytesWrittenTotal = 0; const unsigned int getrandomFlags = GRND_NONBLOCK; do { void *const currentTarget = (void *)((char *)target + bytesWrittenTotal); const size_t bytesToWrite = count - bytesWrittenTotal; const int bytesWrittenMore = # if defined(HAVE_GETRANDOM) getrandom(currentTarget, bytesToWrite, getrandomFlags); # else syscall(SYS_getrandom, currentTarget, bytesToWrite, getrandomFlags); # endif if (bytesWrittenMore > 0) { bytesWrittenTotal += bytesWrittenMore; if (bytesWrittenTotal >= count) success = 1; } } while (! success && (errno == EINTR)); return success; } # endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */ # if ! defined(_WIN32) && defined(XML_DEV_URANDOM) /* Extract entropy from /dev/urandom */ static int writeRandomBytes_dev_urandom(void *target, size_t count) { int success = 0; /* full count bytes written? */ size_t bytesWrittenTotal = 0; const int fd = open("/dev/urandom", O_RDONLY); if (fd < 0) { return 0; } do { void *const currentTarget = (void *)((char *)target + bytesWrittenTotal); const size_t bytesToWrite = count - bytesWrittenTotal; const ssize_t bytesWrittenMore = read(fd, currentTarget, bytesToWrite); if (bytesWrittenMore > 0) { bytesWrittenTotal += bytesWrittenMore; if (bytesWrittenTotal >= count) success = 1; } } while (! success && (errno == EINTR)); close(fd); return success; } # endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */ #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */ #if defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF) static void writeRandomBytes_arc4random(void *target, size_t count) { size_t bytesWrittenTotal = 0; while (bytesWrittenTotal < count) { const uint32_t random32 = arc4random(); size_t i = 0; for (; (i < sizeof(random32)) && (bytesWrittenTotal < count); i++, bytesWrittenTotal++) { const uint8_t random8 = (uint8_t)(random32 >> (i * 8)); ((uint8_t *)target)[bytesWrittenTotal] = random8; } } } #endif /* defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF) */ #ifdef _WIN32 /* Provide declaration of rand_s() for MinGW-32 (not 64, which has it), as it didn't declare it in its header prior to version 5.3.0 of its runtime package (mingwrt, containing stdlib.h). The upstream fix was introduced at https://osdn.net/projects/mingw/ticket/39658 . */ # if defined(__MINGW32__) && defined(__MINGW32_VERSION) \ && __MINGW32_VERSION < 5003000L && ! defined(__MINGW64_VERSION_MAJOR) __declspec(dllimport) int rand_s(unsigned int *); # endif /* Obtain entropy on Windows using the rand_s() function which * generates cryptographically secure random numbers. Internally it * uses RtlGenRandom API which is present in Windows XP and later. */ static int writeRandomBytes_rand_s(void *target, size_t count) { size_t bytesWrittenTotal = 0; while (bytesWrittenTotal < count) { unsigned int random32 = 0; size_t i = 0; if (rand_s(&random32)) return 0; /* failure */ for (; (i < sizeof(random32)) && (bytesWrittenTotal < count); i++, bytesWrittenTotal++) { const uint8_t random8 = (uint8_t)(random32 >> (i * 8)); ((uint8_t *)target)[bytesWrittenTotal] = random8; } } return 1; /* success */ } #endif /* _WIN32 */ #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) static unsigned long gather_time_entropy(void) { # ifdef _WIN32 FILETIME ft; GetSystemTimeAsFileTime(&ft); /* never fails */ return ft.dwHighDateTime ^ ft.dwLowDateTime; # else struct timeval tv; int gettimeofday_res; gettimeofday_res = gettimeofday(&tv, NULL); # if defined(NDEBUG) (void)gettimeofday_res; # else assert(gettimeofday_res == 0); # endif /* defined(NDEBUG) */ /* Microseconds time is <20 bits entropy */ return tv.tv_usec; # endif } #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */ static unsigned long ENTROPY_DEBUG(const char *label, unsigned long entropy) { if (getDebugLevel("EXPAT_ENTROPY_DEBUG", 0) >= 1u) { fprintf(stderr, "expat: Entropy: %s --> 0x%0*lx (%lu bytes)\n", label, (int)sizeof(entropy) * 2, entropy, (unsigned long)sizeof(entropy)); } return entropy; } static unsigned long generate_hash_secret_salt(XML_Parser parser) { unsigned long entropy; (void)parser; /* "Failproof" high quality providers: */ #if defined(HAVE_ARC4RANDOM_BUF) arc4random_buf(&entropy, sizeof(entropy)); return ENTROPY_DEBUG("arc4random_buf", entropy); #elif defined(HAVE_ARC4RANDOM) writeRandomBytes_arc4random((void *)&entropy, sizeof(entropy)); return ENTROPY_DEBUG("arc4random", entropy); #else /* Try high quality providers first .. */ # ifdef _WIN32 if (writeRandomBytes_rand_s((void *)&entropy, sizeof(entropy))) { return ENTROPY_DEBUG("rand_s", entropy); } # elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) if (writeRandomBytes_getrandom_nonblock((void *)&entropy, sizeof(entropy))) { return ENTROPY_DEBUG("getrandom", entropy); } # endif # if ! defined(_WIN32) && defined(XML_DEV_URANDOM) if (writeRandomBytes_dev_urandom((void *)&entropy, sizeof(entropy))) { return ENTROPY_DEBUG("/dev/urandom", entropy); } # endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */ /* .. and self-made low quality for backup: */ /* Process ID is 0 bits entropy if attacker has local access */ entropy = gather_time_entropy() ^ getpid(); /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */ if (sizeof(unsigned long) == 4) { return ENTROPY_DEBUG("fallback(4)", entropy * 2147483647); } else { return ENTROPY_DEBUG("fallback(8)", entropy * (unsigned long)2305843009213693951ULL); } #endif } static unsigned long get_hash_secret_salt(XML_Parser parser) { if (parser->m_parentParser != NULL) return get_hash_secret_salt(parser->m_parentParser); return parser->m_hash_secret_salt; } static XML_Bool /* only valid for root parser */ startParsing(XML_Parser parser) { /* hash functions must be initialized before setContext() is called */ if (parser->m_hash_secret_salt == 0) parser->m_hash_secret_salt = generate_hash_secret_salt(parser); if (parser->m_ns) { /* implicit context only set for root parser, since child parsers (i.e. external entity parsers) will inherit it */ return setContext(parser, implicitContext); } return XML_TRUE; } XML_Parser XMLCALL XML_ParserCreate_MM(const XML_Char *encodingName, const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep) { return parserCreate(encodingName, memsuite, nameSep, NULL); } static XML_Parser parserCreate(const XML_Char *encodingName, const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep, DTD *dtd) { XML_Parser parser; if (memsuite) { XML_Memory_Handling_Suite *mtemp; parser = memsuite->malloc_fcn(sizeof(struct XML_ParserStruct)); if (parser != NULL) { mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem); mtemp->malloc_fcn = memsuite->malloc_fcn; mtemp->realloc_fcn = memsuite->realloc_fcn; mtemp->free_fcn = memsuite->free_fcn; } } else { XML_Memory_Handling_Suite *mtemp; parser = (XML_Parser)malloc(sizeof(struct XML_ParserStruct)); if (parser != NULL) { mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem); mtemp->malloc_fcn = malloc; mtemp->realloc_fcn = realloc; mtemp->free_fcn = free; } } if (! parser) return parser; parser->m_buffer = NULL; parser->m_bufferLim = NULL; parser->m_attsSize = INIT_ATTS_SIZE; parser->m_atts = (ATTRIBUTE *)MALLOC(parser, parser->m_attsSize * sizeof(ATTRIBUTE)); if (parser->m_atts == NULL) { FREE(parser, parser); return NULL; } #ifdef XML_ATTR_INFO parser->m_attInfo = (XML_AttrInfo *)MALLOC( parser, parser->m_attsSize * sizeof(XML_AttrInfo)); if (parser->m_attInfo == NULL) { FREE(parser, parser->m_atts); FREE(parser, parser); return NULL; } #endif parser->m_dataBuf = (XML_Char *)MALLOC(parser, INIT_DATA_BUF_SIZE * sizeof(XML_Char)); if (parser->m_dataBuf == NULL) { FREE(parser, parser->m_atts); #ifdef XML_ATTR_INFO FREE(parser, parser->m_attInfo); #endif FREE(parser, parser); return NULL; } parser->m_dataBufEnd = parser->m_dataBuf + INIT_DATA_BUF_SIZE; if (dtd) parser->m_dtd = dtd; else { parser->m_dtd = dtdCreate(&parser->m_mem); if (parser->m_dtd == NULL) { FREE(parser, parser->m_dataBuf); FREE(parser, parser->m_atts); #ifdef XML_ATTR_INFO FREE(parser, parser->m_attInfo); #endif FREE(parser, parser); return NULL; } } parser->m_freeBindingList = NULL; parser->m_freeTagList = NULL; parser->m_freeInternalEntities = NULL; parser->m_groupSize = 0; parser->m_groupConnector = NULL; parser->m_unknownEncodingHandler = NULL; parser->m_unknownEncodingHandlerData = NULL; parser->m_namespaceSeparator = ASCII_EXCL; parser->m_ns = XML_FALSE; parser->m_ns_triplets = XML_FALSE; parser->m_nsAtts = NULL; parser->m_nsAttsVersion = 0; parser->m_nsAttsPower = 0; parser->m_protocolEncodingName = NULL; poolInit(&parser->m_tempPool, &(parser->m_mem)); poolInit(&parser->m_temp2Pool, &(parser->m_mem)); parserInit(parser, encodingName); if (encodingName && ! parser->m_protocolEncodingName) { XML_ParserFree(parser); return NULL; } if (nameSep) { parser->m_ns = XML_TRUE; parser->m_internalEncoding = XmlGetInternalEncodingNS(); parser->m_namespaceSeparator = *nameSep; } else { parser->m_internalEncoding = XmlGetInternalEncoding(); } return parser; } static void parserInit(XML_Parser parser, const XML_Char *encodingName) { parser->m_processor = prologInitProcessor; XmlPrologStateInit(&parser->m_prologState); if (encodingName != NULL) { parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem)); } parser->m_curBase = NULL; XmlInitEncoding(&parser->m_initEncoding, &parser->m_encoding, 0); parser->m_userData = NULL; parser->m_handlerArg = NULL; parser->m_startElementHandler = NULL; parser->m_endElementHandler = NULL; parser->m_characterDataHandler = NULL; parser->m_processingInstructionHandler = NULL; parser->m_commentHandler = NULL; parser->m_startCdataSectionHandler = NULL; parser->m_endCdataSectionHandler = NULL; parser->m_defaultHandler = NULL; parser->m_startDoctypeDeclHandler = NULL; parser->m_endDoctypeDeclHandler = NULL; parser->m_unparsedEntityDeclHandler = NULL; parser->m_notationDeclHandler = NULL; parser->m_startNamespaceDeclHandler = NULL; parser->m_endNamespaceDeclHandler = NULL; parser->m_notStandaloneHandler = NULL; parser->m_externalEntityRefHandler = NULL; parser->m_externalEntityRefHandlerArg = parser; parser->m_skippedEntityHandler = NULL; parser->m_elementDeclHandler = NULL; parser->m_attlistDeclHandler = NULL; parser->m_entityDeclHandler = NULL; parser->m_xmlDeclHandler = NULL; parser->m_bufferPtr = parser->m_buffer; parser->m_bufferEnd = parser->m_buffer; parser->m_parseEndByteIndex = 0; parser->m_parseEndPtr = NULL; parser->m_declElementType = NULL; parser->m_declAttributeId = NULL; parser->m_declEntity = NULL; parser->m_doctypeName = NULL; parser->m_doctypeSysid = NULL; parser->m_doctypePubid = NULL; parser->m_declAttributeType = NULL; parser->m_declNotationName = NULL; parser->m_declNotationPublicId = NULL; parser->m_declAttributeIsCdata = XML_FALSE; parser->m_declAttributeIsId = XML_FALSE; memset(&parser->m_position, 0, sizeof(POSITION)); parser->m_errorCode = XML_ERROR_NONE; parser->m_eventPtr = NULL; parser->m_eventEndPtr = NULL; parser->m_positionPtr = NULL; parser->m_openInternalEntities = NULL; parser->m_defaultExpandInternalEntities = XML_TRUE; parser->m_tagLevel = 0; parser->m_tagStack = NULL; parser->m_inheritedBindings = NULL; parser->m_nSpecifiedAtts = 0; parser->m_unknownEncodingMem = NULL; parser->m_unknownEncodingRelease = NULL; parser->m_unknownEncodingData = NULL; parser->m_parentParser = NULL; parser->m_parsingStatus.parsing = XML_INITIALIZED; #ifdef XML_DTD parser->m_isParamEntity = XML_FALSE; parser->m_useForeignDTD = XML_FALSE; parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER; #endif parser->m_hash_secret_salt = 0; #ifdef XML_DTD memset(&parser->m_accounting, 0, sizeof(ACCOUNTING)); parser->m_accounting.debugLevel = getDebugLevel("EXPAT_ACCOUNTING_DEBUG", 0u); parser->m_accounting.maximumAmplificationFactor = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT; parser->m_accounting.activationThresholdBytes = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT; memset(&parser->m_entity_stats, 0, sizeof(ENTITY_STATS)); parser->m_entity_stats.debugLevel = getDebugLevel("EXPAT_ENTITY_DEBUG", 0u); #endif } /* moves list of bindings to m_freeBindingList */ static void FASTCALL moveToFreeBindingList(XML_Parser parser, BINDING *bindings) { while (bindings) { BINDING *b = bindings; bindings = bindings->nextTagBinding; b->nextTagBinding = parser->m_freeBindingList; parser->m_freeBindingList = b; } } XML_Bool XMLCALL XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) { TAG *tStk; OPEN_INTERNAL_ENTITY *openEntityList; if (parser == NULL) return XML_FALSE; if (parser->m_parentParser) return XML_FALSE; /* move m_tagStack to m_freeTagList */ tStk = parser->m_tagStack; while (tStk) { TAG *tag = tStk; tStk = tStk->parent; tag->parent = parser->m_freeTagList; moveToFreeBindingList(parser, tag->bindings); tag->bindings = NULL; parser->m_freeTagList = tag; } /* move m_openInternalEntities to m_freeInternalEntities */ openEntityList = parser->m_openInternalEntities; while (openEntityList) { OPEN_INTERNAL_ENTITY *openEntity = openEntityList; openEntityList = openEntity->next; openEntity->next = parser->m_freeInternalEntities; parser->m_freeInternalEntities = openEntity; } moveToFreeBindingList(parser, parser->m_inheritedBindings); FREE(parser, parser->m_unknownEncodingMem); if (parser->m_unknownEncodingRelease) parser->m_unknownEncodingRelease(parser->m_unknownEncodingData); poolClear(&parser->m_tempPool); poolClear(&parser->m_temp2Pool); FREE(parser, (void *)parser->m_protocolEncodingName); parser->m_protocolEncodingName = NULL; parserInit(parser, encodingName); dtdReset(parser->m_dtd, &parser->m_mem); return XML_TRUE; } enum XML_Status XMLCALL XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) { if (parser == NULL) return XML_STATUS_ERROR; /* Block after XML_Parse()/XML_ParseBuffer() has been called. XXX There's no way for the caller to determine which of the XXX possible error cases caused the XML_STATUS_ERROR return. */ if (parser->m_parsingStatus.parsing == XML_PARSING || parser->m_parsingStatus.parsing == XML_SUSPENDED) return XML_STATUS_ERROR; /* Get rid of any previous encoding name */ FREE(parser, (void *)parser->m_protocolEncodingName); if (encodingName == NULL) /* No new encoding name */ parser->m_protocolEncodingName = NULL; else { /* Copy the new encoding name into allocated memory */ parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem)); if (! parser->m_protocolEncodingName) return XML_STATUS_ERROR; } return XML_STATUS_OK; } XML_Parser XMLCALL XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context, const XML_Char *encodingName) { XML_Parser parser = oldParser; DTD *newDtd = NULL; DTD *oldDtd; XML_StartElementHandler oldStartElementHandler; XML_EndElementHandler oldEndElementHandler; XML_CharacterDataHandler oldCharacterDataHandler; XML_ProcessingInstructionHandler oldProcessingInstructionHandler; XML_CommentHandler oldCommentHandler; XML_StartCdataSectionHandler oldStartCdataSectionHandler; XML_EndCdataSectionHandler oldEndCdataSectionHandler; XML_DefaultHandler oldDefaultHandler; XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler; XML_NotationDeclHandler oldNotationDeclHandler; XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler; XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler; XML_NotStandaloneHandler oldNotStandaloneHandler; XML_ExternalEntityRefHandler oldExternalEntityRefHandler; XML_SkippedEntityHandler oldSkippedEntityHandler; XML_UnknownEncodingHandler oldUnknownEncodingHandler; XML_ElementDeclHandler oldElementDeclHandler; XML_AttlistDeclHandler oldAttlistDeclHandler; XML_EntityDeclHandler oldEntityDeclHandler; XML_XmlDeclHandler oldXmlDeclHandler; ELEMENT_TYPE *oldDeclElementType; void *oldUserData; void *oldHandlerArg; XML_Bool oldDefaultExpandInternalEntities; XML_Parser oldExternalEntityRefHandlerArg; #ifdef XML_DTD enum XML_ParamEntityParsing oldParamEntityParsing; int oldInEntityValue; #endif XML_Bool oldns_triplets; /* Note that the new parser shares the same hash secret as the old parser, so that dtdCopy and copyEntityTable can lookup values from hash tables associated with either parser without us having to worry which hash secrets each table has. */ unsigned long oldhash_secret_salt; /* Validate the oldParser parameter before we pull everything out of it */ if (oldParser == NULL) return NULL; /* Stash the original parser contents on the stack */ oldDtd = parser->m_dtd; oldStartElementHandler = parser->m_startElementHandler; oldEndElementHandler = parser->m_endElementHandler; oldCharacterDataHandler = parser->m_characterDataHandler; oldProcessingInstructionHandler = parser->m_processingInstructionHandler; oldCommentHandler = parser->m_commentHandler; oldStartCdataSectionHandler = parser->m_startCdataSectionHandler; oldEndCdataSectionHandler = parser->m_endCdataSectionHandler; oldDefaultHandler = parser->m_defaultHandler; oldUnparsedEntityDeclHandler = parser->m_unparsedEntityDeclHandler; oldNotationDeclHandler = parser->m_notationDeclHandler; oldStartNamespaceDeclHandler = parser->m_startNamespaceDeclHandler; oldEndNamespaceDeclHandler = parser->m_endNamespaceDeclHandler; oldNotStandaloneHandler = parser->m_notStandaloneHandler; oldExternalEntityRefHandler = parser->m_externalEntityRefHandler; oldSkippedEntityHandler = parser->m_skippedEntityHandler; oldUnknownEncodingHandler = parser->m_unknownEncodingHandler; oldElementDeclHandler = parser->m_elementDeclHandler; oldAttlistDeclHandler = parser->m_attlistDeclHandler; oldEntityDeclHandler = parser->m_entityDeclHandler; oldXmlDeclHandler = parser->m_xmlDeclHandler; oldDeclElementType = parser->m_declElementType; oldUserData = parser->m_userData; oldHandlerArg = parser->m_handlerArg; oldDefaultExpandInternalEntities = parser->m_defaultExpandInternalEntities; oldExternalEntityRefHandlerArg = parser->m_externalEntityRefHandlerArg; #ifdef XML_DTD oldParamEntityParsing = parser->m_paramEntityParsing; oldInEntityValue = parser->m_prologState.inEntityValue; #endif oldns_triplets = parser->m_ns_triplets; /* Note that the new parser shares the same hash secret as the old parser, so that dtdCopy and copyEntityTable can lookup values from hash tables associated with either parser without us having to worry which hash secrets each table has. */ oldhash_secret_salt = parser->m_hash_secret_salt; #ifdef XML_DTD if (! context) newDtd = oldDtd; #endif /* XML_DTD */ /* Note that the magical uses of the pre-processor to make field access look more like C++ require that `parser' be overwritten here. This makes this function more painful to follow than it would be otherwise. */ if (parser->m_ns) { XML_Char tmp[2] = {parser->m_namespaceSeparator, 0}; parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd); } else { parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd); } if (! parser) return NULL; parser->m_startElementHandler = oldStartElementHandler; parser->m_endElementHandler = oldEndElementHandler; parser->m_characterDataHandler = oldCharacterDataHandler; parser->m_processingInstructionHandler = oldProcessingInstructionHandler; parser->m_commentHandler = oldCommentHandler; parser->m_startCdataSectionHandler = oldStartCdataSectionHandler; parser->m_endCdataSectionHandler = oldEndCdataSectionHandler; parser->m_defaultHandler = oldDefaultHandler; parser->m_unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler; parser->m_notationDeclHandler = oldNotationDeclHandler; parser->m_startNamespaceDeclHandler = oldStartNamespaceDeclHandler; parser->m_endNamespaceDeclHandler = oldEndNamespaceDeclHandler; parser->m_notStandaloneHandler = oldNotStandaloneHandler; parser->m_externalEntityRefHandler = oldExternalEntityRefHandler; parser->m_skippedEntityHandler = oldSkippedEntityHandler; parser->m_unknownEncodingHandler = oldUnknownEncodingHandler; parser->m_elementDeclHandler = oldElementDeclHandler; parser->m_attlistDeclHandler = oldAttlistDeclHandler; parser->m_entityDeclHandler = oldEntityDeclHandler; parser->m_xmlDeclHandler = oldXmlDeclHandler; parser->m_declElementType = oldDeclElementType; parser->m_userData = oldUserData; if (oldUserData == oldHandlerArg) parser->m_handlerArg = parser->m_userData; else parser->m_handlerArg = parser; if (oldExternalEntityRefHandlerArg != oldParser) parser->m_externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg; parser->m_defaultExpandInternalEntities = oldDefaultExpandInternalEntities; parser->m_ns_triplets = oldns_triplets; parser->m_hash_secret_salt = oldhash_secret_salt; parser->m_parentParser = oldParser; #ifdef XML_DTD parser->m_paramEntityParsing = oldParamEntityParsing; parser->m_prologState.inEntityValue = oldInEntityValue; if (context) { #endif /* XML_DTD */ if (! dtdCopy(oldParser, parser->m_dtd, oldDtd, &parser->m_mem) || ! setContext(parser, context)) { XML_ParserFree(parser); return NULL; } parser->m_processor = externalEntityInitProcessor; #ifdef XML_DTD } else { /* The DTD instance referenced by parser->m_dtd is shared between the document's root parser and external PE parsers, therefore one does not need to call setContext. In addition, one also *must* not call setContext, because this would overwrite existing prefix->binding pointers in parser->m_dtd with ones that get destroyed with the external PE parser. This would leave those prefixes with dangling pointers. */ parser->m_isParamEntity = XML_TRUE; XmlPrologStateInitExternalEntity(&parser->m_prologState); parser->m_processor = externalParEntInitProcessor; } #endif /* XML_DTD */ return parser; } static void FASTCALL destroyBindings(BINDING *bindings, XML_Parser parser) { for (;;) { BINDING *b = bindings; if (! b) break; bindings = b->nextTagBinding; FREE(parser, b->uri); FREE(parser, b); } } void XMLCALL XML_ParserFree(XML_Parser parser) { TAG *tagList; OPEN_INTERNAL_ENTITY *entityList; if (parser == NULL) return; /* free m_tagStack and m_freeTagList */ tagList = parser->m_tagStack; for (;;) { TAG *p; if (tagList == NULL) { if (parser->m_freeTagList == NULL) break; tagList = parser->m_freeTagList; parser->m_freeTagList = NULL; } p = tagList; tagList = tagList->parent; FREE(parser, p->buf); destroyBindings(p->bindings, parser); FREE(parser, p); } /* free m_openInternalEntities and m_freeInternalEntities */ entityList = parser->m_openInternalEntities; for (;;) { OPEN_INTERNAL_ENTITY *openEntity; if (entityList == NULL) { if (parser->m_freeInternalEntities == NULL) break; entityList = parser->m_freeInternalEntities; parser->m_freeInternalEntities = NULL; } openEntity = entityList; entityList = entityList->next; FREE(parser, openEntity); } destroyBindings(parser->m_freeBindingList, parser); destroyBindings(parser->m_inheritedBindings, parser); poolDestroy(&parser->m_tempPool); poolDestroy(&parser->m_temp2Pool); FREE(parser, (void *)parser->m_protocolEncodingName); #ifdef XML_DTD /* external parameter entity parsers share the DTD structure parser->m_dtd with the root parser, so we must not destroy it */ if (! parser->m_isParamEntity && parser->m_dtd) #else if (parser->m_dtd) #endif /* XML_DTD */ dtdDestroy(parser->m_dtd, (XML_Bool)! parser->m_parentParser, &parser->m_mem); FREE(parser, (void *)parser->m_atts); #ifdef XML_ATTR_INFO FREE(parser, (void *)parser->m_attInfo); #endif FREE(parser, parser->m_groupConnector); FREE(parser, parser->m_buffer); FREE(parser, parser->m_dataBuf); FREE(parser, parser->m_nsAtts); FREE(parser, parser->m_unknownEncodingMem); if (parser->m_unknownEncodingRelease) parser->m_unknownEncodingRelease(parser->m_unknownEncodingData); FREE(parser, parser); } void XMLCALL XML_UseParserAsHandlerArg(XML_Parser parser) { if (parser != NULL) parser->m_handlerArg = parser; } enum XML_Error XMLCALL XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD) { if (parser == NULL) return XML_ERROR_INVALID_ARGUMENT; #ifdef XML_DTD /* block after XML_Parse()/XML_ParseBuffer() has been called */ if (parser->m_parsingStatus.parsing == XML_PARSING || parser->m_parsingStatus.parsing == XML_SUSPENDED) return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING; parser->m_useForeignDTD = useDTD; return XML_ERROR_NONE; #else UNUSED_P(useDTD); return XML_ERROR_FEATURE_REQUIRES_XML_DTD; #endif } void XMLCALL XML_SetReturnNSTriplet(XML_Parser parser, int do_nst) { if (parser == NULL) return; /* block after XML_Parse()/XML_ParseBuffer() has been called */ if (parser->m_parsingStatus.parsing == XML_PARSING || parser->m_parsingStatus.parsing == XML_SUSPENDED) return; parser->m_ns_triplets = do_nst ? XML_TRUE : XML_FALSE; } void XMLCALL XML_SetUserData(XML_Parser parser, void *p) { if (parser == NULL) return; if (parser->m_handlerArg == parser->m_userData) parser->m_handlerArg = parser->m_userData = p; else parser->m_userData = p; } enum XML_Status XMLCALL XML_SetBase(XML_Parser parser, const XML_Char *p) { if (parser == NULL) return XML_STATUS_ERROR; if (p) { p = poolCopyString(&parser->m_dtd->pool, p); if (! p) return XML_STATUS_ERROR; parser->m_curBase = p; } else parser->m_curBase = NULL; return XML_STATUS_OK; } const XML_Char *XMLCALL XML_GetBase(XML_Parser parser) { if (parser == NULL) return NULL; return parser->m_curBase; } int XMLCALL XML_GetSpecifiedAttributeCount(XML_Parser parser) { if (parser == NULL) return -1; return parser->m_nSpecifiedAtts; } int XMLCALL XML_GetIdAttributeIndex(XML_Parser parser) { if (parser == NULL) return -1; return parser->m_idAttIndex; } #ifdef XML_ATTR_INFO const XML_AttrInfo *XMLCALL XML_GetAttributeInfo(XML_Parser parser) { if (parser == NULL) return NULL; return parser->m_attInfo; } #endif void XMLCALL XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start, XML_EndElementHandler end) { if (parser == NULL) return; parser->m_startElementHandler = start; parser->m_endElementHandler = end; } void XMLCALL XML_SetStartElementHandler(XML_Parser parser, XML_StartElementHandler start) { if (parser != NULL) parser->m_startElementHandler = start; } void XMLCALL XML_SetEndElementHandler(XML_Parser parser, XML_EndElementHandler end) { if (parser != NULL) parser->m_endElementHandler = end; } void XMLCALL XML_SetCharacterDataHandler(XML_Parser parser, XML_CharacterDataHandler handler) { if (parser != NULL) parser->m_characterDataHandler = handler; } void XMLCALL XML_SetProcessingInstructionHandler(XML_Parser parser, XML_ProcessingInstructionHandler handler) { if (parser != NULL) parser->m_processingInstructionHandler = handler; } void XMLCALL XML_SetCommentHandler(XML_Parser parser, XML_CommentHandler handler) { if (parser != NULL) parser->m_commentHandler = handler; } void XMLCALL XML_SetCdataSectionHandler(XML_Parser parser, XML_StartCdataSectionHandler start, XML_EndCdataSectionHandler end) { if (parser == NULL) return; parser->m_startCdataSectionHandler = start; parser->m_endCdataSectionHandler = end; } void XMLCALL XML_SetStartCdataSectionHandler(XML_Parser parser, XML_StartCdataSectionHandler start) { if (parser != NULL) parser->m_startCdataSectionHandler = start; } void XMLCALL XML_SetEndCdataSectionHandler(XML_Parser parser, XML_EndCdataSectionHandler end) { if (parser != NULL) parser->m_endCdataSectionHandler = end; } void XMLCALL XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler handler) { if (parser == NULL) return; parser->m_defaultHandler = handler; parser->m_defaultExpandInternalEntities = XML_FALSE; } void XMLCALL XML_SetDefaultHandlerExpand(XML_Parser parser, XML_DefaultHandler handler) { if (parser == NULL) return; parser->m_defaultHandler = handler; parser->m_defaultExpandInternalEntities = XML_TRUE; } void XMLCALL XML_SetDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start, XML_EndDoctypeDeclHandler end) { if (parser == NULL) return; parser->m_startDoctypeDeclHandler = start; parser->m_endDoctypeDeclHandler = end; } void XMLCALL XML_SetStartDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start) { if (parser != NULL) parser->m_startDoctypeDeclHandler = start; } void XMLCALL XML_SetEndDoctypeDeclHandler(XML_Parser parser, XML_EndDoctypeDeclHandler end) { if (parser != NULL) parser->m_endDoctypeDeclHandler = end; } void XMLCALL XML_SetUnparsedEntityDeclHandler(XML_Parser parser, XML_UnparsedEntityDeclHandler handler) { if (parser != NULL) parser->m_unparsedEntityDeclHandler = handler; } void XMLCALL XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler handler) { if (parser != NULL) parser->m_notationDeclHandler = handler; } void XMLCALL XML_SetNamespaceDeclHandler(XML_Parser parser, XML_StartNamespaceDeclHandler start, XML_EndNamespaceDeclHandler end) { if (parser == NULL) return; parser->m_startNamespaceDeclHandler = start; parser->m_endNamespaceDeclHandler = end; } void XMLCALL XML_SetStartNamespaceDeclHandler(XML_Parser parser, XML_StartNamespaceDeclHandler start) { if (parser != NULL) parser->m_startNamespaceDeclHandler = start; } void XMLCALL XML_SetEndNamespaceDeclHandler(XML_Parser parser, XML_EndNamespaceDeclHandler end) { if (parser != NULL) parser->m_endNamespaceDeclHandler = end; } void XMLCALL XML_SetNotStandaloneHandler(XML_Parser parser, XML_NotStandaloneHandler handler) { if (parser != NULL) parser->m_notStandaloneHandler = handler; } void XMLCALL XML_SetExternalEntityRefHandler(XML_Parser parser, XML_ExternalEntityRefHandler handler) { if (parser != NULL) parser->m_externalEntityRefHandler = handler; } void XMLCALL XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg) { if (parser == NULL) return; if (arg) parser->m_externalEntityRefHandlerArg = (XML_Parser)arg; else parser->m_externalEntityRefHandlerArg = parser; } void XMLCALL XML_SetSkippedEntityHandler(XML_Parser parser, XML_SkippedEntityHandler handler) { if (parser != NULL) parser->m_skippedEntityHandler = handler; } void XMLCALL XML_SetUnknownEncodingHandler(XML_Parser parser, XML_UnknownEncodingHandler handler, void *data) { if (parser == NULL) return; parser->m_unknownEncodingHandler = handler; parser->m_unknownEncodingHandlerData = data; } void XMLCALL XML_SetElementDeclHandler(XML_Parser parser, XML_ElementDeclHandler eldecl) { if (parser != NULL) parser->m_elementDeclHandler = eldecl; } void XMLCALL XML_SetAttlistDeclHandler(XML_Parser parser, XML_AttlistDeclHandler attdecl) { if (parser != NULL) parser->m_attlistDeclHandler = attdecl; } void XMLCALL XML_SetEntityDeclHandler(XML_Parser parser, XML_EntityDeclHandler handler) { if (parser != NULL) parser->m_entityDeclHandler = handler; } void XMLCALL XML_SetXmlDeclHandler(XML_Parser parser, XML_XmlDeclHandler handler) { if (parser != NULL) parser->m_xmlDeclHandler = handler; } int XMLCALL XML_SetParamEntityParsing(XML_Parser parser, enum XML_ParamEntityParsing peParsing) { if (parser == NULL) return 0; /* block after XML_Parse()/XML_ParseBuffer() has been called */ if (parser->m_parsingStatus.parsing == XML_PARSING || parser->m_parsingStatus.parsing == XML_SUSPENDED) return 0; #ifdef XML_DTD parser->m_paramEntityParsing = peParsing; return 1; #else return peParsing == XML_PARAM_ENTITY_PARSING_NEVER; #endif } int XMLCALL XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt) { if (parser == NULL) return 0; if (parser->m_parentParser) return XML_SetHashSalt(parser->m_parentParser, hash_salt); /* block after XML_Parse()/XML_ParseBuffer() has been called */ if (parser->m_parsingStatus.parsing == XML_PARSING || parser->m_parsingStatus.parsing == XML_SUSPENDED) return 0; parser->m_hash_secret_salt = hash_salt; return 1; } enum XML_Status XMLCALL XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) { if ((parser == NULL) || (len < 0) || ((s == NULL) && (len != 0))) { if (parser != NULL) parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT; return XML_STATUS_ERROR; } switch (parser->m_parsingStatus.parsing) { case XML_SUSPENDED: parser->m_errorCode = XML_ERROR_SUSPENDED; return XML_STATUS_ERROR; case XML_FINISHED: parser->m_errorCode = XML_ERROR_FINISHED; return XML_STATUS_ERROR; case XML_INITIALIZED: if (parser->m_parentParser == NULL && ! startParsing(parser)) { parser->m_errorCode = XML_ERROR_NO_MEMORY; return XML_STATUS_ERROR; } /* fall through */ default: parser->m_parsingStatus.parsing = XML_PARSING; } if (len == 0) { parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal; if (! isFinal) return XML_STATUS_OK; parser->m_positionPtr = parser->m_bufferPtr; parser->m_parseEndPtr = parser->m_bufferEnd; /* If data are left over from last buffer, and we now know that these data are the final chunk of input, then we have to check them again to detect errors based on that fact. */ parser->m_errorCode = parser->m_processor(parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr); if (parser->m_errorCode == XML_ERROR_NONE) { switch (parser->m_parsingStatus.parsing) { case XML_SUSPENDED: /* It is hard to be certain, but it seems that this case * cannot occur. This code is cleaning up a previous parse * with no new data (since len == 0). Changing the parsing * state requires getting to execute a handler function, and * there doesn't seem to be an opportunity for that while in * this circumstance. * * Given the uncertainty, we retain the code but exclude it * from coverage tests. * * LCOV_EXCL_START */ XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, parser->m_bufferPtr, &parser->m_position); parser->m_positionPtr = parser->m_bufferPtr; return XML_STATUS_SUSPENDED; /* LCOV_EXCL_STOP */ case XML_INITIALIZED: case XML_PARSING: parser->m_parsingStatus.parsing = XML_FINISHED; /* fall through */ default: return XML_STATUS_OK; } } parser->m_eventEndPtr = parser->m_eventPtr; parser->m_processor = errorProcessor; return XML_STATUS_ERROR; } #ifndef XML_CONTEXT_BYTES else if (parser->m_bufferPtr == parser->m_bufferEnd) { const char *end; int nLeftOver; enum XML_Status result; /* Detect overflow (a+b > MAX <==> b > MAX-a) */ if ((XML_Size)len > ((XML_Size)-1) / 2 - parser->m_parseEndByteIndex) { parser->m_errorCode = XML_ERROR_NO_MEMORY; parser->m_eventPtr = parser->m_eventEndPtr = NULL; parser->m_processor = errorProcessor; return XML_STATUS_ERROR; } parser->m_parseEndByteIndex += len; parser->m_positionPtr = s; parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal; parser->m_errorCode = parser->m_processor(parser, s, parser->m_parseEndPtr = s + len, &end); if (parser->m_errorCode != XML_ERROR_NONE) { parser->m_eventEndPtr = parser->m_eventPtr; parser->m_processor = errorProcessor; return XML_STATUS_ERROR; } else { switch (parser->m_parsingStatus.parsing) { case XML_SUSPENDED: result = XML_STATUS_SUSPENDED; break; case XML_INITIALIZED: case XML_PARSING: if (isFinal) { parser->m_parsingStatus.parsing = XML_FINISHED; return XML_STATUS_OK; } /* fall through */ default: result = XML_STATUS_OK; } } XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, end, &parser->m_position); nLeftOver = s + len - end; if (nLeftOver) { if (parser->m_buffer == NULL || nLeftOver > parser->m_bufferLim - parser->m_buffer) { /* avoid _signed_ integer overflow */ char *temp = NULL; const int bytesToAllocate = (int)((unsigned)len * 2U); if (bytesToAllocate > 0) { temp = (char *)REALLOC(parser, parser->m_buffer, bytesToAllocate); } if (temp == NULL) { parser->m_errorCode = XML_ERROR_NO_MEMORY; parser->m_eventPtr = parser->m_eventEndPtr = NULL; parser->m_processor = errorProcessor; return XML_STATUS_ERROR; } parser->m_buffer = temp; parser->m_bufferLim = parser->m_buffer + bytesToAllocate; } memcpy(parser->m_buffer, end, nLeftOver); } parser->m_bufferPtr = parser->m_buffer; parser->m_bufferEnd = parser->m_buffer + nLeftOver; parser->m_positionPtr = parser->m_bufferPtr; parser->m_parseEndPtr = parser->m_bufferEnd; parser->m_eventPtr = parser->m_bufferPtr; parser->m_eventEndPtr = parser->m_bufferPtr; return result; } #endif /* not defined XML_CONTEXT_BYTES */ else { void *buff = XML_GetBuffer(parser, len); if (buff == NULL) return XML_STATUS_ERROR; else { memcpy(buff, s, len); return XML_ParseBuffer(parser, len, isFinal); } } } enum XML_Status XMLCALL XML_ParseBuffer(XML_Parser parser, int len, int isFinal) { const char *start; enum XML_Status result = XML_STATUS_OK; if (parser == NULL) return XML_STATUS_ERROR; switch (parser->m_parsingStatus.parsing) { case XML_SUSPENDED: parser->m_errorCode = XML_ERROR_SUSPENDED; return XML_STATUS_ERROR; case XML_FINISHED: parser->m_errorCode = XML_ERROR_FINISHED; return XML_STATUS_ERROR; case XML_INITIALIZED: /* Has someone called XML_GetBuffer successfully before? */ if (! parser->m_bufferPtr) { parser->m_errorCode = XML_ERROR_NO_BUFFER; return XML_STATUS_ERROR; } if (parser->m_parentParser == NULL && ! startParsing(parser)) { parser->m_errorCode = XML_ERROR_NO_MEMORY; return XML_STATUS_ERROR; } /* fall through */ default: parser->m_parsingStatus.parsing = XML_PARSING; } start = parser->m_bufferPtr; parser->m_positionPtr = start; parser->m_bufferEnd += len; parser->m_parseEndPtr = parser->m_bufferEnd; parser->m_parseEndByteIndex += len; parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal; parser->m_errorCode = parser->m_processor( parser, start, parser->m_parseEndPtr, &parser->m_bufferPtr); if (parser->m_errorCode != XML_ERROR_NONE) { parser->m_eventEndPtr = parser->m_eventPtr; parser->m_processor = errorProcessor; return XML_STATUS_ERROR; } else { switch (parser->m_parsingStatus.parsing) { case XML_SUSPENDED: result = XML_STATUS_SUSPENDED; break; case XML_INITIALIZED: case XML_PARSING: if (isFinal) { parser->m_parsingStatus.parsing = XML_FINISHED; return result; } default:; /* should not happen */ } } XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, parser->m_bufferPtr, &parser->m_position); parser->m_positionPtr = parser->m_bufferPtr; return result; } void *XMLCALL XML_GetBuffer(XML_Parser parser, int len) { if (parser == NULL) return NULL; if (len < 0) { parser->m_errorCode = XML_ERROR_NO_MEMORY; return NULL; } switch (parser->m_parsingStatus.parsing) { case XML_SUSPENDED: parser->m_errorCode = XML_ERROR_SUSPENDED; return NULL; case XML_FINISHED: parser->m_errorCode = XML_ERROR_FINISHED; return NULL; default:; } if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd)) { #ifdef XML_CONTEXT_BYTES int keep; #endif /* defined XML_CONTEXT_BYTES */ /* Do not invoke signed arithmetic overflow: */ int neededSize = (int)((unsigned)len + (unsigned)EXPAT_SAFE_PTR_DIFF( parser->m_bufferEnd, parser->m_bufferPtr)); if (neededSize < 0) { parser->m_errorCode = XML_ERROR_NO_MEMORY; return NULL; } #ifdef XML_CONTEXT_BYTES keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer); if (keep > XML_CONTEXT_BYTES) keep = XML_CONTEXT_BYTES; /* Detect and prevent integer overflow */ if (keep > INT_MAX - neededSize) { parser->m_errorCode = XML_ERROR_NO_MEMORY; return NULL; } neededSize += keep; #endif /* defined XML_CONTEXT_BYTES */ if (neededSize <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) { #ifdef XML_CONTEXT_BYTES if (keep < EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)) { int offset = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer) - keep; /* The buffer pointers cannot be NULL here; we have at least some bytes * in the buffer */ memmove(parser->m_buffer, &parser->m_buffer[offset], parser->m_bufferEnd - parser->m_bufferPtr + keep); parser->m_bufferEnd -= offset; parser->m_bufferPtr -= offset; } #else if (parser->m_buffer && parser->m_bufferPtr) { memmove(parser->m_buffer, parser->m_bufferPtr, EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)); parser->m_bufferEnd = parser->m_buffer + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr); parser->m_bufferPtr = parser->m_buffer; } #endif /* not defined XML_CONTEXT_BYTES */ } else { char *newBuf; int bufferSize = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferPtr); if (bufferSize == 0) bufferSize = INIT_BUFFER_SIZE; do { /* Do not invoke signed arithmetic overflow: */ bufferSize = (int)(2U * (unsigned)bufferSize); } while (bufferSize < neededSize && bufferSize > 0); if (bufferSize <= 0) { parser->m_errorCode = XML_ERROR_NO_MEMORY; return NULL; } newBuf = (char *)MALLOC(parser, bufferSize); if (newBuf == 0) { parser->m_errorCode = XML_ERROR_NO_MEMORY; return NULL; } parser->m_bufferLim = newBuf + bufferSize; #ifdef XML_CONTEXT_BYTES if (parser->m_bufferPtr) { memcpy(newBuf, &parser->m_bufferPtr[-keep], EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr) + keep); FREE(parser, parser->m_buffer); parser->m_buffer = newBuf; parser->m_bufferEnd = parser->m_buffer + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr) + keep; parser->m_bufferPtr = parser->m_buffer + keep; } else { /* This must be a brand new buffer with no data in it yet */ parser->m_bufferEnd = newBuf; parser->m_bufferPtr = parser->m_buffer = newBuf; } #else if (parser->m_bufferPtr) { memcpy(newBuf, parser->m_bufferPtr, EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)); FREE(parser, parser->m_buffer); parser->m_bufferEnd = newBuf + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr); } else { /* This must be a brand new buffer with no data in it yet */ parser->m_bufferEnd = newBuf; } parser->m_bufferPtr = parser->m_buffer = newBuf; #endif /* not defined XML_CONTEXT_BYTES */ } parser->m_eventPtr = parser->m_eventEndPtr = NULL; parser->m_positionPtr = NULL; } return parser->m_bufferEnd; } enum XML_Status XMLCALL XML_StopParser(XML_Parser parser, XML_Bool resumable) { if (parser == NULL) return XML_STATUS_ERROR; switch (parser->m_parsingStatus.parsing) { case XML_SUSPENDED: if (resumable) { parser->m_errorCode = XML_ERROR_SUSPENDED; return XML_STATUS_ERROR; } parser->m_parsingStatus.parsing = XML_FINISHED; break; case XML_FINISHED: parser->m_errorCode = XML_ERROR_FINISHED; return XML_STATUS_ERROR; default: if (resumable) { #ifdef XML_DTD if (parser->m_isParamEntity) { parser->m_errorCode = XML_ERROR_SUSPEND_PE; return XML_STATUS_ERROR; } #endif parser->m_parsingStatus.parsing = XML_SUSPENDED; } else parser->m_parsingStatus.parsing = XML_FINISHED; } return XML_STATUS_OK; } enum XML_Status XMLCALL XML_ResumeParser(XML_Parser parser) { enum XML_Status result = XML_STATUS_OK; if (parser == NULL) return XML_STATUS_ERROR; if (parser->m_parsingStatus.parsing != XML_SUSPENDED) { parser->m_errorCode = XML_ERROR_NOT_SUSPENDED; return XML_STATUS_ERROR; } parser->m_parsingStatus.parsing = XML_PARSING; parser->m_errorCode = parser->m_processor( parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr); if (parser->m_errorCode != XML_ERROR_NONE) { parser->m_eventEndPtr = parser->m_eventPtr; parser->m_processor = errorProcessor; return XML_STATUS_ERROR; } else { switch (parser->m_parsingStatus.parsing) { case XML_SUSPENDED: result = XML_STATUS_SUSPENDED; break; case XML_INITIALIZED: case XML_PARSING: if (parser->m_parsingStatus.finalBuffer) { parser->m_parsingStatus.parsing = XML_FINISHED; return result; } default:; } } XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, parser->m_bufferPtr, &parser->m_position); parser->m_positionPtr = parser->m_bufferPtr; return result; } void XMLCALL XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status) { if (parser == NULL) return; assert(status != NULL); *status = parser->m_parsingStatus; } enum XML_Error XMLCALL XML_GetErrorCode(XML_Parser parser) { if (parser == NULL) return XML_ERROR_INVALID_ARGUMENT; return parser->m_errorCode; } XML_Index XMLCALL XML_GetCurrentByteIndex(XML_Parser parser) { if (parser == NULL) return -1; if (parser->m_eventPtr) return (XML_Index)(parser->m_parseEndByteIndex - (parser->m_parseEndPtr - parser->m_eventPtr)); return -1; } int XMLCALL XML_GetCurrentByteCount(XML_Parser parser) { if (parser == NULL) return 0; if (parser->m_eventEndPtr && parser->m_eventPtr) return (int)(parser->m_eventEndPtr - parser->m_eventPtr); return 0; } const char *XMLCALL XML_GetInputContext(XML_Parser parser, int *offset, int *size) { #ifdef XML_CONTEXT_BYTES if (parser == NULL) return NULL; if (parser->m_eventPtr && parser->m_buffer) { if (offset != NULL) *offset = (int)(parser->m_eventPtr - parser->m_buffer); if (size != NULL) *size = (int)(parser->m_bufferEnd - parser->m_buffer); return parser->m_buffer; } #else (void)parser; (void)offset; (void)size; #endif /* defined XML_CONTEXT_BYTES */ return (const char *)0; } XML_Size XMLCALL XML_GetCurrentLineNumber(XML_Parser parser) { if (parser == NULL) return 0; if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) { XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, parser->m_eventPtr, &parser->m_position); parser->m_positionPtr = parser->m_eventPtr; } return parser->m_position.lineNumber + 1; } XML_Size XMLCALL XML_GetCurrentColumnNumber(XML_Parser parser) { if (parser == NULL) return 0; if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) { XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, parser->m_eventPtr, &parser->m_position); parser->m_positionPtr = parser->m_eventPtr; } return parser->m_position.columnNumber; } void XMLCALL XML_FreeContentModel(XML_Parser parser, XML_Content *model) { if (parser != NULL) FREE(parser, model); } void *XMLCALL XML_MemMalloc(XML_Parser parser, size_t size) { if (parser == NULL) return NULL; return MALLOC(parser, size); } void *XMLCALL XML_MemRealloc(XML_Parser parser, void *ptr, size_t size) { if (parser == NULL) return NULL; return REALLOC(parser, ptr, size); } void XMLCALL XML_MemFree(XML_Parser parser, void *ptr) { if (parser != NULL) FREE(parser, ptr); } void XMLCALL XML_DefaultCurrent(XML_Parser parser) { if (parser == NULL) return; if (parser->m_defaultHandler) { if (parser->m_openInternalEntities) reportDefault(parser, parser->m_internalEncoding, parser->m_openInternalEntities->internalEventPtr, parser->m_openInternalEntities->internalEventEndPtr); else reportDefault(parser, parser->m_encoding, parser->m_eventPtr, parser->m_eventEndPtr); } } const XML_LChar *XMLCALL XML_ErrorString(enum XML_Error code) { switch (code) { case XML_ERROR_NONE: return NULL; case XML_ERROR_NO_MEMORY: return XML_L("out of memory"); case XML_ERROR_SYNTAX: return XML_L("syntax error"); case XML_ERROR_NO_ELEMENTS: return XML_L("no element found"); case XML_ERROR_INVALID_TOKEN: return XML_L("not well-formed (invalid token)"); case XML_ERROR_UNCLOSED_TOKEN: return XML_L("unclosed token"); case XML_ERROR_PARTIAL_CHAR: return XML_L("partial character"); case XML_ERROR_TAG_MISMATCH: return XML_L("mismatched tag"); case XML_ERROR_DUPLICATE_ATTRIBUTE: return XML_L("duplicate attribute"); case XML_ERROR_JUNK_AFTER_DOC_ELEMENT: return XML_L("junk after document element"); case XML_ERROR_PARAM_ENTITY_REF: return XML_L("illegal parameter entity reference"); case XML_ERROR_UNDEFINED_ENTITY: return XML_L("undefined entity"); case XML_ERROR_RECURSIVE_ENTITY_REF: return XML_L("recursive entity reference"); case XML_ERROR_ASYNC_ENTITY: return XML_L("asynchronous entity"); case XML_ERROR_BAD_CHAR_REF: return XML_L("reference to invalid character number"); case XML_ERROR_BINARY_ENTITY_REF: return XML_L("reference to binary entity"); case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF: return XML_L("reference to external entity in attribute"); case XML_ERROR_MISPLACED_XML_PI: return XML_L("XML or text declaration not at start of entity"); case XML_ERROR_UNKNOWN_ENCODING: return XML_L("unknown encoding"); case XML_ERROR_INCORRECT_ENCODING: return XML_L("encoding specified in XML declaration is incorrect"); case XML_ERROR_UNCLOSED_CDATA_SECTION: return XML_L("unclosed CDATA section"); case XML_ERROR_EXTERNAL_ENTITY_HANDLING: return XML_L("error in processing external entity reference"); case XML_ERROR_NOT_STANDALONE: return XML_L("document is not standalone"); case XML_ERROR_UNEXPECTED_STATE: return XML_L("unexpected parser state - please send a bug report"); case XML_ERROR_ENTITY_DECLARED_IN_PE: return XML_L("entity declared in parameter entity"); case XML_ERROR_FEATURE_REQUIRES_XML_DTD: return XML_L("requested feature requires XML_DTD support in Expat"); case XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING: return XML_L("cannot change setting once parsing has begun"); /* Added in 1.95.7. */ case XML_ERROR_UNBOUND_PREFIX: return XML_L("unbound prefix"); /* Added in 1.95.8. */ case XML_ERROR_UNDECLARING_PREFIX: return XML_L("must not undeclare prefix"); case XML_ERROR_INCOMPLETE_PE: return XML_L("incomplete markup in parameter entity"); case XML_ERROR_XML_DECL: return XML_L("XML declaration not well-formed"); case XML_ERROR_TEXT_DECL: return XML_L("text declaration not well-formed"); case XML_ERROR_PUBLICID: return XML_L("illegal character(s) in public id"); case XML_ERROR_SUSPENDED: return XML_L("parser suspended"); case XML_ERROR_NOT_SUSPENDED: return XML_L("parser not suspended"); case XML_ERROR_ABORTED: return XML_L("parsing aborted"); case XML_ERROR_FINISHED: return XML_L("parsing finished"); case XML_ERROR_SUSPEND_PE: return XML_L("cannot suspend in external parameter entity"); /* Added in 2.0.0. */ case XML_ERROR_RESERVED_PREFIX_XML: return XML_L( "reserved prefix (xml) must not be undeclared or bound to another namespace name"); case XML_ERROR_RESERVED_PREFIX_XMLNS: return XML_L("reserved prefix (xmlns) must not be declared or undeclared"); case XML_ERROR_RESERVED_NAMESPACE_URI: return XML_L( "prefix must not be bound to one of the reserved namespace names"); /* Added in 2.2.5. */ case XML_ERROR_INVALID_ARGUMENT: /* Constant added in 2.2.1, already */ return XML_L("invalid argument"); /* Added in 2.3.0. */ case XML_ERROR_NO_BUFFER: return XML_L( "a successful prior call to function XML_GetBuffer is required"); /* Added in 2.4.0. */ case XML_ERROR_AMPLIFICATION_LIMIT_BREACH: return XML_L( "limit on input amplification factor (from DTD and entities) breached"); } return NULL; } const XML_LChar *XMLCALL XML_ExpatVersion(void) { /* V1 is used to string-ize the version number. However, it would string-ize the actual version macro *names* unless we get them substituted before being passed to V1. CPP is defined to expand a macro, then rescan for more expansions. Thus, we use V2 to expand the version macros, then CPP will expand the resulting V1() macro with the correct numerals. */ /* ### I'm assuming cpp is portable in this respect... */ #define V1(a, b, c) XML_L(#a) XML_L(".") XML_L(#b) XML_L(".") XML_L(#c) #define V2(a, b, c) XML_L("expat_") V1(a, b, c) return V2(XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION); #undef V1 #undef V2 } XML_Expat_Version XMLCALL XML_ExpatVersionInfo(void) { XML_Expat_Version version; version.major = XML_MAJOR_VERSION; version.minor = XML_MINOR_VERSION; version.micro = XML_MICRO_VERSION; return version; } const XML_Feature *XMLCALL XML_GetFeatureList(void) { static const XML_Feature features[] = { {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"), sizeof(XML_Char)}, {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"), sizeof(XML_LChar)}, #ifdef XML_UNICODE {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0}, #endif #ifdef XML_UNICODE_WCHAR_T {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0}, #endif #ifdef XML_DTD {XML_FEATURE_DTD, XML_L("XML_DTD"), 0}, #endif #ifdef XML_CONTEXT_BYTES {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"), XML_CONTEXT_BYTES}, #endif #ifdef XML_MIN_SIZE {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0}, #endif #ifdef XML_NS {XML_FEATURE_NS, XML_L("XML_NS"), 0}, #endif #ifdef XML_LARGE_SIZE {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0}, #endif #ifdef XML_ATTR_INFO {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0}, #endif #ifdef XML_DTD /* Added in Expat 2.4.0. */ {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT, XML_L("XML_BLAP_MAX_AMP"), (long int) EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT}, {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT, XML_L("XML_BLAP_ACT_THRES"), EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT}, #endif {XML_FEATURE_END, NULL, 0}}; return features; } #ifdef XML_DTD XML_Bool XMLCALL XML_SetBillionLaughsAttackProtectionMaximumAmplification( XML_Parser parser, float maximumAmplificationFactor) { if ((parser == NULL) || (parser->m_parentParser != NULL) || isnan(maximumAmplificationFactor) || (maximumAmplificationFactor < 1.0f)) { return XML_FALSE; } parser->m_accounting.maximumAmplificationFactor = maximumAmplificationFactor; return XML_TRUE; } XML_Bool XMLCALL XML_SetBillionLaughsAttackProtectionActivationThreshold( XML_Parser parser, unsigned long long activationThresholdBytes) { if ((parser == NULL) || (parser->m_parentParser != NULL)) { return XML_FALSE; } parser->m_accounting.activationThresholdBytes = activationThresholdBytes; return XML_TRUE; } #endif /* XML_DTD */ /* Initially tag->rawName always points into the parse buffer; for those TAG instances opened while the current parse buffer was processed, and not yet closed, we need to store tag->rawName in a more permanent location, since the parse buffer is about to be discarded. */ static XML_Bool storeRawNames(XML_Parser parser) { TAG *tag = parser->m_tagStack; while (tag) { int bufSize; int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1); size_t rawNameLen; char *rawNameBuf = tag->buf + nameLen; /* Stop if already stored. Since m_tagStack is a stack, we can stop at the first entry that has already been copied; everything below it in the stack is already been accounted for in a previous call to this function. */ if (tag->rawName == rawNameBuf) break; /* For re-use purposes we need to ensure that the size of tag->buf is a multiple of sizeof(XML_Char). */ rawNameLen = ROUND_UP(tag->rawNameLength, sizeof(XML_Char)); /* Detect and prevent integer overflow. */ if (rawNameLen > (size_t)INT_MAX - nameLen) return XML_FALSE; bufSize = nameLen + (int)rawNameLen; if (bufSize > tag->bufEnd - tag->buf) { char *temp = (char *)REALLOC(parser, tag->buf, bufSize); if (temp == NULL) return XML_FALSE; /* if tag->name.str points to tag->buf (only when namespace processing is off) then we have to update it */ if (tag->name.str == (XML_Char *)tag->buf) tag->name.str = (XML_Char *)temp; /* if tag->name.localPart is set (when namespace processing is on) then update it as well, since it will always point into tag->buf */ if (tag->name.localPart) tag->name.localPart = (XML_Char *)temp + (tag->name.localPart - (XML_Char *)tag->buf); tag->buf = temp; tag->bufEnd = temp + bufSize; rawNameBuf = temp + nameLen; } memcpy(rawNameBuf, tag->rawName, tag->rawNameLength); tag->rawName = rawNameBuf; tag = tag->parent; } return XML_TRUE; } static enum XML_Error PTRCALL contentProcessor(XML_Parser parser, const char *start, const char *end, const char **endPtr) { enum XML_Error result = doContent( parser, 0, parser->m_encoding, start, end, endPtr, (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT); if (result == XML_ERROR_NONE) { if (! storeRawNames(parser)) return XML_ERROR_NO_MEMORY; } return result; } static enum XML_Error PTRCALL externalEntityInitProcessor(XML_Parser parser, const char *start, const char *end, const char **endPtr) { enum XML_Error result = initializeEncoding(parser); if (result != XML_ERROR_NONE) return result; parser->m_processor = externalEntityInitProcessor2; return externalEntityInitProcessor2(parser, start, end, endPtr); } static enum XML_Error PTRCALL externalEntityInitProcessor2(XML_Parser parser, const char *start, const char *end, const char **endPtr) { const char *next = start; /* XmlContentTok doesn't always set the last arg */ int tok = XmlContentTok(parser->m_encoding, start, end, &next); switch (tok) { case XML_TOK_BOM: #ifdef XML_DTD if (! accountingDiffTolerated(parser, tok, start, next, __LINE__, XML_ACCOUNT_DIRECT)) { accountingOnAbort(parser); return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; } #endif /* XML_DTD */ /* If we are at the end of the buffer, this would cause the next stage, i.e. externalEntityInitProcessor3, to pass control directly to doContent (by detecting XML_TOK_NONE) without processing any xml text declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent. */ if (next == end && ! parser->m_parsingStatus.finalBuffer) { *endPtr = next; return XML_ERROR_NONE; } start = next; break; case XML_TOK_PARTIAL: if (! parser->m_parsingStatus.finalBuffer) { *endPtr = start; return XML_ERROR_NONE; } parser->m_eventPtr = start; return XML_ERROR_UNCLOSED_TOKEN; case XML_TOK_PARTIAL_CHAR: if (! parser->m_parsingStatus.finalBuffer) { *endPtr = start; return XML_ERROR_NONE; } parser->m_eventPtr = start; return XML_ERROR_PARTIAL_CHAR; } parser->m_processor = externalEntityInitProcessor3; return externalEntityInitProcessor3(parser, start, end, endPtr); } static enum XML_Error PTRCALL externalEntityInitProcessor3(XML_Parser parser, const char *start, const char *end, const char **endPtr) { int tok; const char *next = start; /* XmlContentTok doesn't always set the last arg */ parser->m_eventPtr = start; tok = XmlContentTok(parser->m_encoding, start, end, &next); /* Note: These bytes are accounted later in: - processXmlDecl - externalEntityContentProcessor */ parser->m_eventEndPtr = next; switch (tok) { case XML_TOK_XML_DECL: { enum XML_Error result; result = processXmlDecl(parser, 1, start, next); if (result != XML_ERROR_NONE) return result; switch (parser->m_parsingStatus.parsing) { case XML_SUSPENDED: *endPtr = next; return XML_ERROR_NONE; case XML_FINISHED: return XML_ERROR_ABORTED; default: start = next; } } break; case XML_TOK_PARTIAL: if (! parser->m_parsingStatus.finalBuffer) { *endPtr = start; return XML_ERROR_NONE; } return XML_ERROR_UNCLOSED_TOKEN; case XML_TOK_PARTIAL_CHAR: if (! parser->m_parsingStatus.finalBuffer) { *endPtr = start; return XML_ERROR_NONE; } return XML_ERROR_PARTIAL_CHAR; } parser->m_processor = externalEntityContentProcessor; parser->m_tagLevel = 1; return externalEntityContentProcessor(parser, start, end, endPtr); } static enum XML_Error PTRCALL externalEntityContentProcessor(XML_Parser parser, const char *start, const char *end, const char **endPtr) { enum XML_Error result = doContent(parser, 1, parser->m_encoding, start, end, endPtr, (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_ENTITY_EXPANSION); if (result == XML_ERROR_NONE) { if (! storeRawNames(parser)) return XML_ERROR_NO_MEMORY; } return result; } static enum XML_Error doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, const char *s, const char *end, const char **nextPtr, XML_Bool haveMore, enum XML_Account account) { /* save one level of indirection */ DTD *const dtd = parser->m_dtd; const char **eventPP; const char **eventEndPP; if (enc == parser->m_encoding) { eventPP = &parser->m_eventPtr; eventEndPP = &parser->m_eventEndPtr; } else { eventPP = &(parser->m_openInternalEntities->internalEventPtr); eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr); } *eventPP = s; for (;;) { const char *next = s; /* XmlContentTok doesn't always set the last arg */ int tok = XmlContentTok(enc, s, end, &next); #ifdef XML_DTD const char *accountAfter = ((tok == XML_TOK_TRAILING_RSQB) || (tok == XML_TOK_TRAILING_CR)) ? (haveMore ? s /* i.e. 0 bytes */ : end) : next; if (! accountingDiffTolerated(parser, tok, s, accountAfter, __LINE__, account)) { accountingOnAbort(parser); return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; } #endif *eventEndPP = next; switch (tok) { case XML_TOK_TRAILING_CR: if (haveMore) { *nextPtr = s; return XML_ERROR_NONE; } *eventEndPP = end; if (parser->m_characterDataHandler) { XML_Char c = 0xA; parser->m_characterDataHandler(parser->m_handlerArg, &c, 1); } else if (parser->m_defaultHandler) reportDefault(parser, enc, s, end); /* We are at the end of the final buffer, should we check for XML_SUSPENDED, XML_FINISHED? */ if (startTagLevel == 0) return XML_ERROR_NO_ELEMENTS; if (parser->m_tagLevel != startTagLevel) return XML_ERROR_ASYNC_ENTITY; *nextPtr = end; return XML_ERROR_NONE; case XML_TOK_NONE: if (haveMore) { *nextPtr = s; return XML_ERROR_NONE; } if (startTagLevel > 0) { if (parser->m_tagLevel != startTagLevel) return XML_ERROR_ASYNC_ENTITY; *nextPtr = s; return XML_ERROR_NONE; } return XML_ERROR_NO_ELEMENTS; case XML_TOK_INVALID: *eventPP = next; return XML_ERROR_INVALID_TOKEN; case XML_TOK_PARTIAL: if (haveMore) { *nextPtr = s; return XML_ERROR_NONE; } return XML_ERROR_UNCLOSED_TOKEN; case XML_TOK_PARTIAL_CHAR: if (haveMore) { *nextPtr = s; return XML_ERROR_NONE; } return XML_ERROR_PARTIAL_CHAR; case XML_TOK_ENTITY_REF: { const XML_Char *name; ENTITY *entity; XML_Char ch = (XML_Char)XmlPredefinedEntityName( enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar); if (ch) { #ifdef XML_DTD /* NOTE: We are replacing 4-6 characters original input for 1 character * so there is no amplification and hence recording without * protection. */ accountingDiffTolerated(parser, tok, (char *)&ch, ((char *)&ch) + sizeof(XML_Char), __LINE__, XML_ACCOUNT_ENTITY_EXPANSION); #endif /* XML_DTD */ if (parser->m_characterDataHandler) parser->m_characterDataHandler(parser->m_handlerArg, &ch, 1); else if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); break; } name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar); if (! name) return XML_ERROR_NO_MEMORY; entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0); poolDiscard(&dtd->pool); /* First, determine if a check for an existing declaration is needed; if yes, check that the entity exists, and that it is internal, otherwise call the skipped entity or default handler. */ if (! dtd->hasParamEntityRefs || dtd->standalone) { if (! entity) return XML_ERROR_UNDEFINED_ENTITY; else if (! entity->is_internal) return XML_ERROR_ENTITY_DECLARED_IN_PE; } else if (! entity) { if (parser->m_skippedEntityHandler) parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0); else if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); break; } if (entity->open) return XML_ERROR_RECURSIVE_ENTITY_REF; if (entity->notation) return XML_ERROR_BINARY_ENTITY_REF; if (entity->textPtr) { enum XML_Error result; if (! parser->m_defaultExpandInternalEntities) { if (parser->m_skippedEntityHandler) parser->m_skippedEntityHandler(parser->m_handlerArg, entity->name, 0); else if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); break; } result = processInternalEntity(parser, entity, XML_FALSE); if (result != XML_ERROR_NONE) return result; } else if (parser->m_externalEntityRefHandler) { const XML_Char *context; entity->open = XML_TRUE; context = getContext(parser); entity->open = XML_FALSE; if (! context) return XML_ERROR_NO_MEMORY; if (! parser->m_externalEntityRefHandler( parser->m_externalEntityRefHandlerArg, context, entity->base, entity->systemId, entity->publicId)) return XML_ERROR_EXTERNAL_ENTITY_HANDLING; poolDiscard(&parser->m_tempPool); } else if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); break; } case XML_TOK_START_TAG_NO_ATTS: /* fall through */ case XML_TOK_START_TAG_WITH_ATTS: { TAG *tag; enum XML_Error result; XML_Char *toPtr; if (parser->m_freeTagList) { tag = parser->m_freeTagList; parser->m_freeTagList = parser->m_freeTagList->parent; } else { tag = (TAG *)MALLOC(parser, sizeof(TAG)); if (! tag) return XML_ERROR_NO_MEMORY; tag->buf = (char *)MALLOC(parser, INIT_TAG_BUF_SIZE); if (! tag->buf) { FREE(parser, tag); return XML_ERROR_NO_MEMORY; } tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE; } tag->bindings = NULL; tag->parent = parser->m_tagStack; parser->m_tagStack = tag; tag->name.localPart = NULL; tag->name.prefix = NULL; tag->rawName = s + enc->minBytesPerChar; tag->rawNameLength = XmlNameLength(enc, tag->rawName); ++parser->m_tagLevel; { const char *rawNameEnd = tag->rawName + tag->rawNameLength; const char *fromPtr = tag->rawName; toPtr = (XML_Char *)tag->buf; for (;;) { int bufSize; int convLen; const enum XML_Convert_Result convert_res = XmlConvert(enc, &fromPtr, rawNameEnd, (ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1); convLen = (int)(toPtr - (XML_Char *)tag->buf); if ((fromPtr >= rawNameEnd) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) { tag->name.strLen = convLen; break; } bufSize = (int)(tag->bufEnd - tag->buf) << 1; { char *temp = (char *)REALLOC(parser, tag->buf, bufSize); if (temp == NULL) return XML_ERROR_NO_MEMORY; tag->buf = temp; tag->bufEnd = temp + bufSize; toPtr = (XML_Char *)temp + convLen; } } } tag->name.str = (XML_Char *)tag->buf; *toPtr = XML_T('\0'); result = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings), account); if (result) return result; if (parser->m_startElementHandler) parser->m_startElementHandler(parser->m_handlerArg, tag->name.str, (const XML_Char **)parser->m_atts); else if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); poolClear(&parser->m_tempPool); break; } case XML_TOK_EMPTY_ELEMENT_NO_ATTS: /* fall through */ case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: { const char *rawName = s + enc->minBytesPerChar; enum XML_Error result; BINDING *bindings = NULL; XML_Bool noElmHandlers = XML_TRUE; TAG_NAME name; name.str = poolStoreString(&parser->m_tempPool, enc, rawName, rawName + XmlNameLength(enc, rawName)); if (! name.str) return XML_ERROR_NO_MEMORY; poolFinish(&parser->m_tempPool); result = storeAtts(parser, enc, s, &name, &bindings, XML_ACCOUNT_NONE /* token spans whole start tag */); if (result != XML_ERROR_NONE) { freeBindings(parser, bindings); return result; } poolFinish(&parser->m_tempPool); if (parser->m_startElementHandler) { parser->m_startElementHandler(parser->m_handlerArg, name.str, (const XML_Char **)parser->m_atts); noElmHandlers = XML_FALSE; } if (parser->m_endElementHandler) { if (parser->m_startElementHandler) *eventPP = *eventEndPP; parser->m_endElementHandler(parser->m_handlerArg, name.str); noElmHandlers = XML_FALSE; } if (noElmHandlers && parser->m_defaultHandler) reportDefault(parser, enc, s, next); poolClear(&parser->m_tempPool); freeBindings(parser, bindings); } if ((parser->m_tagLevel == 0) && (parser->m_parsingStatus.parsing != XML_FINISHED)) { if (parser->m_parsingStatus.parsing == XML_SUSPENDED) parser->m_processor = epilogProcessor; else return epilogProcessor(parser, next, end, nextPtr); } break; case XML_TOK_END_TAG: if (parser->m_tagLevel == startTagLevel) return XML_ERROR_ASYNC_ENTITY; else { int len; const char *rawName; TAG *tag = parser->m_tagStack; parser->m_tagStack = tag->parent; tag->parent = parser->m_freeTagList; parser->m_freeTagList = tag; rawName = s + enc->minBytesPerChar * 2; len = XmlNameLength(enc, rawName); if (len != tag->rawNameLength || memcmp(tag->rawName, rawName, len) != 0) { *eventPP = rawName; return XML_ERROR_TAG_MISMATCH; } --parser->m_tagLevel; if (parser->m_endElementHandler) { const XML_Char *localPart; const XML_Char *prefix; XML_Char *uri; localPart = tag->name.localPart; if (parser->m_ns && localPart) { /* localPart and prefix may have been overwritten in tag->name.str, since this points to the binding->uri buffer which gets re-used; so we have to add them again */ uri = (XML_Char *)tag->name.str + tag->name.uriLen; /* don't need to check for space - already done in storeAtts() */ while (*localPart) *uri++ = *localPart++; prefix = (XML_Char *)tag->name.prefix; if (parser->m_ns_triplets && prefix) { *uri++ = parser->m_namespaceSeparator; while (*prefix) *uri++ = *prefix++; } *uri = XML_T('\0'); } parser->m_endElementHandler(parser->m_handlerArg, tag->name.str); } else if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); while (tag->bindings) { BINDING *b = tag->bindings; if (parser->m_endNamespaceDeclHandler) parser->m_endNamespaceDeclHandler(parser->m_handlerArg, b->prefix->name); tag->bindings = tag->bindings->nextTagBinding; b->nextTagBinding = parser->m_freeBindingList; parser->m_freeBindingList = b; b->prefix->binding = b->prevPrefixBinding; } if ((parser->m_tagLevel == 0) && (parser->m_parsingStatus.parsing != XML_FINISHED)) { if (parser->m_parsingStatus.parsing == XML_SUSPENDED) parser->m_processor = epilogProcessor; else return epilogProcessor(parser, next, end, nextPtr); } } break; case XML_TOK_CHAR_REF: { int n = XmlCharRefNumber(enc, s); if (n < 0) return XML_ERROR_BAD_CHAR_REF; if (parser->m_characterDataHandler) { XML_Char buf[XML_ENCODE_MAX]; parser->m_characterDataHandler(parser->m_handlerArg, buf, XmlEncode(n, (ICHAR *)buf)); } else if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); } break; case XML_TOK_XML_DECL: return XML_ERROR_MISPLACED_XML_PI; case XML_TOK_DATA_NEWLINE: if (parser->m_characterDataHandler) { XML_Char c = 0xA; parser->m_characterDataHandler(parser->m_handlerArg, &c, 1); } else if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); break; case XML_TOK_CDATA_SECT_OPEN: { enum XML_Error result; if (parser->m_startCdataSectionHandler) parser->m_startCdataSectionHandler(parser->m_handlerArg); /* BEGIN disabled code */ /* Suppose you doing a transformation on a document that involves changing only the character data. You set up a defaultHandler and a characterDataHandler. The defaultHandler simply copies characters through. The characterDataHandler does the transformation and writes the characters out escaping them as necessary. This case will fail to work if we leave out the following two lines (because & and < inside CDATA sections will be incorrectly escaped). However, now we have a start/endCdataSectionHandler, so it seems easier to let the user deal with this. */ else if (0 && parser->m_characterDataHandler) parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf, 0); /* END disabled code */ else if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); result = doCdataSection(parser, enc, &next, end, nextPtr, haveMore, account); if (result != XML_ERROR_NONE) return result; else if (! next) { parser->m_processor = cdataSectionProcessor; return result; } } break; case XML_TOK_TRAILING_RSQB: if (haveMore) { *nextPtr = s; return XML_ERROR_NONE; } if (parser->m_characterDataHandler) { if (MUST_CONVERT(enc, s)) { ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf; XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd); parser->m_characterDataHandler( parser->m_handlerArg, parser->m_dataBuf, (int)(dataPtr - (ICHAR *)parser->m_dataBuf)); } else parser->m_characterDataHandler( parser->m_handlerArg, (XML_Char *)s, (int)((XML_Char *)end - (XML_Char *)s)); } else if (parser->m_defaultHandler) reportDefault(parser, enc, s, end); /* We are at the end of the final buffer, should we check for XML_SUSPENDED, XML_FINISHED? */ if (startTagLevel == 0) { *eventPP = end; return XML_ERROR_NO_ELEMENTS; } if (parser->m_tagLevel != startTagLevel) { *eventPP = end; return XML_ERROR_ASYNC_ENTITY; } *nextPtr = end; return XML_ERROR_NONE; case XML_TOK_DATA_CHARS: { XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler; if (charDataHandler) { if (MUST_CONVERT(enc, s)) { for (;;) { ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf; const enum XML_Convert_Result convert_res = XmlConvert( enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd); *eventEndPP = s; charDataHandler(parser->m_handlerArg, parser->m_dataBuf, (int)(dataPtr - (ICHAR *)parser->m_dataBuf)); if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) break; *eventPP = s; } } else charDataHandler(parser->m_handlerArg, (XML_Char *)s, (int)((XML_Char *)next - (XML_Char *)s)); } else if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); } break; case XML_TOK_PI: if (! reportProcessingInstruction(parser, enc, s, next)) return XML_ERROR_NO_MEMORY; break; case XML_TOK_COMMENT: if (! reportComment(parser, enc, s, next)) return XML_ERROR_NO_MEMORY; break; default: /* All of the tokens produced by XmlContentTok() have their own * explicit cases, so this default is not strictly necessary. * However it is a useful safety net, so we retain the code and * simply exclude it from the coverage tests. * * LCOV_EXCL_START */ if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); break; /* LCOV_EXCL_STOP */ } *eventPP = s = next; switch (parser->m_parsingStatus.parsing) { case XML_SUSPENDED: *nextPtr = next; return XML_ERROR_NONE; case XML_FINISHED: return XML_ERROR_ABORTED; default:; } } /* not reached */ } /* This function does not call free() on the allocated memory, merely * moving it to the parser's m_freeBindingList where it can be freed or * reused as appropriate. */ static void freeBindings(XML_Parser parser, BINDING *bindings) { while (bindings) { BINDING *b = bindings; /* m_startNamespaceDeclHandler will have been called for this * binding in addBindings(), so call the end handler now. */ if (parser->m_endNamespaceDeclHandler) parser->m_endNamespaceDeclHandler(parser->m_handlerArg, b->prefix->name); bindings = bindings->nextTagBinding; b->nextTagBinding = parser->m_freeBindingList; parser->m_freeBindingList = b; b->prefix->binding = b->prevPrefixBinding; } } /* Precondition: all arguments must be non-NULL; Purpose: - normalize attributes - check attributes for well-formedness - generate namespace aware attribute names (URI, prefix) - build list of attributes for startElementHandler - default attributes - process namespace declarations (check and report them) - generate namespace aware element name (URI, prefix) */ static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, TAG_NAME *tagNamePtr, BINDING **bindingsPtr, enum XML_Account account) { DTD *const dtd = parser->m_dtd; /* save one level of indirection */ ELEMENT_TYPE *elementType; int nDefaultAtts; const XML_Char **appAtts; /* the attribute list for the application */ int attIndex = 0; int prefixLen; int i; int n; XML_Char *uri; int nPrefixes = 0; BINDING *binding; const XML_Char *localPart; /* lookup the element type name */ elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str, 0); if (! elementType) { const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str); if (! name) return XML_ERROR_NO_MEMORY; elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name, sizeof(ELEMENT_TYPE)); if (! elementType) return XML_ERROR_NO_MEMORY; if (parser->m_ns && ! setElementTypePrefix(parser, elementType)) return XML_ERROR_NO_MEMORY; } nDefaultAtts = elementType->nDefaultAtts; /* get the attributes from the tokenizer */ n = XmlGetAttributes(enc, attStr, parser->m_attsSize, parser->m_atts); /* Detect and prevent integer overflow */ if (n > INT_MAX - nDefaultAtts) { return XML_ERROR_NO_MEMORY; } if (n + nDefaultAtts > parser->m_attsSize) { int oldAttsSize = parser->m_attsSize; ATTRIBUTE *temp; #ifdef XML_ATTR_INFO XML_AttrInfo *temp2; #endif /* Detect and prevent integer overflow */ if ((nDefaultAtts > INT_MAX - INIT_ATTS_SIZE) || (n > INT_MAX - (nDefaultAtts + INIT_ATTS_SIZE))) { return XML_ERROR_NO_MEMORY; } parser->m_attsSize = n + nDefaultAtts + INIT_ATTS_SIZE; /* Detect and prevent integer overflow. * The preprocessor guard addresses the "always false" warning * from -Wtype-limits on platforms where * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ #if UINT_MAX >= SIZE_MAX if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(ATTRIBUTE)) { parser->m_attsSize = oldAttsSize; return XML_ERROR_NO_MEMORY; } #endif temp = (ATTRIBUTE *)REALLOC(parser, (void *)parser->m_atts, parser->m_attsSize * sizeof(ATTRIBUTE)); if (temp == NULL) { parser->m_attsSize = oldAttsSize; return XML_ERROR_NO_MEMORY; } parser->m_atts = temp; #ifdef XML_ATTR_INFO /* Detect and prevent integer overflow. * The preprocessor guard addresses the "always false" warning * from -Wtype-limits on platforms where * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ # if UINT_MAX >= SIZE_MAX if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(XML_AttrInfo)) { parser->m_attsSize = oldAttsSize; return XML_ERROR_NO_MEMORY; } # endif temp2 = (XML_AttrInfo *)REALLOC(parser, (void *)parser->m_attInfo, parser->m_attsSize * sizeof(XML_AttrInfo)); if (temp2 == NULL) { parser->m_attsSize = oldAttsSize; return XML_ERROR_NO_MEMORY; } parser->m_attInfo = temp2; #endif if (n > oldAttsSize) XmlGetAttributes(enc, attStr, n, parser->m_atts); } appAtts = (const XML_Char **)parser->m_atts; for (i = 0; i < n; i++) { ATTRIBUTE *currAtt = &parser->m_atts[i]; #ifdef XML_ATTR_INFO XML_AttrInfo *currAttInfo = &parser->m_attInfo[i]; #endif /* add the name and value to the attribute list */ ATTRIBUTE_ID *attId = getAttributeId(parser, enc, currAtt->name, currAtt->name + XmlNameLength(enc, currAtt->name)); if (! attId) return XML_ERROR_NO_MEMORY; #ifdef XML_ATTR_INFO currAttInfo->nameStart = parser->m_parseEndByteIndex - (parser->m_parseEndPtr - currAtt->name); currAttInfo->nameEnd = currAttInfo->nameStart + XmlNameLength(enc, currAtt->name); currAttInfo->valueStart = parser->m_parseEndByteIndex - (parser->m_parseEndPtr - currAtt->valuePtr); currAttInfo->valueEnd = parser->m_parseEndByteIndex - (parser->m_parseEndPtr - currAtt->valueEnd); #endif /* Detect duplicate attributes by their QNames. This does not work when namespace processing is turned on and different prefixes for the same namespace are used. For this case we have a check further down. */ if ((attId->name)[-1]) { if (enc == parser->m_encoding) parser->m_eventPtr = parser->m_atts[i].name; return XML_ERROR_DUPLICATE_ATTRIBUTE; } (attId->name)[-1] = 1; appAtts[attIndex++] = attId->name; if (! parser->m_atts[i].normalized) { enum XML_Error result; XML_Bool isCdata = XML_TRUE; /* figure out whether declared as other than CDATA */ if (attId->maybeTokenized) { int j; for (j = 0; j < nDefaultAtts; j++) { if (attId == elementType->defaultAtts[j].id) { isCdata = elementType->defaultAtts[j].isCdata; break; } } } /* normalize the attribute value */ result = storeAttributeValue( parser, enc, isCdata, parser->m_atts[i].valuePtr, parser->m_atts[i].valueEnd, &parser->m_tempPool, account); if (result) return result; appAtts[attIndex] = poolStart(&parser->m_tempPool); poolFinish(&parser->m_tempPool); } else { /* the value did not need normalizing */ appAtts[attIndex] = poolStoreString(&parser->m_tempPool, enc, parser->m_atts[i].valuePtr, parser->m_atts[i].valueEnd); if (appAtts[attIndex] == 0) return XML_ERROR_NO_MEMORY; poolFinish(&parser->m_tempPool); } /* handle prefixed attribute names */ if (attId->prefix) { if (attId->xmlns) { /* deal with namespace declarations here */ enum XML_Error result = addBinding(parser, attId->prefix, attId, appAtts[attIndex], bindingsPtr); if (result) return result; --attIndex; } else { /* deal with other prefixed names later */ attIndex++; nPrefixes++; (attId->name)[-1] = 2; } } else attIndex++; } /* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */ parser->m_nSpecifiedAtts = attIndex; if (elementType->idAtt && (elementType->idAtt->name)[-1]) { for (i = 0; i < attIndex; i += 2) if (appAtts[i] == elementType->idAtt->name) { parser->m_idAttIndex = i; break; } } else parser->m_idAttIndex = -1; /* do attribute defaulting */ for (i = 0; i < nDefaultAtts; i++) { const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + i; if (! (da->id->name)[-1] && da->value) { if (da->id->prefix) { if (da->id->xmlns) { enum XML_Error result = addBinding(parser, da->id->prefix, da->id, da->value, bindingsPtr); if (result) return result; } else { (da->id->name)[-1] = 2; nPrefixes++; appAtts[attIndex++] = da->id->name; appAtts[attIndex++] = da->value; } } else { (da->id->name)[-1] = 1; appAtts[attIndex++] = da->id->name; appAtts[attIndex++] = da->value; } } } appAtts[attIndex] = 0; /* expand prefixed attribute names, check for duplicates, and clear flags that say whether attributes were specified */ i = 0; if (nPrefixes) { int j; /* hash table index */ unsigned long version = parser->m_nsAttsVersion; /* Detect and prevent invalid shift */ if (parser->m_nsAttsPower >= sizeof(unsigned int) * 8 /* bits per byte */) { return XML_ERROR_NO_MEMORY; } unsigned int nsAttsSize = 1u << parser->m_nsAttsPower; unsigned char oldNsAttsPower = parser->m_nsAttsPower; /* size of hash table must be at least 2 * (# of prefixed attributes) */ if ((nPrefixes << 1) >> parser->m_nsAttsPower) { /* true for m_nsAttsPower = 0 */ NS_ATT *temp; /* hash table size must also be a power of 2 and >= 8 */ while (nPrefixes >> parser->m_nsAttsPower++) ; if (parser->m_nsAttsPower < 3) parser->m_nsAttsPower = 3; /* Detect and prevent invalid shift */ if (parser->m_nsAttsPower >= sizeof(nsAttsSize) * 8 /* bits per byte */) { /* Restore actual size of memory in m_nsAtts */ parser->m_nsAttsPower = oldNsAttsPower; return XML_ERROR_NO_MEMORY; } nsAttsSize = 1u << parser->m_nsAttsPower; /* Detect and prevent integer overflow. * The preprocessor guard addresses the "always false" warning * from -Wtype-limits on platforms where * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ #if UINT_MAX >= SIZE_MAX if (nsAttsSize > (size_t)(-1) / sizeof(NS_ATT)) { /* Restore actual size of memory in m_nsAtts */ parser->m_nsAttsPower = oldNsAttsPower; return XML_ERROR_NO_MEMORY; } #endif temp = (NS_ATT *)REALLOC(parser, parser->m_nsAtts, nsAttsSize * sizeof(NS_ATT)); if (! temp) { /* Restore actual size of memory in m_nsAtts */ parser->m_nsAttsPower = oldNsAttsPower; return XML_ERROR_NO_MEMORY; } parser->m_nsAtts = temp; version = 0; /* force re-initialization of m_nsAtts hash table */ } /* using a version flag saves us from initializing m_nsAtts every time */ if (! version) { /* initialize version flags when version wraps around */ version = INIT_ATTS_VERSION; for (j = nsAttsSize; j != 0;) parser->m_nsAtts[--j].version = version; } parser->m_nsAttsVersion = --version; /* expand prefixed names and check for duplicates */ for (; i < attIndex; i += 2) { const XML_Char *s = appAtts[i]; if (s[-1] == 2) { /* prefixed */ ATTRIBUTE_ID *id; const BINDING *b; unsigned long uriHash; struct siphash sip_state; struct sipkey sip_key; copy_salt_to_sipkey(parser, &sip_key); sip24_init(&sip_state, &sip_key); ((XML_Char *)s)[-1] = 0; /* clear flag */ id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0); if (! id || ! id->prefix) { /* This code is walking through the appAtts array, dealing * with (in this case) a prefixed attribute name. To be in * the array, the attribute must have already been bound, so * has to have passed through the hash table lookup once * already. That implies that an entry for it already * exists, so the lookup above will return a pointer to * already allocated memory. There is no opportunaity for * the allocator to fail, so the condition above cannot be * fulfilled. * * Since it is difficult to be certain that the above * analysis is complete, we retain the test and merely * remove the code from coverage tests. */ return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */ } b = id->prefix->binding; if (! b) return XML_ERROR_UNBOUND_PREFIX; for (j = 0; j < b->uriLen; j++) { const XML_Char c = b->uri[j]; if (! poolAppendChar(&parser->m_tempPool, c)) return XML_ERROR_NO_MEMORY; } sip24_update(&sip_state, b->uri, b->uriLen * sizeof(XML_Char)); while (*s++ != XML_T(ASCII_COLON)) ; sip24_update(&sip_state, s, keylen(s) * sizeof(XML_Char)); do { /* copies null terminator */ if (! poolAppendChar(&parser->m_tempPool, *s)) return XML_ERROR_NO_MEMORY; } while (*s++); uriHash = (unsigned long)sip24_final(&sip_state); { /* Check hash table for duplicate of expanded name (uriName). Derived from code in lookup(parser, HASH_TABLE *table, ...). */ unsigned char step = 0; unsigned long mask = nsAttsSize - 1; j = uriHash & mask; /* index into hash table */ while (parser->m_nsAtts[j].version == version) { /* for speed we compare stored hash values first */ if (uriHash == parser->m_nsAtts[j].hash) { const XML_Char *s1 = poolStart(&parser->m_tempPool); const XML_Char *s2 = parser->m_nsAtts[j].uriName; /* s1 is null terminated, but not s2 */ for (; *s1 == *s2 && *s1 != 0; s1++, s2++) ; if (*s1 == 0) return XML_ERROR_DUPLICATE_ATTRIBUTE; } if (! step) step = PROBE_STEP(uriHash, mask, parser->m_nsAttsPower); j < step ? (j += nsAttsSize - step) : (j -= step); } } if (parser->m_ns_triplets) { /* append namespace separator and prefix */ parser->m_tempPool.ptr[-1] = parser->m_namespaceSeparator; s = b->prefix->name; do { if (! poolAppendChar(&parser->m_tempPool, *s)) return XML_ERROR_NO_MEMORY; } while (*s++); } /* store expanded name in attribute list */ s = poolStart(&parser->m_tempPool); poolFinish(&parser->m_tempPool); appAtts[i] = s; /* fill empty slot with new version, uriName and hash value */ parser->m_nsAtts[j].version = version; parser->m_nsAtts[j].hash = uriHash; parser->m_nsAtts[j].uriName = s; if (! --nPrefixes) { i += 2; break; } } else /* not prefixed */ ((XML_Char *)s)[-1] = 0; /* clear flag */ } } /* clear flags for the remaining attributes */ for (; i < attIndex; i += 2) ((XML_Char *)(appAtts[i]))[-1] = 0; for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding) binding->attId->name[-1] = 0; if (! parser->m_ns) return XML_ERROR_NONE; /* expand the element type name */ if (elementType->prefix) { binding = elementType->prefix->binding; if (! binding) return XML_ERROR_UNBOUND_PREFIX; localPart = tagNamePtr->str; while (*localPart++ != XML_T(ASCII_COLON)) ; } else if (dtd->defaultPrefix.binding) { binding = dtd->defaultPrefix.binding; localPart = tagNamePtr->str; } else return XML_ERROR_NONE; prefixLen = 0; if (parser->m_ns_triplets && binding->prefix->name) { for (; binding->prefix->name[prefixLen++];) ; /* prefixLen includes null terminator */ } tagNamePtr->localPart = localPart; tagNamePtr->uriLen = binding->uriLen; tagNamePtr->prefix = binding->prefix->name; tagNamePtr->prefixLen = prefixLen; for (i = 0; localPart[i++];) ; /* i includes null terminator */ /* Detect and prevent integer overflow */ if (binding->uriLen > INT_MAX - prefixLen || i > INT_MAX - (binding->uriLen + prefixLen)) { return XML_ERROR_NO_MEMORY; } n = i + binding->uriLen + prefixLen; if (n > binding->uriAlloc) { TAG *p; /* Detect and prevent integer overflow */ if (n > INT_MAX - EXPAND_SPARE) { return XML_ERROR_NO_MEMORY; } /* Detect and prevent integer overflow. * The preprocessor guard addresses the "always false" warning * from -Wtype-limits on platforms where * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ #if UINT_MAX >= SIZE_MAX if ((unsigned)(n + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) { return XML_ERROR_NO_MEMORY; } #endif uri = (XML_Char *)MALLOC(parser, (n + EXPAND_SPARE) * sizeof(XML_Char)); if (! uri) return XML_ERROR_NO_MEMORY; binding->uriAlloc = n + EXPAND_SPARE; memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char)); for (p = parser->m_tagStack; p; p = p->parent) if (p->name.str == binding->uri) p->name.str = uri; FREE(parser, binding->uri); binding->uri = uri; } /* if m_namespaceSeparator != '\0' then uri includes it already */ uri = binding->uri + binding->uriLen; memcpy(uri, localPart, i * sizeof(XML_Char)); /* we always have a namespace separator between localPart and prefix */ if (prefixLen) { uri += i - 1; *uri = parser->m_namespaceSeparator; /* replace null terminator */ memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char)); } tagNamePtr->str = binding->uri; return XML_ERROR_NONE; } +static XML_Bool +is_rfc3986_uri_char(XML_Char candidate) { + // For the RFC 3986 ANBF grammar see + // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A + + switch (candidate) { + // From rule "ALPHA" (uppercase half) + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + + // From rule "ALPHA" (lowercase half) + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': + + // From rule "DIGIT" + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + + // From rule "pct-encoded" + case '%': + + // From rule "unreserved" + case '-': + case '.': + case '_': + case '~': + + // From rule "gen-delims" + case ':': + case '/': + case '?': + case '#': + case '[': + case ']': + case '@': + + // From rule "sub-delims" + case '!': + case '$': + case '&': + case '\'': + case '(': + case ')': + case '*': + case '+': + case ',': + case ';': + case '=': + return XML_TRUE; + + default: + return XML_FALSE; + } +} + /* addBinding() overwrites the value of prefix->binding without checking. Therefore one must keep track of the old value outside of addBinding(). */ static enum XML_Error addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, const XML_Char *uri, BINDING **bindingsPtr) { + // "http://www.w3.org/XML/1998/namespace" static const XML_Char xmlNamespace[] = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L, ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9, ASCII_8, ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c, ASCII_e, '\0'}; static const int xmlLen = (int)sizeof(xmlNamespace) / sizeof(XML_Char) - 1; + // "http://www.w3.org/2000/xmlns/" static const XML_Char xmlnsNamespace[] = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH, ASCII_2, ASCII_0, ASCII_0, ASCII_0, ASCII_SLASH, ASCII_x, ASCII_m, ASCII_l, ASCII_n, ASCII_s, ASCII_SLASH, '\0'}; static const int xmlnsLen = (int)sizeof(xmlnsNamespace) / sizeof(XML_Char) - 1; XML_Bool mustBeXML = XML_FALSE; XML_Bool isXML = XML_TRUE; XML_Bool isXMLNS = XML_TRUE; BINDING *b; int len; /* empty URI is only valid for default namespace per XML NS 1.0 (not 1.1) */ if (*uri == XML_T('\0') && prefix->name) return XML_ERROR_UNDECLARING_PREFIX; if (prefix->name && prefix->name[0] == XML_T(ASCII_x) && prefix->name[1] == XML_T(ASCII_m) && prefix->name[2] == XML_T(ASCII_l)) { /* Not allowed to bind xmlns */ if (prefix->name[3] == XML_T(ASCII_n) && prefix->name[4] == XML_T(ASCII_s) && prefix->name[5] == XML_T('\0')) return XML_ERROR_RESERVED_PREFIX_XMLNS; if (prefix->name[3] == XML_T('\0')) mustBeXML = XML_TRUE; } for (len = 0; uri[len]; len++) { if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len])) isXML = XML_FALSE; if (! mustBeXML && isXMLNS && (len > xmlnsLen || uri[len] != xmlnsNamespace[len])) isXMLNS = XML_FALSE; - // NOTE: While Expat does not validate namespace URIs against RFC 3986, - // we have to at least make sure that the XML processor on top of - // Expat (that is splitting tag names by namespace separator into - // 2- or 3-tuples (uri-local or uri-local-prefix)) cannot be confused - // by an attacker putting additional namespace separator characters - // into namespace declarations. That would be ambiguous and not to - // be expected. - if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)) { + // NOTE: While Expat does not validate namespace URIs against RFC 3986 + // today (and is not REQUIRED to do so with regard to the XML 1.0 + // namespaces specification) we have to at least make sure, that + // the application on top of Expat (that is likely splitting expanded + // element names ("qualified names") of form + // "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces + // in its element handler code) cannot be confused by an attacker + // putting additional namespace separator characters into namespace + // declarations. That would be ambiguous and not to be expected. + // + // While the HTML API docs of function XML_ParserCreateNS have been + // advising against use of a namespace separator character that can + // appear in a URI for >20 years now, some widespread applications + // are using URI characters (':' (colon) in particular) for a + // namespace separator, in practice. To keep these applications + // functional, we only reject namespaces URIs containing the + // application-chosen namespace separator if the chosen separator + // is a non-URI character with regard to RFC 3986. + if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator) + && ! is_rfc3986_uri_char(uri[len])) { return XML_ERROR_SYNTAX; } } isXML = isXML && len == xmlLen; isXMLNS = isXMLNS && len == xmlnsLen; if (mustBeXML != isXML) return mustBeXML ? XML_ERROR_RESERVED_PREFIX_XML : XML_ERROR_RESERVED_NAMESPACE_URI; if (isXMLNS) return XML_ERROR_RESERVED_NAMESPACE_URI; if (parser->m_namespaceSeparator) len++; if (parser->m_freeBindingList) { b = parser->m_freeBindingList; if (len > b->uriAlloc) { /* Detect and prevent integer overflow */ if (len > INT_MAX - EXPAND_SPARE) { return XML_ERROR_NO_MEMORY; } /* Detect and prevent integer overflow. * The preprocessor guard addresses the "always false" warning * from -Wtype-limits on platforms where * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ #if UINT_MAX >= SIZE_MAX if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) { return XML_ERROR_NO_MEMORY; } #endif XML_Char *temp = (XML_Char *)REALLOC( parser, b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE)); if (temp == NULL) return XML_ERROR_NO_MEMORY; b->uri = temp; b->uriAlloc = len + EXPAND_SPARE; } parser->m_freeBindingList = b->nextTagBinding; } else { b = (BINDING *)MALLOC(parser, sizeof(BINDING)); if (! b) return XML_ERROR_NO_MEMORY; /* Detect and prevent integer overflow */ if (len > INT_MAX - EXPAND_SPARE) { return XML_ERROR_NO_MEMORY; } /* Detect and prevent integer overflow. * The preprocessor guard addresses the "always false" warning * from -Wtype-limits on platforms where * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ #if UINT_MAX >= SIZE_MAX if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) { return XML_ERROR_NO_MEMORY; } #endif b->uri = (XML_Char *)MALLOC(parser, sizeof(XML_Char) * (len + EXPAND_SPARE)); if (! b->uri) { FREE(parser, b); return XML_ERROR_NO_MEMORY; } b->uriAlloc = len + EXPAND_SPARE; } b->uriLen = len; memcpy(b->uri, uri, len * sizeof(XML_Char)); if (parser->m_namespaceSeparator) b->uri[len - 1] = parser->m_namespaceSeparator; b->prefix = prefix; b->attId = attId; b->prevPrefixBinding = prefix->binding; /* NULL binding when default namespace undeclared */ if (*uri == XML_T('\0') && prefix == &parser->m_dtd->defaultPrefix) prefix->binding = NULL; else prefix->binding = b; b->nextTagBinding = *bindingsPtr; *bindingsPtr = b; /* if attId == NULL then we are not starting a namespace scope */ if (attId && parser->m_startNamespaceDeclHandler) parser->m_startNamespaceDeclHandler(parser->m_handlerArg, prefix->name, prefix->binding ? uri : 0); return XML_ERROR_NONE; } /* The idea here is to avoid using stack for each CDATA section when the whole file is parsed with one call. */ static enum XML_Error PTRCALL cdataSectionProcessor(XML_Parser parser, const char *start, const char *end, const char **endPtr) { enum XML_Error result = doCdataSection( parser, parser->m_encoding, &start, end, endPtr, (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT); if (result != XML_ERROR_NONE) return result; if (start) { if (parser->m_parentParser) { /* we are parsing an external entity */ parser->m_processor = externalEntityContentProcessor; return externalEntityContentProcessor(parser, start, end, endPtr); } else { parser->m_processor = contentProcessor; return contentProcessor(parser, start, end, endPtr); } } return result; } /* startPtr gets set to non-null if the section is closed, and to null if the section is not yet closed. */ static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr, const char *end, const char **nextPtr, XML_Bool haveMore, enum XML_Account account) { const char *s = *startPtr; const char **eventPP; const char **eventEndPP; if (enc == parser->m_encoding) { eventPP = &parser->m_eventPtr; *eventPP = s; eventEndPP = &parser->m_eventEndPtr; } else { eventPP = &(parser->m_openInternalEntities->internalEventPtr); eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr); } *eventPP = s; *startPtr = NULL; for (;;) { const char *next = s; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */ int tok = XmlCdataSectionTok(enc, s, end, &next); #ifdef XML_DTD if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) { accountingOnAbort(parser); return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; } #else UNUSED_P(account); #endif *eventEndPP = next; switch (tok) { case XML_TOK_CDATA_SECT_CLOSE: if (parser->m_endCdataSectionHandler) parser->m_endCdataSectionHandler(parser->m_handlerArg); /* BEGIN disabled code */ /* see comment under XML_TOK_CDATA_SECT_OPEN */ else if (0 && parser->m_characterDataHandler) parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf, 0); /* END disabled code */ else if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); *startPtr = next; *nextPtr = next; if (parser->m_parsingStatus.parsing == XML_FINISHED) return XML_ERROR_ABORTED; else return XML_ERROR_NONE; case XML_TOK_DATA_NEWLINE: if (parser->m_characterDataHandler) { XML_Char c = 0xA; parser->m_characterDataHandler(parser->m_handlerArg, &c, 1); } else if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); break; case XML_TOK_DATA_CHARS: { XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler; if (charDataHandler) { if (MUST_CONVERT(enc, s)) { for (;;) { ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf; const enum XML_Convert_Result convert_res = XmlConvert( enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd); *eventEndPP = next; charDataHandler(parser->m_handlerArg, parser->m_dataBuf, (int)(dataPtr - (ICHAR *)parser->m_dataBuf)); if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) break; *eventPP = s; } } else charDataHandler(parser->m_handlerArg, (XML_Char *)s, (int)((XML_Char *)next - (XML_Char *)s)); } else if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); } break; case XML_TOK_INVALID: *eventPP = next; return XML_ERROR_INVALID_TOKEN; case XML_TOK_PARTIAL_CHAR: if (haveMore) { *nextPtr = s; return XML_ERROR_NONE; } return XML_ERROR_PARTIAL_CHAR; case XML_TOK_PARTIAL: case XML_TOK_NONE: if (haveMore) { *nextPtr = s; return XML_ERROR_NONE; } return XML_ERROR_UNCLOSED_CDATA_SECTION; default: /* Every token returned by XmlCdataSectionTok() has its own * explicit case, so this default case will never be executed. * We retain it as a safety net and exclude it from the coverage * statistics. * * LCOV_EXCL_START */ *eventPP = next; return XML_ERROR_UNEXPECTED_STATE; /* LCOV_EXCL_STOP */ } *eventPP = s = next; switch (parser->m_parsingStatus.parsing) { case XML_SUSPENDED: *nextPtr = next; return XML_ERROR_NONE; case XML_FINISHED: return XML_ERROR_ABORTED; default:; } } /* not reached */ } #ifdef XML_DTD /* The idea here is to avoid using stack for each IGNORE section when the whole file is parsed with one call. */ static enum XML_Error PTRCALL ignoreSectionProcessor(XML_Parser parser, const char *start, const char *end, const char **endPtr) { enum XML_Error result = doIgnoreSection(parser, parser->m_encoding, &start, end, endPtr, (XML_Bool)! parser->m_parsingStatus.finalBuffer); if (result != XML_ERROR_NONE) return result; if (start) { parser->m_processor = prologProcessor; return prologProcessor(parser, start, end, endPtr); } return result; } /* startPtr gets set to non-null is the section is closed, and to null if the section is not yet closed. */ static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING *enc, const char **startPtr, const char *end, const char **nextPtr, XML_Bool haveMore) { const char *next = *startPtr; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */ int tok; const char *s = *startPtr; const char **eventPP; const char **eventEndPP; if (enc == parser->m_encoding) { eventPP = &parser->m_eventPtr; *eventPP = s; eventEndPP = &parser->m_eventEndPtr; } else { /* It's not entirely clear, but it seems the following two lines * of code cannot be executed. The only occasions on which 'enc' * is not 'encoding' are when this function is called * from the internal entity processing, and IGNORE sections are an * error in internal entities. * * Since it really isn't clear that this is true, we keep the code * and just remove it from our coverage tests. * * LCOV_EXCL_START */ eventPP = &(parser->m_openInternalEntities->internalEventPtr); eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr); /* LCOV_EXCL_STOP */ } *eventPP = s; *startPtr = NULL; tok = XmlIgnoreSectionTok(enc, s, end, &next); # ifdef XML_DTD if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, XML_ACCOUNT_DIRECT)) { accountingOnAbort(parser); return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; } # endif *eventEndPP = next; switch (tok) { case XML_TOK_IGNORE_SECT: if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); *startPtr = next; *nextPtr = next; if (parser->m_parsingStatus.parsing == XML_FINISHED) return XML_ERROR_ABORTED; else return XML_ERROR_NONE; case XML_TOK_INVALID: *eventPP = next; return XML_ERROR_INVALID_TOKEN; case XML_TOK_PARTIAL_CHAR: if (haveMore) { *nextPtr = s; return XML_ERROR_NONE; } return XML_ERROR_PARTIAL_CHAR; case XML_TOK_PARTIAL: case XML_TOK_NONE: if (haveMore) { *nextPtr = s; return XML_ERROR_NONE; } return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */ default: /* All of the tokens that XmlIgnoreSectionTok() returns have * explicit cases to handle them, so this default case is never * executed. We keep it as a safety net anyway, and remove it * from our test coverage statistics. * * LCOV_EXCL_START */ *eventPP = next; return XML_ERROR_UNEXPECTED_STATE; /* LCOV_EXCL_STOP */ } /* not reached */ } #endif /* XML_DTD */ static enum XML_Error initializeEncoding(XML_Parser parser) { const char *s; #ifdef XML_UNICODE char encodingBuf[128]; /* See comments about `protocolEncodingName` in parserInit() */ if (! parser->m_protocolEncodingName) s = NULL; else { int i; for (i = 0; parser->m_protocolEncodingName[i]; i++) { if (i == sizeof(encodingBuf) - 1 || (parser->m_protocolEncodingName[i] & ~0x7f) != 0) { encodingBuf[0] = '\0'; break; } encodingBuf[i] = (char)parser->m_protocolEncodingName[i]; } encodingBuf[i] = '\0'; s = encodingBuf; } #else s = parser->m_protocolEncodingName; #endif if ((parser->m_ns ? XmlInitEncodingNS : XmlInitEncoding)( &parser->m_initEncoding, &parser->m_encoding, s)) return XML_ERROR_NONE; return handleUnknownEncoding(parser, parser->m_protocolEncodingName); } static enum XML_Error processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *s, const char *next) { const char *encodingName = NULL; const XML_Char *storedEncName = NULL; const ENCODING *newEncoding = NULL; const char *version = NULL; const char *versionend; const XML_Char *storedversion = NULL; int standalone = -1; #ifdef XML_DTD if (! accountingDiffTolerated(parser, XML_TOK_XML_DECL, s, next, __LINE__, XML_ACCOUNT_DIRECT)) { accountingOnAbort(parser); return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; } #endif if (! (parser->m_ns ? XmlParseXmlDeclNS : XmlParseXmlDecl)( isGeneralTextEntity, parser->m_encoding, s, next, &parser->m_eventPtr, &version, &versionend, &encodingName, &newEncoding, &standalone)) { if (isGeneralTextEntity) return XML_ERROR_TEXT_DECL; else return XML_ERROR_XML_DECL; } if (! isGeneralTextEntity && standalone == 1) { parser->m_dtd->standalone = XML_TRUE; #ifdef XML_DTD if (parser->m_paramEntityParsing == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE) parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER; #endif /* XML_DTD */ } if (parser->m_xmlDeclHandler) { if (encodingName != NULL) { storedEncName = poolStoreString( &parser->m_temp2Pool, parser->m_encoding, encodingName, encodingName + XmlNameLength(parser->m_encoding, encodingName)); if (! storedEncName) return XML_ERROR_NO_MEMORY; poolFinish(&parser->m_temp2Pool); } if (version) { storedversion = poolStoreString(&parser->m_temp2Pool, parser->m_encoding, version, versionend - parser->m_encoding->minBytesPerChar); if (! storedversion) return XML_ERROR_NO_MEMORY; } parser->m_xmlDeclHandler(parser->m_handlerArg, storedversion, storedEncName, standalone); } else if (parser->m_defaultHandler) reportDefault(parser, parser->m_encoding, s, next); if (parser->m_protocolEncodingName == NULL) { if (newEncoding) { /* Check that the specified encoding does not conflict with what * the parser has already deduced. Do we have the same number * of bytes in the smallest representation of a character? If * this is UTF-16, is it the same endianness? */ if (newEncoding->minBytesPerChar != parser->m_encoding->minBytesPerChar || (newEncoding->minBytesPerChar == 2 && newEncoding != parser->m_encoding)) { parser->m_eventPtr = encodingName; return XML_ERROR_INCORRECT_ENCODING; } parser->m_encoding = newEncoding; } else if (encodingName) { enum XML_Error result; if (! storedEncName) { storedEncName = poolStoreString( &parser->m_temp2Pool, parser->m_encoding, encodingName, encodingName + XmlNameLength(parser->m_encoding, encodingName)); if (! storedEncName) return XML_ERROR_NO_MEMORY; } result = handleUnknownEncoding(parser, storedEncName); poolClear(&parser->m_temp2Pool); if (result == XML_ERROR_UNKNOWN_ENCODING) parser->m_eventPtr = encodingName; return result; } } if (storedEncName || storedversion) poolClear(&parser->m_temp2Pool); return XML_ERROR_NONE; } static enum XML_Error handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName) { if (parser->m_unknownEncodingHandler) { XML_Encoding info; int i; for (i = 0; i < 256; i++) info.map[i] = -1; info.convert = NULL; info.data = NULL; info.release = NULL; if (parser->m_unknownEncodingHandler(parser->m_unknownEncodingHandlerData, encodingName, &info)) { ENCODING *enc; parser->m_unknownEncodingMem = MALLOC(parser, XmlSizeOfUnknownEncoding()); if (! parser->m_unknownEncodingMem) { if (info.release) info.release(info.data); return XML_ERROR_NO_MEMORY; } enc = (parser->m_ns ? XmlInitUnknownEncodingNS : XmlInitUnknownEncoding)( parser->m_unknownEncodingMem, info.map, info.convert, info.data); if (enc) { parser->m_unknownEncodingData = info.data; parser->m_unknownEncodingRelease = info.release; parser->m_encoding = enc; return XML_ERROR_NONE; } } if (info.release != NULL) info.release(info.data); } return XML_ERROR_UNKNOWN_ENCODING; } static enum XML_Error PTRCALL prologInitProcessor(XML_Parser parser, const char *s, const char *end, const char **nextPtr) { enum XML_Error result = initializeEncoding(parser); if (result != XML_ERROR_NONE) return result; parser->m_processor = prologProcessor; return prologProcessor(parser, s, end, nextPtr); } #ifdef XML_DTD static enum XML_Error PTRCALL externalParEntInitProcessor(XML_Parser parser, const char *s, const char *end, const char **nextPtr) { enum XML_Error result = initializeEncoding(parser); if (result != XML_ERROR_NONE) return result; /* we know now that XML_Parse(Buffer) has been called, so we consider the external parameter entity read */ parser->m_dtd->paramEntityRead = XML_TRUE; if (parser->m_prologState.inEntityValue) { parser->m_processor = entityValueInitProcessor; return entityValueInitProcessor(parser, s, end, nextPtr); } else { parser->m_processor = externalParEntProcessor; return externalParEntProcessor(parser, s, end, nextPtr); } } static enum XML_Error PTRCALL entityValueInitProcessor(XML_Parser parser, const char *s, const char *end, const char **nextPtr) { int tok; const char *start = s; const char *next = start; parser->m_eventPtr = start; for (;;) { tok = XmlPrologTok(parser->m_encoding, start, end, &next); /* Note: Except for XML_TOK_BOM below, these bytes are accounted later in: - storeEntityValue - processXmlDecl */ parser->m_eventEndPtr = next; if (tok <= 0) { if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) { *nextPtr = s; return XML_ERROR_NONE; } switch (tok) { case XML_TOK_INVALID: return XML_ERROR_INVALID_TOKEN; case XML_TOK_PARTIAL: return XML_ERROR_UNCLOSED_TOKEN; case XML_TOK_PARTIAL_CHAR: return XML_ERROR_PARTIAL_CHAR; case XML_TOK_NONE: /* start == end */ default: break; } /* found end of entity value - can store it now */ return storeEntityValue(parser, parser->m_encoding, s, end, XML_ACCOUNT_DIRECT); } else if (tok == XML_TOK_XML_DECL) { enum XML_Error result; result = processXmlDecl(parser, 0, start, next); if (result != XML_ERROR_NONE) return result; /* At this point, m_parsingStatus.parsing cannot be XML_SUSPENDED. For * that to happen, a parameter entity parsing handler must have attempted * to suspend the parser, which fails and raises an error. The parser can * be aborted, but can't be suspended. */ if (parser->m_parsingStatus.parsing == XML_FINISHED) return XML_ERROR_ABORTED; *nextPtr = next; /* stop scanning for text declaration - we found one */ parser->m_processor = entityValueProcessor; return entityValueProcessor(parser, next, end, nextPtr); } /* If we are at the end of the buffer, this would cause XmlPrologTok to return XML_TOK_NONE on the next call, which would then cause the function to exit with *nextPtr set to s - that is what we want for other tokens, but not for the BOM - we would rather like to skip it; then, when this routine is entered the next time, XmlPrologTok will return XML_TOK_INVALID, since the BOM is still in the buffer */ else if (tok == XML_TOK_BOM && next == end && ! parser->m_parsingStatus.finalBuffer) { # ifdef XML_DTD if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, XML_ACCOUNT_DIRECT)) { accountingOnAbort(parser); return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; } # endif *nextPtr = next; return XML_ERROR_NONE; } /* If we get this token, we have the start of what might be a normal tag, but not a declaration (i.e. it doesn't begin with "m_eventPtr = start; } } static enum XML_Error PTRCALL externalParEntProcessor(XML_Parser parser, const char *s, const char *end, const char **nextPtr) { const char *next = s; int tok; tok = XmlPrologTok(parser->m_encoding, s, end, &next); if (tok <= 0) { if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) { *nextPtr = s; return XML_ERROR_NONE; } switch (tok) { case XML_TOK_INVALID: return XML_ERROR_INVALID_TOKEN; case XML_TOK_PARTIAL: return XML_ERROR_UNCLOSED_TOKEN; case XML_TOK_PARTIAL_CHAR: return XML_ERROR_PARTIAL_CHAR; case XML_TOK_NONE: /* start == end */ default: break; } } /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM. However, when parsing an external subset, doProlog will not accept a BOM as valid, and report a syntax error, so we have to skip the BOM, and account for the BOM bytes. */ else if (tok == XML_TOK_BOM) { if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, XML_ACCOUNT_DIRECT)) { accountingOnAbort(parser); return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; } s = next; tok = XmlPrologTok(parser->m_encoding, s, end, &next); } parser->m_processor = prologProcessor; return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr, (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE, XML_ACCOUNT_DIRECT); } static enum XML_Error PTRCALL entityValueProcessor(XML_Parser parser, const char *s, const char *end, const char **nextPtr) { const char *start = s; const char *next = s; const ENCODING *enc = parser->m_encoding; int tok; for (;;) { tok = XmlPrologTok(enc, start, end, &next); /* Note: These bytes are accounted later in: - storeEntityValue */ if (tok <= 0) { if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) { *nextPtr = s; return XML_ERROR_NONE; } switch (tok) { case XML_TOK_INVALID: return XML_ERROR_INVALID_TOKEN; case XML_TOK_PARTIAL: return XML_ERROR_UNCLOSED_TOKEN; case XML_TOK_PARTIAL_CHAR: return XML_ERROR_PARTIAL_CHAR; case XML_TOK_NONE: /* start == end */ default: break; } /* found end of entity value - can store it now */ return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT); } start = next; } } #endif /* XML_DTD */ static enum XML_Error PTRCALL prologProcessor(XML_Parser parser, const char *s, const char *end, const char **nextPtr) { const char *next = s; int tok = XmlPrologTok(parser->m_encoding, s, end, &next); return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr, (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE, XML_ACCOUNT_DIRECT); } static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, int tok, const char *next, const char **nextPtr, XML_Bool haveMore, XML_Bool allowClosingDoctype, enum XML_Account account) { #ifdef XML_DTD static const XML_Char externalSubsetName[] = {ASCII_HASH, '\0'}; #endif /* XML_DTD */ static const XML_Char atypeCDATA[] = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'}; static const XML_Char atypeID[] = {ASCII_I, ASCII_D, '\0'}; static const XML_Char atypeIDREF[] = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0'}; static const XML_Char atypeIDREFS[] = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0'}; static const XML_Char atypeENTITY[] = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0'}; static const XML_Char atypeENTITIES[] = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S, '\0'}; static const XML_Char atypeNMTOKEN[] = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0'}; static const XML_Char atypeNMTOKENS[] = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S, '\0'}; static const XML_Char notationPrefix[] = {ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0'}; static const XML_Char enumValueSep[] = {ASCII_PIPE, '\0'}; static const XML_Char enumValueStart[] = {ASCII_LPAREN, '\0'}; #ifndef XML_DTD UNUSED_P(account); #endif /* save one level of indirection */ DTD *const dtd = parser->m_dtd; const char **eventPP; const char **eventEndPP; enum XML_Content_Quant quant; if (enc == parser->m_encoding) { eventPP = &parser->m_eventPtr; eventEndPP = &parser->m_eventEndPtr; } else { eventPP = &(parser->m_openInternalEntities->internalEventPtr); eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr); } for (;;) { int role; XML_Bool handleDefault = XML_TRUE; *eventPP = s; *eventEndPP = next; if (tok <= 0) { if (haveMore && tok != XML_TOK_INVALID) { *nextPtr = s; return XML_ERROR_NONE; } switch (tok) { case XML_TOK_INVALID: *eventPP = next; return XML_ERROR_INVALID_TOKEN; case XML_TOK_PARTIAL: return XML_ERROR_UNCLOSED_TOKEN; case XML_TOK_PARTIAL_CHAR: return XML_ERROR_PARTIAL_CHAR; case -XML_TOK_PROLOG_S: tok = -tok; break; case XML_TOK_NONE: #ifdef XML_DTD /* for internal PE NOT referenced between declarations */ if (enc != parser->m_encoding && ! parser->m_openInternalEntities->betweenDecl) { *nextPtr = s; return XML_ERROR_NONE; } /* WFC: PE Between Declarations - must check that PE contains complete markup, not only for external PEs, but also for internal PEs if the reference occurs between declarations. */ if (parser->m_isParamEntity || enc != parser->m_encoding) { if (XmlTokenRole(&parser->m_prologState, XML_TOK_NONE, end, end, enc) == XML_ROLE_ERROR) return XML_ERROR_INCOMPLETE_PE; *nextPtr = s; return XML_ERROR_NONE; } #endif /* XML_DTD */ return XML_ERROR_NO_ELEMENTS; default: tok = -tok; next = end; break; } } role = XmlTokenRole(&parser->m_prologState, tok, s, next, enc); #ifdef XML_DTD switch (role) { case XML_ROLE_INSTANCE_START: // bytes accounted in contentProcessor case XML_ROLE_XML_DECL: // bytes accounted in processXmlDecl case XML_ROLE_TEXT_DECL: // bytes accounted in processXmlDecl break; default: if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) { accountingOnAbort(parser); return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; } } #endif switch (role) { case XML_ROLE_XML_DECL: { enum XML_Error result = processXmlDecl(parser, 0, s, next); if (result != XML_ERROR_NONE) return result; enc = parser->m_encoding; handleDefault = XML_FALSE; } break; case XML_ROLE_DOCTYPE_NAME: if (parser->m_startDoctypeDeclHandler) { parser->m_doctypeName = poolStoreString(&parser->m_tempPool, enc, s, next); if (! parser->m_doctypeName) return XML_ERROR_NO_MEMORY; poolFinish(&parser->m_tempPool); parser->m_doctypePubid = NULL; handleDefault = XML_FALSE; } parser->m_doctypeSysid = NULL; /* always initialize to NULL */ break; case XML_ROLE_DOCTYPE_INTERNAL_SUBSET: if (parser->m_startDoctypeDeclHandler) { parser->m_startDoctypeDeclHandler( parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid, parser->m_doctypePubid, 1); parser->m_doctypeName = NULL; poolClear(&parser->m_tempPool); handleDefault = XML_FALSE; } break; #ifdef XML_DTD case XML_ROLE_TEXT_DECL: { enum XML_Error result = processXmlDecl(parser, 1, s, next); if (result != XML_ERROR_NONE) return result; enc = parser->m_encoding; handleDefault = XML_FALSE; } break; #endif /* XML_DTD */ case XML_ROLE_DOCTYPE_PUBLIC_ID: #ifdef XML_DTD parser->m_useForeignDTD = XML_FALSE; parser->m_declEntity = (ENTITY *)lookup( parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY)); if (! parser->m_declEntity) return XML_ERROR_NO_MEMORY; #endif /* XML_DTD */ dtd->hasParamEntityRefs = XML_TRUE; if (parser->m_startDoctypeDeclHandler) { XML_Char *pubId; if (! XmlIsPublicId(enc, s, next, eventPP)) return XML_ERROR_PUBLICID; pubId = poolStoreString(&parser->m_tempPool, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar); if (! pubId) return XML_ERROR_NO_MEMORY; normalizePublicId(pubId); poolFinish(&parser->m_tempPool); parser->m_doctypePubid = pubId; handleDefault = XML_FALSE; goto alreadyChecked; } /* fall through */ case XML_ROLE_ENTITY_PUBLIC_ID: if (! XmlIsPublicId(enc, s, next, eventPP)) return XML_ERROR_PUBLICID; alreadyChecked: if (dtd->keepProcessing && parser->m_declEntity) { XML_Char *tem = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar); if (! tem) return XML_ERROR_NO_MEMORY; normalizePublicId(tem); parser->m_declEntity->publicId = tem; poolFinish(&dtd->pool); /* Don't suppress the default handler if we fell through from * the XML_ROLE_DOCTYPE_PUBLIC_ID case. */ if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_PUBLIC_ID) handleDefault = XML_FALSE; } break; case XML_ROLE_DOCTYPE_CLOSE: if (allowClosingDoctype != XML_TRUE) { /* Must not close doctype from within expanded parameter entities */ return XML_ERROR_INVALID_TOKEN; } if (parser->m_doctypeName) { parser->m_startDoctypeDeclHandler( parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid, parser->m_doctypePubid, 0); poolClear(&parser->m_tempPool); handleDefault = XML_FALSE; } /* parser->m_doctypeSysid will be non-NULL in the case of a previous XML_ROLE_DOCTYPE_SYSTEM_ID, even if parser->m_startDoctypeDeclHandler was not set, indicating an external subset */ #ifdef XML_DTD if (parser->m_doctypeSysid || parser->m_useForeignDTD) { XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs; dtd->hasParamEntityRefs = XML_TRUE; if (parser->m_paramEntityParsing && parser->m_externalEntityRefHandler) { ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY)); if (! entity) { /* The external subset name "#" will have already been * inserted into the hash table at the start of the * external entity parsing, so no allocation will happen * and lookup() cannot fail. */ return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */ } if (parser->m_useForeignDTD) entity->base = parser->m_curBase; dtd->paramEntityRead = XML_FALSE; if (! parser->m_externalEntityRefHandler( parser->m_externalEntityRefHandlerArg, 0, entity->base, entity->systemId, entity->publicId)) return XML_ERROR_EXTERNAL_ENTITY_HANDLING; if (dtd->paramEntityRead) { if (! dtd->standalone && parser->m_notStandaloneHandler && ! parser->m_notStandaloneHandler(parser->m_handlerArg)) return XML_ERROR_NOT_STANDALONE; } /* if we didn't read the foreign DTD then this means that there is no external subset and we must reset dtd->hasParamEntityRefs */ else if (! parser->m_doctypeSysid) dtd->hasParamEntityRefs = hadParamEntityRefs; /* end of DTD - no need to update dtd->keepProcessing */ } parser->m_useForeignDTD = XML_FALSE; } #endif /* XML_DTD */ if (parser->m_endDoctypeDeclHandler) { parser->m_endDoctypeDeclHandler(parser->m_handlerArg); handleDefault = XML_FALSE; } break; case XML_ROLE_INSTANCE_START: #ifdef XML_DTD /* if there is no DOCTYPE declaration then now is the last chance to read the foreign DTD */ if (parser->m_useForeignDTD) { XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs; dtd->hasParamEntityRefs = XML_TRUE; if (parser->m_paramEntityParsing && parser->m_externalEntityRefHandler) { ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY)); if (! entity) return XML_ERROR_NO_MEMORY; entity->base = parser->m_curBase; dtd->paramEntityRead = XML_FALSE; if (! parser->m_externalEntityRefHandler( parser->m_externalEntityRefHandlerArg, 0, entity->base, entity->systemId, entity->publicId)) return XML_ERROR_EXTERNAL_ENTITY_HANDLING; if (dtd->paramEntityRead) { if (! dtd->standalone && parser->m_notStandaloneHandler && ! parser->m_notStandaloneHandler(parser->m_handlerArg)) return XML_ERROR_NOT_STANDALONE; } /* if we didn't read the foreign DTD then this means that there is no external subset and we must reset dtd->hasParamEntityRefs */ else dtd->hasParamEntityRefs = hadParamEntityRefs; /* end of DTD - no need to update dtd->keepProcessing */ } } #endif /* XML_DTD */ parser->m_processor = contentProcessor; return contentProcessor(parser, s, end, nextPtr); case XML_ROLE_ATTLIST_ELEMENT_NAME: parser->m_declElementType = getElementType(parser, enc, s, next); if (! parser->m_declElementType) return XML_ERROR_NO_MEMORY; goto checkAttListDeclHandler; case XML_ROLE_ATTRIBUTE_NAME: parser->m_declAttributeId = getAttributeId(parser, enc, s, next); if (! parser->m_declAttributeId) return XML_ERROR_NO_MEMORY; parser->m_declAttributeIsCdata = XML_FALSE; parser->m_declAttributeType = NULL; parser->m_declAttributeIsId = XML_FALSE; goto checkAttListDeclHandler; case XML_ROLE_ATTRIBUTE_TYPE_CDATA: parser->m_declAttributeIsCdata = XML_TRUE; parser->m_declAttributeType = atypeCDATA; goto checkAttListDeclHandler; case XML_ROLE_ATTRIBUTE_TYPE_ID: parser->m_declAttributeIsId = XML_TRUE; parser->m_declAttributeType = atypeID; goto checkAttListDeclHandler; case XML_ROLE_ATTRIBUTE_TYPE_IDREF: parser->m_declAttributeType = atypeIDREF; goto checkAttListDeclHandler; case XML_ROLE_ATTRIBUTE_TYPE_IDREFS: parser->m_declAttributeType = atypeIDREFS; goto checkAttListDeclHandler; case XML_ROLE_ATTRIBUTE_TYPE_ENTITY: parser->m_declAttributeType = atypeENTITY; goto checkAttListDeclHandler; case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES: parser->m_declAttributeType = atypeENTITIES; goto checkAttListDeclHandler; case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN: parser->m_declAttributeType = atypeNMTOKEN; goto checkAttListDeclHandler; case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS: parser->m_declAttributeType = atypeNMTOKENS; checkAttListDeclHandler: if (dtd->keepProcessing && parser->m_attlistDeclHandler) handleDefault = XML_FALSE; break; case XML_ROLE_ATTRIBUTE_ENUM_VALUE: case XML_ROLE_ATTRIBUTE_NOTATION_VALUE: if (dtd->keepProcessing && parser->m_attlistDeclHandler) { const XML_Char *prefix; if (parser->m_declAttributeType) { prefix = enumValueSep; } else { prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE ? notationPrefix : enumValueStart); } if (! poolAppendString(&parser->m_tempPool, prefix)) return XML_ERROR_NO_MEMORY; if (! poolAppend(&parser->m_tempPool, enc, s, next)) return XML_ERROR_NO_MEMORY; parser->m_declAttributeType = parser->m_tempPool.start; handleDefault = XML_FALSE; } break; case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE: case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE: if (dtd->keepProcessing) { if (! defineAttribute(parser->m_declElementType, parser->m_declAttributeId, parser->m_declAttributeIsCdata, parser->m_declAttributeIsId, 0, parser)) return XML_ERROR_NO_MEMORY; if (parser->m_attlistDeclHandler && parser->m_declAttributeType) { if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN) || (*parser->m_declAttributeType == XML_T(ASCII_N) && parser->m_declAttributeType[1] == XML_T(ASCII_O))) { /* Enumerated or Notation type */ if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN)) || ! poolAppendChar(&parser->m_tempPool, XML_T('\0'))) return XML_ERROR_NO_MEMORY; parser->m_declAttributeType = parser->m_tempPool.start; poolFinish(&parser->m_tempPool); } *eventEndPP = s; parser->m_attlistDeclHandler( parser->m_handlerArg, parser->m_declElementType->name, parser->m_declAttributeId->name, parser->m_declAttributeType, 0, role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE); poolClear(&parser->m_tempPool); handleDefault = XML_FALSE; } } break; case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE: case XML_ROLE_FIXED_ATTRIBUTE_VALUE: if (dtd->keepProcessing) { const XML_Char *attVal; enum XML_Error result = storeAttributeValue( parser, enc, parser->m_declAttributeIsCdata, s + enc->minBytesPerChar, next - enc->minBytesPerChar, &dtd->pool, XML_ACCOUNT_NONE); if (result) return result; attVal = poolStart(&dtd->pool); poolFinish(&dtd->pool); /* ID attributes aren't allowed to have a default */ if (! defineAttribute( parser->m_declElementType, parser->m_declAttributeId, parser->m_declAttributeIsCdata, XML_FALSE, attVal, parser)) return XML_ERROR_NO_MEMORY; if (parser->m_attlistDeclHandler && parser->m_declAttributeType) { if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN) || (*parser->m_declAttributeType == XML_T(ASCII_N) && parser->m_declAttributeType[1] == XML_T(ASCII_O))) { /* Enumerated or Notation type */ if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN)) || ! poolAppendChar(&parser->m_tempPool, XML_T('\0'))) return XML_ERROR_NO_MEMORY; parser->m_declAttributeType = parser->m_tempPool.start; poolFinish(&parser->m_tempPool); } *eventEndPP = s; parser->m_attlistDeclHandler( parser->m_handlerArg, parser->m_declElementType->name, parser->m_declAttributeId->name, parser->m_declAttributeType, attVal, role == XML_ROLE_FIXED_ATTRIBUTE_VALUE); poolClear(&parser->m_tempPool); handleDefault = XML_FALSE; } } break; case XML_ROLE_ENTITY_VALUE: if (dtd->keepProcessing) { enum XML_Error result = storeEntityValue(parser, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar, XML_ACCOUNT_NONE); if (parser->m_declEntity) { parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool); parser->m_declEntity->textLen = (int)(poolLength(&dtd->entityValuePool)); poolFinish(&dtd->entityValuePool); if (parser->m_entityDeclHandler) { *eventEndPP = s; parser->m_entityDeclHandler( parser->m_handlerArg, parser->m_declEntity->name, parser->m_declEntity->is_param, parser->m_declEntity->textPtr, parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0); handleDefault = XML_FALSE; } } else poolDiscard(&dtd->entityValuePool); if (result != XML_ERROR_NONE) return result; } break; case XML_ROLE_DOCTYPE_SYSTEM_ID: #ifdef XML_DTD parser->m_useForeignDTD = XML_FALSE; #endif /* XML_DTD */ dtd->hasParamEntityRefs = XML_TRUE; if (parser->m_startDoctypeDeclHandler) { parser->m_doctypeSysid = poolStoreString(&parser->m_tempPool, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar); if (parser->m_doctypeSysid == NULL) return XML_ERROR_NO_MEMORY; poolFinish(&parser->m_tempPool); handleDefault = XML_FALSE; } #ifdef XML_DTD else /* use externalSubsetName to make parser->m_doctypeSysid non-NULL for the case where no parser->m_startDoctypeDeclHandler is set */ parser->m_doctypeSysid = externalSubsetName; #endif /* XML_DTD */ if (! dtd->standalone #ifdef XML_DTD && ! parser->m_paramEntityParsing #endif /* XML_DTD */ && parser->m_notStandaloneHandler && ! parser->m_notStandaloneHandler(parser->m_handlerArg)) return XML_ERROR_NOT_STANDALONE; #ifndef XML_DTD break; #else /* XML_DTD */ if (! parser->m_declEntity) { parser->m_declEntity = (ENTITY *)lookup( parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY)); if (! parser->m_declEntity) return XML_ERROR_NO_MEMORY; parser->m_declEntity->publicId = NULL; } #endif /* XML_DTD */ /* fall through */ case XML_ROLE_ENTITY_SYSTEM_ID: if (dtd->keepProcessing && parser->m_declEntity) { parser->m_declEntity->systemId = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar); if (! parser->m_declEntity->systemId) return XML_ERROR_NO_MEMORY; parser->m_declEntity->base = parser->m_curBase; poolFinish(&dtd->pool); /* Don't suppress the default handler if we fell through from * the XML_ROLE_DOCTYPE_SYSTEM_ID case. */ if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_SYSTEM_ID) handleDefault = XML_FALSE; } break; case XML_ROLE_ENTITY_COMPLETE: if (dtd->keepProcessing && parser->m_declEntity && parser->m_entityDeclHandler) { *eventEndPP = s; parser->m_entityDeclHandler( parser->m_handlerArg, parser->m_declEntity->name, parser->m_declEntity->is_param, 0, 0, parser->m_declEntity->base, parser->m_declEntity->systemId, parser->m_declEntity->publicId, 0); handleDefault = XML_FALSE; } break; case XML_ROLE_ENTITY_NOTATION_NAME: if (dtd->keepProcessing && parser->m_declEntity) { parser->m_declEntity->notation = poolStoreString(&dtd->pool, enc, s, next); if (! parser->m_declEntity->notation) return XML_ERROR_NO_MEMORY; poolFinish(&dtd->pool); if (parser->m_unparsedEntityDeclHandler) { *eventEndPP = s; parser->m_unparsedEntityDeclHandler( parser->m_handlerArg, parser->m_declEntity->name, parser->m_declEntity->base, parser->m_declEntity->systemId, parser->m_declEntity->publicId, parser->m_declEntity->notation); handleDefault = XML_FALSE; } else if (parser->m_entityDeclHandler) { *eventEndPP = s; parser->m_entityDeclHandler( parser->m_handlerArg, parser->m_declEntity->name, 0, 0, 0, parser->m_declEntity->base, parser->m_declEntity->systemId, parser->m_declEntity->publicId, parser->m_declEntity->notation); handleDefault = XML_FALSE; } } break; case XML_ROLE_GENERAL_ENTITY_NAME: { if (XmlPredefinedEntityName(enc, s, next)) { parser->m_declEntity = NULL; break; } if (dtd->keepProcessing) { const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next); if (! name) return XML_ERROR_NO_MEMORY; parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, sizeof(ENTITY)); if (! parser->m_declEntity) return XML_ERROR_NO_MEMORY; if (parser->m_declEntity->name != name) { poolDiscard(&dtd->pool); parser->m_declEntity = NULL; } else { poolFinish(&dtd->pool); parser->m_declEntity->publicId = NULL; parser->m_declEntity->is_param = XML_FALSE; /* if we have a parent parser or are reading an internal parameter entity, then the entity declaration is not considered "internal" */ parser->m_declEntity->is_internal = ! (parser->m_parentParser || parser->m_openInternalEntities); if (parser->m_entityDeclHandler) handleDefault = XML_FALSE; } } else { poolDiscard(&dtd->pool); parser->m_declEntity = NULL; } } break; case XML_ROLE_PARAM_ENTITY_NAME: #ifdef XML_DTD if (dtd->keepProcessing) { const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next); if (! name) return XML_ERROR_NO_MEMORY; parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, sizeof(ENTITY)); if (! parser->m_declEntity) return XML_ERROR_NO_MEMORY; if (parser->m_declEntity->name != name) { poolDiscard(&dtd->pool); parser->m_declEntity = NULL; } else { poolFinish(&dtd->pool); parser->m_declEntity->publicId = NULL; parser->m_declEntity->is_param = XML_TRUE; /* if we have a parent parser or are reading an internal parameter entity, then the entity declaration is not considered "internal" */ parser->m_declEntity->is_internal = ! (parser->m_parentParser || parser->m_openInternalEntities); if (parser->m_entityDeclHandler) handleDefault = XML_FALSE; } } else { poolDiscard(&dtd->pool); parser->m_declEntity = NULL; } #else /* not XML_DTD */ parser->m_declEntity = NULL; #endif /* XML_DTD */ break; case XML_ROLE_NOTATION_NAME: parser->m_declNotationPublicId = NULL; parser->m_declNotationName = NULL; if (parser->m_notationDeclHandler) { parser->m_declNotationName = poolStoreString(&parser->m_tempPool, enc, s, next); if (! parser->m_declNotationName) return XML_ERROR_NO_MEMORY; poolFinish(&parser->m_tempPool); handleDefault = XML_FALSE; } break; case XML_ROLE_NOTATION_PUBLIC_ID: if (! XmlIsPublicId(enc, s, next, eventPP)) return XML_ERROR_PUBLICID; if (parser ->m_declNotationName) { /* means m_notationDeclHandler != NULL */ XML_Char *tem = poolStoreString(&parser->m_tempPool, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar); if (! tem) return XML_ERROR_NO_MEMORY; normalizePublicId(tem); parser->m_declNotationPublicId = tem; poolFinish(&parser->m_tempPool); handleDefault = XML_FALSE; } break; case XML_ROLE_NOTATION_SYSTEM_ID: if (parser->m_declNotationName && parser->m_notationDeclHandler) { const XML_Char *systemId = poolStoreString(&parser->m_tempPool, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar); if (! systemId) return XML_ERROR_NO_MEMORY; *eventEndPP = s; parser->m_notationDeclHandler( parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase, systemId, parser->m_declNotationPublicId); handleDefault = XML_FALSE; } poolClear(&parser->m_tempPool); break; case XML_ROLE_NOTATION_NO_SYSTEM_ID: if (parser->m_declNotationPublicId && parser->m_notationDeclHandler) { *eventEndPP = s; parser->m_notationDeclHandler( parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase, 0, parser->m_declNotationPublicId); handleDefault = XML_FALSE; } poolClear(&parser->m_tempPool); break; case XML_ROLE_ERROR: switch (tok) { case XML_TOK_PARAM_ENTITY_REF: /* PE references in internal subset are not allowed within declarations. */ return XML_ERROR_PARAM_ENTITY_REF; case XML_TOK_XML_DECL: return XML_ERROR_MISPLACED_XML_PI; default: return XML_ERROR_SYNTAX; } #ifdef XML_DTD case XML_ROLE_IGNORE_SECT: { enum XML_Error result; if (parser->m_defaultHandler) reportDefault(parser, enc, s, next); handleDefault = XML_FALSE; result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore); if (result != XML_ERROR_NONE) return result; else if (! next) { parser->m_processor = ignoreSectionProcessor; return result; } } break; #endif /* XML_DTD */ case XML_ROLE_GROUP_OPEN: if (parser->m_prologState.level >= parser->m_groupSize) { if (parser->m_groupSize) { { /* Detect and prevent integer overflow */ if (parser->m_groupSize > (unsigned int)(-1) / 2u) { return XML_ERROR_NO_MEMORY; } char *const new_connector = (char *)REALLOC( parser, parser->m_groupConnector, parser->m_groupSize *= 2); if (new_connector == NULL) { parser->m_groupSize /= 2; return XML_ERROR_NO_MEMORY; } parser->m_groupConnector = new_connector; } if (dtd->scaffIndex) { /* Detect and prevent integer overflow. * The preprocessor guard addresses the "always false" warning * from -Wtype-limits on platforms where * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ #if UINT_MAX >= SIZE_MAX if (parser->m_groupSize > (size_t)(-1) / sizeof(int)) { return XML_ERROR_NO_MEMORY; } #endif int *const new_scaff_index = (int *)REALLOC( parser, dtd->scaffIndex, parser->m_groupSize * sizeof(int)); if (new_scaff_index == NULL) return XML_ERROR_NO_MEMORY; dtd->scaffIndex = new_scaff_index; } } else { parser->m_groupConnector = (char *)MALLOC(parser, parser->m_groupSize = 32); if (! parser->m_groupConnector) { parser->m_groupSize = 0; return XML_ERROR_NO_MEMORY; } } } parser->m_groupConnector[parser->m_prologState.level] = 0; if (dtd->in_eldecl) { int myindex = nextScaffoldPart(parser); if (myindex < 0) return XML_ERROR_NO_MEMORY; assert(dtd->scaffIndex != NULL); dtd->scaffIndex[dtd->scaffLevel] = myindex; dtd->scaffLevel++; dtd->scaffold[myindex].type = XML_CTYPE_SEQ; if (parser->m_elementDeclHandler) handleDefault = XML_FALSE; } break; case XML_ROLE_GROUP_SEQUENCE: if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_PIPE) return XML_ERROR_SYNTAX; parser->m_groupConnector[parser->m_prologState.level] = ASCII_COMMA; if (dtd->in_eldecl && parser->m_elementDeclHandler) handleDefault = XML_FALSE; break; case XML_ROLE_GROUP_CHOICE: if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_COMMA) return XML_ERROR_SYNTAX; if (dtd->in_eldecl && ! parser->m_groupConnector[parser->m_prologState.level] && (dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type != XML_CTYPE_MIXED)) { dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type = XML_CTYPE_CHOICE; if (parser->m_elementDeclHandler) handleDefault = XML_FALSE; } parser->m_groupConnector[parser->m_prologState.level] = ASCII_PIPE; break; case XML_ROLE_PARAM_ENTITY_REF: #ifdef XML_DTD case XML_ROLE_INNER_PARAM_ENTITY_REF: dtd->hasParamEntityRefs = XML_TRUE; if (! parser->m_paramEntityParsing) dtd->keepProcessing = dtd->standalone; else { const XML_Char *name; ENTITY *entity; name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar); if (! name) return XML_ERROR_NO_MEMORY; entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0); poolDiscard(&dtd->pool); /* first, determine if a check for an existing declaration is needed; if yes, check that the entity exists, and that it is internal, otherwise call the skipped entity handler */ if (parser->m_prologState.documentEntity && (dtd->standalone ? ! parser->m_openInternalEntities : ! dtd->hasParamEntityRefs)) { if (! entity) return XML_ERROR_UNDEFINED_ENTITY; else if (! entity->is_internal) { /* It's hard to exhaustively search the code to be sure, * but there doesn't seem to be a way of executing the * following line. There are two cases: * * If 'standalone' is false, the DTD must have no * parameter entities or we wouldn't have passed the outer * 'if' statement. That measn the only entity in the hash * table is the external subset name "#" which cannot be * given as a parameter entity name in XML syntax, so the * lookup must have returned NULL and we don't even reach * the test for an internal entity. * * If 'standalone' is true, it does not seem to be * possible to create entities taking this code path that * are not internal entities, so fail the test above. * * Because this analysis is very uncertain, the code is * being left in place and merely removed from the * coverage test statistics. */ return XML_ERROR_ENTITY_DECLARED_IN_PE; /* LCOV_EXCL_LINE */ } } else if (! entity) { dtd->keepProcessing = dtd->standalone; /* cannot report skipped entities in declarations */ if ((role == XML_ROLE_PARAM_ENTITY_REF) && parser->m_skippedEntityHandler) { parser->m_skippedEntityHandler(parser->m_handlerArg, name, 1); handleDefault = XML_FALSE; } break; } if (entity->open) return XML_ERROR_RECURSIVE_ENTITY_REF; if (entity->textPtr) { enum XML_Error result; XML_Bool betweenDecl = (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE); result = processInternalEntity(parser, entity, betweenDecl); if (result != XML_ERROR_NONE) return result; handleDefault = XML_FALSE; break; } if (parser->m_externalEntityRefHandler) { dtd->paramEntityRead = XML_FALSE; entity->open = XML_TRUE; entityTrackingOnOpen(parser, entity, __LINE__); if (! parser->m_externalEntityRefHandler( parser->m_externalEntityRefHandlerArg, 0, entity->base, entity->systemId, entity->publicId)) { entityTrackingOnClose(parser, entity, __LINE__); entity->open = XML_FALSE; return XML_ERROR_EXTERNAL_ENTITY_HANDLING; } entityTrackingOnClose(parser, entity, __LINE__); entity->open = XML_FALSE; handleDefault = XML_FALSE; if (! dtd->paramEntityRead) { dtd->keepProcessing = dtd->standalone; break; } } else { dtd->keepProcessing = dtd->standalone; break; } } #endif /* XML_DTD */ if (! dtd->standalone && parser->m_notStandaloneHandler && ! parser->m_notStandaloneHandler(parser->m_handlerArg)) return XML_ERROR_NOT_STANDALONE; break; /* Element declaration stuff */ case XML_ROLE_ELEMENT_NAME: if (parser->m_elementDeclHandler) { parser->m_declElementType = getElementType(parser, enc, s, next); if (! parser->m_declElementType) return XML_ERROR_NO_MEMORY; dtd->scaffLevel = 0; dtd->scaffCount = 0; dtd->in_eldecl = XML_TRUE; handleDefault = XML_FALSE; } break; case XML_ROLE_CONTENT_ANY: case XML_ROLE_CONTENT_EMPTY: if (dtd->in_eldecl) { if (parser->m_elementDeclHandler) { XML_Content *content = (XML_Content *)MALLOC(parser, sizeof(XML_Content)); if (! content) return XML_ERROR_NO_MEMORY; content->quant = XML_CQUANT_NONE; content->name = NULL; content->numchildren = 0; content->children = NULL; content->type = ((role == XML_ROLE_CONTENT_ANY) ? XML_CTYPE_ANY : XML_CTYPE_EMPTY); *eventEndPP = s; parser->m_elementDeclHandler( parser->m_handlerArg, parser->m_declElementType->name, content); handleDefault = XML_FALSE; } dtd->in_eldecl = XML_FALSE; } break; case XML_ROLE_CONTENT_PCDATA: if (dtd->in_eldecl) { dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type = XML_CTYPE_MIXED; if (parser->m_elementDeclHandler) handleDefault = XML_FALSE; } break; case XML_ROLE_CONTENT_ELEMENT: quant = XML_CQUANT_NONE; goto elementContent; case XML_ROLE_CONTENT_ELEMENT_OPT: quant = XML_CQUANT_OPT; goto elementContent; case XML_ROLE_CONTENT_ELEMENT_REP: quant = XML_CQUANT_REP; goto elementContent; case XML_ROLE_CONTENT_ELEMENT_PLUS: quant = XML_CQUANT_PLUS; elementContent: if (dtd->in_eldecl) { ELEMENT_TYPE *el; const XML_Char *name; size_t nameLen; const char *nxt = (quant == XML_CQUANT_NONE ? next : next - enc->minBytesPerChar); int myindex = nextScaffoldPart(parser); if (myindex < 0) return XML_ERROR_NO_MEMORY; dtd->scaffold[myindex].type = XML_CTYPE_NAME; dtd->scaffold[myindex].quant = quant; el = getElementType(parser, enc, s, nxt); if (! el) return XML_ERROR_NO_MEMORY; name = el->name; dtd->scaffold[myindex].name = name; nameLen = 0; for (; name[nameLen++];) ; /* Detect and prevent integer overflow */ if (nameLen > UINT_MAX - dtd->contentStringLen) { return XML_ERROR_NO_MEMORY; } dtd->contentStringLen += (unsigned)nameLen; if (parser->m_elementDeclHandler) handleDefault = XML_FALSE; } break; case XML_ROLE_GROUP_CLOSE: quant = XML_CQUANT_NONE; goto closeGroup; case XML_ROLE_GROUP_CLOSE_OPT: quant = XML_CQUANT_OPT; goto closeGroup; case XML_ROLE_GROUP_CLOSE_REP: quant = XML_CQUANT_REP; goto closeGroup; case XML_ROLE_GROUP_CLOSE_PLUS: quant = XML_CQUANT_PLUS; closeGroup: if (dtd->in_eldecl) { if (parser->m_elementDeclHandler) handleDefault = XML_FALSE; dtd->scaffLevel--; dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel]].quant = quant; if (dtd->scaffLevel == 0) { if (! handleDefault) { XML_Content *model = build_model(parser); if (! model) return XML_ERROR_NO_MEMORY; *eventEndPP = s; parser->m_elementDeclHandler( parser->m_handlerArg, parser->m_declElementType->name, model); } dtd->in_eldecl = XML_FALSE; dtd->contentStringLen = 0; } } break; /* End element declaration stuff */ case XML_ROLE_PI: if (! reportProcessingInstruction(parser, enc, s, next)) return XML_ERROR_NO_MEMORY; handleDefault = XML_FALSE; break; case XML_ROLE_COMMENT: if (! reportComment(parser, enc, s, next)) return XML_ERROR_NO_MEMORY; handleDefault = XML_FALSE; break; case XML_ROLE_NONE: switch (tok) { case XML_TOK_BOM: handleDefault = XML_FALSE; break; } break; case XML_ROLE_DOCTYPE_NONE: if (parser->m_startDoctypeDeclHandler) handleDefault = XML_FALSE; break; case XML_ROLE_ENTITY_NONE: if (dtd->keepProcessing && parser->m_entityDeclHandler) handleDefault = XML_FALSE; break; case XML_ROLE_NOTATION_NONE: if (parser->m_notationDeclHandler) handleDefault = XML_FALSE; break; case XML_ROLE_ATTLIST_NONE: if (dtd->keepProcessing && parser->m_attlistDeclHandler) handleDefault = XML_FALSE; break; case XML_ROLE_ELEMENT_NONE: if (parser->m_elementDeclHandler) handleDefault = XML_FALSE; break; } /* end of big switch */ if (handleDefault && parser->m_defaultHandler) reportDefault(parser, enc, s, next); switch (parser->m_parsingStatus.parsing) { case XML_SUSPENDED: *nextPtr = next; return XML_ERROR_NONE; case XML_FINISHED: return XML_ERROR_ABORTED; default: s = next; tok = XmlPrologTok(enc, s, end, &next); } } /* not reached */ } static enum XML_Error PTRCALL epilogProcessor(XML_Parser parser, const char *s, const char *end, const char **nextPtr) { parser->m_processor = epilogProcessor; parser->m_eventPtr = s; for (;;) { const char *next = NULL; int tok = XmlPrologTok(parser->m_encoding, s, end, &next); #ifdef XML_DTD if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, XML_ACCOUNT_DIRECT)) { accountingOnAbort(parser); return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; } #endif parser->m_eventEndPtr = next; switch (tok) { /* report partial linebreak - it might be the last token */ case -XML_TOK_PROLOG_S: if (parser->m_defaultHandler) { reportDefault(parser, parser->m_encoding, s, next); if (parser->m_parsingStatus.parsing == XML_FINISHED) return XML_ERROR_ABORTED; } *nextPtr = next; return XML_ERROR_NONE; case XML_TOK_NONE: *nextPtr = s; return XML_ERROR_NONE; case XML_TOK_PROLOG_S: if (parser->m_defaultHandler) reportDefault(parser, parser->m_encoding, s, next); break; case XML_TOK_PI: if (! reportProcessingInstruction(parser, parser->m_encoding, s, next)) return XML_ERROR_NO_MEMORY; break; case XML_TOK_COMMENT: if (! reportComment(parser, parser->m_encoding, s, next)) return XML_ERROR_NO_MEMORY; break; case XML_TOK_INVALID: parser->m_eventPtr = next; return XML_ERROR_INVALID_TOKEN; case XML_TOK_PARTIAL: if (! parser->m_parsingStatus.finalBuffer) { *nextPtr = s; return XML_ERROR_NONE; } return XML_ERROR_UNCLOSED_TOKEN; case XML_TOK_PARTIAL_CHAR: if (! parser->m_parsingStatus.finalBuffer) { *nextPtr = s; return XML_ERROR_NONE; } return XML_ERROR_PARTIAL_CHAR; default: return XML_ERROR_JUNK_AFTER_DOC_ELEMENT; } parser->m_eventPtr = s = next; switch (parser->m_parsingStatus.parsing) { case XML_SUSPENDED: *nextPtr = next; return XML_ERROR_NONE; case XML_FINISHED: return XML_ERROR_ABORTED; default:; } } } static enum XML_Error processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl) { const char *textStart, *textEnd; const char *next; enum XML_Error result; OPEN_INTERNAL_ENTITY *openEntity; if (parser->m_freeInternalEntities) { openEntity = parser->m_freeInternalEntities; parser->m_freeInternalEntities = openEntity->next; } else { openEntity = (OPEN_INTERNAL_ENTITY *)MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY)); if (! openEntity) return XML_ERROR_NO_MEMORY; } entity->open = XML_TRUE; #ifdef XML_DTD entityTrackingOnOpen(parser, entity, __LINE__); #endif entity->processed = 0; openEntity->next = parser->m_openInternalEntities; parser->m_openInternalEntities = openEntity; openEntity->entity = entity; openEntity->startTagLevel = parser->m_tagLevel; openEntity->betweenDecl = betweenDecl; openEntity->internalEventPtr = NULL; openEntity->internalEventEndPtr = NULL; textStart = (const char *)entity->textPtr; textEnd = (const char *)(entity->textPtr + entity->textLen); /* Set a safe default value in case 'next' does not get set */ next = textStart; #ifdef XML_DTD if (entity->is_param) { int tok = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next); result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd, tok, next, &next, XML_FALSE, XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION); } else #endif /* XML_DTD */ result = doContent(parser, parser->m_tagLevel, parser->m_internalEncoding, textStart, textEnd, &next, XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION); if (result == XML_ERROR_NONE) { if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) { entity->processed = (int)(next - textStart); parser->m_processor = internalEntityProcessor; } else { #ifdef XML_DTD entityTrackingOnClose(parser, entity, __LINE__); #endif /* XML_DTD */ entity->open = XML_FALSE; parser->m_openInternalEntities = openEntity->next; /* put openEntity back in list of free instances */ openEntity->next = parser->m_freeInternalEntities; parser->m_freeInternalEntities = openEntity; } } return result; } static enum XML_Error PTRCALL internalEntityProcessor(XML_Parser parser, const char *s, const char *end, const char **nextPtr) { ENTITY *entity; const char *textStart, *textEnd; const char *next; enum XML_Error result; OPEN_INTERNAL_ENTITY *openEntity = parser->m_openInternalEntities; if (! openEntity) return XML_ERROR_UNEXPECTED_STATE; entity = openEntity->entity; textStart = ((const char *)entity->textPtr) + entity->processed; textEnd = (const char *)(entity->textPtr + entity->textLen); /* Set a safe default value in case 'next' does not get set */ next = textStart; #ifdef XML_DTD if (entity->is_param) { int tok = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next); result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd, tok, next, &next, XML_FALSE, XML_TRUE, XML_ACCOUNT_ENTITY_EXPANSION); } else #endif /* XML_DTD */ result = doContent(parser, openEntity->startTagLevel, parser->m_internalEncoding, textStart, textEnd, &next, XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION); if (result != XML_ERROR_NONE) return result; else if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) { entity->processed = (int)(next - (const char *)entity->textPtr); return result; } else { #ifdef XML_DTD entityTrackingOnClose(parser, entity, __LINE__); #endif entity->open = XML_FALSE; parser->m_openInternalEntities = openEntity->next; /* put openEntity back in list of free instances */ openEntity->next = parser->m_freeInternalEntities; parser->m_freeInternalEntities = openEntity; } #ifdef XML_DTD if (entity->is_param) { int tok; parser->m_processor = prologProcessor; tok = XmlPrologTok(parser->m_encoding, s, end, &next); return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr, (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE, XML_ACCOUNT_DIRECT); } else #endif /* XML_DTD */ { parser->m_processor = contentProcessor; /* see externalEntityContentProcessor vs contentProcessor */ return doContent(parser, parser->m_parentParser ? 1 : 0, parser->m_encoding, s, end, nextPtr, (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT); } } static enum XML_Error PTRCALL errorProcessor(XML_Parser parser, const char *s, const char *end, const char **nextPtr) { UNUSED_P(s); UNUSED_P(end); UNUSED_P(nextPtr); return parser->m_errorCode; } static enum XML_Error storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, const char *ptr, const char *end, STRING_POOL *pool, enum XML_Account account) { enum XML_Error result = appendAttributeValue(parser, enc, isCdata, ptr, end, pool, account); if (result) return result; if (! isCdata && poolLength(pool) && poolLastChar(pool) == 0x20) poolChop(pool); if (! poolAppendChar(pool, XML_T('\0'))) return XML_ERROR_NO_MEMORY; return XML_ERROR_NONE; } static enum XML_Error appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, const char *ptr, const char *end, STRING_POOL *pool, enum XML_Account account) { DTD *const dtd = parser->m_dtd; /* save one level of indirection */ #ifndef XML_DTD UNUSED_P(account); #endif for (;;) { const char *next = ptr; /* XmlAttributeValueTok doesn't always set the last arg */ int tok = XmlAttributeValueTok(enc, ptr, end, &next); #ifdef XML_DTD if (! accountingDiffTolerated(parser, tok, ptr, next, __LINE__, account)) { accountingOnAbort(parser); return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; } #endif switch (tok) { case XML_TOK_NONE: return XML_ERROR_NONE; case XML_TOK_INVALID: if (enc == parser->m_encoding) parser->m_eventPtr = next; return XML_ERROR_INVALID_TOKEN; case XML_TOK_PARTIAL: if (enc == parser->m_encoding) parser->m_eventPtr = ptr; return XML_ERROR_INVALID_TOKEN; case XML_TOK_CHAR_REF: { XML_Char buf[XML_ENCODE_MAX]; int i; int n = XmlCharRefNumber(enc, ptr); if (n < 0) { if (enc == parser->m_encoding) parser->m_eventPtr = ptr; return XML_ERROR_BAD_CHAR_REF; } if (! isCdata && n == 0x20 /* space */ && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20)) break; n = XmlEncode(n, (ICHAR *)buf); /* The XmlEncode() functions can never return 0 here. That * error return happens if the code point passed in is either * negative or greater than or equal to 0x110000. The * XmlCharRefNumber() functions will all return a number * strictly less than 0x110000 or a negative value if an error * occurred. The negative value is intercepted above, so * XmlEncode() is never passed a value it might return an * error for. */ for (i = 0; i < n; i++) { if (! poolAppendChar(pool, buf[i])) return XML_ERROR_NO_MEMORY; } } break; case XML_TOK_DATA_CHARS: if (! poolAppend(pool, enc, ptr, next)) return XML_ERROR_NO_MEMORY; break; case XML_TOK_TRAILING_CR: next = ptr + enc->minBytesPerChar; /* fall through */ case XML_TOK_ATTRIBUTE_VALUE_S: case XML_TOK_DATA_NEWLINE: if (! isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20)) break; if (! poolAppendChar(pool, 0x20)) return XML_ERROR_NO_MEMORY; break; case XML_TOK_ENTITY_REF: { const XML_Char *name; ENTITY *entity; char checkEntityDecl; XML_Char ch = (XML_Char)XmlPredefinedEntityName( enc, ptr + enc->minBytesPerChar, next - enc->minBytesPerChar); if (ch) { #ifdef XML_DTD /* NOTE: We are replacing 4-6 characters original input for 1 character * so there is no amplification and hence recording without * protection. */ accountingDiffTolerated(parser, tok, (char *)&ch, ((char *)&ch) + sizeof(XML_Char), __LINE__, XML_ACCOUNT_ENTITY_EXPANSION); #endif /* XML_DTD */ if (! poolAppendChar(pool, ch)) return XML_ERROR_NO_MEMORY; break; } name = poolStoreString(&parser->m_temp2Pool, enc, ptr + enc->minBytesPerChar, next - enc->minBytesPerChar); if (! name) return XML_ERROR_NO_MEMORY; entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0); poolDiscard(&parser->m_temp2Pool); /* First, determine if a check for an existing declaration is needed; if yes, check that the entity exists, and that it is internal. */ if (pool == &dtd->pool) /* are we called from prolog? */ checkEntityDecl = #ifdef XML_DTD parser->m_prologState.documentEntity && #endif /* XML_DTD */ (dtd->standalone ? ! parser->m_openInternalEntities : ! dtd->hasParamEntityRefs); else /* if (pool == &parser->m_tempPool): we are called from content */ checkEntityDecl = ! dtd->hasParamEntityRefs || dtd->standalone; if (checkEntityDecl) { if (! entity) return XML_ERROR_UNDEFINED_ENTITY; else if (! entity->is_internal) return XML_ERROR_ENTITY_DECLARED_IN_PE; } else if (! entity) { /* Cannot report skipped entity here - see comments on parser->m_skippedEntityHandler. if (parser->m_skippedEntityHandler) parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0); */ /* Cannot call the default handler because this would be out of sync with the call to the startElementHandler. if ((pool == &parser->m_tempPool) && parser->m_defaultHandler) reportDefault(parser, enc, ptr, next); */ break; } if (entity->open) { if (enc == parser->m_encoding) { /* It does not appear that this line can be executed. * * The "if (entity->open)" check catches recursive entity * definitions. In order to be called with an open * entity, it must have gone through this code before and * been through the recursive call to * appendAttributeValue() some lines below. That call * sets the local encoding ("enc") to the parser's * internal encoding (internal_utf8 or internal_utf16), * which can never be the same as the principle encoding. * It doesn't appear there is another code path that gets * here with entity->open being TRUE. * * Since it is not certain that this logic is watertight, * we keep the line and merely exclude it from coverage * tests. */ parser->m_eventPtr = ptr; /* LCOV_EXCL_LINE */ } return XML_ERROR_RECURSIVE_ENTITY_REF; } if (entity->notation) { if (enc == parser->m_encoding) parser->m_eventPtr = ptr; return XML_ERROR_BINARY_ENTITY_REF; } if (! entity->textPtr) { if (enc == parser->m_encoding) parser->m_eventPtr = ptr; return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF; } else { enum XML_Error result; const XML_Char *textEnd = entity->textPtr + entity->textLen; entity->open = XML_TRUE; #ifdef XML_DTD entityTrackingOnOpen(parser, entity, __LINE__); #endif result = appendAttributeValue(parser, parser->m_internalEncoding, isCdata, (const char *)entity->textPtr, (const char *)textEnd, pool, XML_ACCOUNT_ENTITY_EXPANSION); #ifdef XML_DTD entityTrackingOnClose(parser, entity, __LINE__); #endif entity->open = XML_FALSE; if (result) return result; } } break; default: /* The only token returned by XmlAttributeValueTok() that does * not have an explicit case here is XML_TOK_PARTIAL_CHAR. * Getting that would require an entity name to contain an * incomplete XML character (e.g. \xE2\x82); however previous * tokenisers will have already recognised and rejected such * names before XmlAttributeValueTok() gets a look-in. This * default case should be retained as a safety net, but the code * excluded from coverage tests. * * LCOV_EXCL_START */ if (enc == parser->m_encoding) parser->m_eventPtr = ptr; return XML_ERROR_UNEXPECTED_STATE; /* LCOV_EXCL_STOP */ } ptr = next; } /* not reached */ } static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc, const char *entityTextPtr, const char *entityTextEnd, enum XML_Account account) { DTD *const dtd = parser->m_dtd; /* save one level of indirection */ STRING_POOL *pool = &(dtd->entityValuePool); enum XML_Error result = XML_ERROR_NONE; #ifdef XML_DTD int oldInEntityValue = parser->m_prologState.inEntityValue; parser->m_prologState.inEntityValue = 1; #else UNUSED_P(account); #endif /* XML_DTD */ /* never return Null for the value argument in EntityDeclHandler, since this would indicate an external entity; therefore we have to make sure that entityValuePool.start is not null */ if (! pool->blocks) { if (! poolGrow(pool)) return XML_ERROR_NO_MEMORY; } for (;;) { const char *next = entityTextPtr; /* XmlEntityValueTok doesn't always set the last arg */ int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next); #ifdef XML_DTD if (! accountingDiffTolerated(parser, tok, entityTextPtr, next, __LINE__, account)) { accountingOnAbort(parser); result = XML_ERROR_AMPLIFICATION_LIMIT_BREACH; goto endEntityValue; } #endif switch (tok) { case XML_TOK_PARAM_ENTITY_REF: #ifdef XML_DTD if (parser->m_isParamEntity || enc != parser->m_encoding) { const XML_Char *name; ENTITY *entity; name = poolStoreString(&parser->m_tempPool, enc, entityTextPtr + enc->minBytesPerChar, next - enc->minBytesPerChar); if (! name) { result = XML_ERROR_NO_MEMORY; goto endEntityValue; } entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0); poolDiscard(&parser->m_tempPool); if (! entity) { /* not a well-formedness error - see XML 1.0: WFC Entity Declared */ /* cannot report skipped entity here - see comments on parser->m_skippedEntityHandler if (parser->m_skippedEntityHandler) parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0); */ dtd->keepProcessing = dtd->standalone; goto endEntityValue; } if (entity->open) { if (enc == parser->m_encoding) parser->m_eventPtr = entityTextPtr; result = XML_ERROR_RECURSIVE_ENTITY_REF; goto endEntityValue; } if (entity->systemId) { if (parser->m_externalEntityRefHandler) { dtd->paramEntityRead = XML_FALSE; entity->open = XML_TRUE; entityTrackingOnOpen(parser, entity, __LINE__); if (! parser->m_externalEntityRefHandler( parser->m_externalEntityRefHandlerArg, 0, entity->base, entity->systemId, entity->publicId)) { entityTrackingOnClose(parser, entity, __LINE__); entity->open = XML_FALSE; result = XML_ERROR_EXTERNAL_ENTITY_HANDLING; goto endEntityValue; } entityTrackingOnClose(parser, entity, __LINE__); entity->open = XML_FALSE; if (! dtd->paramEntityRead) dtd->keepProcessing = dtd->standalone; } else dtd->keepProcessing = dtd->standalone; } else { entity->open = XML_TRUE; entityTrackingOnOpen(parser, entity, __LINE__); result = storeEntityValue( parser, parser->m_internalEncoding, (const char *)entity->textPtr, (const char *)(entity->textPtr + entity->textLen), XML_ACCOUNT_ENTITY_EXPANSION); entityTrackingOnClose(parser, entity, __LINE__); entity->open = XML_FALSE; if (result) goto endEntityValue; } break; } #endif /* XML_DTD */ /* In the internal subset, PE references are not legal within markup declarations, e.g entity values in this case. */ parser->m_eventPtr = entityTextPtr; result = XML_ERROR_PARAM_ENTITY_REF; goto endEntityValue; case XML_TOK_NONE: result = XML_ERROR_NONE; goto endEntityValue; case XML_TOK_ENTITY_REF: case XML_TOK_DATA_CHARS: if (! poolAppend(pool, enc, entityTextPtr, next)) { result = XML_ERROR_NO_MEMORY; goto endEntityValue; } break; case XML_TOK_TRAILING_CR: next = entityTextPtr + enc->minBytesPerChar; /* fall through */ case XML_TOK_DATA_NEWLINE: if (pool->end == pool->ptr && ! poolGrow(pool)) { result = XML_ERROR_NO_MEMORY; goto endEntityValue; } *(pool->ptr)++ = 0xA; break; case XML_TOK_CHAR_REF: { XML_Char buf[XML_ENCODE_MAX]; int i; int n = XmlCharRefNumber(enc, entityTextPtr); if (n < 0) { if (enc == parser->m_encoding) parser->m_eventPtr = entityTextPtr; result = XML_ERROR_BAD_CHAR_REF; goto endEntityValue; } n = XmlEncode(n, (ICHAR *)buf); /* The XmlEncode() functions can never return 0 here. That * error return happens if the code point passed in is either * negative or greater than or equal to 0x110000. The * XmlCharRefNumber() functions will all return a number * strictly less than 0x110000 or a negative value if an error * occurred. The negative value is intercepted above, so * XmlEncode() is never passed a value it might return an * error for. */ for (i = 0; i < n; i++) { if (pool->end == pool->ptr && ! poolGrow(pool)) { result = XML_ERROR_NO_MEMORY; goto endEntityValue; } *(pool->ptr)++ = buf[i]; } } break; case XML_TOK_PARTIAL: if (enc == parser->m_encoding) parser->m_eventPtr = entityTextPtr; result = XML_ERROR_INVALID_TOKEN; goto endEntityValue; case XML_TOK_INVALID: if (enc == parser->m_encoding) parser->m_eventPtr = next; result = XML_ERROR_INVALID_TOKEN; goto endEntityValue; default: /* This default case should be unnecessary -- all the tokens * that XmlEntityValueTok() can return have their own explicit * cases -- but should be retained for safety. We do however * exclude it from the coverage statistics. * * LCOV_EXCL_START */ if (enc == parser->m_encoding) parser->m_eventPtr = entityTextPtr; result = XML_ERROR_UNEXPECTED_STATE; goto endEntityValue; /* LCOV_EXCL_STOP */ } entityTextPtr = next; } endEntityValue: #ifdef XML_DTD parser->m_prologState.inEntityValue = oldInEntityValue; #endif /* XML_DTD */ return result; } static void FASTCALL normalizeLines(XML_Char *s) { XML_Char *p; for (;; s++) { if (*s == XML_T('\0')) return; if (*s == 0xD) break; } p = s; do { if (*s == 0xD) { *p++ = 0xA; if (*++s == 0xA) s++; } else *p++ = *s++; } while (*s); *p = XML_T('\0'); } static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char *start, const char *end) { const XML_Char *target; XML_Char *data; const char *tem; if (! parser->m_processingInstructionHandler) { if (parser->m_defaultHandler) reportDefault(parser, enc, start, end); return 1; } start += enc->minBytesPerChar * 2; tem = start + XmlNameLength(enc, start); target = poolStoreString(&parser->m_tempPool, enc, start, tem); if (! target) return 0; poolFinish(&parser->m_tempPool); data = poolStoreString(&parser->m_tempPool, enc, XmlSkipS(enc, tem), end - enc->minBytesPerChar * 2); if (! data) return 0; normalizeLines(data); parser->m_processingInstructionHandler(parser->m_handlerArg, target, data); poolClear(&parser->m_tempPool); return 1; } static int reportComment(XML_Parser parser, const ENCODING *enc, const char *start, const char *end) { XML_Char *data; if (! parser->m_commentHandler) { if (parser->m_defaultHandler) reportDefault(parser, enc, start, end); return 1; } data = poolStoreString(&parser->m_tempPool, enc, start + enc->minBytesPerChar * 4, end - enc->minBytesPerChar * 3); if (! data) return 0; normalizeLines(data); parser->m_commentHandler(parser->m_handlerArg, data); poolClear(&parser->m_tempPool); return 1; } static void reportDefault(XML_Parser parser, const ENCODING *enc, const char *s, const char *end) { if (MUST_CONVERT(enc, s)) { enum XML_Convert_Result convert_res; const char **eventPP; const char **eventEndPP; if (enc == parser->m_encoding) { eventPP = &parser->m_eventPtr; eventEndPP = &parser->m_eventEndPtr; } else { /* To get here, two things must be true; the parser must be * using a character encoding that is not the same as the * encoding passed in, and the encoding passed in must need * conversion to the internal format (UTF-8 unless XML_UNICODE * is defined). The only occasions on which the encoding passed * in is not the same as the parser's encoding are when it is * the internal encoding (e.g. a previously defined parameter * entity, already converted to internal format). This by * definition doesn't need conversion, so the whole branch never * gets executed. * * For safety's sake we don't delete these lines and merely * exclude them from coverage statistics. * * LCOV_EXCL_START */ eventPP = &(parser->m_openInternalEntities->internalEventPtr); eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr); /* LCOV_EXCL_STOP */ } do { ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf; convert_res = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd); *eventEndPP = s; parser->m_defaultHandler(parser->m_handlerArg, parser->m_dataBuf, (int)(dataPtr - (ICHAR *)parser->m_dataBuf)); *eventPP = s; } while ((convert_res != XML_CONVERT_COMPLETED) && (convert_res != XML_CONVERT_INPUT_INCOMPLETE)); } else parser->m_defaultHandler(parser->m_handlerArg, (XML_Char *)s, (int)((XML_Char *)end - (XML_Char *)s)); } static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata, XML_Bool isId, const XML_Char *value, XML_Parser parser) { DEFAULT_ATTRIBUTE *att; if (value || isId) { /* The handling of default attributes gets messed up if we have a default which duplicates a non-default. */ int i; for (i = 0; i < type->nDefaultAtts; i++) if (attId == type->defaultAtts[i].id) return 1; if (isId && ! type->idAtt && ! attId->xmlns) type->idAtt = attId; } if (type->nDefaultAtts == type->allocDefaultAtts) { if (type->allocDefaultAtts == 0) { type->allocDefaultAtts = 8; type->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC( parser, type->allocDefaultAtts * sizeof(DEFAULT_ATTRIBUTE)); if (! type->defaultAtts) { type->allocDefaultAtts = 0; return 0; } } else { DEFAULT_ATTRIBUTE *temp; /* Detect and prevent integer overflow */ if (type->allocDefaultAtts > INT_MAX / 2) { return 0; } int count = type->allocDefaultAtts * 2; /* Detect and prevent integer overflow. * The preprocessor guard addresses the "always false" warning * from -Wtype-limits on platforms where * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ #if UINT_MAX >= SIZE_MAX if ((unsigned)count > (size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE)) { return 0; } #endif temp = (DEFAULT_ATTRIBUTE *)REALLOC(parser, type->defaultAtts, (count * sizeof(DEFAULT_ATTRIBUTE))); if (temp == NULL) return 0; type->allocDefaultAtts = count; type->defaultAtts = temp; } } att = type->defaultAtts + type->nDefaultAtts; att->id = attId; att->value = value; att->isCdata = isCdata; if (! isCdata) attId->maybeTokenized = XML_TRUE; type->nDefaultAtts += 1; return 1; } static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) { DTD *const dtd = parser->m_dtd; /* save one level of indirection */ const XML_Char *name; for (name = elementType->name; *name; name++) { if (*name == XML_T(ASCII_COLON)) { PREFIX *prefix; const XML_Char *s; for (s = elementType->name; s != name; s++) { if (! poolAppendChar(&dtd->pool, *s)) return 0; } if (! poolAppendChar(&dtd->pool, XML_T('\0'))) return 0; prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool), sizeof(PREFIX)); if (! prefix) return 0; if (prefix->name == poolStart(&dtd->pool)) poolFinish(&dtd->pool); else poolDiscard(&dtd->pool); elementType->prefix = prefix; break; } } return 1; } static ATTRIBUTE_ID * getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, const char *end) { DTD *const dtd = parser->m_dtd; /* save one level of indirection */ ATTRIBUTE_ID *id; const XML_Char *name; if (! poolAppendChar(&dtd->pool, XML_T('\0'))) return NULL; name = poolStoreString(&dtd->pool, enc, start, end); if (! name) return NULL; /* skip quotation mark - its storage will be re-used (like in name[-1]) */ ++name; id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name, sizeof(ATTRIBUTE_ID)); if (! id) return NULL; if (id->name != name) poolDiscard(&dtd->pool); else { poolFinish(&dtd->pool); if (! parser->m_ns) ; else if (name[0] == XML_T(ASCII_x) && name[1] == XML_T(ASCII_m) && name[2] == XML_T(ASCII_l) && name[3] == XML_T(ASCII_n) && name[4] == XML_T(ASCII_s) && (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) { if (name[5] == XML_T('\0')) id->prefix = &dtd->defaultPrefix; else id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6, sizeof(PREFIX)); id->xmlns = XML_TRUE; } else { int i; for (i = 0; name[i]; i++) { /* attributes without prefix are *not* in the default namespace */ if (name[i] == XML_T(ASCII_COLON)) { int j; for (j = 0; j < i; j++) { if (! poolAppendChar(&dtd->pool, name[j])) return NULL; } if (! poolAppendChar(&dtd->pool, XML_T('\0'))) return NULL; id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool), sizeof(PREFIX)); if (! id->prefix) return NULL; if (id->prefix->name == poolStart(&dtd->pool)) poolFinish(&dtd->pool); else poolDiscard(&dtd->pool); break; } } } } return id; } #define CONTEXT_SEP XML_T(ASCII_FF) static const XML_Char * getContext(XML_Parser parser) { DTD *const dtd = parser->m_dtd; /* save one level of indirection */ HASH_TABLE_ITER iter; XML_Bool needSep = XML_FALSE; if (dtd->defaultPrefix.binding) { int i; int len; if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS))) return NULL; len = dtd->defaultPrefix.binding->uriLen; if (parser->m_namespaceSeparator) len--; for (i = 0; i < len; i++) { if (! poolAppendChar(&parser->m_tempPool, dtd->defaultPrefix.binding->uri[i])) { /* Because of memory caching, I don't believe this line can be * executed. * * This is part of a loop copying the default prefix binding * URI into the parser's temporary string pool. Previously, * that URI was copied into the same string pool, with a * terminating NUL character, as part of setContext(). When * the pool was cleared, that leaves a block definitely big * enough to hold the URI on the free block list of the pool. * The URI copy in getContext() therefore cannot run out of * memory. * * If the pool is used between the setContext() and * getContext() calls, the worst it can do is leave a bigger * block on the front of the free list. Given that this is * all somewhat inobvious and program logic can be changed, we * don't delete the line but we do exclude it from the test * coverage statistics. */ return NULL; /* LCOV_EXCL_LINE */ } } needSep = XML_TRUE; } hashTableIterInit(&iter, &(dtd->prefixes)); for (;;) { int i; int len; const XML_Char *s; PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter); if (! prefix) break; if (! prefix->binding) { /* This test appears to be (justifiable) paranoia. There does * not seem to be a way of injecting a prefix without a binding * that doesn't get errored long before this function is called. * The test should remain for safety's sake, so we instead * exclude the following line from the coverage statistics. */ continue; /* LCOV_EXCL_LINE */ } if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP)) return NULL; for (s = prefix->name; *s; s++) if (! poolAppendChar(&parser->m_tempPool, *s)) return NULL; if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS))) return NULL; len = prefix->binding->uriLen; if (parser->m_namespaceSeparator) len--; for (i = 0; i < len; i++) if (! poolAppendChar(&parser->m_tempPool, prefix->binding->uri[i])) return NULL; needSep = XML_TRUE; } hashTableIterInit(&iter, &(dtd->generalEntities)); for (;;) { const XML_Char *s; ENTITY *e = (ENTITY *)hashTableIterNext(&iter); if (! e) break; if (! e->open) continue; if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP)) return NULL; for (s = e->name; *s; s++) if (! poolAppendChar(&parser->m_tempPool, *s)) return 0; needSep = XML_TRUE; } if (! poolAppendChar(&parser->m_tempPool, XML_T('\0'))) return NULL; return parser->m_tempPool.start; } static XML_Bool setContext(XML_Parser parser, const XML_Char *context) { DTD *const dtd = parser->m_dtd; /* save one level of indirection */ const XML_Char *s = context; while (*context != XML_T('\0')) { if (*s == CONTEXT_SEP || *s == XML_T('\0')) { ENTITY *e; if (! poolAppendChar(&parser->m_tempPool, XML_T('\0'))) return XML_FALSE; e = (ENTITY *)lookup(parser, &dtd->generalEntities, poolStart(&parser->m_tempPool), 0); if (e) e->open = XML_TRUE; if (*s != XML_T('\0')) s++; context = s; poolDiscard(&parser->m_tempPool); } else if (*s == XML_T(ASCII_EQUALS)) { PREFIX *prefix; if (poolLength(&parser->m_tempPool) == 0) prefix = &dtd->defaultPrefix; else { if (! poolAppendChar(&parser->m_tempPool, XML_T('\0'))) return XML_FALSE; prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&parser->m_tempPool), sizeof(PREFIX)); if (! prefix) return XML_FALSE; if (prefix->name == poolStart(&parser->m_tempPool)) { prefix->name = poolCopyString(&dtd->pool, prefix->name); if (! prefix->name) return XML_FALSE; } poolDiscard(&parser->m_tempPool); } for (context = s + 1; *context != CONTEXT_SEP && *context != XML_T('\0'); context++) if (! poolAppendChar(&parser->m_tempPool, *context)) return XML_FALSE; if (! poolAppendChar(&parser->m_tempPool, XML_T('\0'))) return XML_FALSE; if (addBinding(parser, prefix, NULL, poolStart(&parser->m_tempPool), &parser->m_inheritedBindings) != XML_ERROR_NONE) return XML_FALSE; poolDiscard(&parser->m_tempPool); if (*context != XML_T('\0')) ++context; s = context; } else { if (! poolAppendChar(&parser->m_tempPool, *s)) return XML_FALSE; s++; } } return XML_TRUE; } static void FASTCALL normalizePublicId(XML_Char *publicId) { XML_Char *p = publicId; XML_Char *s; for (s = publicId; *s; s++) { switch (*s) { case 0x20: case 0xD: case 0xA: if (p != publicId && p[-1] != 0x20) *p++ = 0x20; break; default: *p++ = *s; } } if (p != publicId && p[-1] == 0x20) --p; *p = XML_T('\0'); } static DTD * dtdCreate(const XML_Memory_Handling_Suite *ms) { DTD *p = ms->malloc_fcn(sizeof(DTD)); if (p == NULL) return p; poolInit(&(p->pool), ms); poolInit(&(p->entityValuePool), ms); hashTableInit(&(p->generalEntities), ms); hashTableInit(&(p->elementTypes), ms); hashTableInit(&(p->attributeIds), ms); hashTableInit(&(p->prefixes), ms); #ifdef XML_DTD p->paramEntityRead = XML_FALSE; hashTableInit(&(p->paramEntities), ms); #endif /* XML_DTD */ p->defaultPrefix.name = NULL; p->defaultPrefix.binding = NULL; p->in_eldecl = XML_FALSE; p->scaffIndex = NULL; p->scaffold = NULL; p->scaffLevel = 0; p->scaffSize = 0; p->scaffCount = 0; p->contentStringLen = 0; p->keepProcessing = XML_TRUE; p->hasParamEntityRefs = XML_FALSE; p->standalone = XML_FALSE; return p; } static void dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms) { HASH_TABLE_ITER iter; hashTableIterInit(&iter, &(p->elementTypes)); for (;;) { ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter); if (! e) break; if (e->allocDefaultAtts != 0) ms->free_fcn(e->defaultAtts); } hashTableClear(&(p->generalEntities)); #ifdef XML_DTD p->paramEntityRead = XML_FALSE; hashTableClear(&(p->paramEntities)); #endif /* XML_DTD */ hashTableClear(&(p->elementTypes)); hashTableClear(&(p->attributeIds)); hashTableClear(&(p->prefixes)); poolClear(&(p->pool)); poolClear(&(p->entityValuePool)); p->defaultPrefix.name = NULL; p->defaultPrefix.binding = NULL; p->in_eldecl = XML_FALSE; ms->free_fcn(p->scaffIndex); p->scaffIndex = NULL; ms->free_fcn(p->scaffold); p->scaffold = NULL; p->scaffLevel = 0; p->scaffSize = 0; p->scaffCount = 0; p->contentStringLen = 0; p->keepProcessing = XML_TRUE; p->hasParamEntityRefs = XML_FALSE; p->standalone = XML_FALSE; } static void dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms) { HASH_TABLE_ITER iter; hashTableIterInit(&iter, &(p->elementTypes)); for (;;) { ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter); if (! e) break; if (e->allocDefaultAtts != 0) ms->free_fcn(e->defaultAtts); } hashTableDestroy(&(p->generalEntities)); #ifdef XML_DTD hashTableDestroy(&(p->paramEntities)); #endif /* XML_DTD */ hashTableDestroy(&(p->elementTypes)); hashTableDestroy(&(p->attributeIds)); hashTableDestroy(&(p->prefixes)); poolDestroy(&(p->pool)); poolDestroy(&(p->entityValuePool)); if (isDocEntity) { ms->free_fcn(p->scaffIndex); ms->free_fcn(p->scaffold); } ms->free_fcn(p); } /* Do a deep copy of the DTD. Return 0 for out of memory, non-zero otherwise. The new DTD has already been initialized. */ static int dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms) { HASH_TABLE_ITER iter; /* Copy the prefix table. */ hashTableIterInit(&iter, &(oldDtd->prefixes)); for (;;) { const XML_Char *name; const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter); if (! oldP) break; name = poolCopyString(&(newDtd->pool), oldP->name); if (! name) return 0; if (! lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX))) return 0; } hashTableIterInit(&iter, &(oldDtd->attributeIds)); /* Copy the attribute id table. */ for (;;) { ATTRIBUTE_ID *newA; const XML_Char *name; const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter); if (! oldA) break; /* Remember to allocate the scratch byte before the name. */ if (! poolAppendChar(&(newDtd->pool), XML_T('\0'))) return 0; name = poolCopyString(&(newDtd->pool), oldA->name); if (! name) return 0; ++name; newA = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), name, sizeof(ATTRIBUTE_ID)); if (! newA) return 0; newA->maybeTokenized = oldA->maybeTokenized; if (oldA->prefix) { newA->xmlns = oldA->xmlns; if (oldA->prefix == &oldDtd->defaultPrefix) newA->prefix = &newDtd->defaultPrefix; else newA->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes), oldA->prefix->name, 0); } } /* Copy the element type table. */ hashTableIterInit(&iter, &(oldDtd->elementTypes)); for (;;) { int i; ELEMENT_TYPE *newE; const XML_Char *name; const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter); if (! oldE) break; name = poolCopyString(&(newDtd->pool), oldE->name); if (! name) return 0; newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name, sizeof(ELEMENT_TYPE)); if (! newE) return 0; if (oldE->nDefaultAtts) { newE->defaultAtts = ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE)); if (! newE->defaultAtts) { return 0; } } if (oldE->idAtt) newE->idAtt = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), oldE->idAtt->name, 0); newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts; if (oldE->prefix) newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes), oldE->prefix->name, 0); for (i = 0; i < newE->nDefaultAtts; i++) { newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup( oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0); newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata; if (oldE->defaultAtts[i].value) { newE->defaultAtts[i].value = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value); if (! newE->defaultAtts[i].value) return 0; } else newE->defaultAtts[i].value = NULL; } } /* Copy the entity tables. */ if (! copyEntityTable(oldParser, &(newDtd->generalEntities), &(newDtd->pool), &(oldDtd->generalEntities))) return 0; #ifdef XML_DTD if (! copyEntityTable(oldParser, &(newDtd->paramEntities), &(newDtd->pool), &(oldDtd->paramEntities))) return 0; newDtd->paramEntityRead = oldDtd->paramEntityRead; #endif /* XML_DTD */ newDtd->keepProcessing = oldDtd->keepProcessing; newDtd->hasParamEntityRefs = oldDtd->hasParamEntityRefs; newDtd->standalone = oldDtd->standalone; /* Don't want deep copying for scaffolding */ newDtd->in_eldecl = oldDtd->in_eldecl; newDtd->scaffold = oldDtd->scaffold; newDtd->contentStringLen = oldDtd->contentStringLen; newDtd->scaffSize = oldDtd->scaffSize; newDtd->scaffLevel = oldDtd->scaffLevel; newDtd->scaffIndex = oldDtd->scaffIndex; return 1; } /* End dtdCopy */ static int copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable, STRING_POOL *newPool, const HASH_TABLE *oldTable) { HASH_TABLE_ITER iter; const XML_Char *cachedOldBase = NULL; const XML_Char *cachedNewBase = NULL; hashTableIterInit(&iter, oldTable); for (;;) { ENTITY *newE; const XML_Char *name; const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter); if (! oldE) break; name = poolCopyString(newPool, oldE->name); if (! name) return 0; newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY)); if (! newE) return 0; if (oldE->systemId) { const XML_Char *tem = poolCopyString(newPool, oldE->systemId); if (! tem) return 0; newE->systemId = tem; if (oldE->base) { if (oldE->base == cachedOldBase) newE->base = cachedNewBase; else { cachedOldBase = oldE->base; tem = poolCopyString(newPool, cachedOldBase); if (! tem) return 0; cachedNewBase = newE->base = tem; } } if (oldE->publicId) { tem = poolCopyString(newPool, oldE->publicId); if (! tem) return 0; newE->publicId = tem; } } else { const XML_Char *tem = poolCopyStringN(newPool, oldE->textPtr, oldE->textLen); if (! tem) return 0; newE->textPtr = tem; newE->textLen = oldE->textLen; } if (oldE->notation) { const XML_Char *tem = poolCopyString(newPool, oldE->notation); if (! tem) return 0; newE->notation = tem; } newE->is_param = oldE->is_param; newE->is_internal = oldE->is_internal; } return 1; } #define INIT_POWER 6 static XML_Bool FASTCALL keyeq(KEY s1, KEY s2) { for (; *s1 == *s2; s1++, s2++) if (*s1 == 0) return XML_TRUE; return XML_FALSE; } static size_t keylen(KEY s) { size_t len = 0; for (; *s; s++, len++) ; return len; } static void copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key) { key->k[0] = 0; key->k[1] = get_hash_secret_salt(parser); } static unsigned long FASTCALL hash(XML_Parser parser, KEY s) { struct siphash state; struct sipkey key; (void)sip24_valid; copy_salt_to_sipkey(parser, &key); sip24_init(&state, &key); sip24_update(&state, s, keylen(s) * sizeof(XML_Char)); return (unsigned long)sip24_final(&state); } static NAMED * lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) { size_t i; if (table->size == 0) { size_t tsize; if (! createSize) return NULL; table->power = INIT_POWER; /* table->size is a power of 2 */ table->size = (size_t)1 << INIT_POWER; tsize = table->size * sizeof(NAMED *); table->v = table->mem->malloc_fcn(tsize); if (! table->v) { table->size = 0; return NULL; } memset(table->v, 0, tsize); i = hash(parser, name) & ((unsigned long)table->size - 1); } else { unsigned long h = hash(parser, name); unsigned long mask = (unsigned long)table->size - 1; unsigned char step = 0; i = h & mask; while (table->v[i]) { if (keyeq(name, table->v[i]->name)) return table->v[i]; if (! step) step = PROBE_STEP(h, mask, table->power); i < step ? (i += table->size - step) : (i -= step); } if (! createSize) return NULL; /* check for overflow (table is half full) */ if (table->used >> (table->power - 1)) { unsigned char newPower = table->power + 1; /* Detect and prevent invalid shift */ if (newPower >= sizeof(unsigned long) * 8 /* bits per byte */) { return NULL; } size_t newSize = (size_t)1 << newPower; unsigned long newMask = (unsigned long)newSize - 1; /* Detect and prevent integer overflow */ if (newSize > (size_t)(-1) / sizeof(NAMED *)) { return NULL; } size_t tsize = newSize * sizeof(NAMED *); NAMED **newV = table->mem->malloc_fcn(tsize); if (! newV) return NULL; memset(newV, 0, tsize); for (i = 0; i < table->size; i++) if (table->v[i]) { unsigned long newHash = hash(parser, table->v[i]->name); size_t j = newHash & newMask; step = 0; while (newV[j]) { if (! step) step = PROBE_STEP(newHash, newMask, newPower); j < step ? (j += newSize - step) : (j -= step); } newV[j] = table->v[i]; } table->mem->free_fcn(table->v); table->v = newV; table->power = newPower; table->size = newSize; i = h & newMask; step = 0; while (table->v[i]) { if (! step) step = PROBE_STEP(h, newMask, newPower); i < step ? (i += newSize - step) : (i -= step); } } } table->v[i] = table->mem->malloc_fcn(createSize); if (! table->v[i]) return NULL; memset(table->v[i], 0, createSize); table->v[i]->name = name; (table->used)++; return table->v[i]; } static void FASTCALL hashTableClear(HASH_TABLE *table) { size_t i; for (i = 0; i < table->size; i++) { table->mem->free_fcn(table->v[i]); table->v[i] = NULL; } table->used = 0; } static void FASTCALL hashTableDestroy(HASH_TABLE *table) { size_t i; for (i = 0; i < table->size; i++) table->mem->free_fcn(table->v[i]); table->mem->free_fcn(table->v); } static void FASTCALL hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms) { p->power = 0; p->size = 0; p->used = 0; p->v = NULL; p->mem = ms; } static void FASTCALL hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table) { iter->p = table->v; iter->end = iter->p ? iter->p + table->size : NULL; } static NAMED *FASTCALL hashTableIterNext(HASH_TABLE_ITER *iter) { while (iter->p != iter->end) { NAMED *tem = *(iter->p)++; if (tem) return tem; } return NULL; } static void FASTCALL poolInit(STRING_POOL *pool, const XML_Memory_Handling_Suite *ms) { pool->blocks = NULL; pool->freeBlocks = NULL; pool->start = NULL; pool->ptr = NULL; pool->end = NULL; pool->mem = ms; } static void FASTCALL poolClear(STRING_POOL *pool) { if (! pool->freeBlocks) pool->freeBlocks = pool->blocks; else { BLOCK *p = pool->blocks; while (p) { BLOCK *tem = p->next; p->next = pool->freeBlocks; pool->freeBlocks = p; p = tem; } } pool->blocks = NULL; pool->start = NULL; pool->ptr = NULL; pool->end = NULL; } static void FASTCALL poolDestroy(STRING_POOL *pool) { BLOCK *p = pool->blocks; while (p) { BLOCK *tem = p->next; pool->mem->free_fcn(p); p = tem; } p = pool->freeBlocks; while (p) { BLOCK *tem = p->next; pool->mem->free_fcn(p); p = tem; } } static XML_Char * poolAppend(STRING_POOL *pool, const ENCODING *enc, const char *ptr, const char *end) { if (! pool->ptr && ! poolGrow(pool)) return NULL; for (;;) { const enum XML_Convert_Result convert_res = XmlConvert( enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end); if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) break; if (! poolGrow(pool)) return NULL; } return pool->start; } static const XML_Char *FASTCALL poolCopyString(STRING_POOL *pool, const XML_Char *s) { do { if (! poolAppendChar(pool, *s)) return NULL; } while (*s++); s = pool->start; poolFinish(pool); return s; } static const XML_Char * poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) { if (! pool->ptr && ! poolGrow(pool)) { /* The following line is unreachable given the current usage of * poolCopyStringN(). Currently it is called from exactly one * place to copy the text of a simple general entity. By that * point, the name of the entity is already stored in the pool, so * pool->ptr cannot be NULL. * * If poolCopyStringN() is used elsewhere as it well might be, * this line may well become executable again. Regardless, this * sort of check shouldn't be removed lightly, so we just exclude * it from the coverage statistics. */ return NULL; /* LCOV_EXCL_LINE */ } for (; n > 0; --n, s++) { if (! poolAppendChar(pool, *s)) return NULL; } s = pool->start; poolFinish(pool); return s; } static const XML_Char *FASTCALL poolAppendString(STRING_POOL *pool, const XML_Char *s) { while (*s) { if (! poolAppendChar(pool, *s)) return NULL; s++; } return pool->start; } static XML_Char * poolStoreString(STRING_POOL *pool, const ENCODING *enc, const char *ptr, const char *end) { if (! poolAppend(pool, enc, ptr, end)) return NULL; if (pool->ptr == pool->end && ! poolGrow(pool)) return NULL; *(pool->ptr)++ = 0; return pool->start; } static size_t poolBytesToAllocateFor(int blockSize) { /* Unprotected math would be: ** return offsetof(BLOCK, s) + blockSize * sizeof(XML_Char); ** ** Detect overflow, avoiding _signed_ overflow undefined behavior ** For a + b * c we check b * c in isolation first, so that addition of a ** on top has no chance of making us accept a small non-negative number */ const size_t stretch = sizeof(XML_Char); /* can be 4 bytes */ if (blockSize <= 0) return 0; if (blockSize > (int)(INT_MAX / stretch)) return 0; { const int stretchedBlockSize = blockSize * (int)stretch; const int bytesToAllocate = (int)(offsetof(BLOCK, s) + (unsigned)stretchedBlockSize); if (bytesToAllocate < 0) return 0; return (size_t)bytesToAllocate; } } static XML_Bool FASTCALL poolGrow(STRING_POOL *pool) { if (pool->freeBlocks) { if (pool->start == 0) { pool->blocks = pool->freeBlocks; pool->freeBlocks = pool->freeBlocks->next; pool->blocks->next = NULL; pool->start = pool->blocks->s; pool->end = pool->start + pool->blocks->size; pool->ptr = pool->start; return XML_TRUE; } if (pool->end - pool->start < pool->freeBlocks->size) { BLOCK *tem = pool->freeBlocks->next; pool->freeBlocks->next = pool->blocks; pool->blocks = pool->freeBlocks; pool->freeBlocks = tem; memcpy(pool->blocks->s, pool->start, (pool->end - pool->start) * sizeof(XML_Char)); pool->ptr = pool->blocks->s + (pool->ptr - pool->start); pool->start = pool->blocks->s; pool->end = pool->start + pool->blocks->size; return XML_TRUE; } } if (pool->blocks && pool->start == pool->blocks->s) { BLOCK *temp; int blockSize = (int)((unsigned)(pool->end - pool->start) * 2U); size_t bytesToAllocate; /* NOTE: Needs to be calculated prior to calling `realloc` to avoid dangling pointers: */ const ptrdiff_t offsetInsideBlock = pool->ptr - pool->start; if (blockSize < 0) { /* This condition traps a situation where either more than * INT_MAX/2 bytes have already been allocated. This isn't * readily testable, since it is unlikely that an average * machine will have that much memory, so we exclude it from the * coverage statistics. */ return XML_FALSE; /* LCOV_EXCL_LINE */ } bytesToAllocate = poolBytesToAllocateFor(blockSize); if (bytesToAllocate == 0) return XML_FALSE; temp = (BLOCK *)pool->mem->realloc_fcn(pool->blocks, (unsigned)bytesToAllocate); if (temp == NULL) return XML_FALSE; pool->blocks = temp; pool->blocks->size = blockSize; pool->ptr = pool->blocks->s + offsetInsideBlock; pool->start = pool->blocks->s; pool->end = pool->start + blockSize; } else { BLOCK *tem; int blockSize = (int)(pool->end - pool->start); size_t bytesToAllocate; if (blockSize < 0) { /* This condition traps a situation where either more than * INT_MAX bytes have already been allocated (which is prevented * by various pieces of program logic, not least this one, never * mind the unlikelihood of actually having that much memory) or * the pool control fields have been corrupted (which could * conceivably happen in an extremely buggy user handler * function). Either way it isn't readily testable, so we * exclude it from the coverage statistics. */ return XML_FALSE; /* LCOV_EXCL_LINE */ } if (blockSize < INIT_BLOCK_SIZE) blockSize = INIT_BLOCK_SIZE; else { /* Detect overflow, avoiding _signed_ overflow undefined behavior */ if ((int)((unsigned)blockSize * 2U) < 0) { return XML_FALSE; } blockSize *= 2; } bytesToAllocate = poolBytesToAllocateFor(blockSize); if (bytesToAllocate == 0) return XML_FALSE; tem = pool->mem->malloc_fcn(bytesToAllocate); if (! tem) return XML_FALSE; tem->size = blockSize; tem->next = pool->blocks; pool->blocks = tem; if (pool->ptr != pool->start) memcpy(tem->s, pool->start, (pool->ptr - pool->start) * sizeof(XML_Char)); pool->ptr = tem->s + (pool->ptr - pool->start); pool->start = tem->s; pool->end = tem->s + blockSize; } return XML_TRUE; } static int FASTCALL nextScaffoldPart(XML_Parser parser) { DTD *const dtd = parser->m_dtd; /* save one level of indirection */ CONTENT_SCAFFOLD *me; int next; if (! dtd->scaffIndex) { dtd->scaffIndex = (int *)MALLOC(parser, parser->m_groupSize * sizeof(int)); if (! dtd->scaffIndex) return -1; dtd->scaffIndex[0] = 0; } if (dtd->scaffCount >= dtd->scaffSize) { CONTENT_SCAFFOLD *temp; if (dtd->scaffold) { /* Detect and prevent integer overflow */ if (dtd->scaffSize > UINT_MAX / 2u) { return -1; } /* Detect and prevent integer overflow. * The preprocessor guard addresses the "always false" warning * from -Wtype-limits on platforms where * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ #if UINT_MAX >= SIZE_MAX if (dtd->scaffSize > (size_t)(-1) / 2u / sizeof(CONTENT_SCAFFOLD)) { return -1; } #endif temp = (CONTENT_SCAFFOLD *)REALLOC( parser, dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD)); if (temp == NULL) return -1; dtd->scaffSize *= 2; } else { temp = (CONTENT_SCAFFOLD *)MALLOC(parser, INIT_SCAFFOLD_ELEMENTS * sizeof(CONTENT_SCAFFOLD)); if (temp == NULL) return -1; dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS; } dtd->scaffold = temp; } next = dtd->scaffCount++; me = &dtd->scaffold[next]; if (dtd->scaffLevel) { CONTENT_SCAFFOLD *parent = &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]]; if (parent->lastchild) { dtd->scaffold[parent->lastchild].nextsib = next; } if (! parent->childcnt) parent->firstchild = next; parent->lastchild = next; parent->childcnt++; } me->firstchild = me->lastchild = me->childcnt = me->nextsib = 0; return next; } static XML_Content * build_model(XML_Parser parser) { /* Function build_model transforms the existing parser->m_dtd->scaffold * array of CONTENT_SCAFFOLD tree nodes into a new array of * XML_Content tree nodes followed by a gapless list of zero-terminated * strings. */ DTD *const dtd = parser->m_dtd; /* save one level of indirection */ XML_Content *ret; XML_Char *str; /* the current string writing location */ /* Detect and prevent integer overflow. * The preprocessor guard addresses the "always false" warning * from -Wtype-limits on platforms where * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ #if UINT_MAX >= SIZE_MAX if (dtd->scaffCount > (size_t)(-1) / sizeof(XML_Content)) { return NULL; } if (dtd->contentStringLen > (size_t)(-1) / sizeof(XML_Char)) { return NULL; } #endif if (dtd->scaffCount * sizeof(XML_Content) > (size_t)(-1) - dtd->contentStringLen * sizeof(XML_Char)) { return NULL; } const size_t allocsize = (dtd->scaffCount * sizeof(XML_Content) + (dtd->contentStringLen * sizeof(XML_Char))); ret = (XML_Content *)MALLOC(parser, allocsize); if (! ret) return NULL; /* What follows is an iterative implementation (of what was previously done * recursively in a dedicated function called "build_node". The old recursive * build_node could be forced into stack exhaustion from input as small as a * few megabyte, and so that was a security issue. Hence, a function call * stack is avoided now by resolving recursion.) * * The iterative approach works as follows: * * - We have two writing pointers, both walking up the result array; one does * the work, the other creates "jobs" for its colleague to do, and leads * the way: * * - The faster one, pointer jobDest, always leads and writes "what job * to do" by the other, once they reach that place in the * array: leader "jobDest" stores the source node array index (relative * to array dtd->scaffold) in field "numchildren". * * - The slower one, pointer dest, looks at the value stored in the * "numchildren" field (which actually holds a source node array index * at that time) and puts the real data from dtd->scaffold in. * * - Before the loop starts, jobDest writes source array index 0 * (where the root node is located) so that dest will have something to do * when it starts operation. * * - Whenever nodes with children are encountered, jobDest appends * them as new jobs, in order. As a result, tree node siblings are * adjacent in the resulting array, for example: * * [0] root, has two children * [1] first child of 0, has three children * [3] first child of 1, does not have children * [4] second child of 1, does not have children * [5] third child of 1, does not have children * [2] second child of 0, does not have children * * Or (the same data) presented in flat array view: * * [0] root, has two children * * [1] first child of 0, has three children * [2] second child of 0, does not have children * * [3] first child of 1, does not have children * [4] second child of 1, does not have children * [5] third child of 1, does not have children * * - The algorithm repeats until all target array indices have been processed. */ XML_Content *dest = ret; /* tree node writing location, moves upwards */ XML_Content *const destLimit = &ret[dtd->scaffCount]; XML_Content *jobDest = ret; /* next free writing location in target array */ str = (XML_Char *)&ret[dtd->scaffCount]; /* Add the starting job, the root node (index 0) of the source tree */ (jobDest++)->numchildren = 0; for (; dest < destLimit; dest++) { /* Retrieve source tree array index from job storage */ const int src_node = (int)dest->numchildren; /* Convert item */ dest->type = dtd->scaffold[src_node].type; dest->quant = dtd->scaffold[src_node].quant; if (dest->type == XML_CTYPE_NAME) { const XML_Char *src; dest->name = str; src = dtd->scaffold[src_node].name; for (;;) { *str++ = *src; if (! *src) break; src++; } dest->numchildren = 0; dest->children = NULL; } else { unsigned int i; int cn; dest->name = NULL; dest->numchildren = dtd->scaffold[src_node].childcnt; dest->children = jobDest; /* Append scaffold indices of children to array */ for (i = 0, cn = dtd->scaffold[src_node].firstchild; i < dest->numchildren; i++, cn = dtd->scaffold[cn].nextsib) (jobDest++)->numchildren = (unsigned int)cn; } } return ret; } static ELEMENT_TYPE * getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr, const char *end) { DTD *const dtd = parser->m_dtd; /* save one level of indirection */ const XML_Char *name = poolStoreString(&dtd->pool, enc, ptr, end); ELEMENT_TYPE *ret; if (! name) return NULL; ret = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name, sizeof(ELEMENT_TYPE)); if (! ret) return NULL; if (ret->name != name) poolDiscard(&dtd->pool); else { poolFinish(&dtd->pool); if (! setElementTypePrefix(parser, ret)) return NULL; } return ret; } static XML_Char * copyString(const XML_Char *s, const XML_Memory_Handling_Suite *memsuite) { size_t charsRequired = 0; XML_Char *result; /* First determine how long the string is */ while (s[charsRequired] != 0) { charsRequired++; } /* Include the terminator */ charsRequired++; /* Now allocate space for the copy */ result = memsuite->malloc_fcn(charsRequired * sizeof(XML_Char)); if (result == NULL) return NULL; /* Copy the original into place */ memcpy(result, s, charsRequired * sizeof(XML_Char)); return result; } #ifdef XML_DTD static float accountingGetCurrentAmplification(XML_Parser rootParser) { const XmlBigCount countBytesOutput = rootParser->m_accounting.countBytesDirect + rootParser->m_accounting.countBytesIndirect; const float amplificationFactor = rootParser->m_accounting.countBytesDirect ? (countBytesOutput / (float)(rootParser->m_accounting.countBytesDirect)) : 1.0f; assert(! rootParser->m_parentParser); return amplificationFactor; } static void accountingReportStats(XML_Parser originParser, const char *epilog) { const XML_Parser rootParser = getRootParserOf(originParser, NULL); assert(! rootParser->m_parentParser); if (rootParser->m_accounting.debugLevel < 1) { return; } const float amplificationFactor = accountingGetCurrentAmplification(rootParser); fprintf(stderr, "expat: Accounting(%p): Direct " EXPAT_FMT_ULL( "10") ", indirect " EXPAT_FMT_ULL("10") ", amplification %8.2f%s", (void *)rootParser, rootParser->m_accounting.countBytesDirect, rootParser->m_accounting.countBytesIndirect, (double)amplificationFactor, epilog); } static void accountingOnAbort(XML_Parser originParser) { accountingReportStats(originParser, " ABORTING\n"); } static void accountingReportDiff(XML_Parser rootParser, unsigned int levelsAwayFromRootParser, const char *before, const char *after, ptrdiff_t bytesMore, int source_line, enum XML_Account account) { assert(! rootParser->m_parentParser); fprintf(stderr, " (+" EXPAT_FMT_PTRDIFF_T("6") " bytes %s|%d, xmlparse.c:%d) %*s\"", bytesMore, (account == XML_ACCOUNT_DIRECT) ? "DIR" : "EXP", levelsAwayFromRootParser, source_line, 10, ""); const char ellipis[] = "[..]"; const size_t ellipsisLength = sizeof(ellipis) /* because compile-time */ - 1; const unsigned int contextLength = 10; /* Note: Performance is of no concern here */ const char *walker = before; if ((rootParser->m_accounting.debugLevel >= 3) || (after - before) <= (ptrdiff_t)(contextLength + ellipsisLength + contextLength)) { for (; walker < after; walker++) { fprintf(stderr, "%s", unsignedCharToPrintable(walker[0])); } } else { for (; walker < before + contextLength; walker++) { fprintf(stderr, "%s", unsignedCharToPrintable(walker[0])); } fprintf(stderr, ellipis); walker = after - contextLength; for (; walker < after; walker++) { fprintf(stderr, "%s", unsignedCharToPrintable(walker[0])); } } fprintf(stderr, "\"\n"); } static XML_Bool accountingDiffTolerated(XML_Parser originParser, int tok, const char *before, const char *after, int source_line, enum XML_Account account) { /* Note: We need to check the token type *first* to be sure that * we can even access variable , safely. * E.g. for XML_TOK_NONE may hold an invalid pointer. */ switch (tok) { case XML_TOK_INVALID: case XML_TOK_PARTIAL: case XML_TOK_PARTIAL_CHAR: case XML_TOK_NONE: return XML_TRUE; } if (account == XML_ACCOUNT_NONE) return XML_TRUE; /* because these bytes have been accounted for, already */ unsigned int levelsAwayFromRootParser; const XML_Parser rootParser = getRootParserOf(originParser, &levelsAwayFromRootParser); assert(! rootParser->m_parentParser); const int isDirect = (account == XML_ACCOUNT_DIRECT) && (originParser == rootParser); const ptrdiff_t bytesMore = after - before; XmlBigCount *const additionTarget = isDirect ? &rootParser->m_accounting.countBytesDirect : &rootParser->m_accounting.countBytesIndirect; /* Detect and avoid integer overflow */ if (*additionTarget > (XmlBigCount)(-1) - (XmlBigCount)bytesMore) return XML_FALSE; *additionTarget += bytesMore; const XmlBigCount countBytesOutput = rootParser->m_accounting.countBytesDirect + rootParser->m_accounting.countBytesIndirect; const float amplificationFactor = accountingGetCurrentAmplification(rootParser); const XML_Bool tolerated = (countBytesOutput < rootParser->m_accounting.activationThresholdBytes) || (amplificationFactor <= rootParser->m_accounting.maximumAmplificationFactor); if (rootParser->m_accounting.debugLevel >= 2) { accountingReportStats(rootParser, ""); accountingReportDiff(rootParser, levelsAwayFromRootParser, before, after, bytesMore, source_line, account); } return tolerated; } unsigned long long testingAccountingGetCountBytesDirect(XML_Parser parser) { if (! parser) return 0; return parser->m_accounting.countBytesDirect; } unsigned long long testingAccountingGetCountBytesIndirect(XML_Parser parser) { if (! parser) return 0; return parser->m_accounting.countBytesIndirect; } static void entityTrackingReportStats(XML_Parser rootParser, ENTITY *entity, const char *action, int sourceLine) { assert(! rootParser->m_parentParser); if (rootParser->m_entity_stats.debugLevel < 1) return; # if defined(XML_UNICODE) const char *const entityName = "[..]"; # else const char *const entityName = entity->name; # endif fprintf( stderr, "expat: Entities(%p): Count %9d, depth %2d/%2d %*s%s%s; %s length %d (xmlparse.c:%d)\n", (void *)rootParser, rootParser->m_entity_stats.countEverOpened, rootParser->m_entity_stats.currentDepth, rootParser->m_entity_stats.maximumDepthSeen, (rootParser->m_entity_stats.currentDepth - 1) * 2, "", entity->is_param ? "%" : "&", entityName, action, entity->textLen, sourceLine); } static void entityTrackingOnOpen(XML_Parser originParser, ENTITY *entity, int sourceLine) { const XML_Parser rootParser = getRootParserOf(originParser, NULL); assert(! rootParser->m_parentParser); rootParser->m_entity_stats.countEverOpened++; rootParser->m_entity_stats.currentDepth++; if (rootParser->m_entity_stats.currentDepth > rootParser->m_entity_stats.maximumDepthSeen) { rootParser->m_entity_stats.maximumDepthSeen++; } entityTrackingReportStats(rootParser, entity, "OPEN ", sourceLine); } static void entityTrackingOnClose(XML_Parser originParser, ENTITY *entity, int sourceLine) { const XML_Parser rootParser = getRootParserOf(originParser, NULL); assert(! rootParser->m_parentParser); entityTrackingReportStats(rootParser, entity, "CLOSE", sourceLine); rootParser->m_entity_stats.currentDepth--; } static XML_Parser getRootParserOf(XML_Parser parser, unsigned int *outLevelDiff) { XML_Parser rootParser = parser; unsigned int stepsTakenUpwards = 0; while (rootParser->m_parentParser) { rootParser = rootParser->m_parentParser; stepsTakenUpwards++; } assert(! rootParser->m_parentParser); if (outLevelDiff != NULL) { *outLevelDiff = stepsTakenUpwards; } return rootParser; } const char * unsignedCharToPrintable(unsigned char c) { switch (c) { case 0: return "\\0"; case 1: return "\\x1"; case 2: return "\\x2"; case 3: return "\\x3"; case 4: return "\\x4"; case 5: return "\\x5"; case 6: return "\\x6"; case 7: return "\\x7"; case 8: return "\\x8"; case 9: return "\\t"; case 10: return "\\n"; case 11: return "\\xB"; case 12: return "\\xC"; case 13: return "\\r"; case 14: return "\\xE"; case 15: return "\\xF"; case 16: return "\\x10"; case 17: return "\\x11"; case 18: return "\\x12"; case 19: return "\\x13"; case 20: return "\\x14"; case 21: return "\\x15"; case 22: return "\\x16"; case 23: return "\\x17"; case 24: return "\\x18"; case 25: return "\\x19"; case 26: return "\\x1A"; case 27: return "\\x1B"; case 28: return "\\x1C"; case 29: return "\\x1D"; case 30: return "\\x1E"; case 31: return "\\x1F"; case 32: return " "; case 33: return "!"; case 34: return "\\\""; case 35: return "#"; case 36: return "$"; case 37: return "%"; case 38: return "&"; case 39: return "'"; case 40: return "("; case 41: return ")"; case 42: return "*"; case 43: return "+"; case 44: return ","; case 45: return "-"; case 46: return "."; case 47: return "/"; case 48: return "0"; case 49: return "1"; case 50: return "2"; case 51: return "3"; case 52: return "4"; case 53: return "5"; case 54: return "6"; case 55: return "7"; case 56: return "8"; case 57: return "9"; case 58: return ":"; case 59: return ";"; case 60: return "<"; case 61: return "="; case 62: return ">"; case 63: return "?"; case 64: return "@"; case 65: return "A"; case 66: return "B"; case 67: return "C"; case 68: return "D"; case 69: return "E"; case 70: return "F"; case 71: return "G"; case 72: return "H"; case 73: return "I"; case 74: return "J"; case 75: return "K"; case 76: return "L"; case 77: return "M"; case 78: return "N"; case 79: return "O"; case 80: return "P"; case 81: return "Q"; case 82: return "R"; case 83: return "S"; case 84: return "T"; case 85: return "U"; case 86: return "V"; case 87: return "W"; case 88: return "X"; case 89: return "Y"; case 90: return "Z"; case 91: return "["; case 92: return "\\\\"; case 93: return "]"; case 94: return "^"; case 95: return "_"; case 96: return "`"; case 97: return "a"; case 98: return "b"; case 99: return "c"; case 100: return "d"; case 101: return "e"; case 102: return "f"; case 103: return "g"; case 104: return "h"; case 105: return "i"; case 106: return "j"; case 107: return "k"; case 108: return "l"; case 109: return "m"; case 110: return "n"; case 111: return "o"; case 112: return "p"; case 113: return "q"; case 114: return "r"; case 115: return "s"; case 116: return "t"; case 117: return "u"; case 118: return "v"; case 119: return "w"; case 120: return "x"; case 121: return "y"; case 122: return "z"; case 123: return "{"; case 124: return "|"; case 125: return "}"; case 126: return "~"; case 127: return "\\x7F"; case 128: return "\\x80"; case 129: return "\\x81"; case 130: return "\\x82"; case 131: return "\\x83"; case 132: return "\\x84"; case 133: return "\\x85"; case 134: return "\\x86"; case 135: return "\\x87"; case 136: return "\\x88"; case 137: return "\\x89"; case 138: return "\\x8A"; case 139: return "\\x8B"; case 140: return "\\x8C"; case 141: return "\\x8D"; case 142: return "\\x8E"; case 143: return "\\x8F"; case 144: return "\\x90"; case 145: return "\\x91"; case 146: return "\\x92"; case 147: return "\\x93"; case 148: return "\\x94"; case 149: return "\\x95"; case 150: return "\\x96"; case 151: return "\\x97"; case 152: return "\\x98"; case 153: return "\\x99"; case 154: return "\\x9A"; case 155: return "\\x9B"; case 156: return "\\x9C"; case 157: return "\\x9D"; case 158: return "\\x9E"; case 159: return "\\x9F"; case 160: return "\\xA0"; case 161: return "\\xA1"; case 162: return "\\xA2"; case 163: return "\\xA3"; case 164: return "\\xA4"; case 165: return "\\xA5"; case 166: return "\\xA6"; case 167: return "\\xA7"; case 168: return "\\xA8"; case 169: return "\\xA9"; case 170: return "\\xAA"; case 171: return "\\xAB"; case 172: return "\\xAC"; case 173: return "\\xAD"; case 174: return "\\xAE"; case 175: return "\\xAF"; case 176: return "\\xB0"; case 177: return "\\xB1"; case 178: return "\\xB2"; case 179: return "\\xB3"; case 180: return "\\xB4"; case 181: return "\\xB5"; case 182: return "\\xB6"; case 183: return "\\xB7"; case 184: return "\\xB8"; case 185: return "\\xB9"; case 186: return "\\xBA"; case 187: return "\\xBB"; case 188: return "\\xBC"; case 189: return "\\xBD"; case 190: return "\\xBE"; case 191: return "\\xBF"; case 192: return "\\xC0"; case 193: return "\\xC1"; case 194: return "\\xC2"; case 195: return "\\xC3"; case 196: return "\\xC4"; case 197: return "\\xC5"; case 198: return "\\xC6"; case 199: return "\\xC7"; case 200: return "\\xC8"; case 201: return "\\xC9"; case 202: return "\\xCA"; case 203: return "\\xCB"; case 204: return "\\xCC"; case 205: return "\\xCD"; case 206: return "\\xCE"; case 207: return "\\xCF"; case 208: return "\\xD0"; case 209: return "\\xD1"; case 210: return "\\xD2"; case 211: return "\\xD3"; case 212: return "\\xD4"; case 213: return "\\xD5"; case 214: return "\\xD6"; case 215: return "\\xD7"; case 216: return "\\xD8"; case 217: return "\\xD9"; case 218: return "\\xDA"; case 219: return "\\xDB"; case 220: return "\\xDC"; case 221: return "\\xDD"; case 222: return "\\xDE"; case 223: return "\\xDF"; case 224: return "\\xE0"; case 225: return "\\xE1"; case 226: return "\\xE2"; case 227: return "\\xE3"; case 228: return "\\xE4"; case 229: return "\\xE5"; case 230: return "\\xE6"; case 231: return "\\xE7"; case 232: return "\\xE8"; case 233: return "\\xE9"; case 234: return "\\xEA"; case 235: return "\\xEB"; case 236: return "\\xEC"; case 237: return "\\xED"; case 238: return "\\xEE"; case 239: return "\\xEF"; case 240: return "\\xF0"; case 241: return "\\xF1"; case 242: return "\\xF2"; case 243: return "\\xF3"; case 244: return "\\xF4"; case 245: return "\\xF5"; case 246: return "\\xF6"; case 247: return "\\xF7"; case 248: return "\\xF8"; case 249: return "\\xF9"; case 250: return "\\xFA"; case 251: return "\\xFB"; case 252: return "\\xFC"; case 253: return "\\xFD"; case 254: return "\\xFE"; case 255: return "\\xFF"; default: assert(0); /* never gets here */ return "dead code"; } assert(0); /* never gets here */ } #endif /* XML_DTD */ static unsigned long getDebugLevel(const char *variableName, unsigned long defaultDebugLevel) { const char *const valueOrNull = getenv(variableName); if (valueOrNull == NULL) { return defaultDebugLevel; } const char *const value = valueOrNull; errno = 0; char *afterValue = (char *)value; unsigned long debugLevel = strtoul(value, &afterValue, 10); if ((errno != 0) || (afterValue[0] != '\0')) { errno = 0; return defaultDebugLevel; } return debugLevel; } diff --git a/contrib/expat/tests/runtests.c b/contrib/expat/tests/runtests.c index 6d6f66909a11..3309bbaa076b 100644 --- a/contrib/expat/tests/runtests.c +++ b/contrib/expat/tests/runtests.c @@ -1,12265 +1,12276 @@ /* Run the Expat test suite __ __ _ ___\ \/ /_ __ __ _| |_ / _ \\ /| '_ \ / _` | __| | __// \| |_) | (_| | |_ \___/_/\_\ .__/ \__,_|\__| |_| XML parser Copyright (c) 2001-2006 Fred L. Drake, Jr. Copyright (c) 2003 Greg Stein Copyright (c) 2005-2007 Steven Solie Copyright (c) 2005-2012 Karl Waclawek Copyright (c) 2016-2022 Sebastian Pipping Copyright (c) 2017-2018 Rhodri James Copyright (c) 2017 Joe Orton Copyright (c) 2017 José Gutiérrez de la Concha Copyright (c) 2018 Marco Maggi Copyright (c) 2019 David Loffredo Copyright (c) 2020 Tim Gates Copyright (c) 2021 Dong-hee Na Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include #if defined(NDEBUG) # undef NDEBUG /* because test suite relies on assert(...) at the moment */ #endif #include #include #include #include #include /* ptrdiff_t */ #include #include #include /* intptr_t uint64_t */ -#include /* NAN, INFINITY, isnan */ #if ! defined(__cplusplus) # include #endif #include "expat.h" #include "chardata.h" #include "structdata.h" #include "internal.h" #include "minicheck.h" #include "memcheck.h" #include "siphash.h" #include "ascii.h" /* for ASCII_xxx */ #ifdef XML_LARGE_SIZE # define XML_FMT_INT_MOD "ll" #else # define XML_FMT_INT_MOD "l" #endif #ifdef XML_UNICODE_WCHAR_T # define XML_FMT_CHAR "lc" # define XML_FMT_STR "ls" # include # define xcstrlen(s) wcslen(s) # define xcstrcmp(s, t) wcscmp((s), (t)) # define xcstrncmp(s, t, n) wcsncmp((s), (t), (n)) # define XCS(s) _XCS(s) # define _XCS(s) L##s #else # ifdef XML_UNICODE # error "No support for UTF-16 character without wchar_t in tests" # else # define XML_FMT_CHAR "c" # define XML_FMT_STR "s" # define xcstrlen(s) strlen(s) # define xcstrcmp(s, t) strcmp((s), (t)) # define xcstrncmp(s, t, n) strncmp((s), (t), (n)) # define XCS(s) s # endif /* XML_UNICODE */ #endif /* XML_UNICODE_WCHAR_T */ static XML_Parser g_parser = NULL; static void tcase_add_test__ifdef_xml_dtd(TCase *tc, tcase_test_function test) { #ifdef XML_DTD tcase_add_test(tc, test); #else UNUSED_P(tc); UNUSED_P(test); #endif } static void basic_setup(void) { g_parser = XML_ParserCreate(NULL); if (g_parser == NULL) fail("Parser not created."); } static void basic_teardown(void) { if (g_parser != NULL) { XML_ParserFree(g_parser); g_parser = NULL; } } /* Generate a failure using the parser state to create an error message; this should be used when the parser reports an error we weren't expecting. */ static void _xml_failure(XML_Parser parser, const char *file, int line) { char buffer[1024]; enum XML_Error err = XML_GetErrorCode(parser); sprintf(buffer, " %d: %" XML_FMT_STR " (line %" XML_FMT_INT_MOD "u, offset %" XML_FMT_INT_MOD "u)\n reported from %s, line %d\n", err, XML_ErrorString(err), XML_GetCurrentLineNumber(parser), XML_GetCurrentColumnNumber(parser), file, line); _fail_unless(0, file, line, buffer); } static enum XML_Status _XML_Parse_SINGLE_BYTES(XML_Parser parser, const char *s, int len, int isFinal) { enum XML_Status res = XML_STATUS_ERROR; int offset = 0; if (len == 0) { return XML_Parse(parser, s, len, isFinal); } for (; offset < len; offset++) { const int innerIsFinal = (offset == len - 1) && isFinal; const char c = s[offset]; /* to help out-of-bounds detection */ res = XML_Parse(parser, &c, sizeof(char), innerIsFinal); if (res != XML_STATUS_OK) { return res; } } return res; } #define xml_failure(parser) _xml_failure((parser), __FILE__, __LINE__) static void _expect_failure(const char *text, enum XML_Error errorCode, const char *errorMessage, const char *file, int lineno) { if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_OK) /* Hackish use of _fail_unless() macro, but let's us report the right filename and line number. */ _fail_unless(0, file, lineno, errorMessage); if (XML_GetErrorCode(g_parser) != errorCode) _xml_failure(g_parser, file, lineno); } #define expect_failure(text, errorCode, errorMessage) \ _expect_failure((text), (errorCode), (errorMessage), __FILE__, __LINE__) /* Dummy handlers for when we need to set a handler to tickle a bug, but it doesn't need to do anything. */ static unsigned long dummy_handler_flags = 0; #define DUMMY_START_DOCTYPE_HANDLER_FLAG (1UL << 0) #define DUMMY_END_DOCTYPE_HANDLER_FLAG (1UL << 1) #define DUMMY_ENTITY_DECL_HANDLER_FLAG (1UL << 2) #define DUMMY_NOTATION_DECL_HANDLER_FLAG (1UL << 3) #define DUMMY_ELEMENT_DECL_HANDLER_FLAG (1UL << 4) #define DUMMY_ATTLIST_DECL_HANDLER_FLAG (1UL << 5) #define DUMMY_COMMENT_HANDLER_FLAG (1UL << 6) #define DUMMY_PI_HANDLER_FLAG (1UL << 7) #define DUMMY_START_ELEMENT_HANDLER_FLAG (1UL << 8) #define DUMMY_START_CDATA_HANDLER_FLAG (1UL << 9) #define DUMMY_END_CDATA_HANDLER_FLAG (1UL << 10) #define DUMMY_UNPARSED_ENTITY_DECL_HANDLER_FLAG (1UL << 11) #define DUMMY_START_NS_DECL_HANDLER_FLAG (1UL << 12) #define DUMMY_END_NS_DECL_HANDLER_FLAG (1UL << 13) #define DUMMY_START_DOCTYPE_DECL_HANDLER_FLAG (1UL << 14) #define DUMMY_END_DOCTYPE_DECL_HANDLER_FLAG (1UL << 15) #define DUMMY_SKIP_HANDLER_FLAG (1UL << 16) #define DUMMY_DEFAULT_HANDLER_FLAG (1UL << 17) static void XMLCALL dummy_xdecl_handler(void *userData, const XML_Char *version, const XML_Char *encoding, int standalone) { UNUSED_P(userData); UNUSED_P(version); UNUSED_P(encoding); UNUSED_P(standalone); } static void XMLCALL dummy_start_doctype_handler(void *userData, const XML_Char *doctypeName, const XML_Char *sysid, const XML_Char *pubid, int has_internal_subset) { UNUSED_P(userData); UNUSED_P(doctypeName); UNUSED_P(sysid); UNUSED_P(pubid); UNUSED_P(has_internal_subset); dummy_handler_flags |= DUMMY_START_DOCTYPE_HANDLER_FLAG; } static void XMLCALL dummy_end_doctype_handler(void *userData) { UNUSED_P(userData); dummy_handler_flags |= DUMMY_END_DOCTYPE_HANDLER_FLAG; } static void XMLCALL dummy_entity_decl_handler(void *userData, const XML_Char *entityName, int is_parameter_entity, const XML_Char *value, int value_length, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId, const XML_Char *notationName) { UNUSED_P(userData); UNUSED_P(entityName); UNUSED_P(is_parameter_entity); UNUSED_P(value); UNUSED_P(value_length); UNUSED_P(base); UNUSED_P(systemId); UNUSED_P(publicId); UNUSED_P(notationName); dummy_handler_flags |= DUMMY_ENTITY_DECL_HANDLER_FLAG; } static void XMLCALL dummy_notation_decl_handler(void *userData, const XML_Char *notationName, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId) { UNUSED_P(userData); UNUSED_P(notationName); UNUSED_P(base); UNUSED_P(systemId); UNUSED_P(publicId); dummy_handler_flags |= DUMMY_NOTATION_DECL_HANDLER_FLAG; } static void XMLCALL dummy_element_decl_handler(void *userData, const XML_Char *name, XML_Content *model) { UNUSED_P(userData); UNUSED_P(name); /* The content model must be freed by the handler. Unfortunately * we cannot pass the parser as the userData because this is used * with other handlers that require other userData. */ XML_FreeContentModel(g_parser, model); dummy_handler_flags |= DUMMY_ELEMENT_DECL_HANDLER_FLAG; } static void XMLCALL dummy_attlist_decl_handler(void *userData, const XML_Char *elname, const XML_Char *attname, const XML_Char *att_type, const XML_Char *dflt, int isrequired) { UNUSED_P(userData); UNUSED_P(elname); UNUSED_P(attname); UNUSED_P(att_type); UNUSED_P(dflt); UNUSED_P(isrequired); dummy_handler_flags |= DUMMY_ATTLIST_DECL_HANDLER_FLAG; } static void XMLCALL dummy_comment_handler(void *userData, const XML_Char *data) { UNUSED_P(userData); UNUSED_P(data); dummy_handler_flags |= DUMMY_COMMENT_HANDLER_FLAG; } static void XMLCALL dummy_pi_handler(void *userData, const XML_Char *target, const XML_Char *data) { UNUSED_P(userData); UNUSED_P(target); UNUSED_P(data); dummy_handler_flags |= DUMMY_PI_HANDLER_FLAG; } static void XMLCALL dummy_start_element(void *userData, const XML_Char *name, const XML_Char **atts) { UNUSED_P(userData); UNUSED_P(name); UNUSED_P(atts); dummy_handler_flags |= DUMMY_START_ELEMENT_HANDLER_FLAG; } static void XMLCALL dummy_end_element(void *userData, const XML_Char *name) { UNUSED_P(userData); UNUSED_P(name); } static void XMLCALL dummy_start_cdata_handler(void *userData) { UNUSED_P(userData); dummy_handler_flags |= DUMMY_START_CDATA_HANDLER_FLAG; } static void XMLCALL dummy_end_cdata_handler(void *userData) { UNUSED_P(userData); dummy_handler_flags |= DUMMY_END_CDATA_HANDLER_FLAG; } static void XMLCALL dummy_cdata_handler(void *userData, const XML_Char *s, int len) { UNUSED_P(userData); UNUSED_P(s); UNUSED_P(len); } static void XMLCALL dummy_start_namespace_decl_handler(void *userData, const XML_Char *prefix, const XML_Char *uri) { UNUSED_P(userData); UNUSED_P(prefix); UNUSED_P(uri); dummy_handler_flags |= DUMMY_START_NS_DECL_HANDLER_FLAG; } static void XMLCALL dummy_end_namespace_decl_handler(void *userData, const XML_Char *prefix) { UNUSED_P(userData); UNUSED_P(prefix); dummy_handler_flags |= DUMMY_END_NS_DECL_HANDLER_FLAG; } /* This handler is obsolete, but while the code exists we should * ensure that dealing with the handler is covered by tests. */ static void XMLCALL dummy_unparsed_entity_decl_handler(void *userData, const XML_Char *entityName, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId, const XML_Char *notationName) { UNUSED_P(userData); UNUSED_P(entityName); UNUSED_P(base); UNUSED_P(systemId); UNUSED_P(publicId); UNUSED_P(notationName); dummy_handler_flags |= DUMMY_UNPARSED_ENTITY_DECL_HANDLER_FLAG; } static void XMLCALL dummy_default_handler(void *userData, const XML_Char *s, int len) { UNUSED_P(userData); UNUSED_P(s); UNUSED_P(len); } static void XMLCALL dummy_start_doctype_decl_handler(void *userData, const XML_Char *doctypeName, const XML_Char *sysid, const XML_Char *pubid, int has_internal_subset) { UNUSED_P(userData); UNUSED_P(doctypeName); UNUSED_P(sysid); UNUSED_P(pubid); UNUSED_P(has_internal_subset); dummy_handler_flags |= DUMMY_START_DOCTYPE_DECL_HANDLER_FLAG; } static void XMLCALL dummy_end_doctype_decl_handler(void *userData) { UNUSED_P(userData); dummy_handler_flags |= DUMMY_END_DOCTYPE_DECL_HANDLER_FLAG; } static void XMLCALL dummy_skip_handler(void *userData, const XML_Char *entityName, int is_parameter_entity) { UNUSED_P(userData); UNUSED_P(entityName); UNUSED_P(is_parameter_entity); dummy_handler_flags |= DUMMY_SKIP_HANDLER_FLAG; } /* Useful external entity handler */ typedef struct ExtOption { const XML_Char *system_id; const char *parse_text; } ExtOption; static int XMLCALL external_entity_optioner(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId) { ExtOption *options = (ExtOption *)XML_GetUserData(parser); XML_Parser ext_parser; UNUSED_P(base); UNUSED_P(publicId); while (options->parse_text != NULL) { if (! xcstrcmp(systemId, options->system_id)) { enum XML_Status rc; ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); if (ext_parser == NULL) return XML_STATUS_ERROR; rc = _XML_Parse_SINGLE_BYTES(ext_parser, options->parse_text, (int)strlen(options->parse_text), XML_TRUE); XML_ParserFree(ext_parser); return rc; } options++; } fail("No suitable option found"); return XML_STATUS_ERROR; } /* * Parameter entity evaluation support. */ #define ENTITY_MATCH_FAIL (-1) #define ENTITY_MATCH_NOT_FOUND (0) #define ENTITY_MATCH_SUCCESS (1) static const XML_Char *entity_name_to_match = NULL; static const XML_Char *entity_value_to_match = NULL; static int entity_match_flag = ENTITY_MATCH_NOT_FOUND; static void XMLCALL param_entity_match_handler(void *userData, const XML_Char *entityName, int is_parameter_entity, const XML_Char *value, int value_length, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId, const XML_Char *notationName) { UNUSED_P(userData); UNUSED_P(base); UNUSED_P(systemId); UNUSED_P(publicId); UNUSED_P(notationName); if (! is_parameter_entity || entity_name_to_match == NULL || entity_value_to_match == NULL) { return; } if (! xcstrcmp(entityName, entity_name_to_match)) { /* The cast here is safe because we control the horizontal and * the vertical, and we therefore know our strings are never * going to overflow an int. */ if (value_length != (int)xcstrlen(entity_value_to_match) || xcstrncmp(value, entity_value_to_match, value_length)) { entity_match_flag = ENTITY_MATCH_FAIL; } else { entity_match_flag = ENTITY_MATCH_SUCCESS; } } /* Else leave the match flag alone */ } /* * Character & encoding tests. */ START_TEST(test_nul_byte) { char text[] = "\0"; /* test that a NUL byte (in US-ASCII data) is an error */ if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE) == XML_STATUS_OK) fail("Parser did not report error on NUL-byte."); if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN) xml_failure(g_parser); } END_TEST START_TEST(test_u0000_char) { /* test that a NUL byte (in US-ASCII data) is an error */ expect_failure("", XML_ERROR_BAD_CHAR_REF, "Parser did not report error on NUL-byte."); } END_TEST START_TEST(test_siphash_self) { if (! sip24_valid()) fail("SipHash self-test failed"); } END_TEST START_TEST(test_siphash_spec) { /* https://131002.net/siphash/siphash.pdf (page 19, "Test values") */ const char message[] = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09" "\x0a\x0b\x0c\x0d\x0e"; const size_t len = sizeof(message) - 1; const uint64_t expected = _SIP_ULL(0xa129ca61U, 0x49be45e5U); struct siphash state; struct sipkey key; sip_tokey(&key, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09" "\x0a\x0b\x0c\x0d\x0e\x0f"); sip24_init(&state, &key); /* Cover spread across calls */ sip24_update(&state, message, 4); sip24_update(&state, message + 4, len - 4); /* Cover null length */ sip24_update(&state, message, 0); if (sip24_final(&state) != expected) fail("sip24_final failed spec test\n"); /* Cover wrapper */ if (siphash24(message, len, &key) != expected) fail("siphash24 failed spec test\n"); } END_TEST START_TEST(test_bom_utf8) { /* This test is really just making sure we don't core on a UTF-8 BOM. */ const char *text = "\357\273\277"; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); } END_TEST START_TEST(test_bom_utf16_be) { char text[] = "\376\377\0<\0e\0/\0>"; if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); } END_TEST START_TEST(test_bom_utf16_le) { char text[] = "\377\376<\0e\0/\0>\0"; if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); } END_TEST /* Parse whole buffer at once to exercise a different code path */ START_TEST(test_nobom_utf16_le) { char text[] = " \0<\0e\0/\0>\0"; if (XML_Parse(g_parser, text, sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); } END_TEST static void XMLCALL accumulate_characters(void *userData, const XML_Char *s, int len) { CharData_AppendXMLChars((CharData *)userData, s, len); } static void XMLCALL accumulate_attribute(void *userData, const XML_Char *name, const XML_Char **atts) { CharData *storage = (CharData *)userData; UNUSED_P(name); /* Check there are attributes to deal with */ if (atts == NULL) return; while (storage->count < 0 && atts[0] != NULL) { /* "accumulate" the value of the first attribute we see */ CharData_AppendXMLChars(storage, atts[1], -1); atts += 2; } } static void _run_character_check(const char *text, const XML_Char *expected, const char *file, int line) { CharData storage; CharData_Init(&storage); XML_SetUserData(g_parser, &storage); XML_SetCharacterDataHandler(g_parser, accumulate_characters); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) _xml_failure(g_parser, file, line); CharData_CheckXMLChars(&storage, expected); } #define run_character_check(text, expected) \ _run_character_check(text, expected, __FILE__, __LINE__) static void _run_attribute_check(const char *text, const XML_Char *expected, const char *file, int line) { CharData storage; CharData_Init(&storage); XML_SetUserData(g_parser, &storage); XML_SetStartElementHandler(g_parser, accumulate_attribute); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) _xml_failure(g_parser, file, line); CharData_CheckXMLChars(&storage, expected); } #define run_attribute_check(text, expected) \ _run_attribute_check(text, expected, __FILE__, __LINE__) typedef struct ExtTest { const char *parse_text; const XML_Char *encoding; CharData *storage; } ExtTest; static void XMLCALL ext_accumulate_characters(void *userData, const XML_Char *s, int len) { ExtTest *test_data = (ExtTest *)userData; accumulate_characters(test_data->storage, s, len); } static void _run_ext_character_check(const char *text, ExtTest *test_data, const XML_Char *expected, const char *file, int line) { CharData *const storage = (CharData *)malloc(sizeof(CharData)); CharData_Init(storage); test_data->storage = storage; XML_SetUserData(g_parser, test_data); XML_SetCharacterDataHandler(g_parser, ext_accumulate_characters); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) _xml_failure(g_parser, file, line); CharData_CheckXMLChars(storage, expected); free(storage); } #define run_ext_character_check(text, test_data, expected) \ _run_ext_character_check(text, test_data, expected, __FILE__, __LINE__) /* Regression test for SF bug #491986. */ START_TEST(test_danish_latin1) { const char *text = "\n" "J\xF8rgen \xE6\xF8\xE5\xC6\xD8\xC5"; #ifdef XML_UNICODE const XML_Char *expected = XCS("J\x00f8rgen \x00e6\x00f8\x00e5\x00c6\x00d8\x00c5"); #else const XML_Char *expected = XCS("J\xC3\xB8rgen \xC3\xA6\xC3\xB8\xC3\xA5\xC3\x86\xC3\x98\xC3\x85"); #endif run_character_check(text, expected); } END_TEST /* Regression test for SF bug #514281. */ START_TEST(test_french_charref_hexidecimal) { const char *text = "\n" "éèàçêÈ"; #ifdef XML_UNICODE const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8"); #else const XML_Char *expected = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88"); #endif run_character_check(text, expected); } END_TEST START_TEST(test_french_charref_decimal) { const char *text = "\n" "éèàçêÈ"; #ifdef XML_UNICODE const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8"); #else const XML_Char *expected = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88"); #endif run_character_check(text, expected); } END_TEST START_TEST(test_french_latin1) { const char *text = "\n" "\xE9\xE8\xE0\xE7\xEa\xC8"; #ifdef XML_UNICODE const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8"); #else const XML_Char *expected = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88"); #endif run_character_check(text, expected); } END_TEST START_TEST(test_french_utf8) { const char *text = "\n" "\xC3\xA9"; #ifdef XML_UNICODE const XML_Char *expected = XCS("\x00e9"); #else const XML_Char *expected = XCS("\xC3\xA9"); #endif run_character_check(text, expected); } END_TEST /* Regression test for SF bug #600479. XXX There should be a test that exercises all legal XML Unicode characters as PCDATA and attribute value content, and XML Name characters as part of element and attribute names. */ START_TEST(test_utf8_false_rejection) { const char *text = "\xEF\xBA\xBF"; #ifdef XML_UNICODE const XML_Char *expected = XCS("\xfebf"); #else const XML_Char *expected = XCS("\xEF\xBA\xBF"); #endif run_character_check(text, expected); } END_TEST /* Regression test for SF bug #477667. This test assures that any 8-bit character followed by a 7-bit character will not be mistakenly interpreted as a valid UTF-8 sequence. */ START_TEST(test_illegal_utf8) { char text[100]; int i; for (i = 128; i <= 255; ++i) { sprintf(text, "%ccd", i); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_OK) { sprintf(text, "expected token error for '%c' (ordinal %d) in UTF-8 text", i, i); fail(text); } else if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN) xml_failure(g_parser); /* Reset the parser since we use the same parser repeatedly. */ XML_ParserReset(g_parser, NULL); } } END_TEST /* Examples, not masks: */ #define UTF8_LEAD_1 "\x7f" /* 0b01111111 */ #define UTF8_LEAD_2 "\xdf" /* 0b11011111 */ #define UTF8_LEAD_3 "\xef" /* 0b11101111 */ #define UTF8_LEAD_4 "\xf7" /* 0b11110111 */ #define UTF8_FOLLOW "\xbf" /* 0b10111111 */ START_TEST(test_utf8_auto_align) { struct TestCase { ptrdiff_t expectedMovementInChars; const char *input; }; struct TestCase cases[] = { {00, ""}, {00, UTF8_LEAD_1}, {-1, UTF8_LEAD_2}, {00, UTF8_LEAD_2 UTF8_FOLLOW}, {-1, UTF8_LEAD_3}, {-2, UTF8_LEAD_3 UTF8_FOLLOW}, {00, UTF8_LEAD_3 UTF8_FOLLOW UTF8_FOLLOW}, {-1, UTF8_LEAD_4}, {-2, UTF8_LEAD_4 UTF8_FOLLOW}, {-3, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW}, {00, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW UTF8_FOLLOW}, }; size_t i = 0; bool success = true; for (; i < sizeof(cases) / sizeof(*cases); i++) { const char *fromLim = cases[i].input + strlen(cases[i].input); const char *const fromLimInitially = fromLim; ptrdiff_t actualMovementInChars; _INTERNAL_trim_to_complete_utf8_characters(cases[i].input, &fromLim); actualMovementInChars = (fromLim - fromLimInitially); if (actualMovementInChars != cases[i].expectedMovementInChars) { size_t j = 0; success = false; printf("[-] UTF-8 case %2u: Expected movement by %2d chars" ", actually moved by %2d chars: \"", (unsigned)(i + 1), (int)cases[i].expectedMovementInChars, (int)actualMovementInChars); for (; j < strlen(cases[i].input); j++) { printf("\\x%02x", (unsigned char)cases[i].input[j]); } printf("\"\n"); } } if (! success) { fail("UTF-8 auto-alignment is not bullet-proof\n"); } } END_TEST START_TEST(test_utf16) { /* * some {A} text * * where {A} is U+FF21, FULLWIDTH LATIN CAPITAL LETTER A */ char text[] = "\000<\000?\000x\000m\000\154\000 \000v\000e\000r\000s\000i\000o" "\000n\000=\000'\0001\000.\000\060\000'\000 \000e\000n\000c\000o" "\000d\000i\000n\000g\000=\000'\000U\000T\000F\000-\0001\000\066" "\000'\000?\000>\000\n" "\000<\000d\000o\000c\000 \000a\000=\000'\0001\0002\0003\000'\000>" "\000s\000o\000m\000e\000 \xff\x21\000 \000t\000e\000x\000t\000" "<\000/\000d\000o\000c\000>"; #ifdef XML_UNICODE const XML_Char *expected = XCS("some \xff21 text"); #else const XML_Char *expected = XCS("some \357\274\241 text"); #endif CharData storage; CharData_Init(&storage); XML_SetUserData(g_parser, &storage); XML_SetCharacterDataHandler(g_parser, accumulate_characters); if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); CharData_CheckXMLChars(&storage, expected); } END_TEST START_TEST(test_utf16_le_epilog_newline) { unsigned int first_chunk_bytes = 17; char text[] = "\xFF\xFE" /* BOM */ "<\000e\000/\000>\000" /* document element */ "\r\000\n\000\r\000\n\000"; /* epilog */ if (first_chunk_bytes >= sizeof(text) - 1) fail("bad value of first_chunk_bytes"); if (_XML_Parse_SINGLE_BYTES(g_parser, text, first_chunk_bytes, XML_FALSE) == XML_STATUS_ERROR) xml_failure(g_parser); else { enum XML_Status rc; rc = _XML_Parse_SINGLE_BYTES(g_parser, text + first_chunk_bytes, sizeof(text) - first_chunk_bytes - 1, XML_TRUE); if (rc == XML_STATUS_ERROR) xml_failure(g_parser); } } END_TEST /* Test that an outright lie in the encoding is faulted */ START_TEST(test_not_utf16) { const char *text = "" "Hi"; /* Use a handler to provoke the appropriate code paths */ XML_SetXmlDeclHandler(g_parser, dummy_xdecl_handler); expect_failure(text, XML_ERROR_INCORRECT_ENCODING, "UTF-16 declared in UTF-8 not faulted"); } END_TEST /* Test that an unknown encoding is rejected */ START_TEST(test_bad_encoding) { const char *text = "Hi"; if (! XML_SetEncoding(g_parser, XCS("unknown-encoding"))) fail("XML_SetEncoding failed"); expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, "Unknown encoding not faulted"); } END_TEST /* Regression test for SF bug #481609, #774028. */ START_TEST(test_latin1_umlauts) { const char *text = "\n" "\xE4 \xF6 \xFC ä ö ü ä ö ü >"; #ifdef XML_UNICODE /* Expected results in UTF-16 */ const XML_Char *expected = XCS("\x00e4 \x00f6 \x00fc ") XCS("\x00e4 \x00f6 \x00fc ") XCS("\x00e4 \x00f6 \x00fc >"); #else /* Expected results in UTF-8 */ const XML_Char *expected = XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ") XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ") XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC >"); #endif run_character_check(text, expected); XML_ParserReset(g_parser, NULL); run_attribute_check(text, expected); /* Repeat with a default handler */ XML_ParserReset(g_parser, NULL); XML_SetDefaultHandler(g_parser, dummy_default_handler); run_character_check(text, expected); XML_ParserReset(g_parser, NULL); XML_SetDefaultHandler(g_parser, dummy_default_handler); run_attribute_check(text, expected); } END_TEST /* Test that an element name with a 4-byte UTF-8 character is rejected */ START_TEST(test_long_utf8_character) { const char *text = "\n" /* 0xf0 0x90 0x80 0x80 = U+10000, the first Linear B character */ ""; expect_failure(text, XML_ERROR_INVALID_TOKEN, "4-byte UTF-8 character in element name not faulted"); } END_TEST /* Test that a long latin-1 attribute (too long to convert in one go) * is correctly converted */ START_TEST(test_long_latin1_attribute) { const char *text = "\n" "\n"; const XML_Char *expected = /* 64 characters per line */ /* clang-format off */ XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO") /* clang-format on */ #ifdef XML_UNICODE XCS("\x00e4"); #else XCS("\xc3\xa4"); #endif run_attribute_check(text, expected); } END_TEST /* Test that a long ASCII attribute (too long to convert in one go) * is correctly converted */ START_TEST(test_long_ascii_attribute) { const char *text = "\n" "\n"; const XML_Char *expected = /* 64 characters per line */ /* clang-format off */ XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") XCS("01234"); /* clang-format on */ run_attribute_check(text, expected); } END_TEST /* Regression test #1 for SF bug #653180. */ START_TEST(test_line_number_after_parse) { const char *text = "\n" "\n" "\n"; XML_Size lineno; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE) == XML_STATUS_ERROR) xml_failure(g_parser); lineno = XML_GetCurrentLineNumber(g_parser); if (lineno != 4) { char buffer[100]; sprintf(buffer, "expected 4 lines, saw %" XML_FMT_INT_MOD "u", lineno); fail(buffer); } } END_TEST /* Regression test #2 for SF bug #653180. */ START_TEST(test_column_number_after_parse) { const char *text = ""; XML_Size colno; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE) == XML_STATUS_ERROR) xml_failure(g_parser); colno = XML_GetCurrentColumnNumber(g_parser); if (colno != 11) { char buffer[100]; sprintf(buffer, "expected 11 columns, saw %" XML_FMT_INT_MOD "u", colno); fail(buffer); } } END_TEST #define STRUCT_START_TAG 0 #define STRUCT_END_TAG 1 static void XMLCALL start_element_event_handler2(void *userData, const XML_Char *name, const XML_Char **attr) { StructData *storage = (StructData *)userData; UNUSED_P(attr); StructData_AddItem(storage, name, XML_GetCurrentColumnNumber(g_parser), XML_GetCurrentLineNumber(g_parser), STRUCT_START_TAG); } static void XMLCALL end_element_event_handler2(void *userData, const XML_Char *name) { StructData *storage = (StructData *)userData; StructData_AddItem(storage, name, XML_GetCurrentColumnNumber(g_parser), XML_GetCurrentLineNumber(g_parser), STRUCT_END_TAG); } /* Regression test #3 for SF bug #653180. */ START_TEST(test_line_and_column_numbers_inside_handlers) { const char *text = "\n" /* Unix end-of-line */ " \r\n" /* Windows end-of-line */ " \r" /* Mac OS end-of-line */ " \n" " \n" " \n" " \n" ""; const StructDataEntry expected[] = {{XCS("a"), 0, 1, STRUCT_START_TAG}, {XCS("b"), 2, 2, STRUCT_START_TAG}, {XCS("c"), 4, 3, STRUCT_START_TAG}, {XCS("c"), 8, 3, STRUCT_END_TAG}, {XCS("b"), 2, 4, STRUCT_END_TAG}, {XCS("d"), 2, 5, STRUCT_START_TAG}, {XCS("f"), 4, 6, STRUCT_START_TAG}, {XCS("f"), 8, 6, STRUCT_END_TAG}, {XCS("d"), 2, 7, STRUCT_END_TAG}, {XCS("a"), 0, 8, STRUCT_END_TAG}}; const int expected_count = sizeof(expected) / sizeof(StructDataEntry); StructData storage; StructData_Init(&storage); XML_SetUserData(g_parser, &storage); XML_SetStartElementHandler(g_parser, start_element_event_handler2); XML_SetEndElementHandler(g_parser, end_element_event_handler2); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); StructData_CheckItems(&storage, expected, expected_count); StructData_Dispose(&storage); } END_TEST /* Regression test #4 for SF bug #653180. */ START_TEST(test_line_number_after_error) { const char *text = "\n" " \n" " "; /* missing */ XML_Size lineno; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE) != XML_STATUS_ERROR) fail("Expected a parse error"); lineno = XML_GetCurrentLineNumber(g_parser); if (lineno != 3) { char buffer[100]; sprintf(buffer, "expected 3 lines, saw %" XML_FMT_INT_MOD "u", lineno); fail(buffer); } } END_TEST /* Regression test #5 for SF bug #653180. */ START_TEST(test_column_number_after_error) { const char *text = "\n" " \n" " "; /* missing */ XML_Size colno; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE) != XML_STATUS_ERROR) fail("Expected a parse error"); colno = XML_GetCurrentColumnNumber(g_parser); if (colno != 4) { char buffer[100]; sprintf(buffer, "expected 4 columns, saw %" XML_FMT_INT_MOD "u", colno); fail(buffer); } } END_TEST /* Regression test for SF bug #478332. */ START_TEST(test_really_long_lines) { /* This parses an input line longer than INIT_DATA_BUF_SIZE characters long (defined to be 1024 in xmlparse.c). We take a really cheesy approach to building the input buffer, because this avoids writing bugs in buffer-filling code. */ const char *text = "" /* 64 chars */ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" /* until we have at least 1024 characters on the line: */ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" ""; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); } END_TEST /* Test cdata processing across a buffer boundary */ START_TEST(test_really_long_encoded_lines) { /* As above, except that we want to provoke an output buffer * overflow with a non-trivial encoding. For this we need to pass * the whole cdata in one go, not byte-by-byte. */ void *buffer; const char *text = "" "" /* 64 chars */ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" /* until we have at least 1024 characters on the line: */ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" ""; int parse_len = (int)strlen(text); /* Need a cdata handler to provoke the code path we want to test */ XML_SetCharacterDataHandler(g_parser, dummy_cdata_handler); buffer = XML_GetBuffer(g_parser, parse_len); if (buffer == NULL) fail("Could not allocate parse buffer"); assert(buffer != NULL); memcpy(buffer, text, parse_len); if (XML_ParseBuffer(g_parser, parse_len, XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); } END_TEST /* * Element event tests. */ static void XMLCALL start_element_event_handler(void *userData, const XML_Char *name, const XML_Char **atts) { UNUSED_P(atts); CharData_AppendXMLChars((CharData *)userData, name, -1); } static void XMLCALL end_element_event_handler(void *userData, const XML_Char *name) { CharData *storage = (CharData *)userData; CharData_AppendXMLChars(storage, XCS("/"), 1); CharData_AppendXMLChars(storage, name, -1); } START_TEST(test_end_element_events) { const char *text = ""; const XML_Char *expected = XCS("/c/b/f/d/a"); CharData storage; CharData_Init(&storage); XML_SetUserData(g_parser, &storage); XML_SetEndElementHandler(g_parser, end_element_event_handler); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); CharData_CheckXMLChars(&storage, expected); } END_TEST /* * Attribute tests. */ /* Helpers used by the following test; this checks any "attr" and "refs" attributes to make sure whitespace has been normalized. Return true if whitespace has been normalized in a string, using the rules for attribute value normalization. The 'is_cdata' flag is needed since CDATA attributes don't need to have multiple whitespace characters collapsed to a single space, while other attribute data types do. (Section 3.3.3 of the recommendation.) */ static int is_whitespace_normalized(const XML_Char *s, int is_cdata) { int blanks = 0; int at_start = 1; while (*s) { if (*s == XCS(' ')) ++blanks; else if (*s == XCS('\t') || *s == XCS('\n') || *s == XCS('\r')) return 0; else { if (at_start) { at_start = 0; if (blanks && ! is_cdata) /* illegal leading blanks */ return 0; } else if (blanks > 1 && ! is_cdata) return 0; blanks = 0; } ++s; } if (blanks && ! is_cdata) return 0; return 1; } /* Check the attribute whitespace checker: */ static void testhelper_is_whitespace_normalized(void) { assert(is_whitespace_normalized(XCS("abc"), 0)); assert(is_whitespace_normalized(XCS("abc"), 1)); assert(is_whitespace_normalized(XCS("abc def ghi"), 0)); assert(is_whitespace_normalized(XCS("abc def ghi"), 1)); assert(! is_whitespace_normalized(XCS(" abc def ghi"), 0)); assert(is_whitespace_normalized(XCS(" abc def ghi"), 1)); assert(! is_whitespace_normalized(XCS("abc def ghi"), 0)); assert(is_whitespace_normalized(XCS("abc def ghi"), 1)); assert(! is_whitespace_normalized(XCS("abc def ghi "), 0)); assert(is_whitespace_normalized(XCS("abc def ghi "), 1)); assert(! is_whitespace_normalized(XCS(" "), 0)); assert(is_whitespace_normalized(XCS(" "), 1)); assert(! is_whitespace_normalized(XCS("\t"), 0)); assert(! is_whitespace_normalized(XCS("\t"), 1)); assert(! is_whitespace_normalized(XCS("\n"), 0)); assert(! is_whitespace_normalized(XCS("\n"), 1)); assert(! is_whitespace_normalized(XCS("\r"), 0)); assert(! is_whitespace_normalized(XCS("\r"), 1)); assert(! is_whitespace_normalized(XCS("abc\t def"), 1)); } static void XMLCALL check_attr_contains_normalized_whitespace(void *userData, const XML_Char *name, const XML_Char **atts) { int i; UNUSED_P(userData); UNUSED_P(name); for (i = 0; atts[i] != NULL; i += 2) { const XML_Char *attrname = atts[i]; const XML_Char *value = atts[i + 1]; if (xcstrcmp(XCS("attr"), attrname) == 0 || xcstrcmp(XCS("ents"), attrname) == 0 || xcstrcmp(XCS("refs"), attrname) == 0) { if (! is_whitespace_normalized(value, 0)) { char buffer[256]; sprintf(buffer, "attribute value not normalized: %" XML_FMT_STR "='%" XML_FMT_STR "'", attrname, value); fail(buffer); } } } } START_TEST(test_attr_whitespace_normalization) { const char *text = "\n" "]>\n" "\n" " \n" " \n" ""; XML_SetStartElementHandler(g_parser, check_attr_contains_normalized_whitespace); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); } END_TEST /* * XML declaration tests. */ START_TEST(test_xmldecl_misplaced) { expect_failure("\n" "\n" "", XML_ERROR_MISPLACED_XML_PI, "failed to report misplaced XML declaration"); } END_TEST START_TEST(test_xmldecl_invalid) { expect_failure("\n", XML_ERROR_XML_DECL, "Failed to report invalid XML declaration"); } END_TEST START_TEST(test_xmldecl_missing_attr) { expect_failure("\n\n", XML_ERROR_XML_DECL, "Failed to report missing XML declaration attribute"); } END_TEST START_TEST(test_xmldecl_missing_value) { expect_failure("\n" "", XML_ERROR_XML_DECL, "Failed to report missing attribute value"); } END_TEST /* Regression test for SF bug #584832. */ static int XMLCALL UnknownEncodingHandler(void *data, const XML_Char *encoding, XML_Encoding *info) { UNUSED_P(data); if (xcstrcmp(encoding, XCS("unsupported-encoding")) == 0) { int i; for (i = 0; i < 256; ++i) info->map[i] = i; info->data = NULL; info->convert = NULL; info->release = NULL; return XML_STATUS_OK; } return XML_STATUS_ERROR; } START_TEST(test_unknown_encoding_internal_entity) { const char *text = "\n" "]>\n" ""; XML_SetUnknownEncodingHandler(g_parser, UnknownEncodingHandler, NULL); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); } END_TEST /* Test unrecognised encoding handler */ static void dummy_release(void *data) { UNUSED_P(data); } static int XMLCALL UnrecognisedEncodingHandler(void *data, const XML_Char *encoding, XML_Encoding *info) { UNUSED_P(data); UNUSED_P(encoding); info->data = NULL; info->convert = NULL; info->release = dummy_release; return XML_STATUS_ERROR; } START_TEST(test_unrecognised_encoding_internal_entity) { const char *text = "\n" "]>\n" ""; XML_SetUnknownEncodingHandler(g_parser, UnrecognisedEncodingHandler, NULL); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) fail("Unrecognised encoding not rejected"); } END_TEST /* Regression test for SF bug #620106. */ static int XMLCALL external_entity_loader(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId) { ExtTest *test_data = (ExtTest *)XML_GetUserData(parser); XML_Parser extparser; UNUSED_P(base); UNUSED_P(systemId); UNUSED_P(publicId); extparser = XML_ExternalEntityParserCreate(parser, context, NULL); if (extparser == NULL) fail("Could not create external entity parser."); if (test_data->encoding != NULL) { if (! XML_SetEncoding(extparser, test_data->encoding)) fail("XML_SetEncoding() ignored for external entity"); } if (_XML_Parse_SINGLE_BYTES(extparser, test_data->parse_text, (int)strlen(test_data->parse_text), XML_TRUE) == XML_STATUS_ERROR) { xml_failure(extparser); return XML_STATUS_ERROR; } XML_ParserFree(extparser); return XML_STATUS_OK; } START_TEST(test_ext_entity_set_encoding) { const char *text = "\n" "]>\n" "&en;"; ExtTest test_data = {/* This text says it's an unsupported encoding, but it's really UTF-8, which we tell Expat using XML_SetEncoding(). */ "\xC3\xA9", XCS("utf-8"), NULL}; #ifdef XML_UNICODE const XML_Char *expected = XCS("\x00e9"); #else const XML_Char *expected = XCS("\xc3\xa9"); #endif XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); run_ext_character_check(text, &test_data, expected); } END_TEST /* Test external entities with no handler */ START_TEST(test_ext_entity_no_handler) { const char *text = "\n" "]>\n" "&en;"; XML_SetDefaultHandler(g_parser, dummy_default_handler); run_character_check(text, XCS("")); } END_TEST /* Test UTF-8 BOM is accepted */ START_TEST(test_ext_entity_set_bom) { const char *text = "\n" "]>\n" "&en;"; ExtTest test_data = {"\xEF\xBB\xBF" /* BOM */ "" "\xC3\xA9", XCS("utf-8"), NULL}; #ifdef XML_UNICODE const XML_Char *expected = XCS("\x00e9"); #else const XML_Char *expected = XCS("\xc3\xa9"); #endif XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); run_ext_character_check(text, &test_data, expected); } END_TEST /* Test that bad encodings are faulted */ typedef struct ext_faults { const char *parse_text; const char *fail_text; const XML_Char *encoding; enum XML_Error error; } ExtFaults; static int XMLCALL external_entity_faulter(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId) { XML_Parser ext_parser; ExtFaults *fault = (ExtFaults *)XML_GetUserData(parser); UNUSED_P(base); UNUSED_P(systemId); UNUSED_P(publicId); ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); if (ext_parser == NULL) fail("Could not create external entity parser"); if (fault->encoding != NULL) { if (! XML_SetEncoding(ext_parser, fault->encoding)) fail("XML_SetEncoding failed"); } if (_XML_Parse_SINGLE_BYTES(ext_parser, fault->parse_text, (int)strlen(fault->parse_text), XML_TRUE) != XML_STATUS_ERROR) fail(fault->fail_text); if (XML_GetErrorCode(ext_parser) != fault->error) xml_failure(ext_parser); XML_ParserFree(ext_parser); return XML_STATUS_ERROR; } START_TEST(test_ext_entity_bad_encoding) { const char *text = "\n" "]>\n" "&en;"; ExtFaults fault = {"u", "Unsupported encoding not faulted", XCS("unknown"), XML_ERROR_UNKNOWN_ENCODING}; XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); XML_SetUserData(g_parser, &fault); expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, "Bad encoding should not have been accepted"); } END_TEST /* Try handing an invalid encoding to an external entity parser */ START_TEST(test_ext_entity_bad_encoding_2) { const char *text = "\n" "\n" "&entity;"; ExtFaults fault = {"", "Unknown encoding not faulted", XCS("unknown-encoding"), XML_ERROR_UNKNOWN_ENCODING}; XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); XML_SetUserData(g_parser, &fault); expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, "Bad encoding not faulted in external entity handler"); } END_TEST /* Test that no error is reported for unknown entities if we don't read an external subset. This was fixed in Expat 1.95.5. */ START_TEST(test_wfc_undeclared_entity_unread_external_subset) { const char *text = "\n" "&entity;"; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); } END_TEST /* Test that an error is reported for unknown entities if we don't have an external subset. */ START_TEST(test_wfc_undeclared_entity_no_external_subset) { expect_failure("&entity;", XML_ERROR_UNDEFINED_ENTITY, "Parser did not report undefined entity w/out a DTD."); } END_TEST /* Test that an error is reported for unknown entities if we don't read an external subset, but have been declared standalone. */ START_TEST(test_wfc_undeclared_entity_standalone) { const char *text = "\n" "\n" "&entity;"; expect_failure(text, XML_ERROR_UNDEFINED_ENTITY, "Parser did not report undefined entity (standalone)."); } END_TEST /* Test that an error is reported for unknown entities if we have read an external subset, and standalone is true. */ START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone) { const char *text = "\n" "\n" "&entity;"; ExtTest test_data = {"", NULL, NULL}; XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetUserData(g_parser, &test_data); XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); expect_failure(text, XML_ERROR_UNDEFINED_ENTITY, "Parser did not report undefined entity (external DTD)."); } END_TEST /* Test that external entity handling is not done if the parsing flag * is set to UNLESS_STANDALONE */ START_TEST(test_entity_with_external_subset_unless_standalone) { const char *text = "\n" "\n" "&entity;"; ExtTest test_data = {"", NULL, NULL}; XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE); XML_SetUserData(g_parser, &test_data); XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); expect_failure(text, XML_ERROR_UNDEFINED_ENTITY, "Parser did not report undefined entity"); } END_TEST /* Test that no error is reported for unknown entities if we have read an external subset, and standalone is false. */ START_TEST(test_wfc_undeclared_entity_with_external_subset) { const char *text = "\n" "\n" "&entity;"; ExtTest test_data = {"", NULL, NULL}; XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); run_ext_character_check(text, &test_data, XCS("")); } END_TEST /* Test that an error is reported if our NotStandalone handler fails */ static int XMLCALL reject_not_standalone_handler(void *userData) { UNUSED_P(userData); return XML_STATUS_ERROR; } START_TEST(test_not_standalone_handler_reject) { const char *text = "\n" "\n" "&entity;"; ExtTest test_data = {"", NULL, NULL}; XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetUserData(g_parser, &test_data); XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler); expect_failure(text, XML_ERROR_NOT_STANDALONE, "NotStandalone handler failed to reject"); /* Try again but without external entity handling */ XML_ParserReset(g_parser, NULL); XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler); expect_failure(text, XML_ERROR_NOT_STANDALONE, "NotStandalone handler failed to reject"); } END_TEST /* Test that no error is reported if our NotStandalone handler succeeds */ static int XMLCALL accept_not_standalone_handler(void *userData) { UNUSED_P(userData); return XML_STATUS_OK; } START_TEST(test_not_standalone_handler_accept) { const char *text = "\n" "\n" "&entity;"; ExtTest test_data = {"", NULL, NULL}; XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler); run_ext_character_check(text, &test_data, XCS("")); /* Repeat without the external entity handler */ XML_ParserReset(g_parser, NULL); XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler); run_character_check(text, XCS("")); } END_TEST START_TEST(test_wfc_no_recursive_entity_refs) { const char *text = "\n" "]>\n" "&entity;"; expect_failure(text, XML_ERROR_RECURSIVE_ENTITY_REF, "Parser did not report recursive entity reference."); } END_TEST /* Test incomplete external entities are faulted */ START_TEST(test_ext_entity_invalid_parse) { const char *text = "\n" "]>\n" "&en;"; const ExtFaults faults[] = {{"<", "Incomplete element declaration not faulted", NULL, XML_ERROR_UNCLOSED_TOKEN}, {"<\xe2\x82", /* First two bytes of a three-byte char */ "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR}, {"\xe2\x82", "Incomplete character in CDATA not faulted", NULL, XML_ERROR_PARTIAL_CHAR}, {NULL, NULL, NULL, XML_ERROR_NONE}}; const ExtFaults *fault = faults; for (; fault->parse_text != NULL; fault++) { XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); XML_SetUserData(g_parser, (void *)fault); expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, "Parser did not report external entity error"); XML_ParserReset(g_parser, NULL); } } END_TEST /* Regression test for SF bug #483514. */ START_TEST(test_dtd_default_handling) { const char *text = "\n" "\n" "\n" "\n" "\n" "\n" "]>"; XML_SetDefaultHandler(g_parser, accumulate_characters); XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler); XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler); XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler); XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler); XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler); XML_SetProcessingInstructionHandler(g_parser, dummy_pi_handler); XML_SetCommentHandler(g_parser, dummy_comment_handler); XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler); XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler); run_character_check(text, XCS("\n\n\n\n\n\n\n")); } END_TEST /* Test handling of attribute declarations */ typedef struct AttTest { const char *definition; const XML_Char *element_name; const XML_Char *attr_name; const XML_Char *attr_type; const XML_Char *default_value; int is_required; } AttTest; static void XMLCALL verify_attlist_decl_handler(void *userData, const XML_Char *element_name, const XML_Char *attr_name, const XML_Char *attr_type, const XML_Char *default_value, int is_required) { AttTest *at = (AttTest *)userData; if (xcstrcmp(element_name, at->element_name)) fail("Unexpected element name in attribute declaration"); if (xcstrcmp(attr_name, at->attr_name)) fail("Unexpected attribute name in attribute declaration"); if (xcstrcmp(attr_type, at->attr_type)) fail("Unexpected attribute type in attribute declaration"); if ((default_value == NULL && at->default_value != NULL) || (default_value != NULL && at->default_value == NULL) || (default_value != NULL && xcstrcmp(default_value, at->default_value))) fail("Unexpected default value in attribute declaration"); if (is_required != at->is_required) fail("Requirement mismatch in attribute declaration"); } START_TEST(test_dtd_attr_handling) { const char *prolog = "\n"; AttTest attr_data[] = {{"\n" "]>" "", XCS("doc"), XCS("a"), XCS("(one|two|three)"), /* Extraneous spaces will be removed */ NULL, XML_TRUE}, {"\n" "\n" "]>" "", XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), NULL, XML_FALSE}, {"\n" "]>" "", XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), XCS("bar"), XML_FALSE}, {"\n" "]>" "", XCS("doc"), XCS("a"), XCS("CDATA"), #ifdef XML_UNICODE XCS("\x06f2"), #else XCS("\xdb\xb2"), #endif XML_FALSE}, {NULL, NULL, NULL, NULL, NULL, XML_FALSE}}; AttTest *test; for (test = attr_data; test->definition != NULL; test++) { XML_SetAttlistDeclHandler(g_parser, verify_attlist_decl_handler); XML_SetUserData(g_parser, test); if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)strlen(prolog), XML_FALSE) == XML_STATUS_ERROR) xml_failure(g_parser); if (_XML_Parse_SINGLE_BYTES(g_parser, test->definition, (int)strlen(test->definition), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); XML_ParserReset(g_parser, NULL); } } END_TEST /* See related SF bug #673791. When namespace processing is enabled, setting the namespace URI for a prefix is not allowed; this test ensures that it *is* allowed when namespace processing is not enabled. (See Namespaces in XML, section 2.) */ START_TEST(test_empty_ns_without_namespaces) { const char *text = "\n" " \n" ""; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); } END_TEST /* Regression test for SF bug #824420. Checks that an xmlns:prefix attribute set in an attribute's default value isn't misinterpreted. */ START_TEST(test_ns_in_attribute_default_without_namespaces) { const char *text = "\n" " ]>\n" ""; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); } END_TEST static const char *long_character_data_text = "" "012345678901234567890123456789012345678901234567890123456789" "012345678901234567890123456789012345678901234567890123456789" "012345678901234567890123456789012345678901234567890123456789" "012345678901234567890123456789012345678901234567890123456789" "012345678901234567890123456789012345678901234567890123456789" "012345678901234567890123456789012345678901234567890123456789" "012345678901234567890123456789012345678901234567890123456789" "012345678901234567890123456789012345678901234567890123456789" "012345678901234567890123456789012345678901234567890123456789" "012345678901234567890123456789012345678901234567890123456789" "012345678901234567890123456789012345678901234567890123456789" "012345678901234567890123456789012345678901234567890123456789" "012345678901234567890123456789012345678901234567890123456789" "012345678901234567890123456789012345678901234567890123456789" "012345678901234567890123456789012345678901234567890123456789" "012345678901234567890123456789012345678901234567890123456789" "012345678901234567890123456789012345678901234567890123456789" "012345678901234567890123456789012345678901234567890123456789" "012345678901234567890123456789012345678901234567890123456789" "012345678901234567890123456789012345678901234567890123456789" ""; static XML_Bool resumable = XML_FALSE; static void clearing_aborting_character_handler(void *userData, const XML_Char *s, int len) { UNUSED_P(userData); UNUSED_P(s); UNUSED_P(len); XML_StopParser(g_parser, resumable); XML_SetCharacterDataHandler(g_parser, NULL); } /* Regression test for SF bug #1515266: missing check of stopped parser in doContext() 'for' loop. */ START_TEST(test_stop_parser_between_char_data_calls) { /* The sample data must be big enough that there are two calls to the character data handler from within the inner "for" loop of the XML_TOK_DATA_CHARS case in doContent(), and the character handler must stop the parser and clear the character data handler. */ const char *text = long_character_data_text; XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); resumable = XML_FALSE; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) xml_failure(g_parser); if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED) xml_failure(g_parser); } END_TEST /* Regression test for SF bug #1515266: missing check of stopped parser in doContext() 'for' loop. */ START_TEST(test_suspend_parser_between_char_data_calls) { /* The sample data must be big enough that there are two calls to the character data handler from within the inner "for" loop of the XML_TOK_DATA_CHARS case in doContent(), and the character handler must stop the parser and clear the character data handler. */ const char *text = long_character_data_text; XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); resumable = XML_TRUE; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_SUSPENDED) xml_failure(g_parser); if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE) xml_failure(g_parser); /* Try parsing directly */ if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) fail("Attempt to continue parse while suspended not faulted"); if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED) fail("Suspended parse not faulted with correct error"); } END_TEST static XML_Bool abortable = XML_FALSE; static void parser_stop_character_handler(void *userData, const XML_Char *s, int len) { UNUSED_P(userData); UNUSED_P(s); UNUSED_P(len); XML_StopParser(g_parser, resumable); XML_SetCharacterDataHandler(g_parser, NULL); if (! resumable) { /* Check that aborting an aborted parser is faulted */ if (XML_StopParser(g_parser, XML_FALSE) != XML_STATUS_ERROR) fail("Aborting aborted parser not faulted"); if (XML_GetErrorCode(g_parser) != XML_ERROR_FINISHED) xml_failure(g_parser); } else if (abortable) { /* Check that aborting a suspended parser works */ if (XML_StopParser(g_parser, XML_FALSE) == XML_STATUS_ERROR) xml_failure(g_parser); } else { /* Check that suspending a suspended parser works */ if (XML_StopParser(g_parser, XML_TRUE) != XML_STATUS_ERROR) fail("Suspending suspended parser not faulted"); if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED) xml_failure(g_parser); } } /* Test repeated calls to XML_StopParser are handled correctly */ START_TEST(test_repeated_stop_parser_between_char_data_calls) { const char *text = long_character_data_text; XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler); resumable = XML_FALSE; abortable = XML_FALSE; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) fail("Failed to double-stop parser"); XML_ParserReset(g_parser, NULL); XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler); resumable = XML_TRUE; abortable = XML_FALSE; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_SUSPENDED) fail("Failed to double-suspend parser"); XML_ParserReset(g_parser, NULL); XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler); resumable = XML_TRUE; abortable = XML_TRUE; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) fail("Failed to suspend-abort parser"); } END_TEST START_TEST(test_good_cdata_ascii) { const char *text = "Hello, world!]]>"; const XML_Char *expected = XCS("Hello, world!"); CharData storage; CharData_Init(&storage); XML_SetUserData(g_parser, &storage); XML_SetCharacterDataHandler(g_parser, accumulate_characters); /* Add start and end handlers for coverage */ XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler); XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); CharData_CheckXMLChars(&storage, expected); /* Try again, this time with a default handler */ XML_ParserReset(g_parser, NULL); CharData_Init(&storage); XML_SetUserData(g_parser, &storage); XML_SetCharacterDataHandler(g_parser, accumulate_characters); XML_SetDefaultHandler(g_parser, dummy_default_handler); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); CharData_CheckXMLChars(&storage, expected); } END_TEST START_TEST(test_good_cdata_utf16) { /* Test data is: * * */ const char text[] = "\0<\0?\0x\0m\0l\0" " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0" " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0" "1\0" "6\0'" "\0?\0>\0\n" "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>"; const XML_Char *expected = XCS("hello"); CharData storage; CharData_Init(&storage); XML_SetUserData(g_parser, &storage); XML_SetCharacterDataHandler(g_parser, accumulate_characters); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); CharData_CheckXMLChars(&storage, expected); } END_TEST START_TEST(test_good_cdata_utf16_le) { /* Test data is: * * */ const char text[] = "<\0?\0x\0m\0l\0" " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0" " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0" "1\0" "6\0'" "\0?\0>\0\n" "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>\0"; const XML_Char *expected = XCS("hello"); CharData storage; CharData_Init(&storage); XML_SetUserData(g_parser, &storage); XML_SetCharacterDataHandler(g_parser, accumulate_characters); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); CharData_CheckXMLChars(&storage, expected); } END_TEST /* Test UTF16 conversion of a long cdata string */ /* 16 characters: handy macro to reduce visual clutter */ #define A_TO_P_IN_UTF16 "\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P" START_TEST(test_long_cdata_utf16) { /* Test data is: * * */ const char text[] = "\0<\0?\0x\0m\0l\0 " "\0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0 " "\0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0\x31\0\x36\0'\0?\0>" "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[" /* 64 characters per line */ /* clang-format off */ A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 /* clang-format on */ "\0]\0]\0>\0<\0/\0a\0>"; const XML_Char *expected = /* clang-format off */ XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") XCS("ABCDEFGHIJKLMNOP"); /* clang-format on */ CharData storage; void *buffer; CharData_Init(&storage); XML_SetUserData(g_parser, &storage); XML_SetCharacterDataHandler(g_parser, accumulate_characters); buffer = XML_GetBuffer(g_parser, sizeof(text) - 1); if (buffer == NULL) fail("Could not allocate parse buffer"); assert(buffer != NULL); memcpy(buffer, text, sizeof(text) - 1); if (XML_ParseBuffer(g_parser, sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); CharData_CheckXMLChars(&storage, expected); } END_TEST /* Test handling of multiple unit UTF-16 characters */ START_TEST(test_multichar_cdata_utf16) { /* Test data is: * * * * where {MINIM} is U+1d15e (a minim or half-note) * UTF-16: 0xd834 0xdd5e * UTF-8: 0xf0 0x9d 0x85 0x9e * and {CROTCHET} is U+1d15f (a crotchet or quarter-note) * UTF-16: 0xd834 0xdd5f * UTF-8: 0xf0 0x9d 0x85 0x9f */ const char text[] = "\0<\0?\0x\0m\0l\0" " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0" " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0" "1\0" "6\0'" "\0?\0>\0\n" "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[" "\xd8\x34\xdd\x5e\xd8\x34\xdd\x5f" "\0]\0]\0>\0<\0/\0a\0>"; #ifdef XML_UNICODE const XML_Char *expected = XCS("\xd834\xdd5e\xd834\xdd5f"); #else const XML_Char *expected = XCS("\xf0\x9d\x85\x9e\xf0\x9d\x85\x9f"); #endif CharData storage; CharData_Init(&storage); XML_SetUserData(g_parser, &storage); XML_SetCharacterDataHandler(g_parser, accumulate_characters); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); CharData_CheckXMLChars(&storage, expected); } END_TEST /* Test that an element name with a UTF-16 surrogate pair is rejected */ START_TEST(test_utf16_bad_surrogate_pair) { /* Test data is: * * * * where {BADLINB} is U+10000 (the first Linear B character) * with the UTF-16 surrogate pair in the wrong order, i.e. * 0xdc00 0xd800 */ const char text[] = "\0<\0?\0x\0m\0l\0" " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0" " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0" "1\0" "6\0'" "\0?\0>\0\n" "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[" "\xdc\x00\xd8\x00" "\0]\0]\0>\0<\0/\0a\0>"; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) != XML_STATUS_ERROR) fail("Reversed UTF-16 surrogate pair not faulted"); if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN) xml_failure(g_parser); } END_TEST START_TEST(test_bad_cdata) { struct CaseData { const char *text; enum XML_Error expectedError; }; struct CaseData cases[] = {{"<", XML_ERROR_UNCLOSED_TOKEN}, {"", XML_ERROR_INVALID_TOKEN}, {"", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */ {"", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */ {"", XML_ERROR_INVALID_TOKEN}, {"", XML_ERROR_INVALID_TOKEN}, {"", XML_ERROR_INVALID_TOKEN}, {"", XML_ERROR_INVALID_TOKEN}, {"", XML_ERROR_UNCLOSED_CDATA_SECTION}, {"", XML_ERROR_UNCLOSED_CDATA_SECTION}, {"", XML_ERROR_UNCLOSED_CDATA_SECTION}}; size_t i = 0; for (; i < sizeof(cases) / sizeof(struct CaseData); i++) { const enum XML_Status actualStatus = _XML_Parse_SINGLE_BYTES( g_parser, cases[i].text, (int)strlen(cases[i].text), XML_TRUE); const enum XML_Error actualError = XML_GetErrorCode(g_parser); assert(actualStatus == XML_STATUS_ERROR); if (actualError != cases[i].expectedError) { char message[100]; sprintf(message, "Expected error %d but got error %d for case %u: \"%s\"\n", cases[i].expectedError, actualError, (unsigned int)i + 1, cases[i].text); fail(message); } XML_ParserReset(g_parser, NULL); } } END_TEST /* Test failures in UTF-16 CDATA */ START_TEST(test_bad_cdata_utf16) { struct CaseData { size_t text_bytes; const char *text; enum XML_Error expected_error; }; const char prolog[] = "\0<\0?\0x\0m\0l\0" " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0" " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0" "1\0" "6\0'" "\0?\0>\0\n" "\0<\0a\0>"; struct CaseData cases[] = { {1, "\0", XML_ERROR_UNCLOSED_TOKEN}, {2, "\0<", XML_ERROR_UNCLOSED_TOKEN}, {3, "\0<\0", XML_ERROR_UNCLOSED_TOKEN}, {4, "\0<\0!", XML_ERROR_UNCLOSED_TOKEN}, {5, "\0<\0!\0", XML_ERROR_UNCLOSED_TOKEN}, {6, "\0<\0!\0[", XML_ERROR_UNCLOSED_TOKEN}, {7, "\0<\0!\0[\0", XML_ERROR_UNCLOSED_TOKEN}, {8, "\0<\0!\0[\0C", XML_ERROR_UNCLOSED_TOKEN}, {9, "\0<\0!\0[\0C\0", XML_ERROR_UNCLOSED_TOKEN}, {10, "\0<\0!\0[\0C\0D", XML_ERROR_UNCLOSED_TOKEN}, {11, "\0<\0!\0[\0C\0D\0", XML_ERROR_UNCLOSED_TOKEN}, {12, "\0<\0!\0[\0C\0D\0A", XML_ERROR_UNCLOSED_TOKEN}, {13, "\0<\0!\0[\0C\0D\0A\0", XML_ERROR_UNCLOSED_TOKEN}, {14, "\0<\0!\0[\0C\0D\0A\0T", XML_ERROR_UNCLOSED_TOKEN}, {15, "\0<\0!\0[\0C\0D\0A\0T\0", XML_ERROR_UNCLOSED_TOKEN}, {16, "\0<\0!\0[\0C\0D\0A\0T\0A", XML_ERROR_UNCLOSED_TOKEN}, {17, "\0<\0!\0[\0C\0D\0A\0T\0A\0", XML_ERROR_UNCLOSED_TOKEN}, {18, "\0<\0!\0[\0C\0D\0A\0T\0A\0[", XML_ERROR_UNCLOSED_CDATA_SECTION}, {19, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0", XML_ERROR_UNCLOSED_CDATA_SECTION}, {20, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z", XML_ERROR_UNCLOSED_CDATA_SECTION}, /* Now add a four-byte UTF-16 character */ {21, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8", XML_ERROR_UNCLOSED_CDATA_SECTION}, {22, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34", XML_ERROR_PARTIAL_CHAR}, {23, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd", XML_ERROR_PARTIAL_CHAR}, {24, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd\x5e", XML_ERROR_UNCLOSED_CDATA_SECTION}}; size_t i; for (i = 0; i < sizeof(cases) / sizeof(struct CaseData); i++) { enum XML_Status actual_status; enum XML_Error actual_error; if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)sizeof(prolog) - 1, XML_FALSE) == XML_STATUS_ERROR) xml_failure(g_parser); actual_status = _XML_Parse_SINGLE_BYTES(g_parser, cases[i].text, (int)cases[i].text_bytes, XML_TRUE); assert(actual_status == XML_STATUS_ERROR); actual_error = XML_GetErrorCode(g_parser); if (actual_error != cases[i].expected_error) { char message[1024]; sprintf(message, "Expected error %d (%" XML_FMT_STR "), got %d (%" XML_FMT_STR ") for case %lu\n", cases[i].expected_error, XML_ErrorString(cases[i].expected_error), actual_error, XML_ErrorString(actual_error), (long unsigned)(i + 1)); fail(message); } XML_ParserReset(g_parser, NULL); } } END_TEST static const char *long_cdata_text = ""; /* Test stopping the parser in cdata handler */ START_TEST(test_stop_parser_between_cdata_calls) { const char *text = long_cdata_text; XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); resumable = XML_FALSE; expect_failure(text, XML_ERROR_ABORTED, "Parse not aborted in CDATA handler"); } END_TEST /* Test suspending the parser in cdata handler */ START_TEST(test_suspend_parser_between_cdata_calls) { const char *text = long_cdata_text; enum XML_Status result; XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); resumable = XML_TRUE; result = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE); if (result != XML_STATUS_SUSPENDED) { if (result == XML_STATUS_ERROR) xml_failure(g_parser); fail("Parse not suspended in CDATA handler"); } if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE) xml_failure(g_parser); } END_TEST /* Test memory allocation functions */ START_TEST(test_memory_allocation) { char *buffer = (char *)XML_MemMalloc(g_parser, 256); char *p; if (buffer == NULL) { fail("Allocation failed"); } else { /* Try writing to memory; some OSes try to cheat! */ buffer[0] = 'T'; buffer[1] = 'E'; buffer[2] = 'S'; buffer[3] = 'T'; buffer[4] = '\0'; if (strcmp(buffer, "TEST") != 0) { fail("Memory not writable"); } else { p = (char *)XML_MemRealloc(g_parser, buffer, 512); if (p == NULL) { fail("Reallocation failed"); } else { /* Write again, just to be sure */ buffer = p; buffer[0] = 'V'; if (strcmp(buffer, "VEST") != 0) { fail("Reallocated memory not writable"); } } } XML_MemFree(g_parser, buffer); } } END_TEST static void XMLCALL record_default_handler(void *userData, const XML_Char *s, int len) { UNUSED_P(s); UNUSED_P(len); CharData_AppendXMLChars((CharData *)userData, XCS("D"), 1); } static void XMLCALL record_cdata_handler(void *userData, const XML_Char *s, int len) { UNUSED_P(s); UNUSED_P(len); CharData_AppendXMLChars((CharData *)userData, XCS("C"), 1); XML_DefaultCurrent(g_parser); } static void XMLCALL record_cdata_nodefault_handler(void *userData, const XML_Char *s, int len) { UNUSED_P(s); UNUSED_P(len); CharData_AppendXMLChars((CharData *)userData, XCS("c"), 1); } static void XMLCALL record_skip_handler(void *userData, const XML_Char *entityName, int is_parameter_entity) { UNUSED_P(entityName); CharData_AppendXMLChars((CharData *)userData, is_parameter_entity ? XCS("E") : XCS("e"), 1); } /* Test XML_DefaultCurrent() passes handling on correctly */ START_TEST(test_default_current) { const char *text = "hell]"; const char *entity_text = "\n" "]>\n" "&entity;"; CharData storage; XML_SetDefaultHandler(g_parser, record_default_handler); XML_SetCharacterDataHandler(g_parser, record_cdata_handler); CharData_Init(&storage); XML_SetUserData(g_parser, &storage); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); CharData_CheckXMLChars(&storage, XCS("DCDCDCDCDCDD")); /* Again, without the defaulting */ XML_ParserReset(g_parser, NULL); XML_SetDefaultHandler(g_parser, record_default_handler); XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler); CharData_Init(&storage); XML_SetUserData(g_parser, &storage); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); CharData_CheckXMLChars(&storage, XCS("DcccccD")); /* Now with an internal entity to complicate matters */ XML_ParserReset(g_parser, NULL); XML_SetDefaultHandler(g_parser, record_default_handler); XML_SetCharacterDataHandler(g_parser, record_cdata_handler); CharData_Init(&storage); XML_SetUserData(g_parser, &storage); if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); /* The default handler suppresses the entity */ CharData_CheckXMLChars(&storage, XCS("DDDDDDDDDDDDDDDDDDD")); /* Again, with a skip handler */ XML_ParserReset(g_parser, NULL); XML_SetDefaultHandler(g_parser, record_default_handler); XML_SetCharacterDataHandler(g_parser, record_cdata_handler); XML_SetSkippedEntityHandler(g_parser, record_skip_handler); CharData_Init(&storage); XML_SetUserData(g_parser, &storage); if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); /* The default handler suppresses the entity */ CharData_CheckXMLChars(&storage, XCS("DDDDDDDDDDDDDDDDDeD")); /* This time, allow the entity through */ XML_ParserReset(g_parser, NULL); XML_SetDefaultHandlerExpand(g_parser, record_default_handler); XML_SetCharacterDataHandler(g_parser, record_cdata_handler); CharData_Init(&storage); XML_SetUserData(g_parser, &storage); if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); CharData_CheckXMLChars(&storage, XCS("DDDDDDDDDDDDDDDDDCDD")); /* Finally, without passing the cdata to the default handler */ XML_ParserReset(g_parser, NULL); XML_SetDefaultHandlerExpand(g_parser, record_default_handler); XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler); CharData_Init(&storage); XML_SetUserData(g_parser, &storage); if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); CharData_CheckXMLChars(&storage, XCS("DDDDDDDDDDDDDDDDDcD")); } END_TEST /* Test DTD element parsing code paths */ START_TEST(test_dtd_elements) { const char *text = "\n" "\n" "]>\n" "Wombats are go"; XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); } END_TEST static void XMLCALL element_decl_check_model(void *userData, const XML_Char *name, XML_Content *model) { UNUSED_P(userData); uint32_t errorFlags = 0; /* Expected model array structure is this: * [0] (type 6, quant 0) * [1] (type 5, quant 0) * [3] (type 4, quant 0, name "bar") * [4] (type 4, quant 0, name "foo") * [5] (type 4, quant 3, name "xyz") * [2] (type 4, quant 2, name "zebra") */ errorFlags |= ((xcstrcmp(name, XCS("junk")) == 0) ? 0 : (1u << 0)); errorFlags |= ((model != NULL) ? 0 : (1u << 1)); errorFlags |= ((model[0].type == XML_CTYPE_SEQ) ? 0 : (1u << 2)); errorFlags |= ((model[0].quant == XML_CQUANT_NONE) ? 0 : (1u << 3)); errorFlags |= ((model[0].numchildren == 2) ? 0 : (1u << 4)); errorFlags |= ((model[0].children == &model[1]) ? 0 : (1u << 5)); errorFlags |= ((model[0].name == NULL) ? 0 : (1u << 6)); errorFlags |= ((model[1].type == XML_CTYPE_CHOICE) ? 0 : (1u << 7)); errorFlags |= ((model[1].quant == XML_CQUANT_NONE) ? 0 : (1u << 8)); errorFlags |= ((model[1].numchildren == 3) ? 0 : (1u << 9)); errorFlags |= ((model[1].children == &model[3]) ? 0 : (1u << 10)); errorFlags |= ((model[1].name == NULL) ? 0 : (1u << 11)); errorFlags |= ((model[2].type == XML_CTYPE_NAME) ? 0 : (1u << 12)); errorFlags |= ((model[2].quant == XML_CQUANT_REP) ? 0 : (1u << 13)); errorFlags |= ((model[2].numchildren == 0) ? 0 : (1u << 14)); errorFlags |= ((model[2].children == NULL) ? 0 : (1u << 15)); errorFlags |= ((xcstrcmp(model[2].name, XCS("zebra")) == 0) ? 0 : (1u << 16)); errorFlags |= ((model[3].type == XML_CTYPE_NAME) ? 0 : (1u << 17)); errorFlags |= ((model[3].quant == XML_CQUANT_NONE) ? 0 : (1u << 18)); errorFlags |= ((model[3].numchildren == 0) ? 0 : (1u << 19)); errorFlags |= ((model[3].children == NULL) ? 0 : (1u << 20)); errorFlags |= ((xcstrcmp(model[3].name, XCS("bar")) == 0) ? 0 : (1u << 21)); errorFlags |= ((model[4].type == XML_CTYPE_NAME) ? 0 : (1u << 22)); errorFlags |= ((model[4].quant == XML_CQUANT_NONE) ? 0 : (1u << 23)); errorFlags |= ((model[4].numchildren == 0) ? 0 : (1u << 24)); errorFlags |= ((model[4].children == NULL) ? 0 : (1u << 25)); errorFlags |= ((xcstrcmp(model[4].name, XCS("foo")) == 0) ? 0 : (1u << 26)); errorFlags |= ((model[5].type == XML_CTYPE_NAME) ? 0 : (1u << 27)); errorFlags |= ((model[5].quant == XML_CQUANT_PLUS) ? 0 : (1u << 28)); errorFlags |= ((model[5].numchildren == 0) ? 0 : (1u << 29)); errorFlags |= ((model[5].children == NULL) ? 0 : (1u << 30)); errorFlags |= ((xcstrcmp(model[5].name, XCS("xyz")) == 0) ? 0 : (1u << 31)); XML_SetUserData(g_parser, (void *)(uintptr_t)errorFlags); XML_FreeContentModel(g_parser, model); } START_TEST(test_dtd_elements_nesting) { // Payload inspired by a test in Perl's XML::Parser const char *text = "\n" "]>\n" ""; XML_SetUserData(g_parser, (void *)(uintptr_t)-1); XML_SetElementDeclHandler(g_parser, element_decl_check_model); if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); if ((uint32_t)(uintptr_t)XML_GetUserData(g_parser) != 0) fail("Element declaration model regression detected"); } END_TEST /* Test foreign DTD handling */ START_TEST(test_set_foreign_dtd) { const char *text1 = "\n"; const char *text2 = "&entity;"; ExtTest test_data = {"", NULL, NULL}; /* Check hash salt is passed through too */ XML_SetHashSalt(g_parser, 0x12345678); XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetUserData(g_parser, &test_data); XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); /* Add a default handler to exercise more code paths */ XML_SetDefaultHandler(g_parser, dummy_default_handler); if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE) fail("Could not set foreign DTD"); if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE) == XML_STATUS_ERROR) xml_failure(g_parser); /* Ensure that trying to set the DTD after parsing has started * is faulted, even if it's the same setting. */ if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING) fail("Failed to reject late foreign DTD setting"); /* Ditto for the hash salt */ if (XML_SetHashSalt(g_parser, 0x23456789)) fail("Failed to reject late hash salt change"); /* Now finish the parse */ if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); } END_TEST /* Test foreign DTD handling with a failing NotStandalone handler */ START_TEST(test_foreign_dtd_not_standalone) { const char *text = "\n" "&entity;"; ExtTest test_data = {"", NULL, NULL}; XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetUserData(g_parser, &test_data); XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler); if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE) fail("Could not set foreign DTD"); expect_failure(text, XML_ERROR_NOT_STANDALONE, "NotStandalonehandler failed to reject"); } END_TEST /* Test invalid character in a foreign DTD is faulted */ START_TEST(test_invalid_foreign_dtd) { const char *text = "\n" "&entity;"; ExtFaults test_data = {"$", "Dollar not faulted", NULL, XML_ERROR_INVALID_TOKEN}; XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetUserData(g_parser, &test_data); XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); XML_UseForeignDTD(g_parser, XML_TRUE); expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, "Bad DTD should not have been accepted"); } END_TEST /* Test foreign DTD use with a doctype */ START_TEST(test_foreign_dtd_with_doctype) { const char *text1 = "\n" "]>\n"; const char *text2 = "&entity;"; ExtTest test_data = {"", NULL, NULL}; /* Check hash salt is passed through too */ XML_SetHashSalt(g_parser, 0x12345678); XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetUserData(g_parser, &test_data); XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); /* Add a default handler to exercise more code paths */ XML_SetDefaultHandler(g_parser, dummy_default_handler); if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE) fail("Could not set foreign DTD"); if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE) == XML_STATUS_ERROR) xml_failure(g_parser); /* Ensure that trying to set the DTD after parsing has started * is faulted, even if it's the same setting. */ if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING) fail("Failed to reject late foreign DTD setting"); /* Ditto for the hash salt */ if (XML_SetHashSalt(g_parser, 0x23456789)) fail("Failed to reject late hash salt change"); /* Now finish the parse */ if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); } END_TEST /* Test XML_UseForeignDTD with no external subset present */ static int XMLCALL external_entity_null_loader(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId) { UNUSED_P(parser); UNUSED_P(context); UNUSED_P(base); UNUSED_P(systemId); UNUSED_P(publicId); return XML_STATUS_OK; } START_TEST(test_foreign_dtd_without_external_subset) { const char *text = "]>\n" "&foo;"; XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetUserData(g_parser, NULL); XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader); XML_UseForeignDTD(g_parser, XML_TRUE); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); } END_TEST START_TEST(test_empty_foreign_dtd) { const char *text = "\n" "&entity;"; XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader); XML_UseForeignDTD(g_parser, XML_TRUE); expect_failure(text, XML_ERROR_UNDEFINED_ENTITY, "Undefined entity not faulted"); } END_TEST /* Test XML Base is set and unset appropriately */ START_TEST(test_set_base) { const XML_Char *old_base; const XML_Char *new_base = XCS("/local/file/name.xml"); old_base = XML_GetBase(g_parser); if (XML_SetBase(g_parser, new_base) != XML_STATUS_OK) fail("Unable to set base"); if (xcstrcmp(XML_GetBase(g_parser), new_base) != 0) fail("Base setting not correct"); if (XML_SetBase(g_parser, NULL) != XML_STATUS_OK) fail("Unable to NULL base"); if (XML_GetBase(g_parser) != NULL) fail("Base setting not nulled"); XML_SetBase(g_parser, old_base); } END_TEST /* Test attribute counts, indexing, etc */ typedef struct attrInfo { const XML_Char *name; const XML_Char *value; } AttrInfo; typedef struct elementInfo { const XML_Char *name; int attr_count; const XML_Char *id_name; AttrInfo *attributes; } ElementInfo; static void XMLCALL counting_start_element_handler(void *userData, const XML_Char *name, const XML_Char **atts) { ElementInfo *info = (ElementInfo *)userData; AttrInfo *attr; int count, id, i; while (info->name != NULL) { if (! xcstrcmp(name, info->name)) break; info++; } if (info->name == NULL) fail("Element not recognised"); /* The attribute count is twice what you might expect. It is a * count of items in atts, an array which contains alternating * attribute names and attribute values. For the naive user this * is possibly a little unexpected, but it is what the * documentation in expat.h tells us to expect. */ count = XML_GetSpecifiedAttributeCount(g_parser); if (info->attr_count * 2 != count) { fail("Not got expected attribute count"); return; } id = XML_GetIdAttributeIndex(g_parser); if (id == -1 && info->id_name != NULL) { fail("ID not present"); return; } if (id != -1 && xcstrcmp(atts[id], info->id_name)) { fail("ID does not have the correct name"); return; } for (i = 0; i < info->attr_count; i++) { attr = info->attributes; while (attr->name != NULL) { if (! xcstrcmp(atts[0], attr->name)) break; attr++; } if (attr->name == NULL) { fail("Attribute not recognised"); return; } if (xcstrcmp(atts[1], attr->value)) { fail("Attribute has wrong value"); return; } /* Remember, two entries in atts per attribute (see above) */ atts += 2; } } START_TEST(test_attributes) { const char *text = "\n" "\n" "]>" "" "" ""; AttrInfo doc_info[] = {{XCS("a"), XCS("1")}, {XCS("b"), XCS("2")}, {XCS("id"), XCS("one")}, {NULL, NULL}}; AttrInfo tag_info[] = {{XCS("c"), XCS("3")}, {NULL, NULL}}; ElementInfo info[] = {{XCS("doc"), 3, XCS("id"), NULL}, {XCS("tag"), 1, NULL, NULL}, {NULL, 0, NULL, NULL}}; info[0].attributes = doc_info; info[1].attributes = tag_info; XML_SetStartElementHandler(g_parser, counting_start_element_handler); XML_SetUserData(g_parser, info); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); } END_TEST /* Test reset works correctly in the middle of processing an internal * entity. Exercises some obscure code in XML_ParserReset(). */ START_TEST(test_reset_in_entity) { const char *text = "\n" "\n" "]>\n" "&entity;"; XML_ParsingStatus status; resumable = XML_TRUE; XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE) == XML_STATUS_ERROR) xml_failure(g_parser); XML_GetParsingStatus(g_parser, &status); if (status.parsing != XML_SUSPENDED) fail("Parsing status not SUSPENDED"); XML_ParserReset(g_parser, NULL); XML_GetParsingStatus(g_parser, &status); if (status.parsing != XML_INITIALIZED) fail("Parsing status doesn't reset to INITIALIZED"); } END_TEST /* Test that resume correctly passes through parse errors */ START_TEST(test_resume_invalid_parse) { const char *text = "Hello"); CharData storage; CharData_Init(&storage); XML_SetUserData(g_parser, &storage); XML_SetDefaultHandler(g_parser, accumulate_characters); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); CharData_CheckXMLChars(&storage, expected); } END_TEST /* Test resetting a subordinate parser does exactly nothing */ static int XMLCALL external_entity_resetter(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId) { const char *text = ""; XML_Parser ext_parser; XML_ParsingStatus status; UNUSED_P(base); UNUSED_P(systemId); UNUSED_P(publicId); ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); if (ext_parser == NULL) fail("Could not create external entity parser"); XML_GetParsingStatus(ext_parser, &status); if (status.parsing != XML_INITIALIZED) { fail("Parsing status is not INITIALIZED"); return XML_STATUS_ERROR; } if (_XML_Parse_SINGLE_BYTES(ext_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) { xml_failure(parser); return XML_STATUS_ERROR; } XML_GetParsingStatus(ext_parser, &status); if (status.parsing != XML_FINISHED) { fail("Parsing status is not FINISHED"); return XML_STATUS_ERROR; } /* Check we can't parse here */ if (XML_Parse(ext_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) fail("Parsing when finished not faulted"); if (XML_GetErrorCode(ext_parser) != XML_ERROR_FINISHED) fail("Parsing when finished faulted with wrong code"); XML_ParserReset(ext_parser, NULL); XML_GetParsingStatus(ext_parser, &status); if (status.parsing != XML_FINISHED) { fail("Parsing status not still FINISHED"); return XML_STATUS_ERROR; } XML_ParserFree(ext_parser); return XML_STATUS_OK; } START_TEST(test_subordinate_reset) { const char *text = "\n" "\n" "&entity;"; XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetExternalEntityRefHandler(g_parser, external_entity_resetter); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); } END_TEST /* Test suspending a subordinate parser */ static void XMLCALL entity_suspending_decl_handler(void *userData, const XML_Char *name, XML_Content *model) { XML_Parser ext_parser = (XML_Parser)userData; UNUSED_P(name); if (XML_StopParser(ext_parser, XML_TRUE) != XML_STATUS_ERROR) fail("Attempting to suspend a subordinate parser not faulted"); if (XML_GetErrorCode(ext_parser) != XML_ERROR_SUSPEND_PE) fail("Suspending subordinate parser get wrong code"); XML_SetElementDeclHandler(ext_parser, NULL); XML_FreeContentModel(g_parser, model); } static int XMLCALL external_entity_suspender(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId) { const char *text = ""; XML_Parser ext_parser; UNUSED_P(base); UNUSED_P(systemId); UNUSED_P(publicId); ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); if (ext_parser == NULL) fail("Could not create external entity parser"); XML_SetElementDeclHandler(ext_parser, entity_suspending_decl_handler); XML_SetUserData(ext_parser, ext_parser); if (_XML_Parse_SINGLE_BYTES(ext_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) { xml_failure(ext_parser); return XML_STATUS_ERROR; } XML_ParserFree(ext_parser); return XML_STATUS_OK; } START_TEST(test_subordinate_suspend) { const char *text = "\n" "\n" "&entity;"; XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetExternalEntityRefHandler(g_parser, external_entity_suspender); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); } END_TEST /* Test suspending a subordinate parser from an XML declaration */ /* Increases code coverage of the tests */ static void XMLCALL entity_suspending_xdecl_handler(void *userData, const XML_Char *version, const XML_Char *encoding, int standalone) { XML_Parser ext_parser = (XML_Parser)userData; UNUSED_P(version); UNUSED_P(encoding); UNUSED_P(standalone); XML_StopParser(ext_parser, resumable); XML_SetXmlDeclHandler(ext_parser, NULL); } static int XMLCALL external_entity_suspend_xmldecl(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId) { const char *text = ""; XML_Parser ext_parser; XML_ParsingStatus status; enum XML_Status rc; UNUSED_P(base); UNUSED_P(systemId); UNUSED_P(publicId); ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); if (ext_parser == NULL) fail("Could not create external entity parser"); XML_SetXmlDeclHandler(ext_parser, entity_suspending_xdecl_handler); XML_SetUserData(ext_parser, ext_parser); rc = _XML_Parse_SINGLE_BYTES(ext_parser, text, (int)strlen(text), XML_TRUE); XML_GetParsingStatus(ext_parser, &status); if (resumable) { if (rc == XML_STATUS_ERROR) xml_failure(ext_parser); if (status.parsing != XML_SUSPENDED) fail("Ext Parsing status not SUSPENDED"); } else { if (rc != XML_STATUS_ERROR) fail("Ext parsing not aborted"); if (XML_GetErrorCode(ext_parser) != XML_ERROR_ABORTED) xml_failure(ext_parser); if (status.parsing != XML_FINISHED) fail("Ext Parsing status not FINISHED"); } XML_ParserFree(ext_parser); return XML_STATUS_OK; } START_TEST(test_subordinate_xdecl_suspend) { const char *text = "\n" "]>\n" "&entity;"; XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl); resumable = XML_TRUE; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); } END_TEST START_TEST(test_subordinate_xdecl_abort) { const char *text = "\n" "]>\n" "&entity;"; XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl); resumable = XML_FALSE; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); } END_TEST /* Test external entity fault handling with suspension */ static int XMLCALL external_entity_suspending_faulter(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId) { XML_Parser ext_parser; ExtFaults *fault = (ExtFaults *)XML_GetUserData(parser); void *buffer; int parse_len = (int)strlen(fault->parse_text); UNUSED_P(base); UNUSED_P(systemId); UNUSED_P(publicId); ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); if (ext_parser == NULL) fail("Could not create external entity parser"); XML_SetXmlDeclHandler(ext_parser, entity_suspending_xdecl_handler); XML_SetUserData(ext_parser, ext_parser); resumable = XML_TRUE; buffer = XML_GetBuffer(ext_parser, parse_len); if (buffer == NULL) fail("Could not allocate parse buffer"); assert(buffer != NULL); memcpy(buffer, fault->parse_text, parse_len); if (XML_ParseBuffer(ext_parser, parse_len, XML_FALSE) != XML_STATUS_SUSPENDED) fail("XML declaration did not suspend"); if (XML_ResumeParser(ext_parser) != XML_STATUS_OK) xml_failure(ext_parser); if (XML_ParseBuffer(ext_parser, 0, XML_TRUE) != XML_STATUS_ERROR) fail(fault->fail_text); if (XML_GetErrorCode(ext_parser) != fault->error) xml_failure(ext_parser); XML_ParserFree(ext_parser); return XML_STATUS_ERROR; } START_TEST(test_ext_entity_invalid_suspended_parse) { const char *text = "\n" "]>\n" "&en;"; ExtFaults faults[] = {{"<", "Incomplete element declaration not faulted", NULL, XML_ERROR_UNCLOSED_TOKEN}, {/* First two bytes of a three-byte char */ "\xe2\x82", "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR}, {NULL, NULL, NULL, XML_ERROR_NONE}}; ExtFaults *fault; for (fault = &faults[0]; fault->parse_text != NULL; fault++) { XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetExternalEntityRefHandler(g_parser, external_entity_suspending_faulter); XML_SetUserData(g_parser, fault); expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, "Parser did not report external entity error"); XML_ParserReset(g_parser, NULL); } } END_TEST /* Test setting an explicit encoding */ START_TEST(test_explicit_encoding) { const char *text1 = "Hello "; const char *text2 = " World"; /* Just check that we can set the encoding to NULL before starting */ if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK) fail("Failed to initialise encoding to NULL"); /* Say we are UTF-8 */ if (XML_SetEncoding(g_parser, XCS("utf-8")) != XML_STATUS_OK) fail("Failed to set explicit encoding"); if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE) == XML_STATUS_ERROR) xml_failure(g_parser); /* Try to switch encodings mid-parse */ if (XML_SetEncoding(g_parser, XCS("us-ascii")) != XML_STATUS_ERROR) fail("Allowed encoding change"); if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); /* Try now the parse is over */ if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK) fail("Failed to unset encoding"); } END_TEST /* Test handling of trailing CR (rather than newline) */ static void XMLCALL cr_cdata_handler(void *userData, const XML_Char *s, int len) { int *pfound = (int *)userData; /* Internal processing turns the CR into a newline for the * character data handler, but not for the default handler */ if (len == 1 && (*s == XCS('\n') || *s == XCS('\r'))) *pfound = 1; } START_TEST(test_trailing_cr) { const char *text = "\r"; int found_cr; /* Try with a character handler, for code coverage */ XML_SetCharacterDataHandler(g_parser, cr_cdata_handler); XML_SetUserData(g_parser, &found_cr); found_cr = 0; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_OK) fail("Failed to fault unclosed doc"); if (found_cr == 0) fail("Did not catch the carriage return"); XML_ParserReset(g_parser, NULL); /* Now with a default handler instead */ XML_SetDefaultHandler(g_parser, cr_cdata_handler); XML_SetUserData(g_parser, &found_cr); found_cr = 0; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_OK) fail("Failed to fault unclosed doc"); if (found_cr == 0) fail("Did not catch default carriage return"); } END_TEST /* Test trailing CR in an external entity parse */ static int XMLCALL external_entity_cr_catcher(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId) { const char *text = "\r"; XML_Parser ext_parser; UNUSED_P(base); UNUSED_P(systemId); UNUSED_P(publicId); ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); if (ext_parser == NULL) fail("Could not create external entity parser"); XML_SetCharacterDataHandler(ext_parser, cr_cdata_handler); if (_XML_Parse_SINGLE_BYTES(ext_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(ext_parser); XML_ParserFree(ext_parser); return XML_STATUS_OK; } static int XMLCALL external_entity_bad_cr_catcher(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId) { const char *text = "\r"; XML_Parser ext_parser; UNUSED_P(base); UNUSED_P(systemId); UNUSED_P(publicId); ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); if (ext_parser == NULL) fail("Could not create external entity parser"); XML_SetCharacterDataHandler(ext_parser, cr_cdata_handler); if (_XML_Parse_SINGLE_BYTES(ext_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_OK) fail("Async entity error not caught"); if (XML_GetErrorCode(ext_parser) != XML_ERROR_ASYNC_ENTITY) xml_failure(ext_parser); XML_ParserFree(ext_parser); return XML_STATUS_OK; } START_TEST(test_ext_entity_trailing_cr) { const char *text = "\n" "]>\n" "&en;"; int found_cr; XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetExternalEntityRefHandler(g_parser, external_entity_cr_catcher); XML_SetUserData(g_parser, &found_cr); found_cr = 0; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK) xml_failure(g_parser); if (found_cr == 0) fail("No carriage return found"); XML_ParserReset(g_parser, NULL); /* Try again with a different trailing CR */ XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetExternalEntityRefHandler(g_parser, external_entity_bad_cr_catcher); XML_SetUserData(g_parser, &found_cr); found_cr = 0; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK) xml_failure(g_parser); if (found_cr == 0) fail("No carriage return found"); } END_TEST /* Test handling of trailing square bracket */ static void XMLCALL rsqb_handler(void *userData, const XML_Char *s, int len) { int *pfound = (int *)userData; if (len == 1 && *s == XCS(']')) *pfound = 1; } START_TEST(test_trailing_rsqb) { const char *text8 = "]"; const char text16[] = "\xFF\xFE<\000d\000o\000c\000>\000]\000"; int found_rsqb; int text8_len = (int)strlen(text8); XML_SetCharacterDataHandler(g_parser, rsqb_handler); XML_SetUserData(g_parser, &found_rsqb); found_rsqb = 0; if (_XML_Parse_SINGLE_BYTES(g_parser, text8, text8_len, XML_TRUE) == XML_STATUS_OK) fail("Failed to fault unclosed doc"); if (found_rsqb == 0) fail("Did not catch the right square bracket"); /* Try again with a different encoding */ XML_ParserReset(g_parser, NULL); XML_SetCharacterDataHandler(g_parser, rsqb_handler); XML_SetUserData(g_parser, &found_rsqb); found_rsqb = 0; if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1, XML_TRUE) == XML_STATUS_OK) fail("Failed to fault unclosed doc"); if (found_rsqb == 0) fail("Did not catch the right square bracket"); /* And finally with a default handler */ XML_ParserReset(g_parser, NULL); XML_SetDefaultHandler(g_parser, rsqb_handler); XML_SetUserData(g_parser, &found_rsqb); found_rsqb = 0; if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1, XML_TRUE) == XML_STATUS_OK) fail("Failed to fault unclosed doc"); if (found_rsqb == 0) fail("Did not catch the right square bracket"); } END_TEST /* Test trailing right square bracket in an external entity parse */ static int XMLCALL external_entity_rsqb_catcher(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId) { const char *text = "]"; XML_Parser ext_parser; UNUSED_P(base); UNUSED_P(systemId); UNUSED_P(publicId); ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); if (ext_parser == NULL) fail("Could not create external entity parser"); XML_SetCharacterDataHandler(ext_parser, rsqb_handler); if (_XML_Parse_SINGLE_BYTES(ext_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) fail("Async entity error not caught"); if (XML_GetErrorCode(ext_parser) != XML_ERROR_ASYNC_ENTITY) xml_failure(ext_parser); XML_ParserFree(ext_parser); return XML_STATUS_OK; } START_TEST(test_ext_entity_trailing_rsqb) { const char *text = "\n" "]>\n" "&en;"; int found_rsqb; XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetExternalEntityRefHandler(g_parser, external_entity_rsqb_catcher); XML_SetUserData(g_parser, &found_rsqb); found_rsqb = 0; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK) xml_failure(g_parser); if (found_rsqb == 0) fail("No right square bracket found"); } END_TEST /* Test CDATA handling in an external entity */ static int XMLCALL external_entity_good_cdata_ascii(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId) { const char *text = "Hello, world!]]>"; const XML_Char *expected = XCS("Hello, world!"); CharData storage; XML_Parser ext_parser; UNUSED_P(base); UNUSED_P(systemId); UNUSED_P(publicId); CharData_Init(&storage); ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); if (ext_parser == NULL) fail("Could not create external entity parser"); XML_SetUserData(ext_parser, &storage); XML_SetCharacterDataHandler(ext_parser, accumulate_characters); if (_XML_Parse_SINGLE_BYTES(ext_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(ext_parser); CharData_CheckXMLChars(&storage, expected); XML_ParserFree(ext_parser); return XML_STATUS_OK; } START_TEST(test_ext_entity_good_cdata) { const char *text = "\n" "]>\n" "&en;"; XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetExternalEntityRefHandler(g_parser, external_entity_good_cdata_ascii); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK) xml_failure(g_parser); } END_TEST /* Test user parameter settings */ /* Variable holding the expected handler userData */ static void *handler_data = NULL; /* Count of the number of times the comment handler has been invoked */ static int comment_count = 0; /* Count of the number of skipped entities */ static int skip_count = 0; /* Count of the number of times the XML declaration handler is invoked */ static int xdecl_count = 0; static void XMLCALL xml_decl_handler(void *userData, const XML_Char *version, const XML_Char *encoding, int standalone) { UNUSED_P(version); UNUSED_P(encoding); if (userData != handler_data) fail("User data (xml decl) not correctly set"); if (standalone != -1) fail("Standalone not flagged as not present in XML decl"); xdecl_count++; } static void XMLCALL param_check_skip_handler(void *userData, const XML_Char *entityName, int is_parameter_entity) { UNUSED_P(entityName); UNUSED_P(is_parameter_entity); if (userData != handler_data) fail("User data (skip) not correctly set"); skip_count++; } static void XMLCALL data_check_comment_handler(void *userData, const XML_Char *data) { UNUSED_P(data); /* Check that the userData passed through is what we expect */ if (userData != handler_data) fail("User data (parser) not correctly set"); /* Check that the user data in the parser is appropriate */ if (XML_GetUserData(userData) != (void *)1) fail("User data in parser not correctly set"); comment_count++; } static int XMLCALL external_entity_param_checker(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId) { const char *text = "\n" ""; XML_Parser ext_parser; UNUSED_P(base); UNUSED_P(systemId); UNUSED_P(publicId); ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); if (ext_parser == NULL) fail("Could not create external entity parser"); handler_data = ext_parser; if (_XML_Parse_SINGLE_BYTES(ext_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) { xml_failure(parser); return XML_STATUS_ERROR; } handler_data = parser; XML_ParserFree(ext_parser); return XML_STATUS_OK; } START_TEST(test_user_parameters) { const char *text = "\n" "\n" "\n" "&entity;"; const char *epilog = "\n" ""; comment_count = 0; skip_count = 0; xdecl_count = 0; XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetXmlDeclHandler(g_parser, xml_decl_handler); XML_SetExternalEntityRefHandler(g_parser, external_entity_param_checker); XML_SetCommentHandler(g_parser, data_check_comment_handler); XML_SetSkippedEntityHandler(g_parser, param_check_skip_handler); XML_UseParserAsHandlerArg(g_parser); XML_SetUserData(g_parser, (void *)1); handler_data = g_parser; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE) == XML_STATUS_ERROR) xml_failure(g_parser); if (comment_count != 2) fail("Comment handler not invoked enough times"); /* Ensure we can't change policy mid-parse */ if (XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_NEVER)) fail("Changed param entity parsing policy while parsing"); if (_XML_Parse_SINGLE_BYTES(g_parser, epilog, (int)strlen(epilog), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); if (comment_count != 3) fail("Comment handler not invoked enough times"); if (skip_count != 1) fail("Skip handler not invoked enough times"); if (xdecl_count != 1) fail("XML declaration handler not invoked"); } END_TEST /* Test that an explicit external entity handler argument replaces * the parser as the first argument. * * We do not call the first parameter to the external entity handler * 'parser' for once, since the first time the handler is called it * will actually be a text string. We need to be able to access the * global 'parser' variable to create our external entity parser from, * since there are code paths we need to ensure get executed. */ static int XMLCALL external_entity_ref_param_checker(XML_Parser parameter, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId) { const char *text = ""; XML_Parser ext_parser; UNUSED_P(base); UNUSED_P(systemId); UNUSED_P(publicId); if ((void *)parameter != handler_data) fail("External entity ref handler parameter not correct"); /* Here we use the global 'parser' variable */ ext_parser = XML_ExternalEntityParserCreate(g_parser, context, NULL); if (ext_parser == NULL) fail("Could not create external entity parser"); if (_XML_Parse_SINGLE_BYTES(ext_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(ext_parser); XML_ParserFree(ext_parser); return XML_STATUS_OK; } START_TEST(test_ext_entity_ref_parameter) { const char *text = "\n" "\n" "&entity;"; XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker); /* Set a handler arg that is not NULL and not parser (which is * what NULL would cause to be passed. */ XML_SetExternalEntityRefHandlerArg(g_parser, (void *)text); handler_data = (void *)text; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); /* Now try again with unset args */ XML_ParserReset(g_parser, NULL); XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker); XML_SetExternalEntityRefHandlerArg(g_parser, NULL); handler_data = (void *)g_parser; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); } END_TEST /* Test the parsing of an empty string */ START_TEST(test_empty_parse) { const char *text = ""; const char *partial = ""; if (XML_Parse(g_parser, NULL, 0, XML_FALSE) == XML_STATUS_ERROR) fail("Parsing empty string faulted"); if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR) fail("Parsing final empty string not faulted"); if (XML_GetErrorCode(g_parser) != XML_ERROR_NO_ELEMENTS) fail("Parsing final empty string faulted for wrong reason"); /* Now try with valid text before the empty end */ XML_ParserReset(g_parser, NULL); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE) == XML_STATUS_ERROR) xml_failure(g_parser); if (XML_Parse(g_parser, NULL, 0, XML_TRUE) == XML_STATUS_ERROR) fail("Parsing final empty string faulted"); /* Now try with invalid text before the empty end */ XML_ParserReset(g_parser, NULL); if (_XML_Parse_SINGLE_BYTES(g_parser, partial, (int)strlen(partial), XML_FALSE) == XML_STATUS_ERROR) xml_failure(g_parser); if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR) fail("Parsing final incomplete empty string not faulted"); } END_TEST /* Test odd corners of the XML_GetBuffer interface */ static enum XML_Status get_feature(enum XML_FeatureEnum feature_id, long *presult) { const XML_Feature *feature = XML_GetFeatureList(); if (feature == NULL) return XML_STATUS_ERROR; for (; feature->feature != XML_FEATURE_END; feature++) { if (feature->feature == feature_id) { *presult = feature->value; return XML_STATUS_OK; } } return XML_STATUS_ERROR; } /* Having an element name longer than 1024 characters exercises some * of the pool allocation code in the parser that otherwise does not * get executed. The count at the end of the line is the number of * characters (bytes) in the element name by that point.x */ static const char *get_buffer_test_text = "\n 0); if (XML_GetBuffer(parser, INT_MAX - expectedKeepValue + 1) != NULL) fail("enlarging buffer not failed"); XML_ParserFree(parser); } END_TEST #endif // defined(XML_CONTEXT_BYTES) /* Test position information macros */ START_TEST(test_byte_info_at_end) { const char *text = ""; if (XML_GetCurrentByteIndex(g_parser) != -1 || XML_GetCurrentByteCount(g_parser) != 0) fail("Byte index/count incorrect at start of parse"); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); /* At end, the count will be zero and the index the end of string */ if (XML_GetCurrentByteCount(g_parser) != 0) fail("Terminal byte count incorrect"); if (XML_GetCurrentByteIndex(g_parser) != (XML_Index)strlen(text)) fail("Terminal byte index incorrect"); } END_TEST /* Test position information from errors */ #define PRE_ERROR_STR "" START_TEST(test_byte_info_at_error) { const char *text = PRE_ERROR_STR POST_ERROR_STR; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_OK) fail("Syntax error not faulted"); if (XML_GetCurrentByteCount(g_parser) != 0) fail("Error byte count incorrect"); if (XML_GetCurrentByteIndex(g_parser) != strlen(PRE_ERROR_STR)) fail("Error byte index incorrect"); } END_TEST #undef PRE_ERROR_STR #undef POST_ERROR_STR /* Test position information in handler */ typedef struct ByteTestData { int start_element_len; int cdata_len; int total_string_len; } ByteTestData; static void byte_character_handler(void *userData, const XML_Char *s, int len) { #ifdef XML_CONTEXT_BYTES int offset, size; const char *buffer; ByteTestData *data = (ByteTestData *)userData; UNUSED_P(s); buffer = XML_GetInputContext(g_parser, &offset, &size); if (buffer == NULL) fail("Failed to get context buffer"); if (offset != data->start_element_len) fail("Context offset in unexpected position"); if (len != data->cdata_len) fail("CDATA length reported incorrectly"); if (size != data->total_string_len) fail("Context size is not full buffer"); if (XML_GetCurrentByteIndex(g_parser) != offset) fail("Character byte index incorrect"); if (XML_GetCurrentByteCount(g_parser) != len) fail("Character byte count incorrect"); #else UNUSED_P(s); UNUSED_P(userData); UNUSED_P(len); #endif } #define START_ELEMENT "" #define CDATA_TEXT "Hello" #define END_ELEMENT "" START_TEST(test_byte_info_at_cdata) { const char *text = START_ELEMENT CDATA_TEXT END_ELEMENT; int offset, size; ByteTestData data; /* Check initial context is empty */ if (XML_GetInputContext(g_parser, &offset, &size) != NULL) fail("Unexpected context at start of parse"); data.start_element_len = (int)strlen(START_ELEMENT); data.cdata_len = (int)strlen(CDATA_TEXT); data.total_string_len = (int)strlen(text); XML_SetCharacterDataHandler(g_parser, byte_character_handler); XML_SetUserData(g_parser, &data); if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK) xml_failure(g_parser); } END_TEST #undef START_ELEMENT #undef CDATA_TEXT #undef END_ELEMENT /* Test predefined entities are correctly recognised */ START_TEST(test_predefined_entities) { const char *text = "<>&"'"; const XML_Char *expected = XCS("<>&"'"); const XML_Char *result = XCS("<>&\"'"); CharData storage; XML_SetDefaultHandler(g_parser, accumulate_characters); /* run_character_check uses XML_SetCharacterDataHandler(), which * unfortunately heads off a code path that we need to exercise. */ CharData_Init(&storage); XML_SetUserData(g_parser, &storage); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); /* The default handler doesn't translate the entities */ CharData_CheckXMLChars(&storage, expected); /* Now try again and check the translation */ XML_ParserReset(g_parser, NULL); run_character_check(text, result); } END_TEST /* Regression test that an invalid tag in an external parameter * reference in an external DTD is correctly faulted. * * Only a few specific tags are legal in DTDs ignoring comments and * processing instructions, all of which begin with an exclamation * mark. "" is not one of them, so the parser should raise an * error on encountering it. */ static int XMLCALL external_entity_param(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId) { const char *text1 = "\n" "\n" "\n" "%e1;\n"; const char *text2 = "\n" "\n"; XML_Parser ext_parser; UNUSED_P(base); UNUSED_P(publicId); if (systemId == NULL) return XML_STATUS_OK; ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); if (ext_parser == NULL) fail("Could not create external entity parser"); if (! xcstrcmp(systemId, XCS("004-1.ent"))) { if (_XML_Parse_SINGLE_BYTES(ext_parser, text1, (int)strlen(text1), XML_TRUE) != XML_STATUS_ERROR) fail("Inner DTD with invalid tag not rejected"); if (XML_GetErrorCode(ext_parser) != XML_ERROR_EXTERNAL_ENTITY_HANDLING) xml_failure(ext_parser); } else if (! xcstrcmp(systemId, XCS("004-2.ent"))) { if (_XML_Parse_SINGLE_BYTES(ext_parser, text2, (int)strlen(text2), XML_TRUE) != XML_STATUS_ERROR) fail("Invalid tag in external param not rejected"); if (XML_GetErrorCode(ext_parser) != XML_ERROR_SYNTAX) xml_failure(ext_parser); } else { fail("Unknown system ID"); } XML_ParserFree(ext_parser); return XML_STATUS_ERROR; } START_TEST(test_invalid_tag_in_dtd) { const char *text = "\n" "\n"; XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetExternalEntityRefHandler(g_parser, external_entity_param); expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, "Invalid tag IN DTD external param not rejected"); } END_TEST /* Test entities not quite the predefined ones are not mis-recognised */ START_TEST(test_not_predefined_entities) { const char *text[] = {"&pt;", "&amo;", "&quid;", "&apod;", NULL}; int i = 0; while (text[i] != NULL) { expect_failure(text[i], XML_ERROR_UNDEFINED_ENTITY, "Undefined entity not rejected"); XML_ParserReset(g_parser, NULL); i++; } } END_TEST /* Test conditional inclusion (IGNORE) */ static int XMLCALL external_entity_load_ignore(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId) { const char *text = "]]>"; XML_Parser ext_parser; UNUSED_P(base); UNUSED_P(systemId); UNUSED_P(publicId); ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); if (ext_parser == NULL) fail("Could not create external entity parser"); if (_XML_Parse_SINGLE_BYTES(ext_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(parser); XML_ParserFree(ext_parser); return XML_STATUS_OK; } START_TEST(test_ignore_section) { const char *text = "\n" "&entity;"; const XML_Char *expected = XCS("]]>\n&entity;"); CharData storage; CharData_Init(&storage); XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetUserData(g_parser, &storage); XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore); XML_SetDefaultHandler(g_parser, accumulate_characters); XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler); XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler); XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); XML_SetStartElementHandler(g_parser, dummy_start_element); XML_SetEndElementHandler(g_parser, dummy_end_element); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); CharData_CheckXMLChars(&storage, expected); } END_TEST static int XMLCALL external_entity_load_ignore_utf16(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId) { const char text[] = /* ]]> */ "<\0!\0[\0I\0G\0N\0O\0R\0E\0[\0" "<\0!\0E\0L\0E\0M\0E\0N\0T\0 \0e\0 \0" "(\0#\0P\0C\0D\0A\0T\0A\0)\0*\0>\0]\0]\0>\0"; XML_Parser ext_parser; UNUSED_P(base); UNUSED_P(systemId); UNUSED_P(publicId); ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); if (ext_parser == NULL) fail("Could not create external entity parser"); if (_XML_Parse_SINGLE_BYTES(ext_parser, text, (int)sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR) xml_failure(parser); XML_ParserFree(ext_parser); return XML_STATUS_OK; } START_TEST(test_ignore_section_utf16) { const char text[] = /* */ "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 " "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n\0" /* &en; */ "<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>\0"; const XML_Char *expected = XCS("]]>\n&en;"); CharData storage; CharData_Init(&storage); XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetUserData(g_parser, &storage); XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore_utf16); XML_SetDefaultHandler(g_parser, accumulate_characters); XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler); XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler); XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); XML_SetStartElementHandler(g_parser, dummy_start_element); XML_SetEndElementHandler(g_parser, dummy_end_element); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); CharData_CheckXMLChars(&storage, expected); } END_TEST static int XMLCALL external_entity_load_ignore_utf16_be(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId) { const char text[] = /* ]]> */ "\0<\0!\0[\0I\0G\0N\0O\0R\0E\0[" "\0<\0!\0E\0L\0E\0M\0E\0N\0T\0 \0e\0 " "\0(\0#\0P\0C\0D\0A\0T\0A\0)\0*\0>\0]\0]\0>"; XML_Parser ext_parser; UNUSED_P(base); UNUSED_P(systemId); UNUSED_P(publicId); ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); if (ext_parser == NULL) fail("Could not create external entity parser"); if (_XML_Parse_SINGLE_BYTES(ext_parser, text, (int)sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR) xml_failure(parser); XML_ParserFree(ext_parser); return XML_STATUS_OK; } START_TEST(test_ignore_section_utf16_be) { const char text[] = /* */ "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 " "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n" /* &en; */ "\0<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>"; const XML_Char *expected = XCS("]]>\n&en;"); CharData storage; CharData_Init(&storage); XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetUserData(g_parser, &storage); XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore_utf16_be); XML_SetDefaultHandler(g_parser, accumulate_characters); XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler); XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler); XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); XML_SetStartElementHandler(g_parser, dummy_start_element); XML_SetEndElementHandler(g_parser, dummy_end_element); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); CharData_CheckXMLChars(&storage, expected); } END_TEST /* Test mis-formatted conditional exclusion */ START_TEST(test_bad_ignore_section) { const char *text = "\n" "&entity;"; ExtFaults faults[] = {{"", "Invalid XML character not faulted", NULL, XML_ERROR_INVALID_TOKEN}, {/* FIrst two bytes of a three-byte char */ "parse_text != NULL; fault++) { XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); XML_SetUserData(g_parser, fault); expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, "Incomplete IGNORE section not failed"); XML_ParserReset(g_parser, NULL); } } END_TEST /* Test recursive parsing */ static int XMLCALL external_entity_valuer(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId) { const char *text1 = "\n" "\n" "\n" "%e1;\n"; XML_Parser ext_parser; UNUSED_P(base); UNUSED_P(publicId); if (systemId == NULL) return XML_STATUS_OK; ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); if (ext_parser == NULL) fail("Could not create external entity parser"); if (! xcstrcmp(systemId, XCS("004-1.ent"))) { if (_XML_Parse_SINGLE_BYTES(ext_parser, text1, (int)strlen(text1), XML_TRUE) == XML_STATUS_ERROR) xml_failure(ext_parser); } else if (! xcstrcmp(systemId, XCS("004-2.ent"))) { ExtFaults *fault = (ExtFaults *)XML_GetUserData(parser); enum XML_Status status; enum XML_Error error; status = _XML_Parse_SINGLE_BYTES(ext_parser, fault->parse_text, (int)strlen(fault->parse_text), XML_TRUE); if (fault->error == XML_ERROR_NONE) { if (status == XML_STATUS_ERROR) xml_failure(ext_parser); } else { if (status != XML_STATUS_ERROR) fail(fault->fail_text); error = XML_GetErrorCode(ext_parser); if (error != fault->error && (fault->error != XML_ERROR_XML_DECL || error != XML_ERROR_TEXT_DECL)) xml_failure(ext_parser); } } XML_ParserFree(ext_parser); return XML_STATUS_OK; } START_TEST(test_external_entity_values) { const char *text = "\n" "\n"; ExtFaults data_004_2[] = { {"", NULL, NULL, XML_ERROR_NONE}, {"", "Invalid token not faulted", NULL, XML_ERROR_INVALID_TOKEN}, {"'wombat", "Unterminated string not faulted", NULL, XML_ERROR_UNCLOSED_TOKEN}, {"\xe2\x82", "Partial UTF-8 character not faulted", NULL, XML_ERROR_PARTIAL_CHAR}, {"\n", NULL, NULL, XML_ERROR_NONE}, {"", "Malformed XML declaration not faulted", NULL, XML_ERROR_XML_DECL}, {/* UTF-8 BOM */ "\xEF\xBB\xBF", NULL, NULL, XML_ERROR_NONE}, {"\n$", "Invalid token after text declaration not faulted", NULL, XML_ERROR_INVALID_TOKEN}, {"\n'wombat", "Unterminated string after text decl not faulted", NULL, XML_ERROR_UNCLOSED_TOKEN}, {"\n\xe2\x82", "Partial UTF-8 character after text decl not faulted", NULL, XML_ERROR_PARTIAL_CHAR}, {"%e1;", "Recursive parameter entity not faulted", NULL, XML_ERROR_RECURSIVE_ENTITY_REF}, {NULL, NULL, NULL, XML_ERROR_NONE}}; int i; for (i = 0; data_004_2[i].parse_text != NULL; i++) { XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetExternalEntityRefHandler(g_parser, external_entity_valuer); XML_SetUserData(g_parser, &data_004_2[i]); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); XML_ParserReset(g_parser, NULL); } } END_TEST /* Test the recursive parse interacts with a not standalone handler */ static int XMLCALL external_entity_not_standalone(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId) { const char *text1 = "\n" "\n" "%e1;\n"; const char *text2 = ""; XML_Parser ext_parser; UNUSED_P(base); UNUSED_P(publicId); if (systemId == NULL) return XML_STATUS_OK; ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); if (ext_parser == NULL) fail("Could not create external entity parser"); if (! xcstrcmp(systemId, XCS("foo"))) { XML_SetNotStandaloneHandler(ext_parser, reject_not_standalone_handler); if (_XML_Parse_SINGLE_BYTES(ext_parser, text1, (int)strlen(text1), XML_TRUE) != XML_STATUS_ERROR) fail("Expected not standalone rejection"); if (XML_GetErrorCode(ext_parser) != XML_ERROR_NOT_STANDALONE) xml_failure(ext_parser); XML_SetNotStandaloneHandler(ext_parser, NULL); XML_ParserFree(ext_parser); return XML_STATUS_ERROR; } else if (! xcstrcmp(systemId, XCS("bar"))) { if (_XML_Parse_SINGLE_BYTES(ext_parser, text2, (int)strlen(text2), XML_TRUE) == XML_STATUS_ERROR) xml_failure(ext_parser); } XML_ParserFree(ext_parser); return XML_STATUS_OK; } START_TEST(test_ext_entity_not_standalone) { const char *text = "\n" ""; XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetExternalEntityRefHandler(g_parser, external_entity_not_standalone); expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, "Standalone rejection not caught"); } END_TEST static int XMLCALL external_entity_value_aborter(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId) { const char *text1 = "\n" "\n" "\n" "%e1;\n"; const char *text2 = ""; XML_Parser ext_parser; UNUSED_P(base); UNUSED_P(publicId); if (systemId == NULL) return XML_STATUS_OK; ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); if (ext_parser == NULL) fail("Could not create external entity parser"); if (! xcstrcmp(systemId, XCS("004-1.ent"))) { if (_XML_Parse_SINGLE_BYTES(ext_parser, text1, (int)strlen(text1), XML_TRUE) == XML_STATUS_ERROR) xml_failure(ext_parser); } if (! xcstrcmp(systemId, XCS("004-2.ent"))) { XML_SetXmlDeclHandler(ext_parser, entity_suspending_xdecl_handler); XML_SetUserData(ext_parser, ext_parser); if (_XML_Parse_SINGLE_BYTES(ext_parser, text2, (int)strlen(text2), XML_TRUE) != XML_STATUS_ERROR) fail("Aborted parse not faulted"); if (XML_GetErrorCode(ext_parser) != XML_ERROR_ABORTED) xml_failure(ext_parser); } XML_ParserFree(ext_parser); return XML_STATUS_OK; } START_TEST(test_ext_entity_value_abort) { const char *text = "\n" "\n"; XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetExternalEntityRefHandler(g_parser, external_entity_value_aborter); resumable = XML_FALSE; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); } END_TEST START_TEST(test_bad_public_doctype) { const char *text = "\n" "\n" ""; /* Setting a handler provokes a particular code path */ XML_SetDoctypeDeclHandler(g_parser, dummy_start_doctype_handler, dummy_end_doctype_handler); expect_failure(text, XML_ERROR_PUBLICID, "Bad Public ID not failed"); } END_TEST /* Test based on ibm/valid/P32/ibm32v04.xml */ START_TEST(test_attribute_enum_value) { const char *text = "\n" "\n" "This is a \n \n\nyellow tiger"; ExtTest dtd_data = {"\n" "\n" "", NULL, NULL}; const XML_Char *expected = XCS("This is a \n \n\nyellow tiger"); XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); XML_SetUserData(g_parser, &dtd_data); XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); /* An attribute list handler provokes a different code path */ XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler); run_ext_character_check(text, &dtd_data, expected); } END_TEST /* Slightly bizarrely, the library seems to silently ignore entity * definitions for predefined entities, even when they are wrong. The * language of the XML 1.0 spec is somewhat unhelpful as to what ought * to happen, so this is currently treated as acceptable. */ START_TEST(test_predefined_entity_redefinition) { const char *text = "\n" "]>\n" "'"; run_character_check(text, XCS("'")); } END_TEST /* Test that the parser stops processing the DTD after an unresolved * parameter entity is encountered. */ START_TEST(test_dtd_stop_processing) { const char *text = "\n" "]>"; XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler); dummy_handler_flags = 0; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); if (dummy_handler_flags != 0) fail("DTD processing still going after undefined PE"); } END_TEST /* Test public notations with no system ID */ START_TEST(test_public_notation_no_sysid) { const char *text = "\n" "\n" "]>\n"; dummy_handler_flags = 0; XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); if (dummy_handler_flags != DUMMY_NOTATION_DECL_HANDLER_FLAG) fail("Notation declaration handler not called"); } END_TEST static void XMLCALL record_element_start_handler(void *userData, const XML_Char *name, const XML_Char **atts) { UNUSED_P(atts); CharData_AppendXMLChars((CharData *)userData, name, (int)xcstrlen(name)); } START_TEST(test_nested_groups) { const char *text = "\n" "" "]>\n" ""; CharData storage; CharData_Init(&storage); XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); XML_SetStartElementHandler(g_parser, record_element_start_handler); XML_SetUserData(g_parser, &storage); dummy_handler_flags = 0; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); CharData_CheckXMLChars(&storage, XCS("doce")); if (dummy_handler_flags != DUMMY_ELEMENT_DECL_HANDLER_FLAG) fail("Element handler not fired"); } END_TEST START_TEST(test_group_choice) { const char *text = "\n" "\n" "\n" "\n" "]>\n" "\n" "\n" "This is a foo\n" "\n" "\n"; XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); dummy_handler_flags = 0; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); if (dummy_handler_flags != DUMMY_ELEMENT_DECL_HANDLER_FLAG) fail("Element handler flag not raised"); } END_TEST static int XMLCALL external_entity_public(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId) { const char *text1 = (const char *)XML_GetUserData(parser); const char *text2 = ""; const char *text = NULL; XML_Parser ext_parser; int parse_res; UNUSED_P(base); ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); if (ext_parser == NULL) return XML_STATUS_ERROR; if (systemId != NULL && ! xcstrcmp(systemId, XCS("http://example.org/"))) { text = text1; } else if (publicId != NULL && ! xcstrcmp(publicId, XCS("foo"))) { text = text2; } else fail("Unexpected parameters to external entity parser"); assert(text != NULL); parse_res = _XML_Parse_SINGLE_BYTES(ext_parser, text, (int)strlen(text), XML_TRUE); XML_ParserFree(ext_parser); return parse_res; } START_TEST(test_standalone_parameter_entity) { const char *text = "\n" "'>\n" "%entity;\n" "]>\n" ""; char dtd_data[] = "\n"; XML_SetUserData(g_parser, dtd_data); XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetExternalEntityRefHandler(g_parser, external_entity_public); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); } END_TEST /* Test skipping of parameter entity in an external DTD */ /* Derived from ibm/invalid/P69/ibm69i01.xml */ START_TEST(test_skipped_parameter_entity) { const char *text = "\n" "\n" "]>\n" ""; ExtTest dtd_data = {"%pe2;", NULL, NULL}; XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); XML_SetUserData(g_parser, &dtd_data); XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetSkippedEntityHandler(g_parser, dummy_skip_handler); dummy_handler_flags = 0; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); if (dummy_handler_flags != DUMMY_SKIP_HANDLER_FLAG) fail("Skip handler not executed"); } END_TEST /* Test recursive parameter entity definition rejected in external DTD */ START_TEST(test_recursive_external_parameter_entity) { const char *text = "\n" "\n" "]>\n" ""; ExtFaults dtd_data = {"\n%pe2;", "Recursive external parameter entity not faulted", NULL, XML_ERROR_RECURSIVE_ENTITY_REF}; XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); XML_SetUserData(g_parser, &dtd_data); XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, "Recursive external parameter not spotted"); } END_TEST /* Test undefined parameter entity in external entity handler */ static int XMLCALL external_entity_devaluer(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId) { const char *text = "\n" "\n" "%e1;\n"; XML_Parser ext_parser; intptr_t clear_handler = (intptr_t)XML_GetUserData(parser); UNUSED_P(base); UNUSED_P(publicId); if (systemId == NULL || ! xcstrcmp(systemId, XCS("bar"))) return XML_STATUS_OK; if (xcstrcmp(systemId, XCS("foo"))) fail("Unexpected system ID"); ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); if (ext_parser == NULL) fail("Could note create external entity parser"); if (clear_handler) XML_SetExternalEntityRefHandler(ext_parser, NULL); if (_XML_Parse_SINGLE_BYTES(ext_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(ext_parser); XML_ParserFree(ext_parser); return XML_STATUS_OK; } START_TEST(test_undefined_ext_entity_in_external_dtd) { const char *text = "\n" "\n"; XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer); XML_SetUserData(g_parser, (void *)(intptr_t)XML_FALSE); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); /* Now repeat without the external entity ref handler invoking * another copy of itself. */ XML_ParserReset(g_parser, NULL); XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer); XML_SetUserData(g_parser, (void *)(intptr_t)XML_TRUE); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); } END_TEST static void XMLCALL aborting_xdecl_handler(void *userData, const XML_Char *version, const XML_Char *encoding, int standalone) { UNUSED_P(userData); UNUSED_P(version); UNUSED_P(encoding); UNUSED_P(standalone); XML_StopParser(g_parser, resumable); XML_SetXmlDeclHandler(g_parser, NULL); } /* Test suspending the parse on receiving an XML declaration works */ START_TEST(test_suspend_xdecl) { const char *text = long_character_data_text; XML_SetXmlDeclHandler(g_parser, aborting_xdecl_handler); resumable = XML_TRUE; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_SUSPENDED) xml_failure(g_parser); if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE) xml_failure(g_parser); /* Attempt to start a new parse while suspended */ if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) fail("Attempt to parse while suspended not faulted"); if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED) fail("Suspended parse not faulted with correct error"); } END_TEST /* Test aborting the parse in an epilog works */ static void XMLCALL selective_aborting_default_handler(void *userData, const XML_Char *s, int len) { const XML_Char *match = (const XML_Char *)userData; if (match == NULL || (xcstrlen(match) == (unsigned)len && ! xcstrncmp(match, s, len))) { XML_StopParser(g_parser, resumable); XML_SetDefaultHandler(g_parser, NULL); } } START_TEST(test_abort_epilog) { const char *text = "\n\r\n"; XML_Char match[] = XCS("\r"); XML_SetDefaultHandler(g_parser, selective_aborting_default_handler); XML_SetUserData(g_parser, match); resumable = XML_FALSE; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) fail("Abort not triggered"); if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED) xml_failure(g_parser); } END_TEST /* Test a different code path for abort in the epilog */ START_TEST(test_abort_epilog_2) { const char *text = "\n"; XML_Char match[] = XCS("\n"); XML_SetDefaultHandler(g_parser, selective_aborting_default_handler); XML_SetUserData(g_parser, match); resumable = XML_FALSE; expect_failure(text, XML_ERROR_ABORTED, "Abort not triggered"); } END_TEST /* Test suspension from the epilog */ START_TEST(test_suspend_epilog) { const char *text = "\n"; XML_Char match[] = XCS("\n"); XML_SetDefaultHandler(g_parser, selective_aborting_default_handler); XML_SetUserData(g_parser, match); resumable = XML_TRUE; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_SUSPENDED) xml_failure(g_parser); } END_TEST static void XMLCALL suspending_end_handler(void *userData, const XML_Char *s) { UNUSED_P(s); XML_StopParser((XML_Parser)userData, 1); } START_TEST(test_suspend_in_sole_empty_tag) { const char *text = ""; enum XML_Status rc; XML_SetEndElementHandler(g_parser, suspending_end_handler); XML_SetUserData(g_parser, g_parser); rc = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE); if (rc == XML_STATUS_ERROR) xml_failure(g_parser); else if (rc != XML_STATUS_SUSPENDED) fail("Suspend not triggered"); rc = XML_ResumeParser(g_parser); if (rc == XML_STATUS_ERROR) xml_failure(g_parser); else if (rc != XML_STATUS_OK) fail("Resume failed"); } END_TEST START_TEST(test_unfinished_epilog) { const char *text = "<"; expect_failure(text, XML_ERROR_UNCLOSED_TOKEN, "Incomplete epilog entry not faulted"); } END_TEST START_TEST(test_partial_char_in_epilog) { const char *text = "\xe2\x82"; /* First check that no fault is raised if the parse is not finished */ if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE) == XML_STATUS_ERROR) xml_failure(g_parser); /* Now check that it is faulted once we finish */ if (XML_ParseBuffer(g_parser, 0, XML_TRUE) != XML_STATUS_ERROR) fail("Partial character in epilog not faulted"); if (XML_GetErrorCode(g_parser) != XML_ERROR_PARTIAL_CHAR) xml_failure(g_parser); } END_TEST START_TEST(test_hash_collision) { /* For full coverage of the lookup routine, we need to ensure a * hash collision even though we can only tell that we have one * through breakpoint debugging or coverage statistics. The * following will cause a hash collision on machines with a 64-bit * long type; others will have to experiment. The full coverage * tests invoked from qa.sh usually provide a hash collision, but * not always. This is an attempt to provide insurance. */ #define COLLIDING_HASH_SALT (unsigned long)_SIP_ULL(0xffffffffU, 0xff99fc90U) const char *text = "\n" "\n" "This is a foo\n" "\n" "\n" "\n" "This triggers the table growth and collides with b2\n" "\n"; XML_SetHashSalt(g_parser, COLLIDING_HASH_SALT); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); } END_TEST #undef COLLIDING_HASH_SALT /* Test resuming a parse suspended in entity substitution */ static void XMLCALL start_element_suspender(void *userData, const XML_Char *name, const XML_Char **atts) { UNUSED_P(userData); UNUSED_P(atts); if (! xcstrcmp(name, XCS("suspend"))) XML_StopParser(g_parser, XML_TRUE); if (! xcstrcmp(name, XCS("abort"))) XML_StopParser(g_parser, XML_FALSE); } START_TEST(test_suspend_resume_internal_entity) { const char *text = "HiHo'>\n" "]>\n" "&foo;\n"; const XML_Char *expected1 = XCS("Hi"); const XML_Char *expected2 = XCS("HiHo"); CharData storage; CharData_Init(&storage); XML_SetStartElementHandler(g_parser, start_element_suspender); XML_SetCharacterDataHandler(g_parser, accumulate_characters); XML_SetUserData(g_parser, &storage); if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_SUSPENDED) xml_failure(g_parser); CharData_CheckXMLChars(&storage, XCS("")); if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED) xml_failure(g_parser); CharData_CheckXMLChars(&storage, expected1); if (XML_ResumeParser(g_parser) != XML_STATUS_OK) xml_failure(g_parser); CharData_CheckXMLChars(&storage, expected2); } END_TEST /* Test syntax error is caught at parse resumption */ START_TEST(test_resume_entity_with_syntax_error) { const char *text = "Hi'>\n" "]>\n" "&foo;\n"; XML_SetStartElementHandler(g_parser, start_element_suspender); if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_SUSPENDED) xml_failure(g_parser); if (XML_ResumeParser(g_parser) != XML_STATUS_ERROR) fail("Syntax error in entity not faulted"); if (XML_GetErrorCode(g_parser) != XML_ERROR_TAG_MISMATCH) xml_failure(g_parser); } END_TEST /* Test suspending and resuming in a parameter entity substitution */ static void XMLCALL element_decl_suspender(void *userData, const XML_Char *name, XML_Content *model) { UNUSED_P(userData); UNUSED_P(name); XML_StopParser(g_parser, XML_TRUE); XML_FreeContentModel(g_parser, model); } START_TEST(test_suspend_resume_parameter_entity) { const char *text = "'>\n" "%foo;\n" "]>\n" "Hello, world"; const XML_Char *expected = XCS("Hello, world"); CharData storage; CharData_Init(&storage); XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetElementDeclHandler(g_parser, element_decl_suspender); XML_SetCharacterDataHandler(g_parser, accumulate_characters); XML_SetUserData(g_parser, &storage); if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_SUSPENDED) xml_failure(g_parser); CharData_CheckXMLChars(&storage, XCS("")); if (XML_ResumeParser(g_parser) != XML_STATUS_OK) xml_failure(g_parser); CharData_CheckXMLChars(&storage, expected); } END_TEST /* Test attempting to use parser after an error is faulted */ START_TEST(test_restart_on_error) { const char *text = "<$doc>"; if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) fail("Invalid tag name not faulted"); if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN) xml_failure(g_parser); if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR) fail("Restarting invalid parse not faulted"); if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN) xml_failure(g_parser); } END_TEST /* Test that angle brackets in an attribute default value are faulted */ START_TEST(test_reject_lt_in_attribute_value) { const char *text = "'>]>\n" ""; expect_failure(text, XML_ERROR_INVALID_TOKEN, "Bad attribute default not faulted"); } END_TEST START_TEST(test_reject_unfinished_param_in_att_value) { const char *text = "]>\n" ""; expect_failure(text, XML_ERROR_INVALID_TOKEN, "Bad attribute default not faulted"); } END_TEST START_TEST(test_trailing_cr_in_att_value) { const char *text = ""; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); } END_TEST /* Try parsing a general entity within a parameter entity in a * standalone internal DTD. Covers a corner case in the parser. */ START_TEST(test_standalone_internal_entity) { const char *text = "\n" "\n" " '>\n" " \n" " %pe;\n" "]>\n" ""; XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); } END_TEST /* Test that a reference to an unknown external entity is skipped */ START_TEST(test_skipped_external_entity) { const char *text = "\n" "\n"; ExtTest test_data = {"\n" "\n", NULL, NULL}; XML_SetUserData(g_parser, &test_data); XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); } END_TEST /* Test a different form of unknown external entity */ typedef struct ext_hdlr_data { const char *parse_text; XML_ExternalEntityRefHandler handler; } ExtHdlrData; static int XMLCALL external_entity_oneshot_loader(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId) { ExtHdlrData *test_data = (ExtHdlrData *)XML_GetUserData(parser); XML_Parser ext_parser; UNUSED_P(base); UNUSED_P(systemId); UNUSED_P(publicId); ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); if (ext_parser == NULL) fail("Could not create external entity parser."); /* Use the requested entity parser for further externals */ XML_SetExternalEntityRefHandler(ext_parser, test_data->handler); if (_XML_Parse_SINGLE_BYTES(ext_parser, test_data->parse_text, (int)strlen(test_data->parse_text), XML_TRUE) == XML_STATUS_ERROR) { xml_failure(ext_parser); } XML_ParserFree(ext_parser); return XML_STATUS_OK; } START_TEST(test_skipped_null_loaded_ext_entity) { const char *text = "\n" ""; ExtHdlrData test_data = {"\n" "\n" "%pe2;\n", external_entity_null_loader}; XML_SetUserData(g_parser, &test_data); XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); } END_TEST START_TEST(test_skipped_unloaded_ext_entity) { const char *text = "\n" ""; ExtHdlrData test_data = {"\n" "\n" "%pe2;\n", NULL}; XML_SetUserData(g_parser, &test_data); XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); } END_TEST /* Test that a parameter entity value ending with a carriage return * has it translated internally into a newline. */ START_TEST(test_param_entity_with_trailing_cr) { #define PARAM_ENTITY_NAME "pe" #define PARAM_ENTITY_CORE_VALUE "" const char *text = "\n" ""; ExtTest test_data = {"\n" "%" PARAM_ENTITY_NAME ";\n", NULL, NULL}; XML_SetUserData(g_parser, &test_data); XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); XML_SetEntityDeclHandler(g_parser, param_entity_match_handler); entity_name_to_match = XCS(PARAM_ENTITY_NAME); entity_value_to_match = XCS(PARAM_ENTITY_CORE_VALUE) XCS("\n"); entity_match_flag = ENTITY_MATCH_NOT_FOUND; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); if (entity_match_flag == ENTITY_MATCH_FAIL) fail("Parameter entity CR->NEWLINE conversion failed"); else if (entity_match_flag == ENTITY_MATCH_NOT_FOUND) fail("Parameter entity not parsed"); } #undef PARAM_ENTITY_NAME #undef PARAM_ENTITY_CORE_VALUE END_TEST START_TEST(test_invalid_character_entity) { const char *text = "\n" "]>\n" "&entity;"; expect_failure(text, XML_ERROR_BAD_CHAR_REF, "Out of range character reference not faulted"); } END_TEST START_TEST(test_invalid_character_entity_2) { const char *text = "\n" "]>\n" "&entity;"; expect_failure(text, XML_ERROR_INVALID_TOKEN, "Out of range character reference not faulted"); } END_TEST START_TEST(test_invalid_character_entity_3) { const char text[] = /* \n */ "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0e\0n\0t\0i\0t\0y\0 " "\0'\0&\x0e\x04\x0e\x08\0;\0'\0>\0\n" /* ]>\n */ "\0]\0>\0\n" /* &entity; */ "\0<\0d\0o\0c\0>\0&\0e\0n\0t\0i\0t\0y\0;\0<\0/\0d\0o\0c\0>"; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) != XML_STATUS_ERROR) fail("Invalid start of entity name not faulted"); if (XML_GetErrorCode(g_parser) != XML_ERROR_UNDEFINED_ENTITY) xml_failure(g_parser); } END_TEST START_TEST(test_invalid_character_entity_4) { const char *text = "\n" /* = � */ "]>\n" "&entity;"; expect_failure(text, XML_ERROR_BAD_CHAR_REF, "Out of range character reference not faulted"); } END_TEST /* Test that processing instructions are picked up by a default handler */ START_TEST(test_pi_handled_in_default) { const char *text = "\n"; const XML_Char *expected = XCS("\n"); CharData storage; CharData_Init(&storage); XML_SetDefaultHandler(g_parser, accumulate_characters); XML_SetUserData(g_parser, &storage); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); CharData_CheckXMLChars(&storage, expected); } END_TEST /* Test that comments are picked up by a default handler */ START_TEST(test_comment_handled_in_default) { const char *text = "\n"; const XML_Char *expected = XCS("\n"); CharData storage; CharData_Init(&storage); XML_SetDefaultHandler(g_parser, accumulate_characters); XML_SetUserData(g_parser, &storage); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); CharData_CheckXMLChars(&storage, expected); } END_TEST /* Test PIs that look almost but not quite like XML declarations */ static void XMLCALL accumulate_pi_characters(void *userData, const XML_Char *target, const XML_Char *data) { CharData *storage = (CharData *)userData; CharData_AppendXMLChars(storage, target, -1); CharData_AppendXMLChars(storage, XCS(": "), 2); CharData_AppendXMLChars(storage, data, -1); CharData_AppendXMLChars(storage, XCS("\n"), 1); } START_TEST(test_pi_yml) { const char *text = ""; const XML_Char *expected = XCS("yml: something like data\n"); CharData storage; CharData_Init(&storage); XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters); XML_SetUserData(g_parser, &storage); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); CharData_CheckXMLChars(&storage, expected); } END_TEST START_TEST(test_pi_xnl) { const char *text = ""; const XML_Char *expected = XCS("xnl: nothing like data\n"); CharData storage; CharData_Init(&storage); XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters); XML_SetUserData(g_parser, &storage); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); CharData_CheckXMLChars(&storage, expected); } END_TEST START_TEST(test_pi_xmm) { const char *text = ""; const XML_Char *expected = XCS("xmm: everything like data\n"); CharData storage; CharData_Init(&storage); XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters); XML_SetUserData(g_parser, &storage); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); CharData_CheckXMLChars(&storage, expected); } END_TEST START_TEST(test_utf16_pi) { const char text[] = /* * where {KHO KHWAI} = U+0E04 * and {CHO CHAN} = U+0E08 */ "<\0?\0\x04\x0e\x08\x0e?\0>\0" /* */ "<\0q\0/\0>\0"; #ifdef XML_UNICODE const XML_Char *expected = XCS("\x0e04\x0e08: \n"); #else const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n"); #endif CharData storage; CharData_Init(&storage); XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters); XML_SetUserData(g_parser, &storage); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); CharData_CheckXMLChars(&storage, expected); } END_TEST START_TEST(test_utf16_be_pi) { const char text[] = /* * where {KHO KHWAI} = U+0E04 * and {CHO CHAN} = U+0E08 */ "\0<\0?\x0e\x04\x0e\x08\0?\0>" /* */ "\0<\0q\0/\0>"; #ifdef XML_UNICODE const XML_Char *expected = XCS("\x0e04\x0e08: \n"); #else const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n"); #endif CharData storage; CharData_Init(&storage); XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters); XML_SetUserData(g_parser, &storage); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); CharData_CheckXMLChars(&storage, expected); } END_TEST /* Test that comments can be picked up and translated */ static void XMLCALL accumulate_comment(void *userData, const XML_Char *data) { CharData *storage = (CharData *)userData; CharData_AppendXMLChars(storage, data, -1); } START_TEST(test_utf16_be_comment) { const char text[] = /* */ "\0<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0A\0 \0-\0-\0>\0\n" /* */ "\0<\0d\0o\0c\0/\0>"; const XML_Char *expected = XCS(" Comment A "); CharData storage; CharData_Init(&storage); XML_SetCommentHandler(g_parser, accumulate_comment); XML_SetUserData(g_parser, &storage); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); CharData_CheckXMLChars(&storage, expected); } END_TEST START_TEST(test_utf16_le_comment) { const char text[] = /* */ "<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0B\0 \0-\0-\0>\0\n\0" /* */ "<\0d\0o\0c\0/\0>\0"; const XML_Char *expected = XCS(" Comment B "); CharData storage; CharData_Init(&storage); XML_SetCommentHandler(g_parser, accumulate_comment); XML_SetUserData(g_parser, &storage); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); CharData_CheckXMLChars(&storage, expected); } END_TEST /* Test that the unknown encoding handler with map entries that expect * conversion but no conversion function is faulted */ static int XMLCALL failing_converter(void *data, const char *s) { UNUSED_P(data); UNUSED_P(s); /* Always claim to have failed */ return -1; } static int XMLCALL prefix_converter(void *data, const char *s) { UNUSED_P(data); /* If the first byte is 0xff, raise an error */ if (s[0] == (char)-1) return -1; /* Just add the low bits of the first byte to the second */ return (s[1] + (s[0] & 0x7f)) & 0x01ff; } static int XMLCALL MiscEncodingHandler(void *data, const XML_Char *encoding, XML_Encoding *info) { int i; int high_map = -2; /* Assume a 2-byte sequence */ if (! xcstrcmp(encoding, XCS("invalid-9")) || ! xcstrcmp(encoding, XCS("ascii-like")) || ! xcstrcmp(encoding, XCS("invalid-len")) || ! xcstrcmp(encoding, XCS("invalid-a")) || ! xcstrcmp(encoding, XCS("invalid-surrogate")) || ! xcstrcmp(encoding, XCS("invalid-high"))) high_map = -1; for (i = 0; i < 128; ++i) info->map[i] = i; for (; i < 256; ++i) info->map[i] = high_map; /* If required, put an invalid value in the ASCII entries */ if (! xcstrcmp(encoding, XCS("invalid-9"))) info->map[9] = 5; /* If required, have a top-bit set character starts a 5-byte sequence */ if (! xcstrcmp(encoding, XCS("invalid-len"))) info->map[0x81] = -5; /* If required, make a top-bit set character a valid ASCII character */ if (! xcstrcmp(encoding, XCS("invalid-a"))) info->map[0x82] = 'a'; /* If required, give a top-bit set character a forbidden value, * what would otherwise be the first of a surrogate pair. */ if (! xcstrcmp(encoding, XCS("invalid-surrogate"))) info->map[0x83] = 0xd801; /* If required, give a top-bit set character too high a value */ if (! xcstrcmp(encoding, XCS("invalid-high"))) info->map[0x84] = 0x010101; info->data = data; info->release = NULL; if (! xcstrcmp(encoding, XCS("failing-conv"))) info->convert = failing_converter; else if (! xcstrcmp(encoding, XCS("prefix-conv"))) info->convert = prefix_converter; else info->convert = NULL; return XML_STATUS_OK; } START_TEST(test_missing_encoding_conversion_fn) { const char *text = "\n" "\x81"; XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); /* MiscEncodingHandler sets up an encoding with every top-bit-set * character introducing a two-byte sequence. For this, it * requires a convert function. The above function call doesn't * pass one through, so when BadEncodingHandler actually gets * called it should supply an invalid encoding. */ expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, "Encoding with missing convert() not faulted"); } END_TEST START_TEST(test_failing_encoding_conversion_fn) { const char *text = "\n" "\x81"; XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); /* BadEncodingHandler sets up an encoding with every top-bit-set * character introducing a two-byte sequence. For this, it * requires a convert function. The above function call passes * one that insists all possible sequences are invalid anyway. */ expect_failure(text, XML_ERROR_INVALID_TOKEN, "Encoding with failing convert() not faulted"); } END_TEST /* Test unknown encoding conversions */ START_TEST(test_unknown_encoding_success) { const char *text = "\n" /* Equivalent to Hello, world */ "<\x81\x64\x80oc>Hello, world"; XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); run_character_check(text, XCS("Hello, world")); } END_TEST /* Test bad name character in unknown encoding */ START_TEST(test_unknown_encoding_bad_name) { const char *text = "\n" "<\xff\x64oc>Hello, world"; XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); expect_failure(text, XML_ERROR_INVALID_TOKEN, "Bad name start in unknown encoding not faulted"); } END_TEST /* Test bad mid-name character in unknown encoding */ START_TEST(test_unknown_encoding_bad_name_2) { const char *text = "\n" "Hello, world"; XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); expect_failure(text, XML_ERROR_INVALID_TOKEN, "Bad name in unknown encoding not faulted"); } END_TEST /* Test element name that is long enough to fill the conversion buffer * in an unknown encoding, finishing with an encoded character. */ START_TEST(test_unknown_encoding_long_name_1) { const char *text = "\n" "" "Hi" ""; const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop"); CharData storage; CharData_Init(&storage); XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); XML_SetStartElementHandler(g_parser, record_element_start_handler); XML_SetUserData(g_parser, &storage); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); CharData_CheckXMLChars(&storage, expected); } END_TEST /* Test element name that is long enough to fill the conversion buffer * in an unknown encoding, finishing with an simple character. */ START_TEST(test_unknown_encoding_long_name_2) { const char *text = "\n" "" "Hi" ""; const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop"); CharData storage; CharData_Init(&storage); XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); XML_SetStartElementHandler(g_parser, record_element_start_handler); XML_SetUserData(g_parser, &storage); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); CharData_CheckXMLChars(&storage, expected); } END_TEST START_TEST(test_invalid_unknown_encoding) { const char *text = "\n" "Hello world"; XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, "Invalid unknown encoding not faulted"); } END_TEST START_TEST(test_unknown_ascii_encoding_ok) { const char *text = "\n" "Hello, world"; XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); run_character_check(text, XCS("Hello, world")); } END_TEST START_TEST(test_unknown_ascii_encoding_fail) { const char *text = "\n" "Hello, \x80 world"; XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); expect_failure(text, XML_ERROR_INVALID_TOKEN, "Invalid character not faulted"); } END_TEST START_TEST(test_unknown_encoding_invalid_length) { const char *text = "\n" "Hello, world"; XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, "Invalid unknown encoding not faulted"); } END_TEST START_TEST(test_unknown_encoding_invalid_topbit) { const char *text = "\n" "Hello, world"; XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, "Invalid unknown encoding not faulted"); } END_TEST START_TEST(test_unknown_encoding_invalid_surrogate) { const char *text = "\n" "Hello, \x82 world"; XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); expect_failure(text, XML_ERROR_INVALID_TOKEN, "Invalid unknown encoding not faulted"); } END_TEST START_TEST(test_unknown_encoding_invalid_high) { const char *text = "\n" "Hello, world"; XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, "Invalid unknown encoding not faulted"); } END_TEST START_TEST(test_unknown_encoding_invalid_attr_value) { const char *text = "\n" ""; XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL); expect_failure(text, XML_ERROR_INVALID_TOKEN, "Invalid attribute valid not faulted"); } END_TEST /* Test an external entity parser set to use latin-1 detects UTF-16 * BOMs correctly. */ enum ee_parse_flags { EE_PARSE_NONE = 0x00, EE_PARSE_FULL_BUFFER = 0x01 }; typedef struct ExtTest2 { const char *parse_text; int parse_len; const XML_Char *encoding; CharData *storage; enum ee_parse_flags flags; } ExtTest2; static int XMLCALL external_entity_loader2(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId) { ExtTest2 *test_data = (ExtTest2 *)XML_GetUserData(parser); XML_Parser extparser; UNUSED_P(base); UNUSED_P(systemId); UNUSED_P(publicId); extparser = XML_ExternalEntityParserCreate(parser, context, NULL); if (extparser == NULL) fail("Coulr not create external entity parser"); if (test_data->encoding != NULL) { if (! XML_SetEncoding(extparser, test_data->encoding)) fail("XML_SetEncoding() ignored for external entity"); } if (test_data->flags & EE_PARSE_FULL_BUFFER) { if (XML_Parse(extparser, test_data->parse_text, test_data->parse_len, XML_TRUE) == XML_STATUS_ERROR) { xml_failure(extparser); } } else if (_XML_Parse_SINGLE_BYTES(extparser, test_data->parse_text, test_data->parse_len, XML_TRUE) == XML_STATUS_ERROR) { xml_failure(extparser); } XML_ParserFree(extparser); return XML_STATUS_OK; } /* Test that UTF-16 BOM does not select UTF-16 given explicit encoding */ static void XMLCALL ext2_accumulate_characters(void *userData, const XML_Char *s, int len) { ExtTest2 *test_data = (ExtTest2 *)userData; accumulate_characters(test_data->storage, s, len); } START_TEST(test_ext_entity_latin1_utf16le_bom) { const char *text = "\n" "]>\n" "&en;"; ExtTest2 test_data = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */ /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn, * 0x4c = L and 0x20 is a space */ "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL, EE_PARSE_NONE}; #ifdef XML_UNICODE const XML_Char *expected = XCS("\x00ff\x00feL "); #else /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */ const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL "); #endif CharData storage; CharData_Init(&storage); test_data.storage = &storage; XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); XML_SetUserData(g_parser, &test_data); XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); CharData_CheckXMLChars(&storage, expected); } END_TEST START_TEST(test_ext_entity_latin1_utf16be_bom) { const char *text = "\n" "]>\n" "&en;"; ExtTest2 test_data = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */ /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn, * 0x4c = L and 0x20 is a space */ "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL, EE_PARSE_NONE}; #ifdef XML_UNICODE const XML_Char *expected = XCS("\x00fe\x00ff L"); #else /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */ const XML_Char *expected = XCS("\xc3\xbe\xc3\xbf L"); #endif CharData storage; CharData_Init(&storage); test_data.storage = &storage; XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); XML_SetUserData(g_parser, &test_data); XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); CharData_CheckXMLChars(&storage, expected); } END_TEST /* Parsing the full buffer rather than a byte at a time makes a * difference to the encoding scanning code, so repeat the above tests * without breaking them down by byte. */ START_TEST(test_ext_entity_latin1_utf16le_bom2) { const char *text = "\n" "]>\n" "&en;"; ExtTest2 test_data = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */ /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn, * 0x4c = L and 0x20 is a space */ "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL, EE_PARSE_FULL_BUFFER}; #ifdef XML_UNICODE const XML_Char *expected = XCS("\x00ff\x00feL "); #else /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */ const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL "); #endif CharData storage; CharData_Init(&storage); test_data.storage = &storage; XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); XML_SetUserData(g_parser, &test_data); XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); CharData_CheckXMLChars(&storage, expected); } END_TEST START_TEST(test_ext_entity_latin1_utf16be_bom2) { const char *text = "\n" "]>\n" "&en;"; ExtTest2 test_data = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */ /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn, * 0x4c = L and 0x20 is a space */ "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL, EE_PARSE_FULL_BUFFER}; #ifdef XML_UNICODE const XML_Char *expected = XCS("\x00fe\x00ff L"); #else /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */ const XML_Char *expected = "\xc3\xbe\xc3\xbf L"; #endif CharData storage; CharData_Init(&storage); test_data.storage = &storage; XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); XML_SetUserData(g_parser, &test_data); XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); CharData_CheckXMLChars(&storage, expected); } END_TEST /* Test little-endian UTF-16 given an explicit big-endian encoding */ START_TEST(test_ext_entity_utf16_be) { const char *text = "\n" "]>\n" "&en;"; ExtTest2 test_data = {"<\0e\0/\0>\0", 8, XCS("utf-16be"), NULL, EE_PARSE_NONE}; #ifdef XML_UNICODE const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00"); #else const XML_Char *expected = XCS("\xe3\xb0\x80" /* U+3C00 */ "\xe6\x94\x80" /* U+6500 */ "\xe2\xbc\x80" /* U+2F00 */ "\xe3\xb8\x80"); /* U+3E00 */ #endif CharData storage; CharData_Init(&storage); test_data.storage = &storage; XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); XML_SetUserData(g_parser, &test_data); XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); CharData_CheckXMLChars(&storage, expected); } END_TEST /* Test big-endian UTF-16 given an explicit little-endian encoding */ START_TEST(test_ext_entity_utf16_le) { const char *text = "\n" "]>\n" "&en;"; ExtTest2 test_data = {"\0<\0e\0/\0>", 8, XCS("utf-16le"), NULL, EE_PARSE_NONE}; #ifdef XML_UNICODE const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00"); #else const XML_Char *expected = XCS("\xe3\xb0\x80" /* U+3C00 */ "\xe6\x94\x80" /* U+6500 */ "\xe2\xbc\x80" /* U+2F00 */ "\xe3\xb8\x80"); /* U+3E00 */ #endif CharData storage; CharData_Init(&storage); test_data.storage = &storage; XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); XML_SetUserData(g_parser, &test_data); XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); CharData_CheckXMLChars(&storage, expected); } END_TEST /* Test little-endian UTF-16 given no explicit encoding. * The existing default encoding (UTF-8) is assumed to hold without a * BOM to contradict it, so the entity value will in fact provoke an * error because 0x00 is not a valid XML character. We parse the * whole buffer in one go rather than feeding it in byte by byte to * exercise different code paths in the initial scanning routines. */ typedef struct ExtFaults2 { const char *parse_text; int parse_len; const char *fail_text; const XML_Char *encoding; enum XML_Error error; } ExtFaults2; static int XMLCALL external_entity_faulter2(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId) { ExtFaults2 *test_data = (ExtFaults2 *)XML_GetUserData(parser); XML_Parser extparser; UNUSED_P(base); UNUSED_P(systemId); UNUSED_P(publicId); extparser = XML_ExternalEntityParserCreate(parser, context, NULL); if (extparser == NULL) fail("Could not create external entity parser"); if (test_data->encoding != NULL) { if (! XML_SetEncoding(extparser, test_data->encoding)) fail("XML_SetEncoding() ignored for external entity"); } if (XML_Parse(extparser, test_data->parse_text, test_data->parse_len, XML_TRUE) != XML_STATUS_ERROR) fail(test_data->fail_text); if (XML_GetErrorCode(extparser) != test_data->error) xml_failure(extparser); XML_ParserFree(extparser); return XML_STATUS_ERROR; } START_TEST(test_ext_entity_utf16_unknown) { const char *text = "\n" "]>\n" "&en;"; ExtFaults2 test_data = {"a\0b\0c\0", 6, "Invalid character in entity not faulted", NULL, XML_ERROR_INVALID_TOKEN}; XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter2); XML_SetUserData(g_parser, &test_data); expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, "Invalid character should not have been accepted"); } END_TEST /* Test not-quite-UTF-8 BOM (0xEF 0xBB 0xBF) */ START_TEST(test_ext_entity_utf8_non_bom) { const char *text = "\n" "]>\n" "&en;"; ExtTest2 test_data = {"\xef\xbb\x80", /* Arabic letter DAD medial form, U+FEC0 */ 3, NULL, NULL, EE_PARSE_NONE}; #ifdef XML_UNICODE const XML_Char *expected = XCS("\xfec0"); #else const XML_Char *expected = XCS("\xef\xbb\x80"); #endif CharData storage; CharData_Init(&storage); test_data.storage = &storage; XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); XML_SetUserData(g_parser, &test_data); XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); CharData_CheckXMLChars(&storage, expected); } END_TEST /* Test that UTF-8 in a CDATA section is correctly passed through */ START_TEST(test_utf8_in_cdata_section) { const char *text = ""; #ifdef XML_UNICODE const XML_Char *expected = XCS("one \x00e9 two"); #else const XML_Char *expected = XCS("one \xc3\xa9 two"); #endif run_character_check(text, expected); } END_TEST /* Test that little-endian UTF-16 in a CDATA section is handled */ START_TEST(test_utf8_in_cdata_section_2) { const char *text = ""; #ifdef XML_UNICODE const XML_Char *expected = XCS("\x00e9]\x00e9two"); #else const XML_Char *expected = XCS("\xc3\xa9]\xc3\xa9two"); #endif run_character_check(text, expected); } END_TEST START_TEST(test_utf8_in_start_tags) { struct test_case { bool goodName; bool goodNameStart; const char *tagName; }; // The idea with the tests below is this: // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences // go to isNever and are hence not a concern. // // We start with a character that is a valid name character // (or even name-start character, see XML 1.0r4 spec) and then we flip // single bits at places where (1) the result leaves the UTF-8 encoding space // and (2) we stay in the same n-byte sequence family. // // The flipped bits are highlighted in angle brackets in comments, // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped // the most significant bit to 1 to leave UTF-8 encoding space. struct test_case cases[] = { // 1-byte UTF-8: [0xxx xxxx] {true, true, "\x3A"}, // [0011 1010] = ASCII colon ':' {false, false, "\xBA"}, // [<1>011 1010] {true, false, "\x39"}, // [0011 1001] = ASCII nine '9' {false, false, "\xB9"}, // [<1>011 1001] // 2-byte UTF-8: [110x xxxx] [10xx xxxx] {true, true, "\xDB\xA5"}, // [1101 1011] [1010 0101] = // Arabic small waw U+06E5 {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101] {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101] {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101] {true, false, "\xCC\x81"}, // [1100 1100] [1000 0001] = // combining char U+0301 {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001] {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001] {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001] // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xxxxxx] {true, true, "\xE0\xA4\x85"}, // [1110 0000] [1010 0100] [1000 0101] = // Devanagari Letter A U+0905 {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101] {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101] {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101] {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101] {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101] {true, false, "\xE0\xA4\x81"}, // [1110 0000] [1010 0100] [1000 0001] = // combining char U+0901 {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001] {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001] {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001] {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001] {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001] }; const bool atNameStart[] = {true, false}; size_t i = 0; char doc[1024]; size_t failCount = 0; for (; i < sizeof(cases) / sizeof(cases[0]); i++) { size_t j = 0; for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) { const bool expectedSuccess = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName; sprintf(doc, "<%s%s>" "Hi"; int i; const int max_alloc_count = 15; for (i = 0; i < max_alloc_count; i++) { allocation_count = i; XML_SetCommentHandler(g_parser, dummy_comment_handler); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_alloc_parse_xdecl() */ alloc_teardown(); alloc_setup(); } if (i == 0) fail("Parse succeeded despite failing allocator"); if (i == max_alloc_count) fail("Parse failed with max allocations"); } END_TEST START_TEST(test_alloc_parse_comment_2) { const char *text = "\n" "" "Hello, world" "" ""; int i; const int max_alloc_count = 15; for (i = 0; i < max_alloc_count; i++) { allocation_count = i; XML_SetCommentHandler(g_parser, dummy_comment_handler); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_alloc_parse_xdecl() */ alloc_teardown(); alloc_setup(); } if (i == 0) fail("Parse succeeded despite failing allocator"); if (i == max_alloc_count) fail("Parse failed with max allocations"); } END_TEST static int XMLCALL external_entity_duff_loader(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId) { XML_Parser new_parser; unsigned int i; const unsigned int max_alloc_count = 10; UNUSED_P(base); UNUSED_P(systemId); UNUSED_P(publicId); /* Try a few different allocation levels */ for (i = 0; i < max_alloc_count; i++) { allocation_count = i; new_parser = XML_ExternalEntityParserCreate(parser, context, NULL); if (new_parser != NULL) { XML_ParserFree(new_parser); break; } } if (i == 0) fail("External parser creation ignored failing allocator"); else if (i == max_alloc_count) fail("Extern parser not created with max allocation count"); /* Make sure other random allocation doesn't now fail */ allocation_count = ALLOC_ALWAYS_SUCCEED; /* Make sure the failure code path is executed too */ return XML_STATUS_ERROR; } /* Test that external parser creation running out of memory is * correctly reported. Based on the external entity test cases. */ START_TEST(test_alloc_create_external_parser) { const char *text = "\n" "\n" "&entity;"; char foo_text[] = ""; XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetUserData(g_parser, foo_text); XML_SetExternalEntityRefHandler(g_parser, external_entity_duff_loader); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) { fail("External parser allocator returned success incorrectly"); } } END_TEST /* More external parser memory allocation testing */ START_TEST(test_alloc_run_external_parser) { const char *text = "\n" "\n" "&entity;"; char foo_text[] = ""; unsigned int i; const unsigned int max_alloc_count = 15; for (i = 0; i < max_alloc_count; i++) { XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetUserData(g_parser, foo_text); XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader); allocation_count = i; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_alloc_parse_xdecl() */ alloc_teardown(); alloc_setup(); } if (i == 0) fail("Parsing ignored failing allocator"); else if (i == max_alloc_count) fail("Parsing failed with allocation count 10"); } END_TEST static int XMLCALL external_entity_dbl_handler(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId) { intptr_t callno = (intptr_t)XML_GetUserData(parser); const char *text; XML_Parser new_parser; int i; const int max_alloc_count = 20; UNUSED_P(base); UNUSED_P(systemId); UNUSED_P(publicId); if (callno == 0) { /* First time through, check how many calls to malloc occur */ text = ("\n" "\n" "\n"); allocation_count = 10000; new_parser = XML_ExternalEntityParserCreate(parser, context, NULL); if (new_parser == NULL) { fail("Unable to allocate first external parser"); return XML_STATUS_ERROR; } /* Stash the number of calls in the user data */ XML_SetUserData(parser, (void *)(intptr_t)(10000 - allocation_count)); } else { text = ("" ""); /* Try at varying levels to exercise more code paths */ for (i = 0; i < max_alloc_count; i++) { allocation_count = callno + i; new_parser = XML_ExternalEntityParserCreate(parser, context, NULL); if (new_parser != NULL) break; } if (i == 0) { fail("Second external parser unexpectedly created"); XML_ParserFree(new_parser); return XML_STATUS_ERROR; } else if (i == max_alloc_count) { fail("Second external parser not created"); return XML_STATUS_ERROR; } } allocation_count = ALLOC_ALWAYS_SUCCEED; if (_XML_Parse_SINGLE_BYTES(new_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) { xml_failure(new_parser); return XML_STATUS_ERROR; } XML_ParserFree(new_parser); return XML_STATUS_OK; } /* Test that running out of memory in dtdCopy is correctly reported. * Based on test_default_ns_from_ext_subset_and_ext_ge() */ START_TEST(test_alloc_dtd_copy_default_atts) { const char *text = "\n" "\n" "]>\n" "\n" "&en;\n" ""; XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetExternalEntityRefHandler(g_parser, external_entity_dbl_handler); XML_SetUserData(g_parser, NULL); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); } END_TEST static int XMLCALL external_entity_dbl_handler_2(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId) { intptr_t callno = (intptr_t)XML_GetUserData(parser); const char *text; XML_Parser new_parser; enum XML_Status rv; UNUSED_P(base); UNUSED_P(systemId); UNUSED_P(publicId); if (callno == 0) { /* Try different allocation levels for whole exercise */ text = ("\n" "\n" "\n"); XML_SetUserData(parser, (void *)(intptr_t)1); new_parser = XML_ExternalEntityParserCreate(parser, context, NULL); if (new_parser == NULL) return XML_STATUS_ERROR; rv = _XML_Parse_SINGLE_BYTES(new_parser, text, (int)strlen(text), XML_TRUE); } else { /* Just run through once */ text = ("" ""); new_parser = XML_ExternalEntityParserCreate(parser, context, NULL); if (new_parser == NULL) return XML_STATUS_ERROR; rv = _XML_Parse_SINGLE_BYTES(new_parser, text, (int)strlen(text), XML_TRUE); } XML_ParserFree(new_parser); if (rv == XML_STATUS_ERROR) return XML_STATUS_ERROR; return XML_STATUS_OK; } /* Test more external entity allocation failure paths */ START_TEST(test_alloc_external_entity) { const char *text = "\n" "\n" "]>\n" "\n" "&en;\n" ""; int i; const int alloc_test_max_repeats = 50; for (i = 0; i < alloc_test_max_repeats; i++) { allocation_count = -1; XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetExternalEntityRefHandler(g_parser, external_entity_dbl_handler_2); XML_SetUserData(g_parser, NULL); allocation_count = i; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_OK) break; /* See comment in test_alloc_parse_xdecl() */ alloc_teardown(); alloc_setup(); } allocation_count = -1; if (i == 0) fail("External entity parsed despite duff allocator"); if (i == alloc_test_max_repeats) fail("External entity not parsed at max allocation count"); } END_TEST /* Test more allocation failure paths */ static int XMLCALL external_entity_alloc_set_encoding(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId) { /* As for external_entity_loader() */ const char *text = "" "\xC3\xA9"; XML_Parser ext_parser; enum XML_Status status; UNUSED_P(base); UNUSED_P(systemId); UNUSED_P(publicId); ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); if (ext_parser == NULL) return XML_STATUS_ERROR; if (! XML_SetEncoding(ext_parser, XCS("utf-8"))) { XML_ParserFree(ext_parser); return XML_STATUS_ERROR; } status = _XML_Parse_SINGLE_BYTES(ext_parser, text, (int)strlen(text), XML_TRUE); XML_ParserFree(ext_parser); if (status == XML_STATUS_ERROR) return XML_STATUS_ERROR; return XML_STATUS_OK; } START_TEST(test_alloc_ext_entity_set_encoding) { const char *text = "\n" "]>\n" "&en;"; int i; const int max_allocation_count = 30; for (i = 0; i < max_allocation_count; i++) { XML_SetExternalEntityRefHandler(g_parser, external_entity_alloc_set_encoding); allocation_count = i; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_OK) break; allocation_count = -1; /* See comment in test_alloc_parse_xdecl() */ alloc_teardown(); alloc_setup(); } if (i == 0) fail("Encoding check succeeded despite failing allocator"); if (i == max_allocation_count) fail("Encoding failed at max allocation count"); } END_TEST static int XMLCALL unknown_released_encoding_handler(void *data, const XML_Char *encoding, XML_Encoding *info) { UNUSED_P(data); if (! xcstrcmp(encoding, XCS("unsupported-encoding"))) { int i; for (i = 0; i < 256; i++) info->map[i] = i; info->data = NULL; info->convert = NULL; info->release = dummy_release; return XML_STATUS_OK; } return XML_STATUS_ERROR; } /* Test the effects of allocation failure in internal entities. * Based on test_unknown_encoding_internal_entity */ START_TEST(test_alloc_internal_entity) { const char *text = "\n" "]>\n" ""; unsigned int i; const unsigned int max_alloc_count = 20; for (i = 0; i < max_alloc_count; i++) { allocation_count = i; XML_SetUnknownEncodingHandler(g_parser, unknown_released_encoding_handler, NULL); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_alloc_parse_xdecl() */ alloc_teardown(); alloc_setup(); } if (i == 0) fail("Internal entity worked despite failing allocations"); else if (i == max_alloc_count) fail("Internal entity failed at max allocation count"); } END_TEST /* Test the robustness against allocation failure of element handling * Based on test_dtd_default_handling(). */ START_TEST(test_alloc_dtd_default_handling) { const char *text = "\n" "\n" "\n" "\n" "\n" "\n" "\n" "]>\n" ""; const XML_Char *expected = XCS("\n\n\n\n\n\n\n\n\ntext in doc"); CharData storage; int i; const int max_alloc_count = 25; for (i = 0; i < max_alloc_count; i++) { allocation_count = i; dummy_handler_flags = 0; XML_SetDefaultHandler(g_parser, accumulate_characters); XML_SetDoctypeDeclHandler(g_parser, dummy_start_doctype_handler, dummy_end_doctype_handler); XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler); XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler); XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler); XML_SetProcessingInstructionHandler(g_parser, dummy_pi_handler); XML_SetCommentHandler(g_parser, dummy_comment_handler); XML_SetCdataSectionHandler(g_parser, dummy_start_cdata_handler, dummy_end_cdata_handler); XML_SetUnparsedEntityDeclHandler(g_parser, dummy_unparsed_entity_decl_handler); CharData_Init(&storage); XML_SetUserData(g_parser, &storage); XML_SetCharacterDataHandler(g_parser, accumulate_characters); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_alloc_parse_xdecl() */ alloc_teardown(); alloc_setup(); } if (i == 0) fail("Default DTD parsed despite allocation failures"); if (i == max_alloc_count) fail("Default DTD not parsed with maximum alloc count"); CharData_CheckXMLChars(&storage, expected); if (dummy_handler_flags != (DUMMY_START_DOCTYPE_HANDLER_FLAG | DUMMY_END_DOCTYPE_HANDLER_FLAG | DUMMY_ENTITY_DECL_HANDLER_FLAG | DUMMY_NOTATION_DECL_HANDLER_FLAG | DUMMY_ELEMENT_DECL_HANDLER_FLAG | DUMMY_ATTLIST_DECL_HANDLER_FLAG | DUMMY_COMMENT_HANDLER_FLAG | DUMMY_PI_HANDLER_FLAG | DUMMY_START_CDATA_HANDLER_FLAG | DUMMY_END_CDATA_HANDLER_FLAG | DUMMY_UNPARSED_ENTITY_DECL_HANDLER_FLAG)) fail("Not all handlers were called"); } END_TEST /* Test robustness of XML_SetEncoding() with a failing allocator */ START_TEST(test_alloc_explicit_encoding) { int i; const int max_alloc_count = 5; for (i = 0; i < max_alloc_count; i++) { allocation_count = i; if (XML_SetEncoding(g_parser, XCS("us-ascii")) == XML_STATUS_OK) break; } if (i == 0) fail("Encoding set despite failing allocator"); else if (i == max_alloc_count) fail("Encoding not set at max allocation count"); } END_TEST /* Test robustness of XML_SetBase against a failing allocator */ START_TEST(test_alloc_set_base) { const XML_Char *new_base = XCS("/local/file/name.xml"); int i; const int max_alloc_count = 5; for (i = 0; i < max_alloc_count; i++) { allocation_count = i; if (XML_SetBase(g_parser, new_base) == XML_STATUS_OK) break; } if (i == 0) fail("Base set despite failing allocator"); else if (i == max_alloc_count) fail("Base not set with max allocation count"); } END_TEST /* Test buffer extension in the face of a duff reallocator */ START_TEST(test_alloc_realloc_buffer) { const char *text = get_buffer_test_text; void *buffer; int i; const int max_realloc_count = 10; /* Get a smallish buffer */ for (i = 0; i < max_realloc_count; i++) { reallocation_count = i; buffer = XML_GetBuffer(g_parser, 1536); if (buffer == NULL) fail("1.5K buffer reallocation failed"); assert(buffer != NULL); memcpy(buffer, text, strlen(text)); if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE) == XML_STATUS_OK) break; /* See comment in test_alloc_parse_xdecl() */ alloc_teardown(); alloc_setup(); } reallocation_count = -1; if (i == 0) fail("Parse succeeded with no reallocation"); else if (i == max_realloc_count) fail("Parse failed with max reallocation count"); } END_TEST /* Same test for external entity parsers */ static int XMLCALL external_entity_reallocator(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId) { const char *text = get_buffer_test_text; XML_Parser ext_parser; void *buffer; enum XML_Status status; UNUSED_P(base); UNUSED_P(systemId); UNUSED_P(publicId); ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); if (ext_parser == NULL) fail("Could not create external entity parser"); reallocation_count = (intptr_t)XML_GetUserData(parser); buffer = XML_GetBuffer(ext_parser, 1536); if (buffer == NULL) fail("Buffer allocation failed"); assert(buffer != NULL); memcpy(buffer, text, strlen(text)); status = XML_ParseBuffer(ext_parser, (int)strlen(text), XML_FALSE); reallocation_count = -1; XML_ParserFree(ext_parser); return (status == XML_STATUS_OK) ? XML_STATUS_OK : XML_STATUS_ERROR; } START_TEST(test_alloc_ext_entity_realloc_buffer) { const char *text = "\n" "]>\n" "&en;"; int i; const int max_realloc_count = 10; for (i = 0; i < max_realloc_count; i++) { XML_SetExternalEntityRefHandler(g_parser, external_entity_reallocator); XML_SetUserData(g_parser, (void *)(intptr_t)i); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_OK) break; /* See comment in test_alloc_parse_xdecl() */ alloc_teardown(); alloc_setup(); } if (i == 0) fail("Succeeded with no reallocations"); if (i == max_realloc_count) fail("Failed with max reallocations"); } END_TEST /* Test elements with many attributes are handled correctly */ START_TEST(test_alloc_realloc_many_attributes) { const char *text = "\n" "\n" "\n" "]>\n" "" ""; int i; const int max_realloc_count = 10; for (i = 0; i < max_realloc_count; i++) { reallocation_count = i; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_alloc_parse_xdecl() */ alloc_teardown(); alloc_setup(); } if (i == 0) fail("Parse succeeded despite no reallocations"); if (i == max_realloc_count) fail("Parse failed at max reallocations"); } END_TEST /* Test handling of a public entity with failing allocator */ START_TEST(test_alloc_public_entity_value) { const char *text = "\n" "\n"; char dtd_text[] = "\n" "\n" "\n" "%e1;\n"; int i; const int max_alloc_count = 50; for (i = 0; i < max_alloc_count; i++) { allocation_count = i; dummy_handler_flags = 0; XML_SetUserData(g_parser, dtd_text); XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetExternalEntityRefHandler(g_parser, external_entity_public); /* Provoke a particular code path */ XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_alloc_parse_xdecl() */ alloc_teardown(); alloc_setup(); } if (i == 0) fail("Parsing worked despite failing allocation"); if (i == max_alloc_count) fail("Parsing failed at max allocation count"); if (dummy_handler_flags != DUMMY_ENTITY_DECL_HANDLER_FLAG) fail("Entity declaration handler not called"); } END_TEST START_TEST(test_alloc_realloc_subst_public_entity_value) { const char *text = "\n" "\n"; char dtd_text[] = "\n" "\n" "%ThisIsAStupidlyLongParameterNameIntendedToTriggerPoolGrowth12345" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP;"; int i; const int max_realloc_count = 10; for (i = 0; i < max_realloc_count; i++) { reallocation_count = i; XML_SetUserData(g_parser, dtd_text); XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetExternalEntityRefHandler(g_parser, external_entity_public); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_alloc_parse_xdecl() */ alloc_teardown(); alloc_setup(); } if (i == 0) fail("Parsing worked despite failing reallocation"); if (i == max_realloc_count) fail("Parsing failed at max reallocation count"); } END_TEST START_TEST(test_alloc_parse_public_doctype) { const char *text = "\n" "\n" ""; int i; const int max_alloc_count = 25; for (i = 0; i < max_alloc_count; i++) { allocation_count = i; dummy_handler_flags = 0; XML_SetDoctypeDeclHandler(g_parser, dummy_start_doctype_decl_handler, dummy_end_doctype_decl_handler); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_alloc_parse_xdecl() */ alloc_teardown(); alloc_setup(); } if (i == 0) fail("Parse succeeded despite failing allocator"); if (i == max_alloc_count) fail("Parse failed at maximum allocation count"); if (dummy_handler_flags != (DUMMY_START_DOCTYPE_DECL_HANDLER_FLAG | DUMMY_END_DOCTYPE_DECL_HANDLER_FLAG)) fail("Doctype handler functions not called"); } END_TEST START_TEST(test_alloc_parse_public_doctype_long_name) { const char *text = "\n" "\n" ""; int i; const int max_alloc_count = 25; for (i = 0; i < max_alloc_count; i++) { allocation_count = i; XML_SetDoctypeDeclHandler(g_parser, dummy_start_doctype_decl_handler, dummy_end_doctype_decl_handler); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_alloc_parse_xdecl() */ alloc_teardown(); alloc_setup(); } if (i == 0) fail("Parse succeeded despite failing allocator"); if (i == max_alloc_count) fail("Parse failed at maximum allocation count"); } END_TEST static int XMLCALL external_entity_alloc(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId) { const char *text = (const char *)XML_GetUserData(parser); XML_Parser ext_parser; int parse_res; UNUSED_P(base); UNUSED_P(systemId); UNUSED_P(publicId); ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); if (ext_parser == NULL) return XML_STATUS_ERROR; parse_res = _XML_Parse_SINGLE_BYTES(ext_parser, text, (int)strlen(text), XML_TRUE); XML_ParserFree(ext_parser); return parse_res; } /* Test foreign DTD handling */ START_TEST(test_alloc_set_foreign_dtd) { const char *text1 = "\n" "&entity;"; char text2[] = ""; int i; const int max_alloc_count = 25; for (i = 0; i < max_alloc_count; i++) { allocation_count = i; XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetUserData(g_parser, &text2); XML_SetExternalEntityRefHandler(g_parser, external_entity_alloc); if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE) fail("Could not set foreign DTD"); if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_alloc_parse_xdecl() */ alloc_teardown(); alloc_setup(); } if (i == 0) fail("Parse succeeded despite failing allocator"); if (i == max_alloc_count) fail("Parse failed at maximum allocation count"); } END_TEST /* Test based on ibm/valid/P32/ibm32v04.xml */ START_TEST(test_alloc_attribute_enum_value) { const char *text = "\n" "\n" "This is a \n \n\nyellow tiger"; char dtd_text[] = "\n" "\n" ""; int i; const int max_alloc_count = 30; for (i = 0; i < max_alloc_count; i++) { allocation_count = i; XML_SetExternalEntityRefHandler(g_parser, external_entity_alloc); XML_SetUserData(g_parser, dtd_text); XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); /* An attribute list handler provokes a different code path */ XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_alloc_parse_xdecl() */ alloc_teardown(); alloc_setup(); } if (i == 0) fail("Parse succeeded despite failing allocator"); if (i == max_alloc_count) fail("Parse failed at maximum allocation count"); } END_TEST /* Test attribute enums sufficient to overflow the string pool */ START_TEST(test_alloc_realloc_attribute_enum_value) { const char *text = "\n" "\n" "This is a yellow tiger"; /* We wish to define a collection of attribute enums that will * cause the string pool storing them to have to expand. This * means more than 1024 bytes, including the parentheses and * separator bars. */ char dtd_text[] = "\n" ""; int i; const int max_realloc_count = 10; for (i = 0; i < max_realloc_count; i++) { reallocation_count = i; XML_SetExternalEntityRefHandler(g_parser, external_entity_alloc); XML_SetUserData(g_parser, dtd_text); XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); /* An attribute list handler provokes a different code path */ XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_alloc_parse_xdecl() */ alloc_teardown(); alloc_setup(); } if (i == 0) fail("Parse succeeded despite failing reallocator"); if (i == max_realloc_count) fail("Parse failed at maximum reallocation count"); } END_TEST /* Test attribute enums in a #IMPLIED attribute forcing pool growth */ START_TEST(test_alloc_realloc_implied_attribute) { /* Forcing this particular code path is a balancing act. The * addition of the closing parenthesis and terminal NUL must be * what pushes the string of enums over the 1024-byte limit, * otherwise a different code path will pick up the realloc. */ const char *text = "\n" "\n" "]>"; int i; const int max_realloc_count = 10; for (i = 0; i < max_realloc_count; i++) { reallocation_count = i; XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_alloc_parse_xdecl() */ alloc_teardown(); alloc_setup(); } if (i == 0) fail("Parse succeeded despite failing reallocator"); if (i == max_realloc_count) fail("Parse failed at maximum reallocation count"); } END_TEST /* Test attribute enums in a defaulted attribute forcing pool growth */ START_TEST(test_alloc_realloc_default_attribute) { /* Forcing this particular code path is a balancing act. The * addition of the closing parenthesis and terminal NUL must be * what pushes the string of enums over the 1024-byte limit, * otherwise a different code path will pick up the realloc. */ const char *text = "\n" "\n]>"; int i; const int max_realloc_count = 10; for (i = 0; i < max_realloc_count; i++) { reallocation_count = i; XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_alloc_parse_xdecl() */ alloc_teardown(); alloc_setup(); } if (i == 0) fail("Parse succeeded despite failing reallocator"); if (i == max_realloc_count) fail("Parse failed at maximum reallocation count"); } END_TEST /* Test long notation name with dodgy allocator */ START_TEST(test_alloc_notation) { const char *text = "\n" "\n" "\n" "]>\n"; int i; const int max_alloc_count = 20; for (i = 0; i < max_alloc_count; i++) { allocation_count = i; dummy_handler_flags = 0; XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler); XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_alloc_parse_xdecl() */ alloc_teardown(); alloc_setup(); } if (i == 0) fail("Parse succeeded despite allocation failures"); if (i == max_alloc_count) fail("Parse failed at maximum allocation count"); if (dummy_handler_flags != (DUMMY_ENTITY_DECL_HANDLER_FLAG | DUMMY_NOTATION_DECL_HANDLER_FLAG)) fail("Entity declaration handler not called"); } END_TEST /* Test public notation with dodgy allocator */ START_TEST(test_alloc_public_notation) { const char *text = "\n" "\n" "\n" "]>\n"; int i; const int max_alloc_count = 20; for (i = 0; i < max_alloc_count; i++) { allocation_count = i; dummy_handler_flags = 0; XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_alloc_parse_xdecl() */ alloc_teardown(); alloc_setup(); } if (i == 0) fail("Parse succeeded despite allocation failures"); if (i == max_alloc_count) fail("Parse failed at maximum allocation count"); if (dummy_handler_flags != DUMMY_NOTATION_DECL_HANDLER_FLAG) fail("Notation handler not called"); } END_TEST /* Test public notation with dodgy allocator */ START_TEST(test_alloc_system_notation) { const char *text = "\n" "\n" "\n" "]>\n"; int i; const int max_alloc_count = 20; for (i = 0; i < max_alloc_count; i++) { allocation_count = i; dummy_handler_flags = 0; XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_alloc_parse_xdecl() */ alloc_teardown(); alloc_setup(); } if (i == 0) fail("Parse succeeded despite allocation failures"); if (i == max_alloc_count) fail("Parse failed at maximum allocation count"); if (dummy_handler_flags != DUMMY_NOTATION_DECL_HANDLER_FLAG) fail("Notation handler not called"); } END_TEST START_TEST(test_alloc_nested_groups) { const char *text = "\n" "" "]>\n" ""; CharData storage; int i; const int max_alloc_count = 20; for (i = 0; i < max_alloc_count; i++) { allocation_count = i; CharData_Init(&storage); XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); XML_SetStartElementHandler(g_parser, record_element_start_handler); XML_SetUserData(g_parser, &storage); dummy_handler_flags = 0; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_alloc_parse_xdecl() */ alloc_teardown(); alloc_setup(); } if (i == 0) fail("Parse succeeded despite failing reallocator"); if (i == max_alloc_count) fail("Parse failed at maximum reallocation count"); CharData_CheckXMLChars(&storage, XCS("doce")); if (dummy_handler_flags != DUMMY_ELEMENT_DECL_HANDLER_FLAG) fail("Element handler not fired"); } END_TEST START_TEST(test_alloc_realloc_nested_groups) { const char *text = "\n" "" "]>\n" ""; CharData storage; int i; const int max_realloc_count = 10; for (i = 0; i < max_realloc_count; i++) { reallocation_count = i; CharData_Init(&storage); XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); XML_SetStartElementHandler(g_parser, record_element_start_handler); XML_SetUserData(g_parser, &storage); dummy_handler_flags = 0; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_alloc_parse_xdecl() */ alloc_teardown(); alloc_setup(); } if (i == 0) fail("Parse succeeded despite failing reallocator"); if (i == max_realloc_count) fail("Parse failed at maximum reallocation count"); CharData_CheckXMLChars(&storage, XCS("doce")); if (dummy_handler_flags != DUMMY_ELEMENT_DECL_HANDLER_FLAG) fail("Element handler not fired"); } END_TEST START_TEST(test_alloc_large_group) { const char *text = "\n" "]>\n" "\n" "\n" "\n"; int i; const int max_alloc_count = 50; for (i = 0; i < max_alloc_count; i++) { allocation_count = i; XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); dummy_handler_flags = 0; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_alloc_parse_xdecl() */ alloc_teardown(); alloc_setup(); } if (i == 0) fail("Parse succeeded despite failing allocator"); if (i == max_alloc_count) fail("Parse failed at maximum allocation count"); if (dummy_handler_flags != DUMMY_ELEMENT_DECL_HANDLER_FLAG) fail("Element handler flag not raised"); } END_TEST START_TEST(test_alloc_realloc_group_choice) { const char *text = "\n" "]>\n" "\n" "\n" "This is a foo\n" "\n" "\n"; int i; const int max_realloc_count = 10; for (i = 0; i < max_realloc_count; i++) { reallocation_count = i; XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); dummy_handler_flags = 0; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_alloc_parse_xdecl() */ alloc_teardown(); alloc_setup(); } if (i == 0) fail("Parse succeeded despite failing reallocator"); if (i == max_realloc_count) fail("Parse failed at maximum reallocation count"); if (dummy_handler_flags != DUMMY_ELEMENT_DECL_HANDLER_FLAG) fail("Element handler flag not raised"); } END_TEST START_TEST(test_alloc_pi_in_epilog) { const char *text = "\n" ""; int i; const int max_alloc_count = 15; for (i = 0; i < max_alloc_count; i++) { allocation_count = i; XML_SetProcessingInstructionHandler(g_parser, dummy_pi_handler); dummy_handler_flags = 0; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_alloc_parse_xdecl() */ alloc_teardown(); alloc_setup(); } if (i == 0) fail("Parse completed despite failing allocator"); if (i == max_alloc_count) fail("Parse failed at maximum allocation count"); if (dummy_handler_flags != DUMMY_PI_HANDLER_FLAG) fail("Processing instruction handler not invoked"); } END_TEST START_TEST(test_alloc_comment_in_epilog) { const char *text = "\n" ""; int i; const int max_alloc_count = 15; for (i = 0; i < max_alloc_count; i++) { allocation_count = i; XML_SetCommentHandler(g_parser, dummy_comment_handler); dummy_handler_flags = 0; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_alloc_parse_xdecl() */ alloc_teardown(); alloc_setup(); } if (i == 0) fail("Parse completed despite failing allocator"); if (i == max_alloc_count) fail("Parse failed at maximum allocation count"); if (dummy_handler_flags != DUMMY_COMMENT_HANDLER_FLAG) fail("Processing instruction handler not invoked"); } END_TEST START_TEST(test_alloc_realloc_long_attribute_value) { const char *text = "]>\n" ""; int i; const int max_realloc_count = 10; for (i = 0; i < max_realloc_count; i++) { reallocation_count = i; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_alloc_parse_xdecl() */ alloc_teardown(); alloc_setup(); } if (i == 0) fail("Parse succeeded despite failing reallocator"); if (i == max_realloc_count) fail("Parse failed at maximum reallocation count"); } END_TEST START_TEST(test_alloc_attribute_whitespace) { const char *text = ""; int i; const int max_alloc_count = 15; for (i = 0; i < max_alloc_count; i++) { allocation_count = i; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_alloc_parse_xdecl() */ alloc_teardown(); alloc_setup(); } if (i == 0) fail("Parse succeeded despite failing allocator"); if (i == max_alloc_count) fail("Parse failed at maximum allocation count"); } END_TEST START_TEST(test_alloc_attribute_predefined_entity) { const char *text = ""; int i; const int max_alloc_count = 15; for (i = 0; i < max_alloc_count; i++) { allocation_count = i; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_alloc_parse_xdecl() */ alloc_teardown(); alloc_setup(); } if (i == 0) fail("Parse succeeded despite failing allocator"); if (i == max_alloc_count) fail("Parse failed at maximum allocation count"); } END_TEST /* Test that a character reference at the end of a suitably long * default value for an attribute can trigger pool growth, and recovers * if the allocator fails on it. */ START_TEST(test_alloc_long_attr_default_with_char_ref) { const char *text = "]>\n" ""; int i; const int max_alloc_count = 20; for (i = 0; i < max_alloc_count; i++) { allocation_count = i; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_alloc_parse_xdecl() */ alloc_teardown(); alloc_setup(); } if (i == 0) fail("Parse succeeded despite failing allocator"); if (i == max_alloc_count) fail("Parse failed at maximum allocation count"); } END_TEST /* Test that a long character reference substitution triggers a pool * expansion correctly for an attribute value. */ START_TEST(test_alloc_long_attr_value) { const char *text = "]>\n" ""; int i; const int max_alloc_count = 25; for (i = 0; i < max_alloc_count; i++) { allocation_count = i; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_alloc_parse_xdecl() */ alloc_teardown(); alloc_setup(); } if (i == 0) fail("Parse succeeded despite failing allocator"); if (i == max_alloc_count) fail("Parse failed at maximum allocation count"); } END_TEST /* Test that an error in a nested parameter entity substitution is * handled correctly. It seems unlikely that the code path being * exercised can be reached purely by carefully crafted XML, but an * allocation error in the right place will definitely do it. */ START_TEST(test_alloc_nested_entities) { const char *text = "\n" ""; ExtFaults test_data = {"\n" "\n" "%pe2;", "Memory Fail not faulted", NULL, XML_ERROR_NO_MEMORY}; /* Causes an allocation error in a nested storeEntityValue() */ allocation_count = 12; XML_SetUserData(g_parser, &test_data); XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, "Entity allocation failure not noted"); } END_TEST START_TEST(test_alloc_realloc_param_entity_newline) { const char *text = "\n" ""; char dtd_text[] = "\n'>" "%pe;\n"; int i; const int max_realloc_count = 5; for (i = 0; i < max_realloc_count; i++) { reallocation_count = i; XML_SetUserData(g_parser, dtd_text); XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetExternalEntityRefHandler(g_parser, external_entity_alloc); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_alloc_parse_xdecl() */ alloc_teardown(); alloc_setup(); } if (i == 0) fail("Parse succeeded despite failing reallocator"); if (i == max_realloc_count) fail("Parse failed at maximum reallocation count"); } END_TEST START_TEST(test_alloc_realloc_ce_extends_pe) { const char *text = "\n" ""; char dtd_text[] = "\n'>" "%pe;\n"; int i; const int max_realloc_count = 5; for (i = 0; i < max_realloc_count; i++) { reallocation_count = i; XML_SetUserData(g_parser, dtd_text); XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetExternalEntityRefHandler(g_parser, external_entity_alloc); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_alloc_parse_xdecl() */ alloc_teardown(); alloc_setup(); } if (i == 0) fail("Parse succeeded despite failing reallocator"); if (i == max_realloc_count) fail("Parse failed at maximum reallocation count"); } END_TEST START_TEST(test_alloc_realloc_attributes) { const char *text = "]>\n" "wombat\n"; int i; const int max_realloc_count = 5; for (i = 0; i < max_realloc_count; i++) { reallocation_count = i; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_alloc_parse_xdecl() */ alloc_teardown(); alloc_setup(); } if (i == 0) fail("Parse succeeded despite failing reallocator"); if (i == max_realloc_count) fail("Parse failed at maximum reallocation count"); } END_TEST START_TEST(test_alloc_long_doc_name) { const char *text = /* 64 characters per line */ ""; int i; const int max_alloc_count = 20; for (i = 0; i < max_alloc_count; i++) { allocation_count = i; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_alloc_parse_xdecl() */ alloc_teardown(); alloc_setup(); } if (i == 0) fail("Parsing worked despite failing reallocations"); else if (i == max_alloc_count) fail("Parsing failed even at max reallocation count"); } END_TEST START_TEST(test_alloc_long_base) { const char *text = "\n" "]>\n" "&e;"; char entity_text[] = "Hello world"; const XML_Char *base = /* 64 characters per line */ /* clang-format off */ XCS("LongBaseURI/that/will/overflow/an/internal/buffer/and/cause/it/t") XCS("o/have/to/grow/PQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/") XCS("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/") XCS("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/") XCS("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/") XCS("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/") XCS("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/") XCS("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/") XCS("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/") XCS("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/") XCS("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/") XCS("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/") XCS("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/") XCS("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/") XCS("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/") XCS("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/"); /* clang-format on */ int i; const int max_alloc_count = 25; for (i = 0; i < max_alloc_count; i++) { allocation_count = i; XML_SetUserData(g_parser, entity_text); XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetExternalEntityRefHandler(g_parser, external_entity_alloc); if (XML_SetBase(g_parser, base) == XML_STATUS_ERROR) { XML_ParserReset(g_parser, NULL); continue; } if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_alloc_parse_xdecl() */ alloc_teardown(); alloc_setup(); } if (i == 0) fail("Parsing worked despite failing allocations"); else if (i == max_alloc_count) fail("Parsing failed even at max allocation count"); } END_TEST START_TEST(test_alloc_long_public_id) { const char *text = "\n" "]>\n" "&e;"; char entity_text[] = "Hello world"; int i; const int max_alloc_count = 40; for (i = 0; i < max_alloc_count; i++) { allocation_count = i; XML_SetUserData(g_parser, entity_text); XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetExternalEntityRefHandler(g_parser, external_entity_alloc); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_alloc_parse_xdecl() */ alloc_teardown(); alloc_setup(); } if (i == 0) fail("Parsing worked despite failing allocations"); else if (i == max_alloc_count) fail("Parsing failed even at max allocation count"); } END_TEST START_TEST(test_alloc_long_entity_value) { const char *text = "\n" " \n" "]>\n" "&e2;"; char entity_text[] = "Hello world"; int i; const int max_alloc_count = 40; for (i = 0; i < max_alloc_count; i++) { allocation_count = i; XML_SetUserData(g_parser, entity_text); XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetExternalEntityRefHandler(g_parser, external_entity_alloc); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_alloc_parse_xdecl() */ alloc_teardown(); alloc_setup(); } if (i == 0) fail("Parsing worked despite failing allocations"); else if (i == max_alloc_count) fail("Parsing failed even at max allocation count"); } END_TEST START_TEST(test_alloc_long_notation) { const char *text = "\n" " \n" " \n" "]>\n" "&e2;"; ExtOption options[] = {{XCS("foo"), "Entity Foo"}, {XCS("bar"), "Entity Bar"}, {NULL, NULL}}; int i; const int max_alloc_count = 40; for (i = 0; i < max_alloc_count; i++) { allocation_count = i; XML_SetUserData(g_parser, options); XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetExternalEntityRefHandler(g_parser, external_entity_optioner); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_alloc_parse_xdecl() */ alloc_teardown(); alloc_setup(); } if (i == 0) fail("Parsing worked despite failing allocations"); else if (i == max_alloc_count) fail("Parsing failed even at max allocation count"); } END_TEST static void nsalloc_setup(void) { XML_Memory_Handling_Suite memsuite = {duff_allocator, duff_reallocator, free}; XML_Char ns_sep[2] = {' ', '\0'}; /* Ensure the parser creation will go through */ allocation_count = ALLOC_ALWAYS_SUCCEED; reallocation_count = REALLOC_ALWAYS_SUCCEED; g_parser = XML_ParserCreate_MM(NULL, &memsuite, ns_sep); if (g_parser == NULL) fail("Parser not created"); } static void nsalloc_teardown(void) { basic_teardown(); } /* Test the effects of allocation failure in simple namespace parsing. * Based on test_ns_default_with_empty_uri() */ START_TEST(test_nsalloc_xmlns) { const char *text = "\n" " \n" ""; unsigned int i; const unsigned int max_alloc_count = 30; for (i = 0; i < max_alloc_count; i++) { allocation_count = i; /* Exercise more code paths with a default handler */ XML_SetDefaultHandler(g_parser, dummy_default_handler); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* Resetting the parser is insufficient, because some memory * allocations are cached within the parser. Instead we use * the teardown and setup routines to ensure that we have the * right sort of parser back in our hands. */ nsalloc_teardown(); nsalloc_setup(); } if (i == 0) fail("Parsing worked despite failing allocations"); else if (i == max_alloc_count) fail("Parsing failed even at maximum allocation count"); } END_TEST /* Test XML_ParseBuffer interface with namespace and a dicky allocator */ START_TEST(test_nsalloc_parse_buffer) { const char *text = "Hello"; void *buffer; /* Try a parse before the start of the world */ /* (Exercises new code path) */ if (XML_ParseBuffer(g_parser, 0, XML_FALSE) != XML_STATUS_ERROR) fail("Pre-init XML_ParseBuffer not faulted"); if (XML_GetErrorCode(g_parser) != XML_ERROR_NO_BUFFER) fail("Pre-init XML_ParseBuffer faulted for wrong reason"); buffer = XML_GetBuffer(g_parser, 1 /* any small number greater than 0 */); if (buffer == NULL) fail("Could not acquire parse buffer"); allocation_count = 0; if (XML_ParseBuffer(g_parser, 0, XML_FALSE) != XML_STATUS_ERROR) fail("Pre-init XML_ParseBuffer not faulted"); if (XML_GetErrorCode(g_parser) != XML_ERROR_NO_MEMORY) fail("Pre-init XML_ParseBuffer faulted for wrong reason"); /* Now with actual memory allocation */ allocation_count = ALLOC_ALWAYS_SUCCEED; if (XML_ParseBuffer(g_parser, 0, XML_FALSE) != XML_STATUS_OK) xml_failure(g_parser); /* Check that resuming an unsuspended parser is faulted */ if (XML_ResumeParser(g_parser) != XML_STATUS_ERROR) fail("Resuming unsuspended parser not faulted"); if (XML_GetErrorCode(g_parser) != XML_ERROR_NOT_SUSPENDED) xml_failure(g_parser); /* Get the parser into suspended state */ XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); resumable = XML_TRUE; buffer = XML_GetBuffer(g_parser, (int)strlen(text)); if (buffer == NULL) fail("Could not acquire parse buffer"); assert(buffer != NULL); memcpy(buffer, text, strlen(text)); if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_TRUE) != XML_STATUS_SUSPENDED) xml_failure(g_parser); if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE) xml_failure(g_parser); if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) fail("Suspended XML_ParseBuffer not faulted"); if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED) xml_failure(g_parser); if (XML_GetBuffer(g_parser, (int)strlen(text)) != NULL) fail("Suspended XML_GetBuffer not faulted"); /* Get it going again and complete the world */ XML_SetCharacterDataHandler(g_parser, NULL); if (XML_ResumeParser(g_parser) != XML_STATUS_OK) xml_failure(g_parser); if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) fail("Post-finishing XML_ParseBuffer not faulted"); if (XML_GetErrorCode(g_parser) != XML_ERROR_FINISHED) xml_failure(g_parser); if (XML_GetBuffer(g_parser, (int)strlen(text)) != NULL) fail("Post-finishing XML_GetBuffer not faulted"); } END_TEST /* Check handling of long prefix names (pool growth) */ START_TEST(test_nsalloc_long_prefix) { const char *text = "<" /* 64 characters per line */ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" ":foo xmlns:" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "='http://example.org/'>" ""; int i; const int max_alloc_count = 40; for (i = 0; i < max_alloc_count; i++) { allocation_count = i; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_nsalloc_xmlns() */ nsalloc_teardown(); nsalloc_setup(); } if (i == 0) fail("Parsing worked despite failing allocations"); else if (i == max_alloc_count) fail("Parsing failed even at max allocation count"); } END_TEST /* Check handling of long uri names (pool growth) */ START_TEST(test_nsalloc_long_uri) { const char *text = "" ""; int i; const int max_alloc_count = 40; for (i = 0; i < max_alloc_count; i++) { allocation_count = i; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_nsalloc_xmlns() */ nsalloc_teardown(); nsalloc_setup(); } if (i == 0) fail("Parsing worked despite failing allocations"); else if (i == max_alloc_count) fail("Parsing failed even at max allocation count"); } END_TEST /* Test handling of long attribute names with prefixes */ START_TEST(test_nsalloc_long_attr) { const char *text = "" ""; int i; const int max_alloc_count = 40; for (i = 0; i < max_alloc_count; i++) { allocation_count = i; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_nsalloc_xmlns() */ nsalloc_teardown(); nsalloc_setup(); } if (i == 0) fail("Parsing worked despite failing allocations"); else if (i == max_alloc_count) fail("Parsing failed even at max allocation count"); } END_TEST /* Test handling of an attribute name with a long namespace prefix */ START_TEST(test_nsalloc_long_attr_prefix) { const char *text = "" ""; const XML_Char *elemstr[] = { /* clang-format off */ XCS("http://example.org/ e foo"), XCS("http://example.org/ a ") XCS("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ") XCS("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ") XCS("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ") XCS("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ") XCS("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ") XCS("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ") XCS("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ") XCS("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ") XCS("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ") XCS("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ") XCS("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ") XCS("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ") XCS("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ") XCS("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ") XCS("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ") XCS("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ") /* clang-format on */ }; int i; const int max_alloc_count = 40; for (i = 0; i < max_alloc_count; i++) { allocation_count = i; XML_SetReturnNSTriplet(g_parser, XML_TRUE); XML_SetUserData(g_parser, (void *)elemstr); XML_SetElementHandler(g_parser, triplet_start_checker, triplet_end_checker); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_nsalloc_xmlns() */ nsalloc_teardown(); nsalloc_setup(); } if (i == 0) fail("Parsing worked despite failing allocations"); else if (i == max_alloc_count) fail("Parsing failed even at max allocation count"); } END_TEST /* Test attribute handling in the face of a dodgy reallocator */ START_TEST(test_nsalloc_realloc_attributes) { const char *text = "" ""; int i; const int max_realloc_count = 10; for (i = 0; i < max_realloc_count; i++) { reallocation_count = i; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_nsalloc_xmlns() */ nsalloc_teardown(); nsalloc_setup(); } if (i == 0) fail("Parsing worked despite failing reallocations"); else if (i == max_realloc_count) fail("Parsing failed at max reallocation count"); } END_TEST /* Test long element names with namespaces under a failing allocator */ START_TEST(test_nsalloc_long_element) { const char *text = "" ""; const XML_Char *elemstr[] = {XCS("http://example.org/") XCS(" thisisalongenoughelementnametotriggerareallocation foo"), XCS("http://example.org/ a bar")}; int i; const int max_alloc_count = 30; for (i = 0; i < max_alloc_count; i++) { allocation_count = i; XML_SetReturnNSTriplet(g_parser, XML_TRUE); XML_SetUserData(g_parser, (void *)elemstr); XML_SetElementHandler(g_parser, triplet_start_checker, triplet_end_checker); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_nsalloc_xmlns() */ nsalloc_teardown(); nsalloc_setup(); } if (i == 0) fail("Parsing worked despite failing reallocations"); else if (i == max_alloc_count) fail("Parsing failed at max reallocation count"); } END_TEST /* Test the effects of reallocation failure when reassigning a * binding. * * XML_ParserReset does not free the BINDING structures used by a * parser, but instead adds them to an internal free list to be reused * as necessary. Likewise the URI buffers allocated for the binding * aren't freed, but kept attached to their existing binding. If the * new binding has a longer URI, it will need reallocation. This test * provokes that reallocation, and tests the control path if it fails. */ START_TEST(test_nsalloc_realloc_binding_uri) { const char *first = "\n" " \n" ""; const char *second = "\n" " \n" ""; unsigned i; const unsigned max_realloc_count = 10; /* First, do a full parse that will leave bindings around */ if (_XML_Parse_SINGLE_BYTES(g_parser, first, (int)strlen(first), XML_TRUE) == XML_STATUS_ERROR) xml_failure(g_parser); /* Now repeat with a longer URI and a duff reallocator */ for (i = 0; i < max_realloc_count; i++) { XML_ParserReset(g_parser, NULL); reallocation_count = i; if (_XML_Parse_SINGLE_BYTES(g_parser, second, (int)strlen(second), XML_TRUE) != XML_STATUS_ERROR) break; } if (i == 0) fail("Parsing worked despite failing reallocation"); else if (i == max_realloc_count) fail("Parsing failed at max reallocation count"); } END_TEST /* Check handling of long prefix names (pool growth) */ START_TEST(test_nsalloc_realloc_long_prefix) { const char *text = "<" /* 64 characters per line */ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" ":foo xmlns:" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "='http://example.org/'>" ""; int i; const int max_realloc_count = 12; for (i = 0; i < max_realloc_count; i++) { reallocation_count = i; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_nsalloc_xmlns() */ nsalloc_teardown(); nsalloc_setup(); } if (i == 0) fail("Parsing worked despite failing reallocations"); else if (i == max_realloc_count) fail("Parsing failed even at max reallocation count"); } END_TEST /* Check handling of even long prefix names (different code path) */ START_TEST(test_nsalloc_realloc_longer_prefix) { const char *text = "<" /* 64 characters per line */ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "Q:foo xmlns:" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "Q='http://example.org/'>" ""; int i; const int max_realloc_count = 12; for (i = 0; i < max_realloc_count; i++) { reallocation_count = i; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_nsalloc_xmlns() */ nsalloc_teardown(); nsalloc_setup(); } if (i == 0) fail("Parsing worked despite failing reallocations"); else if (i == max_realloc_count) fail("Parsing failed even at max reallocation count"); } END_TEST START_TEST(test_nsalloc_long_namespace) { const char *text1 = "<" /* 64 characters per line */ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" ":e xmlns:" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "='http://example.org/'>\n"; const char *text2 = "<" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" ":f " "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" ":attr='foo'/>\n" ""; int i; const int max_alloc_count = 40; for (i = 0; i < max_alloc_count; i++) { allocation_count = i; if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE) != XML_STATUS_ERROR && _XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_nsalloc_xmlns() */ nsalloc_teardown(); nsalloc_setup(); } if (i == 0) fail("Parsing worked despite failing allocations"); else if (i == max_alloc_count) fail("Parsing failed even at max allocation count"); } END_TEST /* Using a slightly shorter namespace name provokes allocations in * slightly different places in the code. */ START_TEST(test_nsalloc_less_long_namespace) { const char *text = "<" /* 64 characters per line */ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz012345678" ":e xmlns:" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz012345678" "='http://example.org/'>\n" "<" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz012345678" ":f " "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz012345678" ":att='foo'/>\n" ""; int i; const int max_alloc_count = 40; for (i = 0; i < max_alloc_count; i++) { allocation_count = i; if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_nsalloc_xmlns() */ nsalloc_teardown(); nsalloc_setup(); } if (i == 0) fail("Parsing worked despite failing allocations"); else if (i == max_alloc_count) fail("Parsing failed even at max allocation count"); } END_TEST START_TEST(test_nsalloc_long_context) { const char *text = "\n" " \n" "]>\n" "\n" "&en;" ""; ExtOption options[] = { {XCS("foo"), ""}, {XCS("bar"), ""}, {NULL, NULL}}; int i; const int max_alloc_count = 70; for (i = 0; i < max_alloc_count; i++) { allocation_count = i; XML_SetUserData(g_parser, options); XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetExternalEntityRefHandler(g_parser, external_entity_optioner); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_nsalloc_xmlns() */ nsalloc_teardown(); nsalloc_setup(); } if (i == 0) fail("Parsing worked despite failing allocations"); else if (i == max_alloc_count) fail("Parsing failed even at max allocation count"); } END_TEST /* This function is void; it will throw a fail() on error, so if it * returns normally it must have succeeded. */ static void context_realloc_test(const char *text) { ExtOption options[] = { {XCS("foo"), ""}, {XCS("bar"), ""}, {NULL, NULL}}; int i; const int max_realloc_count = 6; for (i = 0; i < max_realloc_count; i++) { reallocation_count = i; XML_SetUserData(g_parser, options); XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetExternalEntityRefHandler(g_parser, external_entity_optioner); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_nsalloc_xmlns() */ nsalloc_teardown(); nsalloc_setup(); } if (i == 0) fail("Parsing worked despite failing reallocations"); else if (i == max_realloc_count) fail("Parsing failed even at max reallocation count"); } START_TEST(test_nsalloc_realloc_long_context) { const char *text = "\n" "]>\n" "\n" "&en;" ""; context_realloc_test(text); } END_TEST START_TEST(test_nsalloc_realloc_long_context_2) { const char *text = "\n" "]>\n" "\n" "&en;" ""; context_realloc_test(text); } END_TEST START_TEST(test_nsalloc_realloc_long_context_3) { const char *text = "\n" "]>\n" "\n" "&en;" ""; context_realloc_test(text); } END_TEST START_TEST(test_nsalloc_realloc_long_context_4) { const char *text = "\n" "]>\n" "\n" "&en;" ""; context_realloc_test(text); } END_TEST START_TEST(test_nsalloc_realloc_long_context_5) { const char *text = "\n" "]>\n" "\n" "&en;" ""; context_realloc_test(text); } END_TEST START_TEST(test_nsalloc_realloc_long_context_6) { const char *text = "\n" "]>\n" "\n" "&en;" ""; context_realloc_test(text); } END_TEST START_TEST(test_nsalloc_realloc_long_context_7) { const char *text = "\n" "]>\n" "\n" "&en;" ""; context_realloc_test(text); } END_TEST START_TEST(test_nsalloc_realloc_long_ge_name) { const char *text = "\n" "]>\n" "\n" "&" /* 64 characters per line */ "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" ";" ""; ExtOption options[] = { {XCS("foo"), ""}, {XCS("bar"), ""}, {NULL, NULL}}; int i; const int max_realloc_count = 10; for (i = 0; i < max_realloc_count; i++) { reallocation_count = i; XML_SetUserData(g_parser, options); XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetExternalEntityRefHandler(g_parser, external_entity_optioner); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_nsalloc_xmlns() */ nsalloc_teardown(); nsalloc_setup(); } if (i == 0) fail("Parsing worked despite failing reallocations"); else if (i == max_realloc_count) fail("Parsing failed even at max reallocation count"); } END_TEST /* Test that when a namespace is passed through the context mechanism * to an external entity parser, the parsers handle reallocation * failures correctly. The prefix is exactly the right length to * provoke particular uncommon code paths. */ START_TEST(test_nsalloc_realloc_long_context_in_dtd) { const char *text1 = "\n" "]>\n" "<" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" ":doc xmlns:" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" "='foo/Second'>&First;"; const char *text2 = ""; ExtOption options[] = {{XCS("foo/First"), "Hello world"}, {NULL, NULL}}; int i; const int max_realloc_count = 20; for (i = 0; i < max_realloc_count; i++) { reallocation_count = i; XML_SetUserData(g_parser, options); XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetExternalEntityRefHandler(g_parser, external_entity_optioner); if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE) != XML_STATUS_ERROR && _XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_nsalloc_xmlns() */ nsalloc_teardown(); nsalloc_setup(); } if (i == 0) fail("Parsing worked despite failing reallocations"); else if (i == max_realloc_count) fail("Parsing failed even at max reallocation count"); } END_TEST START_TEST(test_nsalloc_long_default_in_ext) { const char *text = "\n" " \n" "]>\n" "&x;"; ExtOption options[] = {{XCS("foo"), ""}, {NULL, NULL}}; int i; const int max_alloc_count = 50; for (i = 0; i < max_alloc_count; i++) { allocation_count = i; XML_SetUserData(g_parser, options); XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetExternalEntityRefHandler(g_parser, external_entity_optioner); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_nsalloc_xmlns() */ nsalloc_teardown(); nsalloc_setup(); } if (i == 0) fail("Parsing worked despite failing allocations"); else if (i == max_alloc_count) fail("Parsing failed even at max allocation count"); } END_TEST START_TEST(test_nsalloc_long_systemid_in_ext) { const char *text = "\n" "]>\n" "&en;"; ExtOption options[] = { {XCS("foo"), ""}, {/* clang-format off */ XCS("ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/") XCS("ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/") XCS("ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/") XCS("ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/") XCS("ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/") XCS("ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/") XCS("ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/") XCS("ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/") XCS("ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/") XCS("ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/") XCS("ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/") XCS("ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/") XCS("ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/") XCS("ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/") XCS("ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/") XCS("ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/"), /* clang-format on */ ""}, {NULL, NULL}}; int i; const int max_alloc_count = 55; for (i = 0; i < max_alloc_count; i++) { allocation_count = i; XML_SetUserData(g_parser, options); XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetExternalEntityRefHandler(g_parser, external_entity_optioner); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_nsalloc_xmlns() */ nsalloc_teardown(); nsalloc_setup(); } if (i == 0) fail("Parsing worked despite failing allocations"); else if (i == max_alloc_count) fail("Parsing failed even at max allocation count"); } END_TEST /* Test the effects of allocation failure on parsing an element in a * namespace. Based on test_nsalloc_long_context. */ START_TEST(test_nsalloc_prefixed_element) { const char *text = "\n" " \n" "]>\n" "\n" "&en;" ""; ExtOption options[] = { {XCS("foo"), ""}, {XCS("bar"), ""}, {NULL, NULL}}; int i; const int max_alloc_count = 70; for (i = 0; i < max_alloc_count; i++) { allocation_count = i; XML_SetUserData(g_parser, options); XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetExternalEntityRefHandler(g_parser, external_entity_optioner); if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; /* See comment in test_nsalloc_xmlns() */ nsalloc_teardown(); nsalloc_setup(); } if (i == 0) fail("Success despite failing allocator"); else if (i == max_alloc_count) fail("Failed even at full allocation count"); } END_TEST #if defined(XML_DTD) typedef enum XML_Status (*XmlParseFunction)(XML_Parser, const char *, int, int); struct AccountingTestCase { const char *primaryText; const char *firstExternalText; /* often NULL */ const char *secondExternalText; /* often NULL */ const unsigned long long expectedCountBytesIndirectExtra; XML_Bool singleBytesWanted; }; static int accounting_external_entity_ref_handler(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId) { UNUSED_P(context); UNUSED_P(base); UNUSED_P(publicId); const struct AccountingTestCase *const testCase = (const struct AccountingTestCase *)XML_GetUserData(parser); const char *externalText = NULL; if (xcstrcmp(systemId, XCS("first.ent")) == 0) { externalText = testCase->firstExternalText; } else if (xcstrcmp(systemId, XCS("second.ent")) == 0) { externalText = testCase->secondExternalText; } else { assert(! "systemId is neither \"first.ent\" nor \"second.ent\""); } assert(externalText); XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0); assert(entParser); const XmlParseFunction xmlParseFunction = testCase->singleBytesWanted ? _XML_Parse_SINGLE_BYTES : XML_Parse; const enum XML_Status status = xmlParseFunction( entParser, externalText, (int)strlen(externalText), XML_TRUE); XML_ParserFree(entParser); return status; } START_TEST(test_accounting_precision) { const XML_Bool filled_later = XML_TRUE; /* value is arbitrary */ struct AccountingTestCase cases[] = { {"", NULL, NULL, 0, 0}, {"", NULL, NULL, 0, 0}, /* Attributes */ {"", NULL, NULL, 0, filled_later}, {"", NULL, NULL, 0, 0}, {"", NULL, NULL, 0, filled_later}, {"", NULL, NULL, sizeof(XML_Char) * 5 /* number of predefined entities */, filled_later}, {"\n" " \n" "", NULL, NULL, 0, filled_later}, /* Text */ {"text", NULL, NULL, 0, filled_later}, {"text1text2", NULL, NULL, 0, filled_later}, {"&'><"", NULL, NULL, sizeof(XML_Char) * 5 /* number of predefined entities */, filled_later}, {"A)", NULL, NULL, 0, filled_later}, /* Prolog */ {"", NULL, NULL, 0, filled_later}, /* Whitespace */ {" ", NULL, NULL, 0, filled_later}, {"", NULL, NULL, 0, filled_later}, {"", NULL, NULL, 0, filled_later}, /* Comments */ {"", NULL, NULL, 0, filled_later}, /* Processing instructions */ {"", NULL, NULL, 0, filled_later}, {"", "%e1;", "", 0, filled_later}, /* CDATA */ {"", NULL, NULL, 0, filled_later}, /* The following is the essence of this OSS-Fuzz finding: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=34302 https://oss-fuzz.com/testcase-detail/4860575394955264 */ {"333\">\n" "]>\n" "&e;\n", NULL, NULL, sizeof(XML_Char) * strlen("111333"), filled_later}, /* Conditional sections */ {"\n" "\n" "\n" "%import;\n" "]>\n" "\n", "]]>\n" "]]>", NULL, sizeof(XML_Char) * (strlen("INCLUDE") + strlen("IGNORE")), filled_later}, /* General entities */ {"\n" "]>\n" "&nine;", NULL, NULL, sizeof(XML_Char) * strlen("123456789"), filled_later}, {"\n" "]>\n" "", NULL, NULL, sizeof(XML_Char) * strlen("123456789"), filled_later}, {"\n" "\n" "]>\n" "&nine2;&nine2;&nine2;", NULL, NULL, sizeof(XML_Char) * 3 /* calls to &nine2; */ * 2 /* calls to &nine; */ * (strlen("&nine;") + strlen("123456789")), filled_later}, {"\n" "]>\n" "&five;", "12345", NULL, 0, filled_later}, /* Parameter entities */ {"\">\n" "%comment;\n" "]>\n" "", NULL, NULL, sizeof(XML_Char) * strlen(""), filled_later}, {"\n" "%ninedef;\n" "]>\n" "&nine;", NULL, NULL, sizeof(XML_Char) * (strlen("") + strlen("123456789")), filled_later}, {"\">\n" "%comment;\">\n" "%comment2;\n" "]>\n" "\n", NULL, NULL, sizeof(XML_Char) * (strlen("%comment;%comment;") + 2 * strlen("")), filled_later}, {"\n" " \n" " %five2def;\n" "]>\n" "&five2;", NULL, NULL, /* from "%five2def;": */ sizeof(XML_Char) * (strlen("") + 2 /* calls to "%five;" */ * strlen("12345") + /* from "&five2;": */ strlen("[12345][12345]]]]")), filled_later}, {"\n" "", "'>\n" "%comment;%comment;'>\n" "%comment2;", NULL, sizeof(XML_Char) * (strlen("%comment;%comment;") + 2 /* calls to "%comment;" */ * strlen("")), filled_later}, {"\n" "", "\n" "%e1;'>\n" "%e2;\n", "", sizeof(XML_Char) * strlen(""), filled_later}, { "\n" "", "\n" "", "\n" "hello\n" "xml" /* without trailing newline! */, 0, filled_later, }, { "\n" "", "\n" "", "\n" "hello\n" "xml\n" /* with trailing newline! */, 0, filled_later, }, {"\n" "\n", "\n" "\n" "\n" "%e1;\n", "\xEF\xBB\xBF" /* UTF-8 BOM */, strlen("\xEF\xBB\xBF"), filled_later}, {"\n" "]>\n" "&five;", "\xEF\xBB\xBF" /* UTF-8 BOM */, NULL, 0, filled_later}, }; const size_t countCases = sizeof(cases) / sizeof(cases[0]); size_t u = 0; for (; u < countCases; u++) { size_t v = 0; for (; v < 2; v++) { const XML_Bool singleBytesWanted = (v == 0) ? XML_FALSE : XML_TRUE; const unsigned long long expectedCountBytesDirect = strlen(cases[u].primaryText); const unsigned long long expectedCountBytesIndirect = (cases[u].firstExternalText ? strlen(cases[u].firstExternalText) : 0) + (cases[u].secondExternalText ? strlen(cases[u].secondExternalText) : 0) + cases[u].expectedCountBytesIndirectExtra; XML_Parser parser = XML_ParserCreate(NULL); XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); if (cases[u].firstExternalText) { XML_SetExternalEntityRefHandler(parser, accounting_external_entity_ref_handler); XML_SetUserData(parser, (void *)&cases[u]); cases[u].singleBytesWanted = singleBytesWanted; } const XmlParseFunction xmlParseFunction = singleBytesWanted ? _XML_Parse_SINGLE_BYTES : XML_Parse; enum XML_Status status = xmlParseFunction(parser, cases[u].primaryText, (int)strlen(cases[u].primaryText), XML_TRUE); if (status != XML_STATUS_OK) { _xml_failure(parser, __FILE__, __LINE__); } const unsigned long long actualCountBytesDirect = testingAccountingGetCountBytesDirect(parser); const unsigned long long actualCountBytesIndirect = testingAccountingGetCountBytesIndirect(parser); XML_ParserFree(parser); if (actualCountBytesDirect != expectedCountBytesDirect) { fprintf( stderr, "Document " EXPAT_FMT_SIZE_T("") " of " EXPAT_FMT_SIZE_T("") ", %s: Expected " EXPAT_FMT_ULL( "") " count direct bytes, got " EXPAT_FMT_ULL("") " instead.\n", u + 1, countCases, singleBytesWanted ? "single bytes" : "chunks", expectedCountBytesDirect, actualCountBytesDirect); fail("Count of direct bytes is off"); } if (actualCountBytesIndirect != expectedCountBytesIndirect) { fprintf( stderr, "Document " EXPAT_FMT_SIZE_T("") " of " EXPAT_FMT_SIZE_T("") ", %s: Expected " EXPAT_FMT_ULL( "") " count indirect bytes, got " EXPAT_FMT_ULL("") " instead.\n", u + 1, countCases, singleBytesWanted ? "single bytes" : "chunks", expectedCountBytesIndirect, actualCountBytesIndirect); fail("Count of indirect bytes is off"); } } } } END_TEST +static float +portableNAN() { + return strtof("nan", NULL); +} + +static float +portableINFINITY() { + return strtof("infinity", NULL); +} + START_TEST(test_billion_laughs_attack_protection_api) { XML_Parser parserWithoutParent = XML_ParserCreate(NULL); XML_Parser parserWithParent = XML_ExternalEntityParserCreate(parserWithoutParent, NULL, NULL); if (parserWithoutParent == NULL) fail("parserWithoutParent is NULL"); if (parserWithParent == NULL) fail("parserWithParent is NULL"); // XML_SetBillionLaughsAttackProtectionMaximumAmplification, error cases if (XML_SetBillionLaughsAttackProtectionMaximumAmplification(NULL, 123.0f) == XML_TRUE) fail("Call with NULL parser is NOT supposed to succeed"); if (XML_SetBillionLaughsAttackProtectionMaximumAmplification(parserWithParent, 123.0f) == XML_TRUE) fail("Call with non-root parser is NOT supposed to succeed"); if (XML_SetBillionLaughsAttackProtectionMaximumAmplification( - parserWithoutParent, NAN) + parserWithoutParent, portableNAN()) == XML_TRUE) fail("Call with NaN limit is NOT supposed to succeed"); if (XML_SetBillionLaughsAttackProtectionMaximumAmplification( parserWithoutParent, -1.0f) == XML_TRUE) fail("Call with negative limit is NOT supposed to succeed"); if (XML_SetBillionLaughsAttackProtectionMaximumAmplification( parserWithoutParent, 0.9f) == XML_TRUE) fail("Call with positive limit <1.0 is NOT supposed to succeed"); // XML_SetBillionLaughsAttackProtectionMaximumAmplification, success cases if (XML_SetBillionLaughsAttackProtectionMaximumAmplification( parserWithoutParent, 1.0f) == XML_FALSE) fail("Call with positive limit >=1.0 is supposed to succeed"); if (XML_SetBillionLaughsAttackProtectionMaximumAmplification( parserWithoutParent, 123456.789f) == XML_FALSE) fail("Call with positive limit >=1.0 is supposed to succeed"); if (XML_SetBillionLaughsAttackProtectionMaximumAmplification( - parserWithoutParent, INFINITY) + parserWithoutParent, portableINFINITY()) == XML_FALSE) fail("Call with positive limit >=1.0 is supposed to succeed"); // XML_SetBillionLaughsAttackProtectionActivationThreshold, error cases if (XML_SetBillionLaughsAttackProtectionActivationThreshold(NULL, 123) == XML_TRUE) fail("Call with NULL parser is NOT supposed to succeed"); if (XML_SetBillionLaughsAttackProtectionActivationThreshold(parserWithParent, 123) == XML_TRUE) fail("Call with non-root parser is NOT supposed to succeed"); // XML_SetBillionLaughsAttackProtectionActivationThreshold, success cases if (XML_SetBillionLaughsAttackProtectionActivationThreshold( parserWithoutParent, 123) == XML_FALSE) fail("Call with non-NULL parentless parser is supposed to succeed"); XML_ParserFree(parserWithParent); XML_ParserFree(parserWithoutParent); } END_TEST START_TEST(test_helper_unsigned_char_to_printable) { // Smoke test unsigned char uc = 0; for (; uc < (unsigned char)-1; uc++) { const char *const printable = unsignedCharToPrintable(uc); if (printable == NULL) fail("unsignedCharToPrintable returned NULL"); if (strlen(printable) < (size_t)1) fail("unsignedCharToPrintable returned empty string"); } // Two concrete samples if (strcmp(unsignedCharToPrintable('A'), "A") != 0) fail("unsignedCharToPrintable result mistaken"); if (strcmp(unsignedCharToPrintable('\\'), "\\\\") != 0) fail("unsignedCharToPrintable result mistaken"); } END_TEST #endif // defined(XML_DTD) static Suite * make_suite(void) { Suite *s = suite_create("basic"); TCase *tc_basic = tcase_create("basic tests"); TCase *tc_namespace = tcase_create("XML namespaces"); TCase *tc_misc = tcase_create("miscellaneous tests"); TCase *tc_alloc = tcase_create("allocation tests"); TCase *tc_nsalloc = tcase_create("namespace allocation tests"); #if defined(XML_DTD) TCase *tc_accounting = tcase_create("accounting tests"); #endif suite_add_tcase(s, tc_basic); tcase_add_checked_fixture(tc_basic, basic_setup, basic_teardown); tcase_add_test(tc_basic, test_nul_byte); tcase_add_test(tc_basic, test_u0000_char); tcase_add_test(tc_basic, test_siphash_self); tcase_add_test(tc_basic, test_siphash_spec); tcase_add_test(tc_basic, test_bom_utf8); tcase_add_test(tc_basic, test_bom_utf16_be); tcase_add_test(tc_basic, test_bom_utf16_le); tcase_add_test(tc_basic, test_nobom_utf16_le); tcase_add_test(tc_basic, test_illegal_utf8); tcase_add_test(tc_basic, test_utf8_auto_align); tcase_add_test(tc_basic, test_utf16); tcase_add_test(tc_basic, test_utf16_le_epilog_newline); tcase_add_test(tc_basic, test_not_utf16); tcase_add_test(tc_basic, test_bad_encoding); tcase_add_test(tc_basic, test_latin1_umlauts); tcase_add_test(tc_basic, test_long_utf8_character); tcase_add_test(tc_basic, test_long_latin1_attribute); tcase_add_test(tc_basic, test_long_ascii_attribute); /* Regression test for SF bug #491986. */ tcase_add_test(tc_basic, test_danish_latin1); /* Regression test for SF bug #514281. */ tcase_add_test(tc_basic, test_french_charref_hexidecimal); tcase_add_test(tc_basic, test_french_charref_decimal); tcase_add_test(tc_basic, test_french_latin1); tcase_add_test(tc_basic, test_french_utf8); tcase_add_test(tc_basic, test_utf8_false_rejection); tcase_add_test(tc_basic, test_line_number_after_parse); tcase_add_test(tc_basic, test_column_number_after_parse); tcase_add_test(tc_basic, test_line_and_column_numbers_inside_handlers); tcase_add_test(tc_basic, test_line_number_after_error); tcase_add_test(tc_basic, test_column_number_after_error); tcase_add_test(tc_basic, test_really_long_lines); tcase_add_test(tc_basic, test_really_long_encoded_lines); tcase_add_test(tc_basic, test_end_element_events); tcase_add_test(tc_basic, test_attr_whitespace_normalization); tcase_add_test(tc_basic, test_xmldecl_misplaced); tcase_add_test(tc_basic, test_xmldecl_invalid); tcase_add_test(tc_basic, test_xmldecl_missing_attr); tcase_add_test(tc_basic, test_xmldecl_missing_value); tcase_add_test(tc_basic, test_unknown_encoding_internal_entity); tcase_add_test(tc_basic, test_unrecognised_encoding_internal_entity); tcase_add_test(tc_basic, test_wfc_undeclared_entity_unread_external_subset); tcase_add_test(tc_basic, test_wfc_undeclared_entity_no_external_subset); tcase_add_test(tc_basic, test_wfc_undeclared_entity_standalone); tcase_add_test(tc_basic, test_wfc_undeclared_entity_with_external_subset); tcase_add_test(tc_basic, test_not_standalone_handler_reject); tcase_add_test(tc_basic, test_not_standalone_handler_accept); tcase_add_test(tc_basic, test_wfc_undeclared_entity_with_external_subset_standalone); tcase_add_test(tc_basic, test_entity_with_external_subset_unless_standalone); tcase_add_test(tc_basic, test_wfc_no_recursive_entity_refs); tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_encoding); tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_no_handler); tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_bom); tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding); tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding_2); tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_invalid_parse); tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_invalid_suspended_parse); tcase_add_test(tc_basic, test_dtd_default_handling); tcase_add_test(tc_basic, test_dtd_attr_handling); tcase_add_test(tc_basic, test_empty_ns_without_namespaces); tcase_add_test(tc_basic, test_ns_in_attribute_default_without_namespaces); tcase_add_test(tc_basic, test_stop_parser_between_char_data_calls); tcase_add_test(tc_basic, test_suspend_parser_between_char_data_calls); tcase_add_test(tc_basic, test_repeated_stop_parser_between_char_data_calls); tcase_add_test(tc_basic, test_good_cdata_ascii); tcase_add_test(tc_basic, test_good_cdata_utf16); tcase_add_test(tc_basic, test_good_cdata_utf16_le); tcase_add_test(tc_basic, test_long_cdata_utf16); tcase_add_test(tc_basic, test_multichar_cdata_utf16); tcase_add_test(tc_basic, test_utf16_bad_surrogate_pair); tcase_add_test(tc_basic, test_bad_cdata); tcase_add_test(tc_basic, test_bad_cdata_utf16); tcase_add_test(tc_basic, test_stop_parser_between_cdata_calls); tcase_add_test(tc_basic, test_suspend_parser_between_cdata_calls); tcase_add_test(tc_basic, test_memory_allocation); tcase_add_test(tc_basic, test_default_current); tcase_add_test(tc_basic, test_dtd_elements); tcase_add_test(tc_basic, test_dtd_elements_nesting); tcase_add_test__ifdef_xml_dtd(tc_basic, test_set_foreign_dtd); tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_not_standalone); tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_foreign_dtd); tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_with_doctype); tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_without_external_subset); tcase_add_test__ifdef_xml_dtd(tc_basic, test_empty_foreign_dtd); tcase_add_test(tc_basic, test_set_base); tcase_add_test(tc_basic, test_attributes); tcase_add_test(tc_basic, test_reset_in_entity); tcase_add_test(tc_basic, test_resume_invalid_parse); tcase_add_test(tc_basic, test_resume_resuspended); tcase_add_test(tc_basic, test_cdata_default); tcase_add_test(tc_basic, test_subordinate_reset); tcase_add_test(tc_basic, test_subordinate_suspend); tcase_add_test(tc_basic, test_subordinate_xdecl_suspend); tcase_add_test(tc_basic, test_subordinate_xdecl_abort); tcase_add_test(tc_basic, test_explicit_encoding); tcase_add_test(tc_basic, test_trailing_cr); tcase_add_test(tc_basic, test_ext_entity_trailing_cr); tcase_add_test(tc_basic, test_trailing_rsqb); tcase_add_test(tc_basic, test_ext_entity_trailing_rsqb); tcase_add_test(tc_basic, test_ext_entity_good_cdata); tcase_add_test__ifdef_xml_dtd(tc_basic, test_user_parameters); tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_ref_parameter); tcase_add_test(tc_basic, test_empty_parse); tcase_add_test(tc_basic, test_get_buffer_1); tcase_add_test(tc_basic, test_get_buffer_2); #if defined(XML_CONTEXT_BYTES) tcase_add_test(tc_basic, test_get_buffer_3_overflow); #endif tcase_add_test(tc_basic, test_byte_info_at_end); tcase_add_test(tc_basic, test_byte_info_at_error); tcase_add_test(tc_basic, test_byte_info_at_cdata); tcase_add_test(tc_basic, test_predefined_entities); tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_tag_in_dtd); tcase_add_test(tc_basic, test_not_predefined_entities); tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section); tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16); tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16_be); tcase_add_test__ifdef_xml_dtd(tc_basic, test_bad_ignore_section); tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_entity_values); tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_not_standalone); tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_value_abort); tcase_add_test(tc_basic, test_bad_public_doctype); tcase_add_test(tc_basic, test_attribute_enum_value); tcase_add_test(tc_basic, test_predefined_entity_redefinition); tcase_add_test__ifdef_xml_dtd(tc_basic, test_dtd_stop_processing); tcase_add_test(tc_basic, test_public_notation_no_sysid); tcase_add_test(tc_basic, test_nested_groups); tcase_add_test(tc_basic, test_group_choice); tcase_add_test(tc_basic, test_standalone_parameter_entity); tcase_add_test__ifdef_xml_dtd(tc_basic, test_skipped_parameter_entity); tcase_add_test__ifdef_xml_dtd(tc_basic, test_recursive_external_parameter_entity); tcase_add_test(tc_basic, test_undefined_ext_entity_in_external_dtd); tcase_add_test(tc_basic, test_suspend_xdecl); tcase_add_test(tc_basic, test_abort_epilog); tcase_add_test(tc_basic, test_abort_epilog_2); tcase_add_test(tc_basic, test_suspend_epilog); tcase_add_test(tc_basic, test_suspend_in_sole_empty_tag); tcase_add_test(tc_basic, test_unfinished_epilog); tcase_add_test(tc_basic, test_partial_char_in_epilog); tcase_add_test(tc_basic, test_hash_collision); tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_internal_entity); tcase_add_test__ifdef_xml_dtd(tc_basic, test_resume_entity_with_syntax_error); tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_parameter_entity); tcase_add_test(tc_basic, test_restart_on_error); tcase_add_test(tc_basic, test_reject_lt_in_attribute_value); tcase_add_test(tc_basic, test_reject_unfinished_param_in_att_value); tcase_add_test(tc_basic, test_trailing_cr_in_att_value); tcase_add_test(tc_basic, test_standalone_internal_entity); tcase_add_test(tc_basic, test_skipped_external_entity); tcase_add_test(tc_basic, test_skipped_null_loaded_ext_entity); tcase_add_test(tc_basic, test_skipped_unloaded_ext_entity); tcase_add_test__ifdef_xml_dtd(tc_basic, test_param_entity_with_trailing_cr); tcase_add_test(tc_basic, test_invalid_character_entity); tcase_add_test(tc_basic, test_invalid_character_entity_2); tcase_add_test(tc_basic, test_invalid_character_entity_3); tcase_add_test(tc_basic, test_invalid_character_entity_4); tcase_add_test(tc_basic, test_pi_handled_in_default); tcase_add_test(tc_basic, test_comment_handled_in_default); tcase_add_test(tc_basic, test_pi_yml); tcase_add_test(tc_basic, test_pi_xnl); tcase_add_test(tc_basic, test_pi_xmm); tcase_add_test(tc_basic, test_utf16_pi); tcase_add_test(tc_basic, test_utf16_be_pi); tcase_add_test(tc_basic, test_utf16_be_comment); tcase_add_test(tc_basic, test_utf16_le_comment); tcase_add_test(tc_basic, test_missing_encoding_conversion_fn); tcase_add_test(tc_basic, test_failing_encoding_conversion_fn); tcase_add_test(tc_basic, test_unknown_encoding_success); tcase_add_test(tc_basic, test_unknown_encoding_bad_name); tcase_add_test(tc_basic, test_unknown_encoding_bad_name_2); tcase_add_test(tc_basic, test_unknown_encoding_long_name_1); tcase_add_test(tc_basic, test_unknown_encoding_long_name_2); tcase_add_test(tc_basic, test_invalid_unknown_encoding); tcase_add_test(tc_basic, test_unknown_ascii_encoding_ok); tcase_add_test(tc_basic, test_unknown_ascii_encoding_fail); tcase_add_test(tc_basic, test_unknown_encoding_invalid_length); tcase_add_test(tc_basic, test_unknown_encoding_invalid_topbit); tcase_add_test(tc_basic, test_unknown_encoding_invalid_surrogate); tcase_add_test(tc_basic, test_unknown_encoding_invalid_high); tcase_add_test(tc_basic, test_unknown_encoding_invalid_attr_value); tcase_add_test(tc_basic, test_ext_entity_latin1_utf16le_bom); tcase_add_test(tc_basic, test_ext_entity_latin1_utf16be_bom); tcase_add_test(tc_basic, test_ext_entity_latin1_utf16le_bom2); tcase_add_test(tc_basic, test_ext_entity_latin1_utf16be_bom2); tcase_add_test(tc_basic, test_ext_entity_utf16_be); tcase_add_test(tc_basic, test_ext_entity_utf16_le); tcase_add_test(tc_basic, test_ext_entity_utf16_unknown); tcase_add_test(tc_basic, test_ext_entity_utf8_non_bom); tcase_add_test(tc_basic, test_utf8_in_cdata_section); tcase_add_test(tc_basic, test_utf8_in_cdata_section_2); tcase_add_test(tc_basic, test_utf8_in_start_tags); tcase_add_test(tc_basic, test_trailing_spaces_in_elements); tcase_add_test(tc_basic, test_utf16_attribute); tcase_add_test(tc_basic, test_utf16_second_attr); tcase_add_test(tc_basic, test_attr_after_solidus); tcase_add_test__ifdef_xml_dtd(tc_basic, test_utf16_pe); tcase_add_test(tc_basic, test_bad_attr_desc_keyword); tcase_add_test(tc_basic, test_bad_attr_desc_keyword_utf16); tcase_add_test(tc_basic, test_bad_doctype); tcase_add_test(tc_basic, test_bad_doctype_utf8); tcase_add_test(tc_basic, test_bad_doctype_utf16); tcase_add_test(tc_basic, test_bad_doctype_plus); tcase_add_test(tc_basic, test_bad_doctype_star); tcase_add_test(tc_basic, test_bad_doctype_query); tcase_add_test__ifdef_xml_dtd(tc_basic, test_unknown_encoding_bad_ignore); tcase_add_test(tc_basic, test_entity_in_utf16_be_attr); tcase_add_test(tc_basic, test_entity_in_utf16_le_attr); tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_be); tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_le); tcase_add_test(tc_basic, test_short_doctype); tcase_add_test(tc_basic, test_short_doctype_2); tcase_add_test(tc_basic, test_short_doctype_3); tcase_add_test(tc_basic, test_long_doctype); tcase_add_test(tc_basic, test_bad_entity); tcase_add_test(tc_basic, test_bad_entity_2); tcase_add_test(tc_basic, test_bad_entity_3); tcase_add_test(tc_basic, test_bad_entity_4); tcase_add_test(tc_basic, test_bad_notation); tcase_add_test(tc_basic, test_default_doctype_handler); tcase_add_test(tc_basic, test_empty_element_abort); suite_add_tcase(s, tc_namespace); tcase_add_checked_fixture(tc_namespace, namespace_setup, namespace_teardown); tcase_add_test(tc_namespace, test_return_ns_triplet); tcase_add_test(tc_namespace, test_ns_tagname_overwrite); tcase_add_test(tc_namespace, test_ns_tagname_overwrite_triplet); tcase_add_test(tc_namespace, test_start_ns_clears_start_element); tcase_add_test__ifdef_xml_dtd(tc_namespace, test_default_ns_from_ext_subset_and_ext_ge); tcase_add_test(tc_namespace, test_ns_prefix_with_empty_uri_1); tcase_add_test(tc_namespace, test_ns_prefix_with_empty_uri_2); tcase_add_test(tc_namespace, test_ns_prefix_with_empty_uri_3); tcase_add_test(tc_namespace, test_ns_prefix_with_empty_uri_4); tcase_add_test(tc_namespace, test_ns_unbound_prefix); tcase_add_test(tc_namespace, test_ns_default_with_empty_uri); tcase_add_test(tc_namespace, test_ns_duplicate_attrs_diff_prefixes); tcase_add_test(tc_namespace, test_ns_duplicate_hashes); tcase_add_test(tc_namespace, test_ns_unbound_prefix_on_attribute); tcase_add_test(tc_namespace, test_ns_unbound_prefix_on_element); tcase_add_test(tc_namespace, test_ns_parser_reset); tcase_add_test(tc_namespace, test_ns_long_element); tcase_add_test(tc_namespace, test_ns_mixed_prefix_atts); tcase_add_test(tc_namespace, test_ns_extend_uri_buffer); tcase_add_test(tc_namespace, test_ns_reserved_attributes); tcase_add_test(tc_namespace, test_ns_reserved_attributes_2); tcase_add_test(tc_namespace, test_ns_extremely_long_prefix); tcase_add_test(tc_namespace, test_ns_unknown_encoding_success); tcase_add_test(tc_namespace, test_ns_double_colon); tcase_add_test(tc_namespace, test_ns_double_colon_element); tcase_add_test(tc_namespace, test_ns_bad_attr_leafname); tcase_add_test(tc_namespace, test_ns_bad_element_leafname); tcase_add_test(tc_namespace, test_ns_utf16_leafname); tcase_add_test(tc_namespace, test_ns_utf16_element_leafname); tcase_add_test(tc_namespace, test_ns_utf16_doctype); tcase_add_test(tc_namespace, test_ns_invalid_doctype); tcase_add_test(tc_namespace, test_ns_double_colon_doctype); tcase_add_test(tc_namespace, test_ns_separator_in_uri); suite_add_tcase(s, tc_misc); tcase_add_checked_fixture(tc_misc, NULL, basic_teardown); tcase_add_test(tc_misc, test_misc_alloc_create_parser); tcase_add_test(tc_misc, test_misc_alloc_create_parser_with_encoding); tcase_add_test(tc_misc, test_misc_null_parser); tcase_add_test(tc_misc, test_misc_error_string); tcase_add_test(tc_misc, test_misc_version); tcase_add_test(tc_misc, test_misc_features); tcase_add_test(tc_misc, test_misc_attribute_leak); tcase_add_test(tc_misc, test_misc_utf16le); tcase_add_test(tc_misc, test_misc_stop_during_end_handler_issue_240_1); tcase_add_test(tc_misc, test_misc_stop_during_end_handler_issue_240_2); tcase_add_test__ifdef_xml_dtd( tc_misc, test_misc_deny_internal_entity_closing_doctype_issue_317); suite_add_tcase(s, tc_alloc); tcase_add_checked_fixture(tc_alloc, alloc_setup, alloc_teardown); tcase_add_test(tc_alloc, test_alloc_parse_xdecl); tcase_add_test(tc_alloc, test_alloc_parse_xdecl_2); tcase_add_test(tc_alloc, test_alloc_parse_pi); tcase_add_test(tc_alloc, test_alloc_parse_pi_2); tcase_add_test(tc_alloc, test_alloc_parse_pi_3); tcase_add_test(tc_alloc, test_alloc_parse_comment); tcase_add_test(tc_alloc, test_alloc_parse_comment_2); tcase_add_test__ifdef_xml_dtd(tc_alloc, test_alloc_create_external_parser); tcase_add_test__ifdef_xml_dtd(tc_alloc, test_alloc_run_external_parser); tcase_add_test__ifdef_xml_dtd(tc_alloc, test_alloc_dtd_copy_default_atts); tcase_add_test__ifdef_xml_dtd(tc_alloc, test_alloc_external_entity); tcase_add_test__ifdef_xml_dtd(tc_alloc, test_alloc_ext_entity_set_encoding); tcase_add_test__ifdef_xml_dtd(tc_alloc, test_alloc_internal_entity); tcase_add_test__ifdef_xml_dtd(tc_alloc, test_alloc_dtd_default_handling); tcase_add_test(tc_alloc, test_alloc_explicit_encoding); tcase_add_test(tc_alloc, test_alloc_set_base); tcase_add_test(tc_alloc, test_alloc_realloc_buffer); tcase_add_test(tc_alloc, test_alloc_ext_entity_realloc_buffer); tcase_add_test(tc_alloc, test_alloc_realloc_many_attributes); tcase_add_test__ifdef_xml_dtd(tc_alloc, test_alloc_public_entity_value); tcase_add_test__ifdef_xml_dtd(tc_alloc, test_alloc_realloc_subst_public_entity_value); tcase_add_test(tc_alloc, test_alloc_parse_public_doctype); tcase_add_test(tc_alloc, test_alloc_parse_public_doctype_long_name); tcase_add_test__ifdef_xml_dtd(tc_alloc, test_alloc_set_foreign_dtd); tcase_add_test__ifdef_xml_dtd(tc_alloc, test_alloc_attribute_enum_value); tcase_add_test__ifdef_xml_dtd(tc_alloc, test_alloc_realloc_attribute_enum_value); tcase_add_test__ifdef_xml_dtd(tc_alloc, test_alloc_realloc_implied_attribute); tcase_add_test__ifdef_xml_dtd(tc_alloc, test_alloc_realloc_default_attribute); tcase_add_test(tc_alloc, test_alloc_notation); tcase_add_test(tc_alloc, test_alloc_public_notation); tcase_add_test(tc_alloc, test_alloc_system_notation); tcase_add_test__ifdef_xml_dtd(tc_alloc, test_alloc_nested_groups); tcase_add_test__ifdef_xml_dtd(tc_alloc, test_alloc_realloc_nested_groups); tcase_add_test(tc_alloc, test_alloc_large_group); tcase_add_test__ifdef_xml_dtd(tc_alloc, test_alloc_realloc_group_choice); tcase_add_test(tc_alloc, test_alloc_pi_in_epilog); tcase_add_test(tc_alloc, test_alloc_comment_in_epilog); tcase_add_test__ifdef_xml_dtd(tc_alloc, test_alloc_realloc_long_attribute_value); tcase_add_test(tc_alloc, test_alloc_attribute_whitespace); tcase_add_test(tc_alloc, test_alloc_attribute_predefined_entity); tcase_add_test(tc_alloc, test_alloc_long_attr_default_with_char_ref); tcase_add_test(tc_alloc, test_alloc_long_attr_value); tcase_add_test__ifdef_xml_dtd(tc_alloc, test_alloc_nested_entities); tcase_add_test__ifdef_xml_dtd(tc_alloc, test_alloc_realloc_param_entity_newline); tcase_add_test__ifdef_xml_dtd(tc_alloc, test_alloc_realloc_ce_extends_pe); tcase_add_test__ifdef_xml_dtd(tc_alloc, test_alloc_realloc_attributes); tcase_add_test(tc_alloc, test_alloc_long_doc_name); tcase_add_test(tc_alloc, test_alloc_long_base); tcase_add_test(tc_alloc, test_alloc_long_public_id); tcase_add_test(tc_alloc, test_alloc_long_entity_value); tcase_add_test(tc_alloc, test_alloc_long_notation); suite_add_tcase(s, tc_nsalloc); tcase_add_checked_fixture(tc_nsalloc, nsalloc_setup, nsalloc_teardown); tcase_add_test(tc_nsalloc, test_nsalloc_xmlns); tcase_add_test(tc_nsalloc, test_nsalloc_parse_buffer); tcase_add_test(tc_nsalloc, test_nsalloc_long_prefix); tcase_add_test(tc_nsalloc, test_nsalloc_long_uri); tcase_add_test(tc_nsalloc, test_nsalloc_long_attr); tcase_add_test(tc_nsalloc, test_nsalloc_long_attr_prefix); tcase_add_test(tc_nsalloc, test_nsalloc_realloc_attributes); tcase_add_test(tc_nsalloc, test_nsalloc_long_element); tcase_add_test(tc_nsalloc, test_nsalloc_realloc_binding_uri); tcase_add_test(tc_nsalloc, test_nsalloc_realloc_long_prefix); tcase_add_test(tc_nsalloc, test_nsalloc_realloc_longer_prefix); tcase_add_test(tc_nsalloc, test_nsalloc_long_namespace); tcase_add_test(tc_nsalloc, test_nsalloc_less_long_namespace); tcase_add_test(tc_nsalloc, test_nsalloc_long_context); tcase_add_test(tc_nsalloc, test_nsalloc_realloc_long_context); tcase_add_test(tc_nsalloc, test_nsalloc_realloc_long_context_2); tcase_add_test(tc_nsalloc, test_nsalloc_realloc_long_context_3); tcase_add_test(tc_nsalloc, test_nsalloc_realloc_long_context_4); tcase_add_test(tc_nsalloc, test_nsalloc_realloc_long_context_5); tcase_add_test(tc_nsalloc, test_nsalloc_realloc_long_context_6); tcase_add_test(tc_nsalloc, test_nsalloc_realloc_long_context_7); tcase_add_test(tc_nsalloc, test_nsalloc_realloc_long_ge_name); tcase_add_test(tc_nsalloc, test_nsalloc_realloc_long_context_in_dtd); tcase_add_test(tc_nsalloc, test_nsalloc_long_default_in_ext); tcase_add_test(tc_nsalloc, test_nsalloc_long_systemid_in_ext); tcase_add_test(tc_nsalloc, test_nsalloc_prefixed_element); #if defined(XML_DTD) suite_add_tcase(s, tc_accounting); tcase_add_test(tc_accounting, test_accounting_precision); tcase_add_test(tc_accounting, test_billion_laughs_attack_protection_api); tcase_add_test(tc_accounting, test_helper_unsigned_char_to_printable); #endif return s; } int main(int argc, char *argv[]) { int i, nf; int verbosity = CK_NORMAL; Suite *s = make_suite(); SRunner *sr = srunner_create(s); /* run the tests for internal helper functions */ testhelper_is_whitespace_normalized(); for (i = 1; i < argc; ++i) { char *opt = argv[i]; if (strcmp(opt, "-v") == 0 || strcmp(opt, "--verbose") == 0) verbosity = CK_VERBOSE; else if (strcmp(opt, "-q") == 0 || strcmp(opt, "--quiet") == 0) verbosity = CK_SILENT; else { fprintf(stderr, "runtests: unknown option '%s'\n", opt); return 2; } } if (verbosity != CK_SILENT) printf("Expat version: %" XML_FMT_STR "\n", XML_ExpatVersion()); srunner_run_all(sr, verbosity); nf = srunner_ntests_failed(sr); srunner_free(sr); return (nf == 0) ? EXIT_SUCCESS : EXIT_FAILURE; }