123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334 |
- /*
- * Created by Phil on 19/07/2017.
- *
- * Distributed under the Boost Software License, Version 1.0. (See accompanying
- * file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
- */
- #include "catch_xmlwriter.h"
- #include "catch_enforce.h"
- #include <iomanip>
- #include <type_traits>
- namespace Catch {
- namespace {
- size_t trailingBytes(unsigned char c) {
- if ((c & 0xE0) == 0xC0) {
- return 2;
- }
- if ((c & 0xF0) == 0xE0) {
- return 3;
- }
- if ((c & 0xF8) == 0xF0) {
- return 4;
- }
- CATCH_INTERNAL_ERROR("Invalid multibyte utf-8 start byte encountered");
- }
- uint32_t headerValue(unsigned char c) {
- if ((c & 0xE0) == 0xC0) {
- return c & 0x1F;
- }
- if ((c & 0xF0) == 0xE0) {
- return c & 0x0F;
- }
- if ((c & 0xF8) == 0xF0) {
- return c & 0x07;
- }
- CATCH_INTERNAL_ERROR("Invalid multibyte utf-8 start byte encountered");
- }
// Writes `c` to `os` as a backslash-x escape with exactly two upper-case hex
// digits (e.g. 0x0A -> "\x0A").
//
// The stream is handed back in the state it was received in: both the format
// flags and the fill character are saved and restored. The fill character is
// not part of fmtflags, so restoring the flags alone would leave '0' behind
// as the fill for every later width-padded write on the same stream.
void hexEscapeChar(std::ostream& os, unsigned char c) {
    const std::ios_base::fmtflags savedFlags(os.flags());
    const char savedFill = os.fill();

    os << "\\x"
       << std::uppercase << std::hex << std::setfill('0') << std::setw(2)
       << static_cast<int>(c);   // cast so the byte prints as a number, not a glyph

    os.flags(savedFlags);
    os.fill(savedFill);
}
- bool shouldNewline(XmlFormatting fmt) {
- return !!(static_cast<std::underlying_type<XmlFormatting>::type>(fmt & XmlFormatting::Newline));
- }
- bool shouldIndent(XmlFormatting fmt) {
- return !!(static_cast<std::underlying_type<XmlFormatting>::type>(fmt & XmlFormatting::Indent));
- }
- } // anonymous namespace
- XmlFormatting operator | (XmlFormatting lhs, XmlFormatting rhs) {
- return static_cast<XmlFormatting>(
- static_cast<std::underlying_type<XmlFormatting>::type>(lhs) |
- static_cast<std::underlying_type<XmlFormatting>::type>(rhs)
- );
- }
- XmlFormatting operator & (XmlFormatting lhs, XmlFormatting rhs) {
- return static_cast<XmlFormatting>(
- static_cast<std::underlying_type<XmlFormatting>::type>(lhs) &
- static_cast<std::underlying_type<XmlFormatting>::type>(rhs)
- );
- }
- XmlEncode::XmlEncode( std::string const& str, ForWhat forWhat )
- : m_str( str ),
- m_forWhat( forWhat )
- {}
- void XmlEncode::encodeTo( std::ostream& os ) const {
- // Apostrophe escaping not necessary if we always use " to write attributes
- // (see: http://www.w3.org/TR/xml/#syntax)
- for( std::size_t idx = 0; idx < m_str.size(); ++ idx ) {
- unsigned char c = m_str[idx];
- switch (c) {
- case '<': os << "<"; break;
- case '&': os << "&"; break;
- case '>':
- // See: http://www.w3.org/TR/xml/#syntax
- if (idx > 2 && m_str[idx - 1] == ']' && m_str[idx - 2] == ']')
- os << ">";
- else
- os << c;
- break;
- case '\"':
- if (m_forWhat == ForAttributes)
- os << """;
- else
- os << c;
- break;
- default:
- // Check for control characters and invalid utf-8
- // Escape control characters in standard ascii
- // see http://stackoverflow.com/questions/404107/why-are-control-characters-illegal-in-xml-1-0
- if (c < 0x09 || (c > 0x0D && c < 0x20) || c == 0x7F) {
- hexEscapeChar(os, c);
- break;
- }
- // Plain ASCII: Write it to stream
- if (c < 0x7F) {
- os << c;
- break;
- }
- // UTF-8 territory
- // Check if the encoding is valid and if it is not, hex escape bytes.
- // Important: We do not check the exact decoded values for validity, only the encoding format
- // First check that this bytes is a valid lead byte:
- // This means that it is not encoded as 1111 1XXX
- // Or as 10XX XXXX
- if (c < 0xC0 ||
- c >= 0xF8) {
- hexEscapeChar(os, c);
- break;
- }
- auto encBytes = trailingBytes(c);
- // Are there enough bytes left to avoid accessing out-of-bounds memory?
- if (idx + encBytes - 1 >= m_str.size()) {
- hexEscapeChar(os, c);
- break;
- }
- // The header is valid, check data
- // The next encBytes bytes must together be a valid utf-8
- // This means: bitpattern 10XX XXXX and the extracted value is sane (ish)
- bool valid = true;
- uint32_t value = headerValue(c);
- for (std::size_t n = 1; n < encBytes; ++n) {
- unsigned char nc = m_str[idx + n];
- valid &= ((nc & 0xC0) == 0x80);
- value = (value << 6) | (nc & 0x3F);
- }
- if (
- // Wrong bit pattern of following bytes
- (!valid) ||
- // Overlong encodings
- (value < 0x80) ||
- (0x80 <= value && value < 0x800 && encBytes > 2) ||
- (0x800 < value && value < 0x10000 && encBytes > 3) ||
- // Encoded value out of range
- (value >= 0x110000)
- ) {
- hexEscapeChar(os, c);
- break;
- }
- // If we got here, this is in fact a valid(ish) utf-8 sequence
- for (std::size_t n = 0; n < encBytes; ++n) {
- os << m_str[idx + n];
- }
- idx += encBytes - 1;
- break;
- }
- }
- }
- std::ostream& operator << ( std::ostream& os, XmlEncode const& xmlEncode ) {
- xmlEncode.encodeTo( os );
- return os;
- }
- XmlWriter::ScopedElement::ScopedElement( XmlWriter* writer, XmlFormatting fmt )
- : m_writer( writer ),
- m_fmt(fmt)
- {}
- XmlWriter::ScopedElement::ScopedElement( ScopedElement&& other ) noexcept
- : m_writer( other.m_writer ),
- m_fmt(other.m_fmt)
- {
- other.m_writer = nullptr;
- other.m_fmt = XmlFormatting::None;
- }
- XmlWriter::ScopedElement& XmlWriter::ScopedElement::operator=( ScopedElement&& other ) noexcept {
- if ( m_writer ) {
- m_writer->endElement();
- }
- m_writer = other.m_writer;
- other.m_writer = nullptr;
- m_fmt = other.m_fmt;
- other.m_fmt = XmlFormatting::None;
- return *this;
- }
- XmlWriter::ScopedElement::~ScopedElement() {
- if (m_writer) {
- m_writer->endElement(m_fmt);
- }
- }
- XmlWriter::ScopedElement& XmlWriter::ScopedElement::writeText( std::string const& text, XmlFormatting fmt ) {
- m_writer->writeText( text, fmt );
- return *this;
- }
- XmlWriter::XmlWriter( std::ostream& os ) : m_os( os )
- {
- writeDeclaration();
- }
- XmlWriter::~XmlWriter() {
- while (!m_tags.empty()) {
- endElement();
- }
- newlineIfNecessary();
- }
- XmlWriter& XmlWriter::startElement( std::string const& name, XmlFormatting fmt ) {
- ensureTagClosed();
- newlineIfNecessary();
- if (shouldIndent(fmt)) {
- m_os << m_indent;
- m_indent += " ";
- }
- m_os << '<' << name;
- m_tags.push_back( name );
- m_tagIsOpen = true;
- applyFormatting(fmt);
- return *this;
- }
- XmlWriter::ScopedElement XmlWriter::scopedElement( std::string const& name, XmlFormatting fmt ) {
- ScopedElement scoped( this, fmt );
- startElement( name, fmt );
- return scoped;
- }
- XmlWriter& XmlWriter::endElement(XmlFormatting fmt) {
- m_indent = m_indent.substr(0, m_indent.size() - 2);
- if( m_tagIsOpen ) {
- m_os << "/>";
- m_tagIsOpen = false;
- } else {
- newlineIfNecessary();
- if (shouldIndent(fmt)) {
- m_os << m_indent;
- }
- m_os << "</" << m_tags.back() << ">";
- }
- m_os << std::flush;
- applyFormatting(fmt);
- m_tags.pop_back();
- return *this;
- }
- XmlWriter& XmlWriter::writeAttribute( std::string const& name, std::string const& attribute ) {
- if( !name.empty() && !attribute.empty() )
- m_os << ' ' << name << "=\"" << XmlEncode( attribute, XmlEncode::ForAttributes ) << '"';
- return *this;
- }
- XmlWriter& XmlWriter::writeAttribute( std::string const& name, bool attribute ) {
- m_os << ' ' << name << "=\"" << ( attribute ? "true" : "false" ) << '"';
- return *this;
- }
- XmlWriter& XmlWriter::writeText( std::string const& text, XmlFormatting fmt) {
- if( !text.empty() ){
- bool tagWasOpen = m_tagIsOpen;
- ensureTagClosed();
- if (tagWasOpen && shouldIndent(fmt)) {
- m_os << m_indent;
- }
- m_os << XmlEncode( text );
- applyFormatting(fmt);
- }
- return *this;
- }
- XmlWriter& XmlWriter::writeComment( std::string const& text, XmlFormatting fmt) {
- ensureTagClosed();
- if (shouldIndent(fmt)) {
- m_os << m_indent;
- }
- m_os << "<!--" << text << "-->";
- applyFormatting(fmt);
- return *this;
- }
- void XmlWriter::writeStylesheetRef( std::string const& url ) {
- m_os << "<?xml-stylesheet type=\"text/xsl\" href=\"" << url << "\"?>\n";
- }
- XmlWriter& XmlWriter::writeBlankLine() {
- ensureTagClosed();
- m_os << '\n';
- return *this;
- }
- void XmlWriter::ensureTagClosed() {
- if( m_tagIsOpen ) {
- m_os << '>' << std::flush;
- newlineIfNecessary();
- m_tagIsOpen = false;
- }
- }
- void XmlWriter::applyFormatting(XmlFormatting fmt) {
- m_needsNewline = shouldNewline(fmt);
- }
- void XmlWriter::writeDeclaration() {
- m_os << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
- }
- void XmlWriter::newlineIfNecessary() {
- if( m_needsNewline ) {
- m_os << std::endl;
- m_needsNewline = false;
- }
- }
- }
|