catch_xmlwriter.cpp 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334
  1. /*
  2. * Created by Phil on 19/07/2017.
  3. *
  4. * Distributed under the Boost Software License, Version 1.0. (See accompanying
  5. * file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  6. */
  7. #include "catch_xmlwriter.h"
  8. #include "catch_enforce.h"
  9. #include <iomanip>
  10. #include <type_traits>
  11. namespace Catch {
  12. namespace {
  13. size_t trailingBytes(unsigned char c) {
  14. if ((c & 0xE0) == 0xC0) {
  15. return 2;
  16. }
  17. if ((c & 0xF0) == 0xE0) {
  18. return 3;
  19. }
  20. if ((c & 0xF8) == 0xF0) {
  21. return 4;
  22. }
  23. CATCH_INTERNAL_ERROR("Invalid multibyte utf-8 start byte encountered");
  24. }
  25. uint32_t headerValue(unsigned char c) {
  26. if ((c & 0xE0) == 0xC0) {
  27. return c & 0x1F;
  28. }
  29. if ((c & 0xF0) == 0xE0) {
  30. return c & 0x0F;
  31. }
  32. if ((c & 0xF8) == 0xF0) {
  33. return c & 0x07;
  34. }
  35. CATCH_INTERNAL_ERROR("Invalid multibyte utf-8 start byte encountered");
  36. }
  // Writes `c` to `os` as a backslash, the letter x and exactly two
  // uppercase hexadecimal digits (e.g. byte 0x0B becomes \x0B).
  //
  // The stream's formatting state is left as the caller had it: both the
  // format flags (hex/uppercase) and the fill character are restored.
  // The fill character is not part of fmtflags, so it has to be saved and
  // restored separately -- otherwise '0' would leak into every later
  // width-padded write on the same stream.
  void hexEscapeChar(std::ostream& os, unsigned char c) {
      const std::ios_base::fmtflags savedFlags(os.flags());

      os << "\\x";

      // fill() returns the previous fill character while installing '0'.
      const char savedFill = os.fill('0');
      os << std::uppercase << std::hex << std::setw(2)
         << static_cast<int>(c);

      os.fill(savedFill);
      os.flags(savedFlags);
  }
  44. bool shouldNewline(XmlFormatting fmt) {
  45. return !!(static_cast<std::underlying_type<XmlFormatting>::type>(fmt & XmlFormatting::Newline));
  46. }
  47. bool shouldIndent(XmlFormatting fmt) {
  48. return !!(static_cast<std::underlying_type<XmlFormatting>::type>(fmt & XmlFormatting::Indent));
  49. }
  50. } // anonymous namespace
  51. XmlFormatting operator | (XmlFormatting lhs, XmlFormatting rhs) {
  52. return static_cast<XmlFormatting>(
  53. static_cast<std::underlying_type<XmlFormatting>::type>(lhs) |
  54. static_cast<std::underlying_type<XmlFormatting>::type>(rhs)
  55. );
  56. }
  57. XmlFormatting operator & (XmlFormatting lhs, XmlFormatting rhs) {
  58. return static_cast<XmlFormatting>(
  59. static_cast<std::underlying_type<XmlFormatting>::type>(lhs) &
  60. static_cast<std::underlying_type<XmlFormatting>::type>(rhs)
  61. );
  62. }
  63. XmlEncode::XmlEncode( std::string const& str, ForWhat forWhat )
  64. : m_str( str ),
  65. m_forWhat( forWhat )
  66. {}
  67. void XmlEncode::encodeTo( std::ostream& os ) const {
  68. // Apostrophe escaping not necessary if we always use " to write attributes
  69. // (see: http://www.w3.org/TR/xml/#syntax)
  70. for( std::size_t idx = 0; idx < m_str.size(); ++ idx ) {
  71. unsigned char c = m_str[idx];
  72. switch (c) {
  73. case '<': os << "&lt;"; break;
  74. case '&': os << "&amp;"; break;
  75. case '>':
  76. // See: http://www.w3.org/TR/xml/#syntax
  77. if (idx > 2 && m_str[idx - 1] == ']' && m_str[idx - 2] == ']')
  78. os << "&gt;";
  79. else
  80. os << c;
  81. break;
  82. case '\"':
  83. if (m_forWhat == ForAttributes)
  84. os << "&quot;";
  85. else
  86. os << c;
  87. break;
  88. default:
  89. // Check for control characters and invalid utf-8
  90. // Escape control characters in standard ascii
  91. // see http://stackoverflow.com/questions/404107/why-are-control-characters-illegal-in-xml-1-0
  92. if (c < 0x09 || (c > 0x0D && c < 0x20) || c == 0x7F) {
  93. hexEscapeChar(os, c);
  94. break;
  95. }
  96. // Plain ASCII: Write it to stream
  97. if (c < 0x7F) {
  98. os << c;
  99. break;
  100. }
  101. // UTF-8 territory
  102. // Check if the encoding is valid and if it is not, hex escape bytes.
  103. // Important: We do not check the exact decoded values for validity, only the encoding format
  104. // First check that this bytes is a valid lead byte:
  105. // This means that it is not encoded as 1111 1XXX
  106. // Or as 10XX XXXX
  107. if (c < 0xC0 ||
  108. c >= 0xF8) {
  109. hexEscapeChar(os, c);
  110. break;
  111. }
  112. auto encBytes = trailingBytes(c);
  113. // Are there enough bytes left to avoid accessing out-of-bounds memory?
  114. if (idx + encBytes - 1 >= m_str.size()) {
  115. hexEscapeChar(os, c);
  116. break;
  117. }
  118. // The header is valid, check data
  119. // The next encBytes bytes must together be a valid utf-8
  120. // This means: bitpattern 10XX XXXX and the extracted value is sane (ish)
  121. bool valid = true;
  122. uint32_t value = headerValue(c);
  123. for (std::size_t n = 1; n < encBytes; ++n) {
  124. unsigned char nc = m_str[idx + n];
  125. valid &= ((nc & 0xC0) == 0x80);
  126. value = (value << 6) | (nc & 0x3F);
  127. }
  128. if (
  129. // Wrong bit pattern of following bytes
  130. (!valid) ||
  131. // Overlong encodings
  132. (value < 0x80) ||
  133. (0x80 <= value && value < 0x800 && encBytes > 2) ||
  134. (0x800 < value && value < 0x10000 && encBytes > 3) ||
  135. // Encoded value out of range
  136. (value >= 0x110000)
  137. ) {
  138. hexEscapeChar(os, c);
  139. break;
  140. }
  141. // If we got here, this is in fact a valid(ish) utf-8 sequence
  142. for (std::size_t n = 0; n < encBytes; ++n) {
  143. os << m_str[idx + n];
  144. }
  145. idx += encBytes - 1;
  146. break;
  147. }
  148. }
  149. }
  150. std::ostream& operator << ( std::ostream& os, XmlEncode const& xmlEncode ) {
  151. xmlEncode.encodeTo( os );
  152. return os;
  153. }
  154. XmlWriter::ScopedElement::ScopedElement( XmlWriter* writer, XmlFormatting fmt )
  155. : m_writer( writer ),
  156. m_fmt(fmt)
  157. {}
  158. XmlWriter::ScopedElement::ScopedElement( ScopedElement&& other ) noexcept
  159. : m_writer( other.m_writer ),
  160. m_fmt(other.m_fmt)
  161. {
  162. other.m_writer = nullptr;
  163. other.m_fmt = XmlFormatting::None;
  164. }
  165. XmlWriter::ScopedElement& XmlWriter::ScopedElement::operator=( ScopedElement&& other ) noexcept {
  166. if ( m_writer ) {
  167. m_writer->endElement();
  168. }
  169. m_writer = other.m_writer;
  170. other.m_writer = nullptr;
  171. m_fmt = other.m_fmt;
  172. other.m_fmt = XmlFormatting::None;
  173. return *this;
  174. }
  175. XmlWriter::ScopedElement::~ScopedElement() {
  176. if (m_writer) {
  177. m_writer->endElement(m_fmt);
  178. }
  179. }
  180. XmlWriter::ScopedElement& XmlWriter::ScopedElement::writeText( std::string const& text, XmlFormatting fmt ) {
  181. m_writer->writeText( text, fmt );
  182. return *this;
  183. }
  184. XmlWriter::XmlWriter( std::ostream& os ) : m_os( os )
  185. {
  186. writeDeclaration();
  187. }
  188. XmlWriter::~XmlWriter() {
  189. while (!m_tags.empty()) {
  190. endElement();
  191. }
  192. newlineIfNecessary();
  193. }
  194. XmlWriter& XmlWriter::startElement( std::string const& name, XmlFormatting fmt ) {
  195. ensureTagClosed();
  196. newlineIfNecessary();
  197. if (shouldIndent(fmt)) {
  198. m_os << m_indent;
  199. m_indent += " ";
  200. }
  201. m_os << '<' << name;
  202. m_tags.push_back( name );
  203. m_tagIsOpen = true;
  204. applyFormatting(fmt);
  205. return *this;
  206. }
  207. XmlWriter::ScopedElement XmlWriter::scopedElement( std::string const& name, XmlFormatting fmt ) {
  208. ScopedElement scoped( this, fmt );
  209. startElement( name, fmt );
  210. return scoped;
  211. }
  212. XmlWriter& XmlWriter::endElement(XmlFormatting fmt) {
  213. m_indent = m_indent.substr(0, m_indent.size() - 2);
  214. if( m_tagIsOpen ) {
  215. m_os << "/>";
  216. m_tagIsOpen = false;
  217. } else {
  218. newlineIfNecessary();
  219. if (shouldIndent(fmt)) {
  220. m_os << m_indent;
  221. }
  222. m_os << "</" << m_tags.back() << ">";
  223. }
  224. m_os << std::flush;
  225. applyFormatting(fmt);
  226. m_tags.pop_back();
  227. return *this;
  228. }
  229. XmlWriter& XmlWriter::writeAttribute( std::string const& name, std::string const& attribute ) {
  230. if( !name.empty() && !attribute.empty() )
  231. m_os << ' ' << name << "=\"" << XmlEncode( attribute, XmlEncode::ForAttributes ) << '"';
  232. return *this;
  233. }
  234. XmlWriter& XmlWriter::writeAttribute( std::string const& name, bool attribute ) {
  235. m_os << ' ' << name << "=\"" << ( attribute ? "true" : "false" ) << '"';
  236. return *this;
  237. }
  238. XmlWriter& XmlWriter::writeText( std::string const& text, XmlFormatting fmt) {
  239. if( !text.empty() ){
  240. bool tagWasOpen = m_tagIsOpen;
  241. ensureTagClosed();
  242. if (tagWasOpen && shouldIndent(fmt)) {
  243. m_os << m_indent;
  244. }
  245. m_os << XmlEncode( text );
  246. applyFormatting(fmt);
  247. }
  248. return *this;
  249. }
  250. XmlWriter& XmlWriter::writeComment( std::string const& text, XmlFormatting fmt) {
  251. ensureTagClosed();
  252. if (shouldIndent(fmt)) {
  253. m_os << m_indent;
  254. }
  255. m_os << "<!--" << text << "-->";
  256. applyFormatting(fmt);
  257. return *this;
  258. }
  259. void XmlWriter::writeStylesheetRef( std::string const& url ) {
  260. m_os << "<?xml-stylesheet type=\"text/xsl\" href=\"" << url << "\"?>\n";
  261. }
  262. XmlWriter& XmlWriter::writeBlankLine() {
  263. ensureTagClosed();
  264. m_os << '\n';
  265. return *this;
  266. }
  267. void XmlWriter::ensureTagClosed() {
  268. if( m_tagIsOpen ) {
  269. m_os << '>' << std::flush;
  270. newlineIfNecessary();
  271. m_tagIsOpen = false;
  272. }
  273. }
  274. void XmlWriter::applyFormatting(XmlFormatting fmt) {
  275. m_needsNewline = shouldNewline(fmt);
  276. }
  277. void XmlWriter::writeDeclaration() {
  278. m_os << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
  279. }
  280. void XmlWriter::newlineIfNecessary() {
  281. if( m_needsNewline ) {
  282. m_os << std::endl;
  283. m_needsNewline = false;
  284. }
  285. }
  286. }