38 return XmlDocument (textToParse).getDocumentElement();
41 std::unique_ptr<XmlElement> parseXML (
const String& textToParse)
43 return XmlDocument (textToParse).getDocumentElement();
46 std::unique_ptr<XmlElement> parseXML (
const File& file)
51 std::unique_ptr<XmlElement> parseXMLIfTagMatches (
const String& textToParse,
StringRef requiredTag)
53 return XmlDocument (textToParse).getDocumentElementIfTagMatches (requiredTag);
56 std::unique_ptr<XmlElement> parseXMLIfTagMatches (
const File& file,
StringRef requiredTag)
58 return XmlDocument (file).getDocumentElementIfTagMatches (requiredTag);
63 inputSource.reset (newSource);
68 ignoreEmptyTextElements = shouldBeIgnored;
71 namespace XmlIdentifierChars
73 static bool isIdentifierCharSlow (juce_wchar c) noexcept
76 || c ==
'_' || c ==
'-' || c ==
':' || c ==
'.';
79 static bool isIdentifierChar (juce_wchar c) noexcept
81 static const uint32 legalChars[] = { 0, 0x7ff6000, 0x87fffffe, 0x7fffffe, 0 };
83 return ((
int) c < (
int) numElementsInArray (legalChars) * 32) ? ((legalChars [c >> 5] & (uint32) (1 << (c & 31))) != 0)
84 : isIdentifierCharSlow (c);
101 static String::CharPointerType findEndOfToken (String::CharPointerType p) noexcept
103 while (isIdentifierChar (*p))
112 if (originalText.isEmpty() && inputSource !=
nullptr)
114 std::unique_ptr<InputStream> in (inputSource->createInputStream());
121 #if JUCE_STRING_UTF_TYPE == 8 125 auto* text =
static_cast<const char*
> (data.
getData());
138 return parseDocumentElement (String::CharPointerType (text), onlyReadOuterDocumentElement);
147 return parseDocumentElement (originalText.getCharPointer(), onlyReadOuterDocumentElement);
153 if (xml->hasTagName (requiredTag))
164 void XmlDocument::setLastError (
const String& desc,
const bool carryOn)
167 errorOccurred = ! carryOn;
170 String XmlDocument::getFileContents (
const String& filename)
const 172 if (inputSource !=
nullptr)
174 std::unique_ptr<InputStream> in (inputSource->createInputStreamFor (filename.
trim().
unquoted()));
177 return in->readEntireStreamAsString();
183 juce_wchar XmlDocument::readNextChar() noexcept
185 auto c = input.getAndAdvance();
196 std::unique_ptr<XmlElement> XmlDocument::parseDocumentElement (String::CharPointerType textToParse,
197 bool onlyReadOuterDocumentElement)
200 errorOccurred =
false;
202 needToLoadDTD =
true;
204 if (textToParse.isEmpty())
206 lastError =
"not enough input";
208 else if (! parseHeader())
210 lastError =
"malformed header";
212 else if (! parseDTD())
214 lastError =
"malformed DTD";
219 std::unique_ptr<XmlElement> result (readNextElement (! onlyReadOuterDocumentElement));
228 bool XmlDocument::parseHeader()
230 skipNextWhiteSpace();
236 if (headerEnd.isEmpty())
240 auto encoding =
String (input, headerEnd)
254 jassert (encoding.isEmpty() || encoding.startsWithIgnoreCase (
"utf-"));
257 input = headerEnd + 2;
258 skipNextWhiteSpace();
264 bool XmlDocument::parseDTD()
269 auto dtdStart = input;
271 for (
int n = 1; n > 0;)
273 auto c = readNextChar();
284 dtdText =
String (dtdStart, input - 1).
trim();
290 void XmlDocument::skipNextWhiteSpace()
294 input = input.findEndOfWhitespace();
311 if (closeComment < 0)
317 input += closeComment + 3;
326 if (closeBracket < 0)
332 input += closeBracket + 2;
341 void XmlDocument::readQuotedString (
String& result)
343 auto quote = readNextChar();
347 auto c = readNextChar();
364 auto character = *input;
366 if (character == quote)
373 if (character ==
'&')
381 setLastError (
"unmatched quotes",
false);
392 XmlElement* XmlDocument::readNextElement (
const bool alsoParseSubElements)
395 skipNextWhiteSpace();
403 auto endOfToken = XmlIdentifierChars::findEndOfToken (input);
405 if (endOfToken == input)
408 skipNextWhiteSpace();
409 endOfToken = XmlIdentifierChars::findEndOfToken (input);
411 if (endOfToken == input)
413 setLastError (
"tag name missing",
false);
425 skipNextWhiteSpace();
429 if (c ==
'/' && input[1] ==
'>')
440 if (alsoParseSubElements)
441 readChildElements (*node);
447 if (XmlIdentifierChars::isIdentifierChar (c))
449 auto attNameEnd = XmlIdentifierChars::findEndOfToken (input);
451 if (attNameEnd != input)
453 auto attNameStart = input;
455 skipNextWhiteSpace();
457 if (readNextChar() ==
'=')
459 skipNextWhiteSpace();
460 auto nextChar = *input;
462 if (nextChar ==
'"' || nextChar ==
'\'')
464 auto* newAtt =
new XmlElement::XmlAttributeNode (attNameStart, attNameEnd);
465 readQuotedString (newAtt->value);
466 attributeAppender.
append (newAtt);
472 setLastError (
"expected '=' after attribute '" 473 +
String (attNameStart, attNameEnd) +
"'",
false);
481 setLastError (
"illegal character found in " + node->
getTagName() +
": '" + c +
"'",
false);
491 void XmlDocument::readChildElements (
XmlElement& parent)
497 auto preWhitespaceInput = input;
498 skipNextWhiteSpace();
502 setLastError (
"unmatched tags",
false);
513 auto closeTag = input.indexOf ((juce_wchar)
'>');
516 input += closeTag + 1;
524 auto inputStart = input;
532 setLastError (
"unterminated CDATA section",
false);
537 if (c0 ==
']' && input[1] ==
']' && input[2] ==
'>')
550 if (
auto* n = readNextElement (
true))
558 input = preWhitespaceInput;
560 bool contentShouldBeUsed = ! ignoreEmptyTextElements;
568 if (input[1] ==
'!' && input[2] ==
'-' && input[3] ==
'-')
573 if (closeComment < 0)
575 setLastError (
"unterminated comment",
false);
580 input += closeComment + 3;
589 setLastError (
"unmatched tags",
false);
601 auto oldInput = input;
602 auto oldOutOfData = outOfData;
607 while (
auto* n = readNextElement (
true))
611 outOfData = oldOutOfData;
615 textElementContent << entity;
623 auto nextChar = *input;
625 if (nextChar ==
'\r')
629 if (input[1] ==
'\n')
633 if (nextChar ==
'<' || nextChar ==
'&')
638 setLastError (
"unmatched tags",
false);
649 if (contentShouldBeUsed)
655 void XmlDocument::readEntity (
String& result)
685 else if (*input ==
'#')
690 if (*input ==
'x' || *input ==
'X')
695 while (input[0] !=
';')
699 if (hexValue < 0 || ++numChars > 8)
701 setLastError (
"illegal escape sequence",
true);
705 charCode = (charCode << 4) | hexValue;
711 else if (input[0] >=
'0' && input[0] <=
'9')
715 while (input[0] !=
';')
719 setLastError (
"illegal escape sequence",
true);
723 charCode = charCode * 10 + ((int) input[0] -
'0');
731 setLastError (
"illegal escape sequence",
true);
736 result << (juce_wchar) charCode;
740 auto entityNameStart = input;
741 auto closingSemiColon = input.
indexOf ((juce_wchar)
';');
743 if (closingSemiColon < 0)
750 input += closingSemiColon + 1;
751 result += expandExternalEntity (
String (entityNameStart, (
size_t) closingSemiColon));
768 if (char1 ==
'x' || char1 ==
'X')
771 if (char1 >=
'0' && char1 <=
'9')
774 setLastError (
"illegal escape sequence",
false);
778 return expandExternalEntity (ent);
781 String XmlDocument::expandExternalEntity (
const String& entity)
785 if (dtdText.isNotEmpty())
788 tokenisedDTD.addTokens (dtdText,
true);
790 if (tokenisedDTD[tokenisedDTD.size() - 2].equalsIgnoreCase (
"system")
791 && tokenisedDTD[tokenisedDTD.size() - 1].isQuotedString())
793 auto fn = tokenisedDTD[tokenisedDTD.size() - 1];
795 tokenisedDTD.
clear();
796 tokenisedDTD.addTokens (getFileContents (fn),
true);
800 tokenisedDTD.clear();
801 auto openBracket = dtdText.indexOfChar (
'[');
805 auto closeBracket = dtdText.lastIndexOfChar (
']');
807 if (closeBracket > openBracket)
808 tokenisedDTD.addTokens (dtdText.substring (openBracket + 1,
809 closeBracket),
true);
813 for (
int i = tokenisedDTD.size(); --i >= 0;)
815 if (tokenisedDTD[i].startsWithChar (
'%')
816 && tokenisedDTD[i].endsWithChar (
';'))
818 auto parsed = getParameterEntity (tokenisedDTD[i].substring (1, tokenisedDTD[i].length() - 1));
822 tokenisedDTD.remove (i);
824 for (
int j = newToks.
size(); --j >= 0;)
825 tokenisedDTD.insert (i, newToks[j]);
830 needToLoadDTD =
false;
833 for (
int i = 0; i < tokenisedDTD.size(); ++i)
835 if (tokenisedDTD[i] == entity)
837 if (tokenisedDTD[i - 1].equalsIgnoreCase (
"<!entity"))
844 while (ampersand >= 0)
846 auto semiColon = ent.
indexOf (i + 1,
";");
850 setLastError (
"entity without terminating semi-colon",
false);
854 auto resolved = expandEntity (ent.
substring (i + 1, semiColon));
868 setLastError (
"unknown entity",
true);
872 String XmlDocument::getParameterEntity (
const String& entity)
874 for (
int i = 0; i < tokenisedDTD.size(); ++i)
876 if (tokenisedDTD[i] == entity
877 && tokenisedDTD [i - 1] ==
"%" 878 && tokenisedDTD [i - 2].equalsIgnoreCase (
"<!entity"))
883 return getFileContents (tokenisedDTD [i + 2].trimCharactersAtEnd (
">"));
void setEmptyTextElementsIgnored(bool shouldBeIgnored) noexcept
String fromFirstOccurrenceOf(StringRef substringToStartFrom, bool includeSubStringInResult, bool ignoreCase) const
virtual bool writeByte(char byte)
static std::unique_ptr< XmlElement > parse(const File &file)
static XmlElement * createTextElement(const String &text)
std::unique_ptr< XmlElement > getDocumentElement(bool onlyReadOuterDocumentElement=false)
void setInputSource(InputSource *newSource) noexcept
void append(ObjectType *const newItem) noexcept
int64 writeFromInputStream(InputStream &, int64 maxNumBytesToWrite) override
String trimCharactersAtEnd(StringRef charactersToTrim) const
CharPointerType getCharPointer() const noexcept
static bool isWhitespace(char character) noexcept
int addTokens(StringRef stringToTokenise, bool preserveQuotedStrings)
bool equalsIgnoreCase(const String &other) const noexcept
const String & getLastParseError() const noexcept
String substring(int startIndex, int endIndex) const
static CharPointerType1 find(CharPointerType1 textToSearch, const CharPointerType2 substringToLookFor) noexcept
const void * getData() const noexcept
static int getHexDigitValue(juce_wchar digit) noexcept
int indexOf(StringRef textToLookFor) const noexcept
size_t getDataSize() const noexcept
static bool isByteOrderMark(const void *possibleByteOrder) noexcept
String upToFirstOccurrenceOf(StringRef substringToEndWith, bool includeSubStringInResult, bool ignoreCase) const
bool startsWithChar(juce_wchar character) const noexcept
static bool isByteOrderMarkLittleEndian(const void *possibleByteOrder) noexcept
int getIntValue() const noexcept
static String charToString(juce_wchar character)
int size() const noexcept
static bool isByteOrderMarkBigEndian(const void *possibleByteOrder) noexcept
bool appendUTF8Char(juce_wchar character)
bool containsNonWhitespaceChars() const noexcept
int indexOfChar(juce_wchar characterToLookFor) const noexcept
static bool isLetterOrDigit(char character) noexcept
void appendCharPointer(CharPointerType startOfTextToAppend, CharPointerType endOfTextToAppend)
XmlDocument(const String &documentText)
static int compareUpTo(CharPointerType1 s1, CharPointerType2 s2, int maxChars) noexcept
const String & getTagName() const noexcept
int getHexValue32() const noexcept
std::unique_ptr< XmlElement > getDocumentElementIfTagMatches(StringRef requiredTag)