// XMLParser.h // Interfaces to the CXMLParser class representing the base class for XML parsing // To use, derive a class from this base and override its OnStartElement(), OnEndElement() and OnCharacters() functions #pragma once #include "expat/expat.h" class CXMLParser { public: CXMLParser(void); virtual ~CXMLParser(); // The actual parsing, may be called several times; the last time needs iIsFinal == true (-> flush) int Parse(const char * iData, size_t iLength, bool iIsFinal = false); private: // LibExpat stuff: XML_Parser mParser; static void StartElementHandler(void * iContext, const XML_Char * iElement, const XML_Char ** iAttributes) { ((CXMLParser *) iContext)->OnStartElement(iElement, iAttributes); } static void EndElementHandler(void * iContext, const XML_Char * iElement) { ((CXMLParser *) iContext)->OnEndElement(iElement); } static void CharacterDataHandler(void * iContext, const XML_Char * iData, int iLength) { ((CXMLParser *) iContext)->OnCharacters(iData, iLength); } protected: virtual void OnStartElement(const XML_Char * iElement, const XML_Char ** iAttributes) = 0; virtual void OnEndElement(const XML_Char * iElement) = 0; virtual void OnCharacters(const XML_Char * iCharacters, int iLength) = 0; }; //////////////////////////////////////////////////////////////////////////////// // The following template has been modified from code available at // https://www.codeproject.com/Articles/1847/C-Wrappers-for-the-Expat-XML-Parser // It uses templates to remove the virtual function call penalty (both size and speed) for each callback /* Usage: 1, Declare a subclass: class CMyParser : public CExpatImpl 2, Declare handlers that you want in that subclass: void CMyParser::OnEndElement(const XML_Char * iTagName); 3, Create an instance of your class: CMyParser Parser; 4, Call Create(): Parser.Create(nullptr, nullptr); 4, Call Parse(), repeatedly: Parser.Parse(Buffer, Length); */ template class CExpatImpl { // @access Constructors and destructors public: // @cmember General constructor CExpatImpl() { m_p = nullptr; } // @cmember Destructor ~CExpatImpl() { Destroy(); } // @access Parser creation and deletion methods public: // @cmember Create a parser bool Create(const XML_Char * pszEncoding = nullptr, const XML_Char * pszSep = nullptr) { // Destroy the old parser Destroy(); // If the encoding or seperator are empty, then nullptr if ((pszEncoding != nullptr) && (pszEncoding[0] == 0)) { pszEncoding = nullptr; } if ((pszSep != nullptr) && (pszSep[0] == 0)) { pszSep = nullptr; } // Create the new parser m_p = XML_ParserCreate_MM(pszEncoding, nullptr, pszSep); if (m_p == nullptr) { return false; } // Invoke the post create routine _T * pThis = static_cast<_T *>(this); pThis->OnPostCreate(); // Set the user data used in callbacks XML_SetUserData(m_p, (void *) this); return true; } // @cmember Destroy the parser void Destroy(void) { if (m_p != nullptr) { XML_ParserFree(m_p); } m_p = nullptr; } // @cmember Parse a block of data bool Parse(const char * pszBuffer, int nLength, bool fIsFinal = true) { assert(m_p != nullptr); return (XML_Parse(m_p, pszBuffer, nLength, fIsFinal) != 0); } // @cmember Parse internal buffer bool ParseBuffer(int nLength, bool fIsFinal = true) { assert(m_p != nullptr); return XML_ParseBuffer(m_p, nLength, fIsFinal) != 0; } // @cmember Get the internal buffer void * GetBuffer(int nLength) { assert(m_p != nullptr); return XML_GetBuffer(m_p, nLength); } protected: // Parser callback enable / disable methods: // @cmember Enable / Disable the start element handler void EnableStartElementHandler(bool fEnable = true) { assert(m_p != nullptr); XML_SetStartElementHandler(m_p, fEnable ? StartElementHandler : nullptr); } // @cmember Enable / Disable the end element handler void EnableEndElementHandler(bool fEnable = true) { assert(m_p != nullptr); XML_SetEndElementHandler(m_p, fEnable ? EndElementHandler : nullptr); } // @cmember Enable / Disable the element handlers void EnableElementHandler(bool fEnable = true) { assert(m_p != nullptr); EnableStartElementHandler(fEnable); EnableEndElementHandler(fEnable); } // @cmember Enable / Disable the character data handler void EnableCharacterDataHandler(bool fEnable = true) { assert(m_p != nullptr); XML_SetCharacterDataHandler(m_p, fEnable ? CharacterDataHandler : nullptr); } // @cmember Enable / Disable the processing instruction handler void EnableProcessingInstructionHandler(bool fEnable = true) { assert(m_p != nullptr); XML_SetProcessingInstructionHandler(m_p, fEnable ? ProcessingInstructionHandler : nullptr); } // @cmember Enable / Disable the comment handler void EnableCommentHandler(bool fEnable = true) { assert(m_p != nullptr); XML_SetCommentHandler(m_p, fEnable ? CommentHandler : nullptr); } // @cmember Enable / Disable the start CDATA section handler void EnableStartCdataSectionHandler(bool fEnable = true) { assert(m_p != nullptr); XML_SetStartCdataSectionHandler(m_p, fEnable ? StartCdataSectionHandler : nullptr); } // @cmember Enable / Disable the end CDATA section handler void EnableEndCdataSectionHandler(bool fEnable = true) { assert(m_p != nullptr); XML_SetEndCdataSectionHandler(m_p, fEnable ? EndCdataSectionHandler : nullptr); } // @cmember Enable / Disable the CDATA section handlers void EnableCdataSectionHandler(bool fEnable = true) { assert(m_p != nullptr); EnableStartCdataSectionHandler(fEnable); EnableEndCdataSectionHandler(fEnable); } // @cmember Enable / Disable default handler void EnableDefaultHandler(bool fEnable = true, bool fExpand = true) { assert(m_p != nullptr); if (fExpand) { XML_SetDefaultHandlerExpand(m_p, fEnable ? DefaultHandler : nullptr); } else { XML_SetDefaultHandler(m_p, fEnable ? DefaultHandler : nullptr); } } // @cmember Enable / Disable external entity ref handler void EnableExternalEntityRefHandler(bool fEnable = true) { assert(m_p != nullptr); XML_SetExternalEntityRefHandler(m_p, fEnable ? ExternalEntityRefHandler : nullptr); } // @cmember Enable / Disable unknown encoding handler void EnableUnknownEncodingHandler(bool fEnable = true) { assert(m_p != nullptr); XML_SetUnknownEncodingHandler(m_p, fEnable ? UnknownEncodingHandler : nullptr); } // @cmember Enable / Disable start namespace handler void EnableStartNamespaceDeclHandler(bool fEnable = true) { assert(m_p != nullptr); XML_SetStartNamespaceDeclHandler(m_p, fEnable ? StartNamespaceDeclHandler : nullptr); } // @cmember Enable / Disable end namespace handler void EnableEndNamespaceDeclHandler(bool fEnable = true) { assert(m_p != nullptr); XML_SetEndNamespaceDeclHandler(m_p, fEnable ? EndNamespaceDeclHandler : nullptr); } // @cmember Enable / Disable namespace handlers void EnableNamespaceDeclHandler(bool fEnable = true) { EnableStartNamespaceDeclHandler(fEnable); EnableEndNamespaceDeclHandler(fEnable); } // @cmember Enable / Disable the XML declaration handler void EnableXmlDeclHandler(bool fEnable = true) { assert(m_p != nullptr); XML_SetXmlDeclHandler(m_p, fEnable ? XmlDeclHandler : nullptr); } // @cmember Enable / Disable the start DOCTYPE declaration handler void EnableStartDoctypeDeclHandler(bool fEnable = true) { assert(m_p != nullptr); XML_SetStartDoctypeDeclHandler(m_p, fEnable ? StartDoctypeDeclHandler : nullptr); } // @cmember Enable / Disable the end DOCTYPE declaration handler void EnableEndDoctypeDeclHandler(bool fEnable = true) { assert(m_p != nullptr); XML_SetEndDoctypeDeclHandler(m_p, fEnable ? EndDoctypeDeclHandler : nullptr); } // @cmember Enable / Disable the DOCTYPE declaration handler void EnableDoctypeDeclHandler(bool fEnable = true) { assert(m_p != nullptr); EnableStartDoctypeDeclHandler(fEnable); EnableEndDoctypeDeclHandler(fEnable); } public: // Parser error reporting methods // @cmember Get last error enum XML_Error GetErrorCode() { assert(m_p != nullptr); return XML_GetErrorCode(m_p); } // @cmember Get the current byte index long GetCurrentByteIndex() { assert(m_p != nullptr); return XML_GetCurrentByteIndex(m_p); } // @cmember Get the current line number int GetCurrentLineNumber() { assert(m_p != nullptr); return XML_GetCurrentLineNumber(m_p); } // @cmember Get the current column number int GetCurrentColumnNumber() { assert(m_p != nullptr); return XML_GetCurrentColumnNumber(m_p); } // @cmember Get the current byte count int GetCurrentByteCount() { assert(m_p != nullptr); return XML_GetCurrentByteCount(m_p); } // @cmember Get the input context const char * GetInputContext(int * pnOffset, int * pnSize) { assert(m_p != nullptr); return XML_GetInputContext(m_p, pnOffset, pnSize); } // @cmember Get last error string const XML_LChar * GetErrorString() { return XML_ErrorString(GetErrorCode()); } // @cmember Return the version string static const XML_LChar * GetExpatVersion() { return XML_ExpatVersion(); } // @cmember Get the version information static void GetExpatVersion(int * pnMajor, int * pnMinor, int * pnMicro) { XML_expat_version v = XML_ExpatVersionInfo(); if (pnMajor) { *pnMajor = v.major; } if (pnMinor) { *pnMinor = v.minor; } if (pnMicro) { *pnMicro = v.micro; } } // @cmember Get last error string static const XML_LChar * GetErrorString(enum XML_Error nError) { return XML_ErrorString(nError); } // Public handler methods: // The template parameter should provide their own implementation for those handlers that they want // @cmember Start element handler void OnStartElement(const XML_Char * pszName, const XML_Char ** papszAttrs) { return; } // @cmember End element handler void OnEndElement(const XML_Char * pszName) { return; } // @cmember Character data handler void OnCharacterData(const XML_Char * pszData, int nLength) { return; } // @cmember Processing instruction handler void OnProcessingInstruction(const XML_Char * pszTarget, const XML_Char * pszData) { return; } // @cmember Comment handler void OnComment(const XML_Char * pszData) { return; } // @cmember Start CDATA section handler void OnStartCdataSection() { return; } // @cmember End CDATA section handler void OnEndCdataSection() { return; } // @cmember Default handler void OnDefault(const XML_Char * pszData, int nLength) { return; } // @cmember External entity ref handler bool OnExternalEntityRef( const XML_Char * pszContext, const XML_Char * pszBase, const XML_Char * pszSystemID, const XML_Char * pszPublicID ) { return false; } // @cmember Unknown encoding handler bool OnUnknownEncoding(const XML_Char * pszName, XML_Encoding * pInfo) { return false; } // @cmember Start namespace declaration handler void OnStartNamespaceDecl(const XML_Char * pszPrefix, const XML_Char * pszURI) { return; } // @cmember End namespace declaration handler void OnEndNamespaceDecl(const XML_Char * pszPrefix) { return; } // @cmember XML declaration handler void OnXmlDecl(const XML_Char * pszVersion, const XML_Char * pszEncoding, bool fStandalone) { return; } // @cmember Start DOCTYPE declaration handler void OnStartDoctypeDecl( const XML_Char * pszDoctypeName, const XML_Char * pszSysID, const XML_Char * pszPubID, bool fHasInternalSubset ) { return; } // @cmember End DOCTYPE declaration handler void OnEndDoctypeDecl() { return; } // @access Protected methods protected: // @cmember Handle any post creation void OnPostCreate() {} // @access Protected static methods protected: // @cmember Start element handler wrapper static void __cdecl StartElementHandler(void * pUserData, const XML_Char * pszName, const XML_Char ** papszAttrs) { _T * pThis = static_cast<_T *>((CExpatImpl<_T> *) pUserData); pThis->OnStartElement(pszName, papszAttrs); } // @cmember End element handler wrapper static void __cdecl EndElementHandler(void * pUserData, const XML_Char * pszName) { _T * pThis = static_cast<_T *>((CExpatImpl<_T> *) pUserData); pThis->OnEndElement(pszName); } // @cmember Character data handler wrapper static void __cdecl CharacterDataHandler(void * pUserData, const XML_Char * pszData, int nLength) { _T * pThis = static_cast<_T *>((CExpatImpl<_T> *) pUserData); pThis->OnCharacterData(pszData, nLength); } // @cmember Processing instruction handler wrapper static void __cdecl ProcessingInstructionHandler( void * pUserData, const XML_Char * pszTarget, const XML_Char * pszData ) { _T * pThis = static_cast<_T *>((CExpatImpl<_T> *) pUserData); pThis->OnProcessingInstruction(pszTarget, pszData); } // @cmember Comment handler wrapper static void __cdecl CommentHandler(void * pUserData, const XML_Char * pszData) { _T * pThis = static_cast<_T *>((CExpatImpl<_T> *) pUserData); pThis->OnComment(pszData); } // @cmember Start CDATA section wrapper static void __cdecl StartCdataSectionHandler(void * pUserData) { _T * pThis = static_cast<_T *>((CExpatImpl<_T> *) pUserData); pThis->OnStartCdataSection(); } // @cmember End CDATA section wrapper static void __cdecl EndCdataSectionHandler(void * pUserData) { _T * pThis = static_cast<_T *>((CExpatImpl<_T> *) pUserData); pThis->OnEndCdataSection(); } // @cmember Default wrapper static void __cdecl DefaultHandler(void * pUserData, const XML_Char * pszData, int nLength) { _T * pThis = static_cast<_T *>((CExpatImpl<_T> *) pUserData); pThis->OnDefault(pszData, nLength); } // @cmember External entity ref wrapper static int __cdecl ExternalEntityRefHandler( void * pUserData, const XML_Char * pszContext, const XML_Char * pszBase, const XML_Char * pszSystemID, const XML_Char * pszPublicID ) { _T * pThis = static_cast<_T *>((CExpatImpl<_T> *) pUserData); return pThis->OnExternalEntityRef(pszContext, pszBase, pszSystemID, pszPublicID) ? 1 : 0; } // @cmember Unknown encoding wrapper static int __cdecl UnknownEncodingHandler(void * pUserData, const XML_Char * pszName, XML_Encoding * pInfo) { _T * pThis = static_cast<_T *>((CExpatImpl<_T> *) pUserData); return pThis->OnUnknownEncoding(pszName, pInfo) ? 1 : 0; } // @cmember Start namespace decl wrapper static void __cdecl StartNamespaceDeclHandler(void * pUserData, const XML_Char * pszPrefix, const XML_Char * pszURI) { _T * pThis = static_cast<_T *>((CExpatImpl<_T> *) pUserData); pThis->OnStartNamespaceDecl(pszPrefix, pszURI); } // @cmember End namespace decl wrapper static void __cdecl EndNamespaceDeclHandler(void * pUserData, const XML_Char * pszPrefix) { _T * pThis = static_cast<_T *>((CExpatImpl<_T> *) pUserData); pThis->OnEndNamespaceDecl(pszPrefix); } // @cmember XML declaration wrapper static void __cdecl XmlDeclHandler( void * pUserData, const XML_Char * pszVersion, const XML_Char * pszEncoding, int nStandalone ) { _T * pThis = static_cast<_T *>((CExpatImpl<_T> *) pUserData); pThis->OnXmlDecl(pszVersion, pszEncoding, nStandalone != 0); } // @cmember Start Doctype declaration wrapper static void __cdecl StartDoctypeDeclHandler( void * pUserData, const XML_Char * pszDoctypeName, const XML_Char * pszSysID, const XML_Char * pszPubID, int nHasInternalSubset ) { _T * pThis = static_cast<_T *>((CExpatImpl<_T> *) pUserData); pThis->OnStartDoctypeDecl(pszDoctypeName, pszSysID, pszPubID, nHasInternalSubset != 0); } // @cmember End Doctype declaration wrapper static void __cdecl EndDoctypeDeclHandler(void * pUserData) { _T * pThis = static_cast<_T *>((CExpatImpl<_T> *) pUserData); pThis->OnEndDoctypeDecl(); } protected: XML_Parser m_p; /** Returns the value of the specified attribute, if found; nullptr otherwise */ static const XML_Char * FindAttr(const XML_Char ** iAttrs, const XML_Char * iAttrToFind) { for (const XML_Char ** Attr = iAttrs; *Attr != nullptr; Attr += 2) { if (strcmp(*Attr, iAttrToFind) == 0) { return *(Attr + 1); } } // for Attr - iAttrs[] return nullptr; } };