http://xml.apache.org/ http://www.apache.org/ http://www.w3.org/

Home

Readme
Download
Installation
Build

API Docs
Samples
Schema

FAQs
Programming
Migration

Releases
Bug-Reporting
Feedback

Y2K Compliance
PDF Document

CVS Repository
Mail Archive

API Docs for SAX and DOM
 

Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members  

DOMParser.hpp

Go to the documentation of this file.
00001 /*
00002  * The Apache Software License, Version 1.1
00003  *
00004  * Copyright (c) 1999-2001 The Apache Software Foundation.  All rights
00005  * reserved.
00006  *
00007  * Redistribution and use in source and binary forms, with or without
00008  * modification, are permitted provided that the following conditions
00009  * are met:
00010  *
00011  * 1. Redistributions of source code must retain the above copyright
00012  *    notice, this list of conditions and the following disclaimer.
00013  *
00014  * 2. Redistributions in binary form must reproduce the above copyright
00015  *    notice, this list of conditions and the following disclaimer in
00016  *    the documentation and/or other materials provided with the
00017  *    distribution.
00018  *
00019  * 3. The end-user documentation included with the redistribution,
00020  *    if any, must include the following acknowledgment:
00021  *       "This product includes software developed by the
00022  *        Apache Software Foundation (http://www.apache.org/)."
00023  *    Alternately, this acknowledgment may appear in the software itself,
00024  *    if and wherever such third-party acknowledgments normally appear.
00025  *
00026  * 4. The names "Xerces" and "Apache Software Foundation" must
00027  *    not be used to endorse or promote products derived from this
00028  *    software without prior written permission. For written
00029  *    permission, please contact apache\@apache.org.
00030  *
00031  * 5. Products derived from this software may not be called "Apache",
00032  *    nor may "Apache" appear in their name, without prior written
00033  *    permission of the Apache Software Foundation.
00034  *
00035  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
00036  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00037  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
00038  * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
00039  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00040  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
00041  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
00042  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
00043  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
00044  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
00045  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00046  * SUCH DAMAGE.
00047  * ====================================================================
00048  *
00049  * This software consists of voluntary contributions made by many
00050  * individuals on behalf of the Apache Software Foundation, and was
00051  * originally based on software copyright (c) 1999, International
00052  * Business Machines, Inc., http://www.ibm.com .  For more information
00053  * on the Apache Software Foundation, please see
00054  * <http://www.apache.org/>.
00055  */
00056 
00057 /*
00058  * $Id: DOMParser.hpp,v 1.28 2001/08/01 19:11:02 tng Exp $
00059  *
00060  */
00061 
00062 #if !defined(DOMPARSER_HPP)
00063 #define DOMPARSER_HPP
00064 
00065 
00066 #include <dom/DOM_Document.hpp>
00067 #include <framework/XMLDocumentHandler.hpp>
00068 #include <framework/XMLErrorReporter.hpp>
00069 #include <framework/XMLEntityHandler.hpp>
00070 #include <util/ValueStackOf.hpp>
00071 
00072 #include <validators/DTD/DocTypeHandler.hpp>
00073 #include <dom/DOM_DocumentType.hpp>
00074 #include <validators/DTD/DTDElementDecl.hpp>
00075 
00076 class EntityResolver;
00077 class ErrorHandler;
00078 class XMLPScanToken;
00079 class XMLScanner;
00080 class XMLValidator;
00081 
00082 
00093 class  DOMParser :
00094 
      //  DOMParser implements the four handler interfaces named below
      //  (document, error, entity and doctype callbacks). Per the notes on
      //  the private data members, the DOM tree held in fDocument is built
      //  from the disjoint callbacks this class receives.
00095     public XMLDocumentHandler
00096     , public XMLErrorReporter
00097     , public XMLEntityHandler
00098     , public DocTypeHandler
00099 {
00100 public :
00101     // -----------------------------------------------------------------------
00102     //  Class types
00103     // -----------------------------------------------------------------------
00104     enum ValSchemes
00105     {
00106         Val_Never
00107         , Val_Always
00108         , Val_Auto
00109     };
00110 
00111 
00112     // -----------------------------------------------------------------------
00113     //  Constructors and Destructor
00114     // -----------------------------------------------------------------------
00115 
00118 
          // NOTE(review): the parameter name suggests the parser takes ownership
          // of the validator; defaults to a null pointer. Confirm in the .cpp.
00127     DOMParser(XMLValidator* const valToAdopt = 0);
00128 
00132     ~DOMParser();
00133 
00135 
00141     void reset();
00142 
00143 
00144     // -----------------------------------------------------------------------
00145     //  Getter methods
00146     // -----------------------------------------------------------------------
00147 
00150 
00160     DOM_Document getDocument();
00161 
00169     ErrorHandler* getErrorHandler();
00170 
00178     const ErrorHandler* getErrorHandler() const;
00179 
00187     EntityResolver* getEntityResolver();
00188 
00196     const EntityResolver* getEntityResolver() const;
00197 
00205     const XMLScanner& getScanner() const;
00206 
00214     const XMLValidator& getValidator() const;
00215 
00223     ValSchemes getValidationScheme() const;
00224 
00235     bool getDoSchema() const;
00236 
00247     bool getValidationSchemaFullChecking() const;
00248 
00260     int getErrorCount() const;
00261 
00272     bool getDoNamespaces() const;
00273 
00286     bool getExitOnFirstFatalError() const;
00287 
00298     bool getValidationConstraintFatal() const;
00299 
00311     bool getExpandEntityReferences() const;
00312 
00330     bool  getCreateEntityReferenceNodes()const;
00331 
00342     bool getIncludeIgnorableWhitespace() const;
00343 
00353     bool getToCreateXMLDeclTypeNode() const;
00354 
00356 
00357 
00358     // -----------------------------------------------------------------------
00359     //  Setter methods
00360     // -----------------------------------------------------------------------
00361 
00364 
00378     void setErrorHandler(ErrorHandler* const handler);
00379 
00395     void setEntityResolver(EntityResolver* const handler);
00396 
00415     void setDoNamespaces(const bool newState);
00416 
00433     void setExitOnFirstFatalError(const bool newState);
00434 
00450     void setValidationConstraintFatal(const bool newState);
00451 
00466     void setExpandEntityReferences(const bool expand);
00467 
00484     void setCreateEntityReferenceNodes(const bool create);
00485 
00507     void setIncludeIgnorableWhitespace(const bool include);
00508 
00525     void setValidationScheme(const ValSchemes newScheme);
00526 
00540     void setDoSchema(const bool newState);
00541 
00558     void setValidationSchemaFullChecking(const bool schemaFullChecking);
00559 
00570     void setToCreateXMLDeclTypeNode(const bool create);
00571 
00573 
00574 
00575     // -----------------------------------------------------------------------
00576     //  Parsing methods
00577     // -----------------------------------------------------------------------
00578 
00581 
          // One-shot parse overloads: the input is given as an InputSource or
          // as a system id in XMLCh or char form.
00602     void parse(const InputSource& source, const bool reuseGrammar = false);
00603 
00623     void parse(const XMLCh* const systemId, const bool reuseGrammar = false);
00624 
00642     void parse(const char* const systemId, const bool reuseGrammar = false);
00643 
          // Token-based parse overloads: parseFirst() fills an XMLPScanToken
          // that parseNext() and parseReset() then take.
00673     bool parseFirst
00674     (
00675         const   XMLCh* const    systemId
00676         ,       XMLPScanToken&  toFill
00677         , const bool            reuseGrammar = false
00678     );
00679 
00710     bool parseFirst
00711     (
00712         const   char* const     systemId
00713         ,       XMLPScanToken&  toFill
00714         , const bool            reuseGrammar = false
00715     );
00716 
00747     bool parseFirst
00748     (
00749         const   InputSource&    source
00750         ,       XMLPScanToken&  toFill
00751         , const bool            reuseGrammar = false
00752     );
00753 
00776     bool parseNext(XMLPScanToken& token);
00777 
00803     void parseReset(XMLPScanToken& token);
00804 
00806 
00807 
00808 
00809     // -----------------------------------------------------------------------
00810     //  Implementation of the XMLErrorReporter interface.
00811     // -----------------------------------------------------------------------
00812 
00815 
00840     virtual void error
00841     (
00842         const   unsigned int                errCode
00843         , const XMLCh* const                msgDomain
00844         , const XMLErrorReporter::ErrTypes  errType
00845         , const XMLCh* const                errorText
00846         , const XMLCh* const                systemId
00847         , const XMLCh* const                publicId
00848         , const unsigned int                lineNum
00849         , const unsigned int                colNum
00850     );
00851 
00860     virtual void resetErrors();
00862 
00863 
00864     // -----------------------------------------------------------------------
00865     //  Implementation of the XMLEntityHandler interface.
00866     // -----------------------------------------------------------------------
00867 
00870 
00883     virtual void endInputSource(const InputSource& inputSource);
00884 
00900     virtual bool expandSystemId
00901     (
00902         const   XMLCh* const    systemId
00903         ,       XMLBuffer&      toFill
00904     );
00905 
00914     virtual void resetEntities();
00915 
00931     virtual InputSource* resolveEntity
00932     (
00933         const   XMLCh* const    publicId
00934         , const XMLCh* const    systemId
00935     );
00936 
00949     virtual void startInputSource(const InputSource& inputSource);
00950 
00952 
00953 
00954 
00955     // -----------------------------------------------------------------------
00956     //  Implementation of the XMLDocumentHandler interface.
00957     // -----------------------------------------------------------------------
00958 
00961 
00974     virtual void docCharacters
00975     (
00976         const   XMLCh* const    chars
00977         , const unsigned int    length
00978         , const bool            cdataSection
00979     );
00980 
00989     virtual void docComment
00990     (
00991         const   XMLCh* const    comment
00992     );
00993 
01006     virtual void docPI
01007     (
01008         const   XMLCh* const    target
01009         , const XMLCh* const    data
01010     );
01011 
01016     virtual void endDocument();
01017 
01031     virtual void endElement
01032     (
01033         const   XMLElementDecl& elemDecl
01034         , const unsigned int    urlId
01035         , const bool            isRoot
01036     );
01037 
01046     virtual void endEntityReference
01047     (
01048         const   XMLEntityDecl&  entDecl
01049     );
01050 
01069     virtual void ignorableWhitespace
01070     (
01071         const   XMLCh* const    chars
01072         , const unsigned int    length
01073         , const bool            cdataSection
01074     );
01075 
01082     virtual void resetDocument();
01083 
01088     virtual void startDocument();
01089 
01117     virtual void startElement
01118     (
01119         const   XMLElementDecl&         elemDecl
01120         , const unsigned int            urlId
01121         , const XMLCh* const            elemPrefix
01122         , const RefVectorOf<XMLAttr>&   attrList
01123         , const unsigned int            attrCount
01124         , const bool                    isEmpty
01125         , const bool                    isRoot
01126     );
01127 
01137     virtual void startEntityReference
01138     (
01139         const   XMLEntityDecl&  entDecl
01140     );
01141 
01160     virtual void XMLDecl
01161     (
01162         const   XMLCh* const    versionStr
01163         , const XMLCh* const    encodingStr
01164         , const XMLCh* const    standaloneStr
01165         , const XMLCh* const    actualEncStr
01166     );
01168 
01169 
01172 
01182     bool getDoValidation() const;
01183 
01197     void setDoValidation(const bool newState);
01198 
          // -----------------------------------------------------------------------
          //  DTD declaration callbacks (attDef through TextDecl).
          //  NOTE(review): presumably the implementation of the DocTypeHandler
          //  interface this class derives from - confirm against
          //  DocTypeHandler.hpp.
          // -----------------------------------------------------------------------
01202     virtual void attDef
01203     (
01204         const   DTDElementDecl&     elemDecl
01205         , const DTDAttDef&          attDef
01206         , const bool                ignoring
01207     );
01208 
01209     virtual void doctypeComment
01210     (
01211         const   XMLCh* const    comment
01212     );
01213 
01214     virtual void doctypeDecl
01215     (
01216         const   DTDElementDecl& elemDecl
01217         , const XMLCh* const    publicId
01218         , const XMLCh* const    systemId
01219         , const bool            hasIntSubset
01220     );
01221 
01222     virtual void doctypePI
01223     (
01224         const   XMLCh* const    target
01225         , const XMLCh* const    data
01226     );
01227 
01228     virtual void doctypeWhitespace
01229     (
01230         const   XMLCh* const    chars
01231         , const unsigned int    length
01232     );
01233 
01234     virtual void elementDecl
01235     (
01236         const   DTDElementDecl& decl
01237         , const bool            isIgnored
01238     );
01239 
01240     virtual void endAttList
01241     (
01242         const   DTDElementDecl& elemDecl
01243     );
01244 
01245     virtual void endIntSubset();
01246 
01247     virtual void endExtSubset();
01248 
01249     virtual void entityDecl
01250     (
01251         const   DTDEntityDecl&  entityDecl
01252         , const bool            isPEDecl
01253         , const bool            isIgnored
01254     );
01255 
01256     virtual void resetDocType();
01257 
01258     virtual void notationDecl
01259     (
01260         const   XMLNotationDecl&    notDecl
01261         , const bool                isIgnored
01262     );
01263 
01264     virtual void startAttList
01265     (
01266         const   DTDElementDecl& elemDecl
01267     );
01268 
01269     virtual void startIntSubset();
01270 
01271     virtual void startExtSubset();
01272 
01273     virtual void TextDecl
01274     (
01275         const   XMLCh* const    versionStr
01276         , const XMLCh* const    encodingStr
01277     );
01278 
01279 
01281 
01282 
01283 protected :
01284     // -----------------------------------------------------------------------
01285     //  Protected getter methods
01286     // -----------------------------------------------------------------------
01287 
01290 
01295     DOM_Node getCurrentNode();
01296 
01298 
01299 
01300     // -----------------------------------------------------------------------
01301     //  Protected setter methods
01302     // -----------------------------------------------------------------------
01303 
01306 
01314     void setCurrentNode(DOM_Node toSet);
01315 
01322     void setDocument(DOM_Document toSet);
01324 
01325 
01326 private :
01327     // -----------------------------------------------------------------------
01328     //  Private data members
01329     //
01330     //  fCurrentNode
01331     //  fCurrentParent
01332     //      Used to track the current node during nested element events. Since
01333     //      the tree must be built from a set of disjoint callbacks, we need
01334     //      these to keep up with where we currently are.
01335     //
01336     //  fDocument
01337     //      The root document object, filled with the document contents.
01338     //
01339     //  fEntityResolver
01340     //      The installed SAX entity resolver, if any. Null if none.
01341     //
01342     //  fErrorHandler
01343     //      The installed SAX error handler, if any. Null if none.
01344     //
01345     //  fCreateEntityReferenceNodes
01346     //      Indicates whether entity reference nodes should be created.
01347     //
01348     //  fIncludeIgnorableWhitespace
01349     //      Indicates whether ignorable whitespace should be added to
01350     //      the DOM tree for validating parsers.
01351     //
01352     //  fNodeStack
01353     //      Used to track previous parent nodes during nested element events.
01354     //
01355     //  fParseInProgress
01356     //      Used to prevent multiple entrance to the parser while it is doing
01357     //      a parse.
01358     //
01359     //  fScanner
01360     //      The scanner used for this parser. This is created during the
01361     //      constructor.
01362     //
01363     //  fWithinElement
01364     //      A flag to indicate that the parser is within at least one level
01365     //      of element processing.
01366     //
01367     //  fDocumentType
01368     //      Used to store and update the documentType variable information
01369     //      in fDocument
01370     //
01371     //  fToCreateXMLDeclTypeNode
01372     //      A flag to create a DOM_XMLDecl node in the DOM tree if it exists
01373     //      This is an extension to xerces implementation
01374     //
01375     // -----------------------------------------------------------------------
          // NOTE(review): several members below are raw pointers, and this
          // listing declares no copy constructor or assignment operator for the
          // class. Confirm that DOMParser objects are never copied.
01376     DOM_Node                fCurrentParent;
01377     DOM_Node                fCurrentNode;
01378     DOM_Document            fDocument;
01379     EntityResolver*         fEntityResolver;
01380     ErrorHandler*           fErrorHandler;
01381     bool                    fCreateEntityReferenceNodes;
01382     bool                    fIncludeIgnorableWhitespace;
01383     ValueStackOf<DOM_Node>* fNodeStack;
01384     bool                    fParseInProgress;
01385     XMLScanner*             fScanner;
01386     bool                    fWithinElement;
01387     DocumentTypeImpl*       fDocumentType;
01388     bool                    fToCreateXMLDeclTypeNode;
01389 };
01390 
01391 
01392 
01393 // ---------------------------------------------------------------------------
01394 //  DOMParser: Handlers for the XMLEntityHandler interface
01395 // ---------------------------------------------------------------------------
01396 inline void DOMParser::endInputSource(const InputSource&)
01397 {
01398     // Intentionally empty: the DOM parser does nothing on this
          // notification, so the InputSource argument is left unnamed.
01399 }
01400 
01401 inline bool DOMParser::expandSystemId(const XMLCh* const, XMLBuffer&)
01402 {
01403     // The DOM parser does not expand system ids: both arguments are
          // ignored and the result is always false.
          // NOTE(review): presumably false tells the caller that no expansion
          // was performed - confirm against XMLEntityHandler.
01404     return false;
01405 }
01406 
01407 inline void DOMParser::resetEntities()
01408 {
01409     // Intentionally empty: there is nothing to reset for this callback.
01410 }
01411 
01412 inline void DOMParser::startInputSource(const InputSource&)
01413 {
01414     // Intentionally empty: the DOM parser does nothing on this
          // notification, so the InputSource argument is left unnamed.
01415 }
01416 
01417 
01418 // ---------------------------------------------------------------------------
01419 //  DOMParser: Getter methods
01420 // ---------------------------------------------------------------------------
01421 inline DOM_Document DOMParser::getDocument()
01422 {
          // Hands back the fDocument member by value.
01423     return fDocument;
01424 }
01425 
01426 inline ErrorHandler* DOMParser::getErrorHandler()
01427 {
          // Non-const overload: returns the stored handler pointer as-is.
01428     return fErrorHandler;
01429 }
01430 
01431 inline const ErrorHandler* DOMParser::getErrorHandler() const
01432 {
          // Const overload: same pointer, exposed as pointer-to-const.
01433     return fErrorHandler;
01434 }
01435 
01436 inline EntityResolver* DOMParser::getEntityResolver()
01437 {
          // Non-const overload: returns the stored resolver pointer as-is.
01438     return fEntityResolver;
01439 }
01440 
01441 inline const EntityResolver* DOMParser::getEntityResolver() const
01442 {
          // Const overload: same pointer, exposed as pointer-to-const.
01443     return fEntityResolver;
01444 }
01445 
01446 inline bool DOMParser::getExpandEntityReferences() const
01447 {
          // Reports fCreateEntityReferenceNodes unchanged - the same member
          // that getCreateEntityReferenceNodes() returns.
          // NOTE(review): "expand references" and "create reference nodes" read
          // as opposite settings, yet no negation is applied here; confirm the
          // intended meaning against the API documentation.
01448     return fCreateEntityReferenceNodes;
01449 }
01450 inline bool DOMParser::getCreateEntityReferenceNodes() const
01451 {
          // Returns the current value of the fCreateEntityReferenceNodes flag.
01452     return fCreateEntityReferenceNodes;
01453 }
01454 
01455 inline bool DOMParser::getIncludeIgnorableWhitespace() const
01456 {
          // Returns the current value of the fIncludeIgnorableWhitespace flag.
01457     return fIncludeIgnorableWhitespace;
01458 }
01459 
01460 inline const XMLScanner& DOMParser::getScanner() const
01461 {
          // fScanner is dereferenced without a null check, so this relies on
          // the pointer being valid whenever the getter is called.
01462     return *fScanner;
01463 }
01464 
01465 inline bool DOMParser::getToCreateXMLDeclTypeNode() const
01466 {
          // Returns the current value of the fToCreateXMLDeclTypeNode flag.
01467     return fToCreateXMLDeclTypeNode;
01468 }
01469 
01470 
01471 // ---------------------------------------------------------------------------
01472 //  DOMParser: Setter methods
01473 // ---------------------------------------------------------------------------
01474 inline void DOMParser::setExpandEntityReferences(const bool expand)
01475 {
          // Stores `expand` directly in fCreateEntityReferenceNodes - the same
          // member that setCreateEntityReferenceNodes() writes.
          // NOTE(review): the value is not negated even though "expand" and
          // "create reference nodes" read as opposite settings; confirm the
          // intended meaning against the API documentation.
01476     fCreateEntityReferenceNodes = expand;
01477 }
01478 
01479 inline void DOMParser::setCreateEntityReferenceNodes(const bool create)
01480 {
          // Overwrites the fCreateEntityReferenceNodes flag with `create`.
01481     fCreateEntityReferenceNodes = create;
01482 }
01483 
01484 inline void DOMParser::setIncludeIgnorableWhitespace(const bool include)
01485 {
          // Overwrites the fIncludeIgnorableWhitespace flag with `include`.
01486     fIncludeIgnorableWhitespace = include;
01487 }
01488 
01489 inline void DOMParser::setToCreateXMLDeclTypeNode(const bool create)
01490 {
          // Overwrites the fToCreateXMLDeclTypeNode flag with `create`.
01491     fToCreateXMLDeclTypeNode = create;
01492 }
01493 
01494 
01495 // ---------------------------------------------------------------------------
01496 //  DOMParser: Protected getter methods
01497 // ---------------------------------------------------------------------------
01498 inline DOM_Node DOMParser::getCurrentNode()
01499 {
          // Hands back the fCurrentNode member by value.
01500     return fCurrentNode;
01501 }
01502 
01503 
01504 // ---------------------------------------------------------------------------
01505 //  DOMParser: Protected setter methods
01506 // ---------------------------------------------------------------------------
01507 inline void DOMParser::setCurrentNode(DOM_Node toSet)
01508 {
          // Replaces fCurrentNode with `toSet`, which is received by value.
01509     fCurrentNode = toSet;
01510 }
01511 
01512 inline void DOMParser::setDocument(DOM_Document toSet)
01513 {
          // Replaces fDocument with `toSet`, which is received by value.
01514     fDocument = toSet;
01515 }
01516 
01517 #endif


Copyright © 2000 The Apache Software Foundation. All Rights Reserved.