include/HTMLParser/HTMLEscape.hpp

Go to the documentation of this file.
00001 #ifndef hpp_CPP_HTMLEscape_CPP_hpp
00002 #define hpp_CPP_HTMLEscape_CPP_hpp
00003 
00004 // We need the string declarations (Strings::FastString is used below)
00005 #include "../Strings/Strings.hpp"
00006 
00007 namespace HTML
00008 {
00009 
00011     struct CharsetFunctions
00012     {
              /* Abstract interface of charset-dependent scanning primitives, declared in the
                 HTML namespace. Every operation is pure virtual, so a concrete charset must
                 implement all of them.
                 NOTE(review): only the signatures are visible in this header. The meaning of the
                 returned int values (offset, count, failure value) and whether `length` counts
                 tchar units or bytes must be confirmed against the implementation. */

              /** Virtual destructor, so that an implementation can be destroyed safely through a
                  CharsetFunctions pointer (doing so without it is undefined behaviour). */
              virtual ~CharsetFunctions() {}

              /** Per its name, looks for the start of the next tag in @a input.
                  @param input   text to scan
                  @param length  extent of @a input to consider
                  @return implementation-defined int result (presumably a position -- confirm) */
00020         virtual int findNextStartTag(const tchar * const input, const int length) const = 0;
00021 
              /** Per its name, looks for the end of the next tag in @a input.
                  @param input   text to scan
                  @param length  extent of @a input to consider
                  @return implementation-defined int result (presumably a position -- confirm) */
00029         virtual int findNextTagEnd(const tchar * const input, const int length) const = 0;
00030 
00031 
              /** Per its name, looks for the end of one specific tag.
                  @param tagName     name of the tag; note this is plain char, not tchar
                  @param tagNameLen  length of @a tagName
                  @param input       text to scan
                  @param length      extent of @a input to consider
                  @return implementation-defined int result (presumably a position -- confirm) */
00041         virtual int findSpecificTagEnd(const char * tagName, const unsigned int tagNameLen, const tchar * const input, const int length) const = 0;
00042 
00043 
              /** Per its name, looks for a tag name in @a input.
                  @param input        text to scan
                  @param length       extent of @a input to consider
                  @param tagStartPos  non-const reference, so the callee may write to it
                                      (presumably receives where the name starts -- confirm)
                  @return implementation-defined int result */
00052         virtual int findTagName(const tchar * const input, const int length, int & tagStartPos) const = 0;
00053         
              /** Per its name, looks for an attribute name in @a input.
                  @param input              text to scan
                  @param length             extent of @a input to consider
                  @param attributeStartPos  non-const reference, so the callee may write to it
                                            (presumably receives where the name starts -- confirm)
                  @return implementation-defined int result */
00062         virtual int findAttributeName(const tchar * const input, const int length, int & attributeStartPos) const = 0;
00063 
              /** Per its name, looks for an "attribute mark" in @a input.
                  NOTE(review): which character(s) count as the mark is not visible here.
                  @param input   text to scan
                  @param length  extent of @a input to consider
                  @return implementation-defined int result */
00071         virtual int findAttributeMark(const tchar * const input, const int length) const = 0;
00072 
              /** Per its name, looks for the next attribute in @a input.
                  @param input           text to scan
                  @param length          extent of @a input to consider
                  @param attribStartPos  non-const reference, so the callee may write to it
                  @return implementation-defined int result */
00081         virtual int findNextAttribute(const tchar * const input, const int length, int & attribStartPos) const = 0;
00082         
              /** Per its name, looks for the content (value) of the next attribute in @a input.
                  @param input           text to scan
                  @param length          extent of @a input to consider
                  @param attribStartPos  non-const reference, so the callee may write to it
                  @return implementation-defined int result */
00091         virtual int findNextAttributeContent(const tchar * const input, const int length, int & attribStartPos) const = 0;
00092 
              /** Computes a length for @a input; no explicit length is passed in.
                  NOTE(review): presumably @a input must be terminated -- confirm.
                  @param input  text to measure
                  @return the length as an unsigned int */
00098         virtual unsigned int findLength(const tchar * const input) const = 0;
00099         
              /** Per its name, looks for the single character @a ch in @a input.
                  @param input   text to scan
                  @param length  extent of @a input to consider
                  @param ch      character to look for
                  @return implementation-defined int result (presumably a position -- confirm) */
00106         virtual int findAChar(const tchar * const input, const int length, const tchar ch) const = 0;
00107 
              /** Accessor for the last tag name found, as plain char text.
                  NOTE(review): presumably set by findTagName(); ownership and lifetime of the
                  returned pointer are not visible here. */
00112         virtual char const * const lastFoundTagName() const = 0;
00113 
              /** Accessor for the length of the last tag name found. Returned by const reference,
                  so the referenced int is owned by the implementation. */
00118         virtual const int & lastFoundTagNameLength() const = 0;
00119 
              /** Accessor for the last attribute name found, as plain char text.
                  NOTE(review): presumably set by findAttributeName(); ownership and lifetime of
                  the returned pointer are not visible here. */
00124         virtual char const * const lastFoundAttributeName() const = 0;
00125 
              /** Accessor for the length of the last attribute name found. Returned by const
                  reference, so the referenced int is owned by the implementation. */
00130         virtual const int & lastFoundAttributeNameLength() const = 0;
00131     };
00132 
00133     /*
00134     struct CharsetASCII : public CharsetFunctions
00135     {
00136         // This method looks for the last position in the string that matches charToSearch
00137         inline int reverseFindChar_slow(const tchar * const input, const int length, const tchar charToSearch);
00138         // This method looks for the last position in the string that does not match charToSearch
00139         inline int inverseReverseFindChar_slow(const tchar * const input, const int length, const tchar charToSearch);
00140         inline int findChar(const tchar * const input, const int length, const unsigned int magic);
00141         inline int eatChar(const tchar * const input, const int length, const unsigned int magic);
00142         inline int findChars(const tchar * const input, const int length, const unsigned int magic, const unsigned int magic2);
00143 
00144         virtual int findNextStartTag(const tchar * const input, const int length);
00145         virtual int findNextTagEnd(const tchar * const input, const int length);
00146         virtual int findAttributeMark(const tchar * const input, const int length);
00147 
00148         // Currently this method is implemented as forward search for '=', then backward inverse search for ' ', then backward search for ' '
00149         inline int findNextAttributeChar(const tchar * const input, const int length, int & attributeLength);
00150 
00151         int findNextAttributeRL(const tchar * const input, const int length, int & attributeLength);
00152 
00153         // Find the length of a string. We can rely on strlen for ASCII. However, this version is highly optimized 
00154         virtual unsigned int findLength(const tchar * const input);
00155     };
00156     */
00158     const CharsetFunctions & getDefaultCharsetFunctions();
              ///< Gives access to the default CharsetFunctions implementation. The object is
              ///< returned by const reference and the function is only declared here (defined
              ///< elsewhere); it is also the default `charset` argument of Escape::trimAndEscape.
00159 
00160 
00168     struct Escape
00169     {
              // Groups HTML character-entity helpers: the EntityMapping table of named entities
              // and two static functions that are declared here and defined elsewhere.

              // Named HTML character entities. Each enumerator is spelled like the entity name
              // (e.g. `amp` for "&amp;") and its value is the character code the entity stands
              // for (e.g. amp = 38, the code of '&').
00171         enum EntityMapping
00172         {               
                  // Markup-significant ASCII characters
00173             quot        = 34,
00174             amp         = 38,
00175             lt          = 60,
00176             gt          = 62,
                  // Entities with codes above 255 (not listed in strictly increasing order:
                  // `bull` comes after `euro`)
00177             OElig       = 338,
00178             oelig       = 339,
00179             Scaron      = 352,
00180             scaron      = 353,
00181             Yuml        = 376,
00182             circ        = 710,
00183             tilde       = 732,
00184             ensp        = 8194,
00185             emsp        = 8195,
00186             thinsp      = 8201,
00187             zwnj        = 8204,
00188             zwj         = 8205,
00189             lrm         = 8206,
00190             rlm         = 8207,
00191             ndash       = 8211,
00192             mdash       = 8212,
00193             lsquo       = 8216,
00194             rsquo       = 8217,
00195             sbquo       = 8218,
00196             ldquo       = 8220,
00197             rdquo       = 8221,
00198             bdquo       = 8222,
00199             dagger      = 8224,
00200             Dagger      = 8225,
00201             permil      = 8240,
00202             lsaquo      = 8249,
00203             rsaquo      = 8250,
00204             euro        = 8364,
00205             bull        = 8226,
                  // Codes 160 through 255, one enumerator per value, in increasing order
00206             nbsp        = 160,
00207             iexcl       = 161,
00208             cent        = 162,
00209             pound       = 163,
00210             curren      = 164,
00211             yen         = 165,
00212             brvbar      = 166,
00213             sect        = 167,
00214             uml         = 168,
00215             copy        = 169,
00216             ordf        = 170,
00217             laquo       = 171,
                  // NOTE(review): `not` is an alternative token for `!` in ISO C++, so conforming
                  // compilers reject it as an enumerator name; it is only accepted where operator
                  // keywords are disabled. Renaming it would change the public enumerator name, so
                  // it is left untouched here -- decide on a portable spelling with the callers.
00218             not         = 172,
00219             shy         = 173,
00220             reg         = 174,
00221             macr        = 175,
00222             deg         = 176,
00223             plusmn      = 177,
00224             sup2        = 178,
00225             sup3        = 179,
00226             acute       = 180,
00227             micro       = 181,
00228             para        = 182,
00229             middot      = 183,
00230             cedil       = 184,
00231             sup1        = 185,
00232             ordm        = 186,
00233             raquo       = 187,
00234             frac14      = 188,
00235             frac12      = 189,
00236             frac34      = 190,
00237             iquest      = 191,
00238             Agrave      = 192,
00239             Aacute      = 193,
00240             Acirc       = 194,
00241             Atilde      = 195,
00242             Auml        = 196,
00243             Aring       = 197,
00244             AElig       = 198,
00245             Ccedil      = 199,
00246             Egrave      = 200,
00247             Eacute      = 201,
00248             Ecirc       = 202,
00249             Euml        = 203,
00250             Igrave      = 204,
00251             Iacute      = 205,
00252             Icirc       = 206,
00253             Iuml        = 207,
00254             ETH         = 208,
00255             Ntilde      = 209,
00256             Ograve      = 210,
00257             Oacute      = 211,
00258             Ocirc       = 212,
00259             Otilde      = 213,
00260             Ouml        = 214,
00261             times       = 215,
00262             Oslash      = 216,
00263             Ugrave      = 217,
00264             Uacute      = 218,
00265             Ucirc       = 219,
00266             Uuml        = 220,
00267             Yacute      = 221,
00268             THORN       = 222,
00269             szlig       = 223,
00270             agrave      = 224,
00271             aacute      = 225,
00272             acirc       = 226,
00273             atilde      = 227,
00274             auml        = 228,
00275             aring       = 229,
00276             aelig       = 230,
00277             ccedil      = 231,
00278             egrave      = 232,
00279             eacute      = 233,
00280             ecirc       = 234,
00281             euml        = 235,
00282             igrave      = 236,
00283             iacute      = 237,
00284             icirc       = 238,
00285             iuml        = 239,
00286             eth         = 240,
00287             ntilde      = 241,
00288             ograve      = 242,
00289             oacute      = 243,
00290             ocirc       = 244,
00291             otilde      = 245,
00292             ouml        = 246,
00293             divide      = 247,
00294             oslash      = 248,
00295             ugrave      = 249,
00296             uacute      = 250,
00297             ucirc       = 251,
00298             uuml        = 252,
00299             yacute      = 253,
00300             thorn       = 254,
00301             yuml        = 255,
                  // Value 0. NOTE(review): presumably what fromEntity() yields for an unknown
                  // entity name -- confirm against the implementation.
00302             Null        = 0,
00303         };
00304 
              // Takes `length` characters of entity text at `input` and returns an EntityMapping.
              // NOTE(review): presumably a lookup by entity name (with or without the surrounding
              // '&' / ';' is not visible here) -- confirm against the implementation.
00306         static const EntityMapping fromEntity(const char * input, const int length);
              // Builds a Strings::FastString from `length` characters at `input`, using `charset`
              // for the charset-dependent work; `charset` defaults to getDefaultCharsetFunctions().
              // NOTE(review): per its name this trims and escapes the text; which characters are
              // trimmed or escaped is not visible in this header.
00308         static Strings::FastString trimAndEscape(const char * input, const int length, const CharsetFunctions & charset = getDefaultCharsetFunctions());
00309 
00310     };
00311 
00312 
00313 
00314 
00315 }
00316 
00317 
00318 
00319 #endif

(C) An X-Ryl669 project 2007

This document describes Unlimited Zooming Interface source code. UZI stands for Unlimited Zooming Interface, and source code license is