00001 #ifndef hpp_CPP_HTMLEscape_CPP_hpp
00002 #define hpp_CPP_HTMLEscape_CPP_hpp
00003
00004
00005 #include "../Strings/Strings.hpp"
00006
00007 namespace HTML
00008 {
00009
00011 struct CharsetFunctions
00012 {
00020 virtual int findNextStartTag(const tchar * const input, const int length) const = 0;
00021
00029 virtual int findNextTagEnd(const tchar * const input, const int length) const = 0;
00030
00031
00041 virtual int findSpecificTagEnd(const char * tagName, const unsigned int tagNameLen, const tchar * const input, const int length) const = 0;
00042
00043
00052 virtual int findTagName(const tchar * const input, const int length, int & tagStartPos) const = 0;
00053
00062 virtual int findAttributeName(const tchar * const input, const int length, int & attributeStartPos) const = 0;
00063
00071 virtual int findAttributeMark(const tchar * const input, const int length) const = 0;
00072
00081 virtual int findNextAttribute(const tchar * const input, const int length, int & attribStartPos) const = 0;
00082
00091 virtual int findNextAttributeContent(const tchar * const input, const int length, int & attribStartPos) const = 0;
00092
00098 virtual unsigned int findLength(const tchar * const input) const = 0;
00099
00106 virtual int findAChar(const tchar * const input, const int length, const tchar ch) const = 0;
00107
00112 virtual char const * const lastFoundTagName() const = 0;
00113
00118 virtual const int & lastFoundTagNameLength() const = 0;
00119
00124 virtual char const * const lastFoundAttributeName() const = 0;
00125
00130 virtual const int & lastFoundAttributeNameLength() const = 0;
00131 };
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143
00144
00145
00146
00147
00148
00149
00150
00151
00152
00153
00154
00155
00156
00158 extern const CharsetFunctions & getDefaultCharsetFunctions();
00159
00160
00168 struct Escape
00169 {
00171 enum EntityMapping
00172 {
00173 quot = 34,
00174 amp = 38,
00175 lt = 60,
00176 gt = 62,
00177 OElig = 338,
00178 oelig = 339,
00179 Scaron = 352,
00180 scaron = 353,
00181 Yuml = 376,
00182 circ = 710,
00183 tilde = 732,
00184 ensp = 8194,
00185 emsp = 8195,
00186 thinsp = 8201,
00187 zwnj = 8204,
00188 zwj = 8205,
00189 lrm = 8206,
00190 rlm = 8207,
00191 ndash = 8211,
00192 mdash = 8212,
00193 lsquo = 8216,
00194 rsquo = 8217,
00195 sbquo = 8218,
00196 ldquo = 8220,
00197 rdquo = 8221,
00198 bdquo = 8222,
00199 dagger = 8224,
00200 Dagger = 8225,
00201 permil = 8240,
00202 lsaquo = 8249,
00203 rsaquo = 8250,
00204 euro = 8364,
00205 bull = 8226,
00206 nbsp = 160,
00207 iexcl = 161,
00208 cent = 162,
00209 pound = 163,
00210 curren = 164,
00211 yen = 165,
00212 brvbar = 166,
00213 sect = 167,
00214 uml = 168,
00215 copy = 169,
00216 ordf = 170,
00217 laquo = 171,
00218 not = 172,
00219 shy = 173,
00220 reg = 174,
00221 macr = 175,
00222 deg = 176,
00223 plusmn = 177,
00224 sup2 = 178,
00225 sup3 = 179,
00226 acute = 180,
00227 micro = 181,
00228 para = 182,
00229 middot = 183,
00230 cedil = 184,
00231 sup1 = 185,
00232 ordm = 186,
00233 raquo = 187,
00234 frac14 = 188,
00235 frac12 = 189,
00236 frac34 = 190,
00237 iquest = 191,
00238 Agrave = 192,
00239 Aacute = 193,
00240 Acirc = 194,
00241 Atilde = 195,
00242 Auml = 196,
00243 Aring = 197,
00244 AElig = 198,
00245 Ccedil = 199,
00246 Egrave = 200,
00247 Eacute = 201,
00248 Ecirc = 202,
00249 Euml = 203,
00250 Igrave = 204,
00251 Iacute = 205,
00252 Icirc = 206,
00253 Iuml = 207,
00254 ETH = 208,
00255 Ntilde = 209,
00256 Ograve = 210,
00257 Oacute = 211,
00258 Ocirc = 212,
00259 Otilde = 213,
00260 Ouml = 214,
00261 times = 215,
00262 Oslash = 216,
00263 Ugrave = 217,
00264 Uacute = 218,
00265 Ucirc = 219,
00266 Uuml = 220,
00267 Yacute = 221,
00268 THORN = 222,
00269 szlig = 223,
00270 agrave = 224,
00271 aacute = 225,
00272 acirc = 226,
00273 atilde = 227,
00274 auml = 228,
00275 aring = 229,
00276 aelig = 230,
00277 ccedil = 231,
00278 egrave = 232,
00279 eacute = 233,
00280 ecirc = 234,
00281 euml = 235,
00282 igrave = 236,
00283 iacute = 237,
00284 icirc = 238,
00285 iuml = 239,
00286 eth = 240,
00287 ntilde = 241,
00288 ograve = 242,
00289 oacute = 243,
00290 ocirc = 244,
00291 otilde = 245,
00292 ouml = 246,
00293 divide = 247,
00294 oslash = 248,
00295 ugrave = 249,
00296 uacute = 250,
00297 ucirc = 251,
00298 uuml = 252,
00299 yacute = 253,
00300 thorn = 254,
00301 yuml = 255,
00302 Null = 0,
00303 };
00304
00306 static const EntityMapping fromEntity(const char * input, const int length);
00308 static Strings::FastString trimAndEscape(const char * input, const int length, const CharsetFunctions & charset = getDefaultCharsetFunctions());
00309
00310 };
00311
00312
00313
00314
00315 }
00316
00317
00318
00319 #endif