UnlimitedZoomingInterface: include/Tests/ParserTests.hpp Source File

00001 #ifndef hpp_ParserTests_hpp
00002 #define hpp_ParserTests_hpp
00003 
00004 // We need parser declaration
00005 #include "../HTMLParser/Parser.hpp"
00006 // We need test vectors
00007 #include "TestVectors.hpp"
00008 // We need time
00009 #include <time.h>
00010 
// Default debug sink: DebugText expands to OutputDebugString unless it was already defined before this header
#if !defined(DebugText)
#define DebugText OutputDebugString
#endif
00014 
00015 namespace Tests
00016 {
00021     struct ParserTest
00022     {
00024         bool testTagging()
00025         {
00026             for (int i = 0; i < sizeof(HTML::allowedElements)/sizeof(HTML::allowedElements[0]); i++)
00027             {
00028                 if (HTML::Elements::fromTag(HTML::allowedElements[i].name, strlen(HTML::allowedElements[i].name)) != HTML::allowedElements[i].elementID)
00029                     return false;
00030             }
00031 
00032             if (HTML::Elements::fromTag("Z", 1) != HTML::GenericElement::Unknown) return false;
00033             if (HTML::Elements::fromTag("ZA", 2) != HTML::GenericElement::Unknown) return false;
00034             if (HTML::Elements::fromTag("ZAA", 3) != HTML::GenericElement::Unknown) return false;
00035             if (HTML::Elements::fromTag("ADDE", 4) != HTML::GenericElement::Unknown) return false;
00036             if (HTML::Elements::fromTag("ZACOP", 5) != HTML::GenericElement::Unknown) return false;
00037             if (HTML::Elements::fromTag("BUTTIN", 6) != HTML::GenericElement::Unknown) return false;
00038             if (HTML::Elements::fromTag("ADDREST", 7) != HTML::GenericElement::Unknown) return false;
00039             if (HTML::Elements::fromTag("CALGROUP", 8) != HTML::GenericElement::Unknown) return false;
00040             if (HTML::Elements::fromTag("RETEDFIEH", 9) != HTML::GenericElement::Unknown) return false;
00041             if (HTML::Elements::fromTag("BLOCKQUOTI", 10) != HTML::GenericElement::Unknown) return false;
00042             if (HTML::Elements::fromTag("ZEFEGZERERG", 11) != HTML::GenericElement::Unknown) return false;
00043             
00044             return true;
00045         }
00046 
00048         bool checkTaggingSpeed()
00049         {
00050             // First check tagging speed for classic search
00051             const char * arrayToFind[] = { "A", "BLOCKQUOTE", "PRE", "TEXT", "NOTHING", "AREA", "COLGROUP", "OPTION", "NOSCRIPT", "VAR" };
00052 
00053             int nbTurn = 100000;
00054             clock_t timeStart = clock();
00055             for (; nbTurn; nbTurn --)
00056                 for (int i = 0; i < sizeof(arrayToFind) / sizeof(arrayToFind[0]); i++)
00057                 {
00058                     for (int j = 0; j < sizeof(HTML::allowedElements)/sizeof(HTML::allowedElements[0]); j++)
00059                         if (strcmp(HTML::allowedElements[i].name, arrayToFind[i]) == 0) break;
00060                 }
00061             clock_t timeElapsed1 = clock() - timeStart;
00062 
00063             // Redo the process with our code
00064             nbTurn = 100000;
00065             timeStart = clock();
00066             for (; nbTurn; nbTurn --)
00067                 for (int i = 0; i < sizeof(arrayToFind) / sizeof(arrayToFind[0]); i++)
00068                 {
00069                     HTML::GenericElement::ElementID el = HTML::Elements::fromTag(arrayToFind[i], strlen(arrayToFind[i]));
00070                 }
00071             clock_t timeElapsed2 = clock() - timeStart;
00072 
00073             // On my computer, the new algorithm is 10x faster!
00074             return timeElapsed2 < timeElapsed1;
00075         }
00076 
00077         bool parseDocHTMLFile()
00078         {
00079             Stream::InputFileStream xFS("../../../testFiles/docs.htm");
00080             // Create an node allocator if not done yet (this can as simple as the "new" based allocator)
00081             HTML::Elements::Allocators::SimpleHeap xAllocator;
00082             // Create the parser now 
00083             HTML::Parser xParser(xFS, xAllocator, HTML::Parser::InstantParsing, HTML::LooseDTD);
00084             // Get the last error
00085             const HTML::Parser::ParsingError & xError = xParser.getLastParsingError();
00086             // Get the DOM tree
00087             HTML::DOMTree & tree = xParser.getDOMTree();
00088             // Dump it
00089             dumpDOM(tree);
00090             
00091             // Get a reference to the DOM tree now
00092 //            HTML::DOMTree & xTree = xParser.getDOMTree();
00093             return !xError.isError();
00094         }
00095 
00096         bool dumpDOM(HTML::DOMTree & domTree)
00097         {
00098             HTML::DOMTree::Node * node = domTree.getRootNode();
00099             
00100             if (!node) return false;
00101             // Apply the dump method to the current node 
00102             dumpMethod(node->getData(), 0);
00103             node->applyOnChildrenDataLevel(*this, &ParserTest::dumpMethod, 1);
00104             return true;
00105         }
00106 
00107         int dumpMethod(const DOM::Node * node, int level)
00108         {
00109             // Should print element here
00110             if (!node) return 0;
00111             // We currently only support Element for now
00112             if (node->nodeType != DOM::Node::NodeType::Element) 
00113             {
00114                 if (node->nodeType != DOM::Node::NodeType::Text) return 1;
00115                 const DOM::Text * text = (const DOM::Text*)node;
00116                 // Get the text
00117                 Bstrlib::String result;
00118                 while (level)
00119                 {
00120                     result += "    ";
00121                     level --;
00122                 }
00123                 result += "{";
00124                 result += Strings::convert(text->nodeValue());
00125                 result += "}\r\n";
00126                 DebugText(result);
00127                 return 1;
00128             }
00129             HTML::Elements::Element * element = ((DOM::Element*)node)->getElement();
00130             if (!element) return 0;
00131             Bstrlib::String result;
00132             while (level)
00133             {
00134                 result += "    ";
00135                 level --;
00136             }
00137             
00138             result += element->tagName();
00139 
00140             const DOM::NamedNodeMap * attributes = ((DOM::Element*)node)->attributes();
00141             if (attributes && attributes->length())
00142             {
00143                 result += " [";
00144                 for (uint32 i = 0; i < attributes->length(); i++)
00145                 {
00146                     const DOM::Attr * attr = (const DOM::Attr*)attributes->item(i);
00147                     if (attr)
00148                     {
00149                         result += Strings::convert(attr->name());
00150                         result += " = \"";
00151                         result += Strings::convert(attr->value());
00152                         result += "\", ";
00153                     }
00154                 }
00155                 result += "]";
00156             }
00157 
00158             //result += "\r\n";
00159             DebugText(result);
00160             return 1;
00161         }
00162 
00163     };
00164 
00165     void createParserTests(Test::Vector & xVector, Bstrlib::String & results)
00166     {
00167         typedef Bstrlib::String String; 
00168         xVector.addUnitToTest(Test::MakeLineDelimiter("Parser testing"));
00169         {
00170             ParserTest xPT;
00171             xVector.addUnitToTest(Test::MakeUnitTest(MakeTestName( "Testing tagging all defined and some undefined element in HTML4.01 standard" ), xPT, &ParserTest::testTagging));
00172             xVector.addUnitToTest(Test::MakeUnitTest(MakeTestName( "Testing tagging speed for the current algorithm" ), xPT, &ParserTest::checkTaggingSpeed));
00173             xVector.addUnitToTest(Test::MakeUnitTest(MakeTestName( "Testing parsing an example file" ), xPT, &ParserTest::parseDocHTMLFile));
00174 /*
00175             xVector.addUnitToTest(Test::MakeUnitTest(MakeTestName( "Checking node accessibility" ), xTT, &NTreeTest::countNodes, byRef(xTT.xTree)));
00176             xVector.addUnitToTest(Test::MakeUnitTest(MakeTestName( "Checking node creation count" ), xTT, &NTreeTest::refCheck, byRef(xTT.xTree)));
00177             xVector.addUnitToTest(Test::MakeUnitTest(MakeTestName( "Checking Node's access methods" ), xTT, &NTreeTest::testMethod1, byRef(xTT.xTree)));
00178             xVector.addUnitToTest(Test::MakeUnitTest(MakeTestName( "Checking Node's find and apply methods" ), xTT, &NTreeTest::testMethod2, byRef(xTT.xTree)));
00179             xVector.addUnitToTest(Test::MakeUnitTest(MakeTestName( "Checking Node deletion methods" ), xTT, &NTreeTest::testMethod3, byRef(xTT.xTree)));
00180             */
00181             xVector.testLastInsertedVectors(results);
00182         }
00183     }
00184 
00185 }
00186 
00187 #endif