parser_test.php 22KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551
  1. <?php
  2. // $Id: parser_test.php 1748 2008-04-14 01:50:41Z lastcraft $
  3. require_once(dirname(__FILE__) . '/../autorun.php');
  4. require_once(dirname(__FILE__) . '/../parser.php');
  5. Mock::generate('SimpleHtmlSaxParser');
  6. Mock::generate('SimpleSaxListener');
  /**
   * Unit tests for ParallelRegex: several patterns are registered on one
   * object and match() reports the matched text through its second
   * (by-reference) argument.
   *
   * The constructor flag is true in the case-sensitive tests and false in
   * the case-insensitive ones.
   */
  class TestOfParallelRegex extends UnitTestCase {

      /** No registered patterns: nothing matches and the match text is empty. */
      function testNoPatterns() {
          $matcher = new ParallelRegex(false);
          $outcome = $matcher->match('Hello', $found);
          $this->assertFalse($outcome);
          $this->assertEqual($found, '');
      }

      /** A catch-all pattern matches an empty subject with empty match text. */
      function testNoSubject() {
          $matcher = new ParallelRegex(false);
          $matcher->addPattern('.*');
          $outcome = $matcher->match('', $found);
          $this->assertTrue($outcome);
          $this->assertEqual($found, '');
      }

      /** A catch-all pattern consumes the whole subject. */
      function testMatchAll() {
          $matcher = new ParallelRegex(false);
          $matcher->addPattern('.*');
          $outcome = $matcher->match('Hello', $found);
          $this->assertTrue($outcome);
          $this->assertEqual($found, 'Hello');
      }

      /** With the flag set to true only the exact-case occurrence is reported. */
      function testCaseSensitive() {
          $matcher = new ParallelRegex(true);
          $matcher->addPattern('abc');

          $outcome = $matcher->match('abcdef', $found);
          $this->assertTrue($outcome);
          $this->assertEqual($found, 'abc');

          // The upper-case "ABC" earlier in the subject must be skipped.
          $outcome = $matcher->match('AAABCabcdef', $found);
          $this->assertTrue($outcome);
          $this->assertEqual($found, 'abc');
      }

      /** With the flag set to false the first occurrence wins whatever its case. */
      function testCaseInsensitive() {
          $matcher = new ParallelRegex(false);
          $matcher->addPattern('abc');

          $outcome = $matcher->match('abcdef', $found);
          $this->assertTrue($outcome);
          $this->assertEqual($found, 'abc');

          // The match text keeps the subject's own case.
          $outcome = $matcher->match('AAABCabcdef', $found);
          $this->assertTrue($outcome);
          $this->assertEqual($found, 'ABC');
      }

      /** Two patterns on one object: the earliest occurrence in the subject is reported. */
      function testMatchMultiple() {
          $matcher = new ParallelRegex(true);
          $matcher->addPattern('abc');
          $matcher->addPattern('ABC');

          $outcome = $matcher->match('abcdef', $found);
          $this->assertTrue($outcome);
          $this->assertEqual($found, 'abc');

          $outcome = $matcher->match('AAABCabcdef', $found);
          $this->assertTrue($outcome);
          $this->assertEqual($found, 'ABC');

          $outcome = $matcher->match('Hello', $found);
          $this->assertFalse($outcome);
      }

      /** When a pattern carries a label, match() returns that label instead of true. */
      function testPatternLabels() {
          $matcher = new ParallelRegex(false);
          $matcher->addPattern('abc', 'letter');
          $matcher->addPattern('123', 'number');

          $label = $matcher->match('abcdef', $found);
          $this->assertIdentical($label, 'letter');
          $this->assertEqual($found, 'abc');

          $label = $matcher->match('0123456789', $found);
          $this->assertIdentical($label, 'number');
          $this->assertEqual($found, '123');
      }
  }
  /**
   * Unit tests for SimpleStateStack, the stack of named states that is
   * created with a start state and changed through enter() and leave().
   */
  class TestOfStateStack extends UnitTestCase {

      /** A fresh stack reports the state it was constructed with. */
      function testStartState() {
          $states = new SimpleStateStack('one');
          $current = $states->getCurrent();
          $this->assertEqual($current, 'one');
      }

      /** leave() on a stack holding only its start state returns false. */
      function testExhaustion() {
          $states = new SimpleStateStack('one');
          $left = $states->leave();
          $this->assertFalse($left);
      }

      /** enter() pushes a state, leave() pops back to the previous one. */
      function testStateMoves() {
          $states = new SimpleStateStack('one');

          $states->enter('two');
          $this->assertEqual($states->getCurrent(), 'two');

          $states->enter('three');
          $this->assertEqual($states->getCurrent(), 'three');

          // Popping "three" exposes "two" again.
          $left = $states->leave();
          $this->assertTrue($left);
          $this->assertEqual($states->getCurrent(), 'two');

          $states->enter('third');
          $this->assertEqual($states->getCurrent(), 'third');

          // Two pops in a row bring the stack back to its start state.
          $left = $states->leave();
          $this->assertTrue($left);
          $left = $states->leave();
          $this->assertTrue($left);
          $this->assertEqual($states->getCurrent(), 'one');
      }
  }
  /**
   * Empty stand-in for a lexer handler. It only declares the method names
   * (accept, a, b) that the lexer tests in this file set expectations on;
   * the generated MockTestParser is what the tests actually instantiate.
   */
  class TestParser {

      /** Handler method used by the tests that run the lexer in its default mode. */
      function accept() {}

      /** Handler method named after mode "a". */
      function a() {}

      /** Handler method named after mode "b". */
      function b() {}
  }

  // Produces the MockTestParser class used throughout the lexer tests.
  Mock::generate('TestParser');
  /**
   * Unit tests for SimpleLexer running in its default mode, where every
   * token is handed to the handler's accept() method together with a
   * LEXER_MATCHED or LEXER_UNMATCHED marker.
   */
  class TestOfLexer extends UnitTestCase {

      /**
       * Parsing an empty string must succeed without ever calling the handler.
       *
       * The expectation and return value are configured once; the original
       * repeated both statements verbatim, which added nothing.
       */
      function testEmptyPage() {
          $handler = new MockTestParser();
          $handler->expectNever("accept");
          $handler->setReturnValue("accept", true);
          $lexer = new SimpleLexer($handler);
          $lexer->addPattern("a+");
          $this->assertTrue($lexer->parse(""));
      }

      /**
       * One pattern: runs of "a" arrive as matched tokens and the text
       * between them as unmatched tokens, in document order.
       */
      function testSinglePattern() {
          $handler = new MockTestParser();
          // Arguments expected on each successive accept() call.
          $expected = array(
              array("aaa", LEXER_MATCHED),
              array("x", LEXER_UNMATCHED),
              array("a", LEXER_MATCHED),
              array("yyy", LEXER_UNMATCHED),
              array("a", LEXER_MATCHED),
              array("x", LEXER_UNMATCHED),
              array("aaa", LEXER_MATCHED),
              array("z", LEXER_UNMATCHED),
          );
          foreach ($expected as $timing => $arguments) {
              $handler->expectAt($timing, "accept", $arguments);
          }
          $handler->expectCallCount("accept", count($expected));
          $handler->setReturnValue("accept", true);
          $lexer = new SimpleLexer($handler);
          $lexer->addPattern("a+");
          $this->assertTrue($lexer->parse("aaaxayyyaxaaaz"));
      }

      /**
       * Two patterns in the same mode: only the token text is checked, the
       * second accept() argument is a wildcard.
       */
      function testMultiplePattern() {
          $handler = new MockTestParser();
          $target = array("a", "b", "a", "bb", "x", "b", "a", "xxxxxx", "a", "x");
          // $target is a plain list, so its keys are the call timings 0..n-1.
          foreach ($target as $timing => $token) {
              $handler->expectAt($timing, "accept", array($token, '*'));
          }
          $handler->expectCallCount("accept", count($target));
          $handler->setReturnValue("accept", true);
          $lexer = new SimpleLexer($handler);
          $lexer->addPattern("a+");
          $lexer->addPattern("b+");
          $this->assertTrue($lexer->parse("ababbxbaxxxxxxax"));
      }
  }
  /**
   * Unit tests for SimpleLexer modes. Each mode name doubles as the name of
   * the handler method that receives the tokens found while that mode is
   * current (here "a" and "b" on MockTestParser).
   *
   * Expectations are written as tables of
   * array(handler method, call timing, token text, lexer marker)
   * and registered in table order.
   */
  class TestOfLexerModes extends UnitTestCase {

      /** A pattern registered for mode "b" is ignored while the lexer stays in mode "a". */
      function testIsolatedPattern() {
          $mock = new MockTestParser();
          $script = array(
              array('a', 0, 'a', LEXER_MATCHED),
              array('a', 1, 'b', LEXER_UNMATCHED),
              array('a', 2, 'aa', LEXER_MATCHED),
              array('a', 3, 'bxb', LEXER_UNMATCHED),
              array('a', 4, 'aaa', LEXER_MATCHED),
              array('a', 5, 'x', LEXER_UNMATCHED),
              array('a', 6, 'aaaa', LEXER_MATCHED),
              array('a', 7, 'x', LEXER_UNMATCHED),
          );
          foreach ($script as $step) {
              list($method, $timing, $token, $marker) = $step;
              $mock->expectAt($timing, $method, array($token, $marker));
          }
          $mock->expectCallCount('a', 8);
          $mock->setReturnValue('a', true);

          $lexer = new SimpleLexer($mock, 'a');
          $lexer->addPattern('a+', 'a');
          $lexer->addPattern('b+', 'b');
          $parsed = $lexer->parse('abaabxbaaaxaaaax');
          $this->assertTrue($parsed);
      }

      /** An entry pattern switches to mode "b"; the entry token itself goes to b(). */
      function testModeChange() {
          $mock = new MockTestParser();
          $script = array(
              array('a', 0, 'a', LEXER_MATCHED),
              array('a', 1, 'b', LEXER_UNMATCHED),
              array('a', 2, 'aa', LEXER_MATCHED),
              array('a', 3, 'b', LEXER_UNMATCHED),
              array('a', 4, 'aaa', LEXER_MATCHED),
              array('b', 0, ':', LEXER_ENTER),
              array('b', 1, 'a', LEXER_UNMATCHED),
              array('b', 2, 'b', LEXER_MATCHED),
              array('b', 3, 'a', LEXER_UNMATCHED),
              array('b', 4, 'bb', LEXER_MATCHED),
              array('b', 5, 'a', LEXER_UNMATCHED),
              array('b', 6, 'bbb', LEXER_MATCHED),
              array('b', 7, 'a', LEXER_UNMATCHED),
          );
          foreach ($script as $step) {
              list($method, $timing, $token, $marker) = $step;
              $mock->expectAt($timing, $method, array($token, $marker));
          }
          $mock->expectCallCount('a', 5);
          $mock->expectCallCount('b', 8);
          $mock->setReturnValue('a', true);
          $mock->setReturnValue('b', true);

          $lexer = new SimpleLexer($mock, 'a');
          $lexer->addPattern('a+', 'a');
          $lexer->addEntryPattern(':', 'a', 'b');
          $lexer->addPattern('b+', 'b');
          $parsed = $lexer->parse('abaabaaa:ababbabbba');
          $this->assertTrue($parsed);
      }

      /** Entering mode "b" and leaving it again resumes mode "a" where it stopped. */
      function testNesting() {
          $mock = new MockTestParser();
          $mock->setReturnValue('a', true);
          $mock->setReturnValue('b', true);
          $script = array(
              array('a', 0, 'aa', LEXER_MATCHED),
              array('a', 1, 'b', LEXER_UNMATCHED),
              array('a', 2, 'aa', LEXER_MATCHED),
              array('a', 3, 'b', LEXER_UNMATCHED),
              array('b', 0, '(', LEXER_ENTER),
              array('b', 1, 'bb', LEXER_MATCHED),
              array('b', 2, 'a', LEXER_UNMATCHED),
              array('b', 3, 'bb', LEXER_MATCHED),
              array('b', 4, ')', LEXER_EXIT),
              array('a', 4, 'aa', LEXER_MATCHED),
              array('a', 5, 'b', LEXER_UNMATCHED),
          );
          foreach ($script as $step) {
              list($method, $timing, $token, $marker) = $step;
              $mock->expectAt($timing, $method, array($token, $marker));
          }
          $mock->expectCallCount('a', 6);
          $mock->expectCallCount('b', 5);

          $lexer = new SimpleLexer($mock, 'a');
          $lexer->addPattern('a+', 'a');
          $lexer->addEntryPattern('(', 'a', 'b');
          $lexer->addPattern('b+', 'b');
          $lexer->addExitPattern(')', 'b');
          $parsed = $lexer->parse('aabaab(bbabb)aab');
          $this->assertTrue($parsed);
      }

      /** A special pattern sends a single token to b() and mode "a" carries on. */
      function testSingular() {
          $mock = new MockTestParser();
          $mock->setReturnValue('a', true);
          $mock->setReturnValue('b', true);
          $script = array(
              array('a', 0, 'aa', LEXER_MATCHED),
              array('a', 1, 'aa', LEXER_MATCHED),
              array('a', 2, 'xx', LEXER_UNMATCHED),
              array('a', 3, 'xx', LEXER_UNMATCHED),
              array('b', 0, 'b', LEXER_SPECIAL),
              array('b', 1, 'bbb', LEXER_SPECIAL),
          );
          foreach ($script as $step) {
              list($method, $timing, $token, $marker) = $step;
              $mock->expectAt($timing, $method, array($token, $marker));
          }
          $mock->expectCallCount('a', 4);
          $mock->expectCallCount('b', 2);

          $lexer = new SimpleLexer($mock, 'a');
          $lexer->addPattern('a+', 'a');
          $lexer->addSpecialPattern('b+', 'a', 'b');
          $parsed = $lexer->parse('aabaaxxbbbxx');
          $this->assertTrue($parsed);
      }

      /** An exit pattern hit in the start mode makes parse() return false. */
      function testUnwindTooFar() {
          $mock = new MockTestParser();
          $mock->setReturnValue('a', true);
          $script = array(
              array('a', 0, 'aa', LEXER_MATCHED),
              array('a', 1, ')', LEXER_EXIT),
          );
          foreach ($script as $step) {
              list($method, $timing, $token, $marker) = $step;
              $mock->expectAt($timing, $method, array($token, $marker));
          }
          $mock->expectCallCount('a', 2);

          $lexer = new SimpleLexer($mock, 'a');
          $lexer->addPattern('a+', 'a');
          $lexer->addExitPattern(')', 'a');
          $parsed = $lexer->parse('aa)aa');
          $this->assertFalse($parsed);
      }
  }
  /**
   * Unit test for SimpleLexer::mapHandler(), which lets a mode deliver its
   * tokens to a handler method whose name differs from the mode name.
   */
  class TestOfLexerHandlers extends UnitTestCase {

      /** Both "mode_a" and "mode_b" are mapped to a(), so a() sees every token. */
      function testModeMapping() {
          $mock = new MockTestParser();
          $mock->setReturnValue('a', true);

          // Arguments expected on each successive a() call.
          $calls = array(
              array('aa', LEXER_MATCHED),
              array('(', LEXER_ENTER),
              array('bb', LEXER_MATCHED),
              array('a', LEXER_UNMATCHED),
              array('bb', LEXER_MATCHED),
              array(')', LEXER_EXIT),
              array('b', LEXER_UNMATCHED),
          );
          foreach ($calls as $timing => $arguments) {
              $mock->expectAt($timing, 'a', $arguments);
          }
          $mock->expectCallCount('a', 7);

          $lexer = new SimpleLexer($mock, 'mode_a');
          $lexer->addPattern('a+', 'mode_a');
          $lexer->addEntryPattern('(', 'mode_a', 'mode_b');
          $lexer->addPattern('b+', 'mode_b');
          $lexer->addExitPattern(')', 'mode_b');
          $lexer->mapHandler('mode_a', 'a');
          $lexer->mapHandler('mode_b', 'a');

          $parsed = $lexer->parse('aa(bbabb)b');
          $this->assertTrue($parsed);
      }
  }
  /**
   * Unit tests for SimpleHtmlLexer. The lexer is given a mocked
   * SimpleHtmlSaxParser and the tests check which accept*Token() / ignore()
   * callbacks it triggers for a piece of markup.
   */
  class TestOfSimpleHtmlLexer extends UnitTestCase {

      /**
       * Builds a mock parser whose token callbacks all return true.
       *
       * @return MockSimpleHtmlSaxParser    Mock with permissive return values.
       */
      function &createParser() {
          $mock = new MockSimpleHtmlSaxParser();
          $callbacks = array(
              'acceptStartToken',
              'acceptEndToken',
              'acceptAttributeToken',
              'acceptEntityToken',
              'acceptTextToken',
              'ignore',
          );
          foreach ($callbacks as $callback) {
              $mock->setReturnValue($callback, true);
          }
          return $mock;
      }

      /** Empty input triggers none of the accept*Token() callbacks. */
      function testNoContent() {
          $mock = &$this->createParser();
          $forbidden = array(
              'acceptStartToken',
              'acceptEndToken',
              'acceptAttributeToken',
              'acceptEntityToken',
              'acceptTextToken',
          );
          foreach ($forbidden as $callback) {
              $mock->expectNever($callback);
          }
          $lexer = new SimpleHtmlLexer($mock);
          $parsed = $lexer->parse('');
          $this->assertTrue($parsed);
      }

      /** Tags the lexer has no patterns for are passed through as one text token. */
      function testUninteresting() {
          $mock = &$this->createParser();
          $mock->expectOnce('acceptTextToken', array('<html></html>', '*'));
          $lexer = new SimpleHtmlLexer($mock);
          $parsed = $lexer->parse('<html></html>');
          $this->assertTrue($parsed);
      }

      /** A <style> element is routed to ignore() and never reaches the text callback. */
      function testSkipCss() {
          $mock = &$this->createParser();
          $mock->expectNever('acceptTextToken');
          $mock->expectAtLeastOnce('ignore');
          $lexer = new SimpleHtmlLexer($mock);
          $parsed = $lexer->parse("<style>Lot's of styles</style>");
          $this->assertTrue($parsed);
      }

      /** An upper-case <SCRIPT> element full of punctuation is likewise ignored. */
      function testSkipJavaScript() {
          $mock = &$this->createParser();
          $mock->expectNever('acceptTextToken');
          $mock->expectAtLeastOnce('ignore');
          $lexer = new SimpleHtmlLexer($mock);
          $parsed = $lexer->parse("<SCRIPT>Javascript code {';:^%^%£$'@\"*(}</SCRIPT>");
          $this->assertTrue($parsed);
      }

      /** Markup inside an HTML comment is ignored rather than tokenised. */
      function testSkipHtmlComments() {
          $mock = &$this->createParser();
          $mock->expectNever('acceptTextToken');
          $mock->expectAtLeastOnce('ignore');
          $lexer = new SimpleHtmlLexer($mock);
          $parsed = $lexer->parse("<!-- <title>title</title><style>styles</style> -->");
          $this->assertTrue($parsed);
      }

      /** A bare tag yields two start tokens ("<title" and ">"), one text and one end token. */
      function testTagWithNoAttributes() {
          $mock = &$this->createParser();
          $startTokens = array('<title', '>');
          foreach ($startTokens as $timing => $token) {
              $mock->expectAt($timing, 'acceptStartToken', array($token, '*'));
          }
          $mock->expectCallCount('acceptStartToken', 2);
          $mock->expectOnce('acceptTextToken', array('Hello', '*'));
          $mock->expectOnce('acceptEndToken', array('</title>', '*'));
          $lexer = new SimpleHtmlLexer($mock);
          $parsed = $lexer->parse('<title>Hello</title>');
          $this->assertTrue($parsed);
      }

      /** An attribute name arrives as a start token; its quoted value as attribute tokens. */
      function testTagWithAttributes() {
          $mock = &$this->createParser();
          $mock->expectOnce('acceptTextToken', array('label', '*'));

          $startTokens = array('<a', 'href', '>');
          foreach ($startTokens as $timing => $token) {
              $mock->expectAt($timing, 'acceptStartToken', array($token, '*'));
          }
          $mock->expectCallCount('acceptStartToken', 3);

          // Opening delimiter (with the spacing from the markup), value, closing quote.
          $attributeTokens = array('= "', 'here.html', '"');
          foreach ($attributeTokens as $timing => $token) {
              $mock->expectAt($timing, 'acceptAttributeToken', array($token, '*'));
          }
          $mock->expectCallCount('acceptAttributeToken', 3);

          $mock->expectOnce('acceptEndToken', array('</a>', '*'));
          $lexer = new SimpleHtmlLexer($mock);
          $parsed = $lexer->parse('<a href = "here.html">label</a>');
          $this->assertTrue($parsed);
      }
  }
  /**
   * Unit tests for SimpleHtmlSaxParser. The parser is given a mocked
   * SimpleSaxListener and the tests check the startElement(), addContent()
   * and endElement() events raised for a piece of markup.
   */
  class TestOfHtmlSaxParser extends UnitTestCase {

      /**
       * Builds a mock listener whose three event methods all return true.
       *
       * @return MockSimpleSaxListener    Mock with permissive return values.
       */
      function &createListener() {
          $mock = new MockSimpleSaxListener();
          foreach (array('startElement', 'addContent', 'endElement') as $event) {
              $mock->setReturnValue($event, true);
          }
          return $mock;
      }

      /** A frameset element produces start, content and end events. */
      function testFramesetTag() {
          $mock = &$this->createListener();
          $mock->expectOnce('startElement', array('frameset', array()));
          $mock->expectOnce('addContent', array('Frames'));
          $mock->expectOnce('endElement', array('frameset'));
          $parser = new SimpleHtmlSaxParser($mock);
          $html = '<frameset>Frames</frameset>';
          $this->assertTrue($parser->parse($html));
      }

      /** Unquoted attribute values are read, with or without spaces around "=". */
      function testTagWithUnquotedAttributes() {
          $mock = &$this->createListener();
          $attributes = array('name' => 'a.b.c', 'value' => 'd');
          $mock->expectOnce('startElement', array('input', $attributes));
          $parser = new SimpleHtmlSaxParser($mock);
          $html = '<input name=a.b.c value = d>';
          $this->assertTrue($parser->parse($html));
      }

      /** The surrounding <html> tags are reported as plain content around the <a> element. */
      function testTagInsideContent() {
          $mock = &$this->createListener();
          $mock->expectOnce('startElement', array('a', array()));
          $mock->expectAt(0, 'addContent', array('<html>'));
          $mock->expectAt(1, 'addContent', array('</html>'));
          $parser = new SimpleHtmlSaxParser($mock);
          $html = '<html><a></a></html>';
          $this->assertTrue($parser->parse($html));
      }

      /** Text between the tags of an element is delivered through addContent(). */
      function testTagWithInternalContent() {
          $mock = &$this->createListener();
          $mock->expectOnce('startElement', array('a', array()));
          $mock->expectOnce('addContent', array('label'));
          $mock->expectOnce('endElement', array('a'));
          $parser = new SimpleHtmlSaxParser($mock);
          $html = '<a>label</a>';
          $this->assertTrue($parser->parse($html));
      }

      /** A single-quoted href value is reported without its quotes. */
      function testLinkAddress() {
          $mock = &$this->createListener();
          $attributes = array('href' => 'here.html');
          $mock->expectOnce('startElement', array('a', $attributes));
          $mock->expectOnce('addContent', array('label'));
          $mock->expectOnce('endElement', array('a'));
          $parser = new SimpleHtmlSaxParser($mock);
          $html = "<a href = 'here.html'>label</a>";
          $this->assertTrue($parser->parse($html));
      }

      /** An &amp; entity inside an attribute value is decoded to "&". */
      function testEncodedAttribute() {
          $mock = &$this->createListener();
          $attributes = array('href' => 'here&there.html');
          $mock->expectOnce('startElement', array('a', $attributes));
          $mock->expectOnce('addContent', array('label'));
          $mock->expectOnce('endElement', array('a'));
          $parser = new SimpleHtmlSaxParser($mock);
          $html = "<a href = 'here&amp;there.html'>label</a>";
          $this->assertTrue($parser->parse($html));
      }

      /** An attribute value of "0" is kept as the string '0'. */
      function testTagWithId() {
          $mock = &$this->createListener();
          $attributes = array('id' => '0');
          $mock->expectOnce('startElement', array('a', $attributes));
          $mock->expectOnce('addContent', array('label'));
          $mock->expectOnce('endElement', array('a'));
          $parser = new SimpleHtmlSaxParser($mock);
          $html = '<a id="0">label</a>';
          $this->assertTrue($parser->parse($html));
      }

      /** An empty quoted value and a valueless attribute both come through as ''. */
      function testTagWithEmptyAttributes() {
          $mock = &$this->createListener();
          $attributes = array('value' => '', 'selected' => '');
          $mock->expectOnce('startElement', array('option', $attributes));
          $mock->expectOnce('addContent', array('label'));
          $mock->expectOnce('endElement', array('option'));
          $parser = new SimpleHtmlSaxParser($mock);
          $html = '<option value="" selected>label</option>';
          $this->assertTrue($parser->parse($html));
      }

      /** Tag and attribute names are lower-cased; nested quotes inside a value are kept. */
      function testComplexTagWithLotsOfCaseVariations() {
          $mock = &$this->createListener();
          $attributes = array('href' => 'here.html', 'style' => "'cool'");
          $mock->expectOnce('startElement', array('a', $attributes));
          $mock->expectOnce('addContent', array('label'));
          $mock->expectOnce('endElement', array('a'));
          $parser = new SimpleHtmlSaxParser($mock);
          $html = '<A HREF = \'here.html\' Style="\'cool\'">label</A>';
          $this->assertTrue($parser->parse($html));
      }

      /** The trailing " />" of an XHTML self-closing tag does not disturb the attributes. */
      function testXhtmlSelfClosingTag() {
          $mock = &$this->createListener();
          $attributes = array('type' => 'submit', 'name' => 'N', 'value' => 'V');
          $mock->expectOnce('startElement', array('input', $attributes));
          $parser = new SimpleHtmlSaxParser($mock);
          $html = '<input type="submit" name="N" value="V" />';
          $this->assertTrue($parser->parse($html));
      }

      /** A frame inside a frameset gets its own start event; <noframes> markup is content. */
      function testNestedFrameInFrameset() {
          $mock = &$this->createListener();
          $mock->expectAt(0, 'startElement', array('frameset', array()));
          $mock->expectAt(1, 'startElement', array('frame', array('src' => 'frame.html')));
          $mock->expectCallCount('startElement', 2);
          $mock->expectOnce('addContent', array('<noframes>Hello</noframes>'));
          $mock->expectOnce('endElement', array('frameset'));
          $parser = new SimpleHtmlSaxParser($mock);
          $html = '<frameset><frame src="frame.html"><noframes>Hello</noframes></frameset>';
          $this->assertTrue($parser->parse($html));
      }
  }
  /**
   * Unit tests for the static SimpleHtmlSaxParser::normalise(), which
   * reduces a piece of HTML to the plain text the assertions below expect.
   */
  class TestOfTextExtraction extends UnitTestCase {

      /** Images without alt text vanish; paragraph text and alt text remain. */
      function testImageSuppressionWhileKeepingParagraphsAndAltText() {
          $this->assertEqual(
                  SimpleHtmlSaxParser::normalise('<img src="foo.png" /><p>some text</p><img src="bar.png" alt="bar" />'),
                  'some text bar');
      }

      /** Newlines and tabs collapse to single spaces; the ends are trimmed. */
      function testSpaceNormalisation() {
          $this->assertEqual(
                  SimpleHtmlSaxParser::normalise("\nOne\tTwo \nThree\t"),
                  'One Two Three');
      }

      /**
       * A comment spanning several lines is removed.
       *
       * The input is double-quoted so that \n is a real line break. In the
       * single-quoted literal used before, \n was a backslash followed by
       * "n", so no multi-line comment was ever tested.
       */
      function testMultilinesCommentSuppression() {
          $this->assertEqual(
                  SimpleHtmlSaxParser::normalise("<!--\n Hello \n-->"),
                  '');
      }

      /** A single-line comment is removed. */
      function testCommentSuppression() {
          $this->assertEqual(
                  SimpleHtmlSaxParser::normalise('<!--Hello-->'),
                  '');
      }

      /**
       * Script elements are removed together with their content.
       *
       * The first case is double-quoted so that the script body really
       * spans several lines; as a single-quoted literal it contained the
       * two characters backslash and "n" instead of line breaks.
       */
      function testJavascriptSuppression() {
          $this->assertEqual(
                  SimpleHtmlSaxParser::normalise("<script attribute=\"test\">\nHello\n</script>"),
                  '');
          $this->assertEqual(
                  SimpleHtmlSaxParser::normalise('<script attribute="test">Hello</script>'),
                  '');
          $this->assertEqual(
                  SimpleHtmlSaxParser::normalise('<script>Hello</script>'),
                  '');
      }

      /** Ordinary tags are stripped and their text kept. */
      function testTagSuppression() {
          $this->assertEqual(
                  SimpleHtmlSaxParser::normalise('<b>Hello</b>'),
                  'Hello');
      }

      /** Stripping adjoining tags does not insert a space between their texts. */
      function testAdjoiningTagSuppression() {
          $this->assertEqual(
                  SimpleHtmlSaxParser::normalise('<b>Hello</b><em>Goodbye</em>'),
                  'HelloGoodbye');
      }

      /** Alt text is extracted whether double-quoted, single-quoted or unquoted. */
      function testExtractImageAltTextWithDifferentQuotes() {
          $this->assertEqual(
                  SimpleHtmlSaxParser::normalise('<img alt="One"><img alt=\'Two\'><img alt=Three>'),
                  'One Two Three');
      }

      /** Several images in a row each contribute their alt text, space separated. */
      function testExtractImageAltTextMultipleTimes() {
          $this->assertEqual(
                  SimpleHtmlSaxParser::normalise('<img alt="One"><img alt="Two"><img alt="Three">'),
                  'One Two Three');
      }

      /** Named and numeric HTML entities are decoded to their characters. */
      function testHtmlEntityTranslation() {
          $this->assertEqual(
                  SimpleHtmlSaxParser::normalise('&lt;&gt;&quot;&amp;&#039;'),
                  '<>"&\'');
      }
  }
  489. ?>