utf8_test.php 3.4KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778
  1. <?php
  2. // $Id: parser_test.php 1608 2007-12-27 09:03:07Z pp11 $
  3. // Handle with care: this file is encoded in UTF-8.
  4. require_once(dirname(__FILE__) . '/../autorun.php');
  5. require_once(dirname(__FILE__) . '/../parser.php');
  6. require_once(dirname(__FILE__) . '/../url.php');
  // Ask the mock framework to generate mock classes for the SAX parser and
  // the SAX listener. The parser mock is instantiated in this file as
  // MockSimpleHtmlSaxParser.
  // NOTE(review): no use of the listener mock is visible in this file --
  // confirm whether the second call is still needed.
  7. Mock::generate('SimpleHtmlSaxParser');
  8. Mock::generate('SimpleSaxListener');
  /**
   * Exercises the regex matcher and the HTML lexer with non-ASCII text,
   * once with the literals as stored in this file (UTF-8) and once after
   * converting them with utf8_decode().
   *
   * Objects are created with plain `new`: assigning the result of `new`
   * by reference (`= &new Foo`) is a parse error on PHP 7 and later.
   */
  class TestOfHtmlSaxParserWithDifferentCharset extends UnitTestCase {

      /**
       * A pattern containing an accented character must match at the start
       * of a UTF-8 subject, and the reported match must equal the pattern.
       */
      function testWithTextInUTF8() {
          // NOTE(review): `false` is presumably the case-sensitivity flag of
          // ParallelRegex -- confirm against its constructor.
          $regex = new ParallelRegex(false);
          $regex->addPattern("eé");
          // $match is filled in by match() (passed by reference).
          $this->assertTrue($regex->match("eéêè", $match));
          $this->assertEqual($match, "eé");
      }

      /**
       * Same scenario as testWithTextInUTF8(), but pattern, subject and
       * expected match are all passed through utf8_decode() first.
       */
      function testWithTextInLatin1() {
          $regex = new ParallelRegex(false);
          $regex->addPattern(utf8_decode("eé"));
          $this->assertTrue($regex->match(utf8_decode("eéêè"), $match));
          $this->assertEqual($match, utf8_decode("eé"));
      }

      /**
       * Builds a mock parser whose token-accepting methods and ignore()
       * are all stubbed to return true.
       *
       * @return MockSimpleHtmlSaxParser    The stubbed mock, by reference.
       */
      function &createParser() {
          $parser = new MockSimpleHtmlSaxParser();
          $parser->setReturnValue('acceptStartToken', true);
          $parser->setReturnValue('acceptEndToken', true);
          $parser->setReturnValue('acceptAttributeToken', true);
          $parser->setReturnValue('acceptEntityToken', true);
          $parser->setReturnValue('acceptTextToken', true);
          $parser->setReturnValue('ignore', true);
          return $parser;
      }

      /**
       * Lexes an anchor tag whose href value contains an accented character
       * and checks the exact sequence of tokens handed to the parser.
       */
      function testTagWithAttributesInUTF8() {
          $parser = &$this->createParser();
          // NOTE(review): '*' as the second expected argument is presumably
          // the mock framework's "any value" wildcard -- confirm.
          $parser->expectOnce('acceptTextToken', array('label', '*'));
          $parser->expectAt(0, 'acceptStartToken', array('<a', '*'));
          $parser->expectAt(1, 'acceptStartToken', array('href', '*'));
          $parser->expectAt(2, 'acceptStartToken', array('>', '*'));
          $parser->expectCallCount('acceptStartToken', 3);
          $parser->expectAt(0, 'acceptAttributeToken', array('= "', '*'));
          $parser->expectAt(1, 'acceptAttributeToken', array('hère.html', '*'));
          $parser->expectAt(2, 'acceptAttributeToken', array('"', '*'));
          $parser->expectCallCount('acceptAttributeToken', 3);
          $parser->expectOnce('acceptEndToken', array('</a>', '*'));
          $lexer = new SimpleHtmlLexer($parser);
          $this->assertTrue($lexer->parse('<a href = "hère.html">label</a>'));
      }

      /**
       * Same token expectations as testTagWithAttributesInUTF8(), but both
       * the input markup and the expected attribute value are passed
       * through utf8_decode() first.
       */
      function testTagWithAttributesInLatin1() {
          $parser = &$this->createParser();
          $parser->expectOnce('acceptTextToken', array('label', '*'));
          $parser->expectAt(0, 'acceptStartToken', array('<a', '*'));
          $parser->expectAt(1, 'acceptStartToken', array('href', '*'));
          $parser->expectAt(2, 'acceptStartToken', array('>', '*'));
          $parser->expectCallCount('acceptStartToken', 3);
          $parser->expectAt(0, 'acceptAttributeToken', array('= "', '*'));
          $parser->expectAt(1, 'acceptAttributeToken', array(utf8_decode('hère.html'), '*'));
          $parser->expectAt(2, 'acceptAttributeToken', array('"', '*'));
          $parser->expectCallCount('acceptAttributeToken', 3);
          $parser->expectOnce('acceptEndToken', array('</a>', '*'));
          $lexer = new SimpleHtmlLexer($parser);
          $this->assertTrue($lexer->parse(utf8_decode('<a href = "hère.html">label</a>')));
      }
  }
  /**
   * Checks that SimpleUrl returns the user-info part of a URL unchanged
   * when it contains non-ASCII (UTF-8) characters.
   *
   * NOTE(review): "Urlith" looks like a typo for "UrlWith"; the class name
   * is kept as-is so the test keeps its existing identifier.
   */
  class TestOfUrlithDifferentCharset extends UnitTestCase {

      /**
       * The username and password embedded in the URL must be returned
       * exactly as written, accented characters included.
       */
      function testUsernameAndPasswordInUTF8() {
          $address = new SimpleUrl('http://pÈrick:penËt@www.lastcraft.com');

          $user = $address->getUsername();
          $this->assertEqual($user, 'pÈrick');

          $secret = $address->getPassword();
          $this->assertEqual($secret, 'penËt');
      }
  }
  70. ?>