'http://somewhere'));
$tag->addContent('Label');
$page = new MockSimplePage();
$page->expect('acceptTag', array($tag));
$page->expectCallCount('acceptTag', 1);
$builder = new PartialSimplePageBuilder();
$builder->returns('createPage', $page);
$builder->returns('createParser', new MockSimpleHtmlSaxParser());
$builder->__construct();
$builder->parse(new MockSimpleHttpResponse());
$this->assertTrue($builder->startElement(
'a',
array('href' => 'http://somewhere')));
$this->assertTrue($builder->addContent('Label'));
$this->assertTrue($builder->endElement('a'));
}
/**
 * An anchor with both href and id attributes should reach the page as
 * exactly one SimpleAnchorTag carrying those attributes and its label.
 */
function testLinkWithId() {
    $attributes = array('href' => 'http://somewhere', 'id' => '44');
    $expectedTag = new SimpleAnchorTag($attributes);
    $expectedTag->addContent('Label');

    $mockPage = new MockSimplePage();
    $mockPage->expect('acceptTag', array($expectedTag));
    $mockPage->expectCallCount('acceptTag', 1);

    // Stub the factory methods first, then run the constructor explicitly.
    $pageBuilder = new PartialSimplePageBuilder();
    $pageBuilder->returns('createPage', $mockPage);
    $pageBuilder->returns('createParser', new MockSimpleHtmlSaxParser());
    $pageBuilder->__construct();
    $pageBuilder->parse(new MockSimpleHttpResponse());

    // Replay the parser events for the anchor by hand.
    $opened = $pageBuilder->startElement('a', $attributes);
    $this->assertTrue($opened);
    $labelled = $pageBuilder->addContent('Label');
    $this->assertTrue($labelled);
    $closed = $pageBuilder->endElement('a');
    $this->assertTrue($closed);
}
/**
 * Text before and after an anchor must not disturb it: the page should
 * still receive exactly one SimpleAnchorTag with the href and label.
 */
function testLinkExtraction() {
    $attributes = array('href' => 'http://somewhere');
    $expectedTag = new SimpleAnchorTag($attributes);
    $expectedTag->addContent('Label');

    $mockPage = new MockSimplePage();
    $mockPage->expect('acceptTag', array($expectedTag));
    $mockPage->expectCallCount('acceptTag', 1);

    // Stub the factory methods first, then run the constructor explicitly.
    $pageBuilder = new PartialSimplePageBuilder();
    $pageBuilder->returns('createPage', $mockPage);
    $pageBuilder->returns('createParser', new MockSimpleHtmlSaxParser());
    $pageBuilder->__construct();
    $pageBuilder->parse(new MockSimpleHttpResponse());

    // Event sequence: leading text, the anchor itself, trailing text.
    $leading = $pageBuilder->addContent('Starting stuff');
    $this->assertTrue($leading);
    $opened = $pageBuilder->startElement('a', $attributes);
    $this->assertTrue($opened);
    $labelled = $pageBuilder->addContent('Label');
    $this->assertTrue($labelled);
    $closed = $pageBuilder->endElement('a');
    $this->assertTrue($closed);
    $trailing = $pageBuilder->addContent('Trailing stuff');
    $this->assertTrue($trailing);
}
/**
 * Two anchors separated by plain text should be delivered to the page
 * as two separate tags, in document order.
 */
function testMultipleLinks() {
    $firstAttributes = array('href' => 'http://somewhere');
    $firstLink = new SimpleAnchorTag($firstAttributes);
    $firstLink->addContent('1');

    $secondAttributes = array('href' => 'http://elsewhere');
    $secondLink = new SimpleAnchorTag($secondAttributes);
    $secondLink->addContent('2');

    $mockPage = new MockSimplePage();
    $mockPage->expectAt(0, 'acceptTag', array($firstLink));
    $mockPage->expectAt(1, 'acceptTag', array($secondLink));
    $mockPage->expectCallCount('acceptTag', 2);

    // Stub the factory methods first, then run the constructor explicitly.
    $pageBuilder = new PartialSimplePageBuilder();
    $pageBuilder->returns('createPage', $mockPage);
    $pageBuilder->returns('createParser', new MockSimpleHtmlSaxParser());
    $pageBuilder->__construct();
    $pageBuilder->parse(new MockSimpleHttpResponse());

    // First anchor.
    $pageBuilder->startElement('a', $firstAttributes);
    $pageBuilder->addContent('1');
    $pageBuilder->endElement('a');

    // Text between the anchors belongs to neither of them.
    $pageBuilder->addContent('Padding');

    // Second anchor.
    $pageBuilder->startElement('a', $secondAttributes);
    $pageBuilder->addContent('2');
    $pageBuilder->endElement('a');
}
/**
 * Content delivered to a title element in several pieces should arrive
 * at the page as one SimpleTitleTag holding the concatenated text.
 */
function testTitle() {
    $expectedTitle = new SimpleTitleTag(array());
    $expectedTitle->addContent('HereThere');

    $mockPage = new MockSimplePage();
    $mockPage->expect('acceptTag', array($expectedTitle));
    $mockPage->expectCallCount('acceptTag', 1);

    // Stub the factory methods first, then run the constructor explicitly.
    $pageBuilder = new PartialSimplePageBuilder();
    $pageBuilder->returns('createPage', $mockPage);
    $pageBuilder->returns('createParser', new MockSimpleHtmlSaxParser());
    $pageBuilder->__construct();
    $pageBuilder->parse(new MockSimpleHttpResponse());

    $pageBuilder->startElement('title', array());
    // The title text is fed in two fragments on purpose.
    foreach (array('Here', 'There') as $fragment) {
        $pageBuilder->addContent($fragment);
    }
    $pageBuilder->endElement('title');
}
/**
 * A form element should produce one acceptFormStart() call carrying an
 * attribute-less SimpleFormTag, and one argument-less acceptFormEnd().
 */
function testForm() {
    $mockPage = new MockSimplePage();
    $expectedForm = new SimpleFormTag(array());
    $mockPage->expectOnce('acceptFormStart', array($expectedForm));
    $mockPage->expectOnce('acceptFormEnd', array());

    // Stub the factory methods first, then run the constructor explicitly.
    $pageBuilder = new PartialSimplePageBuilder();
    $pageBuilder->returns('createPage', $mockPage);
    $pageBuilder->returns('createParser', new MockSimpleHtmlSaxParser());
    $pageBuilder->__construct();
    $pageBuilder->parse(new MockSimpleHttpResponse());

    $pageBuilder->startElement('form', array());
    $pageBuilder->addContent('Stuff');
    $pageBuilder->endElement('form');
}
}
/**
 * Checks how the page builder hands a response over to its parser.
 */
class TestOfPageParsing extends UnitTestCase {

    /**
     * parse() should feed the response content to the parser exactly once
     * and notify the page of the page end exactly once.
     */
    function testParseMechanics() {
        $saxParser = new MockSimpleHtmlSaxParser();
        $saxParser->expectOnce('parse', array('stuff'));

        $mockPage = new MockSimplePage();
        $mockPage->expectOnce('acceptPageEnd');

        // Stub the factory methods first, then run the constructor explicitly.
        $pageBuilder = new PartialSimplePageBuilder();
        $pageBuilder->returns('createPage', $mockPage);
        $pageBuilder->returns('createParser', $saxParser);
        $pageBuilder->__construct();

        // The response body is the text the parser is expected to receive.
        $httpResponse = new MockSimpleHttpResponse();
        $httpResponse->setReturnValue('getContent', 'stuff');

        $pageBuilder->parse($httpResponse);
    }
}
/**
 * Pins the values a SimplePage reports when it was built without a response.
 */
class TestOfPageInterface extends UnitTestCase {

    /**
     * A page constructed with no arguments reports a fixed transport error
     * message, false from its scalar accessors, no frames and no URLs.
     */
    function testInterfaceOnEmptyPage() {
        $unfetched = new SimplePage();

        $this->assertEqual(
                $unfetched->getTransportError(),
                'No page fetched yet');

        // Response-derived accessors must be strictly false, not merely falsy.
        $this->assertIdentical($unfetched->getRaw(), false);
        $this->assertIdentical($unfetched->getHeaders(), false);
        $this->assertIdentical($unfetched->getMimeType(), false);
        $this->assertIdentical($unfetched->getResponseCode(), false);
        $this->assertIdentical($unfetched->getAuthentication(), false);
        $this->assertIdentical($unfetched->getRealm(), false);

        $this->assertFalse($unfetched->hasFrames());
        $this->assertIdentical($unfetched->getUrls(), array());
        $this->assertIdentical($unfetched->getTitle(), false);
    }
}
/**
 * Checks that SimplePage exposes values taken from its response and from
 * the response's headers object.
 */
class TestOfPageHeaders extends UnitTestCase {

    /**
     * Wraps the given headers object in a mock response and builds a
     * SimplePage from it. Not a test itself (name does not start with "test").
     *
     * @param object $headers    Headers object the response will return
     *                           from getHeaders().
     * @return SimplePage        Page built on the mock response.
     */
    function pageWithHeaders($headers) {
        $response = new MockSimpleHttpResponse();
        $response->setReturnValue('getHeaders', $headers);
        return new SimplePage($response);
    }

    /**
     * Method, URL and request data are read back from the response.
     */
    function testUrlAccessor() {
        $mockHeaders = new MockSimpleHttpHeaders();
        $mockResponse = new MockSimpleHttpResponse();
        $mockResponse->setReturnValue('getHeaders', $mockHeaders);
        $mockResponse->setReturnValue('getMethod', 'POST');
        $mockResponse->setReturnValue('getUrl', new SimpleUrl('here'));
        $mockResponse->setReturnValue('getRequestData', array('a' => 'A'));

        $fetched = new SimplePage($mockResponse);

        $this->assertEqual($fetched->getMethod(), 'POST');
        $this->assertEqual($fetched->getUrl(), new SimpleUrl('here'));
        $this->assertEqual($fetched->getRequestData(), array('a' => 'A'));
    }

    /**
     * The response's error string is reported as the transport error.
     */
    function testTransportError() {
        $mockResponse = new MockSimpleHttpResponse();
        $mockResponse->setReturnValue('getError', 'Ouch');

        $fetched = new SimplePage($mockResponse);

        $this->assertEqual($fetched->getTransportError(), 'Ouch');
    }

    /**
     * getHeaders() on the page yields the raw header text.
     */
    function testHeadersAccessor() {
        $mockHeaders = new MockSimpleHttpHeaders();
        $mockHeaders->setReturnValue('getRaw', 'My: Headers');

        $fetched = $this->pageWithHeaders($mockHeaders);

        $this->assertEqual($fetched->getHeaders(), 'My: Headers');
    }

    /**
     * The mime type is taken from the headers object.
     */
    function testMimeAccessor() {
        $mockHeaders = new MockSimpleHttpHeaders();
        $mockHeaders->setReturnValue('getMimeType', 'text/html');

        $fetched = $this->pageWithHeaders($mockHeaders);

        $this->assertEqual($fetched->getMimeType(), 'text/html');
    }

    /**
     * The response code is passed through with its integer type intact.
     */
    function testResponseAccessor() {
        $mockHeaders = new MockSimpleHttpHeaders();
        $mockHeaders->setReturnValue('getResponseCode', 301);

        $fetched = $this->pageWithHeaders($mockHeaders);

        $this->assertIdentical($fetched->getResponseCode(), 301);
    }

    /**
     * Authentication scheme and realm are both taken from the headers.
     */
    function testAuthenticationAccessors() {
        $mockHeaders = new MockSimpleHttpHeaders();
        $mockHeaders->setReturnValue('getAuthentication', 'Basic');
        $mockHeaders->setReturnValue('getRealm', 'Secret stuff');

        $fetched = $this->pageWithHeaders($mockHeaders);

        $this->assertEqual($fetched->getAuthentication(), 'Basic');
        $this->assertEqual($fetched->getRealm(), 'Secret stuff');
    }
}
/**
 * Exercises SimplePage directly: raw/text accessors, link collection and
 * URL resolution, title capture and frameset bookkeeping.
 *
 * Tags are delivered through acceptTag(), spelled the same way as in the
 * mock expectations used by the builder tests, rather than relying on
 * PHP's case-insensitive method lookup.
 */
class TestOfHtmlPage extends UnitTestCase {

    /**
     * Builds a SimplePage on a mock response whose getUrl() returns a
     * SimpleUrl for the given string. Not a test itself.
     *
     * @param string $url    Address the mock response reports.
     * @return SimplePage    Page built on that response.
     */
    function pageFetchedFrom($url) {
        $response = new MockSimpleHttpResponse();
        $response->setReturnValue('getUrl', new SimpleUrl($url));
        return new SimplePage($response);
    }

    /**
     * getRaw() returns the response content unchanged.
     */
    function testRawAccessor() {
        $response = new MockSimpleHttpResponse();
        $response->setReturnValue('getContent', 'Raw HTML');
        $page = new SimplePage($response);
        $this->assertEqual($page->getRaw(), 'Raw HTML');
    }

    /**
     * getText() of content that contains no markup returns the same text.
     */
    function testTextAccessor() {
        $response = new MockSimpleHttpResponse();
        $response->setReturnValue('getContent', 'Some "messy" HTML');
        $page = new SimplePage($response);
        $this->assertEqual($page->getText(), 'Some "messy" HTML');
    }

    /**
     * A page that accepted no tags has no URLs at all, by list or by label.
     */
    function testNoLinks() {
        $page = new SimplePage(new MockSimpleHttpResponse());
        $this->assertIdentical($page->getUrls(), array());
        $this->assertIdentical($page->getUrlsByLabel('Label'), array());
    }

    /**
     * An absolute href is returned as-is when looked up by label.
     */
    function testAddAbsoluteLink() {
        $link = new SimpleAnchorTag(array('href' => 'http://somewhere.com'));
        $link->addContent('Label');
        $page = new SimplePage(new MockSimpleHttpResponse());
        $page->acceptTag($link);
        $this->assertEqual(
                $page->getUrlsByLabel('Label'),
                array(new SimpleUrl('http://somewhere.com')));
    }

    /**
     * A "./" relative href is resolved against the page URL.
     */
    function testAddStrictRelativeLink() {
        $link = new SimpleAnchorTag(array('href' => './somewhere.php'));
        $link->addContent('Label');
        $page = $this->pageFetchedFrom('http://host/');
        $page->acceptTag($link);
        $this->assertEqual(
                $page->getUrlsByLabel('Label'),
                array(new SimpleUrl('http://host/somewhere.php')));
    }

    /**
     * A bare relative href is resolved against the page URL; getUrls()
     * reports the result as a plain string.
     */
    function testAddBareRelativeLink() {
        $page = $this->pageFetchedFrom('http://host/');
        $page->acceptTag(new SimpleAnchorTag(array('href' => 'somewhere.php')));
        $this->assertIdentical($page->getUrls(), array('http://host/somewhere.php'));
    }

    /**
     * A base tag accepted after the link still governs how the relative
     * href is resolved.
     */
    function testAddRelativeLinkWithBaseTag() {
        $link = new SimpleAnchorTag(array('href' => 'somewhere.php'));
        $link->addContent('Label');
        $page = $this->pageFetchedFrom('http://host/');
        $page->acceptTag($link);
        $base = new SimpleBaseTag(array('href' => 'www.lastcraft.com/stuff/'));
        $page->acceptTag($base);
        $this->assertEqual(
                $page->getUrlsByLabel('Label'),
                array(new SimpleUrl('www.lastcraft.com/stuff/somewhere.php')));
    }

    /**
     * A base tag leaves an absolute href untouched.
     */
    function testAddAbsoluteLinkWithBaseTag() {
        $link = new SimpleAnchorTag(array('href' => 'http://here.com/somewhere.php'));
        $link->addContent('Label');
        $page = $this->pageFetchedFrom('http://host/');
        $page->acceptTag($link);
        $base = new SimpleBaseTag(array('href' => 'www.lastcraft.com/stuff/'));
        $page->acceptTag($base);
        $this->assertEqual(
                $page->getUrlsByLabel('Label'),
                array(new SimpleUrl('http://here.com/somewhere.php')));
    }

    /**
     * A link can be found by its id attribute; an unknown id yields a
     * falsy result.
     */
    function testLinkIds() {
        $link = new SimpleAnchorTag(array('href' => './somewhere.php', 'id' => 33));
        $link->addContent('Label');
        $page = $this->pageFetchedFrom('http://host/');
        $page->acceptTag($link);
        $this->assertEqual(
                $page->getUrlsByLabel('Label'),
                array(new SimpleUrl('http://host/somewhere.php')));
        $this->assertFalse($page->getUrlById(0));
        $this->assertEqual(
                $page->getUrlById(33),
                new SimpleUrl('http://host/somewhere.php'));
    }

    /**
     * Label lookup ignores the whitespace surrounding the link content.
     */
    function testFindLinkWithNormalisation() {
        $link = new SimpleAnchorTag(array('href' => './somewhere.php', 'id' => 33));
        $link->addContent(' Long & thin ');
        $page = $this->pageFetchedFrom('http://host/');
        $page->acceptTag($link);
        $this->assertEqual(
                $page->getUrlsByLabel('Long & thin'),
                array(new SimpleUrl('http://host/somewhere.php')));
    }

    /**
     * A link whose content is the empty string is found by the empty label.
     *
     * NOTE(review): the test name suggests image markup was intended as the
     * link content; here both content and label are empty - confirm.
     */
    function testFindLinkWithImage() {
        $link = new SimpleAnchorTag(array('href' => './somewhere.php', 'id' => 33));
        $link->addContent('');
        $page = $this->pageFetchedFrom('http://host/');
        $page->acceptTag($link);
        $this->assertEqual(
                $page->getUrlsByLabel(''),
                array(new SimpleUrl('http://host/somewhere.php')));
    }

    /**
     * The content of an accepted title tag becomes the page title.
     */
    function testTitleSetting() {
        $title = new SimpleTitleTag(array());
        $title->addContent('Title');
        $page = new SimplePage(new MockSimpleHttpResponse());
        $page->acceptTag($title);
        $this->assertEqual($page->getTitle(), 'Title');
    }

    /**
     * Without any frameset events the page has no frames and getFrameset()
     * is strictly false.
     */
    function testFramesetAbsence() {
        $url = new SimpleUrl('here');
        $response = new MockSimpleHttpResponse();
        $response->setReturnValue('getUrl', $url);
        $page = new SimplePage($response);
        $this->assertFalse($page->hasFrames());
        $this->assertIdentical($page->getFrameset(), false);
    }

    /**
     * An opened and closed frameset with no frames counts as having frames,
     * and getFrameset() is an empty array rather than false.
     */
    function testHasEmptyFrameset() {
        $page = new SimplePage(new MockSimpleHttpResponse());
        $page->acceptFramesetStart(new SimpleTag('frameset', array()));
        $page->acceptFramesetEnd();
        $this->assertTrue($page->hasFrames());
        $this->assertIdentical($page->getFrameset(), array());
    }

    /**
     * Only frames accepted between frameset start and end are recorded;
     * they are keyed from 1 and resolved against the page URL.
     */
    function testFramesInPage() {
        $page = $this->pageFetchedFrom('http://here');
        // Outside any frameset: expected to be ignored.
        $page->acceptFrame(new SimpleFrameTag(array('src' => '1.html')));
        $page->acceptFramesetStart(new SimpleTag('frameset', array()));
        $page->acceptFrame(new SimpleFrameTag(array('src' => '2.html')));
        $page->acceptFrame(new SimpleFrameTag(array('src' => '3.html')));
        $page->acceptFramesetEnd();
        // After the frameset closed: expected to be ignored as well.
        $page->acceptFrame(new SimpleFrameTag(array('src' => '4.html')));
        $this->assertTrue($page->hasFrames());
        $this->assertIdentical($page->getFrameset(), array(
                1 => new SimpleUrl('http://here/2.html'),
                2 => new SimpleUrl('http://here/3.html')));
    }

    /**
     * Named frames are keyed by name; unnamed ones by their 1-based
     * position among all frames in the frameset.
     */
    function testNamedFramesInPage() {
        $page = $this->pageFetchedFrom('http://here');
        $page->acceptFramesetStart(new SimpleTag('frameset', array()));
        $page->acceptFrame(new SimpleFrameTag(array('src' => '1.html')));
        $page->acceptFrame(new SimpleFrameTag(array('src' => '2.html', 'name' => 'A')));
        $page->acceptFrame(new SimpleFrameTag(array('src' => '3.html', 'name' => 'B')));
        $page->acceptFrame(new SimpleFrameTag(array('src' => '4.html')));
        $page->acceptFramesetEnd();
        $this->assertTrue($page->hasFrames());
        $this->assertIdentical($page->getFrameset(), array(
                1 => new SimpleUrl('http://here/1.html'),
                'A' => new SimpleUrl('http://here/2.html'),
                'B' => new SimpleUrl('http://here/3.html'),
                4 => new SimpleUrl('http://here/4.html')));
    }

    /**
     * Relative frame sources are resolved against a previously accepted
     * base tag instead of the page URL.
     */
    function testRelativeFramesRespectBaseTag() {
        $page = $this->pageFetchedFrom('http://here.com/');
        $base = new SimpleBaseTag(array('href' => 'https://there.com/stuff/'));
        $page->acceptTag($base);
        $page->acceptFramesetStart(new SimpleTag('frameset', array()));
        $page->acceptFrame(new SimpleFrameTag(array('src' => '1.html')));
        $page->acceptFramesetEnd();
        $this->assertIdentical(
                $page->getFrameset(),
                array(1 => new SimpleUrl('https://there.com/stuff/1.html')));
    }
}
/**
 * Drives SimplePage with form-related events and checks that the
 * resulting forms and fields can be queried.
 */
class TestOfFormsCreatedFromEventStream extends UnitTestCase {

    /**
     * A form with a single submit button can be located by the button's
     * label and submitted, yielding a GET encoding of that button.
     */
    function testFormCanBeSubmitted() {
        $page = new SimplePage(new MockSimpleHttpResponse());
        $page->acceptFormStart(
                new SimpleFormTag(array('method' => 'GET', 'action' => 'here.php')));
        $page->acceptTag(
                new SimpleSubmitTag(array('type' => 'submit', 'name' => 's')));
        $page->acceptFormEnd();

        // Plain assignment: objects are handles, so no reference binding to
        // the call result is needed (and PHP complains about one unless the
        // method is declared as returning by reference).
        $form = $page->getFormBySubmit(new SimpleByLabel('Submit'));
        $this->assertEqual(
                $form->submitButton(new SimpleByLabel('Submit')),
                new SimpleGetEncoding(array('s' => 'Submit')));
    }

    /**
     * The value of a text field can be read back from the page by name.
     */
    function testInputFieldCanBeReadBack() {
        $page = new SimplePage(new MockSimpleHttpResponse());
        $page->acceptFormStart(
                new SimpleFormTag(array('method' => 'GET', 'action' => 'here.php')));
        $page->acceptTag(
                new SimpleTextTag(array('type' => 'text', 'name' => 'a', 'value' => 'A')));
        $page->acceptTag(
                new SimpleSubmitTag(array('type' => 'submit', 'name' => 's')));
        $page->acceptFormEnd();
        $this->assertEqual($page->getField(new SimpleByName('a')), 'A');
    }

    /**
     * A text field accepted between label start and end can be read back
     * through the label's text.
     */
    function testInputFieldCanBeReadBackByLabel() {
        $label = new SimpleLabelTag(array());
        $page = new SimplePage(new MockSimpleHttpResponse());
        $page->acceptFormStart(
                new SimpleFormTag(array('method' => 'GET', 'action' => 'here.php')));
        $page->acceptLabelStart($label);
        // The label text is added after the label was handed to the page.
        $label->addContent('l');
        $page->acceptTag(
                new SimpleTextTag(array('type' => 'text', 'name' => 'a', 'value' => 'A')));
        $page->acceptLabelEnd();
        $page->acceptTag(
                new SimpleSubmitTag(array('type' => 'submit', 'name' => 's')));
        $page->acceptFormEnd();
        $this->assertEqual($page->getField(new SimpleByLabel('l')), 'A');
    }
}
class TestOfPageScraping extends UnitTestCase {
/**
 * Runs a fresh SimplePageBuilder over the given response.
 *
 * @param object $response    Response handed to the builder's parse().
 * @return mixed              Whatever the builder's parse() returns.
 */
function parse($response) {
    $pageBuilder = new SimplePageBuilder();
    return $pageBuilder->parse($response);
}
/**
 * A page built straight from an unconfigured mock response has no URLs
 * and a strictly false title.
 */
function testEmptyPage() {
    $blankResponse = new MockSimpleHttpResponse();
    $blankPage = new SimplePage($blankResponse);

    $this->assertIdentical($blankPage->getUrls(), array());
    $this->assertIdentical($blankPage->getTitle(), false);
}
function testUninterestingPage() {
$response = new MockSimpleHttpResponse();
$response->setReturnValue('getContent', ' Stuff