Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unit test 及新聞來源修正 #19

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Git submodule entry: url-normalizer.js is checked out at
# webdata/stdlibs/url-normalizer.js from the g0v repository below.
[submodule "webdata/stdlibs/url-normalizer.js"]
path = webdata/stdlibs/url-normalizer.js
url = https://github.com/g0v/url-normalizer.js.git
8 changes: 8 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Travis CI configuration: runs the PHP test suite on every runtime listed below.
language: php
# Runtimes the build is repeated for.
php:
- 5.5
- 5.4
- 5.3
- hhvm

# Single entry point that runs the whole test suite.
script: php tests/run-all.php
20 changes: 20 additions & 0 deletions tests/models/Crawler/Appledaily/Crawl.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
<?php

/**
 * Checks that Crawler_Appledaily::findLinksIn() extracts the expected links
 * from a saved listing page (data/listing.html).
 */
class TestCase_Crawler_Appledaily_findLinksIn extends UnitTestCase
{
    /**
     * Registers the human-readable label of this test case.
     */
    public function __construct()
    {
        // Hand the label to the parent constructor rather than assigning a
        // property this subclass never declares; this also makes sure the
        // base-class constructor actually runs.
        parent::__construct('Crawler_Appledaily::findLinksIn test (listing.html)');
    }

    /**
     * Compares the sorted links found in the fixture page with the reference
     * list stored in data/listing.json.
     */
    public function testCrawlLinksIn()
    {
        $body = file_get_contents(__DIR__ . '/data/listing.html');
        $expected_json = file_get_contents(__DIR__ . '/data/listing.json');
        if ($body === false || $expected_json === false) {
            // Report an unreadable fixture explicitly instead of letting it
            // surface as a confusing comparison failure.
            $this->fail('Unable to read fixture data/listing.html or data/listing.json');
            return;
        }

        $expected_links = json_decode($expected_json, true);
        if (!is_array($expected_links)) {
            $this->fail('Fixture data/listing.json does not decode to an array');
            return;
        }

        // array_values() discards whatever keys findLinksIn() used, and the
        // sort makes the comparison independent of discovery order.
        $links = array_values(Crawler_Appledaily::findLinksIn($body));
        sort($links);
        $this->assertIdentical($links, $expected_links, 'Crawling links');
    }
}
29 changes: 29 additions & 0 deletions tests/models/Crawler/Appledaily/Parse.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
<?php

/**
 * Checks that Crawler_Appledaily::parse() yields the expected title and body
 * for the saved article page data/523056.html.
 */
class TestCase_Crawler_Appledaily_Parse_523056 extends UnitTestCase
{
    /**
     * Registers the human-readable label of this test case.
     */
    public function __construct()
    {
        // Hand the label to the parent constructor rather than assigning a
        // property this subclass never declares; this also makes sure the
        // base-class constructor actually runs.
        parent::__construct('Crawler_Appledaily::parse (523056.html)');
    }

    /**
     * Parses the fixture page and compares title and body with the reference
     * values stored in data/523056.json.
     */
    public function testParse()
    {
        $content = file_get_contents(__DIR__ . '/data/523056.html');
        $reference_json = file_get_contents(__DIR__ . '/data/523056.json');
        if ($content === false || $reference_json === false) {
            $this->fail('Unable to read fixture data/523056.html or data/523056.json');
            return;
        }

        $reference = json_decode($reference_json);
        if (!is_object($reference) || !isset($reference->title, $reference->body)) {
            // Without this guard a broken fixture turns both expectations into
            // null, which the loose assertEqual() below could accept.
            $this->fail('Fixture data/523056.json must be an object with "title" and "body"');
            return;
        }

        $result = Crawler_Appledaily::parse(Crawler::prepareContent($content));

        $this->assertEqual(
            $result->title,
            $reference->title,
            'Parsed Page Title'
        );
        $this->assertEqual(
            $result->body,
            $reference->body,
            'Parsed Page Body'
        );
    }
}
599 changes: 599 additions & 0 deletions tests/models/Crawler/Appledaily/data/523056.html

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions tests/models/Crawler/Appledaily/data/523056.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"title": "阿拉比卡墜空頭!要咖啡降價?別想了~",
"body": "《華爾街日報》報導,阿拉比卡(arabica)咖啡豆價格在周四墜入空頭,但別急著期待你每天喝的咖啡降價!因為全球最大阿拉比卡咖啡豆產地巴西近來的降雨,緩解了下季咖啡豆產量減少的疑慮。\n \n美國洲際期貨交易所(ICE Futures)的阿拉比卡咖啡豆3月期貨昨下跌1.2%,收每磅1.764美元,寫下近5個月來交易最活絡合約的新低價,而且自10月22日每磅2.219美元的高點挫跌逾20%,正式墜入空頭熊市。\n \n今年稍早阿拉比卡的價格曾因巴西天候乾旱而不斷飛漲,所以即使如今在短短不到2個月內就跌入空頭,阿拉比卡今年來漲幅仍高居所有商品之冠。\n \n今年至今阿拉比卡期貨價累計大漲59%,咖啡業者依然持續再將高漲的成本逐步轉嫁給消費者。阿拉比卡品質較羅布斯塔(robusta)咖啡豆優異,所以通常是專業咖啡業者採用的選擇,羅布斯塔則主要作為即溶咖啡原料。(劉利貞/綜合外電報導)\n\n \n\n \n\n \n\n \n\n <a href=\"\/animation\/\"><<即時新聞動起來>>\n\n【動即時】:最鄉民的影音頻道<\/a>\n\n<a href=\"\/realtimenews\/forum\/\">【蘋論陣線】:最新評論及獨立媒體每日總覽<\/a>\n\n<a href=\"mailto:[email protected]?subject=%A6%B3%B8%DC%ADn%BB%A1%A7%EB%BDZ%A1u%A7Y%AE%C9%BD%D7%BE%C2%A1v\">有話要說 投稿「即時論壇」\n\n [email protected]<\/a>"
}
14 changes: 14 additions & 0 deletions tests/models/Crawler/Appledaily/data/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
Appledaily (TW) data
==================

The `.html` files were downloaded on 2014-12-12.
They represent the normal data crawled from the site.

The `.json` files are the expected parse result for the
`.html` files of the same name.

- `listing.html`
From the result of crawlIndex()

- `523056.html`
From: http://www.appledaily.com.tw/realtimenews/article/finance/20141212/523056/
13,347 changes: 13,347 additions & 0 deletions tests/models/Crawler/Appledaily/data/listing.html

Large diffs are not rendered by default.

767 changes: 767 additions & 0 deletions tests/models/Crawler/Appledaily/data/listing.json

Large diffs are not rendered by default.

20 changes: 20 additions & 0 deletions tests/models/Crawler/BCC/Crawl.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
<?php

/**
 * Checks that Crawler_BCC::findLinksIn() extracts the expected links from a
 * saved listing page (data/listing.html).
 */
class TestCase_Crawler_BCC_findLinksIn extends UnitTestCase
{
    /**
     * Registers the human-readable label of this test case.
     */
    public function __construct()
    {
        // Hand the label to the parent constructor rather than assigning a
        // property this subclass never declares; this also makes sure the
        // base-class constructor actually runs.
        parent::__construct('Crawler_BCC::findLinksIn test (listing.html)');
    }

    /**
     * Compares the sorted links found in the fixture page with the reference
     * list stored in data/listing.json.
     */
    public function testCrawlLinksIn()
    {
        $body = file_get_contents(__DIR__ . '/data/listing.html');
        $expected_json = file_get_contents(__DIR__ . '/data/listing.json');
        if ($body === false || $expected_json === false) {
            // Report an unreadable fixture explicitly instead of letting it
            // surface as a confusing comparison failure.
            $this->fail('Unable to read fixture data/listing.html or data/listing.json');
            return;
        }

        $expected_links = json_decode($expected_json, true);
        if (!is_array($expected_links)) {
            $this->fail('Fixture data/listing.json does not decode to an array');
            return;
        }

        // array_values() discards whatever keys findLinksIn() used, and the
        // sort makes the comparison independent of discovery order.
        $links = array_values(Crawler_BCC::findLinksIn($body));
        sort($links);
        $this->assertIdentical($links, $expected_links, 'Crawling links');
    }
}
29 changes: 29 additions & 0 deletions tests/models/Crawler/BCC/Parse.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
<?php

/**
 * Checks that Crawler_BCC::parse() yields the expected title and body for the
 * saved article page data/2470299.html.
 */
class TestCase_Crawler_BCC_Parse_2470299 extends UnitTestCase
{
    /**
     * Registers the human-readable label of this test case.
     */
    public function __construct()
    {
        // Hand the label to the parent constructor rather than assigning a
        // property this subclass never declares; this also makes sure the
        // base-class constructor actually runs.
        parent::__construct('Crawler_BCC::parse (2470299.html)');
    }

    /**
     * Parses the fixture page and compares title and body with the reference
     * values stored in data/2470299.json.
     */
    public function testParse()
    {
        $content = file_get_contents(__DIR__ . '/data/2470299.html');
        $reference_json = file_get_contents(__DIR__ . '/data/2470299.json');
        if ($content === false || $reference_json === false) {
            $this->fail('Unable to read fixture data/2470299.html or data/2470299.json');
            return;
        }

        $reference = json_decode($reference_json);
        if (!is_object($reference) || !isset($reference->title, $reference->body)) {
            // Without this guard a broken fixture turns both expectations into
            // null, which the loose assertEqual() below could accept.
            $this->fail('Fixture data/2470299.json must be an object with "title" and "body"');
            return;
        }

        $result = Crawler_BCC::parse(Crawler::prepareContent($content));

        $this->assertEqual(
            $result->title,
            $reference->title,
            'Parsed Page Title'
        );
        $this->assertEqual(
            $result->body,
            $reference->body,
            'Parsed Page Body'
        );
    }
}
Loading