Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add userdict and add time range addition and deletion #5

Merged
merged 6 commits into from
Sep 3, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 28 additions & 10 deletions app/controllers/NewsSegController.php
Original file line number Diff line number Diff line change
Expand Up @@ -32,16 +32,20 @@ public function index($date = null, $all_terms = null) {
$all_terms = Input::get('all');
}

return $this->_yieldView($date, $all_terms, null);
return $this->_yieldView($date, $all_terms, null, null);
}

public function keywordTerms($keyword = null, $date = null) {
return $this->_yieldView($date, null, $keyword);
public function keywordTerms($keyword = null, $display = null, $date = null) {
return $this->_yieldView($date, null, $keyword, $display);
}

private function _yieldView($date, $all_terms, $keyword) {
private function _yieldView($date, $all_terms, $keyword, $display) {
$black_set = array();

if (!$display) {
$display = 'day';
}

if (!$date) {
$date = Input::get('date');
$date = trim($date);
Expand All @@ -53,10 +57,23 @@ private function _yieldView($date, $all_terms, $keyword) {

$redis = \RedisL4::connection();
$dataNum = 1000;
if (isset($keyword)) {
$res = $redis->zRevRange("CKIP:TERMS:$keyword:$date", 0, $dataNum, 'WITHSCORES');
} else {
$res = $redis->zRevRange("CKIP:TERMS:$date", 0, $dataNum, 'WITHSCORES');
if ($display === 'day') {
if (isset($keyword)) {
$res = $redis->zRevRange("CKIP:TERMS:$keyword:$date", 0, $dataNum, 'WITHSCORES');
} else {
$res = $redis->zRevRange("CKIP:TERMS:$date", 0, $dataNum, 'WITHSCORES');
}
} else if ($display === 'week') {
if (isset($keyword)) {
$redis->zUnionStore('CKIP:TERMS:TEMP', 1, "CKIP:TERMS:$keyword:$date");
for ($i=1; $i<=6; $i++) {
$nowDate = date('Y-m-d', strtotime("$date - $i days"));
$redis->zUnionStore('CKIP:TERMS:TEMP', 2, 'CKIP:TERMS:TEMP', "CKIP:TERMS:$keyword:$nowDate");
}
$res = $redis->zRevRange("CKIP:TERMS:TEMP", 0, $dataNum, 'WITHSCORES');
} else {
$res = $redis->zRevRange("CKIP:TERMS:$date", 0, $dataNum, 'WITHSCORES');
}
}
if (count($res) > 0) {
$prevDate = date('Y-m-d', strtotime("$date - 1 days"));
Expand All @@ -66,8 +83,9 @@ private function _yieldView($date, $all_terms, $keyword) {
$prevRes = $redis->zRevRange("CKIP:TERMS:$prevDate", 0, $dataNum, 'WITHSCORES');
}

foreach ($redis->sMembers('CKIP:TERMS:BLACK_SET') as $element)
foreach ($redis->sMembers('CKIP:TERMS:BLACK_SET') as $element) {
$black_set[$element] = '';
}

if ($all_terms) {
$prevRes = $this->_changeResStruct($prevRes, $black_set);
Expand Down Expand Up @@ -116,7 +134,7 @@ private function _yieldView($date, $all_terms, $keyword) {
// no data
}

return View::make('pure-bootstrap3.array-to-table', array('data' => $res, 'date' => $date, 'keyword' => $keyword));
return View::make('pure-bootstrap3.array-to-table', array('data' => $res, 'date' => $date, 'keyword' => $keyword, 'display' => $display));
}

}
94 changes: 49 additions & 45 deletions app/lib/Us/Job/Keyword.py → app/lib/Us/Job/DataHandler
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/usr/bin/env python
#encoding=utf-8

class Keyword:
class DataHandler:
def getOptions(self):
import time
import getopt
Expand All @@ -10,32 +11,41 @@ def getOptions(self):
limit = 10000
startTime = None
endTime = None
word = None
keyword = None
term = None
userDict = None
action = 'get'

opts, args = getopt.getopt(sys.argv[1:], '', ['action=', 'limit=', 'keyword=', 'term=', 'start-time=', 'end-time=', 'source=', 'help'])
opts, args = getopt.getopt(sys.argv[1:], '', ['action=', 'limit=', 'keyword=', 'term=', 'start-time=', 'end-time=', 'source=', 'userdict=', 'help'])
for theOpt in opts:
if theOpt[0] == '--action':
action = theOpt[1]
elif theOpt[0] == '--limit':
limit = int(theOpt[1])
elif theOpt[0] == '--keyword':
word = theOpt[1]
word = word.decode('utf-8')
keyword = theOpt[1]
keyword = keyword.decode('utf-8')
elif theOpt[0] == '--term':
term = theOpt[1]
elif theOpt[0] == '--start-time':
startTime = time.mktime(datetime.datetime.strptime(theOpt[1], '%Y-%m-%d %H:%M:%S').timetuple())
theTime = theOpt[1]
if not ':' in theTime:
theTime = '%s 00:00:00' % (theTime)
startTime = time.mktime(datetime.datetime.strptime(theTime, '%Y-%m-%d %H:%M:%S').timetuple())
elif theOpt[0] == '--end-time':
endTime = time.mktime(datetime.datetime.strptime(theOpt[1], '%Y-%m-%d %H:%M:%S').timetuple())
theTime = theOpt[1]
if not ':' in theTime:
theTime = '%s 23:59:59' % (theTime)
endTime = time.mktime(datetime.datetime.strptime(theTime, '%Y-%m-%d %H:%M:%S').timetuple())
elif theOpt[0] == '--source':
source = theOpt[1]
elif theOpt[0] == '--userdict':
userDict = theOpt[1]
elif theOpt[0] == '--help':
self.showHelp()
sys.exit(0)

return action, word, term, startTime, endTime, limit
return action, keyword, term, startTime, endTime, limit, userDict

def showHelp(self):
import sys
Expand All @@ -48,7 +58,7 @@ def showHelp(self):
sys.stderr.write("--end-time='2014-07-31 23:59:59'\n")
sys.stderr.write("--source=1\n")

def getNews(self, word, startTime, endTime, limit):
def getNews(self, keyword, startTime, endTime, limit):
import sys
import datetime

Expand Down Expand Up @@ -105,7 +115,7 @@ def getNews(self, word, startTime, endTime, limit):
body = record[4]

handleDataCount += 1;
if (word == None) or (word != None and (word in title or word in body)):
if (keyword == None) or (keyword != None and (keyword in title or keyword in body)):
newsDateTime = datetime.datetime.fromtimestamp(time).strftime('%Y-%m-%d')
title = title.replace('\r', '')
body = body.replace('\r', '')
Expand All @@ -117,63 +127,57 @@ def getNews(self, word, startTime, endTime, limit):
break
lastId = newsId
# end for record in tmpData
# end while
# end while
self.cursor.close()
self.con.close()
return dataArr

def mysqlCon(self):
import MySQLdb

self.con = MySQLdb.connect(host='news-ckip.source.today', user='sma', passwd='sma', db='newsdiff', charset='utf8')
self.con = MySQLdb.connect(host='news-ckip.source.today', user='sma', passwd='amsamsams', db='newsdiff', charset='utf8')
self.cursor = self.con.cursor()

def cutWord(self, articleArr):
def cutWord(self, articleArr, userDict):
import jieba
import itertools
import redis

if userDict != None:
jieba.load_userdict(userDict)

cutResultArr = []
cutResult = {}
r = redis.Redis(host='localhost', port=6379, charset='utf-8')
for article in articleArr:
seg_title_list = jieba.cut(article['SubjectHtml'], cut_all=False)
seg_body_list = jieba.cut(article['TextHtml'], cut_all=False)
for term in itertools.chain(seg_title_list, seg_body_list):
if term in cutResult:
cutResult[term] += 1
else:
cutResult.update({term: 1})
cutResultArr.append({'Published': article['Published'], 'WordCutResult': cutResult})
return cutResultArr

def saveKeywordArticle(self, cutResultArr, word):
import redis
if len(term) >= 2:
if keyword == None:
r.zincrby('CKIP:TERMS:%s' % (article['Published']), term, 1)
else:
r.zincrby('CKIP:TERMS:%s:%s' % (keyword, article['Published']), term, 1)

r = redis.Redis(host='localhost', port=6379, charset='utf-8')
for cutResult in cutResultArr:
for theWord in cutResult['WordCutResult']:
if len(theWord) >= 2:
r.zadd('CKIP:TERMS:%s:%s' % (word, cutResult['Published']), theWord, cutResult['WordCutResult'][theWord])

def delWord(self, word, term, startTime, endTime):
def delKeys(self, keyword, startTime, endTime):
import redis
import datetime
import time
import datetime

r = redis.Redis(host='localhost', port=6379, charset='utf-8')
theTime = startTime
while theTime <= endTime:
strTime = datetime.datetime.fromtimestamp(int(theTime)).strftime('%Y-%m-%d')
r.zrem('CKIP:TERMS:%s:%s' % (word, strTime), term)
structTime = datetime.datetime.strptime(strTime, '%Y-%m-%d')
structTime = structTime + datetime.timedelta(days=1)
theTime = time.mktime(structTime.timetuple())
nowTime = datetime.datetime.fromtimestamp(int(startTime))
endTime = datetime.datetime.fromtimestamp(int(endTime))
while (endTime - nowTime) >= datetime.timedelta(days=0):
strTime = nowTime.strftime('%Y-%m-%d')
if keyword == None:
r.delete('CKIP:TERMS:%s' % (strTime))
else:
r.delete('CKIP:TERMS:%s:%s' % (keyword, strTime))
nowTime = nowTime + datetime.timedelta(days=1)

if __name__ == '__main__':
kw = Keyword()
action, word, term, startTime, endTime, limit = kw.getOptions()
dh = DataHandler()
action, keyword, term, startTime, endTime, limit, userDict = dh.getOptions()
if action == 'get':
dataArr = kw.getNews(word, startTime, endTime, limit)
cutResultArr = kw.cutWord(dataArr)
kw.saveKeywordArticle(cutResultArr, word)
dataArr = dh.getNews(keyword, startTime, endTime, limit)
dh.cutWord(dataArr, userDict)
elif action == 'delete':
kw.delWord(word, term, startTime, endTime)
dh.delKeys(keyword, startTime, endTime)
3 changes: 2 additions & 1 deletion app/routes.php
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
Route::get('/', array( 'as' => 'home', 'uses' =>'NewsSegController@index'));
Route::get('/news-terms/{date}', array( 'as' => 'home', 'uses' =>'NewsSegController@index'));
Route::get('/keyword-terms/{keyword}', array( 'as' => 'home', 'uses' =>'NewsSegController@keywordTerms'));
Route::get('/keyword-terms/{keyword}/{date}', array( 'as' => 'home', 'uses' =>'NewsSegController@keywordTerms'));
Route::get('/keyword-terms/{keyword}/{display}', array( 'as' => 'home', 'uses' =>'NewsSegController@keywordTerms'));
Route::get('/keyword-terms/{keyword}/{display}/{date}', array( 'as' => 'home', 'uses' =>'NewsSegController@keywordTerms'));
Route::get('/{date}', array( 'as' => 'home', 'uses' =>'NewsSegController@index'));
Route::get('/{date}/{all}', array( 'as' => 'home', 'uses' =>'NewsSegController@index'));
24 changes: 20 additions & 4 deletions app/views/pure-bootstrap3/array-to-table.blade.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,31 @@
</style>
@section('main')
<?php
$prev = date('Y-m-d', strtotime("{$date} - 1 days"));
$next = date('Y-m-d', strtotime("{$date} + 1 days"));
if ($display === 'day') {
$prev = date('Y-m-d', strtotime("{$date} - 1 days"));
$next = date('Y-m-d', strtotime("{$date} + 1 days"));
} else if ($display === 'week') {
$prev = date('Y-m-d', strtotime("{$date} - 7 days"));
$next = date('Y-m-d', strtotime("{$date} + 7 days"));
$startDate = date('Y-m-d', strtotime("{$date} - 6 days"));
$date = "$startDate ~ $date";
}
if (isset($keyword)) {
$prev = "keyword-terms/$keyword/$prev";
$next = "keyword-terms/$keyword/$next";
$prev = "keyword-terms/$keyword/$display/$prev";
$next = "keyword-terms/$keyword/$display/$next";
}
?>
<div class="row">
<div class="col-md-12">
@if (isset($keyword))
@if ($display == "day")
<span>日顯示</span>
<a href="/keyword-terms/{{{ $keyword }}}/week">週顯示</a>
@elseif ($display == "week")
<a href="/keyword-terms/{{{ $keyword }}}/day">日顯示</a>
<span>週顯示</span>
@endif
@endif
<h3>
<a href="/{{{$prev}}}">prev <span class="glyphicon glyphicon-chevron-left"></span></a>
{{{ $date }}}
Expand Down
1 change: 1 addition & 0 deletions app/views/pure-bootstrap3/layouts/default.blade.php
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
<li><a href="/credit-and-thanks">About</a></li>
<li><a href="/keyword-terms/連勝文">連勝文</a></li>
<li><a href="/keyword-terms/柯文哲">柯文哲</a></li>
<li><a href="/keyword-terms/馮光遠">馮光遠</a></li>
</ul>
</div>
</div>
Expand Down