Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/crawling #10

Merged
merged 3 commits into from
Oct 17, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions api/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import region from './regions'
import cliques from './cliques'
import users from './users'


const app = express()
app.use(express.json({ limit: '50mb' }))
app.use(express.urlencoded({ limit: '50mb', extended: false }))
Expand All @@ -15,4 +16,5 @@ app.use('/users', users)
app.use('/regions', region)
app.use('/cliques', cliques)


export default app
124 changes: 124 additions & 0 deletions crawling/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
const statistics = require("./statistics.js")
const place = require("./place.js")
const log = console.log
const request = require('request')
// SECURITY(review): a Kakao REST API key is committed to source control.
// Supply it through the KAKAO_API_KEY environment variable instead; the literal
// below is kept only as a backward-compatible fallback and should be rotated
// and removed once deployments set the variable.
const APIKEY = process.env.KAKAO_API_KEY || '05155040bd41ead57e6020cd820e2b40'

// Database handle shared by the update functions; assigned by start().
let db

/**
 * Runs one crawl immediately and then schedules another crawl every hour.
 *
 * crawl() is asynchronous; its rejection is caught and logged here so that a
 * single failed crawl does not surface as an unhandled promise rejection.
 */
function main() {
  const ONE_HOUR_MS = 1 * 60 * 60 * 1000;

  const runCrawl = () => {
    crawl().catch((error) => {
      console.error('Crawling failed:', error);
    });
  };

  runCrawl();
  setInterval(runCrawl, ONE_HOUR_MS);
}

/**
 * Performs one crawl cycle: scrapes the statistics and place pages, then
 * writes the results to the database.
 *
 * @returns {Promise<void>} Settles only after both database updates have
 *   finished, so callers can observe (and handle) their failures.
 */
async function crawl() {
  log(`Log(${new Date().toLocaleString('ko-KR', { timeZone: 'UTC' })}): Crawling... `);

  // The two pages are independent, so fetch them concurrently.
  // The scraped place result is still fetched but not yet persisted (see NOTE below).
  const [statisticsResult] = await Promise.all([
    statistics.getStatistics(),
    place.getPlaces(),
  ]);

  // NOTE(review): hard-coded sample row is stored on every crawl instead of the
  // scraped place result. Switch to the real result once its row layout is
  // confirmed to match what updateDBInfectedPlace expects.
  const testPlaceResult = {
    theadarr: [ '지역', '유형', '상호명', '노출', '소독' ],
    tbodyarr: [['광주', '북구', '시장', '말바우시장 (광주 북구 동문대로85번길 62)', '8/22~9/4', '완료']]
  };

  // TODO: create new statistics log
  // TODO: create new place log

  // Await both writes so their errors propagate through the returned promise
  // instead of being lost as floating promises.
  await Promise.all([
    updateDBStatistic(statisticsResult),
    updateDBInfectedPlace(testPlaceResult),
  ]);
}

/**
 * Stores one scraped statistics snapshot and copies the per-region figures
 * onto the matching Region records.
 *
 * @param {{time: string, dataTableTbodyArr: string[][]}} result Scraper output.
 *   `time` is split on '.', ' ' and '시' to get month, day and hour
 *   (presumably shaped like "10.17. 00시 ..." — confirm against the scraper).
 *   Each row is read as [regionName, incDecAll, incDecIn, incDecOut, patient,
 *   isolProc, isolClear, death, qurRate].
 * @returns {Promise<void>} Resolves after the snapshot and every region save
 *   have completed; rejects if any database call fails.
 */
async function updateDBStatistic(result) {
  // Parses counts such as "1,234"; the radix is passed explicitly.
  const toCount = (text) => parseInt(String(text).replace(/,/g, ''), 10);

  const dateParts = result.time.split('.');
  const month = dateParts[0].trim();
  const day = dateParts[1].trim();
  const hour = result.time.split(' ')[1].split('시')[0];

  // The page only reports month and day. Use the current year, stepping back
  // one year when the reported month lies in the future (e.g. a December
  // report read in January).
  const now = new Date();
  const year = Number(month) > now.getMonth() + 1 ? now.getFullYear() - 1 : now.getFullYear();

  await db.Statistic.create({
    data: JSON.stringify(result.dataTableTbodyArr),
    time: `${year}-${month.padStart(2, '0')}-${day.padStart(2, '0')} ${hour.padStart(2, '0')}:00:00`
  });

  // map + Promise.all (rather than forEach with an async callback) so every
  // save is awaited and its failure reaches the caller.
  await Promise.all(result.dataTableTbodyArr.map(async (row) => {
    const rg = await db.Region.findOne({
      where: {
        regionName: row[0]
      }
    });
    // Rows whose name has no Region record are skipped instead of crashing on null.
    if (!rg) {
      return;
    }
    rg.stdDay = result.time;
    rg.updateDT = result.time;
    rg.deathCnt = toCount(row[7]);
    rg.isolClearCnt = toCount(row[6]);
    rg.qurRate = row[8] === '-' ? 0 : parseFloat(row[8]);
    rg.incDecAllCnt = toCount(row[1]);
    rg.incDecInCnt = toCount(row[2]);
    rg.incDecOutCnt = toCount(row[3]);
    rg.patientCnt = toCount(row[4]);
    rg.isolProcCnt = toCount(row[5]);
    await rg.save();
  }));
}
/**
 * Geocodes each scraped place row and stores it as an InfectedPlace record.
 *
 * Row layout as read by this function: row[3] is "<name> (<address>)",
 * row[4] is an exposure period "M/D~M/D" (or a single "M/D"), and row[5] is
 * the disinfection status text.
 *
 * A row that cannot be processed (no parenthesised address, no geocoding hit,
 * or a failing lookup/insert) is logged and skipped so the remaining rows are
 * still stored.
 *
 * @param {{tbodyarr: string[][]}} result Scraped place table.
 * @returns {Promise<void>} Resolves once every row has been stored or skipped.
 */
async function updateDBInfectedPlace(result) {
  const addressRe = /(\([가-힣a-zA-Z0-9 ]+\))/g;
  const date = Date.now();
  const now = new Date();

  // Turns "8/22" into "YYYY-08-22 <clock>". The page gives no year, so the
  // current year is used, stepping back one year when the month would
  // otherwise lie in the future.
  const toTimestamp = (monthDay, clock) => {
    const parts = monthDay.trim().split('/');
    const month = parts[0];
    const day = parts[1];
    const year = Number(month) > now.getMonth() + 1 ? now.getFullYear() - 1 : now.getFullYear();
    return `${year}-${month.padStart(2, '0')}-${day.padStart(2, '0')} ${clock}`;
  };

  await Promise.all(result.tbodyarr.map(async (row) => {
    const label = row[3];
    try {
      const matched = label.match(addressRe);
      if (!matched) {
        console.error(`Skipping infected place without a parenthesised address: ${label}`);
        return;
      }

      const geocoded = await searchByAdressKAKAO(matched[0]);
      const hit = geocoded && geocoded.documents && geocoded.documents[0];
      if (!hit) {
        console.error(`Skipping infected place with no geocoding result: ${label}`);
        return;
      }

      // The period's end date falls back to its start date for single-day entries.
      const period = row[4].split('~');
      const firstDay = period[0];
      const lastDay = period[1] && period[1].trim() ? period[1] : firstDay;

      await db.InfectedPlace.create({
        infectedPlaceName: label.split('(')[0].trim(),
        infectedPlaceNameEn: `-`,
        adress: label,
        note: `소독: ${row[5]}`,
        longitude: hit.x,
        latitude: hit.y,
        infectedDate: toTimestamp(firstDay, '00:00:00'),
        infectedTime: `00:00:00`,
        createAt: date,
        updateAt: date,
        firstVisitTime: toTimestamp(firstDay, '00:00:00'),
        // Uses the END of the exposure period (previously the start date was reused).
        lastVisitTime: toTimestamp(lastDay, '23:59:59'),
      });
    } catch (error) {
      console.error(`Failed to store infected place "${label}":`, error);
    }
  }));
}

/**
 * Looks up an address through the Kakao Local "search address" endpoint.
 *
 * @param {string} queryString Address text sent as the `query` parameter.
 * @returns {Promise<object>} Resolves with the parsed JSON response body.
 *   Rejects on a transport error, on an HTTP status of 400 or above, or when
 *   the body is not valid JSON.
 */
function searchByAdressKAKAO(queryString) {
  return new Promise((resolve, reject) => {
    const options = {
      uri: "https://dapi.kakao.com/v2/local/search/address.json",
      qs: {
        query: queryString
      },
      headers: {
        'Authorization': `KakaoAK ${APIKEY}`
      }
    };

    request(options, (error, response, body) => {
      if (error) {
        reject(error);
        return;
      }
      if (response && response.statusCode >= 400) {
        reject(new Error(`Kakao address search failed with HTTP ${response.statusCode}`));
        return;
      }
      // JSON.parse runs inside an asynchronous callback, so a throw here would
      // escape the promise entirely; convert it into a rejection instead.
      try {
        resolve(JSON.parse(body));
      } catch (parseError) {
        reject(parseError);
      }
    });
  });
}

/**
 * Entry point of the crawler module.
 *
 * Stores the given database handle in the module-level `db` variable, then
 * calls main(), which runs a crawl and schedules the recurring ones.
 *
 * @param {object} database Handle exposing the models the update functions use.
 */
function start(database) {
  db = database;
  main();
}

exports.start = start;
67 changes: 67 additions & 0 deletions crawling/place.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@


const axios = require("axios");
const cheerio = require("cheerio");
const log = console.log;

/**
 * Downloads the page that getPlaces() parses.
 *
 * @returns {Promise<object>} The axios response (the HTML is in `.data`).
 *   A failed request is logged and then re-thrown, so callers receive a
 *   rejection instead of an `undefined` response they would dereference.
 */
const getHtml = async () => {
  try {
    return await axios.get("http://ncov.mohw.go.kr/bdBoardList_Real.do?brdId=1&brdGubun=12&ncvContSeq=&contSeq=&board_id=&gubun=");
  } catch (error) {
    console.error(error);
    throw error;
  }
};

/**
 * Scrapes the place table (`table.midsize.big`) from the page returned by
 * getHtml().
 *
 * @returns {Promise<{theadarr: string[], tbodyarr: string[][]}>}
 *   `theadarr` holds the header cell texts. `tbodyarr` holds one array of
 *   cell texts per body row; rows with class `sumline_cmn_top` are skipped.
 *   Rejects (after logging) when the page cannot be fetched or parsed, rather
 *   than leaving the promise pending forever.
 */
const getPlaces = async function () {
  // Text of a node's first child; a node without children yields '' rather than throwing.
  const firstChildText = (node) => (node.children && node.children[0] ? node.children[0].data : '');

  try {
    const html = await getHtml();
    if (!html) {
      throw new Error('getPlaces: page request returned no response');
    }

    const $ = cheerio.load(html.data);
    const $thead = $("table.midsize.big").children("thead").children("tr").children("th");
    const $tbody = $("table.midsize.big").children("tbody").children("tr");

    // Every variable is declared locally; the earlier implicit globals throw a
    // ReferenceError as soon as the file runs in strict mode.
    const theadarr = [];
    for (let i = 0; i < $thead.length; i++) {
      if ($thead[i].name === "th") {
        theadarr.push(firstChildText($thead[i]));
      }
    }

    const tbodyarr = [];
    for (let i = 0; i < $tbody.length; i++) {
      const tr = $tbody[i];
      if (tr.attribs.class === "sumline_cmn_top") {
        continue;
      }
      const cells = [];
      for (const child of tr.children) {
        // Only <td> children are collected; other child nodes are ignored.
        if (child.name === "td") {
          cells.push(firstChildText(child));
        }
      }
      tbodyarr.push(cells);
    }

    return { theadarr, tbodyarr };
  } catch (error) {
    console.error(error);
    throw error;
  }
};
exports.getPlaces = getPlaces;
80 changes: 80 additions & 0 deletions crawling/statistics.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@


const axios = require("axios");
const cheerio = require("cheerio");
const log = console.log;

/**
 * Downloads the page that getStatistics() parses.
 *
 * @returns {Promise<object>} The axios response (the HTML is in `.data`).
 *   A failed request is logged and then re-thrown, so callers receive a
 *   rejection instead of an `undefined` response they would dereference.
 */
const getHtml = async () => {
  try {
    return await axios.get("http://ncov.mohw.go.kr/bdBoardList_Real.do?brdId=1&brdGubun=13&ncvContSeq=&contSeq=&board_id=&gubun=");
  } catch (error) {
    console.error(error);
    throw error;
  }
};

/**
 * Scrapes the statistics page returned by getHtml().
 *
 * @returns {Promise<{time: string, dataTableTheadArr: string[], dataTableTbodyArr: string[][]}>}
 *   `time` is the trimmed text of the first node under `div.timetable > p.info`.
 *   `dataTableTheadArr` is a single flat list of all header cell texts.
 *   `dataTableTbodyArr` holds one array of cell texts per body row.
 *   Rejects (after logging) when the page cannot be fetched or parsed, rather
 *   than leaving the promise pending forever.
 */
const getStatistics = async function () {
  // Joins a cell's direct text children; a <br> child contributes one space.
  const cellText = (cell) => {
    let text = '';
    for (const child of cell.children) {
      if (child.type === 'text') {
        text += child.data;
      } else if (child.name === 'br') {
        text += ' ';
      }
    }
    return text;
  };

  // Texts of the row's children that have a `children` list; nodes without
  // one (presumably whitespace text nodes between cells) are ignored.
  const rowTexts = (tr) => tr.children.filter((cell) => cell.children).map(cellText);

  try {
    const html = await getHtml();
    if (!html) {
      throw new Error('getStatistics: page request returned no response');
    }

    const $ = cheerio.load(html.data);
    const $time = $("div.timetable").children('p.info').children();
    if (!$time[0] || !$time[0].children || !$time[0].children[0]) {
      throw new Error('getStatistics: timestamp element not found');
    }
    const time = $time[0].children[0].data.trim();

    const dataTable = $("div.data_table");
    const $theadRows = dataTable.children('table.num.midsize').children('thead').children();
    const $tbodyRows = dataTable.children('table.num.midsize').children('tbody').children();

    // Header cells from every header row are flattened into one list.
    const dataTableTheadArr = [];
    for (let trIdx = 0; trIdx < $theadRows.length; trIdx++) {
      dataTableTheadArr.push(...rowTexts($theadRows[trIdx]));
    }

    // Body rows keep their row structure.
    const dataTableTbodyArr = [];
    for (let trIdx = 0; trIdx < $tbodyRows.length; trIdx++) {
      dataTableTbodyArr.push(rowTexts($tbodyRows[trIdx]));
    }

    return { time, dataTableTheadArr, dataTableTbodyArr };
  } catch (error) {
    console.error(error);
    throw error;
  }
};
exports.getStatistics = getStatistics;
3 changes: 3 additions & 0 deletions database/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import Sequelize from 'sequelize'
import config from './data/config.json'
import initData from './data/initData.json'
import { initCliqueType, initRegion } from './init'
import crawling from '../crawling'

const sequelize = new Sequelize(
config.database,
Expand Down Expand Up @@ -49,6 +50,8 @@ db.Sequelize = Sequelize
global.dbInitialized = true
await initCliqueType(db.CliqueType)
await initRegion(db.Region)

crawling.start(db)
}
})()

Expand Down
4 changes: 2 additions & 2 deletions database/models/InfectedPlace.js
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,11 @@ module.exports = function (sequlize, DataTypes) {
defaultValue: 30,
},
firstVisitTime: {
type: DataTypes.Date,
type: DataTypes.DATE,
allowNull: false,
},
lastVisitTime: {
type: DataTypes.Date,
type: DataTypes.DATE,
allowNull: false,
},
visitCount: {
Expand Down
24 changes: 22 additions & 2 deletions database/models/Region.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,27 @@ module.exports = function (sequlize, DataTypes) {
type: DataTypes.STRING(100),
allowNull: false,
},
deathCnt: {
incDecAllCnt: {
type: DataTypes.INTEGER(11),
allowNull: false,
defaultValue: 0,
},
incDecInCnt: {
type: DataTypes.INTEGER(11),
allowNull: false,
defaultValue: 0,
},
incDecOutCnt: {
type: DataTypes.INTEGER(11),
allowNull: false,
defaultValue: 0,
},
incDecCnt: {
patientCnt: {
type: DataTypes.INTEGER(11),
allowNull: false,
defaultValue: 0,
},
isolProcCnt: {
type: DataTypes.INTEGER(11),
allowNull: false,
defaultValue: 0,
Expand All @@ -39,6 +54,11 @@ module.exports = function (sequlize, DataTypes) {
allowNull: false,
defaultValue: 0,
},
deathCnt: {
type: DataTypes.INTEGER(11),
allowNull: false,
defaultValue: 0,
},
qurRate: {
type: DataTypes.DOUBLE,
allowNull: false,
Expand Down
28 changes: 28 additions & 0 deletions database/models/Statistic.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
module.exports = function (sequelize, DataTypes) {
// console.log(DataTypes)
const Statistic = sequelize.define('Statistic', {
Id: {
type: DataTypes.INTEGER(11),
autoIncrement: true,
primaryKey: true,
allowNull: false,
},
data: {
type: DataTypes.STRING(4000),
allowNull: false,
},
time: {
type: 'TIMESTAMP',
defaultValue: sequelize.literal('CURRENT_TIMESTAMP'),
allowNull: false,
},
},
{
tableName: 'Statistic',
freezeTableName: false,
timestamps: true,
underscored: false,
})

return Statistic
}
Loading