From 49de632578b41f169ad0d65faff1471b74bdbf21 Mon Sep 17 00:00:00 2001 From: yxhwxn Date: Mon, 12 Aug 2024 04:47:41 +0900 Subject: [PATCH 1/5] =?UTF-8?q?Refactor:=20=ED=81=AC=EB=A1=A4=EB=9F=AC=20?= =?UTF-8?q?=EC=86=8C=EC=9A=94=EC=8B=9C=EA=B0=84=EC=9D=84=20=EA=B3=A0?= =?UTF-8?q?=EB=A0=A4=ED=95=9C=20Nginx=20=ED=83=80=EC=9E=84=EC=95=84?= =?UTF-8?q?=EC=9B=83=20=EC=84=A4=EC=A0=95=20=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .platform/nginx.conf | 102 ++++++++++++++++++++++--------------------- 1 file changed, 53 insertions(+), 49 deletions(-) diff --git a/.platform/nginx.conf b/.platform/nginx.conf index 471a08e..118f133 100644 --- a/.platform/nginx.conf +++ b/.platform/nginx.conf @@ -11,53 +11,57 @@ events { } http { - include /etc/nginx/mime.types; - default_type application/octet-stream; - - - log_format main '$remote_addr - $remote_user [$time_local] "$request" ' - '$status $body_bytes_sent "$http_referer" ' - '"$http_user_agent" "$http_x_forwarded_for"'; - - include conf.d/*.conf; - - map $http_upgrade $connection_upgrade { - default "upgrade"; - } - - upstream springboot { - server 127.0.0.1:8080; - keepalive 1024; - } - - server { - listen 80 default_server; - listen [::]:80 default_server; - - location / { - proxy_pass http://springboot; - # CORS 관련 헤더 추가 - add_header 'Access-Control-Allow-Origin' '*'; - add_header 'Access-Control-Allow-Methods' 'GET, POST, PUT, DELETE, OPTIONS'; - add_header 'Access-Control-Allow-Headers' 'Authorization, Content-Type'; - proxy_http_version 1.1; - proxy_set_header Connection $connection_upgrade; - proxy_set_header Upgrade $http_upgrade; - - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - } - - access_log /var/log/nginx/access.log main; - - client_header_timeout 60; - client_body_timeout 60; - keepalive_timeout 60; - gzip off; - gzip_comp_level 4; - - # Include the Elastic Beanstalk generated locations - include conf.d/elasticbeanstalk/healthd.conf; - } + include /etc/nginx/mime.types; + default_type application/octet-stream; + + log_format main '$remote_addr - $remote_user [$time_local] "$request" ' + '$status $body_bytes_sent "$http_referer" ' + '"$http_user_agent" "$http_x_forwarded_for"'; + + include conf.d/*.conf; + + map $http_upgrade $connection_upgrade { + default "upgrade"; + } + + upstream springboot { + server 127.0.0.1:8080; + keepalive 1024; + } + + server { + listen 80 default_server; + listen [::]:80 default_server; + + location / { + proxy_pass http://springboot; + # CORS 관련 헤더 추가 + add_header 'Access-Control-Allow-Origin' '*'; + add_header 'Access-Control-Allow-Methods' 'GET, POST, PUT, DELETE, OPTIONS'; + add_header 'Access-Control-Allow-Headers' 'Authorization, Content-Type'; + proxy_http_version 1.1; + proxy_set_header Connection $connection_upgrade; + proxy_set_header Upgrade $http_upgrade; + + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + + # 타임아웃 설정 추가 + proxy_read_timeout 900s; # 백엔드 서버로부터의 응답을 기다리는 시간 + proxy_connect_timeout 900s; # 백엔드 서버에 연결을 시도하는 시간 + proxy_send_timeout 900s; # Nginx가 백엔드 서버로 요청을 전송하는 시간 + } + + access_log /var/log/nginx/access.log main; + + client_header_timeout 60; + client_body_timeout 60; + keepalive_timeout 60; + gzip off; + gzip_comp_level 4; + + # Include the Elastic Beanstalk generated locations + include conf.d/elasticbeanstalk/healthd.conf; + } } From 710d02553113cf095527182dbab44bce24cb1627 Mon Sep 17 00:00:00 2001 From: yxhwxn Date: Mon, 12 Aug 2024 07:11:31 +0900 Subject: [PATCH 2/5] =?UTF-8?q?Refactor:=20=EC=A0=84=EC=B2=B4=20=EB=8C=93?= =?UTF-8?q?=EA=B8=80=EC=9D=B4=20=EB=A1=9C=EB=93=9C=EB=90=9C=20=EA=B2=BD?= =?UTF-8?q?=EC=9A=B0,=20=ED=81=AC=EB=A1=A4=EB=A7=81=EC=9D=84=20=EC=A1=B0?= =?UTF-8?q?=EA=B8=B0=EC=97=90=20=EC=A2=85=EB=A3=8C=ED=95=98=EB=8F=84?= =?UTF-8?q?=EB=A1=9D=20=EB=A1=9C=EC=A7=81=EC=9D=84=20=EC=A1=B0=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../suppin/event/crawl/service/CrawlService.java | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/cmc/suppin/event/crawl/service/CrawlService.java b/src/main/java/com/cmc/suppin/event/crawl/service/CrawlService.java index 62c4f46..70a3ff4 100644 --- a/src/main/java/com/cmc/suppin/event/crawl/service/CrawlService.java +++ b/src/main/java/com/cmc/suppin/event/crawl/service/CrawlService.java @@ -100,18 +100,23 @@ public void crawlYoutubeComments(String url, Long eventId, String userId, boolea try { Thread.sleep(5000); // 초기 로딩 대기 - long endTime = System.currentTimeMillis() + 300000; // 스크롤 시간 조정 (필요에 따라 조정) + long endTime = System.currentTimeMillis() + 600000; // 스크롤 시간을 10분으로 설정 (600,000ms) JavascriptExecutor jsExecutor = (JavascriptExecutor) driver; + int previousCommentCount = 0; + int currentCommentCount; + while (System.currentTimeMillis() < endTime) { jsExecutor.executeScript("window.scrollTo(0, document.documentElement.scrollHeight);"); - Thread.sleep(1000); + Thread.sleep(1000); // 1초 대기 String pageSource = driver.getPageSource(); Document doc = Jsoup.parse(pageSource); Elements comments = doc.select("ytd-comment-thread-renderer"); + currentCommentCount = comments.size(); + for (Element commentElement : comments) { String author = commentElement.select("#author-text span").text(); String text = commentElement.select("#content yt-attributed-string#content-text").text(); @@ -126,6 +131,13 @@ public void crawlYoutubeComments(String url, Long eventId, String userId, boolea commentRepository.save(comment); } } + + // 더 이상 새로운 댓글이 없을 때, 크롤링 종료 + if (currentCommentCount == previousCommentCount) { + break; // 새로운 댓글이 로드되지 않으면 루프를 종료합니다. + } + + previousCommentCount = currentCommentCount; } } catch (InterruptedException e) { e.printStackTrace(); From a99f6bfd33ae44a9ccb629d16fcf2827d9397f8b Mon Sep 17 00:00:00 2001 From: yxhwxn Date: Mon, 12 Aug 2024 07:12:09 +0900 Subject: [PATCH 3/5] =?UTF-8?q?Chore:=20=EB=B0=B0=ED=8F=AC=20=EC=84=9C?= =?UTF-8?q?=EB=B2=84=EC=99=80=20selenium=20=EB=B2=84=EC=A0=84=20=EB=8F=99?= =?UTF-8?q?=EA=B8=B0=ED=99=94?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- build.gradle | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/build.gradle b/build.gradle index 0d09a05..8cb7934 100644 --- a/build.gradle +++ b/build.gradle @@ -39,10 +39,9 @@ dependencies { implementation 'io.jsonwebtoken:jjwt-jackson:0.12.2' //selenium - implementation 'org.seleniumhq.selenium:selenium-java:4.1.4' - implementation 'io.github.bonigarcia:webdrivermanager:5.0.3' + implementation 'org.seleniumhq.selenium:selenium-java:4.22.0' + implementation 'io.github.bonigarcia:webdrivermanager:5.4.0' implementation 'org.jsoup:jsoup:1.13.1' - testImplementation 'org.seleniumhq.selenium:selenium-java:4.22.0' //Google Firebase implementation 'com.google.firebase:firebase-admin:9.2.0' From d593e3471cadaf2c8d53902572bb87cc7ca86414 Mon Sep 17 00:00:00 2001 From: yxhwxn Date: Mon, 12 Aug 2024 07:12:27 +0900 Subject: [PATCH 4/5] =?UTF-8?q?Chore:=20Timeout=20=EB=B0=A9=EC=A7=80=20?= =?UTF-8?q?=EC=84=A4=EC=A0=95=20=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/main/resources/application.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml index 86f9082..7d322b7 100644 --- a/src/main/resources/application.yml +++ b/src/main/resources/application.yml @@ -1,6 +1,7 @@ spring: server: port: 8080 + connection-timeout: 15m jackson: time-zone: Asia/Seoul datasource: @@ -38,6 +39,9 @@ spring: protocol: smtp default-encoding: UTF-8 test-connection: false + mvc: + async: + request-timeout: 15m jwt: token: From db3407ef76f385419705884a6eb96862d5143a37 Mon Sep 17 00:00:00 2001 From: yxhwxn Date: Tue, 13 Aug 2024 00:23:50 +0900 Subject: [PATCH 5/5] =?UTF-8?q?Refactor:=20=EC=9C=A0=ED=8A=9C=EB=B8=8C=20?= =?UTF-8?q?=EB=8C=93=EA=B8=80=20=ED=81=AC=EB=A1=A4=EB=A7=81=20API=20respon?= =?UTF-8?q?se=20=EA=B0=92=20=EC=88=98=EC=A0=95(=EC=8B=9C=EA=B0=84,=20?= =?UTF-8?q?=EC=B4=9D=20=EB=8C=93=EA=B8=80=20=EC=88=98=20=EC=B6=94=EA=B0=80?= =?UTF-8?q?)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../com/cmc/suppin/event/crawl/controller/CrawlApi.java | 7 ++++--- .../event/crawl/controller/dto/CrawlResponseDTO.java | 5 ++--- .../suppin/event/crawl/converter/CommentConverter.java | 8 ++++++++ .../com/cmc/suppin/event/crawl/service/CrawlService.java | 4 +++- 4 files changed, 17 insertions(+), 7 deletions(-) diff --git a/src/main/java/com/cmc/suppin/event/crawl/controller/CrawlApi.java b/src/main/java/com/cmc/suppin/event/crawl/controller/CrawlApi.java index d2948e7..a8678ac 100644 --- a/src/main/java/com/cmc/suppin/event/crawl/controller/CrawlApi.java +++ b/src/main/java/com/cmc/suppin/event/crawl/controller/CrawlApi.java @@ -1,5 +1,6 @@ package com.cmc.suppin.event.crawl.controller; +import com.cmc.suppin.event.crawl.controller.dto.CrawlResponseDTO; import com.cmc.suppin.event.crawl.service.CrawlService; import com.cmc.suppin.global.response.ApiResponse; import com.cmc.suppin.global.response.ResponseCode; @@ -51,9 +52,9 @@ public ResponseEntity> checkExistingComments(@RequestParam(" "크롤링하려는 URL이 중복되지 않았을 때의 요청이기 때문에, 새로운 댓글을 크롤링합니다.
" + "- DB에 기존 댓글이 존재하는 경우: 크롤링을 중지하고 예외를 던집니다.
" + "- DB에 기존 댓글이 존재하지 않는 경우: 새로운 댓글을 크롤링하고 이를 DB에 저장합니다.") - public ResponseEntity> crawlYoutubeComments(@RequestParam("url") String url, @RequestParam("eventId") Long eventId, @RequestParam("forceUpdate") boolean forceUpdate, @CurrentAccount Account account) { - crawlService.crawlYoutubeComments(url, eventId, account.userId(), forceUpdate); - return ResponseEntity.ok(ApiResponse.of(ResponseCode.SUCCESS, "댓글 수집이 완료되었습니다.")); + public ResponseEntity> crawlYoutubeComments(@RequestParam("url") String url, @RequestParam("eventId") Long eventId, @RequestParam("forceUpdate") boolean forceUpdate, @CurrentAccount Account account) { + CrawlResponseDTO.CrawlResultDTO crawlResultDTO = crawlService.crawlYoutubeComments(url, eventId, account.userId(), forceUpdate); + return ResponseEntity.ok(ApiResponse.of(ResponseCode.SUCCESS, crawlResultDTO)); } // @GetMapping("/count") diff --git a/src/main/java/com/cmc/suppin/event/crawl/controller/dto/CrawlResponseDTO.java b/src/main/java/com/cmc/suppin/event/crawl/controller/dto/CrawlResponseDTO.java index 2df078c..d96971a 100644 --- a/src/main/java/com/cmc/suppin/event/crawl/controller/dto/CrawlResponseDTO.java +++ b/src/main/java/com/cmc/suppin/event/crawl/controller/dto/CrawlResponseDTO.java @@ -12,8 +12,7 @@ public class CrawlResponseDTO { @NoArgsConstructor @AllArgsConstructor public static class CrawlResultDTO { - private String author; - private String commentText; - private String date; + private String crawlingDate; + private int totalCommentCount; } } diff --git a/src/main/java/com/cmc/suppin/event/crawl/converter/CommentConverter.java b/src/main/java/com/cmc/suppin/event/crawl/converter/CommentConverter.java index 89871b6..e83d55e 100644 --- a/src/main/java/com/cmc/suppin/event/crawl/converter/CommentConverter.java +++ b/src/main/java/com/cmc/suppin/event/crawl/converter/CommentConverter.java @@ -2,6 +2,7 @@ import com.cmc.suppin.event.crawl.controller.dto.CommentRequestDTO; import com.cmc.suppin.event.crawl.controller.dto.CommentResponseDTO; +import com.cmc.suppin.event.crawl.controller.dto.CrawlResponseDTO; import com.cmc.suppin.event.crawl.domain.Comment; import com.cmc.suppin.event.events.domain.Event; @@ -56,5 +57,12 @@ public static CommentResponseDTO.WinnerResponseDTO toWinnerResponseDTO(List new IllegalArgumentException("Member not found")); @@ -144,6 +145,7 @@ public void crawlYoutubeComments(String url, Long eventId, String userId, boolea } finally { driver.quit(); } + return CommentConverter.toCrawlResultDTO(LocalDateTime.now(), uniqueComments.size()); } }