Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor: 크롤러 및 Nginx 수정 #36

Merged
merged 4 commits into from
Aug 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 53 additions & 49 deletions .platform/nginx.conf
Original file line number Diff line number Diff line change
Expand Up @@ -11,53 +11,57 @@ events {
}

http {
include /etc/nginx/mime.types;
default_type application/octet-stream;


log_format main '$remote_addr - $remote_user [$time_local] "$request" '
'$status $body_bytes_sent "$http_referer" '
'"$http_user_agent" "$http_x_forwarded_for"';

include conf.d/*.conf;

map $http_upgrade $connection_upgrade {
default "upgrade";
}

upstream springboot {
server 127.0.0.1:8080;
keepalive 1024;
}

server {
listen 80 default_server;
listen [::]:80 default_server;

location / {
proxy_pass http://springboot;
# CORS 관련 헤더 추가
add_header 'Access-Control-Allow-Origin' '*';
add_header 'Access-Control-Allow-Methods' 'GET, POST, PUT, DELETE, OPTIONS';
add_header 'Access-Control-Allow-Headers' 'Authorization, Content-Type';
proxy_http_version 1.1;
proxy_set_header Connection $connection_upgrade;
proxy_set_header Upgrade $http_upgrade;

proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
}

access_log /var/log/nginx/access.log main;

client_header_timeout 60;
client_body_timeout 60;
keepalive_timeout 60;
gzip off;
gzip_comp_level 4;

# Include the Elastic Beanstalk generated locations
include conf.d/elasticbeanstalk/healthd.conf;
}
include /etc/nginx/mime.types;
default_type application/octet-stream;

log_format main '$remote_addr - $remote_user [$time_local] "$request" '
'$status $body_bytes_sent "$http_referer" '
'"$http_user_agent" "$http_x_forwarded_for"';

include conf.d/*.conf;

map $http_upgrade $connection_upgrade {
default "upgrade";
}

upstream springboot {
server 127.0.0.1:8080;
keepalive 1024;
}

server {
listen 80 default_server;
listen [::]:80 default_server;

location / {
proxy_pass http://springboot;
# CORS 관련 헤더 추가
add_header 'Access-Control-Allow-Origin' '*';
add_header 'Access-Control-Allow-Methods' 'GET, POST, PUT, DELETE, OPTIONS';
add_header 'Access-Control-Allow-Headers' 'Authorization, Content-Type';
proxy_http_version 1.1;
proxy_set_header Connection $connection_upgrade;
proxy_set_header Upgrade $http_upgrade;

proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;

# 타임아웃 설정 추가
proxy_read_timeout 900s; # 백엔드 서버로부터의 응답을 기다리는 시간
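proxy_connect_timeout 900s; # 백엔드 서버에 연결을 시도하는 시간 (note: the nginx docs state this timeout usually cannot exceed 75 seconds, so 900s will likely not take full effect)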
proxy_send_timeout 900s; # Nginx가 백엔드 서버로 요청을 전송하는 시간
}

access_log /var/log/nginx/access.log main;

client_header_timeout 60;
client_body_timeout 60;
keepalive_timeout 60;
gzip off;
gzip_comp_level 4;

# Include the Elastic Beanstalk generated locations
include conf.d/elasticbeanstalk/healthd.conf;
}
}
5 changes: 2 additions & 3 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,9 @@ dependencies {
implementation 'io.jsonwebtoken:jjwt-jackson:0.12.2'

//selenium
implementation 'org.seleniumhq.selenium:selenium-java:4.1.4'
implementation 'io.github.bonigarcia:webdrivermanager:5.0.3'
implementation 'org.seleniumhq.selenium:selenium-java:4.22.0'
implementation 'io.github.bonigarcia:webdrivermanager:5.4.0'
implementation 'org.jsoup:jsoup:1.13.1'
testImplementation 'org.seleniumhq.selenium:selenium-java:4.22.0'

//Google Firebase
implementation 'com.google.firebase:firebase-admin:9.2.0'
Expand Down
16 changes: 14 additions & 2 deletions src/main/java/com/cmc/suppin/event/crawl/service/CrawlService.java
Original file line number Diff line number Diff line change
Expand Up @@ -100,18 +100,23 @@ public void crawlYoutubeComments(String url, Long eventId, String userId, boolea
try {
Thread.sleep(5000); // 초기 로딩 대기

long endTime = System.currentTimeMillis() + 300000; // 스크롤 시간 조정 (필요에 따라 조정)
long endTime = System.currentTimeMillis() + 600000; // 스크롤 시간을 10분으로 설정 (600,000ms)
JavascriptExecutor jsExecutor = (JavascriptExecutor) driver;

int previousCommentCount = 0;
int currentCommentCount;

while (System.currentTimeMillis() < endTime) {
jsExecutor.executeScript("window.scrollTo(0, document.documentElement.scrollHeight);");

Thread.sleep(1000);
Thread.sleep(1000); // 1초 대기

String pageSource = driver.getPageSource();
Document doc = Jsoup.parse(pageSource);
Elements comments = doc.select("ytd-comment-thread-renderer");

currentCommentCount = comments.size();

for (Element commentElement : comments) {
String author = commentElement.select("#author-text span").text();
String text = commentElement.select("#content yt-attributed-string#content-text").text();
Expand All @@ -126,6 +131,13 @@ public void crawlYoutubeComments(String url, Long eventId, String userId, boolea
commentRepository.save(comment);
}
}

// 더 이상 새로운 댓글이 없을 때, 크롤링 종료
if (currentCommentCount == previousCommentCount) {
break; // 새로운 댓글이 로드되지 않으면 루프를 종료합니다.
}

previousCommentCount = currentCommentCount;
}
} catch (InterruptedException e) {
e.printStackTrace();
Expand Down
4 changes: 4 additions & 0 deletions src/main/resources/application.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
spring:
server:
port: 8080
connection-timeout: 15m
jackson:
time-zone: Asia/Seoul
datasource:
Expand Down Expand Up @@ -38,6 +39,9 @@ spring:
protocol: smtp
default-encoding: UTF-8
test-connection: false
mvc:
async:
request-timeout: 15m

jwt:
token:
Expand Down
Loading