-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat : 정책 기간 (policyDateType) 크롤링 후 분류 로직 구현
- Loading branch information
Showing
5 changed files
with
266 additions
and
1 deletion.
There are no files selected for viewing
179 changes: 179 additions & 0 deletions
179
src/main/java/com/cmc/zenefitserver/domain/policy/application/PolicyDateClassifier.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,179 @@ | ||
package com.cmc.zenefitserver.domain.policy.application; | ||
|
||
import com.cmc.zenefitserver.domain.policy.dao.PolicyRepository; | ||
import com.cmc.zenefitserver.domain.policy.domain.ApplyPeriod; | ||
import com.cmc.zenefitserver.domain.policy.domain.Policy; | ||
import com.cmc.zenefitserver.domain.policy.domain.enums.PolicyDateType; | ||
import lombok.RequiredArgsConstructor; | ||
import org.jsoup.Jsoup; | ||
import org.jsoup.nodes.Document; | ||
import org.jsoup.nodes.Element; | ||
import org.jsoup.select.Elements; | ||
import org.springframework.stereotype.Service; | ||
|
||
import java.io.IOException; | ||
import java.time.LocalDate; | ||
import java.time.format.DateTimeFormatter; | ||
import java.util.ArrayList; | ||
import java.util.List; | ||
import java.util.regex.Matcher; | ||
import java.util.regex.Pattern; | ||
|
||
@RequiredArgsConstructor | ||
@Service | ||
public class PolicyDateClassifier { | ||
|
||
private final PolicyRepository policyRepository; | ||
private static final String URL = "https://www.youthcenter.go.kr/youngPlcyUnif/youngPlcyUnifDtl.do?bizId="; | ||
|
||
/** | ||
* 1. 상시 | ||
* 2. 미정 | ||
* 3. 날짜데이터 | ||
* 3-1. 매년 ( 현재 날짜의 년도를 붙임 ) | ||
* 3-2. 매월 ( 현재 날짜의 년도와 1월부터 12월까지 모두 넣음 ) | ||
* 3-3. 여러 날짜 데이터 | ||
* => 다 저장하고, endDate 를 기준으로 정렬 후 현재날짜 이후에 나오는 첫번째 endDate 찾아서 해당 하는 정책의 신청시작일과 신청종료일을 분류함 | ||
* 3-4. 하나의 날짜 데이터 | ||
* => 정책의 신청시작일과 신청종료일을 저장 | ||
* 4. 빈값 ( 비고 데이터 값을 비교해보자 ) | ||
* 4-1. | ||
* 4-2. | ||
*/ | ||
public void go() throws IOException { | ||
|
||
List<Policy> policies = policyRepository.findAll(); | ||
LocalDate now = LocalDate.now(); | ||
int num = 0; | ||
|
||
for (Policy policy : policies) { | ||
|
||
String newUrl = URL + policy.getBizId(); | ||
|
||
Document document = Jsoup.connect(newUrl).get(); | ||
|
||
Elements elements = document.select(".policy-detail"); | ||
num++; | ||
for (Element element : elements) { | ||
Elements businessApplicationPeriodElement = element.select(".list_tit:contains(사업 신청 기간)"); | ||
Elements remarkElement = element.select(".list_tit:contains(비고)"); | ||
|
||
String businessApplicationPeriodText = businessApplicationPeriodElement.get(0).nextElementSibling().text(); | ||
String remarkText = remarkElement.get(0).nextElementSibling().text(); | ||
|
||
// 1. 상시 | ||
if (businessApplicationPeriodText.contains("상시")) { | ||
// policy.updatePolicyDateType(PolicyDateType.CONSTANT); | ||
break; | ||
} | ||
|
||
// 2. 미정 | ||
if (businessApplicationPeriodText.contains("미정")) { | ||
// policy.updatePolicyDateType(PolicyDateType.UNDECIDED); | ||
break; | ||
} | ||
|
||
// 4. 빈값 -> 비고 확인 | ||
if (businessApplicationPeriodText.equals("")) { | ||
// policy.updatePolicyDateType(PolicyDateType.BLANK); | ||
// System.out.println("bizId : " + policy.getBizId() + " 비고 : " + remarkText); | ||
// policy.updateRemark(remarkText); | ||
break; | ||
} | ||
|
||
// 3-1. 매년 | ||
if (businessApplicationPeriodText.contains("매년")) { | ||
Pattern pattern = Pattern.compile("\\d{2}월\\d{2}일~\\d{2}월\\d{2}일"); | ||
|
||
Matcher matcher = pattern.matcher(businessApplicationPeriodText.replace(" ", "")); | ||
|
||
List<ApplyPeriod> applyPeriods = new ArrayList<>(); | ||
DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy년MM월dd일"); | ||
while (matcher.find()) { | ||
String[] splited = matcher.group().split("~"); | ||
StringBuilder sttDate = new StringBuilder(); | ||
sttDate.append("2023년"); | ||
sttDate.append(splited[0]); | ||
StringBuilder endDate = new StringBuilder(); | ||
endDate.append("2023년"); | ||
endDate.append(splited[1]); | ||
|
||
applyPeriods.add(ApplyPeriod.builder().sttDate(LocalDate.parse(sttDate, formatter)).endDate(LocalDate.parse(endDate, formatter)).build()); | ||
} | ||
// policy.updateDateType(PolicyDateType.PERIOD); | ||
// updateApplySttDateAndApplyEndDate(policy, applyPeriods, now); | ||
break; | ||
} | ||
|
||
// 3-2. 매월 | ||
if (businessApplicationPeriodText.contains("매월")) { | ||
Pattern pattern = Pattern.compile("\\d{2}일~\\d{2}일"); | ||
|
||
Matcher matcher = pattern.matcher(businessApplicationPeriodText.replace(" ", "")); | ||
|
||
List<ApplyPeriod> applyPeriods = new ArrayList<>(); | ||
DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy년MM월dd일"); | ||
while (matcher.find()) { | ||
String[] splited = matcher.group().split("~"); | ||
|
||
for (int i = 1; i <= 12; i++) { | ||
StringBuilder sttDate = new StringBuilder(); | ||
StringBuilder endDate = new StringBuilder(); | ||
sttDate.append("2023년"); | ||
endDate.append("2023년"); | ||
String month = String.format("%02d", i); | ||
sttDate.append(month + "월").append(splited[0]); | ||
endDate.append(month + "월").append(splited[1]); | ||
applyPeriods.add(ApplyPeriod.builder().sttDate(LocalDate.parse(sttDate, formatter)).endDate(LocalDate.parse(endDate, formatter)).build()); | ||
} | ||
|
||
} | ||
// policy.updateDateType(PolicyDateType.PERIOD); | ||
// updateApplySttDateAndApplyEndDate(policy, applyPeriods, now); | ||
break; | ||
} | ||
|
||
// 정규 표현식 패턴 | ||
Pattern pattern = Pattern.compile("\\d{4}년\\d{2}월\\d{2}일~\\d{4}년\\d{2}월\\d{2}일"); | ||
|
||
// Matcher를 사용하여 패턴과 일치하는 부분을 찾아냄 | ||
Matcher matcher = pattern.matcher(businessApplicationPeriodText.replace(" ", "")); | ||
|
||
List<ApplyPeriod> applyPeriods = new ArrayList<>(); | ||
DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy년MM월dd일"); | ||
while (matcher.find()) { | ||
String[] splited = matcher.group().split("~"); | ||
applyPeriods.add(ApplyPeriod.builder().sttDate(LocalDate.parse(splited[0], formatter)).endDate(LocalDate.parse(splited[1], formatter)).build()); | ||
} | ||
|
||
// Collections.sort(applyPeriods); | ||
|
||
// 3-3. 하나의 이상의 날짜 데이터 | ||
if (applyPeriods.size() > 0) { | ||
policy.updateDateType(PolicyDateType.PERIOD); | ||
updateApplySttDateAndApplyEndDate(policy, applyPeriods, now); | ||
// System.out.print("bizId = " + policy.getBizId());; | ||
// System.out.print(" period = " + applyPeriods.get(0)); | ||
// System.out.println(" applyPeriods = " + applyPeriods); | ||
// System.out.println("----------------------------"); | ||
System.out.println("applyPeriods = " + applyPeriods); | ||
|
||
} | ||
} | ||
|
||
} | ||
|
||
} | ||
|
||
private static void updateApplySttDateAndApplyEndDate(Policy policy, List<ApplyPeriod> applyPeriods, LocalDate now) { | ||
ApplyPeriod period = applyPeriods.stream() | ||
.filter(p -> p.getEndDate().isAfter(LocalDate.now())) | ||
.findFirst() | ||
.orElse(null); | ||
|
||
if (period != null) { | ||
policy.updateApplySttDateAndApplyEndDate(period.getSttDate(), period.getEndDate()); | ||
|
||
} | ||
} | ||
} |
42 changes: 42 additions & 0 deletions
42
src/main/java/com/cmc/zenefitserver/domain/policy/domain/ApplyPeriod.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
package com.cmc.zenefitserver.domain.policy.domain; | ||
|
||
import lombok.*; | ||
|
||
import javax.persistence.Entity; | ||
import javax.persistence.GeneratedValue; | ||
import javax.persistence.GenerationType; | ||
import javax.persistence.Id; | ||
import java.time.LocalDate; | ||
|
||
@Getter | ||
@ToString | ||
@NoArgsConstructor(access = AccessLevel.PROTECTED) | ||
@Entity | ||
public class ApplyPeriod implements Comparable<ApplyPeriod>{ | ||
|
||
@Id | ||
@GeneratedValue(strategy = GenerationType.SEQUENCE) | ||
private Long id; | ||
|
||
private LocalDate sttDate; | ||
|
||
private LocalDate endDate; | ||
|
||
@Builder | ||
public ApplyPeriod(LocalDate sttDate, LocalDate endDate) { | ||
this.sttDate = sttDate; | ||
this.endDate = endDate; | ||
} | ||
|
||
@Override | ||
public int compareTo(ApplyPeriod o) { | ||
return this.endDate.compareTo(o.endDate); | ||
} | ||
|
||
// @Override | ||
// public int compareTo(ApplyPeriod o) { | ||
// return this.endDate.compareTo(o.endDate); | ||
// } | ||
} | ||
|
||
|
13 changes: 13 additions & 0 deletions
13
src/main/java/com/cmc/zenefitserver/domain/policy/domain/PolicyDateContent.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
package com.cmc.zenefitserver.domain.policy.domain; | ||
|
||
import lombok.*; | ||
|
||
@Getter | ||
@Setter | ||
@ToString | ||
@NoArgsConstructor | ||
public class PolicyDateContent { | ||
|
||
private String dateContent; // 사업 신청 기간 | ||
private String content; // 비고 | ||
} |
28 changes: 28 additions & 0 deletions
28
src/main/java/com/cmc/zenefitserver/domain/policy/domain/enums/PolicyDateType.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
package com.cmc.zenefitserver.domain.policy.domain.enums; | ||
|
||
|
||
import com.cmc.zenefitserver.global.error.exception.BusinessException; | ||
import lombok.Getter; | ||
|
||
@Getter | ||
public enum PolicyDateType { | ||
|
||
CONSTANT("상시"), | ||
UNDECIDED("미정"), | ||
|
||
PERIOD("기간 신청"), | ||
BLANK("빈값"); | ||
|
||
private final String description; | ||
|
||
PolicyDateType(String description) { | ||
this.description = description; | ||
} | ||
|
||
// public PolicyDateType findPolicyTypeByDescription(String description){ | ||
// return Arrays.stream(PolicyDateType.values()) | ||
// .filter(p -> p.description.equals(description)) | ||
// .findFirst() | ||
// .orElseThrow(()->new BusinessException(NOT_FOUND_POLICY_DATE_TYPE)); | ||
// } | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters