diff --git a/app/database/repository.py b/app/database/repository.py
index cfd7884..b9ecbfd 100644
--- a/app/database/repository.py
+++ b/app/database/repository.py
@@ -1,13 +1,11 @@
 from typing import Any, Callable, Generic, Sequence, TypeVar
 
-from fastapi import Depends, HTTPException
+from fastapi import HTTPException
 from sqlalchemy import BinaryExpression, Row, RowMapping, desc, select
 from sqlalchemy.ext.asyncio import AsyncSession
 from sqlalchemy.ext.declarative import declarative_base
 from sqlalchemy.orm import class_mapper
 
-from app.database.session import get_db_session
-
 Base = declarative_base()
 Model = TypeVar("Model", bound=Base)
 
@@ -118,7 +116,7 @@ async def filter(
 def get_repository(
     model: type[Base],
 ) -> Callable[[AsyncSession], DatabaseRepository[Base]]:
-    def func(session: AsyncSession = Depends(get_db_session)):
+    def func(session: AsyncSession):
         if session is None:
             raise HTTPException(status_code=500, detail="DB Connection Error")
         return DatabaseRepository(model, session)
diff --git a/app/database/session.py b/app/database/session.py
index 0ccef6d..3df52ab 100644
--- a/app/database/session.py
+++ b/app/database/session.py
@@ -1,5 +1,6 @@
 import os
 from collections.abc import AsyncGenerator
+from contextlib import asynccontextmanager
 
 from dotenv import load_dotenv
 from fastapi import HTTPException
@@ -25,7 +26,8 @@
 engine = create_async_engine(DB_CONFIG)
 
 
-async def get_db_session() -> AsyncGenerator[AsyncSession, None]:
+@asynccontextmanager
+async def db_session() -> AsyncGenerator[AsyncSession, None]:
     factory = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
 
     async with factory() as session:
@@ -44,3 +46,8 @@ async def get_db_session() -> AsyncGenerator[AsyncSession, None]:
             ) from error  # pylint: disable=line-too-long
     finally:
         await session.close()
+
+
+async def get_db_session() -> AsyncSession:
+    async with db_session() as session:
+        yield session
diff --git a/app/main.py b/app/main.py
index 508ea41..2ccb5c5 100644
--- a/app/main.py
+++ b/app/main.py
@@ -9,7 +9,6 @@
     LoggingMiddleware,
 )
 from app.router.generate_simple_article_router import simple_article_router
-from app.router.news_scrap_router import news_scrap_rotuer
 from app.router.newsletter_article_crud_router import newsletter_article_router
 from app.router.send_email_service_router import send_email_service_router
 from app.router.subscription_crud_router import subscription_router
@@ -30,7 +29,6 @@ async def startup_event():
 app.add_middleware(LoggingMiddleware)
 
 # routers
-app.include_router(news_scrap_rotuer)
 app.include_router(subscription_router)
 app.include_router(newsletter_article_router)
 app.include_router(send_email_service_router)
diff --git a/app/model/prompt/simple_article/2024-07-02.txt b/app/model/prompt/simple_article/2024-07-02.txt
index 2870733..55da96b 100644
--- a/app/model/prompt/simple_article/2024-07-02.txt
+++ b/app/model/prompt/simple_article/2024-07-02.txt
@@ -1,13 +1,13 @@
 너는 내가 제공하는 어려운 대한민국 경제 신문 본문을 20대 초반이 읽어도 이해하기 쉽게, 한국어로 기사를 재생성하는 기자이다.
-아래 json 형식에 맞게 기사를 재생성해야 한다. 단, 기사 본문의 경우 문단을 나누어야 한다.
+아래 json 형식에 맞게 기사를 재생성해야 한다. 단, 기사 본문의 경우 문단을 나누어야 한다. 줄바꿈 문자는 \n으로 표시한다.
 다음은 json 형식의 예시이다:
 {
     "title": "기사 제목(한국어)",
-    "content": "기사 본문 (한국어)" 단, 경제 기사의 독자층이 20대 초반인 것을 고려하여 친근하고 꽤 간결하게 설명할 것"
-    "phrase": ["어려웠던 경제 표현들" : "어려웠던 경제 표현들을 쉽게 바꾼 문구"] (문자열 리스트) 예를 들어 "환율" : "다른 나라 돈과 우리나라 돈을 교환하는 비율"과 같이,
-    "comment": "기사를 3줄 이하로 요약하기. 단, 친근하게. (한국어)",
-    "category": "기사가 어느 카테고리, 즉 enum에 속하는지 (한국어)"
+    "content": "기사 본문 (한국어)" 단, 경제 기사의 독자층이 경제 지식이 부족한 20대 초반인 것을 고려하여 적당한 이모지를 사용하여 친근하고 간결하게 설명할 것"
+    "phrase": ["어려웠던 경제 표현들" : "어려웠던 경제 표현들을 쉽게 바꾼 문구"] (예시: "환율" : "다른 나라 돈과 우리나라 돈을 교환하는 비율")
+    "comment": "기사를 보고 추론할 수 있는 것 1문장을 친구에게 설명하는 듯한 표현으로",
+    "category": "Category 중 하나"
 }
 
 
 enum Category:
@@ -18,4 +18,4 @@ enum Category:
     OPINION_AND_ANALYSIS = "오피니언 및 분석"
 
 
-결과는 json형식이어야 하며, 여러 줄일 경우 줄바꿈 문자는 \n으로 표시한다.
+결과는 json형식이어야 한다.
diff --git a/app/router/news_scrap_router.py b/app/router/news_scrap_router.py
deleted file mode 100644
index 1ff5601..0000000
--- a/app/router/news_scrap_router.py
+++ /dev/null
@@ -1,28 +0,0 @@
-from fastapi import APIRouter
-from groq import BaseModel
-
-from app.model.article_model import ArticleResponse
-from app.service.crawl_article_service import CrawlArticleService
-from app.utils.generic_response import GenericResponseDTO
-
-news_scrap_rotuer = APIRouter()
-
-
-class ArticleCreateRequestDTO(BaseModel):
-    url: str
-    publisher: str
-
-
-@news_scrap_rotuer.post(
-    "/extract-article", response_model=GenericResponseDTO[ArticleResponse]
-)
-async def extract_article_api(
-    articleCreateRequestDTO: ArticleCreateRequestDTO,
-):
-    article = await CrawlArticleService().crawl_article(
-        news_type=articleCreateRequestDTO.publisher,
-        url=articleCreateRequestDTO.url,
-    )
-    return GenericResponseDTO(
-        data=article, message="Article extracted successfully", result=True
-    )
diff --git a/app/service/news_scheduling_service.py b/app/service/news_scheduling_service.py
index 9be042c..ea334a4 100644
--- a/app/service/news_scheduling_service.py
+++ b/app/service/news_scheduling_service.py
@@ -6,7 +6,7 @@
 from sqlalchemy.ext.asyncio import AsyncSession
 
 from app.config.loguru_config import logger
-from app.database.session import get_db_session
+from app.database.session import db_session
 from app.model.article_publisher import Publisher
 from app.service.article_manage_service import ArticleManageService
 from app.service.simple_article_service import generate_simple_article
@@ -55,9 +55,14 @@ async def fetch_and_store_all_publisher_feeds():
 
 async def run_crawl_and_store(session: AsyncSession):
     articles = await fetch_and_store_all_publisher_feeds()
+
+    # 기존에 저장된 기사들을 가져옴
     exist_articles = await ArticleManageService().get_all_articles(session=session)
+
+    # 기존에 저장된 기사들의 URL을 가져옴
     exist_urls = {article.url for article in exist_articles}
 
+    # 새로운 기사들만 필터링
     new_articles = [
         article for article in articles if article["link"] not in exist_urls
     ]
@@ -69,7 +74,7 @@
             )
             for article in new_articles
         ]
-        await asyncio.gather(*tasks)
+        await asyncio.gather(*tasks, return_exceptions=True)
     else:
         logger.info("No new articles")
 
@@ -83,7 +88,7 @@ async def schedule_task():
         delay = (target_time - now).total_seconds()
         await asyncio.sleep(delay)
 
-        async with get_db_session() as session:  # pylint: disable=not-async-context-manager
+        async with db_session() as session:
             await run_crawl_and_store(session)