From 528f8422568669ad2e65e6f6a1db900a95a4c18f Mon Sep 17 00:00:00 2001
From: 정지민 <108014449+stopmin@users.noreply.github.com>
Date: Tue, 16 Jul 2024 00:05:41 +0900
Subject: [PATCH 1/3] fix: add missing fi (#104)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .github/workflows/deploy-ecs.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/deploy-ecs.yml b/.github/workflows/deploy-ecs.yml
index d0bfc7e..7bc7901 100644
--- a/.github/workflows/deploy-ecs.yml
+++ b/.github/workflows/deploy-ecs.yml
@@ -132,6 +132,7 @@ jobs:
           if [ "$CURRENT_TASK_DEF_ARN" != "$NEW_TASK_DEF_ARN" ]; then
             echo "Deployment failed."
             exit 1
+          fi

       - name: Post Slack Channel that Build Success
         if: success()

From 77478ff97c345cc75fac5e8d901027a7c653f9b6 Mon Sep 17 00:00:00 2001
From: 김태윤 <77539625+pykido@users.noreply.github.com>
Date: Tue, 16 Jul 2024 00:16:23 +0900
Subject: [PATCH 2/3] feat: implement the [경단 chatbot - article API] using
 Jimin's RAG/LangChain work (#102)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 app/main.py                                   |   2 +
 app/router/__init__.py                        |   0
 app/router/chatbot_article_detail_router.py   |  29 +++++
 app/service/chatbot_article_detail_service.py | 120 ++++++++++++++++++
 4 files changed, 151 insertions(+)
 create mode 100644 app/router/__init__.py
 create mode 100644 app/router/chatbot_article_detail_router.py
 create mode 100644 app/service/chatbot_article_detail_service.py

diff --git a/app/main.py b/app/main.py
index 309142d..a105921 100644
--- a/app/main.py
+++ b/app/main.py
@@ -4,6 +4,7 @@ from fastapi import FastAPI
 from starlette.exceptions import HTTPException
 
+from app.router.chatbot_article_detail_router import chatbot_article_router
 from app.config.exception_handler import exception_handler, http_exception_handler
 from app.config.middlewares.request_response_logging_middle_ware import (
     LoggingMiddleware,
@@ -31,6 +32,7 @@ async def startup_event():
 app.include_router(user_type_router)
 app.include_router(simple_article_router)
 app.include_router(send_email_service_router)
+app.include_router(chatbot_article_router)
 
 # exception handlers
 app.add_exception_handler(Exception, exception_handler)
diff --git a/app/router/__init__.py b/app/router/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/app/router/chatbot_article_detail_router.py b/app/router/chatbot_article_detail_router.py
new file mode 100644
index 0000000..56b01d6
--- /dev/null
+++ b/app/router/chatbot_article_detail_router.py
@@ -0,0 +1,29 @@
+from fastapi import APIRouter
+from pydantic import BaseModel
+
+from app.service.chatbot_article_detail_service import request_rag_applied_openai
+from app.utils.generic_response import GenericResponseDTO
+
+chatbot_article_router = APIRouter()
+
+# request payload supplied by the user
+class GenerateDetailArticleRequestDTO(BaseModel):
+    news_content: str
+    prompt: str
+
+
+@chatbot_article_router.post(
+    "/chatbot-article-detail", response_model=GenericResponseDTO
+)
+async def chatbot_article_detail_(
+    request: GenerateDetailArticleRequestDTO,
+):
+    rag_applied_result = await request_rag_applied_openai(
+        original_text=request.news_content,
+        system_prompt=request.prompt
+    )
+
+    return GenericResponseDTO(
+        result=True, data=rag_applied_result, message="Related articles found successfully."
+    )
+
diff --git a/app/service/chatbot_article_detail_service.py b/app/service/chatbot_article_detail_service.py
new file mode 100644
index 0000000..df0f175
--- /dev/null
+++ b/app/service/chatbot_article_detail_service.py
@@ -0,0 +1,120 @@
+import os
+
+import aiohttp
+from fastapi import HTTPException
+from langchain_core.messages import HumanMessage
+from langchain_openai import ChatOpenAI
+
+from app.config.loguru_config import logger
+from app.rag_lang_chain.chromadb_manager import ChromaDBManager
+from app.rag_lang_chain.google_cse_retriver import (
+    AsyncGoogleSearchAPIWrapper,
+    GoogleCSERetriever,
+)
+
+from langchain.schema import Document
+
+from typing import List, Union, Dict
+
+
+class RagAppliedResult:
+    def __init__(self, result_text: str, related_documents: List[Union[Document, dict]]):
+        self.result_text = result_text
+        self.related_documents = related_documents
+
+    def to_dict(self) -> Dict:
+        related_docs = []
+        for doc in self.related_documents:
+            if isinstance(doc, Document):
+                related_docs.append({
+                    'title': doc.metadata.get('title', 'No title'),
+                    'content': doc.page_content,
+                    'source': doc.metadata.get('source', 'No source')
+                })
+            else:
+                related_docs.append(doc)
+        return {
+            'result_text': self.result_text,
+            'related_documents': related_docs
+        }
+
+
+async def request_rag_applied_openai(
+    original_text: str,  # the original article text (used as the Google Custom Search query)
+    system_prompt: str,  # the system prompt
+) -> Dict:
+    openai_api_key = os.getenv("OPENAI_API_KEY")
+    google_api_key = os.getenv("GOOGLE_API_KEY")
+    google_cse_id = os.getenv("GOOGLE_CSE_ID")
+
+    search = AsyncGoogleSearchAPIWrapper(api_key=google_api_key, cse_id=google_cse_id)
+    google_cse_retriever = GoogleCSERetriever(
+        api_key=google_api_key, cse_id=google_cse_id
+    )
+
+    # Step 1: use the Google Custom Search API to collect everything related to the user-supplied original_text.
+    # Returns a list of web pages related to original_text; each page consists of a title (result title),
+    # a link (page URL), and a snippet (summary of the result).
+    google_results = await google_cse_retriever.retrieve(
+        original_text
+    )
+    if not google_results:
+        raise HTTPException(status_code=404, detail="No results found from Google.")
+    logger.info(f"1. Google results: {google_results}")
+
+    # Step 2: vectorize the search results and store them in ChromaDB
+    chroma_db_manager = ChromaDBManager()
+    await chroma_db_manager.add_documents(google_results)
+
+    # Step 3: among the stored documents, retrieve those similar to the user query,
+    # then gather additional context with AsyncGoogleSearchAPIWrapper
+    search_results = await chroma_db_manager.search_documents(original_text)  # vector similarity search
+    logger.info(f"3. Search results: {search_results}")
+    additional_info = await search.aget_relevant_documents(original_text, num_results=3)
+    logger.info(f"3. Additional info: {additional_info}")
+
+    # Step 4: build the prompt (original text + search results + additional info)
+
+    rag_applied_prompt = await create_rag_applied_prompt(
+        original_prompt=system_prompt, relevant_info=search_results + additional_info
+    )
+
+    # Step 5: return the OpenAI completion
+    try:
+        search_llm = ChatOpenAI(
+            temperature=0, model="gpt-4", max_tokens=1500, api_key=openai_api_key
+        )
+        response = await search_llm.agenerate(
+            messages=[[HumanMessage(rag_applied_prompt)]]
+        )
+    except aiohttp.ClientResponseError as e:
+        if e.status == 429:
+            raise HTTPException(
+                429, "Too many requests. Please try again later."
+            ) from e
+        raise HTTPException(500, "Internal Server Error") from e
+
+    logger.info(f"Final response: {response}")
+
+    return RagAppliedResult(
+        result_text=response.generations[0][0].text,
+        related_documents=search_results + additional_info,
+    ).to_dict()
+
+
+async def create_rag_applied_prompt(
+    original_prompt: str, relevant_info: List[Union[Document, dict]]
+) -> str:
+    for idx, info in enumerate(relevant_info):
+        if isinstance(info, Document):
+            title = info.metadata.get("title", "제목 없음")
+            link = info.metadata.get("source", "URL 없음")
+            snippet = info.page_content
+        else:
+            title = info.get("title", "제목 없음")
+            link = info.get("link", "URL 없음")
+            snippet = info.get("snippet", "내용 없음")
+        original_prompt += (
+            f"\n{idx + 1}. 제목: {title}\n   URL: {link}\n   내용: {snippet}\n"
+        )
+
+    logger.info(f"RAG Applied Prompt: {original_prompt}")
+    return original_prompt

From c3052c1bede2d4e23de5aa2d50beed28c7ad71d2 Mon Sep 17 00:00:00 2001
From: mandu <88422717+YunseongJeong@users.noreply.github.com>
Date: Tue, 16 Jul 2024 20:00:11 +0900
Subject: [PATCH 3/3] Switch the recommendation system to implicit (#107)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* comment out lightfm - additionally, fix the missing plotly entry

* install implicit

* feat(user_type_service.py): update for the revised enum

* feat(news_scheduling_service.py): change the recommendation method

* feat(news_scheduling_service.py): keep things consistent

* feat(recommend.py): create the recommendation table

* feat(recommend_crud.py): create CRUD for the recommendation table

* feat(recommend_service_deactivate.py): move the commented-out code

* feat(recommend_service.py): switch the recommender to implicit

* feat(user_classification.csv): change the user classifications

* refactor(user_type.py): fix typos, etc.

* feat(user_type_crud.py): update for the new enum structure

* feat(user_type_service.py): fix the enum typo

---------

Co-authored-by: stopmin
---
 Pipfile                                       |   2 +
 Pipfile.lock                                  |  97 ++++-
 app/model/recommend.py                        |  11 +
 app/model/user_type.py                        |  22 +-
 app/recommend/recommend_service.py            | 395 +++++++-----------
 app/recommend/recommend_service_deactivate.py | 243 +++++++++++
 app/recommend/user_classification.csv         |  32 +-
 app/repository/recommend_crud.py              |  19 +
 app/repository/user_type_crud.py              |  10 +-
 app/service/news_scheduling_service.py        |  22 +-
 app/service/user_type_service.py              |  80 ++--
 11 files changed, 594 insertions(+), 339 deletions(-)
 create mode 100644 app/model/recommend.py
 create mode 100644 app/recommend/recommend_service_deactivate.py
 create mode 100644 app/repository/recommend_crud.py

diff --git a/Pipfile b/Pipfile
index 46c70f7..864964e 100644
--- a/Pipfile
+++ b/Pipfile
@@ -31,6 +31,8 @@ langchain-openai = "*"
 langchain-google-community = "*"
 wikipedia-api = "*"
 plotly = "*"
+implicit = "*"
+

 [dev-packages]
 flake8 = "*"

diff --git a/Pipfile.lock b/Pipfile.lock
index 4073b9a..dc7e1f2 100644
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -1,7 +1,8 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "fcbb46438648285b6c3ef92c5dc51e8fa5a0a7e1ce3bf3bf954770c6e84ef93f"
+            "sha256": "79d44a1a99c2c3499ab6f824c498a5e1fda1be281d5c0c41b7541b9bb56639b0"
+
        },
        "pipfile-spec": 6,
        "requires": {
@@ -274,11 +275,11 @@
        },
        "cachetools": {
            "hashes": [
-               "sha256:0abad1021d3f8325b2fc1d2e9c8b9c9d57b04c3932657a72465447332c24d945",
-               "sha256:ba29e2dfa0b8b556606f097407ed1aa62080ee108ab0dc5ec9d6a723a007d105"
+               "sha256:3ae3b49a3d5e28a77a0be2b37dbcb89005058959cb2323858c2657c4a8cab474",
+               "sha256:b8adc2e7c07f105ced7bc56dbb6dfbe7c4a00acce20e2227b3f355be89bc6827"
            ],
            "markers": "python_version >= '3.7'",
-           "version": "==5.3.3"
+           "version": "==5.4.0"
        },
        "certifi": {
"hashes": [ @@ -887,6 +888,45 @@ "markers": "python_version >= '3.5'", "version": "==3.7" }, + "implicit": { + "hashes": [ + "sha256:0bfff5e332d73cfc5896beb1ab09e0aef1c0c28713a2799b978290757b536af9", + "sha256:0c909fa69ef743ac82be788e3b9708636540dae3677f692c9275f5bf8307adf0", + "sha256:0ecb6fcf2581a47f0fbaf50cf412c377684670e09c0a6764dc2fc112b7bfa4af", + "sha256:1c755cb0e6ac69b44c8215a80ba7c5c132f2767453078b573e3d18900220aa57", + "sha256:1f161c97d455b710e6d68937e87ecd22ff212bc6e22dc57748b76c6ade42abc2", + "sha256:2e7ac1e9df353729a1a45daa5417d417b748242e6271b423e384bc6b6216747b", + "sha256:32d3ad57a4217ed50e3d126ce3782b2f27d0a1ed9fcbb71a7ccf5cfebc96430d", + "sha256:335925bf728579ba729d60570b223713312a725ad647d000e0773f25bcf2f5a5", + "sha256:3cc71e673fad6a76a3f6f2b1361e4b01f80ded61e1ca41ceb3c4e8d265ceb5f0", + "sha256:3d5e87f4051655bd5eb489dad28df737a21141b62a330266ea406bbe11693404", + "sha256:3d8f61d1783630e7a894cf53a1e4f0a56b73e7218776dc8fe03e3e6c1d85da72", + "sha256:3f6b93dbf377875c2aeccac52239b0ff2db987821b0fb29e0f1efd95474aeab4", + "sha256:60bfb1bc1c6d3f219db11e20537e5b7108dbb1f7daaf8677e1ba3ccff643f497", + "sha256:61f7048d0f1b2579796ccc25bb79ad3517a52c60252fd6e18572f75f0a506b70", + "sha256:67e08aaf57e5072b9fe17218e2dad6b11369f1ad4ac732dc13ded76228a50970", + "sha256:6a9058d5b2c1e19de344d4fc0b031504bc6be0fe7f7f5c91f11db66c28727894", + "sha256:6dab0506685dd23c27e648337f86875272dd8341400c3ff306fa99fe3f049bdf", + "sha256:7f6785ad869f1db2ac3e37e2195e3e733900e21aa2a55a9d580e2b4b00016382", + "sha256:9a243c0a9d22bd902dcc0cd0622fbed05c020613bf272934498cf88cf187a1be", + "sha256:a3209629ba593e5e1365cde1e5ffa57a62bca6ca99eda9b1e464a70eea91632b", + "sha256:a4ee10107ba7a2745d5166e17f9202f5b73e7b362ac248229e16be8819cde49c", + "sha256:a6cce64d839272b3ae0c7e9799ee326ee0cb7da9d69b1de7205ef1139379ff22", + "sha256:ab28f9fe3d90d1461694c33eb2741e8f5446737561786cc118ca92fe51d7ffe7", + "sha256:b615b1d037a4175ba3640fb9fa446811861d80b3b9a01f54957816464c31066c", + "sha256:bacf79120c87ad0744a4365a089409fddd33231880aef4a495bab2d4f888291d", + "sha256:bd10b250c53c7917b77b883b1cd0a1f94368115521816338995a32112eab8321", + "sha256:c2805ceea609dddb0ecd6f948f1dabd48bd9c47edffd3ff50584a4248a66c719", + "sha256:c9519fdb8a1a06b201b7c55dfa51d6a0f00ff55223600b1484fcacd8d248a47a", + "sha256:cb4b78ff885ca21d8d443f31f6e28bba0a67a640f7682cc9996c0c1cc2e585cc", + "sha256:db359d570729e72cda15f5eae98f93427f567cd20a28cb47732196e66c74a32c", + "sha256:f08f9c15dd7724368aad47458c77f888385dec5a69221432e50b689996a6455f", + "sha256:f533cb8e42f80ed5457a287d0c3ae5b7789c2ea098519eecee58c45adcb4c3d7", + "sha256:ff779f25d71c5cab26403b732e358ed813adaa04ecc08ac5b3e3ed781e565f51" + ], + "index": "pypi", + "version": "==0.7.2" + }, "importlib-metadata": { "hashes": [ "sha256:30962b96c0c223483ed6cc7280e7f0199feb01a0e40cfae4d4450fc6fab1f570", @@ -937,12 +977,12 @@ }, "langchain": { "hashes": [ - "sha256:8742f363d2890854501e0075af04fcb470600f201dec251c9bd5841e1990e73d", - "sha256:98e79e0b9a60a9c740b44d5b0135c85f649219308f30d373cf5f10d0efe18b87" + "sha256:53e7dfe50294a14200f33bec22b4e14cb63857ccf0a5500b0d18b0fd51285d58", + "sha256:7fecb309e3558cde4e5cf7e9ffb7c1ab3f07121c40a7ff3b0c27135f8120c296" ], "index": "pypi", "markers": "python_version < '4.0' and python_full_version >= '3.8.1'", - "version": "==0.2.7" + "version": "==0.2.8" }, "langchain-community": { "hashes": [ @@ -989,11 +1029,11 @@ }, "langsmith": { "hashes": [ - "sha256:acff31f9e53efa48586cf8e32f65625a335c74d7c4fa306d1655ac18452296f6", - 
"sha256:c1f94384f10cea96f7b4d33fd3db7ec180c03c7468877d50846f881d2017ff94" + "sha256:2e66577817253327b99b727588c3173fbba217fe0ca07ac6b7cdd23fc4894104", + "sha256:55ed80cc6e98f9761f9b3ec3c49e01f6745d13e40bef80d9f831acabfd9a8a1e" ], "markers": "python_version < '4.0' and python_full_version >= '3.8.1'", - "version": "==0.1.85" + "version": "==0.1.86" }, "loguru": { "hashes": [ @@ -1380,12 +1420,12 @@ }, "openai": { "hashes": [ - "sha256:36ec3e93e0d1f243f69be85c89b9221a471c3e450dfd9df16c9829e3cdf63e60", - "sha256:c684f3945608baf7d2dcc0ef3ee6f3e27e4c66f21076df0b47be45d57e6ae6e4" + "sha256:394ba1dfd12ecec1d634c50e512d24ff1858bbc2674ffcce309b822785a058de", + "sha256:adadf8c176e0b8c47ad782ed45dc20ef46438ee1f02c7103c4155cff79c8f68b" ], "index": "pypi", "markers": "python_full_version >= '3.7.1'", - "version": "==1.35.13" + "version": "==1.35.14" }, "opentelemetry-api": { "hashes": [ @@ -1980,6 +2020,37 @@ "markers": "python_version >= '3.7'", "version": "==1.2.2" }, + "scipy": { + "hashes": [ + "sha256:076c27284c768b84a45dcf2e914d4000aac537da74236a0d45d82c6fa4b7b3c0", + "sha256:07e179dc0205a50721022344fb85074f772eadbda1e1b3eecdc483f8033709b7", + "sha256:176c6f0d0470a32f1b2efaf40c3d37a24876cebf447498a4cefb947a79c21e9d", + "sha256:42470ea0195336df319741e230626b6225a740fd9dce9642ca13e98f667047c0", + "sha256:4c4161597c75043f7154238ef419c29a64ac4a7c889d588ea77690ac4d0d9b20", + "sha256:5b083c8940028bb7e0b4172acafda6df762da1927b9091f9611b0bcd8676f2bc", + "sha256:64b2ff514a98cf2bb734a9f90d32dc89dc6ad4a4a36a312cd0d6327170339eb0", + "sha256:65df4da3c12a2bb9ad52b86b4dcf46813e869afb006e58be0f516bc370165159", + "sha256:687af0a35462402dd851726295c1a5ae5f987bd6e9026f52e9505994e2f84ef6", + "sha256:6a9c9a9b226d9a21e0a208bdb024c3982932e43811b62d202aaf1bb59af264b1", + "sha256:6d056a8709ccda6cf36cdd2eac597d13bc03dba38360f418560a93050c76a16e", + "sha256:7d3da42fbbbb860211a811782504f38ae7aaec9de8764a9bef6b262de7a2b50f", + "sha256:7e911933d54ead4d557c02402710c2396529540b81dd554fc1ba270eb7308484", + "sha256:94c164a9e2498e68308e6e148646e486d979f7fcdb8b4cf34b5441894bdb9caf", + "sha256:9e3154691b9f7ed73778d746da2df67a19d046a6c8087c8b385bc4cdb2cfca74", + "sha256:9eee2989868e274aae26125345584254d97c56194c072ed96cb433f32f692ed8", + "sha256:a01cc03bcdc777c9da3cfdcc74b5a75caffb48a6c39c8450a9a05f82c4250a14", + "sha256:a7d46c3e0aea5c064e734c3eac5cf9eb1f8c4ceee756262f2c7327c4c2691c86", + "sha256:ad36af9626d27a4326c8e884917b7ec321d8a1841cd6dacc67d2a9e90c2f0359", + "sha256:b5923f48cb840380f9854339176ef21763118a7300a88203ccd0bdd26e58527b", + "sha256:bbc0471b5f22c11c389075d091d3885693fd3f5e9a54ce051b46308bc787e5d4", + "sha256:bff2438ea1330e06e53c424893ec0072640dac00f29c6a43a575cbae4c99b2b9", + "sha256:c40003d880f39c11c1edbae8144e3813904b10514cd3d3d00c277ae996488cdb", + "sha256:d91db2c41dd6c20646af280355d41dfa1ec7eead235642178bd57635a3f82209", + "sha256:f0a50da861a7ec4573b7c716b2ebdcdf142b66b756a0d392c236ae568b3a93fb" + ], + "markers": "python_version >= '3.10'", + "version": "==1.14.0" + }, "setuptools": { "hashes": [ "sha256:f171bab1dfbc86b132997f26a119f6056a57950d058587841a0082e8830f9dc5", diff --git a/app/model/recommend.py b/app/model/recommend.py new file mode 100644 index 0000000..b710025 --- /dev/null +++ b/app/model/recommend.py @@ -0,0 +1,11 @@ +from sqlalchemy import BigInteger, Column, ARRAY, Integer + +from app.database.repository import Base + + +class Recommend(Base): + __tablename__ = "recommends" + __table_args__ = {"schema": "gyeongdan"} + + classification_id = Column(BigInteger, primary_key=True, index=True) + 
recommend_article_ids = Column(ARRAY(Integer), nullable=True) diff --git a/app/model/user_type.py b/app/model/user_type.py index 3efc2b9..dc3957e 100644 --- a/app/model/user_type.py +++ b/app/model/user_type.py @@ -1,6 +1,6 @@ from enum import Enum -from sqlalchemy import BigInteger, Column, Integer +from sqlalchemy import BigInteger, Column, Integer, CHAR from app.database.repository import Base @@ -15,12 +15,20 @@ class UserType(Base): user_type_entertainer = Column(Integer, nullable=True) user_type_tech_specialist = Column(Integer, nullable=True) user_type_professionals = Column(Integer, nullable=True) + user_type = Column(CHAR(255), nullable=True) class UserTypes(Enum): - NONE= -1 - ISSUE_FINDER= 0 - LIFESTYLE_CONSUMER= 1 - ENTERTAINER= 2 - TECH_SEPCIALIST= 3 - PROFESSIONALS= 4 + NONE= {'id':-1, + 'name':'NONE' + } + ISSUE_FINDER= {'id':0, + 'name':'ISSUE_FINDER'} + LIFESTYLE_CONSUMER= {'id':1, + 'name':'LIFESTYLE_CONSUMER'} + ENTERTAINER= {'id':2, + 'name':'ENTERTAINER'} + TECH_SPECIALIST= {'id':3, + 'name':'TECH_SPECIALIST'} + PROFESSIONALS= {'id':4, + 'name':'PROFESSIONALS'} diff --git a/app/recommend/recommend_service.py b/app/recommend/recommend_service.py index d2b391e..de93318 100644 --- a/app/recommend/recommend_service.py +++ b/app/recommend/recommend_service.py @@ -1,243 +1,152 @@ -# # pylint: disable=missing-module-docstring, missing-module-docstring, attribute-defined-outside-init, unnecessary-comprehension, not-callable, consider-using-f-string, unused-variable -# -# import asyncio -# import os -# import warnings -# from datetime import datetime -# -# import numpy as np -# import pandas as pd -# from fastapi import Depends -# from sqlalchemy.ext.asyncio import AsyncSession -# -# from app.database.repository import model_to_dict -# from app.database.session import get_db_session -# from app.model.crawled_article import Articles -# from app.repository.crawled_article_crud import CrawledArticleRepository -# from app.service.article_manage_service import ArticleManageService -# from app.repository.interaction_crud import InteractionRepository -# from app.model.interaction import Interaction -# from lightfm import LightFM -# from lightfm.data import Dataset # pylint: disable=E0611 -# -# warnings.filterwarnings("ignore") -# -# -# def articles_to_dataframe(articles: list[Articles]) -> pd.DataFrame: -# # 객체 리스트를 딕셔너리 리스트로 변환 -# articles_dict_list = [ -# { -# "article_id": article.id, -# 'ECONOMY_AND_BUSINESS': 0, -# 'POLITICS_AND_SOCIETY': 0, -# 'SPORTS_AND_LEISURE': 0, -# 'TECHNOLOGY_AND_CULTURE': 0 -# # "created_at": article.created_at.strftime('%Y-%m-%d'), -# } -# for article in articles -# ] -# for i in range(len(articles_dict_list)): -# articles_dict_list[i][articles[i].category] = 1 -# -# df = pd.DataFrame(articles_dict_list) -# return df -# -# def interaction_to_dataframe(interactions : list[Interaction]) -> pd.DataFrame: -# interaction_dict_list = [ -# { -# "classification_id": interaction.classification_id, -# "article_id": interaction.article_id, -# "duration_time": interaction.duration_time -# } -# for interaction in interactions -# ] -# df = pd.DataFrame(interaction_dict_list) -# return df -# -# class ArticleDataInfo: -# def __init__(self, article_id, category, created_at): -# self.article_data = pd.DataFrame( -# { -# "article_id": article_id, -# "경제 및 기업": [0], -# "정치 및 사회": [0], -# "기술 및 문화": [0], -# "스포츠 및 여가": [0], -# "오피니언 및 분석": [0], -# # "created_at": [created_at], -# } -# ) -# -# self.article_data.iloc[0][category] = 1 -# -# -# class InteractionDataInfo: -# def 
__init__(self, user_id, article_id, duration_time): -# self.interaction_data = pd.DataFrame( -# { -# "classification_id": [user_id], -# "article_id": [article_id], -# "duration_time": [duration_time], -# } -# ) -# -# -# class RecommendService: -# # pylint: disable=too-many-instance-attributes -# -# def __init__(self): -# self.interaction_datas = None -# self.num_classification = 5 -# -# def set_user_datas(self, user_data_path): -# self.user_data_path = user_data_path -# self.user_datas = pd.read_csv(os.path.dirname(os.path.abspath(__file__)) + user_data_path) -# -# -# async def initialize_data(self, session): -# self.set_user_datas("/./user_classification.csv") -# await self.set_article_datas(session) -# await self.set_interaction_datas(session) -# -# async def set_article_datas(self, session): -# # session = Depends(get_db_session) -# articles = await ArticleManageService().get_all_articles(session=session) -# self.article_datas = pd.get_dummies(articles_to_dataframe(articles)) -# -# async def set_interaction_datas(self, session): -# # session = Depends(get_db_session) -# interactions = await InteractionRepository().get_all(session=session) -# self.interaction_datas = interaction_to_dataframe(interactions) -# print(self.interaction_datas.columns) -# -# def make_dataset(self): -# self.user_datas = pd.get_dummies(self.user_datas) -# self.user_features_col = self.user_datas.drop( -# columns=["classification_id"] -# ).columns.values -# self.user_feat = self.user_datas.drop(columns=["classification_id"]).to_dict( -# orient="records" -# ) -# -# self.item_features = self.article_datas -# self.item_features_col = self.item_features.drop( -# columns=["article_id"] -# ).columns.values -# self.item_feat = self.item_features.drop( -# columns=["article_id"] -# ).to_dict(orient="records") -# -# self.dataset = Dataset() -# self.dataset.fit( -# users=[x for x in self.user_datas["classification_id"]], -# items=[x for x in self.article_datas["article_id"]], -# item_features=self.item_features_col, -# user_features=self.user_features_col, -# ) -# -# print(self.item_feat) -# self.item_features = self.dataset.build_item_features( -# (x, y) for x, y in zip(self.item_features["article_id"], self.item_feat) -# ) -# self.user_features = self.dataset.build_user_features( -# (x, y) for x, y in zip(self.user_datas["classification_id"], self.user_feat) -# ) -# -# (self.interactions, self.weights) = self.dataset.build_interactions( -# (x, y, z) -# for x, y, z in zip( -# self.interaction_datas["classification_id"], -# self.interaction_datas["article_id"], -# self.interaction_datas["duration_time"], -# ) -# ) -# -# num_users, num_items = self.dataset.interactions_shape() -# print("Num users: {}, num_items {}.".format(num_users, num_items)) -# -# def make_model( -# self, -# n_components: int = 30, -# loss: str = "warp", -# epoch: int = 30, -# num_thread: int = 4, -# ): -# self.n_components = n_components -# self.loss = loss -# self.epoch = epoch -# self.num_thread = num_thread -# self.model = LightFM( -# no_components=self.n_components, loss=self.loss, random_state=1616 -# ) -# -# def fit_model(self): -# self.make_dataset() -# self.make_model() -# self.model.fit( -# self.interactions, -# user_features=self.user_features, -# item_features=self.item_features, -# epochs=self.epoch, -# num_threads=self.num_thread, -# sample_weight=self.weights, -# ) -# -# def get_top_n_articles(self, user_id: int, article_num: int): -# item_ids = np.arange(self.interactions.shape[1]) # 예측할 아이템 ID 배열 -# -# predictions = 
self.model.predict(user_id, item_ids) -# top_items = self.article_datas.iloc[np.argsort(-predictions)[:article_num]] -# return top_items -# -# def similar_items(self, item_id, N=10): -# item_bias, item_representations = self.model.get_item_representations( -# features=self.item_features -# ) -# -# scores = item_representations.dot(item_representations[item_id, :]) -# best = np.argpartition(scores, -N)[-N:] -# -# return self.article_datas.iloc[best] -# -# async def get_classification_for_article(self, article_id:id, session:AsyncSession): -# scores = self.model.predict(np.arange(len(self.user_datas)), np.full(len(self.user_datas), article_id)) -# top_users = np.argsort(-scores) -# -# score_for_classification = [0 for _ in range(self.num_classification)] -# weight = 10 -# for user_id in top_users[:10]: -# for i in range(self.num_classification): -# score_for_classification[i] += self.user_datas.iloc[user_id][self.user_datas.columns[i+2]] * (2 ** weight) -# weight -= 1 -# -# total = sum(score_for_classification) -# for i in range(self.num_classification): -# score_for_classification[i] = (int)(score_for_classification[i] / (total/100)) -# -# await CrawledArticleRepository().set_interest_type(article_id, score_for_classification, session) -# -# return score_for_classification -# -# def get_time_weight(self, article_id): -# today = datetime.now().date() -# date_obj = datetime.strptime( -# self.article_datas[self.article_datas["article_id"] == article_id][ -# "created_at" -# ].iloc[0], -# "%Y-%m-%d", -# ).date() -# difference = today - date_obj -# return max(1 - ((difference.days // 30) / 5), 0) -# -# def fit_model_partialy(self): -# self.make_dataset() -# self.model.fit_partial(self.interactions, item_features=self.item_features) -# -# def add_interaction_data(self, interaction_data: InteractionDataInfo): -# InteractionRepository().create( -# Interaction( -# classification_id=interaction_data.interaction_data['classification_id'], -# article_id=interaction_data.interaction_data['article_id'], -# duration_time=interaction_data.interaction_data['duration_time'] -# ) -# ) +# pylint: disable=missing-module-docstring, missing-module-docstring, attribute-defined-outside-init, unnecessary-comprehension, not-callable, consider-using-f-string, unused-variable + +import os + +from scipy.sparse import csr_matrix +import numpy as np +import pandas as pd +from sqlalchemy.ext.asyncio import AsyncSession + +from app.model.crawled_article import Articles +from app.model.user_type import UserTypes +from app.repository.recommend_crud import RecommendRepository +from app.service.article_manage_service import ArticleManageService +from app.repository.interaction_crud import InteractionRepository +from app.model.interaction import Interaction + +import implicit + +from app.service.user_type_service import UserTypeService + + +async def user_id_to_classification_id(user_id, session:AsyncSession): + userType = await UserTypeService().get_user_type_by_id(user_id, session) + target_features = [[userType.user_type_issue_finder, UserTypes.ISSUE_FINDER], + [userType.user_type_lifestyle_consumer, UserTypes.LIFESTYLE_CONSUMER], + [userType.user_type_entertainer, UserTypes.ENTERTAINER], + [userType.user_type_tech_specialist, UserTypes.TECH_SPECIALIST], + [userType.user_type_professionals, UserTypes.PROFESSIONALS]] + target_features.sort(key=lambda x: x[0], reverse=True) + data = { + 'classification_id': range(1, 11), + 'ISSUE_FINDER': [1, 1, 1, 1, 1, 1, 0, 0, 0, 0], + 'LIFESTYLE_CONSUMER': [1, 1, 1, 0, 0, 0, 1, 1, 1, 0], + 
'ENTERTAINER': [1, 0, 0, 1, 1, 0, 1, 1, 0, 1],
+        'TECH_SPECIALIST': [0, 1, 0, 1, 0, 1, 1, 0, 1, 1],
+        'PROFESSIONALS': [0, 0, 1, 0, 1, 1, 0, 1, 1, 1]
+    }
+    df = pd.DataFrame(data)
+    filtered_df = df[
+        (df[target_features[0][1].value['name']] == 1) &
+        (df[target_features[1][1].value['name']] == 1) &
+        (df[target_features[2][1].value['name']] == 1)
+    ]
+    return (int)(filtered_df.iloc[0]['classification_id'])
+
+def articles_to_dataframe(articles: list[Articles]) -> pd.DataFrame:
+    # convert the list of ORM objects into a list of dicts
+    articles_dict_list = [
+        {
+            "article_id": article.id,
+            'ECONOMY_AND_BUSINESS': 0,
+            'POLITICS_AND_SOCIETY': 0,
+            'SPORTS_AND_LEISURE': 0,
+            'TECHNOLOGY_AND_CULTURE': 0
+            # "created_at": article.created_at.strftime('%Y-%m-%d'),
+        }
+        for article in articles
+    ]
+    for i in range(len(articles_dict_list)):
+        articles_dict_list[i][articles[i].category] = 1
+
+    df = pd.DataFrame(articles_dict_list)
+    return df
+
+def interaction_to_dataframe(interactions : list[Interaction]) -> pd.DataFrame:
+    interaction_dict_list = [
+        {
+            "classification_id": interaction.classification_id,
+            "article_id": interaction.article_id,
+            "duration_time": interaction.duration_time
+        }
+        for interaction in interactions
+    ]
+    df = pd.DataFrame(interaction_dict_list)
+    return df
+
+class RecommendService:
+    # pylint: disable=too-many-instance-attributes
+
+    def __init__(self):
+        self.interaction_datas = None
+        self.classification_datas = None
+        self.num_classifications = None
+        self.num_articles = None
+        self.user_item_matrix = None
+        self.model = None
+        self.user_data_path = "/./user_classification.csv"
+
+    async def initialize_data(self, session):
+        self.set_user_datas()
+        await self.set_article_datas(session)
+        await self.set_interaction_datas(session)
+
+    def set_user_datas(self):
+        self.classification_datas = pd.read_csv(os.path.dirname(os.path.abspath(__file__)) + self.user_data_path)
+        self.num_classifications = len(self.classification_datas)
+        print(self.num_classifications)
+
+    async def set_article_datas(self, session):
+        articles = await ArticleManageService().get_all_articles(session=session)
+        self.num_articles = len(articles)
+        print(self.num_articles)
+
+    async def set_interaction_datas(self, session):
+        interactions = await InteractionRepository().get_all(session=session)
+        self.interaction_datas = interaction_to_dataframe(interactions)
+
+    def make_dataset(self):
+        print(self.interaction_datas)
+        self.user_item_matrix = csr_matrix((self.interaction_datas['duration_time'].tolist(),
+                                            (self.interaction_datas['classification_id'].tolist(),
+                                             self.interaction_datas['article_id'].tolist()))
+                                           , shape=(self.num_classifications+1, self.num_articles+1))
+
+        self.user_item_matrix = (self.user_item_matrix > 0).astype(np.float32)
+        print("Num users: {}, num_items {}.".format(self.num_classifications, self.num_articles))
+
+    def make_model(
+            self,
+            factors: int = 5,
+            regularization: float = 0.1,
+            iterations: int = 20
+    ):
+        self.model = implicit.als.AlternatingLeastSquares(factors=factors,
+                                                          regularization=regularization,
+                                                          iterations=iterations)
+
+    def fit_model(self):
+        self.make_dataset()
+        self.make_model()
+        self.model.fit(
+            self.user_item_matrix
+        )
+
+    async def get_recommend_articles(self, classification_id: int, session: AsyncSession, N: int = 10):
+        indices, scores = self.model.recommend(userid=classification_id, user_items=csr_matrix(self.user_item_matrix.toarray()[classification_id]), N=N)
+        for i in range(N):
+            indices[i] += 1
+
+        await
RecommendRepository().update_recommend(id=classification_id+1, article_ids=indices, session=session) + + return indices + + + def add_interaction_data(self, classification_id: int, article_id: int, duration_time:int = 1): + InteractionRepository().create( + Interaction( + classification_id=classification_id, + article_id=article_id, + duration_time=duration_time + ) + ) diff --git a/app/recommend/recommend_service_deactivate.py b/app/recommend/recommend_service_deactivate.py new file mode 100644 index 0000000..d2b391e --- /dev/null +++ b/app/recommend/recommend_service_deactivate.py @@ -0,0 +1,243 @@ +# # pylint: disable=missing-module-docstring, missing-module-docstring, attribute-defined-outside-init, unnecessary-comprehension, not-callable, consider-using-f-string, unused-variable +# +# import asyncio +# import os +# import warnings +# from datetime import datetime +# +# import numpy as np +# import pandas as pd +# from fastapi import Depends +# from sqlalchemy.ext.asyncio import AsyncSession +# +# from app.database.repository import model_to_dict +# from app.database.session import get_db_session +# from app.model.crawled_article import Articles +# from app.repository.crawled_article_crud import CrawledArticleRepository +# from app.service.article_manage_service import ArticleManageService +# from app.repository.interaction_crud import InteractionRepository +# from app.model.interaction import Interaction +# from lightfm import LightFM +# from lightfm.data import Dataset # pylint: disable=E0611 +# +# warnings.filterwarnings("ignore") +# +# +# def articles_to_dataframe(articles: list[Articles]) -> pd.DataFrame: +# # 객체 리스트를 딕셔너리 리스트로 변환 +# articles_dict_list = [ +# { +# "article_id": article.id, +# 'ECONOMY_AND_BUSINESS': 0, +# 'POLITICS_AND_SOCIETY': 0, +# 'SPORTS_AND_LEISURE': 0, +# 'TECHNOLOGY_AND_CULTURE': 0 +# # "created_at": article.created_at.strftime('%Y-%m-%d'), +# } +# for article in articles +# ] +# for i in range(len(articles_dict_list)): +# articles_dict_list[i][articles[i].category] = 1 +# +# df = pd.DataFrame(articles_dict_list) +# return df +# +# def interaction_to_dataframe(interactions : list[Interaction]) -> pd.DataFrame: +# interaction_dict_list = [ +# { +# "classification_id": interaction.classification_id, +# "article_id": interaction.article_id, +# "duration_time": interaction.duration_time +# } +# for interaction in interactions +# ] +# df = pd.DataFrame(interaction_dict_list) +# return df +# +# class ArticleDataInfo: +# def __init__(self, article_id, category, created_at): +# self.article_data = pd.DataFrame( +# { +# "article_id": article_id, +# "경제 및 기업": [0], +# "정치 및 사회": [0], +# "기술 및 문화": [0], +# "스포츠 및 여가": [0], +# "오피니언 및 분석": [0], +# # "created_at": [created_at], +# } +# ) +# +# self.article_data.iloc[0][category] = 1 +# +# +# class InteractionDataInfo: +# def __init__(self, user_id, article_id, duration_time): +# self.interaction_data = pd.DataFrame( +# { +# "classification_id": [user_id], +# "article_id": [article_id], +# "duration_time": [duration_time], +# } +# ) +# +# +# class RecommendService: +# # pylint: disable=too-many-instance-attributes +# +# def __init__(self): +# self.interaction_datas = None +# self.num_classification = 5 +# +# def set_user_datas(self, user_data_path): +# self.user_data_path = user_data_path +# self.user_datas = pd.read_csv(os.path.dirname(os.path.abspath(__file__)) + user_data_path) +# +# +# async def initialize_data(self, session): +# self.set_user_datas("/./user_classification.csv") +# await 
self.set_article_datas(session) +# await self.set_interaction_datas(session) +# +# async def set_article_datas(self, session): +# # session = Depends(get_db_session) +# articles = await ArticleManageService().get_all_articles(session=session) +# self.article_datas = pd.get_dummies(articles_to_dataframe(articles)) +# +# async def set_interaction_datas(self, session): +# # session = Depends(get_db_session) +# interactions = await InteractionRepository().get_all(session=session) +# self.interaction_datas = interaction_to_dataframe(interactions) +# print(self.interaction_datas.columns) +# +# def make_dataset(self): +# self.user_datas = pd.get_dummies(self.user_datas) +# self.user_features_col = self.user_datas.drop( +# columns=["classification_id"] +# ).columns.values +# self.user_feat = self.user_datas.drop(columns=["classification_id"]).to_dict( +# orient="records" +# ) +# +# self.item_features = self.article_datas +# self.item_features_col = self.item_features.drop( +# columns=["article_id"] +# ).columns.values +# self.item_feat = self.item_features.drop( +# columns=["article_id"] +# ).to_dict(orient="records") +# +# self.dataset = Dataset() +# self.dataset.fit( +# users=[x for x in self.user_datas["classification_id"]], +# items=[x for x in self.article_datas["article_id"]], +# item_features=self.item_features_col, +# user_features=self.user_features_col, +# ) +# +# print(self.item_feat) +# self.item_features = self.dataset.build_item_features( +# (x, y) for x, y in zip(self.item_features["article_id"], self.item_feat) +# ) +# self.user_features = self.dataset.build_user_features( +# (x, y) for x, y in zip(self.user_datas["classification_id"], self.user_feat) +# ) +# +# (self.interactions, self.weights) = self.dataset.build_interactions( +# (x, y, z) +# for x, y, z in zip( +# self.interaction_datas["classification_id"], +# self.interaction_datas["article_id"], +# self.interaction_datas["duration_time"], +# ) +# ) +# +# num_users, num_items = self.dataset.interactions_shape() +# print("Num users: {}, num_items {}.".format(num_users, num_items)) +# +# def make_model( +# self, +# n_components: int = 30, +# loss: str = "warp", +# epoch: int = 30, +# num_thread: int = 4, +# ): +# self.n_components = n_components +# self.loss = loss +# self.epoch = epoch +# self.num_thread = num_thread +# self.model = LightFM( +# no_components=self.n_components, loss=self.loss, random_state=1616 +# ) +# +# def fit_model(self): +# self.make_dataset() +# self.make_model() +# self.model.fit( +# self.interactions, +# user_features=self.user_features, +# item_features=self.item_features, +# epochs=self.epoch, +# num_threads=self.num_thread, +# sample_weight=self.weights, +# ) +# +# def get_top_n_articles(self, user_id: int, article_num: int): +# item_ids = np.arange(self.interactions.shape[1]) # 예측할 아이템 ID 배열 +# +# predictions = self.model.predict(user_id, item_ids) +# top_items = self.article_datas.iloc[np.argsort(-predictions)[:article_num]] +# return top_items +# +# def similar_items(self, item_id, N=10): +# item_bias, item_representations = self.model.get_item_representations( +# features=self.item_features +# ) +# +# scores = item_representations.dot(item_representations[item_id, :]) +# best = np.argpartition(scores, -N)[-N:] +# +# return self.article_datas.iloc[best] +# +# async def get_classification_for_article(self, article_id:id, session:AsyncSession): +# scores = self.model.predict(np.arange(len(self.user_datas)), np.full(len(self.user_datas), article_id)) +# top_users = np.argsort(-scores) +# +# 
score_for_classification = [0 for _ in range(self.num_classification)] +# weight = 10 +# for user_id in top_users[:10]: +# for i in range(self.num_classification): +# score_for_classification[i] += self.user_datas.iloc[user_id][self.user_datas.columns[i+2]] * (2 ** weight) +# weight -= 1 +# +# total = sum(score_for_classification) +# for i in range(self.num_classification): +# score_for_classification[i] = (int)(score_for_classification[i] / (total/100)) +# +# await CrawledArticleRepository().set_interest_type(article_id, score_for_classification, session) +# +# return score_for_classification +# +# def get_time_weight(self, article_id): +# today = datetime.now().date() +# date_obj = datetime.strptime( +# self.article_datas[self.article_datas["article_id"] == article_id][ +# "created_at" +# ].iloc[0], +# "%Y-%m-%d", +# ).date() +# difference = today - date_obj +# return max(1 - ((difference.days // 30) / 5), 0) +# +# def fit_model_partialy(self): +# self.make_dataset() +# self.model.fit_partial(self.interactions, item_features=self.item_features) +# +# def add_interaction_data(self, interaction_data: InteractionDataInfo): +# InteractionRepository().create( +# Interaction( +# classification_id=interaction_data.interaction_data['classification_id'], +# article_id=interaction_data.interaction_data['article_id'], +# duration_time=interaction_data.interaction_data['duration_time'] +# ) +# ) diff --git a/app/recommend/user_classification.csv b/app/recommend/user_classification.csv index 398400a..09f9e05 100644 --- a/app/recommend/user_classification.csv +++ b/app/recommend/user_classification.csv @@ -1,21 +1,11 @@ -classification_id,sex,issue finder,lifestyle consumer,entertainer,tech specialist,professionals -1,F,1,1,1,0,0 -2,F,1,1,0,1,0 -3,F,1,1,0,0,1 -4,F,1,0,1,1,0 -5,F,1,0,1,0,1 -6,F,1,0,0,1,1 -7,F,0,1,1,1,0 -8,F,0,1,1,0,1 -9,F,0,1,0,1,1 -10,F,0,0,1,1,1 -11,M,1,1,1,0,0 -12,M,1,1,0,1,0 -13,M,1,1,0,0,1 -14,M,1,0,1,1,0 -15,M,1,0,1,0,1 -16,M,1,0,0,1,1 -17,M,0,1,1,1,0 -18,M,0,1,1,0,1 -19,M,0,1,0,1,1 -20,M,0,0,1,1,1 +classification_id,issue finder,lifestyle consumer,entertainer,tech specialist,professionals +1,1,1,1,0,0 +2,1,1,0,1,0 +3,1,1,0,0,1 +4,1,0,1,1,0 +5,1,0,1,0,1 +6,1,0,0,1,1 +7,0,1,1,1,0 +8,0,1,1,0,1 +9,0,1,0,1,1 +10,0,0,1,1,1 \ No newline at end of file diff --git a/app/repository/recommend_crud.py b/app/repository/recommend_crud.py new file mode 100644 index 0000000..b0882f8 --- /dev/null +++ b/app/repository/recommend_crud.py @@ -0,0 +1,19 @@ +from typing import List + +from fastapi import HTTPException +from sqlalchemy.ext.asyncio import AsyncSession + +from app.database.repository import get_repository +from app.model.recommend import Recommend + +class RecommendRepository: + async def update_recommend( + self, id: int, article_ids: List[int], session: AsyncSession + ): + repository = get_repository(Recommend)(session) + return await repository.update_by_pk( + pk=id, + data={ + "recommend_article_ids": article_ids + }, + ) \ No newline at end of file diff --git a/app/repository/user_type_crud.py b/app/repository/user_type_crud.py index 390d008..c0a5ac7 100644 --- a/app/repository/user_type_crud.py +++ b/app/repository/user_type_crud.py @@ -27,15 +27,15 @@ async def update_user_type( return await repository.update_by_pk( pk=id, data={ - "user_type_issue_finder": user_types[UserTypes.ISSUE_FINDER.value], + "user_type_issue_finder": user_types[UserTypes.ISSUE_FINDER.value['id']], "user_type_lifestyle_consumer": user_types[ - UserTypes.LIFESTYLE_CONSUMER.value + 
UserTypes.LIFESTYLE_CONSUMER.value['id'] ], - "user_type_entertainer": user_types[UserTypes.ENTERTAINER.value], + "user_type_entertainer": user_types[UserTypes.ENTERTAINER.value['id']], "user_type_tech_specialist": user_types[ - UserTypes.TECH_SEPCIALIST.value + UserTypes.TECH_SPECIALIST.value['id'] ], - "user_type_professionals": user_types[UserTypes.PROFESSIONALS.value], + "user_type_professionals": user_types[UserTypes.PROFESSIONALS.value['id']], }, ) diff --git a/app/service/news_scheduling_service.py b/app/service/news_scheduling_service.py index b7f70d6..169ce1a 100644 --- a/app/service/news_scheduling_service.py +++ b/app/service/news_scheduling_service.py @@ -78,22 +78,14 @@ async def run_crawl_and_store(session: AsyncSession): else: logger.info("No new articles") - # new_exist_articles = await ArticleManageService().get_all_articles(session=session) + recommend_service = RecommendService() + await recommend_service.initialize_data(session=session) + recommend_service.fit_model() + for user_id in range(10): + await recommend_service.get_recommend_articles( + classification_id=user_id, session=session + ) - # 새로운 기사들만 필터링 - # new_articles_id = [ - # article.id - # for article in new_exist_articles - # if article.probability_issue_finder == -1 - # ] - # recommend_service = RecommendService() - # await recommend_service.initialize_data(session=session) - # recommend_service.fit_model() - # if new_articles: - # for article_id in new_articles_id: - # await recommend_service.get_classification_for_article( - # article_id=article_id, session=session - # ) async def schedule_task(): diff --git a/app/service/user_type_service.py b/app/service/user_type_service.py index 8256011..490ed45 100644 --- a/app/service/user_type_service.py +++ b/app/service/user_type_service.py @@ -69,63 +69,63 @@ def __init__(self): self.questionnaire_data = [ [ "최신 경제 이슈에 대해 얼마나 잘 알고 있습니까?", - ["매우 잘 알고 있다.", 10, UserTypes.ISSUE_FINDER.value], - ["다소 알고 있다.", 5, UserTypes.ISSUE_FINDER.value], - ["잘 모른다.", 0, UserTypes.NONE.value], + ["매우 잘 알고 있다.", 10, UserTypes.ISSUE_FINDER.value['id']], + ["다소 알고 있다.", 5, UserTypes.ISSUE_FINDER.value['id']], + ["잘 모른다.", 0, UserTypes.NONE.value['id']], ], [ "경제 뉴스를 얼마나 자주 찾아보십니까?", - ["매일 확인한다.", 10, UserTypes.ISSUE_FINDER.value], - ["주간 단위로 확인한다.", 5, UserTypes.ISSUE_FINDER.value], - ["가끔 확인한다.", 0, UserTypes.NONE.value], + ["매일 확인한다.", 10, UserTypes.ISSUE_FINDER.value['id']], + ["주간 단위로 확인한다.", 5, UserTypes.ISSUE_FINDER.value['id']], + ["가끔 확인한다.", 0, UserTypes.NONE.value['id']], ], [ "경제 관련 논란이나 논쟁에 얼마나 관심이 있습니까?", - ["매우 관심이 있다.", 10, UserTypes.ISSUE_FINDER.value], - ["다소 관심이 있다.", 5, UserTypes.ISSUE_FINDER.value], - ["잘 모른다.", 0, UserTypes.NONE.value], + ["매우 관심이 있다.", 10, UserTypes.ISSUE_FINDER.value['id']], + ["다소 관심이 있다.", 5, UserTypes.ISSUE_FINDER.value['id']], + ["잘 모른다.", 0, UserTypes.NONE.value['id']], ], [ "경제 정보를 어떻게 활용하시나요?", - ["일상 생활에 적용해본다.", 10, UserTypes.LIFESTYLE_CONSUMER.value], - ["흥미로운 정보는 기억한다.", 10, UserTypes.ENTERTAINER.value], - ["크게 활용하지 않는다.", 0, UserTypes.NONE.value], + ["일상 생활에 적용해본다.", 10, UserTypes.LIFESTYLE_CONSUMER.value['id']], + ["흥미로운 정보는 기억한다.", 10, UserTypes.ENTERTAINER.value['id']], + ["크게 활용하지 않는다.", 0, UserTypes.NONE.value['id']], ], [ "절약이나 소비자 팁에 관심이 있으신가요?", - ["매우 관심이 있다.", 10, UserTypes.LIFESTYLE_CONSUMER.value], - ["다소 관심이 있다.", 5, UserTypes.LIFESTYLE_CONSUMER.value], - ["별로 관심이 없다.", 0, UserTypes.NONE.value], + ["매우 관심이 있다.", 10, UserTypes.LIFESTYLE_CONSUMER.value['id']], + ["다소 관심이 있다.", 5, 
UserTypes.LIFESTYLE_CONSUMER.value['id']], + ["별로 관심이 없다.", 0, UserTypes.NONE.value['id']], ], [ "경제 관련 이야기를 어떻게 즐기시나요?", - ["심도 깊게 분석한다.", 10, UserTypes.PROFESSIONALS.value], - ["가벼운 마음으로 즐긴다.", 5, UserTypes.ENTERTAINER.value], - ["별로 관심이 없다.", 0, UserTypes.NONE.value], + ["심도 깊게 분석한다.", 10, UserTypes.PROFESSIONALS.value['id']], + ["가벼운 마음으로 즐긴다.", 5, UserTypes.ENTERTAINER.value['id']], + ["별로 관심이 없다.", 0, UserTypes.NONE.value['id']], ], [ "기술과 경제의 결합에 대해 얼마나 잘 이해하고 있습니까?", - ["매우 잘 이해한다.", 10, UserTypes.TECH_SEPCIALIST.value], - ["다소 이해한다.", 5, UserTypes.TECH_SEPCIALIST.value], - ["잘 모른다.", 0, UserTypes.NONE.value], + ["매우 잘 이해한다.", 10, UserTypes.TECH_SPECIALIST.value['id']], + ["다소 이해한다.", 5, UserTypes.TECH_SPECIALIST.value['id']], + ["잘 모른다.", 0, UserTypes.NONE.value['id']], ], [ "기술 발전이 경제에 미치는 영향에 대해 얼마나 알고 있습니까?", - ["깊이 있는 지식이 있다. ", 10, UserTypes.TECH_SEPCIALIST.value], - ["일반적인 이해만 한다. ", 5, UserTypes.TECH_SEPCIALIST.value], - ["잘 모른다.", 0, UserTypes.NONE.value], + ["깊이 있는 지식이 있다. ", 10, UserTypes.TECH_SPECIALIST.value['id']], + ["일반적인 이해만 한다. ", 5, UserTypes.TECH_SPECIALIST.value['id']], + ["잘 모른다.", 0, UserTypes.NONE.value['id']], ], [ "전문가 의견이나 통계 데이터에 관심이 있으신가요?", - ["매우 관심이 있다.", 10, UserTypes.PROFESSIONALS.value], - ["다소 관심이 있다.", 5, UserTypes.PROFESSIONALS.value], - ["별로 관심이 없다. ", 5, UserTypes.ENTERTAINER.value], + ["매우 관심이 있다.", 10, UserTypes.PROFESSIONALS.value['id']], + ["다소 관심이 있다.", 5, UserTypes.PROFESSIONALS.value['id']], + ["별로 관심이 없다. ", 5, UserTypes.ENTERTAINER.value['id']], ], [ "경제 분석을 얼마나 자주 읽거나 들으시나요?", - ["자주 읽거나 듣는다.", 10, UserTypes.PROFESSIONALS.value], - ["가끔 읽거나 듣는다.", 5, UserTypes.PROFESSIONALS.value], - ["별로 읽거나 듣지 않는다.", 5, UserTypes.ENTERTAINER.value], + ["자주 읽거나 듣는다.", 10, UserTypes.PROFESSIONALS.value['id']], + ["가끔 읽거나 듣는다.", 5, UserTypes.PROFESSIONALS.value['id']], + ["별로 읽거나 듣지 않는다.", 5, UserTypes.ENTERTAINER.value['id']], ], ] @@ -139,16 +139,26 @@ async def create_user_type( session: AsyncSession, ) -> UserType: user_types = calculate_user_type(answers, self.questionnaire_data) + max_user_type = max(user_types) + user_type_id = -1 + user_type_enum = UserTypes.NONE + for idx, col in enumerate(user_types): + if col == max_user_type: + user_type_id = idx + for user_type in UserTypes: + if user_type.value['id'] == user_type_id: + user_type_enum = user_type return await UserTypeRepository().create( user_type=UserType( user_id=answers.id, - user_type_issue_finder=user_types[UserTypes.ISSUE_FINDER.value], + user_type_issue_finder=user_types[UserTypes.ISSUE_FINDER.value['id']], user_type_lifestyle_consumer=user_types[ - UserTypes.LIFESTYLE_CONSUMER.value + UserTypes.LIFESTYLE_CONSUMER.value['id'] ], - user_type_entertainer=user_types[UserTypes.ENTERTAINER.value], - user_type_tech_specialist=user_types[UserTypes.TECH_SEPCIALIST.value], - user_type_professionals=user_types[UserTypes.PROFESSIONALS.value], + user_type_entertainer=user_types[UserTypes.ENTERTAINER.value['id']], + user_type_tech_specialist=user_types[UserTypes.TECH_SPECIALIST.value['id']], + user_type_professionals=user_types[UserTypes.PROFESSIONALS.value['id']], + user_type=user_type_enum.value['name'] ), session=session, )
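
A closing note on exercising the new recommender from PATCH 3/3: the series wires RecommendService into run_crawl_and_store, but the same flow can be driven by hand for a quick local check. Below is a minimal smoke-test sketch, not part of the series. It assumes get_db_session from app.database.session is an async generator yielding an AsyncSession (the same helper the deactivated module imports), that the interaction table already holds data (make_dataset builds the csr_matrix from it), and that gyeongdan.recommends already contains a row for id=classification_id+1, since update_recommend only updates an existing primary key.

    import asyncio

    from app.database.session import get_db_session  # assumed: async generator yielding AsyncSession
    from app.recommend.recommend_service import RecommendService


    async def smoke_test():
        async for session in get_db_session():
            service = RecommendService()
            # Load user_classification.csv, the article count, and the interactions.
            await service.initialize_data(session=session)
            # Build the (num_classifications+1) x (num_articles+1) matrix and train ALS.
            service.fit_model()
            # classification_id indexes matrix rows directly; the scheduler passes
            # range(10), and update_recommend persists under id=classification_id+1.
            article_ids = await service.get_recommend_articles(
                classification_id=1, session=session
            )
            print(article_ids)
            break


    asyncio.run(smoke_test())

Under these assumptions the script trains the implicit ALS model on the binarized interaction matrix and prints the top-10 (1-shifted) article ids for classification 1, mirroring what the scheduler now does for each classification.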