diff --git a/.gitignore b/.gitignore index e8a7ad9..90a3384 100644 --- a/.gitignore +++ b/.gitignore @@ -10,4 +10,6 @@ data.json *.pptx /backend/output *.pyc -*.log \ No newline at end of file +*.log +mongodb/ +db-backup/ \ No newline at end of file diff --git a/README.md b/README.md index 235acbe..5970f80 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,10 @@ + # knowlix + Microservice to autmatically create onboarding slides ![LIX Builder](https://github.com/vg-leanix/pptx-tool/blob/main/Thumbnail.png) + +## Architecture +![Architecture](https://github.com/vg-leanix/knowlix/blob/v1.1_mongodb/knowlix%20architecture.png) diff --git a/backend/.gitignore b/backend/.gitignore new file mode 100644 index 0000000..ae412d6 --- /dev/null +++ b/backend/.gitignore @@ -0,0 +1 @@ +env/ \ No newline at end of file diff --git a/backend/Dockerfile b/backend/Dockerfile index fbd141e..3588bf5 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -2,10 +2,11 @@ FROM python:3.8.1 WORKDIR /usr/app + COPY req.txt ./ COPY api.py core.py main.py master.pptx req.txt server.py ./ -# RUN mkdir output +RUN mkdir output RUN pip install --upgrade pip RUN pip install -r req.txt --no-cache-dir diff --git a/backend/api.py b/backend/api.py index 1864dab..1de3eaf 100644 --- a/backend/api.py +++ b/backend/api.py @@ -9,31 +9,36 @@ from pptx import Presentation from server import celery import json +import uuid +from datetime import datetime +from pymongo import MongoClient file_path = "master.pptx" -pres= Presentation(file_path) +pres = Presentation(file_path) +MONGODB = os.getenv("MONGODB") +client = MongoClient(MONGODB) +db = client["taskdb"]["ta"] -tags_metadata= [ +tags_metadata = [ { - "name": "powerpoint", - "description": "handling powerpoint" + "name": "powerpoint", + "description": "handling powerpoint" }, { - "name": "job management", - "description": "managing celery tasks" + "name": "job management", + "description": "managing celery tasks" }, - + ] app = FastAPI( - title= "SurfBoard", - description= "API Hub for the LeanIX Onboarding Deck", - version= "1.0.0", + title="Knowlix", + description="API Hub for the LeanIX Onboarding Deck", + version="1.0.0", openapi_tags=tags_metadata) - app.add_middleware( CORSMiddleware, allow_origins=["*"], @@ -43,42 +48,91 @@ expose_headers=[] ) + class PPTX(BaseModel): sections: List[str] - - -@app.get("/v1/sections", tags = ["powerpoint"]) + + +class Download(BaseModel): + taskID: str + + +@app.get("/v1/sections", tags=["powerpoint"]) async def provide_sections(): sections = get_sections(pres) - if (not sections) or (len(sections)==0): - raise HTTPException(status_code=404, detail="No Sections in Master pptx") + if (not sections) or (len(sections) == 0): + raise HTTPException( + status_code=404, detail="No Sections in Master pptx") - return JSONResponse(sections,status_code=200) + return JSONResponse(sections, status_code=200) -@app.post("/v1/pptxjob", tags = ["job management"]) +@app.post("/v1/pptxjob", tags=["job management"]) async def deliver_pptx(pptx: PPTX): task_name = "pptx" sections = pptx.sections - kwargs ={ - 'sections':sections, - 'downloadStatus': 'ready' - } - + no_sections = len(sections) + sections_available = True + exists_already = False + status = None + custom_id = str(uuid.uuid4().hex) + kwargs = { + 'sections': sections, + 'customID': custom_id, + 'downloaded': False + + } + + if no_sections != 0: + exists_already = check_existence(sections, db) + else: + sections_available = False + + if not exists_already and sections_available: + task = celery.send_task(task_name, kwargs=kwargs, serializer='json') + if sections_available and not exists_already: + status = "success" - task = celery.send_task(task_name, kwargs = kwargs, serializer='json') + elif not sections_available: + status = "no_sections" + + elif exists_already: + status = "pptx_exists" package = { - 'taskID': task.id, - 'sections': sections + 'taskID': custom_id, + 'sections': sections, + 'status': status } - return JSONResponse(package) - - - - + + +def check_existence(sections, db): + exists_already = False + no_sections = len(sections) + query = {"kwargs.sections": {"$size": no_sections, "$all": sections}} + + hits = db.count_documents(query) + + if hits > 0: + exists_already = True + + return exists_already + + +@app.post("/v1/download", tags=["powerpoint"]) +async def download_pptx(download: Download): + + task_id = download.taskID + + result = db.find_one({"kwargs.customID": task_id}, {'result': 1, '_id': 0}) + unpack = result["result"] + unpack = json.loads(unpack) + file_path = unpack["filePath"] + + # return file_path + return FileResponse(file_path) diff --git a/backend/clean_output.py b/backend/clean_output.py deleted file mode 100644 index 2e043e5..0000000 --- a/backend/clean_output.py +++ /dev/null @@ -1,7 +0,0 @@ -import os - -folder = os.path.join(os.getcwd(),'backend/output') -filelist = [f for f in os.listdir(folder)if f.endswith(".pptx")] - -for f in filelist: - os.remove(os.path.join(folder,f)) diff --git a/backend/req.txt b/backend/req.txt index 1efa76d..023c0a5 100644 --- a/backend/req.txt +++ b/backend/req.txt @@ -1,27 +1,26 @@ -certifi==2020.6.20 -click==7.1.2 -fastapi==0.61.2 -h11==0.11.0 -lxml==4.6.1 -Pillow==8.0.1 -pydantic==1.7.2 -python-pptx==0.6.18 -starlette==0.13.6 -typing==3.7.4.3 -uvicorn==0.12.2 -XlsxWriter==1.3.7 aiofiles==0.6.0 amqp==5.0.2 billiard==3.6.3.0 celery==5.0.3 +certifi==2020.6.20 click==7.1.2 click-didyoumean==0.0.3 click-plugins==1.1.1 click-repl==0.1.6 +fastapi==0.61.2 +h11==0.11.0 kombu==5.0.2 +lxml==4.6.1 +Pillow==8.0.1 prompt-toolkit==3.0.8 +pydantic==1.7.2 +pymongo==3.11.2 +python-pptx==0.6.18 pytz==2020.4 -redis==3.5.3 six==1.15.0 +starlette==0.13.6 +typing==3.7.4.3 +uvicorn==0.12.2 vine==5.0.0 -wcwidth==0.2.5 \ No newline at end of file +wcwidth==0.2.5 +XlsxWriter==1.3.7 diff --git a/backend/server.py b/backend/server.py index fcb32e6..b71325e 100644 --- a/backend/server.py +++ b/backend/server.py @@ -2,10 +2,14 @@ from celery import Celery CELERY_BROKER_URL = os.getenv("RABBITMQ") -CELERY_RESULT_BACKEND = os.getenv("REDISSERVER") +CELERY_RESULT_BACKEND = os.getenv("MONGODB") celery = Celery("worker", backend=CELERY_RESULT_BACKEND, broker=CELERY_BROKER_URL) celery.conf.update( - result_extended=True + result_extended=True, + mongodb_backend_settings={ + 'database': 'taskdb', + 'taskmeta_collection': 'ta', + } ) diff --git a/celery/.gitignore b/celery/.gitignore new file mode 100644 index 0000000..ae412d6 --- /dev/null +++ b/celery/.gitignore @@ -0,0 +1 @@ +env/ \ No newline at end of file diff --git a/celery/core.py b/celery/core.py index 92af411..771b72b 100644 --- a/celery/core.py +++ b/celery/core.py @@ -2,87 +2,89 @@ import uuid import lxml.etree as etree + def extract_slide_mapping(slidelist): - """this method will get the mapping between a slide_id and rID""" + """this method will get the mapping between a slide_id and rID. This is necessary to maintain the xml syntax valid + after injection / modification""" + + slide_mapping = dict() - slide_mapping=dict() - for slide in slidelist: - rid=slide.attrib['{http://schemas.openxmlformats.org/officeDocument/2006/relationships}id'] - slide_id=slide.attrib['id'] - slide_mapping[slide_id]=rid - + rid = slide.attrib['{http://schemas.openxmlformats.org/officeDocument/2006/relationships}id'] + slide_id = slide.attrib['id'] + slide_mapping[slide_id] = rid + return slide_mapping -def prepare_sections(keys, presentation, mapping,all_sections=False): + +def prepare_sections(keys, presentation, mapping, all_sections=False): """this method will prepare a xml tree based on the passed section names the user wants to have in the pptx""" - nmap=presentation.slides._sldIdLst.nsmap - - all_sections=compile_sections(presentation,mapping) - root=etree.Element('{http://schemas.openxmlformats.org/presentationml/2006/main}sldIdLst', nsmap=nmap) - - #TODO: create toggle for - if (all_sections) and (len(keys)!=0): + nmap = presentation.slides._sldIdLst.nsmap + + all_sections = compile_sections(presentation, mapping) + root = etree.Element( + '{http://schemas.openxmlformats.org/presentationml/2006/main}sldIdLst', nsmap=nmap) + + # TODO: create toggle for + if (all_sections) and (len(keys) != 0): for key in keys: - section=all_sections[key] + section = all_sections[key] for slide in section: - etree.SubElement(root, '{http://schemas.openxmlformats.org/presentationml/2006/main}sldId',attrib=slide,nsmap=nmap) - - + etree.SubElement( + root, '{http://schemas.openxmlformats.org/presentationml/2006/main}sldId', attrib=slide, nsmap=nmap) + return root + def compile_sections(presentation, mapping): """this method will get all the sections that are in the pptx""" - - ns='{http://schemas.openxmlformats.org/officeDocument/2006/relationships}id' - xml=etree.fromstring(presentation.part.blob) - nsmap = {'p14':'http://schemas.microsoft.com/office/powerpoint/2010/main'} + + ns = '{http://schemas.openxmlformats.org/officeDocument/2006/relationships}id' + xml = etree.fromstring(presentation.part.blob) + nsmap = {'p14': 'http://schemas.microsoft.com/office/powerpoint/2010/main'} sections = xml.xpath('.//p14:sectionLst', namespaces=nsmap)[0] - - collector=dict() - pairs_col=list() - + collector = dict() + pairs_col = list() + for section in sections: - key=section.attrib['name'] + key = section.attrib['name'] for slidelist in section: for slide in slidelist: - pairs=dict() - slide_id=slide.attrib['id'] + pairs = dict() + slide_id = slide.attrib['id'] + # lookup in slide mapping to get rID + rID = mapping[slide_id] - #lookup in slide mapping to get rID - rID=mapping[slide_id] - - pairs['id']=slide_id - pairs[ns]=rID + pairs['id'] = slide_id + pairs[ns] = rID pairs_col.append(pairs) + collector[key] = pairs_col + pairs_col = list() - collector[key]=pairs_col - pairs_col=list() - return collector - -def replace_slides(new_xml,presentation,folder, save=False): + + +def replace_slides(new_xml, presentation, folder, save=False): """This method will take a xml tree and create the final pptx out of it""" - uid=str(uuid.uuid4().hex)[:10] - file_path= f"{folder}/{uid}.pptx" - slidelist=presentation.slides._sldIdLst + uid = str(uuid.uuid4().hex)[:10] + file_path = f"{folder}/{uid}.pptx" + slidelist = presentation.slides._sldIdLst + + slidelist.getparent().replace(slidelist, new_xml) - - slidelist.getparent().replace(slidelist,new_xml) - if save: presentation.save(file_path) - + return file_path -def print_xml(xml): - print(etree.tostring(xml, pretty_print=True, encoding="unicode")) \ No newline at end of file +def _print_xml(xml): + print(etree.tostring(xml, pretty_print=True, encoding="unicode")) diff --git a/celery/req.txt b/celery/req.txt index 2a2f26c..f1fc5d9 100644 --- a/celery/req.txt +++ b/celery/req.txt @@ -9,9 +9,9 @@ kombu==5.0.2 lxml==4.6.2 Pillow==8.0.1 prompt-toolkit==3.0.8 +pymongo==3.11.2 python-pptx==0.6.18 pytz==2020.4 -redis==3.5.3 six==1.15.0 vine==5.0.0 wcwidth==0.2.5 diff --git a/celery/server.py b/celery/server.py index 1c2b195..e39ac1d 100644 --- a/celery/server.py +++ b/celery/server.py @@ -1,14 +1,17 @@ import os from celery import Celery -import redis + CELERY_BROKER_URL = os.getenv("RABBITMQ") -CELERY_RESULT_BACKEND = os.getenv("REDISSERVER") +CELERY_RESULT_BACKEND = os.getenv("MONGODB") app = Celery("worker", backend=CELERY_RESULT_BACKEND, - broker=CELERY_BROKER_URL) + broker=CELERY_BROKER_URL) app.conf.update( result_extended=True, - - ) + mongodb_backend_settings={ + 'database': 'taskdb', + 'taskmeta_collection': 'ta', + } +) diff --git a/celery/worker.py b/celery/worker.py index 098eedd..8434d54 100644 --- a/celery/worker.py +++ b/celery/worker.py @@ -5,32 +5,32 @@ from time import sleep from celery.exceptions import Ignore from pptx import Presentation -import redis +from pymongo import MongoClient from pptx_handler import create_pptx import json -CELERY_BROKER_URL = os.getenv("RABBITMQ") -CELERY_RESULT_BACKEND = os.getenv("REDISSERVER") -from server import app +from server import app +MONGODB = os.getenv("MONGODB") +client = MongoClient(MONGODB) +db = client["taskdb"]["ta"] @app.task(name='pptx', bind=True, max_retries=3) -def generate_pptx(self, sections, downloadStatus): +def generate_pptx(self, sections, customID, downloaded): try: pptx_path = "master.pptx" - task_id=self.request.id - # task_prefix = "celery-task-meta-" - # task_str = task_prefix + task_id - - - pres= Presentation(pptx_path) - file_path=create_pptx(pres,sections) - - except Exception as exec: - self.retry(exec=exec, countdown = 2 ** self.request.retries) + task_id = self.request.id + pres = Presentation(pptx_path) + file_path = create_pptx(pres, sections) + + except Exception as exec: + self.retry(exec=exec, countdown=2 ** self.request.retries) - return f"{task_id} finished. stored {file_path}" + output = { + 'filePath': file_path + } + return output diff --git a/docker-compose.yml b/docker-compose.yml index 9abd112..6c65ba5 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -10,12 +10,18 @@ services: - "3000:3000" depends_on: - apihub + - mongodb1 + - mongodb2 volumes: - ./frontend/nuxt-fe:/app command: npm run dev + networks: + - mongonetwork nginx: build: ./nginx/ + networks: + - mongonetwork container_name: nginx ports: - "80:80" @@ -23,6 +29,8 @@ services: - ./nginx/default.conf:/etc/nginx/conf.d/default.conf depends_on: - nuxt + - mongodb1 + - mongodb2 celery: container_name: celery @@ -30,21 +38,78 @@ services: volumes: - ./celery/server.py:/celery/server.py - ./celery/worker.py:/celery/worker.py + - pptxdecks:/celery/output environment: - REDISSERVER: redis://redis + MONGODB: mongodb://mongodb2 RABBITMQ: pyamqp://rabbitmq + networks: + - mongonetwork depends_on: - - redis + - mongodb1 + - mongodb2 - rabbitmq + + mongo-stream: + container_name: mongo-stream + restart: on-failure + build: ./mongo-stream/ + command: python3 -u server.py + environment: + MONGODB: mongodb://mongodb2 + ports: + - "3333:3333" + depends_on: + - mongo-startup + - mongodb1 + - mongodb2 + volumes: + - ./mongo-stream/server.py:/usr/app/server.py + networks: + - mongonetwork + + mongodb1: + image: mongo:latest + container_name: replica1 + networks: + - mongonetwork + ports: + - 30001:27017 + entrypoint: [ "/usr/bin/mongod", "--bind_ip_all", "--replSet", "rs0" ] + + mongodb2: + image: mongo:latest + container_name: replica2 + volumes: + - ./db-backup/primary:/data/db + networks: + - mongonetwork + ports: + - 30002:27017 + entrypoint: [ "/usr/bin/mongod", "--bind_ip_all", "--replSet", "rs0" ] + + mongo-startup: + image: mongo + container_name: startup-agent + networks: + - mongonetwork + depends_on: + - mongodb1 + - mongodb2 + + volumes: + - ./mongo-startup:/mongo-startup + entrypoint: + - /mongo-startup/initiate_replica.sh apihub: build: ./backend/ container_name: apihub restart: on-failure environment: - REDISSERVER: redis://redis + MONGODB: mongodb://mongodb2 RABBITMQ: pyamqp://rabbitmq volumes: + - pptxdecks:/usr/app/output - type: bind source: ./backend/api.py target: /usr/app/api.py @@ -68,17 +133,16 @@ services: ports: - "8000:8000" depends_on: - - redis + - mongodb1 + - mongo-startup - rabbitmq + networks: + - mongonetwork - redis: - container_name: redis - image: redis:6.0-buster - ports: - - "6379:6379" - rabbitmq: container_name: rabbitmq + networks: + - mongonetwork image: rabbitmq:3.8-management ports: - "15672:15672" @@ -87,11 +151,18 @@ services: container_name: flower image: mher/flower:0.9.5 command: ["flower", "--broker=amqp://rabbitmq", "--port=5555"] + networks: + - mongonetwork ports: - "5555:5555" depends_on: - - redis + - mongodb1 + - mongodb2 - rabbitmq - celery - \ No newline at end of file +networks: + mongonetwork: + +volumes: + pptxdecks: \ No newline at end of file diff --git a/dockercommands.txt b/dockercommands.txt new file mode 100644 index 0000000..c317b72 --- /dev/null +++ b/dockercommands.txt @@ -0,0 +1,5 @@ +# Delete all containers # +docker rm -f $(docker ps -a -q) + +# Delete all images +docker system prune -a \ No newline at end of file diff --git a/frontend/nuxt-fe/components/AppHeader.vue b/frontend/nuxt-fe/components/AppHeader.vue index 1b01dc3..a623d7d 100644 --- a/frontend/nuxt-fe/components/AppHeader.vue +++ b/frontend/nuxt-fe/components/AppHeader.vue @@ -138,6 +138,8 @@ export default { }, methods: { + + downloadPptx() { this.$store.commit("changeDownloadStatus"); this.$store.dispatch("sendTask"); @@ -149,6 +151,10 @@ export default { watch: { $route(to, from) { this.$store.commit("changeCurrentRoute", to.name); + + if (to.name == "downloads"){ + this.$store.dispatch("clearAlertList"); + } }, }, }; diff --git a/frontend/nuxt-fe/components/DownloadItem.vue b/frontend/nuxt-fe/components/DownloadItem.vue index 19faf80..c3a3370 100644 --- a/frontend/nuxt-fe/components/DownloadItem.vue +++ b/frontend/nuxt-fe/components/DownloadItem.vue @@ -48,7 +48,7 @@ viewBox="0 0 20 20" fill="currentColor" class="w-5 h-5 cursor-pointer update" - @click="sort(0)" + @click="sort(1)" > - + -
+
-
- {{ task.taskID }} +
+ {{ index + 1}}
@@ -148,7 +149,11 @@ - Download @@ -175,10 +180,19 @@ export default { table.order.listener("#update", id); }, + getDownload: function(taskID){ + // console.log(taskID) + this.$store.dispatch("downloadPresentation",taskID) + } + }, + created() { + if(process.browser){ + this.$store.commit('initialiseStore'); + } + }, - mounted() { - + var table = $("#downloads").DataTable({ responsive: false, paging: false, @@ -191,6 +205,8 @@ export default { // $("th").off(); $("th").removeAttr("style"); + // $('.dataTables_empty').remove(); + }, }; diff --git a/frontend/nuxt-fe/components/alert.vue b/frontend/nuxt-fe/components/alert.vue new file mode 100644 index 0000000..2d0a498 --- /dev/null +++ b/frontend/nuxt-fe/components/alert.vue @@ -0,0 +1,98 @@ + + + + + \ No newline at end of file diff --git a/frontend/nuxt-fe/components/alertJob.vue b/frontend/nuxt-fe/components/alertJob.vue deleted file mode 100644 index e311128..0000000 --- a/frontend/nuxt-fe/components/alertJob.vue +++ /dev/null @@ -1,51 +0,0 @@ - - - - - \ No newline at end of file diff --git a/frontend/nuxt-fe/nuxt.config.js b/frontend/nuxt-fe/nuxt.config.js index b240440..8055e2f 100644 --- a/frontend/nuxt-fe/nuxt.config.js +++ b/frontend/nuxt-fe/nuxt.config.js @@ -2,9 +2,10 @@ export default { target: 'server', + // Global page headers (https://go.nuxtjs.dev/config-head) head: { - title: 'LIX Builder', + title: 'knowlix', meta: [ { charset: 'utf-8' }, { name: 'viewport', content: 'width=device-width, initial-scale=1' }, @@ -40,6 +41,7 @@ export default { ] }, + diff --git a/frontend/nuxt-fe/pages/index.vue b/frontend/nuxt-fe/pages/index.vue index 9d31e61..a102e1b 100644 --- a/frontend/nuxt-fe/pages/index.vue +++ b/frontend/nuxt-fe/pages/index.vue @@ -1,11 +1,12 @@