-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
First commit extracted from https://github.com/ozone-his/ozone-analytics
- Loading branch information
0 parents
commit 7b3e6f1
Showing
51 changed files
with
2,873 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
.idea/ | ||
target/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
|
||
# Created by https://www.toptal.com/developers/gitignore/api/intellij | ||
# Edit at https://www.toptal.com/developers/gitignore?templates=intellij | ||
|
||
### Intellij ### | ||
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider | ||
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 | ||
.idea/ | ||
target/ | ||
# User-specific stuff | ||
.idea/**/workspace.xml | ||
.idea/**/tasks.xml | ||
.idea/**/usage.statistics.xml | ||
.idea/**/dictionaries | ||
.idea/**/shelf | ||
|
||
# AWS User-specific | ||
.idea/**/aws.xml | ||
|
||
# Generated files | ||
.idea/**/contentModel.xml | ||
|
||
# Sensitive or high-churn files | ||
.idea/**/dataSources/ | ||
.idea/**/dataSources.ids | ||
.idea/**/dataSources.local.xml | ||
.idea/**/sqlDataSources.xml | ||
.idea/**/dynamic.xml | ||
.idea/**/uiDesigner.xml | ||
.idea/**/dbnavigator.xml | ||
|
||
# Gradle | ||
.idea/**/gradle.xml | ||
.idea/**/libraries | ||
|
||
# Gradle and Maven with auto-import | ||
# When using Gradle or Maven with auto-import, you should exclude module files, | ||
# since they will be recreated, and may cause churn. Uncomment if using | ||
# auto-import. | ||
# .idea/artifacts | ||
# .idea/compiler.xml | ||
# .idea/jarRepositories.xml | ||
# .idea/modules.xml | ||
# .idea/*.iml | ||
# .idea/modules | ||
# *.iml | ||
# *.ipr | ||
|
||
# CMake | ||
cmake-build-*/ | ||
|
||
# Mongo Explorer plugin | ||
.idea/**/mongoSettings.xml | ||
dependency-reduced-pom.xml | ||
|
||
# File-based project format | ||
*.iws | ||
|
||
# IntelliJ | ||
out/ | ||
|
||
# mpeltonen/sbt-idea plugin | ||
.idea_modules/ | ||
|
||
# JIRA plugin | ||
atlassian-ide-plugin.xml | ||
|
||
# Cursive Clojure plugin | ||
.idea/replstate.xml | ||
|
||
# SonarLint plugin | ||
.idea/sonarlint/ | ||
|
||
# Crashlytics plugin (for Android Studio and IntelliJ) | ||
com_crashlytics_export_strings.xml | ||
crashlytics.properties | ||
crashlytics-build.properties | ||
fabric.properties | ||
|
||
# Editor-based Rest Client | ||
.idea/httpRequests | ||
|
||
# Android studio 3.1+ serialized cache file | ||
.idea/caches/build_file_checksums.ser | ||
|
||
### Intellij Patch ### | ||
# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721 | ||
|
||
# *.iml | ||
# modules.xml | ||
# .idea/misc.xml | ||
# *.ipr | ||
|
||
# Sonarlint plugin | ||
# https://plugins.jetbrains.com/plugin/7973-sonarlint | ||
.idea/**/sonarlint/ | ||
|
||
# SonarQube Plugin | ||
# https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin | ||
.idea/**/sonarIssues.xml | ||
|
||
# Markdown Navigator plugin | ||
# https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced | ||
.idea/**/markdown-navigator.xml | ||
.idea/**/markdown-navigator-enh.xml | ||
.idea/**/markdown-navigator/ | ||
|
||
# Cache file creation bug | ||
# See https://youtrack.jetbrains.com/issue/JBR-2257 | ||
.idea/$CACHE_FILE$ | ||
|
||
# CodeStream plugin | ||
# https://plugins.jetbrains.com/plugin/12206-codestream | ||
.idea/codestream.xml | ||
|
||
# End of https://www.toptal.com/developers/gitignore/api/intellij |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
|
||
FROM maven:3.5-jdk-8-alpine as builder | ||
# add pom.xml and source code | ||
ADD ./pom.xml pom.xml | ||
#cache dependencies | ||
RUN mvn dependency:go-offline | ||
ADD ./src src/ | ||
RUN mvn clean package | ||
|
||
FROM flink:1.14.4-scala_2.12-java8 | ||
RUN wget https://repo.maven.apache.org/maven2/org/apache/flink/flink-connector-jdbc_2.12/1.14.4/flink-connector-jdbc_2.12-1.14.4.jar -O /opt/flink/lib/flink-connector-jdbc_2.12-1.14.4.jar | ||
RUN wget https://repo1.maven.org/maven2/org/apache/flink/flink-parquet_2.11/1.13.5/flink-parquet_2.11-1.13.5.jar -O /opt/flink/lib/flink-parquet_2.11-1.13.5.jar | ||
RUN wget https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar -O /opt/flink/lib/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar | ||
RUN wget https://repo1.maven.org/maven2/org/apache/parquet/parquet-hadoop/1.12.2/parquet-hadoop-1.12.2.jar -O /opt/flink/lib/parquet-hadoop-1.12.2.jar | ||
RUN wget https://repo1.maven.org/maven2/org/apache/parquet/parquet-common/1.12.2/parquet-common-1.12.2.jar -O /opt/flink/lib/parquet-common-1.12.2.jar | ||
RUN wget https://repo1.maven.org/maven2/org/apache/httpcomponents/httpclient/4.5.13/httpclient-4.5.13.jar -O /opt/flink/lib/httpclient-4.5.13.jar | ||
RUN wget https://repo1.maven.org/maven2/org/apache/httpcomponents/httpcore/4.4.15/httpcore-4.4.15.jar -O /opt/flink/lib/httpcore-4.4.15.jar | ||
RUN wget https://repo1.maven.org/maven2/com/ecwid/consul/consul-api/1.4.5/consul-api-1.4.5.jar -O /opt/flink/lib/consul-api-1.4.5.jar | ||
RUN wget https://repo1.maven.org/maven2/com/google/code/gson/gson/2.9.0/gson-2.9.0.jar -O /opt/flink/lib/gson-2.9.0.jar | ||
COPY --from=builder target/ozone-etl-flink-1.0-SNAPSHOT-etl-streaming.jar /opt/flink/usrlib/streaming-etl-job.jar | ||
COPY --from=builder target/ozone-etl-flink-1.0-SNAPSHOT-etl-migrations.jar /opt/flink/usrlib/ozone-etl-migrations.jar | ||
COPY run.sh /run.sh | ||
RUN chmod +x /run.sh | ||
ENTRYPOINT ["/docker-entrypoint.sh"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
FROM maven:3.5-jdk-8-alpine as builder | ||
# add pom.xml and source code | ||
ADD ./pom.xml pom.xml | ||
ADD ./src src/ | ||
RUN mvn clean install -Pbatch | ||
|
||
FROM openjdk:8-jre-alpine | ||
ENV OUTPUT_DIR=/parquet | ||
COPY --from=builder target/ozone-etl-flink-1.0-SNAPSHOT-etl-export.jar etl-export.jar | ||
COPY --from=builder target/ozone-etl-flink-1.0-SNAPSHOT-etl-migrations.jar /opt/flink/usrlib/ozone-etl-migrations.jar | ||
ADD ./batch-export.sh ./batch-export.sh | ||
COPY run.sh /run.sh | ||
RUN chmod +x /run.sh | ||
CMD [ "/bin/sh","./batch-export.sh" ] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
|
||
# Ozone ETL pipelines | ||
|
||
## Flink | ||
|
||
|
||
|
||
This repository contains an ETL [Flink](hhttps://ci.apache.org/projects/flink/flink-docs-master/) [job](https://ci.apache.org/projects/flink/flink-docs-master/docs/internals/job_scheduling/#:~:text=A%20Flink%20job%20is%20first,it%20cancels%20all%20running%20tasks) for flattening [Ozone HIS](https://github.com/ozone-his) data. | ||
|
||
## Features | ||
|
||
|
||
|
||
- Provides both [batch]() and [streaming]() modes | ||
|
||
- Currently flattens OpenMRS to output reporting friendly tables for: | ||
|
||
- patients | ||
|
||
- observations | ||
|
||
- visits | ||
|
||
- concepts | ||
|
||
|
||
|
||
## Tech | ||
|
||
- [Flink](hhttps://ci.apache.org/projects/flink/flink-docs-master/) | ||
|
||
- [Flink CDC connectors](https://github.com/ververica/flink-cdc-connectors) - For streaming | ||
|
||
|
||
|
||
## Building and Installation | ||
|
||
|
||
|
||
### Prerequisites | ||
|
||
- A running Flink [installation](https://ci.apache.org/projects/flink/flink-docs-release-1.13/docs/try-flink/local_installation/) | ||
|
||
- A running OpenMRS installation | ||
|
||
- A running PostgreSQL installation | ||
|
||
- A liquibase installation | ||
|
||
|
||
|
||
### Build and install | ||
|
||
- Update the [job.properties](./job.properties) files with the details for your OpenMRS MySQL database and the analytics PostgreSQL database | ||
|
||
- Build with `mvn clean package` | ||
|
||
- Retrieve and apply the [Liquibase file](https://github.com/ozone-his/ozonepro-docker/tree/master/flink/liquidbase) needed to create tables on the analytics database (more on installation and usage of Liquibase see [liquibase](https://www.liquibase.org/get-started/quickstart)) | ||
|
||
- Run with `flink run -m <flink-job-manager-url> target/ozone-etl-flink-1.0-SNAPSHOT.jar` | ||
|
||
#### Building Docker image | ||
|
||
`docker build -t mekomsolutions/ozone-flink-jobs .` | ||
|
||
### Layout | ||
|
||
src/main/java/net/mekomsolutions/data/pipelines/batch | ||
|
||
src/main/java/net/mekomsolutions/data/pipelines/export | ||
|
||
src/main/java/net/mekomsolutions/data/pipelines/streaming | ||
|
||
src/main/java/net/mekomsolutions/data/pipelines/utils | ||
|
||
src/main/java/net/mekomsolutions/data/pipelines/shared | ||
|
||
|
||
|
||
### Adding new jobs | ||
#### Sources and Sinks | ||
This project uses flink's [Table API & SQL](https://nightlies.apache.org/flink/flink-docs-release-1.15/docs/dev/table/overview/) ,for you to access data you need to setup [temporary](https://nightlies.apache.org/flink/flink-docs-release-1.15/docs/dev/table/sql/create/) tables both for data sources and data sinks. For most cases all the tables you will every need for writting OpenMRS data processing jobs are already added under `src/main/java/net/mekomsolutions/data/pipelines/shared/dsl` . But incase you need to add more you can add them to either `src/main/java/net/mekomsolutions/data/pipelines/shared/dsl/source` or `src/main/java/net/mekomsolutions/data/pipelines/shared/dsl/sink` You will then need to register them in factory class `src/main/java/net/mekomsolutions/data/pipelines/shared/dsl/TableDSLFactory.java` . See `src/main/java/net/mekomsolutions/data/pipelines/streaming/StreamingETLJob.java` for an example of how to use the factory to setup sources and sinks. | ||
|
||
#### Setting up jobs | ||
Assuming you already have all the source and sink tables setup adding new jobs involves; | ||
|
||
- Adding your SQL files to `src/main/resources` | ||
- Registering the jobs in `flink-jobs/src/main/resources/jobs.json` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
#!/bin/sh | ||
: ${JDBC_URL?"Need to set JDBC_URL"} | ||
: ${JDBC_USERNAME:?"Need to set JDBC_USERNAME"} | ||
: ${JDBC_PASSWORD:?"Need to set JDBC_PASSWORDy"} | ||
: ${OUTPUT_DIR:?"Need to set OUTPUT_DIR"} | ||
: ${LOCATION_TAG:?"Need to set LOCATION_TAG"} | ||
java -jar etl-export.jar --jdbc-url $JDBC_URL --jdbc-username $JDBC_USERNAME --jdbc-password $JDBC_PASSWORD --output-dir $OUTPUT_DIR --location-tag $LOCATION_TAG |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
job.streaming=true | ||
openmrs.database.username=root | ||
openmrs.database.password=3cY8Kve4lGey | ||
openmrs.database.batch.url=jdbc:mysql://localhost:3306/openmrs | ||
openmrs.database.streaming.hostname=localhost | ||
openmrs.database.streaming.port=3306 | ||
openmrs.database.streaming.database-name=openmrs | ||
properties.bootstrap.servers=localhost:9092 | ||
|
||
|
||
analytics.database.username=postgres | ||
analytics.database.password=password | ||
analytics.database.batch.url=jdbc:postgresql://localhost:5432/analytics | ||
|
||
analytics.filesystem=true | ||
|
Oops, something went wrong.