Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[#75]: add CI check for playground #111

Draft
wants to merge 22 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
c620369
feat(ci): init
unknowntpo Dec 1, 2024
b081f5c
feat(ci): let ci run for every push
unknowntpo Dec 24, 2024
3d13905
feat(ci): push workflow file to correct location
unknowntpo Dec 24, 2024
f0b178a
refactor(ci): remove unnecessary workflows
unknowntpo Dec 24, 2024
d36023c
feat(shfmt): add .editorconfig
unknowntpo Dec 24, 2024
6722fd8
feat(shfmt): format .sh files
unknowntpo Dec 24, 2024
6ef2db0
refactor(ci): tweak shfmt command
unknowntpo Dec 24, 2024
3575422
refactor(ci): tweak workflow name
unknowntpo Dec 24, 2024
e54b8c4
refactor(ci): remove redundant checkout
unknowntpo Dec 24, 2024
d14e002
fix(ci): change cd dir
unknowntpo Dec 24, 2024
9e627a2
refactor(ci): split steps into two jobs
unknowntpo Dec 24, 2024
acf81dc
refactor(ci): curl in quiet mode to wait for gravitino playground ser…
unknowntpo Dec 24, 2024
ea65542
refactor(ci): use docker compose ps to show status of services
unknowntpo Dec 24, 2024
675fc5a
refactor(ci): add artifacts at test job
unknowntpo Dec 24, 2024
d0c8686
refactor(ci): add --enable-ranger option
unknowntpo Dec 24, 2024
4da2e27
fix(ci): fix wrong playground command
unknowntpo Dec 24, 2024
8301f76
fix(ci): move logs to /tmp/playground-log dir
unknowntpo Dec 24, 2024
423a2cd
fix(ci): disable ranger temporary, FIXME: should revert this
unknowntpo Dec 24, 2024
86c1793
refactor(docker-compose.yml): increase healthcheck timeout
unknowntpo Jan 19, 2025
d1b177f
refactor(ci-test.yml): fix incorrect folder in Test sql step
unknowntpo Jan 19, 2025
6b2b434
refactor(docker-compose.yml): ranger: tweak interval
unknowntpo Jan 19, 2025
272f609
refactor(tests/trino-cross-catalog.sql): rename catalog gt_hive2 to c…
unknowntpo Jan 19, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Top-most EditorConfig file: lookups do not continue into parent directories.
root = true

# Formatting rules applied to every shell script in the repository.
[*.sh]
end_of_line = lf
insert_final_newline = true
indent_size = 2
indent_style = space
# NOTE(review): the two keys below are not core EditorConfig properties;
# presumably they are the shfmt-specific options (shell dialect, and whether
# binary operators such as && and | may start a continuation line) — confirm
# against the shfmt documentation.
shell_variant = bash
binary_next_line = false
85 changes: 85 additions & 0 deletions .github/workflows/ci-test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# CI workflow with two jobs: `lint` (static checks on shell and XML files) and
# `test` (starts the playground and runs the SQL tests against it).
name: "CI"

# Triggered by a push to any branch.
on:
push:
branches:
- "**"
jobs:
# Static checks only; this job does not start any playground service.
lint:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4

# xmllint is installed through the libxml2-utils apt package.
- name: Install xmllint
run: |
sudo apt-get update
sudo apt-get install -y libxml2-utils

# Downloads the shfmt v3.8.0 linux_amd64 release binary and puts it on PATH.
- name: Install shfmt
run: |
curl -sSLo shfmt https://github.com/mvdan/sh/releases/download/v3.8.0/shfmt_v3.8.0_linux_amd64 &&
chmod +x shfmt &&
sudo mv shfmt /usr/local/bin/shfmt

# Runs `shfmt -d` over every *.sh file; NUL-delimited so unusual file names
# are passed through intact.
- name: Lint Shell Scripts
run: |
find . -name '*.sh' -print0 | xargs -0 shfmt -d

# Parses every *.xml file with xmllint; --noout suppresses the parsed output.
- name: Lint XML Files
run: |
find . -name '*.xml' -print0 | xargs -0 xmllint --noout

# Integration job: the steps below start the playground and run the SQL tests.
test:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4

# NOTE(review): presumably installs docker compose at the pinned version;
# confirm against the action's README.
- uses: KengoTODA/actions-setup-docker-compose@v1
with:
version: "2.14.2"

# Starts the playground in the background and blocks until both the Gravitino
# endpoint (port 8090) and the Trino endpoint (port 18080) answer, or until the
# retry budget is exhausted. Fails the step if either is still not answering.
- name: Run Gravitino Playground Services
  id: run-gravitino-playground-services
  timeout-minutes: 40
  run: |
    mkdir -p /tmp/playground-log

    # -f makes curl fail on an HTTP error instead of saving the error page
    # as the yq binary.
    sudo curl -fsSL https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 -o /usr/local/bin/yq
    sudo chmod +x /usr/local/bin/yq
    cd "${{ github.workspace }}"
    ls
    nohup ./playground.sh start -y > /tmp/playground-log/playground.log 2>&1 &
    # NOTE(review): this is launched immediately after playground.sh is put in
    # the background, so it may attach before any container exists — confirm
    # that docker-compose.log is actually populated.
    docker compose logs -f >> /tmp/playground-log/docker-compose.log &

    # True only when BOTH endpoints return a non-empty response body.
    services_ready() {
      [[ -n $(curl -sk http://127.0.0.1:8090) && -n $(curl -sk http://127.0.0.1:18080/v1/info) ]]
    }

    # wait for gravitino trino ready to use
    # The retry cap is a separate condition so that it bounds the wait for
    # both services. Inside a single [[ ]], `A || B && C` parses as
    # `A || (B && C)`, which left the Gravitino wait unbounded.
    i=0
    while ! services_ready && [[ $i -le 300 ]]; do
      echo "Waiting for Gravitino playground to be ready..."
      sleep 5
      i=$((i + 1))
    done
    docker ps
    if services_ready; then
      echo "Gravitino and Trino are ready to use"
    else
      echo "Gravitino or Trino is not ready"
      exit 1
    fi

# Runs tests/runSQLOnPlayground.sh from inside tests/ (the script refers to its
# SQL files with ./ relative paths); `bash -x` traces every command in the log.
- name: Test sql
id: test-sql
timeout-minutes: 40
run: |
cd ${{ github.workspace }}
ls
cd tests/
bash -x ./runSQLOnPlayground.sh
# always(): the log directory is uploaded even when an earlier step failed.
- name: Upload test artifacts
if: always()
uses: actions/upload-artifact@v4
with:
name: test-artifacts
path: |
/tmp/playground-log
retention-days: 7
4 changes: 2 additions & 2 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ services:
healthcheck:
test: ["CMD", "/tmp/healthcheck/ranger-healthcheck.sh"]
interval: 5s
timeout: 60s
retries: 5
timeout: 180s
retries: 10
start_period: 120s
deploy:
resources:
Expand Down
2 changes: 1 addition & 1 deletion healthcheck/gravitino-healthcheck.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ success=false

while [ $attempt -lt $max_attempts ]; do
response=$(curl -X GET -H "Content-Type: application/json" http://127.0.0.1:8090/api/version)

if echo "$response" | grep -q "\"code\":0"; then
success=true
break
Expand Down
4 changes: 2 additions & 2 deletions healthcheck/ranger-healthcheck.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ attempt=0

while [ $attempt -lt $max_attempts ]; do
response=$(curl -s -o /dev/null -w "%{http_code}" -u admin:rangerR0cks! -H "Content-Type: application/json" -X GET http://127.0.0.1:6080/service/public/v2/api/plugins/info)

echo "Ranger health check ${response}"

if [[ ${response} -eq 200 ]]; then
Expand All @@ -33,7 +33,7 @@ while [ $attempt -lt $max_attempts ]; do
echo "Attempt $((attempt + 1)) failed..."
sleep 1
fi

((attempt++))
done

Expand Down
2 changes: 1 addition & 1 deletion init/common/init_metalake_catalog.sh
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ if echo "$response" | grep -q "\"code\":0"; then
else
# Create Mysql catalog for experience Gravitino service
response=$(curl -X POST -H "Accept: application/vnd.gravitino.v1+json" -H "Content-Type: application/json" -d '{ "name":"catalog_mysql", "type":"RELATIONAL", "provider":"jdbc-mysql", "comment":"comment", "properties":{ "jdbc-url":"jdbc:mysql://'${MYSQL_HOST_IP}':3306", "jdbc-user":"mysql", "jdbc-password":"mysql", "jdbc-driver": "com.mysql.cj.jdbc.Driver" } }' http://gravitino:8090/api/metalakes/metalake_demo/catalogs)

if echo "$response" | grep -q "catalog_mysql"; then
true # Placeholder, do nothing
else
Expand Down
2 changes: 1 addition & 1 deletion init/gravitino/init.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,4 @@ echo "Finish downloading"
echo "Start the Gravitino Server"
/bin/bash /root/gravitino/bin/gravitino.sh start &
sleep 3
tail -f /root/gravitino/logs/gravitino-server.log
tail -f /root/gravitino/logs/gravitino-server.log
8 changes: 4 additions & 4 deletions init/ranger/init.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,11 @@ sed -i '$d' /tmp/start-ranger-services.sh

status=0
while [ $status -ne 1 ]; do
status=$(curl -iv -u admin:rangerR0cks! -H "Content-Type: application/json" -X GET http://127.0.0.1:6080/service/public/v2/api/service 2> /dev/null | grep -c '200 OK')
status=$(curl -iv -u admin:rangerR0cks! -H "Content-Type: application/json" -X GET http://127.0.0.1:6080/service/public/v2/api/service 2>/dev/null | grep -c '200 OK')

if [ "$status" -ne '1' ]; then
sleep 5
fi
if [ "$status" -ne '1' ]; then
sleep 5
fi
done

curl -iv -u admin:rangerR0cks! -d @/tmp/ranger/hiveDev.json -H "Content-Type: application/json" -X POST http://127.0.0.1:6080/service/public/v2/api/service
Expand Down
28 changes: 14 additions & 14 deletions install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,20 +24,20 @@ curl -L -o gravitino-playground-main.zip https://github.com/apache/gravitino-pla
unzip gravitino-playground-main.zip

while true; do
# Prompt the user
read -p "Would you like to run gravitino-playground immediately? [Y/N]: " choice
# Prompt the user
read -p "Would you like to run gravitino-playground immediately? [Y/N]: " choice

# Convert choice to uppercase using `tr`
choice=$(echo "$choice" | tr '[:lower:]' '[:upper:]')
# Convert choice to uppercase using `tr`
choice=$(echo "$choice" | tr '[:lower:]' '[:upper:]')

if [[ "$choice" == "Y" ]]; then
echo "Starting gravitino-playground..."
cd ./gravitino-playground-main && ./playground.sh start
break
elif [[ "$choice" == "N" ]]; then
echo "Download complete. You can start gravitino-playground later by running './playground.sh start'."
break
else
echo "Invalid input. Please enter Y or N."
fi
if [[ "$choice" == "Y" ]]; then
echo "Starting gravitino-playground..."
cd ./gravitino-playground-main && ./playground.sh start
break
elif [[ "$choice" == "N" ]]; then
echo "Download complete. You can start gravitino-playground later by running './playground.sh start'."
break
else
echo "Invalid input. Please enter Y or N."
fi
done
95 changes: 95 additions & 0 deletions tests/runSQLOnPlayground.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
#!/bin/bash

# Runs SQL files against the playground through the Trino CLI and Spark and
# compares the output with the checked-in *.out files. Paths are relative, so
# run it from the directory that contains the SQL files.

# install trino cli
# Probe with `command -v` so a missing CLI does not print a "command not found"
# error before the install branch runs.
if command -v trino >/dev/null 2>&1; then
  echo "Trino client installed"
else
  # Stop on the first failure: a failed download or copy would otherwise leave
  # a broken /usr/local/bin/trino behind and only show up as confusing errors
  # in the checks that follow.
  wget https://repo1.maven.org/maven2/io/trino/trino-cli/448/trino-cli-448-executable.jar -O /tmp/trino || exit 1
  sudo cp /tmp/trino /usr/local/bin/trino || exit 1
  sudo chmod +x /usr/local/bin/trino || exit 1
  trino --version
fi

# check trino connection
# Retry every 5 seconds (at most 201 retries) until the test query prints
# something on stdout.
i=0
while [[ -z $(trino --server http://127.0.0.1:18080 -f ./trino-test.sql) && $i -le 200 ]]; do
  sleep 5
  i=$((i + 1))
done

# check trino catalog loaded
# Fail fast when the expected output is missing: diff would print nothing on
# stdout in that case, which must not be mistaken for "outputs are identical".
if [[ ! -f ./trino-test.sql.out ]]; then
  echo "expected output ./trino-test.sql.out not found"
  exit 1
fi
mkdir -p /tmp/playground-log
j=0
trino --server http://127.0.0.1:18080 -f ./trino-test.sql >/tmp/playground-log/trino-test.sql.log
# The loop is driven by diff's exit status (0 = identical), not by its output.
while ! diff ./trino-test.sql.out /tmp/playground-log/trino-test.sql.log >/dev/null && [[ $j -le 200 ]]; do
  sleep 5
  j=$((j + 1))
  trino --server http://127.0.0.1:18080 -f ./trino-test.sql >/tmp/playground-log/trino-test.sql.log
done
# Report a timeout here, with the remaining difference, instead of letting the
# later checks fail for a less obvious reason.
if ! diff ./trino-test.sql.out /tmp/playground-log/trino-test.sql.log; then
  echo "trino-test.sql output did not match the expected output in time"
  exit 1
fi

# run sql and check results
# The log is written under /tmp/playground-log like the other logs of this
# script; that is the directory the CI workflow uploads as an artifact.
mkdir -p /tmp/playground-log
trino --server http://127.0.0.1:18080 -f ./trino-simple.sql >/tmp/playground-log/trino-simple.sql.log
# Branch on diff's exit status: when diff itself fails (e.g. a file is
# missing) it prints nothing on stdout, so testing for empty output would
# report success. Any difference is printed to help debugging.
if diff ./trino-simple.sql.out /tmp/playground-log/trino-simple.sql.log; then
  echo "run trino-simple.sql successfully"
else
  echo "run trino-simple.sql failed"
  exit 1
fi

# Wait until the cross-catalog queries print at least 42 lines, retrying every
# 5 seconds for at most 201 retries.
i=0
num=$(trino --server http://127.0.0.1:18080 -f ./trino-cross-catalog.sql | wc -l)
while [[ ${num} -lt 42 && $i -le 200 ]]; do
  sleep 5
  i=$((i + 1))
  num=$(trino --server http://127.0.0.1:18080 -f ./trino-cross-catalog.sql | wc -l)
done
rm -rf /tmp/playground-log/trino-cross-catalog.sql.log
# The output is sorted before comparison, so row order does not matter.
trino --server http://127.0.0.1:18080 -f ./trino-cross-catalog.sql | sort >>/tmp/playground-log/trino-cross-catalog.sql.log
# Branch on diff's exit status: when diff itself fails (e.g. a file is
# missing) it prints nothing on stdout, so testing for empty output would
# report success. Any difference is printed to help debugging.
if diff ./trino-cross-catalog.sql.out /tmp/playground-log/trino-cross-catalog.sql.log; then
  echo "run trino-cross-catalog.sql successfully"
else
  echo "run trino-cross-catalog.sql failed"
  exit 1
fi

# Replace the gravitino-spark-connector jar inside the playground-spark
# container, then run the Spark SQL checks.

# Download first and stop on failure, so that a failed download does not leave
# the container without any connector jar.
if ! aws s3 cp s3://gravitino-spark-connector/3.4_2.12/ /tmp/gravitino-spark-connector/3.4_2.12 --recursive; then
  echo "download gravitino-spark-connector failed"
  exit 1
fi
for fileName in $(docker exec playground-spark ls /opt/spark/jars/ | grep gravitino-spark-connector); do
  docker exec playground-spark rm -rf "/opt/spark/jars/${fileName}"
done
docker cp /tmp/gravitino-spark-connector/3.4_2.12/gravitino-spark-connector-*.jar playground-spark:/opt/spark/jars/
# NOTE(review): spark-simple.sql.log is removed here but nothing below writes
# it; the spark-simple.sql run is judged by exit status only.
rm -rf /tmp/playground-log/spark-simple.sql.log /tmp/playground-log/union-spark.sql.log
docker cp ./union.sql playground-spark:/opt/spark/work-dir/
docker cp ./spark-simple.sql playground-spark:/opt/spark/work-dir/
sleep 2
if docker exec playground-spark bash /opt/spark/bin/spark-sql -f spark-simple.sql; then
  echo "run spark-simple.sql successfully"
else
  echo "run spark-simple.sql failed"
  exit 1
fi
# The output is sorted before comparison, so row order does not matter.
docker exec playground-spark bash /opt/spark/bin/spark-sql -f union.sql | sort >>/tmp/playground-log/union-spark.sql.log
# Branch on diff's exit status: when diff itself fails (e.g. a file is
# missing) it prints nothing on stdout, so testing for empty output would
# report success. Any difference is printed to help debugging.
if diff ./union-spark.sql.out /tmp/playground-log/union-spark.sql.log; then
  echo "run union.sql in spark successfully"
else
  echo "run union.sql in spark failed"
  exit 1
fi

# Wait until union.sql prints at least 12 lines through Trino, retrying every
# 5 seconds for at most 201 retries.
i=0
num=$(trino --server http://127.0.0.1:18080 -f ./union.sql | wc -l)
while [[ ${num} -lt 12 && $i -le 200 ]]; do
  sleep 5
  i=$((i + 1))
  num=$(trino --server http://127.0.0.1:18080 -f ./union.sql | wc -l)
done
rm -rf /tmp/playground-log/union.sql.log
# The output is sorted before comparison, so row order does not matter.
trino --server http://127.0.0.1:18080 -f ./union.sql | sort >>/tmp/playground-log/union.sql.log
# Branch on diff's exit status: when diff itself fails (e.g. a file is
# missing) it prints nothing on stdout, so testing for empty output would
# report success. Any difference is printed to help debugging.
if diff ./union-trino.sql.out /tmp/playground-log/union.sql.log; then
  echo "run union.sql in trino successfully"
else
  echo "run union.sql in trino failed"
  exit 1
fi
38 changes: 38 additions & 0 deletions tests/spark-simple.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
-- Spark SQL smoke test: exercises catalog_hive, catalog_rest and
-- catalog_iceberg in turn, then reads back two pre-existing tables.

-- catalog_hive: create a Parquet table partitioned by department and
-- overwrite two of its partitions.
USE catalog_hive;
CREATE DATABASE IF NOT EXISTS product;
USE product;

CREATE TABLE IF NOT EXISTS employees (
id INT,
name STRING,
age INT
)
PARTITIONED BY (department STRING)
STORED AS PARQUET;
DESC TABLE EXTENDED employees;

INSERT OVERWRITE TABLE employees PARTITION(department='Engineering') VALUES (1, 'John Doe', 30), (2, 'Jane Smith', 28);
INSERT OVERWRITE TABLE employees PARTITION(department='Marketing') VALUES (3, 'Mike Brown', 32);

-- catalog_rest: create the sales database and a customers table, then insert
-- two rows.
-- NOTE(review): unlike the catalog_hive section, these CREATE statements have
-- no IF NOT EXISTS, so a second run against the same playground presumably
-- fails — confirm whether that is intended.
-- NOTE(review): the '[email protected]' values look like an e-mail
-- obfuscation placeholder rather than real test data — confirm against the
-- repository copy of this file.
use catalog_rest;
create database sales;
use sales;
create table customers (customer_id int, customer_name varchar(100), customer_email varchar(100));
describe extended customers;
insert into customers (customer_id, customer_name, customer_email) values (11,'Rory Brown','[email protected]');
insert into customers (customer_id, customer_name, customer_email) values (12,'Jerry Washington','[email protected]');

-- catalog_iceberg: create a format-version 2 table with merge-on-read write
-- modes and run INSERT, UPDATE, DELETE and MERGE against it.
-- NOTE(review): mydb is used without being created in this file — presumably
-- it is provisioned by the playground; verify.
use catalog_iceberg;
use mydb;
create table abc(a int, b int) partitioned by (a) TBLPROPERTIES ('format-version'='2', 'write.merge.mode'='merge-on-read', 'write.delete.mode'='merge-on-read');
insert into abc values(1,2);
insert into abc values(2,3);
insert into abc values(3,4);
update abc set a = 4 where b = 4;
delete from abc where a = 1;
merge into abc USING (select 2 as a,20 as b) as t on abc.a = t.a when matched then update set * when not matched then insert *;
merge into abc USING (select 8 as a,8 as b) as t on abc.a = t.a when matched then update set * when not matched then insert *;

-- Read back two tables that this file does not create.
select * from catalog_iceberg.mydb.example_table;

select * from catalog_hive.product.page_views where country = 'USA';
4 changes: 4 additions & 0 deletions tests/spark-simple.sql.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
1 2021-01-01 10.5
2 2021-01-02 20.5
3 2021-01-03 30.75
2023-12-01 08:15:30 123457 http://example.com/about 2023-12-01 USA
22 changes: 22 additions & 0 deletions tests/trino-cross-catalog.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
-- Query 1: the single employee with the highest summed total_amount, joining
-- catalog_hive.sales.sales with catalog_postgres.hr.employees on employee_id.
SELECT given_name, family_name, job_title, sum(total_amount) AS total_sales
FROM catalog_hive.sales.sales as s,
catalog_postgres.hr.employees AS e
where s.employee_id = e.employee_id
GROUP BY given_name, family_name, job_title
ORDER BY total_sales DESC
LIMIT 1;

-- Query 2: total spent per customer and store location, using only
-- catalog_hive tables; ordered by location, then by amount descending.
SELECT customer_name, location, SUM(total_amount) AS total_spent
FROM catalog_hive.sales.sales AS s,
catalog_hive.sales.stores AS l,
catalog_hive.sales.customers AS c
WHERE s.store_id = l.store_id AND s.customer_id = c.customer_id
GROUP BY location, customer_name
ORDER BY location, SUM(total_amount) DESC;

-- Query 3: average rating and total sales per employee, joining two
-- catalog_postgres tables with catalog_hive.sales.sales. There is no ORDER BY,
-- so the row order of this result is not fixed by the query.
SELECT e.employee_id, given_name, family_name, AVG(rating) AS average_rating, SUM(total_amount) AS total_sales
FROM catalog_postgres.hr.employees AS e,
catalog_postgres.hr.employee_performance AS p,
catalog_hive.sales.sales AS s
WHERE e.employee_id = p.employee_id AND p.employee_id = s.employee_id
GROUP BY e.employee_id, given_name, family_name;
Loading