diff --git a/index.html b/index.html index ea99519a9..dd2a29e69 100644 --- a/index.html +++ b/index.html @@ -418,8 +418,8 @@ ,{"system":"MySQL (MyISAM)","date":"2022-07-01","machine":"c6a.4xlarge, 500gb gp2","cluster_size":1,"comment":"","tags":["C++","row-oriented","MySQL compatible"],"load_time":2512,"data_size":121588958061,"result":[[0,0,0],[283.32,276.83,274.52],[276.93,278.29,283.27],[28.83,23.63,21.55],[46.41,40.81,40.93],[467.04,467.39,469.08],[31.02,25.89,24.2],[277.89,275.3,277.3],[329.34,325.8,325.35],[342.86,338.43,336.95],[282.03,279.87,281.22],[277.74,282.68,282],[335.66,334.83,336.44],[305.24,310.39,307.3],[337.41,338.52,342.94],[308.66,307.34,306.27],[738.38,748.44,740.75],[738.75,734.01,738.25],[867.01,872.92,868.84],[25.65,20.61,18.46],[312.39,313.67,306.66],[301.66,305.12,308.01],[298.12,298.44,312.4],[311.34,309.9,311.85],[281.87,278.5,275],[277.46,277.46,277.46],[280.75,278.04,281.76],[263.9,417.39,406.88],[707.21,711.96,705],[668.1,668.33,665.96],[330.31,333.36,331.94],[506.57,506.18,500.53],[2604.49,2681.96,2703.12],[830.65,832.88,831.14],[831.98,830.46,833.41],[608.49,608.51,613.68],[4.56,4.13,4.16],[3.8,3.8,3.7],[1.65,1.45,1.46],[6.33,5.14,6.15],[1.6,1.41,1.41],[1.56,1.42,1.39],[7.04,1.17,1.13]],"source":"mysql-myisam/results/c6a.4xlarge.json"} ,{"system":"MySQL","date":"2022-07-01","machine":"c6a.4xlarge, 500gb gp2","cluster_size":1,"comment":"","tags":["C++","row-oriented","MySQL 
compatible"],"load_time":9472,"data_size":171953585825,"result":[[339.77,339.88,339.77],[364.91,371.86,367.55],[366.2,368.91,389.66],[364.39,377.53,571.45],[377.69,390.02,384.86],[569.48,576.51,574.68],[367.4,368.23,370.41],[371.29,384.02,613.22],[478.85,683.22,495.68],[489.9,635.96,662.43],[386.07,396.49,640.15],[389.13,412.55,444.12],[447.97,455.54,448.06],[423.22,845.44,813.6],[452.48,460.07,453.98],[577.54,623.21,586.49],[852.07,856.36,862.66],[838.09,848.92,851.12],[1006.37,1011.16,1023.17],[369.76,375.61,415.28],[412.45,419.9,456.62],[411.65,432.88,482.2],[412.73,420.73,429.5],[551.16,577.62,545.45],[382.89,394.76,386.37],[380.9,391.4,385.05],[385.3,394.67,460.32],[388.95,394.7,387.21],[800.33,807.9,807.11],[706.03,745.27,718.9],[450.9,489.59,530.97],[625.5,651.93,647.32],[2721.13,2792.12,2819.26],[945.9,954.94,957.54],[945.42,953.78,965.16],[684.36,716.29,708.75],[10.01,3.79,3.77],[7.48,3.32,3.27],[5.09,0.98,0.96],[8.7,4.77,4.68],[4.82,0.76,0.74],[4.46,0.77,0.75],[7.04,1.17,1.13]],"source":"mysql/results/c6a.4xlarge.json"} ,{"system":"Oxla","date":"2024-04-09","machine":"c6a.4xlarge, 500gb gp2","cluster_size":1,"comment":"Ingests data only from non-compressed cvs.","tags":["C","analytical","somewhat PostgreSQL 
compatible"],"load_time":497.815,"data_size":17394972923,"result":[[3.112892,0.068225,0.049215],[1.476993,0.068502,0.01352],[1.532504,0.015794,0.01899],[1.541791,0.043208,0.090244],[1.424205,1.114138,1.079011],[1.546764,1.340306,1.339957],[0.202873,0.009787,0.008214],[1.017122,0.01247,0.010861],[1.790766,1.816432,1.681662],[2.06058,2.051205,2.05747],[0.166164,0.149605,0.147586],[0.33821,0.15334,0.15212],[0.968408,0.975795,0.932127],[1.641231,1.648973,1.69153],[1.039926,1.021776,1.015062],[1.059569,1.038191,1.016849],[2.930077,2.780725,2.786122],[2.7766,2.745188,2.827054],[5.474963,5.455883,5.462812],[0.069049,0.037876,0.030425],[5.294758,2.818725,2.803313],[null,null,null],[null,null,null],[21.034479,18.253271,6.146486],[0.17394,0.151798,0.146398],[0.180155,0.170271,0.177003],[0.22494,0.216158,0.216051],[0.978861,0.973059,0.964485],[null,null,null],[0.030928,0.02037,0.020366],[0.408601,0.412485,0.408602],[0.875709,0.743332,0.704842],[7.962516,7.867736,7.594272],[6.209667,5.892066,5.963681],[5.931634,5.947336,6.005506],[0.577314,0.583573,0.545736],[0.126127,0.090768,0.094307],[0.110712,0.04149,0.039939],[0.060824,0.043637,0.030213],[0.322545,0.204934,0.185178],[0.121207,0.011082,0.011699],[0.069138,0.012728,0.014108],[0.030538,0.028048,0.030625]],"source":"oxla/results/c6a.4xlarge.json"} -,{"hide":false,"system":"ParadeDB","date":"2024-02-02","machine":"c6a.4xlarge, 1500gb gp2","cluster_size":1,"comment":"The results for (c6a.4xlarge, 500gb gp2) are also submitted here for easy comparison with Elasticsearch","tags":["Rust","row-oriented","column-oriented","search","PostgreSQL 
compatible"],"load_time":1294,"data_size":15415061091,"result":[[0.170805,0.005724,0.006098],[0.207075,0.09786,0.097367],[0.223766,0.088988,0.083545],[0.428068,0.097471,0.098085],[1.02813,0.842475,0.847983],[1.47076,1.27149,1.24457],[0.079231,0.005903,0.006381],[0.20438,0.102631,0.101062],[1.61474,1.57814,1.57654],[1.34235,1.09032,1.07038],[0.624544,0.350221,0.354308],[0.592603,0.38256,0.382334],[1.5044,1.32063,1.31768],[3.54282,2.67601,2.61359],[1.66994,1.46717,1.4526],[1.16541,0.999982,0.975601],[3.36336,2.90587,2.89977],[3.1358,2.75251,2.71843],[7.57537,5.9709,6.09835],[0.450773,0.136726,0.134937],[9.75662,1.39915,1.44835],[11.2973,1.70324,1.6996],[22.2634,4.01717,3.96894],[57.8901,10.7998,10.8794],[3.10183,0.605149,0.595146],[0.829921,0.542481,0.531427],[3.01411,0.668447,0.680061],[9.88178,2.28414,2.28912],[9.13347,5.15906,5.15358],[0.599997,0.454952,0.463381],[2.34495,1.18331,1.19755],[5.76726,1.62152,1.52735],[8.98009,8.85184,8.65374],[11.9086,6.49934,6.7125],[12.2924,7.10393,7.21253],[2.04731,1.8265,1.84334],[0.274516,0.252795,0.251755],[0.338633,0.253551,0.252828],[0.339914,0.254548,0.253683],[0.244831,0.158776,0.158403],[0.382238,0.253784,0.254412],[0.404016,0.253894,0.253025],[0.358676,0.204648,0.204944]],"source":"paradedb/results/c6a.4xlarge.1500gb.json"} -,{"hide":false,"system":"ParadeDB","date":"2024-02-02","machine":"c6a.4xlarge, 500gb gp2","cluster_size":1,"comment":"","tags":["Rust","row-oriented","column-oriented","search","PostgreSQL 
compatible"],"load_time":1294,"data_size":15415061091,"result":[[0.170805,0.005724,0.006098],[0.207075,0.09786,0.097367],[0.223766,0.088988,0.083545],[0.428068,0.097471,0.098085],[1.02813,0.842475,0.847983],[1.47076,1.27149,1.24457],[0.079231,0.005903,0.006381],[0.20438,0.102631,0.101062],[1.61474,1.57814,1.57654],[1.34235,1.09032,1.07038],[0.624544,0.350221,0.354308],[0.592603,0.38256,0.382334],[1.5044,1.32063,1.31768],[3.54282,2.67601,2.61359],[1.66994,1.46717,1.4526],[1.16541,0.999982,0.975601],[3.36336,2.90587,2.89977],[3.1358,2.75251,2.71843],[7.57537,5.9709,6.09835],[0.450773,0.136726,0.134937],[9.75662,1.39915,1.44835],[11.2973,1.70324,1.6996],[22.2634,4.01717,3.96894],[57.8901,10.7998,10.8794],[3.10183,0.605149,0.595146],[0.829921,0.542481,0.531427],[3.01411,0.668447,0.680061],[9.88178,2.28414,2.28912],[9.13347,5.15906,5.15358],[0.599997,0.454952,0.463381],[2.34495,1.18331,1.19755],[5.76726,1.62152,1.52735],[8.98009,8.85184,8.65374],[11.9086,6.49934,6.7125],[12.2924,7.10393,7.21253],[2.04731,1.8265,1.84334],[0.274516,0.252795,0.251755],[0.338633,0.253551,0.252828],[0.339914,0.254548,0.253683],[0.244831,0.158776,0.158403],[0.382238,0.253784,0.254412],[0.404016,0.253894,0.253025],[0.358676,0.204648,0.204944]],"source":"paradedb/results/c6a.4xlarge.json"} +,{"hide":false,"system":"ParadeDB (Parquet, partitioned)","date":"2024-07-13","machine":"c6a.4xlarge, 500gb gp2","cluster_size":1,"comment":"","tags":["Rust","row-oriented","column-oriented","search","PostgreSQL 
compatible"],"load_time":0,"data_size":14779976446,"result":[[0.428529,0.169996,0.165737],[0.288946,0.183979,0.181161],[0.320601,0.19505,0.197994],[0.518503,0.211602,0.208391],[0.983889,0.535952,0.540423],[0.933326,0.71414,0.726933],[0.296356,0.18583,0.184422],[0.356075,0.19027,0.18721],[0.975868,0.638999,0.641937],[1.32702,0.867837,0.869265],[0.613737,0.27098,0.272166],[0.829598,0.313263,0.307575],[1.0526,0.647612,0.649306],[2.49939,0.985964,0.999792],[1.11407,0.709399,0.712346],[0.819786,0.573928,0.57883],[2.59222,1.24389,1.23864],[2.48332,1.23208,1.22347],[4.81146,2.17722,2.18397],[0.319524,0.200827,0.198878],[9.63605,2.08945,2.12315],[11.2103,1.9764,1.97172],[21.749,3.67693,3.67421],[55.5353,12.1117,12.171],[2.77726,0.514813,0.515359],[0.801555,0.387173,0.392298],[2.68619,0.528309,0.534164],[9.53043,1.99481,2.01316],[8.25088,4.33341,4.35408],[3.85116,3.68595,3.66299],[2.3524,0.811623,0.79966],[6.04314,0.931478,0.915213],[5.52233,2.58091,2.59227],[9.95394,3.09667,3.12531],[10.1152,3.1241,3.14043],[0.960004,0.758048,0.756207],[0.345793,0.205136,0.222101],[0.294038,0.178427,0.180238],[0.341727,0.177457,0.175767],[0.630001,0.348767,0.348869],[0.449262,0.175673,0.178257],[0.446425,0.172544,0.169029],[0.451342,0.172461,0.173655]],"source":"paradedb/results/partitioned.json"} +,{"hide":false,"system":"ParadeDB (Parquet, single)","date":"2024-07-13","machine":"c6a.4xlarge, 500gb gp2","cluster_size":1,"comment":"","tags":["Rust","row-oriented","column-oriented","search","PostgreSQL 
compatible"],"load_time":0,"data_size":14779976446,"result":[[0.30371,0.135831,0.134925],[0.331691,0.159252,0.159245],[0.400919,0.209442,0.209561],[0.502732,0.194204,0.194097],[0.950075,0.568904,0.569178],[0.903286,0.763889,0.774315],[0.24162,0.16433,0.166469],[0.235,0.160248,0.161342],[0.904137,0.684214,0.679888],[1.29153,0.912441,0.907842],[0.595888,0.287961,0.291285],[0.720569,0.337319,0.337211],[0.928428,0.695999,0.69211],[2.43338,1.04071,1.03619],[1.00484,0.753286,0.763281],[0.856187,0.621652,0.629723],[2.53197,1.26466,1.29484],[2.50383,1.26855,1.26425],[4.64118,2.16319,2.19976],[0.2972,0.192337,0.194048],[9.72895,2.19798,2.16338],[11.2355,2.07493,2.07859],[21.9552,3.80413,3.8139],[55.8402,12.0323,12.0304],[2.64233,0.562578,0.562455],[0.797866,0.443126,0.44026],[2.55378,0.576578,0.573527],[9.51073,2.08182,2.08824],[8.25622,4.44961,4.44914],[0.777114,0.611315,0.604831],[2.28008,0.860235,0.871169],[5.79713,0.958523,0.966058],[5.40475,2.54125,2.65384],[9.989,3.18323,3.15122],[10.1684,3.19349,3.2022],[0.939116,0.713083,0.714321],[0.421327,0.276761,0.295984],[0.295189,0.212242,0.226419],[0.316216,0.216213,0.219],[0.680445,0.427435,0.427072],[0.348007,0.143995,0.145892],[0.328972,0.144153,0.143452],[0.338385,0.16674,0.159561]],"source":"paradedb/results/single.json"} ,{"system":"Pinot","date":"2022-07-01","machine":"c6a.4xlarge, 500gb gp2","cluster_size":1,"comment":"It successfully loaded only 94465149 out of 99997497 records. Some queries returned NullPointerException. The loading process is painful - splitting to 100 pieces required. 
It does not correctly report errors on data loading, the results may be incorrect.","tags":["Java","column-oriented"],"load_time":2032,"data_size":null,"result":[[0.002,0.001,0.001],[0.186,0.186,0.185],[0.251,0.276,0.258],[0.475,0.281,0.238],[3.907,3.655,3.633],[30.471,14.687,14.93],[null,null,null],[0.135,0.134,0.148],[3.039,2.902,2.938],[3.159,3.212,3.225],[4.217,4.197,4.384],[4.145,4.124,4.121],[2.989,3.145,3.18],[6.402,6.886,6.374],[3.245,3.35,3.129],[5.112,5.027,5.141],[5.509,5.279,5.257],[0.865,0.856,0.829],[null,null,null],[0.017,0.015,0.015],[54.348,19.562,19.128],[null,null,null],[76.596,74.719,14.228],[7.441,5.77,5.87],[0.376,0.327,0.286],[7.689,0.395,1.281],[3.434,0.499,0.5],[27.679,2.378,2.393],[null,null,null],[2.221,2.227,2.167],[4.941,4.639,4.565],[5.641,5.37,5.007],[5.295,5.006,5.357],[5.28,5.21,5.105],[6.231,6.238,6.385],[5.918,5.933,5.934],[0.26,0.202,0.21],[0.364,0.072,0.069],[0.042,0.034,0.035],[1.483,0.686,0.651],[0.113,0.071,0.079],[0.042,0.051,0.037],[null,null,null]],"source":"pinot/results/c6a.4xlarge.json"} ,{"hide":true,"system":"PostgreSQL (tuned)","date":"2022-12-18","machine":"c6a.4xlarge, 500gb gp2","cluster_size":1,"comment":"","tags":["C","row-oriented","PostgreSQL 
compatible"],"load_time":11800,"data_size":128875248605,"result":[[4.10957,0.687829,0.702019],[3.92694,0.63874,0.636411],[262.331,262.793,262.826],[6.14366,1.16457,1.20297],[26.752,27.4474,27.4878],[50.6555,47.6136,49.2373],[0.035605,0.002266,0.002208],[1.07557,0.599462,0.597673],[18.537,17.4348,17.7942],[309.49,306.831,306.894],[7.78261,2.95749,3.00855],[325.58,315.688,319.237],[4.03189,2.31557,2.21055],[18.8998,10.0497,10.0307],[272.381,271.91,271.87],[5.91906,4.17788,4.21394],[15.6578,15.8794,15.937],[0.025629,0.001484,0.00153],[312.64,312.543,310.582],[0.026206,0.001114,0.001115],[13.1447,0.090463,0.104537],[0.11893,0.093162,0.090471],[18.7775,0.107025,0.110361],[0.131412,0.089608,0.087852],[0.048958,0.00153,0.001405],[0.016134,0.001351,0.001268],[0.021939,0.001781,0.00164],[265.557,260.971,261.025],[277.516,276.053,277.378],[7.56565,6.50964,6.54902],[266.723,266.389,266.231],[269.738,269.255,269.193],[331.115,330.011,330.913],[416.169,421.255,423.724],[408.434,409.538,407.552],[35.5578,18.1707,18.6893],[3.17403,0.652905,0.698142],[0.730699,0.489315,0.465665],[0.563463,0.319131,0.3182],[1.23744,0.989391,1.01963],[0.752814,0.509212,0.511453],[0.677541,0.435178,0.437736],[1.18281,0.688545,0.697523]],"source":"postgresql-tuned/results/c6a.4xlarge.json"} ,{"system":"PostgreSQL","date":"2022-07-01","machine":"c6a.4xlarge, 500gb gp2","cluster_size":1,"comment":"","tags":["C","row-oriented","PostgreSQL 
compatible"],"load_time":2342,"data_size":77797067741,"result":[[439.753,309.785,282.017],[317.874,254.238,254.941],[262.883,263.072,263.09],[32.421,5.31,5.06],[57.134,42.648,42.334],[358.423,356.315,358.342],[31.524,5.35,4.994],[263.145,263.193,263.165],[323.659,322.858,321.918],[327.395,326.17,326.231],[265.983,265.681,265.912],[269.984,265.336,265.379],[284.096,284.56,282.234],[277.25,279.455,280.035],[285.66,286.2,283.611],[66.605,32.023,38.282],[312.452,304.431,305.391],[289.209,290.449,287.578],[331.706,327.485,334.428],[24.646,2.543,2.263],[267.561,267.496,267.524],[267.729,267.69,268.184],[263.074,263.12,267.04],[267.602,267.488,267.494],[263.141,263.859,263.137],[262.923,263.102,263.113],[262.885,263.088,263.114],[267.864,269.127,268.204],[303.376,306.925,308.664],[263.221,263.119,263.148],[270.814,270.575,270.294],[278.342,275.925,276.224],[584.599,576.932,591.502],[462.576,446.962,439.779],[429.93,417.696,416.704],[296.875,297.283,295.14],[3.461,0.842,0.794],[2.179,0.564,0.558],[2.258,0.566,0.416],[2.805,1.311,1.317],[2.936,0.82,0.615],[2.197,0.736,0.535],[1.983,0.32,0.312]],"source":"postgresql/results/c6a.4xlarge.json"} diff --git a/paradedb/README.md b/paradedb/README.md index 5af53db36..9e5354d57 100644 --- a/paradedb/README.md +++ b/paradedb/README.md @@ -1,23 +1,27 @@ # ParadeDB -ParadeDB is an alternative to Elasticsearch built on Postgres. +ParadeDB is an Elasticsearch alternative built on Postgres. - [GitHub](https://github.com/paradedb/paradedb) - [Homepage](https://paradedb.com) -The published benchmarks are based on ParadeDB version `v0.5.4`. +The published benchmarks are based on ParadeDB version `v0.8.4`. ## Benchmarks -To run the benchmarks yourself: +To run the benchmarks: 1. Manually start an AWS EC2 instance - `c6a.4xlarge` - - Ubuntu Server 22.04 LTS (HVM), SSD Volume Type - - Root 500GB gp2 SSD + - Ubuntu Server 22.04 LTS (HVM), SSD Volume Type\* + - Root 500GB gp2 SSD\*\* 2. 
Wait for the status check to pass, then SSH into the instance via EC2 Instance Connect 3. Clone this repository via `git clone https://github.com/ClickHouse/ClickBench` 4. Navigate to the `paradedb` directory via `cd ClickBench/paradedb` -5. Run the benchmark via `./benchmark.sh` +5. Run the benchmark via `./benchmark.sh`. This will run the benchmarks against the default settings below. -The benchmark should be completed in under an hour. If you'd like to benchmark against a different version of ParadeDB, modify the Docker tag in `benchmark.sh`. You can find the list of available tags [here](https://hub.docker.com/r/paradedb/paradedb/tags). +The benchmark script takes the following parameters: + +- `-w` - Type of workload, either `single` or `partitioned`. The default is `single`, meaning it uses the `hits.parquet` ClickBench dataset. The `partitioned` option uses the ClickBench partitioned dataset. + +The benchmark should be completed within a few minutes. If you'd like to benchmark against a different version of ParadeDB, modify the Docker tag in the `benchmark.sh` script. You can find the list of available tags [here](https://hub.docker.com/r/paradedb/paradedb/tags). diff --git a/paradedb/benchmark.sh b/paradedb/benchmark.sh index b126666f5..6c8142592 100755 --- a/paradedb/benchmark.sh +++ b/paradedb/benchmark.sh @@ -1,65 +1,107 @@ #!/bin/bash -# Cleanup function to reset the environment +PARADEDB_VERSION=0.8.4 +FLAG_WORKLOAD=single + +usage() { + echo "Usage: $0 [OPTIONS]" + echo "Options:" + echo " -h (optional), Display this help message" + echo " -w (optional), Workload type, either <single> or <partitioned>. Default is <single>." + exit 1 +} + cleanup() { - echo "" - echo "Cleaning up..." - if sudo docker ps -q --filter "name=paradedb" | grep -q .; then - sudo docker kill paradedb - fi - sudo docker rm paradedb - echo "Done, goodbye!" + echo "Done, goodbye!" 
} -# Register the cleanup function to run when the script exits trap cleanup EXIT -sudo apt-get update -sudo apt-get install -y docker.io -sudo apt-get install -y postgresql-client +while getopts "hw:" flag +do + case $flag in + h) + usage + ;; + w) + FLAG_WORKLOAD=$OPTARG + case "$FLAG_WORKLOAD" in single | partitioned): + ;; + *) + usage + ;; + esac + ;; + *) + usage + ;; + esac +done -if [ ! -e hits.tsv ]; then - echo "" - echo "Downloading dataset..." - wget --no-verbose --continue 'https://datasets.clickhouse.com/hits_compatible/hits.tsv.gz' - gzip -d hits.tsv.gz -else - echo "" - echo "Dataset already downloaded, skipping..." -fi +echo "" +echo "Installing dependencies..." +sudo apt-get update -y +sudo apt-get install -y docker.io postgresql-client echo "" echo "Pulling ParadeDB image..." sudo docker run \ - -e POSTGRES_USER=myuser \ - -e POSTGRES_PASSWORD=mypassword \ - -e POSTGRES_DB=mydb \ - -p 5432:5432 \ - --name paradedb \ - -d \ - paradedb/paradedb:0.5.4 + --name paradedb \ + -e POSTGRESQL_USERNAME=myuser \ + -e POSTGRESQL_PASSWORD=mypassword \ + -e POSTGRESQL_DATABASE=mydb \ + -e POSTGRESQL_POSTGRES_PASSWORD=postgres \ + -p 5432:5432 \ + -d \ + paradedb/paradedb:$PARADEDB_VERSION echo "" echo "Waiting for ParadeDB to start..." sleep 10 +echo "ParadeDB is ready!" + +echo "" +echo "Downloading ClickBench dataset ($FLAG_WORKLOAD)..." +if [ $FLAG_WORKLOAD == "single" ]; then + if [ ! -e /tmp/hits.parquet ]; then + wget --no-verbose --continue -O /tmp/hits.parquet https://datasets.clickhouse.com/hits_compatible/hits.parquet + fi + if ! sudo docker exec paradedb sh -c '[ -f /tmp/hits.parquet ]'; then + sudo docker cp /tmp/hits.parquet paradedb:/tmp/hits.parquet + fi +elif [ $FLAG_WORKLOAD == "partitioned" ]; then + if [ ! 
-e /tmp/partitioned/ ]; then + mkdir -p /tmp/partitioned + seq 0 99 | xargs -P100 -I{} bash -c 'wget --no-verbose --directory-prefix /tmp/partitioned --continue https://datasets.clickhouse.com/hits_compatible/athena_partitioned/hits_{}.parquet' + fi + if ! sudo docker exec paradedb sh -c '[ -f /tmp/partitioned ]'; then + sudo docker cp /tmp/partitioned paradedb:tmp + fi +else + echo "Invalid workload type: $FLAG_WORKLOAD" + exit 1 +fi echo "" -echo "Loading dataset..." -export PGPASSWORD='mypassword' -psql -h localhost -U myuser -d mydb -p 5432 -t < create.sql -psql -h localhost -U myuser -d mydb -p 5432 -t -c 'CALL paradedb.init();' -c '\timing' -c "\\copy hits FROM 'hits.tsv'" +echo "Creating database..." +export PGPASSWORD='postgres' +if [ $FLAG_WORKLOAD == "single" ]; then + psql -h localhost -U postgres -d mydb -p 5432 -t < create-single.sql +else + psql -h localhost -U postgres -d mydb -p 5432 -t < create-partitioned.sql +fi -# COPY 99997497 -# Time: 1268695.244 ms (21:08.695) +# load_time is zero, since the data is directly read from the Parquet file(s) +# Time: 0000000.000 ms (00:00.000) echo "" echo "Running queries..." ./run.sh 2>&1 | tee log.txt -sudo docker exec -it paradedb du -bcs /var/lib/postgresql/data - -# 15415061091 /var/lib/postgresql/data -# 15415061091 total +# data_size is the Parquet file(s) total size +# 14779976446 +echo "" +echo "Parsing results..." 
cat log.txt | grep -oP 'Time: \d+\.\d+ ms' | sed -r -e 's/Time: ([0-9]+\.[0-9]+) ms/\1/' | - awk '{ if (i % 3 == 0) { printf "[" }; printf $1 / 1000; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }' + awk '{ if (i % 3 == 0) { printf "[" }; printf $1 / 1000; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }' diff --git a/paradedb/create-partitioned.sql b/paradedb/create-partitioned.sql new file mode 100644 index 000000000..ba768ab98 --- /dev/null +++ b/paradedb/create-partitioned.sql @@ -0,0 +1,10 @@ +CREATE FOREIGN DATA WRAPPER parquet_wrapper + HANDLER parquet_fdw_handler + VALIDATOR parquet_fdw_validator; + +CREATE SERVER parquet_server + FOREIGN DATA WRAPPER parquet_wrapper; + +CREATE FOREIGN TABLE hits () +SERVER parquet_server +OPTIONS (files '/tmp/partitioned/*.parquet'); diff --git a/paradedb/create-single.sql b/paradedb/create-single.sql new file mode 100644 index 000000000..70ff561b2 --- /dev/null +++ b/paradedb/create-single.sql @@ -0,0 +1,10 @@ +CREATE FOREIGN DATA WRAPPER parquet_wrapper + HANDLER parquet_fdw_handler + VALIDATOR parquet_fdw_validator; + +CREATE SERVER parquet_server + FOREIGN DATA WRAPPER parquet_wrapper; + +CREATE FOREIGN TABLE IF NOT EXISTS hits () +SERVER parquet_server +OPTIONS (files '/tmp/hits.parquet'); diff --git a/paradedb/create.sql b/paradedb/create.sql deleted file mode 100644 index b6e21d480..000000000 --- a/paradedb/create.sql +++ /dev/null @@ -1,109 +0,0 @@ -CALL paradedb.init(); -CREATE TABLE IF NOT EXISTS hits -( - WatchID BIGINT NOT NULL, - JavaEnable SMALLINT NOT NULL, - Title TEXT NOT NULL, - GoodEvent SMALLINT NOT NULL, - EventTime TIMESTAMP NOT NULL, - EventDate Date NOT NULL, - CounterID INTEGER NOT NULL, - ClientIP INTEGER NOT NULL, - RegionID INTEGER NOT NULL, - UserID BIGINT NOT NULL, - CounterClass SMALLINT NOT NULL, - OS SMALLINT NOT NULL, - UserAgent SMALLINT NOT NULL, - URL TEXT NOT NULL, - Referer TEXT NOT NULL, - IsRefresh SMALLINT NOT NULL, - RefererCategoryID SMALLINT NOT NULL, - 
RefererRegionID INTEGER NOT NULL, - URLCategoryID SMALLINT NOT NULL, - URLRegionID INTEGER NOT NULL, - ResolutionWidth SMALLINT NOT NULL, - ResolutionHeight SMALLINT NOT NULL, - ResolutionDepth SMALLINT NOT NULL, - FlashMajor SMALLINT NOT NULL, - FlashMinor SMALLINT NOT NULL, - FlashMinor2 TEXT NOT NULL, - NetMajor SMALLINT NOT NULL, - NetMinor SMALLINT NOT NULL, - UserAgentMajor SMALLINT NOT NULL, - UserAgentMinor VARCHAR(255) NOT NULL, - CookieEnable SMALLINT NOT NULL, - JavascriptEnable SMALLINT NOT NULL, - IsMobile SMALLINT NOT NULL, - MobilePhone SMALLINT NOT NULL, - MobilePhoneModel TEXT NOT NULL, - Params TEXT NOT NULL, - IPNetworkID INTEGER NOT NULL, - TraficSourceID SMALLINT NOT NULL, - SearchEngineID SMALLINT NOT NULL, - SearchPhrase TEXT NOT NULL, - AdvEngineID SMALLINT NOT NULL, - IsArtifical SMALLINT NOT NULL, - WindowClientWidth SMALLINT NOT NULL, - WindowClientHeight SMALLINT NOT NULL, - ClientTimeZone SMALLINT NOT NULL, - ClientEventTime TIMESTAMP NOT NULL, - SilverlightVersion1 SMALLINT NOT NULL, - SilverlightVersion2 SMALLINT NOT NULL, - SilverlightVersion3 INTEGER NOT NULL, - SilverlightVersion4 SMALLINT NOT NULL, - PageCharset TEXT NOT NULL, - CodeVersion INTEGER NOT NULL, - IsLink SMALLINT NOT NULL, - IsDownload SMALLINT NOT NULL, - IsNotBounce SMALLINT NOT NULL, - FUniqID BIGINT NOT NULL, - OriginalURL TEXT NOT NULL, - HID INTEGER NOT NULL, - IsOldCounter SMALLINT NOT NULL, - IsEvent SMALLINT NOT NULL, - IsParameter SMALLINT NOT NULL, - DontCountHits SMALLINT NOT NULL, - WithHash SMALLINT NOT NULL, - HitColor CHAR NOT NULL, - LocalEventTime TIMESTAMP NOT NULL, - Age SMALLINT NOT NULL, - Sex SMALLINT NOT NULL, - Income SMALLINT NOT NULL, - Interests SMALLINT NOT NULL, - Robotness SMALLINT NOT NULL, - RemoteIP INTEGER NOT NULL, - WindowName INTEGER NOT NULL, - OpenerName INTEGER NOT NULL, - HistoryLength SMALLINT NOT NULL, - BrowserLanguage TEXT NOT NULL, - BrowserCountry TEXT NOT NULL, - SocialNetwork TEXT NOT NULL, - SocialAction TEXT NOT 
NULL, - HTTPError SMALLINT NOT NULL, - SendTiming INTEGER NOT NULL, - DNSTiming INTEGER NOT NULL, - ConnectTiming INTEGER NOT NULL, - ResponseStartTiming INTEGER NOT NULL, - ResponseEndTiming INTEGER NOT NULL, - FetchTiming INTEGER NOT NULL, - SocialSourceNetworkID SMALLINT NOT NULL, - SocialSourcePage TEXT NOT NULL, - ParamPrice BIGINT NOT NULL, - ParamOrderID TEXT NOT NULL, - ParamCurrency TEXT NOT NULL, - ParamCurrencyID SMALLINT NOT NULL, - OpenstatServiceName TEXT NOT NULL, - OpenstatCampaignID TEXT NOT NULL, - OpenstatAdID TEXT NOT NULL, - OpenstatSourceID TEXT NOT NULL, - UTMSource TEXT NOT NULL, - UTMMedium TEXT NOT NULL, - UTMCampaign TEXT NOT NULL, - UTMContent TEXT NOT NULL, - UTMTerm TEXT NOT NULL, - FromTag TEXT NOT NULL, - HasGCLID SMALLINT NOT NULL, - RefererHash BIGINT NOT NULL, - URLHash BIGINT NOT NULL, - CLID INTEGER NOT NULL -) USING deltalake; diff --git a/paradedb/queries.sql b/paradedb/queries.sql index 31f65fc89..8d394088e 100644 --- a/paradedb/queries.sql +++ b/paradedb/queries.sql @@ -4,7 +4,7 @@ SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits; SELECT AVG(UserID) FROM hits; SELECT COUNT(DISTINCT UserID) FROM hits; SELECT COUNT(DISTINCT SearchPhrase) FROM hits; -SELECT MIN(EventDate), MAX(EventDate) FROM hits; +SELECT DATE '1970-01-01' + MIN(EventDate) * INTERVAL '1 day' AS min, DATE '1970-01-01' + MAX(EventDate) * INTERVAL '1 day' AS max FROM hits; SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC; SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10; SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10; @@ -16,17 +16,17 @@ SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; SELECT UserID, SearchPhrase, COUNT(*) FROM 
hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; -SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; +SELECT UserID, extract(minute FROM CAST(to_timestamp(EventTime) AS TIMESTAMP)) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; SELECT UserID FROM hits WHERE UserID = 435090932899640449; -SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; -SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; +SELECT COUNT(*) FROM hits WHERE URL::VARCHAR LIKE '%google%'; +SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL::VARCHAR LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title::VARCHAR LIKE '%Google%' AND URL::VARCHAR NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT * FROM hits WHERE URL::VARCHAR LIKE '%google%' ORDER BY EventTime LIMIT 10; SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; -SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 
ORDER BY l DESC LIMIT 25; -SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; +SELECT CounterID, AVG(length(URL::VARCHAR)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; +SELECT REGEXP_REPLACE(Referer::VARCHAR, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer::VARCHAR)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), 
SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; @@ -34,10 +34,10 @@ SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FR SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10; SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND 
DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; -SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100; -SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; -SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000; +SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND (DATE '1970-01-01' + EventDate * INTERVAL 
'1 day') >= '2013-07-01' AND (DATE '1970-01-01' + EventDate * INTERVAL '1 day') <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; +SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND (DATE '1970-01-01' + EventDate * INTERVAL '1 day') >= '2013-07-01' AND (DATE '1970-01-01' + EventDate * INTERVAL '1 day') <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; +SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND (DATE '1970-01-01' + EventDate * INTERVAL '1 day') >= '2013-07-01' AND (DATE '1970-01-01' + EventDate * INTERVAL '1 day') <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; +SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND (DATE '1970-01-01' + EventDate * INTERVAL '1 day') >= '2013-07-01' AND (DATE '1970-01-01' + EventDate * INTERVAL '1 day') <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; +SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND (DATE '1970-01-01' + EventDate * INTERVAL '1 day') >= '2013-07-01' AND (DATE '1970-01-01' + EventDate * INTERVAL '1 day') <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100; +SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND (DATE '1970-01-01' + EventDate * INTERVAL '1 day') >= '2013-07-01' AND (DATE '1970-01-01' + EventDate * INTERVAL '1 day') <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 
2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; +SELECT DATE_TRUNC('minute', CAST(to_timestamp(EventTime) AS TIMESTAMP)) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND (DATE '1970-01-01' + EventDate * INTERVAL '1 day') >= '2013-07-14' AND (DATE '1970-01-01' + EventDate * INTERVAL '1 day') <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', CAST(to_timestamp(EventTime) AS TIMESTAMP)) ORDER BY DATE_TRUNC('minute', CAST(to_timestamp(EventTime) AS TIMESTAMP)) LIMIT 10 OFFSET 1000; diff --git a/paradedb/results/c6a.4xlarge.1500gb.json b/paradedb/results/c6a.4xlarge.1500gb.json deleted file mode 100644 index 0925b9979..000000000 --- a/paradedb/results/c6a.4xlarge.1500gb.json +++ /dev/null @@ -1,65 +0,0 @@ -{ - "hide": false, - "system": "ParadeDB", - "date": "2024-02-02", - "machine": "c6a.4xlarge, 1500gb gp2", - "cluster_size": 1, - "comment": "The results for (c6a.4xlarge, 500gb gp2) are also submitted here for easy comparison with Elasticsearch", - - "tags": [ - "Rust", - "row-oriented", - "column-oriented", - "search", - "PostgreSQL compatible" - ], - - "load_time": 1294, - "data_size": 15415061091, - - "result": [ - [0.170805, 0.005724, 0.006098], - [0.207075, 0.09786, 0.097367], - [0.223766, 0.088988, 0.083545], - [0.428068, 0.097471, 0.098085], - [1.02813, 0.842475, 0.847983], - [1.47076, 1.27149, 1.24457], - [0.079231, 0.005903, 0.006381], - [0.20438, 0.102631, 0.101062], - [1.61474, 1.57814, 1.57654], - [1.34235, 1.09032, 1.07038], - [0.624544, 0.350221, 0.354308], - [0.592603, 0.38256, 0.382334], - [1.5044, 1.32063, 1.31768], - [3.54282, 2.67601, 2.61359], - [1.66994, 1.46717, 1.4526], - [1.16541, 0.999982, 0.975601], - [3.36336, 2.90587, 2.89977], - [3.1358, 2.75251, 2.71843], - [7.57537, 5.9709, 6.09835], - [0.450773, 0.136726, 0.134937], - [9.75662, 1.39915, 1.44835], - [11.2973, 1.70324, 1.6996], - [22.2634, 4.01717, 3.96894], - [57.8901, 
10.7998, 10.8794], - [3.10183, 0.605149, 0.595146], - [0.829921, 0.542481, 0.531427], - [3.01411, 0.668447, 0.680061], - [9.88178, 2.28414, 2.28912], - [9.13347, 5.15906, 5.15358], - [0.599997, 0.454952, 0.463381], - [2.34495, 1.18331, 1.19755], - [5.76726, 1.62152, 1.52735], - [8.98009, 8.85184, 8.65374], - [11.9086, 6.49934, 6.7125], - [12.2924, 7.10393, 7.21253], - [2.04731, 1.8265, 1.84334], - [0.274516, 0.252795, 0.251755], - [0.338633, 0.253551, 0.252828], - [0.339914, 0.254548, 0.253683], - [0.244831, 0.158776, 0.158403], - [0.382238, 0.253784, 0.254412], - [0.404016, 0.253894, 0.253025], - [0.358676, 0.204648, 0.204944] - ] -} diff --git a/paradedb/results/c6a.4xlarge.json b/paradedb/results/c6a.4xlarge.json deleted file mode 100644 index 8c9803372..000000000 --- a/paradedb/results/c6a.4xlarge.json +++ /dev/null @@ -1,65 +0,0 @@ -{ - "hide": false, - "system": "ParadeDB", - "date": "2024-02-02", - "machine": "c6a.4xlarge, 500gb gp2", - "cluster_size": 1, - "comment": "", - - "tags": [ - "Rust", - "row-oriented", - "column-oriented", - "search", - "PostgreSQL compatible" - ], - - "load_time": 1294, - "data_size": 15415061091, - - "result": [ - [0.170805, 0.005724, 0.006098], - [0.207075, 0.09786, 0.097367], - [0.223766, 0.088988, 0.083545], - [0.428068, 0.097471, 0.098085], - [1.02813, 0.842475, 0.847983], - [1.47076, 1.27149, 1.24457], - [0.079231, 0.005903, 0.006381], - [0.20438, 0.102631, 0.101062], - [1.61474, 1.57814, 1.57654], - [1.34235, 1.09032, 1.07038], - [0.624544, 0.350221, 0.354308], - [0.592603, 0.38256, 0.382334], - [1.5044, 1.32063, 1.31768], - [3.54282, 2.67601, 2.61359], - [1.66994, 1.46717, 1.4526], - [1.16541, 0.999982, 0.975601], - [3.36336, 2.90587, 2.89977], - [3.1358, 2.75251, 2.71843], - [7.57537, 5.9709, 6.09835], - [0.450773, 0.136726, 0.134937], - [9.75662, 1.39915, 1.44835], - [11.2973, 1.70324, 1.6996], - [22.2634, 4.01717, 3.96894], - [57.8901, 10.7998, 10.8794], - [3.10183, 0.605149, 0.595146], - [0.829921, 0.542481, 
0.531427], - [3.01411, 0.668447, 0.680061], - [9.88178, 2.28414, 2.28912], - [9.13347, 5.15906, 5.15358], - [0.599997, 0.454952, 0.463381], - [2.34495, 1.18331, 1.19755], - [5.76726, 1.62152, 1.52735], - [8.98009, 8.85184, 8.65374], - [11.9086, 6.49934, 6.7125], - [12.2924, 7.10393, 7.21253], - [2.04731, 1.8265, 1.84334], - [0.274516, 0.252795, 0.251755], - [0.338633, 0.253551, 0.252828], - [0.339914, 0.254548, 0.253683], - [0.244831, 0.158776, 0.158403], - [0.382238, 0.253784, 0.254412], - [0.404016, 0.253894, 0.253025], - [0.358676, 0.204648, 0.204944] - ] -} diff --git a/paradedb/results/partitioned.json b/paradedb/results/partitioned.json new file mode 100644 index 000000000..71f91d6db --- /dev/null +++ b/paradedb/results/partitioned.json @@ -0,0 +1,65 @@ +{ + "hide": false, + "system": "ParadeDB (Parquet, partitioned)", + "date": "2024-07-13", + "machine": "c6a.4xlarge, 500gb gp2", + "cluster_size": 1, + "comment": "", + + "tags": [ + "Rust", + "row-oriented", + "column-oriented", + "search", + "PostgreSQL compatible" + ], + + "load_time": 0, + "data_size": 14779976446, + + "result": [ + [0.428529,0.169996,0.165737], + [0.288946,0.183979,0.181161], + [0.320601,0.19505,0.197994], + [0.518503,0.211602,0.208391], + [0.983889,0.535952,0.540423], + [0.933326,0.71414,0.726933], + [0.296356,0.18583,0.184422], + [0.356075,0.19027,0.18721], + [0.975868,0.638999,0.641937], + [1.32702,0.867837,0.869265], + [0.613737,0.27098,0.272166], + [0.829598,0.313263,0.307575], + [1.0526,0.647612,0.649306], + [2.49939,0.985964,0.999792], + [1.11407,0.709399,0.712346], + [0.819786,0.573928,0.57883], + [2.59222,1.24389,1.23864], + [2.48332,1.23208,1.22347], + [4.81146,2.17722,2.18397], + [0.319524,0.200827,0.198878], + [9.63605,2.08945,2.12315], + [11.2103,1.9764,1.97172], + [21.749,3.67693,3.67421], + [55.5353,12.1117,12.171], + [2.77726,0.514813,0.515359], + [0.801555,0.387173,0.392298], + [2.68619,0.528309,0.534164], + [9.53043,1.99481,2.01316], + [8.25088,4.33341,4.35408], + 
[3.85116,3.68595,3.66299], + [2.3524,0.811623,0.79966], + [6.04314,0.931478,0.915213], + [5.52233,2.58091,2.59227], + [9.95394,3.09667,3.12531], + [10.1152,3.1241,3.14043], + [0.960004,0.758048,0.756207], + [0.345793,0.205136,0.222101], + [0.294038,0.178427,0.180238], + [0.341727,0.177457,0.175767], + [0.630001,0.348767,0.348869], + [0.449262,0.175673,0.178257], + [0.446425,0.172544,0.169029], + [0.451342,0.172461,0.173655] + ] +} diff --git a/paradedb/results/single.json b/paradedb/results/single.json new file mode 100644 index 000000000..6375b0537 --- /dev/null +++ b/paradedb/results/single.json @@ -0,0 +1,65 @@ +{ + "hide": false, + "system": "ParadeDB (Parquet, single)", + "date": "2024-07-13", + "machine": "c6a.4xlarge, 500gb gp2", + "cluster_size": 1, + "comment": "", + + "tags": [ + "Rust", + "row-oriented", + "column-oriented", + "search", + "PostgreSQL compatible" + ], + + "load_time": 0, + "data_size": 14779976446, + + "result": [ + [0.30371,0.135831,0.134925], + [0.331691,0.159252,0.159245], + [0.400919,0.209442,0.209561], + [0.502732,0.194204,0.194097], + [0.950075,0.568904,0.569178], + [0.903286,0.763889,0.774315], + [0.24162,0.16433,0.166469], + [0.235,0.160248,0.161342], + [0.904137,0.684214,0.679888], + [1.29153,0.912441,0.907842], + [0.595888,0.287961,0.291285], + [0.720569,0.337319,0.337211], + [0.928428,0.695999,0.69211], + [2.43338,1.04071,1.03619], + [1.00484,0.753286,0.763281], + [0.856187,0.621652,0.629723], + [2.53197,1.26466,1.29484], + [2.50383,1.26855,1.26425], + [4.64118,2.16319,2.19976], + [0.2972,0.192337,0.194048], + [9.72895,2.19798,2.16338], + [11.2355,2.07493,2.07859], + [21.9552,3.80413,3.8139], + [55.8402,12.0323,12.0304], + [2.64233,0.562578,0.562455], + [0.797866,0.443126,0.44026], + [2.55378,0.576578,0.573527], + [9.51073,2.08182,2.08824], + [8.25622,4.44961,4.44914], + [0.777114,0.611315,0.604831], + [2.28008,0.860235,0.871169], + [5.79713,0.958523,0.966058], + [5.40475,2.54125,2.65384], + [9.989,3.18323,3.15122], + 
[10.1684,3.19349,3.2022], + [0.939116,0.713083,0.714321], + [0.421327,0.276761,0.295984], + [0.295189,0.212242,0.226419], + [0.316216,0.216213,0.219], + [0.680445,0.427435,0.427072], + [0.348007,0.143995,0.145892], + [0.328972,0.144153,0.143452], + [0.338385,0.16674,0.159561] + ] +} diff --git a/paradedb/run.sh b/paradedb/run.sh index ec10816e2..1113eacc7 100755 --- a/paradedb/run.sh +++ b/paradedb/run.sh @@ -1,14 +1,14 @@ #!/bin/bash TRIES=3 -export PGPASSWORD='mypassword' +export PGPASSWORD='postgres' cat queries.sql | while read query; do - sync - echo 3 | sudo tee /proc/sys/vm/drop_caches + sync + echo 3 | sudo tee /proc/sys/vm/drop_caches - echo "$query"; - for i in $(seq 1 $TRIES); do - psql -h localhost -U myuser -d mydb -p 5432 -t -c "CALL paradedb.init();" -c '\timing' -c "$query" | grep 'Time' - done; + echo "$query"; + for i in $(seq 1 $TRIES); do + psql -h localhost -U postgres -d mydb -p 5432 -t -c '\timing' -c "$query" | grep 'Time' + done; done;