From 8419e9429da78246ca83af19cf6ec40019d9b11a Mon Sep 17 00:00:00 2001 From: Erin Cochran Date: Mon, 12 Aug 2024 18:43:17 -0400 Subject: [PATCH 1/7] Add Concepts --- .../{thinking-in-assets.md => assets.md} | 0 .../docs/concepts/assets/asset-checks.md | 6 ++ .../concepts/assets/asset-dependencies.md | 6 ++ .../concepts/assets/asset-materialization.md | 6 ++ .../docs/concepts/assets/asset-metadata.md | 6 ++ .../concepts/assets/thinking-in-assets.md | 6 ++ docs/docs-next/docs/concepts/automation.md | 1 + .../automation/declarative-automation.md | 6 ++ .../docs/concepts/automation/schedules.md | 6 ++ .../docs/concepts/automation/sensors.md | 6 ++ docs/docs-next/docs/concepts/execution.md | 1 + .../docs/concepts/execution/dagster-daemon.md | 6 ++ .../concepts/execution/run-coordinators.md | 6 ++ .../docs/concepts/execution/run-executors.md | 6 ++ .../docs/concepts/execution/run-launchers.md | 6 ++ docs/docs-next/docs/concepts/ops-jobs.md | 1 + .../concepts/ops-jobs/job-configuration.md | 6 ++ .../docs/concepts/ops-jobs/ops-vs-assets.md | 6 ++ docs/docs-next/docs/concepts/resources.md | 1 + docs/docs-next/sidebars.ts | 74 ++++++++++++++++--- 20 files changed, 152 insertions(+), 10 deletions(-) rename docs/docs-next/docs/concepts/{thinking-in-assets.md => assets.md} (100%) create mode 100644 docs/docs-next/docs/concepts/assets/asset-checks.md create mode 100644 docs/docs-next/docs/concepts/assets/asset-dependencies.md create mode 100644 docs/docs-next/docs/concepts/assets/asset-materialization.md create mode 100644 docs/docs-next/docs/concepts/assets/asset-metadata.md create mode 100644 docs/docs-next/docs/concepts/assets/thinking-in-assets.md create mode 100644 docs/docs-next/docs/concepts/automation.md create mode 100644 docs/docs-next/docs/concepts/automation/declarative-automation.md create mode 100644 docs/docs-next/docs/concepts/automation/schedules.md create mode 100644 docs/docs-next/docs/concepts/automation/sensors.md create mode 100644 
docs/docs-next/docs/concepts/execution.md create mode 100644 docs/docs-next/docs/concepts/execution/dagster-daemon.md create mode 100644 docs/docs-next/docs/concepts/execution/run-coordinators.md create mode 100644 docs/docs-next/docs/concepts/execution/run-executors.md create mode 100644 docs/docs-next/docs/concepts/execution/run-launchers.md create mode 100644 docs/docs-next/docs/concepts/ops-jobs.md create mode 100644 docs/docs-next/docs/concepts/ops-jobs/job-configuration.md create mode 100644 docs/docs-next/docs/concepts/ops-jobs/ops-vs-assets.md create mode 100644 docs/docs-next/docs/concepts/resources.md diff --git a/docs/docs-next/docs/concepts/thinking-in-assets.md b/docs/docs-next/docs/concepts/assets.md similarity index 100% rename from docs/docs-next/docs/concepts/thinking-in-assets.md rename to docs/docs-next/docs/concepts/assets.md diff --git a/docs/docs-next/docs/concepts/assets/asset-checks.md b/docs/docs-next/docs/concepts/assets/asset-checks.md new file mode 100644 index 0000000000000..68bd2769f4092 --- /dev/null +++ b/docs/docs-next/docs/concepts/assets/asset-checks.md @@ -0,0 +1,6 @@ +--- +title: "Asset checks" +sidebar_position: 7 +--- + +# Asset checks \ No newline at end of file diff --git a/docs/docs-next/docs/concepts/assets/asset-dependencies.md b/docs/docs-next/docs/concepts/assets/asset-dependencies.md new file mode 100644 index 0000000000000..1263048d62009 --- /dev/null +++ b/docs/docs-next/docs/concepts/assets/asset-dependencies.md @@ -0,0 +1,6 @@ +--- +title: "Asset dependencies" +sidebar_position: 3 +--- + +# Asset dependencies \ No newline at end of file diff --git a/docs/docs-next/docs/concepts/assets/asset-materialization.md b/docs/docs-next/docs/concepts/assets/asset-materialization.md new file mode 100644 index 0000000000000..c1d93e625ba1b --- /dev/null +++ b/docs/docs-next/docs/concepts/assets/asset-materialization.md @@ -0,0 +1,6 @@ +--- +title: "Asset materialization" +sidebar_position: 2 +--- + +# Asset materialization \ No 
newline at end of file diff --git a/docs/docs-next/docs/concepts/assets/asset-metadata.md b/docs/docs-next/docs/concepts/assets/asset-metadata.md new file mode 100644 index 0000000000000..0d0eb0e297ace --- /dev/null +++ b/docs/docs-next/docs/concepts/assets/asset-metadata.md @@ -0,0 +1,6 @@ +--- +title: "Asset metadata" +sidebar_position: 4 +--- + +# Asset metadata \ No newline at end of file diff --git a/docs/docs-next/docs/concepts/assets/thinking-in-assets.md b/docs/docs-next/docs/concepts/assets/thinking-in-assets.md new file mode 100644 index 0000000000000..397abc10f4084 --- /dev/null +++ b/docs/docs-next/docs/concepts/assets/thinking-in-assets.md @@ -0,0 +1,6 @@ +--- +title: "Thinking in assets" +sidebar_position: 1 +--- + +# Thinking in assets \ No newline at end of file diff --git a/docs/docs-next/docs/concepts/automation.md b/docs/docs-next/docs/concepts/automation.md new file mode 100644 index 0000000000000..420e995b0d090 --- /dev/null +++ b/docs/docs-next/docs/concepts/automation.md @@ -0,0 +1 @@ +# Automation \ No newline at end of file diff --git a/docs/docs-next/docs/concepts/automation/declarative-automation.md b/docs/docs-next/docs/concepts/automation/declarative-automation.md new file mode 100644 index 0000000000000..ac76e3e9b954c --- /dev/null +++ b/docs/docs-next/docs/concepts/automation/declarative-automation.md @@ -0,0 +1,6 @@ +--- +title: "Declarative Automation" +sidebar_position: 1 +--- + +# Declarative Automation \ No newline at end of file diff --git a/docs/docs-next/docs/concepts/automation/schedules.md b/docs/docs-next/docs/concepts/automation/schedules.md new file mode 100644 index 0000000000000..d34d19d8d3bcd --- /dev/null +++ b/docs/docs-next/docs/concepts/automation/schedules.md @@ -0,0 +1,6 @@ +--- +title: "Schedules" +sidebar_position: 1 +--- + +# Schedules \ No newline at end of file diff --git a/docs/docs-next/docs/concepts/automation/sensors.md b/docs/docs-next/docs/concepts/automation/sensors.md new file mode 100644 index 
0000000000000..3e3ffcda3d366 --- /dev/null +++ b/docs/docs-next/docs/concepts/automation/sensors.md @@ -0,0 +1,6 @@ +--- +title: "Sensors" +sidebar_position: 2 +--- + +# Sensors \ No newline at end of file diff --git a/docs/docs-next/docs/concepts/execution.md b/docs/docs-next/docs/concepts/execution.md new file mode 100644 index 0000000000000..b0301fd3fcea7 --- /dev/null +++ b/docs/docs-next/docs/concepts/execution.md @@ -0,0 +1 @@ +# Execution \ No newline at end of file diff --git a/docs/docs-next/docs/concepts/execution/dagster-daemon.md b/docs/docs-next/docs/concepts/execution/dagster-daemon.md new file mode 100644 index 0000000000000..305681216aa9e --- /dev/null +++ b/docs/docs-next/docs/concepts/execution/dagster-daemon.md @@ -0,0 +1,6 @@ +--- +title: "Dagster daemon" +sidebar_position: 1 +--- + +# Dagster daemon \ No newline at end of file diff --git a/docs/docs-next/docs/concepts/execution/run-coordinators.md b/docs/docs-next/docs/concepts/execution/run-coordinators.md new file mode 100644 index 0000000000000..bfecadc4d5c2a --- /dev/null +++ b/docs/docs-next/docs/concepts/execution/run-coordinators.md @@ -0,0 +1,6 @@ +--- +title: "Run coordinators" +sidebar_position: 4 +--- + +# Run coordinators \ No newline at end of file diff --git a/docs/docs-next/docs/concepts/execution/run-executors.md b/docs/docs-next/docs/concepts/execution/run-executors.md new file mode 100644 index 0000000000000..5cdbd2afd653e --- /dev/null +++ b/docs/docs-next/docs/concepts/execution/run-executors.md @@ -0,0 +1,6 @@ +--- +title: "Run executors" +sidebar_position: 3 +--- + +# Run executors \ No newline at end of file diff --git a/docs/docs-next/docs/concepts/execution/run-launchers.md b/docs/docs-next/docs/concepts/execution/run-launchers.md new file mode 100644 index 0000000000000..b4037513913e7 --- /dev/null +++ b/docs/docs-next/docs/concepts/execution/run-launchers.md @@ -0,0 +1,6 @@ +--- +title: "Run launchers" +sidebar_position: 2 +--- + +# Run launchers \ No newline at end 
of file diff --git a/docs/docs-next/docs/concepts/ops-jobs.md b/docs/docs-next/docs/concepts/ops-jobs.md new file mode 100644 index 0000000000000..9dafc7edc1e32 --- /dev/null +++ b/docs/docs-next/docs/concepts/ops-jobs.md @@ -0,0 +1 @@ +# Ops and jobs \ No newline at end of file diff --git a/docs/docs-next/docs/concepts/ops-jobs/job-configuration.md b/docs/docs-next/docs/concepts/ops-jobs/job-configuration.md new file mode 100644 index 0000000000000..9076edc06c991 --- /dev/null +++ b/docs/docs-next/docs/concepts/ops-jobs/job-configuration.md @@ -0,0 +1,6 @@ +--- +title: "Job configuration" +sidebar_position: 1 +--- + +# Job configuration \ No newline at end of file diff --git a/docs/docs-next/docs/concepts/ops-jobs/ops-vs-assets.md b/docs/docs-next/docs/concepts/ops-jobs/ops-vs-assets.md new file mode 100644 index 0000000000000..32ef5e8a66785 --- /dev/null +++ b/docs/docs-next/docs/concepts/ops-jobs/ops-vs-assets.md @@ -0,0 +1,6 @@ +--- +title: "Ops vs. assets" +sidebar_position: 1 +--- + +# Ops vs. 
assets \ No newline at end of file diff --git a/docs/docs-next/docs/concepts/resources.md b/docs/docs-next/docs/concepts/resources.md new file mode 100644 index 0000000000000..54fed86a065a5 --- /dev/null +++ b/docs/docs-next/docs/concepts/resources.md @@ -0,0 +1 @@ +# Resources \ No newline at end of file diff --git a/docs/docs-next/sidebars.ts b/docs/docs-next/sidebars.ts index fb9c0b3ff12b0..3c2c88fae57f3 100644 --- a/docs/docs-next/sidebars.ts +++ b/docs/docs-next/sidebars.ts @@ -128,20 +128,74 @@ const sidebars: SidebarsConfig = { } ], }, - ], - - /* - // But you can create a sidebar manually - docSidebar: [ - "intro", - "hello", { type: "category", - label: "Tutorial", - items: ["tutorial-basics/create-a-document"], + label: "Concepts", + items: [ + { + type: "category", + label: "Assets", + link: { + type: "doc", + id: "concepts/assets" + }, + items: [ + { + type: 'autogenerated', + dirName: "concepts/assets" + }, + ] + }, + { + type: "category", + label: "Automation", + link: { + type: "doc", + id: "concepts/automation" + }, + items: [ + { + type: 'autogenerated', + dirName: "concepts/automation" + }, + ] + }, + { + type: "doc", + label: "Resources", + id: "concepts/resources", + }, + { + type: "category", + label: "Ops and jobs", + link: { + type: "doc", + id: "concepts/ops-jobs" + }, + items: [ + { + type: 'autogenerated', + dirName: "concepts/ops-jobs" + }, + ] + }, + { + type: "category", + label: "Execution", + link: { + type: "doc", + id: "concepts/execution" + }, + items: [ + { + type: 'autogenerated', + dirName: "concepts/execution" + }, + ] + } + ], }, ], - */ }; export default sidebars; From 79d978a251defca11f0b45b3016447bd6a61cf43 Mon Sep 17 00:00:00 2001 From: Erin Cochran Date: Mon, 12 Aug 2024 19:47:48 -0400 Subject: [PATCH 2/7] Remove content to fix weird build error We can fix this and put it back when we're ready. 
--- docs/docs-next/docs/tutorial/quick-start.md | 170 +------------------- 1 file changed, 1 insertion(+), 169 deletions(-) diff --git a/docs/docs-next/docs/tutorial/quick-start.md b/docs/docs-next/docs/tutorial/quick-start.md index 6e71b987fd13b..85927b84fba75 100644 --- a/docs/docs-next/docs/tutorial/quick-start.md +++ b/docs/docs-next/docs/tutorial/quick-start.md @@ -1,171 +1,3 @@ ---- -title: Quickstart -description: Learn how to quickly get up and running with Dagster -last_update: - date: 2024-08-10 - author: Pedram Navid ---- -# Dagster Tutorial: Building Your First Dagster Project -Welcome to this hands-on tutorial where you'll learn how to build a basic Extract, Transform, Load (ETL) pipeline using Dagster. By the end of this tutorial, you'll have created a functional pipeline that extracts data from a CSV file and transforms it. - -## What You'll Learn - -- How to set up a basic Dagster project -- How to create Software-Defined Assets (SDAs) for each step of the ETL process -- How to use Dagster's built-in features to monitor and execute your pipeline - -## Prerequisites - -- Basic Python knowledge -- Python 3.7+ installed on your system, see [installation guide](tutorial/installation.md) for more details - -## Step 1: Set Up Your Dagster Environment - -First, set up a new Dagster project. - -1. Open your terminal and create a new directory for your project: - - ```bash title="Create a new directory" - mkdir dagster-quickstart - cd dagster-quickstart - ``` - -2. Create a virtual environment and activate it: - - ```bash title="Create a virtual environment" - python -m venv venv - source venv/bin/activate - # On Windows, use `venv\Scripts\activate` - ``` - -3. 
Install Dagster and the required dependencies: - - ```bash title="Install Dagster and dependencies" - pip install dagster dagster-webserver pandas - ``` - -## Step 2: Create Your Dagster Project Structure - -Set up a basic project structure: - -:::warning - -The file structure here is simplified to get quickly started. - -Once you've completed this tutorial, consider the [ETL Pipeline Tutorial](/tutorial/tutorial-etl) to learn -how to build more complex pipelines with best practices. - -::: - -1. Create the following files and directories: - - ```bash title="Project structure" - dagster-quickstart/ - ├── quickstart/ - │ ├── __init__.py - │ └── assets.py - ├── data/ - └── sample_data.csv - ``` - - ```bash title="Create the project structure" - mkdir quickstart data - touch quickstart/__init__.py quickstart/assets.py - touch data/sample_data.csv - ``` - - - -2. Create a sample CSV file as a data source. In the `data/sample_data.csv` file, add the following content: - - ```csv - id,name,age,city - 1,Alice,28,New York - 2,Bob,35,San Francisco - 3,Charlie,42,Chicago - 4,Diana,31,Los Angeles - ``` - -## Step 3: Define Your Assets - -Now, create the assets for the ETL pipeline. Open `quickstart/assets.py` and add the following code: - -```python -import pandas as pd -from dagster import asset, Definitions - -@asset -def processed_data(): - df = pd.read_csv("data/sample_data.csv") - df['age_group'] = pd.cut(df['age'], bins=[0, 30, 40, 100], labels=['Young', 'Middle', 'Senior']) - df.to_csv("data/processed_data.csv", index=False) - return "Data loaded successfully" - -defs = Definitions(assets=[processed_data]) -``` - -This code defines a single data asset within a single computation that performs three steps: -- Reads data from the CSV file -- Adds an `age_group` column based on the `age` -- Saves the processed data to a CSV file - -If you are used to task-based orchestrations, this might feel a bit different. 
-In traditional task-based orchestrations, you would have three separate steps, -but in Dagster, you model your pipelines using assets as the fundamental building block, -rather than tasks. - -The `Definitions` object serves as the central configuration point for a Dagster project. In this code, a `Definitions` -object is defined and the asset is passed to it. This tells Dagster about the assets that make up the ETL pipeline -and allows Dagster to manage their execution and dependencies. - -## Step 4: Run Your Pipeline - -:::warning - -There should be screenshots here!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - -::: - -1. In the terminal, navigate to your project root directory and run: - - ```bash - dagster dev -f quickstart/assets.py - ``` - -2. Open your web browser and go to `http://localhost:3000` - -3. You should see the Dagster UI along with the asset. - -3. Click Materialize All to run the pipeline. - -4. In the popup that appears, click View to view a run as it executes. - -5. Watch as Dagster executes your pipeline. Try different views by selecting the different view buttons in the top-left. -You can click on each asset to see its logs and metadata. - -## Step 5: Verify Your Results - -To verify that your pipeline worked correctly: - -1. In your terminal, run: - - ```bash - cat data/processed_data.csv - ``` - -You should see your transformed data, including the new `age_group` column. - -## What You've Learned - -Congratulations! You've just built and run your first pipeline with Dagster. 
You've learned how to: - -- Set up a Dagster project -- Define Software-Defined Assets for each step of your pipeline -- Use Dagster's UI to run and monitor your pipeline - -## Next Steps - -- Continue with the [ETL Pipeline Tutorial](/tutorial/tutorial-etl) to learn how to build a more complex ETL pipeline -- Learn how to [Think in Assets](/concepts/thinking-in-assets) \ No newline at end of file +# Quickstart \ No newline at end of file From 7f936199b87beb9f4a348e7e557a9fba8a244334 Mon Sep 17 00:00:00 2001 From: Erin Cochran Date: Mon, 12 Aug 2024 19:47:55 -0400 Subject: [PATCH 3/7] Finish Concepts --- docs/docs-next/docs/concepts/io-managers.md | 5 +++++ docs/docs-next/docs/concepts/partitions.md | 5 +++++ docs/docs-next/sidebars.ts | 10 ++++++++++ 3 files changed, 20 insertions(+) create mode 100644 docs/docs-next/docs/concepts/io-managers.md create mode 100644 docs/docs-next/docs/concepts/partitions.md diff --git a/docs/docs-next/docs/concepts/io-managers.md b/docs/docs-next/docs/concepts/io-managers.md new file mode 100644 index 0000000000000..078ce80e62986 --- /dev/null +++ b/docs/docs-next/docs/concepts/io-managers.md @@ -0,0 +1,5 @@ +--- +title: "I/O managers" +--- + +# I/O managers \ No newline at end of file diff --git a/docs/docs-next/docs/concepts/partitions.md b/docs/docs-next/docs/concepts/partitions.md new file mode 100644 index 0000000000000..e13a76656017f --- /dev/null +++ b/docs/docs-next/docs/concepts/partitions.md @@ -0,0 +1,5 @@ +--- +title: "Partitions" +--- + +# Partitions \ No newline at end of file diff --git a/docs/docs-next/sidebars.ts b/docs/docs-next/sidebars.ts index 3c2c88fae57f3..4e48637f87494 100644 --- a/docs/docs-next/sidebars.ts +++ b/docs/docs-next/sidebars.ts @@ -160,11 +160,21 @@ const sidebars: SidebarsConfig = { }, ] }, + { + type: "doc", + label: "Partitions", + id: "concepts/partitions", + }, { type: "doc", label: "Resources", id: "concepts/resources", }, + { + type: "doc", + label: "I/O managers", + id: 
"concepts/io-managers", + }, { type: "category", label: "Ops and jobs", From 3aa28aa03db621b957e0226e75be4d5ded826338 Mon Sep 17 00:00:00 2001 From: Pedram Navid <1045990+PedramNavid@users.noreply.github.com> Date: Tue, 13 Aug 2024 09:38:57 -0700 Subject: [PATCH 4/7] update sidebar positions --- docs/docs-next/docs/concepts/assets/asset-checks.md | 2 +- docs/docs-next/docs/concepts/assets/asset-dependencies.md | 2 +- docs/docs-next/docs/concepts/assets/asset-materialization.md | 2 +- docs/docs-next/docs/concepts/assets/asset-metadata.md | 2 +- docs/docs-next/docs/concepts/assets/thinking-in-assets.md | 2 +- .../docs/concepts/automation/declarative-automation.md | 4 ++-- docs/docs-next/docs/concepts/automation/schedules.md | 4 ++-- docs/docs-next/docs/concepts/automation/sensors.md | 4 ++-- docs/docs-next/docs/concepts/execution/dagster-daemon.md | 4 ++-- docs/docs-next/docs/concepts/execution/run-coordinators.md | 4 ++-- docs/docs-next/docs/concepts/execution/run-executors.md | 4 ++-- docs/docs-next/docs/concepts/execution/run-launchers.md | 4 ++-- docs/docs-next/docs/concepts/ops-jobs/job-configuration.md | 4 ++-- docs/docs-next/docs/concepts/ops-jobs/ops-vs-assets.md | 4 ++-- 14 files changed, 23 insertions(+), 23 deletions(-) diff --git a/docs/docs-next/docs/concepts/assets/asset-checks.md b/docs/docs-next/docs/concepts/assets/asset-checks.md index 68bd2769f4092..913b2c1a53ede 100644 --- a/docs/docs-next/docs/concepts/assets/asset-checks.md +++ b/docs/docs-next/docs/concepts/assets/asset-checks.md @@ -1,6 +1,6 @@ --- title: "Asset checks" -sidebar_position: 7 +sidebar_position: 70 --- # Asset checks \ No newline at end of file diff --git a/docs/docs-next/docs/concepts/assets/asset-dependencies.md b/docs/docs-next/docs/concepts/assets/asset-dependencies.md index 1263048d62009..5c8105f20d7ba 100644 --- a/docs/docs-next/docs/concepts/assets/asset-dependencies.md +++ b/docs/docs-next/docs/concepts/assets/asset-dependencies.md @@ -1,6 +1,6 @@ --- title: "Asset 
dependencies" -sidebar_position: 3 +sidebar_position: 30 --- # Asset dependencies \ No newline at end of file diff --git a/docs/docs-next/docs/concepts/assets/asset-materialization.md b/docs/docs-next/docs/concepts/assets/asset-materialization.md index c1d93e625ba1b..fa5d0043d088f 100644 --- a/docs/docs-next/docs/concepts/assets/asset-materialization.md +++ b/docs/docs-next/docs/concepts/assets/asset-materialization.md @@ -1,6 +1,6 @@ --- title: "Asset materialization" -sidebar_position: 2 +sidebar_position: 20 --- # Asset materialization \ No newline at end of file diff --git a/docs/docs-next/docs/concepts/assets/asset-metadata.md b/docs/docs-next/docs/concepts/assets/asset-metadata.md index 0d0eb0e297ace..1681bcbf52686 100644 --- a/docs/docs-next/docs/concepts/assets/asset-metadata.md +++ b/docs/docs-next/docs/concepts/assets/asset-metadata.md @@ -1,6 +1,6 @@ --- title: "Asset metadata" -sidebar_position: 4 +sidebar_position: 40 --- # Asset metadata \ No newline at end of file diff --git a/docs/docs-next/docs/concepts/assets/thinking-in-assets.md b/docs/docs-next/docs/concepts/assets/thinking-in-assets.md index 397abc10f4084..78675fba1317b 100644 --- a/docs/docs-next/docs/concepts/assets/thinking-in-assets.md +++ b/docs/docs-next/docs/concepts/assets/thinking-in-assets.md @@ -1,6 +1,6 @@ --- title: "Thinking in assets" -sidebar_position: 1 +sidebar_position: 10 --- # Thinking in assets \ No newline at end of file diff --git a/docs/docs-next/docs/concepts/automation/declarative-automation.md b/docs/docs-next/docs/concepts/automation/declarative-automation.md index ac76e3e9b954c..60dc401e4168e 100644 --- a/docs/docs-next/docs/concepts/automation/declarative-automation.md +++ b/docs/docs-next/docs/concepts/automation/declarative-automation.md @@ -1,6 +1,6 @@ --- title: "Declarative Automation" -sidebar_position: 1 +sidebar_position: 10 --- -# Declarative Automation \ No newline at end of file +# Declarative Automation diff --git 
a/docs/docs-next/docs/concepts/automation/schedules.md b/docs/docs-next/docs/concepts/automation/schedules.md index d34d19d8d3bcd..280d14cb0761f 100644 --- a/docs/docs-next/docs/concepts/automation/schedules.md +++ b/docs/docs-next/docs/concepts/automation/schedules.md @@ -1,6 +1,6 @@ --- title: "Schedules" -sidebar_position: 1 +sidebar_position: 10 --- -# Schedules \ No newline at end of file +# Schedules diff --git a/docs/docs-next/docs/concepts/automation/sensors.md b/docs/docs-next/docs/concepts/automation/sensors.md index 3e3ffcda3d366..d6b970d3fdd9c 100644 --- a/docs/docs-next/docs/concepts/automation/sensors.md +++ b/docs/docs-next/docs/concepts/automation/sensors.md @@ -1,6 +1,6 @@ --- title: "Sensors" -sidebar_position: 2 +sidebar_position: 20 --- -# Sensors \ No newline at end of file +# Sensors diff --git a/docs/docs-next/docs/concepts/execution/dagster-daemon.md b/docs/docs-next/docs/concepts/execution/dagster-daemon.md index 305681216aa9e..ab685f8137253 100644 --- a/docs/docs-next/docs/concepts/execution/dagster-daemon.md +++ b/docs/docs-next/docs/concepts/execution/dagster-daemon.md @@ -1,6 +1,6 @@ --- title: "Dagster daemon" -sidebar_position: 1 +sidebar_position: 10 --- -# Dagster daemon \ No newline at end of file +# Dagster daemon diff --git a/docs/docs-next/docs/concepts/execution/run-coordinators.md b/docs/docs-next/docs/concepts/execution/run-coordinators.md index bfecadc4d5c2a..dd069fe237aac 100644 --- a/docs/docs-next/docs/concepts/execution/run-coordinators.md +++ b/docs/docs-next/docs/concepts/execution/run-coordinators.md @@ -1,6 +1,6 @@ --- title: "Run coordinators" -sidebar_position: 4 +sidebar_position: 40 --- -# Run coordinators \ No newline at end of file +# Run coordinators diff --git a/docs/docs-next/docs/concepts/execution/run-executors.md b/docs/docs-next/docs/concepts/execution/run-executors.md index 5cdbd2afd653e..993b5c37af4ab 100644 --- a/docs/docs-next/docs/concepts/execution/run-executors.md +++ 
b/docs/docs-next/docs/concepts/execution/run-executors.md @@ -1,6 +1,6 @@ --- title: "Run executors" -sidebar_position: 3 +sidebar_position: 30 --- -# Run executors \ No newline at end of file +# Run executors diff --git a/docs/docs-next/docs/concepts/execution/run-launchers.md b/docs/docs-next/docs/concepts/execution/run-launchers.md index b4037513913e7..c4096f6dd5beb 100644 --- a/docs/docs-next/docs/concepts/execution/run-launchers.md +++ b/docs/docs-next/docs/concepts/execution/run-launchers.md @@ -1,6 +1,6 @@ --- title: "Run launchers" -sidebar_position: 2 +sidebar_position: 20 --- -# Run launchers \ No newline at end of file +# Run launchers diff --git a/docs/docs-next/docs/concepts/ops-jobs/job-configuration.md b/docs/docs-next/docs/concepts/ops-jobs/job-configuration.md index 9076edc06c991..b250bbadbc04d 100644 --- a/docs/docs-next/docs/concepts/ops-jobs/job-configuration.md +++ b/docs/docs-next/docs/concepts/ops-jobs/job-configuration.md @@ -1,6 +1,6 @@ --- title: "Job configuration" -sidebar_position: 1 +sidebar_position: 10 --- -# Job configuration \ No newline at end of file +# Job configuration diff --git a/docs/docs-next/docs/concepts/ops-jobs/ops-vs-assets.md b/docs/docs-next/docs/concepts/ops-jobs/ops-vs-assets.md index 32ef5e8a66785..e887e8cc6e45f 100644 --- a/docs/docs-next/docs/concepts/ops-jobs/ops-vs-assets.md +++ b/docs/docs-next/docs/concepts/ops-jobs/ops-vs-assets.md @@ -1,6 +1,6 @@ --- title: "Ops vs. assets" -sidebar_position: 1 +sidebar_position: 10 --- -# Ops vs. assets \ No newline at end of file +# Ops vs. 
assets From 05a2acbb7eebd5f7a7e476bab3cd2ad7ff67e9fa Mon Sep 17 00:00:00 2001 From: Pedram Navid <1045990+PedramNavid@users.noreply.github.com> Date: Tue, 13 Aug 2024 09:39:49 -0700 Subject: [PATCH 5/7] revert quick-start --- docs/docs-next/docs/tutorial/quick-start.md | 170 +++++++++++++++++++- 1 file changed, 169 insertions(+), 1 deletion(-) diff --git a/docs/docs-next/docs/tutorial/quick-start.md b/docs/docs-next/docs/tutorial/quick-start.md index 85927b84fba75..6e71b987fd13b 100644 --- a/docs/docs-next/docs/tutorial/quick-start.md +++ b/docs/docs-next/docs/tutorial/quick-start.md @@ -1,3 +1,171 @@ +--- +title: Quickstart +description: Learn how to quickly get up and running with Dagster +last_update: + date: 2024-08-10 + author: Pedram Navid +--- +# Dagster Tutorial: Building Your First Dagster Project -# Quickstart \ No newline at end of file +Welcome to this hands-on tutorial where you'll learn how to build a basic Extract, Transform, Load (ETL) pipeline using Dagster. By the end of this tutorial, you'll have created a functional pipeline that extracts data from a CSV file and transforms it. + +## What You'll Learn + +- How to set up a basic Dagster project +- How to create Software-Defined Assets (SDAs) for each step of the ETL process +- How to use Dagster's built-in features to monitor and execute your pipeline + +## Prerequisites + +- Basic Python knowledge +- Python 3.7+ installed on your system, see [installation guide](tutorial/installation.md) for more details + +## Step 1: Set Up Your Dagster Environment + +First, set up a new Dagster project. + +1. Open your terminal and create a new directory for your project: + + ```bash title="Create a new directory" + mkdir dagster-quickstart + cd dagster-quickstart + ``` + +2. Create a virtual environment and activate it: + + ```bash title="Create a virtual environment" + python -m venv venv + source venv/bin/activate + # On Windows, use `venv\Scripts\activate` + ``` + +3. 
Install Dagster and the required dependencies: + + ```bash title="Install Dagster and dependencies" + pip install dagster dagster-webserver pandas + ``` + +## Step 2: Create Your Dagster Project Structure + +Set up a basic project structure: + +:::warning + +The file structure here is simplified to get started quickly. + +Once you've completed this tutorial, consider the [ETL Pipeline Tutorial](/tutorial/tutorial-etl) to learn +how to build more complex pipelines with best practices. + +::: + +1. Create the following files and directories: + + ```bash title="Project structure" + dagster-quickstart/ + ├── quickstart/ + │ ├── __init__.py + │ └── assets.py + ├── data/ + └── sample_data.csv + ``` + + ```bash title="Create the project structure" + mkdir quickstart data + touch quickstart/__init__.py quickstart/assets.py + touch data/sample_data.csv + ``` + + + +2. Create a sample CSV file as a data source. In the `data/sample_data.csv` file, add the following content: + + ```csv + id,name,age,city + 1,Alice,28,New York + 2,Bob,35,San Francisco + 3,Charlie,42,Chicago + 4,Diana,31,Los Angeles + ``` + +## Step 3: Define Your Assets + +Now, create the assets for the ETL pipeline. Open `quickstart/assets.py` and add the following code: + +```python +import pandas as pd +from dagster import asset, Definitions + +@asset +def processed_data(): + df = pd.read_csv("data/sample_data.csv") + df['age_group'] = pd.cut(df['age'], bins=[0, 30, 40, 100], labels=['Young', 'Middle', 'Senior']) + df.to_csv("data/processed_data.csv", index=False) + return "Data loaded successfully" + +defs = Definitions(assets=[processed_data]) +``` + +This code defines a single data asset within a single computation that performs three steps: +- Reads data from the CSV file +- Adds an `age_group` column based on the `age` +- Saves the processed data to a CSV file + +If you are used to task-based orchestrations, this might feel a bit different. 
+In traditional task-based orchestrations, you would have three separate steps, +but in Dagster, you model your pipelines using assets as the fundamental building block, +rather than tasks. + +The `Definitions` object serves as the central configuration point for a Dagster project. In this code, a `Definitions` +object is defined and the asset is passed to it. This tells Dagster about the assets that make up the ETL pipeline +and allows Dagster to manage their execution and dependencies. + +## Step 4: Run Your Pipeline + +:::warning + +TODO: Add screenshots of the Dagster UI for this step. + +::: + +1. In the terminal, navigate to your project root directory and run: + + ```bash + dagster dev -f quickstart/assets.py + ``` + +2. Open your web browser and go to `http://localhost:3000`. + +3. You should see the Dagster UI along with the asset. + +4. Click Materialize All to run the pipeline. + +5. In the popup that appears, click View to view a run as it executes. + +6. Watch as Dagster executes your pipeline. Try different views by selecting the different view buttons in the top-left. +You can click on each asset to see its logs and metadata. + +## Step 5: Verify Your Results + +To verify that your pipeline worked correctly: + +1. In your terminal, run: + + ```bash + cat data/processed_data.csv + ``` + +You should see your transformed data, including the new `age_group` column. + +## What You've Learned + +Congratulations! You've just built and run your first pipeline with Dagster. 
You've learned how to: + +- Set up a Dagster project +- Define Software-Defined Assets for each step of your pipeline +- Use Dagster's UI to run and monitor your pipeline + +## Next Steps + +- Continue with the [ETL Pipeline Tutorial](/tutorial/tutorial-etl) to learn how to build a more complex ETL pipeline +- Learn how to [Think in Assets](/concepts/thinking-in-assets) \ No newline at end of file From 8fb8d5f3370d58ff475af5c14830b977d351d31e Mon Sep 17 00:00:00 2001 From: Pedram Navid <1045990+PedramNavid@users.noreply.github.com> Date: Tue, 13 Aug 2024 09:41:36 -0700 Subject: [PATCH 6/7] try updating workflow --- .github/workflows/build-docs-revamp.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build-docs-revamp.yml b/.github/workflows/build-docs-revamp.yml index 6a5ea0b7c0dc7..c7c0184d915ad 100644 --- a/.github/workflows/build-docs-revamp.yml +++ b/.github/workflows/build-docs-revamp.yml @@ -1,8 +1,6 @@ name: Deploy Docs Revamp on: pull_request: - branches: - - docs/revamp paths: - docs/docs-next - .github/workflows/build-docs-revamp.yml @@ -13,10 +11,12 @@ on: paths: - docs/docs-next - .github/workflows/build-docs-revamp.yml + concurrency: # Cancel in-progress runs on same branch group: ${{ github.ref }} cancel-in-progress: true + jobs: deploy: runs-on: ubuntu-latest @@ -36,4 +36,4 @@ jobs: vercel-args: "--prod" github-token: ${{ secrets.GITHUB_TOKEN }} scope: ${{ secrets.VERCEL_ORG_ID }} - alias-domains: dagster-docs-next.dagster.dagster-docs.io \ No newline at end of file + alias-domains: dagster-docs-next.dagster.dagster-docs.io From cf504f99a6962e43253953c8ad262de54e4540a0 Mon Sep 17 00:00:00 2001 From: Pedram Navid <1045990+PedramNavid@users.noreply.github.com> Date: Tue, 13 Aug 2024 09:44:00 -0700 Subject: [PATCH 7/7] fix concurrency bug --- .github/workflows/build-docs-revamp.yml | 4 +--- .github/workflows/build-docs.yml | 2 +- docs/docs-next/docs/tutorial/quick-start.md | 2 +- 3 files changed, 3 
insertions(+), 5 deletions(-) diff --git a/.github/workflows/build-docs-revamp.yml b/.github/workflows/build-docs-revamp.yml index c7c0184d915ad..871ce662ddc07 100644 --- a/.github/workflows/build-docs-revamp.yml +++ b/.github/workflows/build-docs-revamp.yml @@ -14,7 +14,7 @@ on: concurrency: # Cancel in-progress runs on same branch - group: ${{ github.ref }} + group: ${{ github.workflow}}-${{github.ref}} cancel-in-progress: true jobs: @@ -22,12 +22,10 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout docs/revamp branch - if: github.event_name == 'push' && github.ref == 'refs/heads/docs/revamp' uses: actions/checkout@v4 - name: Publish Preview to Vercel uses: amondnet/vercel-action@v25 - if: github.event_name == 'push' && github.ref == 'refs/heads/docs/revamp' with: github-comment: ${{ true }} vercel-token: ${{ secrets.VERCEL_TOKEN }} diff --git a/.github/workflows/build-docs.yml b/.github/workflows/build-docs.yml index a01a04f77172b..4bcecaf458b0b 100644 --- a/.github/workflows/build-docs.yml +++ b/.github/workflows/build-docs.yml @@ -18,7 +18,7 @@ on: - .github/workflows/build-docs.yml concurrency: # Cancel in-progress runs on same branch - group: ${{ github.ref }} + group: ${{ github.workflow}}-${{github.ref}} cancel-in-progress: true jobs: deploy: diff --git a/docs/docs-next/docs/tutorial/quick-start.md b/docs/docs-next/docs/tutorial/quick-start.md index 6e71b987fd13b..f1edcb273879f 100644 --- a/docs/docs-next/docs/tutorial/quick-start.md +++ b/docs/docs-next/docs/tutorial/quick-start.md @@ -168,4 +168,4 @@ Congratulations! You've just built and run your first pipeline with Dagster. You ## Next Steps - Continue with the [ETL Pipeline Tutorial](/tutorial/tutorial-etl) to learn how to build a more complex ETL pipeline -- Learn how to [Think in Assets](/concepts/thinking-in-assets) \ No newline at end of file +- Learn how to [Think in Assets](/concepts/assets/thinking-in-assets) \ No newline at end of file