Commit dcb84b1

Test all of our documentation. (instructor-ai#404)
Co-authored-by: grit-app[bot] <grit-app[bot]@users.noreply.github.com>
jxnl and grit-app[bot] authored Feb 5, 2024
1 parent edc22b8 commit dcb84b1
Showing 37 changed files with 7,785 additions and 686 deletions.
40 changes: 40 additions & 0 deletions .github/workflows/test_docs.yml
@@ -0,0 +1,40 @@
+name: Test Docs
+on: [push, pull_request]
+
+jobs:
+  release:
+    runs-on: ubuntu-latest
+
+    strategy:
+      matrix:
+        python-version: ['3.11']
+
+    steps:
+      - uses: actions/checkout@v2
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Cache Poetry virtualenv
+        uses: actions/cache@v3
+        with:
+          path: ~/.cache/pypoetry/virtualenvs
+          key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }}
+          restore-keys: |
+            ${{ runner.os }}-poetry-
+      - name: Install Poetry
+        uses: snok/[email protected]
+
+      - name: Install dependencies
+        run: poetry install --with dev
+
+      - name: Install doc dependencies
+        run: poetry install --with test-docs
+
+      - name: Run tests
+        run: poetry run pytest tests/openai/docs
+        env:
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
1 change: 1 addition & 0 deletions .gitignore
@@ -169,3 +169,4 @@ tutorials/results.jsonl
 tutorials/results.jsonlines
 tutorials/schema.json
 wandb/settings
+math_finetunes.jsonl
2 changes: 0 additions & 2 deletions .grit/grit.yaml
@@ -2,5 +2,3 @@ version: 0.0.1
 patterns:
   - name: github.com/getgrit/python#openai
     level: info
-  - name: github.com/getgrit/python#no_skipped_tests
-    level: error
38 changes: 33 additions & 5 deletions README.md
@@ -79,26 +79,35 @@ For async clients you must use `apatch` vs. `patch`, as shown:
 
 ```py
 import instructor
-from openai import AsyncOpenAI
+import asyncio
+import openai
 from pydantic import BaseModel
 
-aclient = instructor.apatch(AsyncOpenAI())
+aclient = instructor.apatch(openai.AsyncOpenAI())
 
 
 class UserExtract(BaseModel):
     name: str
     age: int
 
 
-model = await aclient.chat.completions.create(
+task = aclient.chat.completions.create(
     model="gpt-3.5-turbo",
     response_model=UserExtract,
     messages=[
         {"role": "user", "content": "Extract jason is 25 years old"},
     ],
 )
 
-assert isinstance(model, UserExtract)
+response = asyncio.run(task)
+print(response.model_dump_json(indent=2))
+"""
+{
+  "name": "Jason",
+  "age": 25
+}
+"""
 ```

### Step 1: Patch the client
@@ -132,17 +141,36 @@ class UserDetail(BaseModel):
 Use the `client.chat.completions.create` method to send a prompt and extract the data into the Pydantic object. The `response_model` parameter specifies the Pydantic model to use for extraction. It is helpful to annotate the variable with the type of the response model which will help your IDE provide autocomplete and spell check.
 
 ```python
+import instructor
+import openai
+from pydantic import BaseModel
+
+client = instructor.patch(openai.OpenAI())
+
+
 class UserDetail(BaseModel):
     name: str
     age: int
 
-user: UserDetail = client.chat.completions.create(
+
+user = client.chat.completions.create(
     model="gpt-3.5-turbo",
     response_model=UserDetail,
     messages=[
         {"role": "user", "content": "Extract Jason is 25 years old"},
     ],
 )
 
 assert isinstance(user, UserDetail)
 assert user.name == "Jason"
 assert user.age == 25
+print(user.model_dump_json(indent=2))
+"""
+{
+  "name": "Jason",
+  "age": 25
+}
+"""
 ```

## Pydantic Validation
2 changes: 1 addition & 1 deletion docs/blog/posts/anyscale.md
@@ -70,7 +70,7 @@ resp = client.chat.completions.create(
     response_model=UserDetails,
 )
 print(resp)
-# >>> name='Jason' age=20
+#> name='Jason' age=20
 ```

You can find more information about Anyscale's output mode support [here](https://docs.endpoints.anyscale.com/).
21 changes: 12 additions & 9 deletions docs/blog/posts/caching.md
@@ -76,16 +76,16 @@ Now we can call `extract` multiple times with the same argument, and the result
 
 ```python hl_lines="4 8 12"
 import time
 
-start = time.perf_counter() # (1)
+start = time.perf_counter()  # (1)
 model = extract("Extract jason is 25 years old")
 print(f"Time taken: {time.perf_counter() - start}")
 
 start = time.perf_counter()
-model = extract("Extract jason is 25 years old") # (2)
+model = extract("Extract jason is 25 years old")  # (2)
 print(f"Time taken: {time.perf_counter() - start}")
 
->>> Time taken: 0.9267581660533324
->>> Time taken: 1.2080417945981026e-06 # (3)
+#> Time taken: 0.92
+#> Time taken: 1.20e-06  # (3)
 ```

1. Using `time.perf_counter()` to measure the time taken to run the function is better than using `time.time()` because it's more accurate and less susceptible to system clock changes.
@@ -101,20 +101,23 @@ print(f"Time taken: {time.perf_counter() - start}")
 
 ```python hl_lines="3-5 9"
 def decorator(func):
     def wrapper(*args, **kwargs):
-        print("Do something before") # (1)
+        print("Do something before")  # (1)
         result = func(*args, **kwargs)
-        print("Do something after") # (2)
+        print("Do something after")  # (2)
         return result
 
     return wrapper
 
+
 @decorator
 def say_hello():
     print("Hello!")
 
+
 say_hello()
->>> "Do something before"
->>> "Hello!"
->>> "Do something after"
+#> "Do something before"
+#> "Hello!"
+#> "Do something after"
 ```

1. The code is executed before the function is called
50 changes: 25 additions & 25 deletions docs/blog/posts/chain-of-density.md
@@ -115,21 +115,21 @@ Firstly, we'll need a data model for the initial summary that we will be generating
 
 ```py
 class GeneratedSummary(BaseModel):
-  """
-  This represents a highly concise summary that includes as many entities as possible from the original source article.
+    """
+    This represents a highly concise summary that includes as many entities as possible from the original source article.
 
-  An Entity is a real-world object that's assigned a name - for example, a person, country a product or a book title.
+    An Entity is a real-world object that's assigned a name - for example, a person, country a product or a book title.
 
-  Guidelines
-  - Make every word count
-  - The new summary should be highly dense and concise yet self-contained, eg., easily understood without the Article.
-  - Make space with fusion, compression, and removal of uninformative phrases like "the article discusses"
-  """
+    Guidelines
+    - Make every word count
+    - The new summary should be highly dense and concise yet self-contained, eg., easily understood without the Article.
+    - Make space with fusion, compression, and removal of uninformative phrases like "the article discusses"
+    """
 
-  summary: str = Field(
-    ...,
-    description="This represents the final summary generated that captures the meaning of the original article which is as concise as possible. ",
-  )
+    summary: str = Field(
+        ...,
+        description="This represents the final summary generated that captures the meaning of the original article which is as concise as possible. ",
+    )
 ```

We eventually transform it into an OpenAI function call as seen below.
@@ -254,21 +254,21 @@ def has_no_absent_entities(cls, absent_entities: List[str]):
     return absent_entities
 
 @field_validator("summary")
-def min_entity_density(cls, v: str):
-  tokens = nltk.word_tokenize(v)
-  num_tokens = len(tokens)
+def min_entity_density(cls, v: str):
+    tokens = nltk.word_tokenize(v)
+    num_tokens = len(tokens)
 
-  # Extract Entities
-  doc = nlp(v) #(2)!
-  num_entities = len(doc.ents)
+    # Extract Entities
+    doc = nlp(v)  # (2)!
+    num_entities = len(doc.ents)
 
-  density = num_entities / num_tokens
-  if density < 0.08: #(3)!
-    raise ValueError(
-      f"The summary of {v} has too few entities. Please regenerate a new summary with more new entities added to it. Remember that new entities can be added at any point of the summary."
-    )
+    density = num_entities / num_tokens
+    if density < 0.08:  # (3)!
+        raise ValueError(
+            f"The summary of {v} has too few entities. Please regenerate a new summary with more new entities added to it. Remember that new entities can be added at any point of the summary."
+        )
 
-  return v
+    return v

1. Similar to the original paper, we utilize the `NLTK` word tokenizer to count the number of tokens within our generated sentences.
@@ -282,7 +282,7 @@ def has_no_absent_entities(cls, absent_entities: List[str]):
 
 Now that we have our models and the rough flow figured out, let's implement a function to summarize a piece of text using `Chain Of Density` summarization.
 
-```py hl_lines="4 9-24 38-68"
+```python hl_lines="4 9-24 38-68"
 from openai import OpenAI
 import instructor
13 changes: 10 additions & 3 deletions docs/blog/posts/introduction.md
@@ -163,11 +163,18 @@ The architecture resembles FastAPI. Most code can be written as Python functions
 ### FastAPI Stub
 
 ```python
-app = FastAPI()
+import fastapi
+from pydantic import BaseModel
+
+
+class UserDetails(BaseModel):
+    name: str
+    age: int
+
+
+app = fastapi.FastAPI()
+
 
 @app.get("/user/{user_id}", response_model=UserDetails)
 async def get_user(user_id: int) -> UserDetails:
-    return UserDetails(...)
+    return ...
 ```

### Using Instructor as a Function
@@ -176,7 +183,7 @@ async def get_user(user_id: int) -> UserDetails:
 def extract_user(str) -> UserDetails:
     return client.chat.completions(
         response_model=UserDetails,
-        messages=[...]
+        messages=[]
     )
 ```

11 changes: 6 additions & 5 deletions docs/blog/posts/validation-part1.md
@@ -34,12 +34,13 @@ def validation_function(value):
 
 ```python
 from openai import OpenAI
-import instructor # pip install instructor
+import instructor  # pip install instructor
 from pydantic import BaseModel
 
 # This enables response_model keyword
 # from client.chat.completions.create
-client = instructor.patch(OpenAI()) # (1)!
+client = instructor.patch(OpenAI())  # (1)!
+
 
 class UserDetail(BaseModel):
     name: str
@@ -51,11 +52,11 @@ user: UserDetail = client.chat.completions.create(
     response_model=UserDetail,
     messages=[
         {"role": "user", "content": "Extract Jason is 25 years old"},
-    ]
-    max_retries=3 # (2)!
+    ],
+    max_retries=3,  # (2)!
 )
 
-assert user.name == "Jason" # (3)!
+assert user.name == "Jason"  # (3)!
 assert user.age == 25
 ```

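The `max_retries` argument fixed in the hunk above drives a retry loop: when the model's output fails Pydantic validation, the request is re-asked up to that many times. A simplified sketch of the idea — not instructor's actual implementation; `create_with_retries` and the fake model below are hypothetical:

```python
from pydantic import BaseModel, ValidationError


def create_with_retries(call_model, response_model: type[BaseModel], max_retries: int = 1):
    """Re-invoke the model until its output validates, up to max_retries attempts."""
    last_error = None
    for _ in range(max_retries):
        # Pass the previous validation error back so the model can self-correct.
        raw = call_model(last_error)
        try:
            return response_model.model_validate(raw)
        except ValidationError as e:
            last_error = str(e)
    raise RuntimeError(f"No valid {response_model.__name__} after {max_retries} tries")


class UserDetail(BaseModel):
    name: str
    age: int


# Fake "model" that returns a bad payload first, then a good one.
responses = iter([{"name": "Jason", "age": "not a number"}, {"name": "Jason", "age": 25}])
user = create_with_retries(lambda err: next(responses), UserDetail, max_retries=3)
print(user)
```

The real library also feeds the error message back into the chat history, so each retry sees why the previous attempt was rejected.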