From 9e0846976c9b25726a71f2aeb695b85dffa061ed Mon Sep 17 00:00:00 2001
From: Marc Khoury
Date: Fri, 8 Mar 2024 17:08:26 -0500
Subject: [PATCH] Update OpenAI API key and model in llm-code-review-action

---
 README.md     | 11 +++++------
 action.yml    |  6 +++---
 entrypoint.py | 18 +++++++++---------
 3 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/README.md b/README.md
index eadc4eb..20ece9c 100644
--- a/README.md
+++ b/README.md
@@ -1,15 +1,15 @@
 # llm-code-review-action
-A container GitHub Action to review a pull request by HuggingFace's LLM Model.
+A container GitHub Action to review a pull request with an OpenAI LLM.
 
-If the size of a pull request is over the maximum chunk size of the HuggingFace API, the Action will split the pull request into multiple chunks and generate review comments for each chunk.
+If the size of a pull request is over the maximum chunk size, the Action will split the pull request into multiple chunks and generate review comments for each chunk.
 And then the Action summarizes the review comments and posts a review comment to the pull request.
 
 ## Pre-requisites
-We have to set a GitHub Actions secret `HUGGING_FACE_API_KEY` to use the HuggingFace API so that we securely pass it to the Action.
+We have to set a GitHub Actions secret `OPENAI_API_KEY` so that the OpenAI API key is passed to the Action securely.
 
 ## Inputs
 
-- `apiKey`: The HuggingFace API key to access the API.
+- `apiKey`: The OpenAI API key to access the API.
 - `githubToken`: The GitHub token to access the GitHub API.
 - `githubRepository`: The GitHub repository to post a review comment.
 - `githubPullRequestNumber`: The GitHub pull request number to post a review comment.
@@ -23,7 +23,7 @@
 - `maxNewTokens`: The max_tokens to generate a review comment.
 - `logLevel`: The log level to print logs.
 
-As you might know, a model of HuggingFace has limitation of the maximum number of input tokens.
+OpenAI models have a limit on the maximum number of input tokens.
 So we have to split the diff of a pull request into multiple chunks, if the size of the diff is over the limitation.
 We can tune the chunk size based on the model we use.
 
@@ -69,7 +69,6 @@ jobs:
           githubRepository: ${{ github.repository }}
           githubPullRequestNumber: ${{ github.event.pull_request.number }}
           gitCommitHash: ${{ github.event.pull_request.head.sha }}
-          repoId: "meta-llama/Llama-2-7b-chat-hf"
           temperature: "0.2"
           maxNewTokens: "250"
           topK: "50"
diff --git a/action.yml b/action.yml
index d36b927..2c0b6fd 100644
--- a/action.yml
+++ b/action.yml
@@ -1,12 +1,12 @@
 name: 'LLM Code Review'
 description: 'Let LLM model review your code'
-author: 'Louis Le (luiyen)'
+author: 'Marc Khoury -- Forked from Louis Le (luiyen)'
 inputs:
   githubToken:
     description: 'Github token to access the repo'
     required: true
   apiKey:
-    description: 'Huggingface access token from [https://huggingface.co/settings/tokens](https://huggingface.co/settings/tokens)'
+    description: 'OpenAI API key'
     required: true
   githubRepository:
     description: "The GitHub repository to use for the action"
@@ -23,7 +23,7 @@ inputs:
   repoId:
     description: "LLM model"
     required: true
-    default: "meta-llama/Llama-2-7b-chat-hf"
+    default: "gpt-3.5-turbo"
   maxNewTokens:
     description: "The amount of new tokens to be generated, this does not include the input length it is a estimate of the size of generated text you want. Each new tokens slows down the request, so look for balance between response times and length of text generated."
     required: false
diff --git a/entrypoint.py b/entrypoint.py
index c3658ca..8d46400 100755
--- a/entrypoint.py
+++ b/entrypoint.py
@@ -15,7 +15,7 @@
 
 import click
 import requests
-from langchain import HuggingFaceHub, LLMChain, PromptTemplate
+from langchain import OpenAI, LLMChain, PromptTemplate
 from loguru import logger
 
 
@@ -76,14 +76,14 @@ def get_review(
     chunked_diff_list = chunk_string(input_string=diff, chunk_size=prompt_chunk_size)
     # Get summary by chunk
     chunked_reviews = []
-    llm = HuggingFaceHub(
-        repo_id=repo_id,
-        model_kwargs={"temperature": temperature,
-                      "max_new_tokens": max_new_tokens,
-                      "top_p": top_p,
-                      "top_k": top_k},
-        huggingfacehub_api_token=os.getenv("API_KEY")
-    )
+    llm = OpenAI(
+        openai_api_key=os.getenv("API_KEY"),
+        model_name="gpt-3.5-turbo",
+        temperature=temperature,
+        max_tokens=max_new_tokens,  # the OpenAI API calls this max_tokens, not max_new_tokens
+        top_p=top_p,
+        # top_k is not an OpenAI API parameter, so it is intentionally not passed
+    )
     for chunked_diff in chunked_diff_list:
         question=chunked_diff
         template = """Provide a concise summary of the bug found in the code, describing its characteristics,
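
Note on the rewritten `get_review` above: the new `OpenAI(...)` call simply maps the action's inputs onto langchain's OpenAI wrapper. Below is a minimal standalone sketch of the same construction, not part of the patch. It assumes the legacy `langchain` package (which exposes `OpenAI`, `LLMChain`, and `PromptTemplate` at the top level), an `API_KEY` environment variable, and illustrative parameter values and prompt text rather than the action's real template.

    import os

    from langchain import LLMChain, OpenAI, PromptTemplate

    # Build the LLM the same way the patched get_review() does.
    llm = OpenAI(
        openai_api_key=os.getenv("API_KEY"),
        model_name="gpt-3.5-turbo",
        temperature=0.2,  # action input `temperature`
        max_tokens=250,   # action input `maxNewTokens`
        top_p=0.95,       # action input `topP` (illustrative value)
    )

    # Chain a prompt template to the LLM and run it over a toy diff.
    prompt = PromptTemplate(
        template="Provide a concise summary of any bug in this diff:\n{diff}",
        input_variables=["diff"],
    )
    chain = LLMChain(llm=llm, prompt=prompt)
    print(chain.run(diff="-if x = 1:\n+if x == 1:"))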