Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add function calling support #116

Merged
merged 12 commits into from
Mar 26, 2024
2 changes: 1 addition & 1 deletion Sources/GoogleAI/Chat.swift
Original file line number Diff line number Diff line change
@@ -153,7 +153,7 @@ public class Chat {
case let .text(str):
combinedText += str

case .data(mimetype: _, _):
case .data, .functionCall, .functionResponse:
// Don't combine it, just add to the content. If there's any text pending, add that as
// a part.
if !combinedText.isEmpty {
235 changes: 235 additions & 0 deletions Sources/GoogleAI/FunctionCalling.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,235 @@
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import Foundation

/// A predicted function call returned from the model.
///
/// The value pairs the ``name`` of the function the model wants invoked with the ``args`` it
/// proposes for that invocation. Decoding support is provided by a separate `Decodable` extension
/// in this file.
public struct FunctionCall: Equatable, Encodable {
/// The name of the function to call.
public let name: String

/// The function parameters and values, stored as a ``JSONObject``.
public let args: JSONObject
}

/// A `Schema` object allows the definition of input and output data types.
///
/// These types can be objects, but also primitives and arrays. Represents a select subset of an
/// [OpenAPI 3.0 schema object](https://spec.openapis.org/oas/v3.0.3#schema).
///
/// When encoded, ``enumValues`` is written under the key `enum` and ``requiredProperties`` under
/// the key `required`; every other property is written under its own name.
public class Schema: Encodable {
/// The data type.
let type: DataType

/// The format of the data.
let format: String?

/// A brief description of the parameter.
let description: String?

/// Indicates if the value may be null.
let nullable: Bool?

/// Possible values of the element of type ``DataType/string`` with "enum" format.
let enumValues: [String]?

/// Schema of the elements of type ``DataType/array``.
let items: Schema?

/// Properties of type ``DataType/object``.
let properties: [String: Schema]?

/// Required properties of type ``DataType/object``.
let requiredProperties: [String]?

// Maps Swift property names to encoded keys; only `enumValues` and `requiredProperties` are
// renamed (to `enum` and `required` respectively).
enum CodingKeys: String, CodingKey {
case type
case format
case description
case nullable
case enumValues = "enum"
case items
case properties
case requiredProperties = "required"
}

/// Constructs a new `Schema`.
///
/// Every argument other than `type` is optional and defaults to `nil`.
///
/// - Parameters:
///   - type: The data type.
///   - format: The format of the data; used only for primitive datatypes.
///     Supported formats:
///     - ``DataType/integer``: int32, int64
///     - ``DataType/number``: float, double
///     - ``DataType/string``: enum
///   - description: A brief description of the parameter; may be formatted as Markdown.
///   - nullable: Indicates if the value may be null.
///   - enumValues: Possible values of the element of type ``DataType/string`` with "enum" format.
///     For example, an enum `Direction` may be defined as `["EAST", "NORTH", "SOUTH", "WEST"]`.
///   - items: Schema of the elements of type ``DataType/array``.
///   - properties: Properties of type ``DataType/object``.
///   - requiredProperties: Required properties of type ``DataType/object``.
public init(type: DataType, format: String? = nil, description: String? = nil,
nullable: Bool? = nil,
enumValues: [String]? = nil, items: Schema? = nil,
properties: [String: Schema]? = nil,
requiredProperties: [String]? = nil) {
self.type = type
self.format = format
self.description = description
self.nullable = nullable
self.enumValues = enumValues
self.items = items
self.properties = properties
self.requiredProperties = requiredProperties
}
}

/// A data type.
///
/// Contains the set of OpenAPI [data types](https://spec.openapis.org/oas/v3.0.3#data-types).
///
/// Each case is backed by an upper-case `String` raw value (for example `"STRING"`), which is the
/// representation used when the value is encoded.
public enum DataType: String, Encodable {
/// A `String` type.
case string = "STRING"

/// A floating-point number type.
case number = "NUMBER"

/// An integer type.
case integer = "INTEGER"

/// A boolean type.
case boolean = "BOOLEAN"

/// An array type.
case array = "ARRAY"

/// An object type.
case object = "OBJECT"
}

/// Structured representation of a function declaration.
///
/// This `FunctionDeclaration` is a representation of a block of code that can be used as a ``Tool``
/// by the model and executed by the client.
public struct FunctionDeclaration {
  /// The name of the function.
  let name: String

  /// A brief description of the function.
  let description: String

  /// Describes the parameters to this function; must be of type ``DataType/object``.
  ///
  /// The initializer always stores an object-typed ``Schema`` here, even when no parameters are
  /// supplied.
  let parameters: Schema?

  /// Constructs a new `FunctionDeclaration`.
  ///
  /// The supplied parameter schemas are wrapped in a single ``Schema`` of type
  /// ``DataType/object``, with `parameters` as its properties and `requiredParameters` as its
  /// required properties.
  ///
  /// - Parameters:
  ///   - name: The name of the function; must be a-z, A-Z, 0-9, or contain underscores and dashes,
  ///     with a maximum length of 63.
  ///   - description: A brief description of the function.
  ///   - parameters: Describes the parameters to this function; the keys are parameter names and
  ///     the values are ``Schema`` objects describing them.
  ///   - requiredParameters: A list of required parameters by name. Defaults to `nil`, so callers
  ///     declaring a function without required parameters may omit the argument.
  public init(name: String, description: String, parameters: [String: Schema]?,
              requiredParameters: [String]? = nil) {
    self.name = name
    self.description = description
    self.parameters = Schema(
      type: .object,
      properties: parameters,
      requiredProperties: requiredParameters
    )
  }
}

/// Helper tools that the model may use to generate a response.
///
/// A `Tool` is a piece of code that enables the system to interact with external systems to
/// perform an action, or set of actions, outside of the knowledge and scope of the model.
public struct Tool: Encodable {
/// A list of `FunctionDeclarations` available to the model.
let functionDeclarations: [FunctionDeclaration]?

/// Constructs a new `Tool`.
///
/// - Parameters:
///   - functionDeclarations: A list of `FunctionDeclarations` available to the model that can be
///     used for function calling.
///     The model or system does not execute the function. Instead the defined function may be
///     returned as a ``FunctionCall`` in ``ModelContent/Part/functionCall(_:)`` with arguments to
///     the client side for execution. The model may decide to call a subset of these functions by
///     populating ``FunctionCall`` in the response. The next conversation turn may contain a
///     ``FunctionResponse`` in ``ModelContent/Part/functionResponse(_:)`` with the
///     ``ModelContent/role`` "function", providing generation context for the next model turn.
public init(functionDeclarations: [FunctionDeclaration]?) {
self.functionDeclarations = functionDeclarations
}
}

/// Result output from a ``FunctionCall``.
///
/// Contains a string representing the `FunctionDeclaration.name` and a structured JSON object
/// containing any output from the function, which is used as context for the model. This should
/// contain the result of a ``FunctionCall`` made based on model prediction.
public struct FunctionResponse: Equatable, Encodable {
/// The name of the function that was called.
let name: String

/// The function's response.
let response: JSONObject

/// Constructs a new `FunctionResponse`.
///
/// - Parameters:
///   - name: The name of the function that was called.
///   - response: The function's response.
public init(name: String, response: JSONObject) {
self.name = name
self.response = response
}
}

// MARK: - Codable Conformance

extension FunctionCall: Decodable {
  enum CodingKeys: CodingKey {
    case name, args
  }

  /// Creates a `FunctionCall` by decoding its `name` and, when present, its `args`.
  ///
  /// If the `args` key is absent (or its value is `null`), `args` is set to an empty
  /// `JSONObject` instead of failing.
  ///
  /// - Parameter decoder: The decoder to read the keyed values from.
  /// - Throws: Any error raised while obtaining the keyed container or decoding either value.
  public init(from decoder: Decoder) throws {
    let values = try decoder.container(keyedBy: CodingKeys.self)
    name = try values.decode(String.self, forKey: .name)
    args = try values.decodeIfPresent(JSONObject.self, forKey: .args) ?? JSONObject()
  }
}

extension FunctionDeclaration: Encodable {
  enum CodingKeys: String, CodingKey {
    case name, description, parameters
  }

  /// Writes `name`, `description` and `parameters` into a keyed container.
  ///
  /// - Parameter encoder: The encoder that receives the keyed values.
  /// - Throws: Any error raised while encoding one of the three values.
  public func encode(to encoder: Encoder) throws {
    var keyed = encoder.container(keyedBy: CodingKeys.self)
    try keyed.encode(name, forKey: .name)
    try keyed.encode(description, forKey: .description)
    try keyed.encode(parameters, forKey: .parameters)
  }
}
2 changes: 2 additions & 0 deletions Sources/GoogleAI/GenerateContentRequest.swift
Original file line number Diff line number Diff line change
@@ -21,6 +21,7 @@ struct GenerateContentRequest {
let contents: [ModelContent]
let generationConfig: GenerationConfig?
let safetySettings: [SafetySetting]?
let tools: [Tool]?
let isStreaming: Bool
let options: RequestOptions
}
@@ -31,6 +32,7 @@ extension GenerateContentRequest: Encodable {
case contents
case generationConfig
case safetySettings
case tools
}
}

10 changes: 10 additions & 0 deletions Sources/GoogleAI/GenerativeModel.swift
Original file line number Diff line number Diff line change
@@ -36,6 +36,9 @@ public final class GenerativeModel {
/// The safety settings to be used for prompts.
let safetySettings: [SafetySetting]?

/// A list of tools the model may use to generate the next response.
let tools: [Tool]?

/// Configuration parameters for sending requests to the backend.
let requestOptions: RequestOptions

@@ -47,17 +50,20 @@ public final class GenerativeModel {
/// - apiKey: The API key for your project.
/// - generationConfig: The content generation parameters your model should use.
/// - safetySettings: A value describing what types of harmful content your model should allow.
/// - tools: A list of ``Tool`` objects that the model may use to generate the next response.
/// - requestOptions: Configuration parameters for sending requests to the backend.
public convenience init(name: String,
apiKey: String,
generationConfig: GenerationConfig? = nil,
safetySettings: [SafetySetting]? = nil,
tools: [Tool]? = nil,
requestOptions: RequestOptions = RequestOptions()) {
self.init(
name: name,
apiKey: apiKey,
generationConfig: generationConfig,
safetySettings: safetySettings,
tools: tools,
requestOptions: requestOptions,
urlSession: .shared
)
@@ -68,12 +74,14 @@ public final class GenerativeModel {
apiKey: String,
generationConfig: GenerationConfig? = nil,
safetySettings: [SafetySetting]? = nil,
tools: [Tool]? = nil,
requestOptions: RequestOptions = RequestOptions(),
urlSession: URLSession) {
modelResourceName = GenerativeModel.modelResourceName(name: name)
generativeAIService = GenerativeAIService(apiKey: apiKey, urlSession: urlSession)
self.generationConfig = generationConfig
self.safetySettings = safetySettings
self.tools = tools
self.requestOptions = requestOptions

Logging.default.info("""
@@ -119,6 +127,7 @@ public final class GenerativeModel {
contents: content(),
generationConfig: generationConfig,
safetySettings: safetySettings,
tools: tools,
isStreaming: false,
options: requestOptions)
response = try await generativeAIService.loadRequest(request: generateContentRequest)
@@ -190,6 +199,7 @@ public final class GenerativeModel {
contents: evaluatedContent,
generationConfig: generationConfig,
safetySettings: safetySettings,
tools: tools,
isStreaming: true,
options: requestOptions)

96 changes: 96 additions & 0 deletions Sources/GoogleAI/JSONValue.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import Foundation

/// A collection of name-value pairs representing a JSON object.
///
/// Names are `String` keys and each value is a ``JSONValue``.
///
/// This may be decoded from, or encoded to, a
/// [`google.protobuf.Struct`](https://protobuf.dev/reference/protobuf/google.protobuf/#struct).
public typealias JSONObject = [String: JSONValue]

/// Represents a value in one of JSON's data types.
///
/// This may be decoded from, or encoded to, a
/// [`google.protobuf.Value`](https://protobuf.dev/reference/protobuf/google.protobuf/#value).
///
/// The type is recursive: ``object(_:)`` and ``array(_:)`` hold further `JSONValue`s.
public enum JSONValue {
/// A `null` value.
case null

/// A numeric value, stored as a `Double`.
case number(Double)

/// A string value.
case string(String)

/// A boolean value.
case bool(Bool)

/// A JSON object.
case object(JSONObject)

/// An array of `JSONValue`s.
case array([JSONValue])
}

extension JSONValue: Decodable {
  /// Decodes a single JSON value by trying each supported representation in turn.
  ///
  /// The attempts run in a fixed order (null, number, string, boolean, object, array) and the
  /// first one that succeeds determines the resulting case.
  ///
  /// - Parameter decoder: The decoder to read the single value from.
  /// - Throws: A `DecodingError` data-corruption error when none of the representations can be
  ///   decoded, or any error thrown while obtaining the single-value container.
  public init(from decoder: Decoder) throws {
    let single = try decoder.singleValueContainer()

    if single.decodeNil() {
      self = .null
      return
    }
    if let double = try? single.decode(Double.self) {
      self = .number(double)
      return
    }
    if let text = try? single.decode(String.self) {
      self = .string(text)
      return
    }
    if let flag = try? single.decode(Bool.self) {
      self = .bool(flag)
      return
    }
    if let dictionary = try? single.decode(JSONObject.self) {
      self = .object(dictionary)
      return
    }
    if let elements = try? single.decode([JSONValue].self) {
      self = .array(elements)
      return
    }

    // Nothing matched: report the payload as corrupted rather than guessing a case.
    throw DecodingError.dataCorruptedError(
      in: single,
      debugDescription: "Failed to decode JSON value."
    )
  }
}

extension JSONValue: Encodable {
  /// Encodes the wrapped value into a single-value container.
  ///
  /// - Parameter encoder: The encoder that receives the value.
  /// - Throws: Any error raised by the container while encoding.
  public func encode(to encoder: Encoder) throws {
    var single = encoder.singleValueContainer()
    switch self {
    case .null:
      try single.encodeNil()
    case .number(let double):
      // Numbers go through `Decimal` so that floating-point serialization is consistent across
      // platforms. E.g., `Double` serializes 3.14159 as 3.1415899999999999 in some cases and
      // 3.14159 in others. See
      // https://forums.swift.org/t/jsonencoder-encodable-floating-point-rounding-error/41390/4 for
      // more details.
      let decimal = Decimal(double)
      try single.encode(decimal)
    case .string(let text):
      try single.encode(text)
    case .bool(let flag):
      try single.encode(flag)
    case .object(let dictionary):
      try single.encode(dictionary)
    case .array(let elements):
      try single.encode(elements)
    }
  }
}

// `Equatable` conformance is declared with an empty body; no custom `==` is written here.
extension JSONValue: Equatable {}
16 changes: 15 additions & 1 deletion Sources/GoogleAI/ModelContent.swift
Original file line number Diff line number Diff line change
@@ -25,6 +25,8 @@ public struct ModelContent: Codable, Equatable {
enum CodingKeys: String, CodingKey {
case text
case inlineData
case functionCall
case functionResponse
}

enum InlineDataKeys: String, CodingKey {
@@ -38,6 +40,12 @@ public struct ModelContent: Codable, Equatable {
/// Data with a specified media type. Not all media types may be supported by the AI model.
case data(mimetype: String, Data)

/// A predicted function call returned from the model.
case functionCall(FunctionCall)

/// A response to a function call.
case functionResponse(FunctionResponse)

// MARK: Convenience Initializers

/// Convenience function for populating a Part with JPEG data.
@@ -64,6 +72,10 @@ public struct ModelContent: Codable, Equatable {
)
try inlineDataContainer.encode(mimetype, forKey: .mimeType)
try inlineDataContainer.encode(bytes, forKey: .bytes)
case let .functionCall(functionCall):
try container.encode(functionCall, forKey: .functionCall)
case let .functionResponse(functionResponse):
try container.encode(functionResponse, forKey: .functionResponse)
}
}

@@ -79,10 +91,12 @@ public struct ModelContent: Codable, Equatable {
let mimetype = try dataContainer.decode(String.self, forKey: .mimeType)
let bytes = try dataContainer.decode(Data.self, forKey: .bytes)
self = .data(mimetype: mimetype, bytes)
} else if values.contains(.functionCall) {
self = try .functionCall(values.decode(FunctionCall.self, forKey: .functionCall))
} else {
throw DecodingError.dataCorrupted(.init(
codingPath: [CodingKeys.text, CodingKeys.inlineData],
debugDescription: "Neither text or inline data was found."
debugDescription: "No text, inline data or function call was found."
))
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"candidates": [
{
"content": {
"parts": [
{
"functionCall": {
"name": "current_time"
}
}
],
"role": "model"
},
"finishReason": "STOP",
"index": 0
}
]
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"candidates": [
{
"content": {
"parts": [
{
"functionCall": {
"name": "current_time",
"args": {}
}
}
],
"role": "model"
},
"finishReason": "STOP",
"index": 0
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"candidates": [
{
"content": {
"parts": [
{
"functionCall": {
"name": "sum",
"args": {
"y": 5,
"x": 4
}
}
}
],
"role": "model"
},
"finishReason": "STOP",
"index": 0
}
]
}
67 changes: 67 additions & 0 deletions Tests/GoogleAITests/GenerativeModelTests.swift
Original file line number Diff line number Diff line change
@@ -169,6 +169,73 @@ final class GenerativeModelTests: XCTestCase {
_ = try await model.generateContent(testPrompt)
}

/// Loads the "unary-success-function-call-empty-arguments" fixture and checks that the single
/// returned part is a function call named "current_time" with no arguments.
func testGenerateContent_success_functionCall_emptyArguments() async throws {
  MockURLProtocol.requestHandler = try httpRequestHandler(
    forResource: "unary-success-function-call-empty-arguments",
    withExtension: "json"
  )

  let result = try await model.generateContent(testPrompt)

  let candidates = result.candidates
  XCTAssertEqual(candidates.count, 1)
  let onlyCandidate = try XCTUnwrap(candidates.first)
  let parts = onlyCandidate.content.parts
  XCTAssertEqual(parts.count, 1)
  let firstPart = try XCTUnwrap(parts.first)
  guard case .functionCall(let call) = firstPart else {
    XCTFail("Part is not a FunctionCall.")
    return
  }
  XCTAssertEqual(call.name, "current_time")
  XCTAssertTrue(call.args.isEmpty)
}

/// Loads the "unary-success-function-call-no-arguments" fixture and checks that the single
/// returned part is a function call named "current_time" whose arguments are empty.
func testGenerateContent_success_functionCall_noArguments() async throws {
  MockURLProtocol.requestHandler = try httpRequestHandler(
    forResource: "unary-success-function-call-no-arguments",
    withExtension: "json"
  )

  let result = try await model.generateContent(testPrompt)

  let candidates = result.candidates
  XCTAssertEqual(candidates.count, 1)
  let onlyCandidate = try XCTUnwrap(candidates.first)
  let parts = onlyCandidate.content.parts
  XCTAssertEqual(parts.count, 1)
  let firstPart = try XCTUnwrap(parts.first)
  guard case .functionCall(let call) = firstPart else {
    XCTFail("Part is not a FunctionCall.")
    return
  }
  XCTAssertEqual(call.name, "current_time")
  XCTAssertTrue(call.args.isEmpty)
}

/// Loads the "unary-success-function-call-with-arguments" fixture and checks that the single
/// returned part is a function call named "sum" carrying the numeric arguments `x` and `y`.
func testGenerateContent_success_functionCall_withArguments() async throws {
  MockURLProtocol.requestHandler = try httpRequestHandler(
    forResource: "unary-success-function-call-with-arguments",
    withExtension: "json"
  )

  let result = try await model.generateContent(testPrompt)

  let candidates = result.candidates
  XCTAssertEqual(candidates.count, 1)
  let onlyCandidate = try XCTUnwrap(candidates.first)
  let parts = onlyCandidate.content.parts
  XCTAssertEqual(parts.count, 1)
  let firstPart = try XCTUnwrap(parts.first)
  guard case .functionCall(let call) = firstPart else {
    XCTFail("Part is not a FunctionCall.")
    return
  }
  XCTAssertEqual(call.name, "sum")
  let arguments = call.args
  XCTAssertEqual(arguments.count, 2)
  let xArgument = try XCTUnwrap(arguments["x"])
  XCTAssertEqual(xArgument, .number(4))
  let yArgument = try XCTUnwrap(arguments["y"])
  XCTAssertEqual(yArgument, .number(5))
}

func testGenerateContent_failure_invalidAPIKey() async throws {
let expectedStatusCode = 400
MockURLProtocol
145 changes: 145 additions & 0 deletions Tests/GoogleAITests/JSONValueTests.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import XCTest

@testable import GoogleGenerativeAI

/// Unit tests for decoding and encoding `JSONValue` with `JSONDecoder` and `JSONEncoder`.
final class JSONValueTests: XCTestCase {
  let decoder = JSONDecoder()
  let encoder = JSONEncoder()

  let numberKey = "pi"
  let numberValue = 3.14159
  let numberValueEncoded = "3.14159"
  let stringKey = "hello"
  let stringValue = "Hello, world!"

  override func setUp() {
    // Sorted keys keep the encoded object output stable for the string comparison below.
    encoder.outputFormatting = .sortedKeys
  }

  // MARK: - Helpers

  /// Decodes `json` (interpreted as UTF-8) into a `JSONValue` using the shared decoder.
  private func decodedValue(fromJSON json: String) throws -> JSONValue {
    let jsonData = try XCTUnwrap(json.data(using: .utf8))
    return try XCTUnwrap(decoder.decode(JSONValue.self, from: jsonData))
  }

  /// Encodes `value` with the shared encoder and returns the output as a UTF-8 string.
  private func encodedString(for value: JSONValue) throws -> String {
    let jsonData = try encoder.encode(value)
    return try XCTUnwrap(String(data: jsonData, encoding: .utf8))
  }

  // MARK: - Decoding

  func testDecodeNull() throws {
    let decoded = try decodedValue(fromJSON: "null")

    XCTAssertEqual(decoded, .null)
  }

  func testDecodeNumber() throws {
    let decoded = try decodedValue(fromJSON: "\(numberValue)")

    XCTAssertEqual(decoded, .number(numberValue))
  }

  func testDecodeString() throws {
    let decoded = try decodedValue(fromJSON: "\"\(stringValue)\"")

    XCTAssertEqual(decoded, .string(stringValue))
  }

  func testDecodeBool() throws {
    let expectedBool = true

    let decoded = try decodedValue(fromJSON: "\(expectedBool)")

    XCTAssertEqual(decoded, .bool(expectedBool))
  }

  func testDecodeObject() throws {
    let expectedObject: JSONObject = [
      numberKey: .number(numberValue),
      stringKey: .string(stringValue),
    ]
    let json = """
    {
    "\(numberKey)": \(numberValue),
    "\(stringKey)": "\(stringValue)"
    }
    """

    let decoded = try decodedValue(fromJSON: json)

    XCTAssertEqual(decoded, .object(expectedObject))
  }

  func testDecodeArray() throws {
    let expectedArray: [JSONValue] = [.null, .number(numberValue)]

    let decoded = try decodedValue(fromJSON: "[ null, \(numberValue) ]")

    XCTAssertEqual(decoded, .array(expectedArray))
  }

  // MARK: - Encoding

  func testEncodeNull() throws {
    let json = try encodedString(for: .null)

    XCTAssertEqual(json, "null")
  }

  func testEncodeNumber() throws {
    let json = try encodedString(for: .number(numberValue))

    XCTAssertEqual(json, "\(numberValue)")
  }

  func testEncodeString() throws {
    let json = try encodedString(for: .string(stringValue))

    XCTAssertEqual(json, "\"\(stringValue)\"")
  }

  func testEncodeBool() throws {
    let boolValue = true

    let json = try encodedString(for: .bool(boolValue))

    XCTAssertEqual(json, "\(boolValue)")
  }

  func testEncodeObject() throws {
    let objectValue: JSONObject = [
      numberKey: .number(numberValue),
      stringKey: .string(stringValue),
    ]

    let json = try encodedString(for: .object(objectValue))

    XCTAssertEqual(
      json,
      "{\"\(stringKey)\":\"\(stringValue)\",\"\(numberKey)\":\(numberValueEncoded)}"
    )
  }

  func testEncodeArray() throws {
    let arrayValue: [JSONValue] = [.null, .number(numberValue)]

    let json = try encodedString(for: .array(arrayValue))

    XCTAssertEqual(json, "[null,\(numberValueEncoded)]")
  }
}