Skip to content

Commit

Permalink
add: google custom search
Browse files Browse the repository at this point in the history
  • Loading branch information
iwilltry42 committed Mar 6, 2025
1 parent d9a1866 commit 0c1ed3f
Show file tree
Hide file tree
Showing 12 changed files with 498 additions and 36 deletions.
2 changes: 2 additions & 0 deletions index.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ tools:
reference: ./word
tavily:
reference: ./search/tavily
googlecustomsearch:
reference: ./search/google/googlecustomsearch
pagerduty:
reference: ./pagerduty
postgres:
Expand Down
27 changes: 27 additions & 0 deletions search/google/googlecustomsearch/credential/tool.gpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
Name: Google Custom Search API Credential
Share Credential: google-cse-cred as google-cse
Type: credential

---
Name: google-cse-cred
Tools: ../../../../generic-credential

#!sys.call ../../../../generic-credential

{
"promptInfo": {
"fields" : [
{
"name": "Google CSE API Key",
"env": "GOOGLE_CSE_API_KEY",
"sensitive": true
},
{
"name": "Google Custom Search Engine ID",
"env": "GOOGLE_CSE_ID",
"sensitive": false
}
],
"message": "Configure the custom search API and get an API Key as per https://developers.google.com/custom-search/v1/introduction"
}
}
34 changes: 34 additions & 0 deletions search/google/googlecustomsearch/go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
module github.com/obot-platform/tools/search/google/googlecustomsearch

go 1.23.4

require (
google.golang.org/api v0.223.0

)

require (
cloud.google.com/go/auth v0.15.0 // indirect
cloud.google.com/go/auth/oauth2adapt v0.2.7 // indirect
cloud.google.com/go/compute/metadata v0.6.0 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/go-logr/logr v1.4.2 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/google/s2a-go v0.1.9 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/googleapis/enterprise-certificate-proxy v0.3.4 // indirect
github.com/googleapis/gax-go/v2 v2.14.1 // indirect
go.opentelemetry.io/auto/sdk v1.1.0 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.59.0 // indirect
go.opentelemetry.io/otel v1.34.0 // indirect
go.opentelemetry.io/otel/metric v1.34.0 // indirect
go.opentelemetry.io/otel/trace v1.34.0 // indirect
golang.org/x/crypto v0.33.0 // indirect
golang.org/x/net v0.35.0 // indirect
golang.org/x/oauth2 v0.26.0 // indirect
golang.org/x/sys v0.30.0 // indirect
golang.org/x/text v0.22.0 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20250219182151-9fdb1cabc7b2 // indirect
google.golang.org/grpc v1.70.0 // indirect
google.golang.org/protobuf v1.36.5 // indirect
)
69 changes: 69 additions & 0 deletions search/google/googlecustomsearch/go.sum
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
cloud.google.com/go/auth v0.15.0 h1:Ly0u4aA5vG/fsSsxu98qCQBemXtAtJf+95z9HK+cxps=
cloud.google.com/go/auth v0.15.0/go.mod h1:WJDGqZ1o9E9wKIL+IwStfyn/+s59zl4Bi+1KQNVXLZ8=
cloud.google.com/go/auth/oauth2adapt v0.2.7 h1:/Lc7xODdqcEw8IrZ9SvwnlLX6j9FHQM74z6cBk9Rw6M=
cloud.google.com/go/auth/oauth2adapt v0.2.7/go.mod h1:NTbTTzfvPl1Y3V1nPpOgl2w6d/FjO7NNUQaWSox6ZMc=
cloud.google.com/go/compute/metadata v0.6.0 h1:A6hENjEsCDtC1k8byVsgwvVcioamEHvZ4j01OwKxG9I=
cloud.google.com/go/compute/metadata v0.6.0/go.mod h1:FjyFAW1MW0C203CEOMDTu3Dk1FlqW3Rga40jzHL4hfg=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
github.com/google/s2a-go v0.1.9 h1:LGD7gtMgezd8a/Xak7mEWL0PjoTQFvpRudN895yqKW0=
github.com/google/s2a-go v0.1.9/go.mod h1:YA0Ei2ZQL3acow2O62kdp9UlnvMmU7kA6Eutn0dXayM=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/googleapis/enterprise-certificate-proxy v0.3.4 h1:XYIDZApgAnrN1c855gTgghdIA6Stxb52D5RnLI1SLyw=
github.com/googleapis/enterprise-certificate-proxy v0.3.4/go.mod h1:YKe7cfqYXjKGpGvmSg28/fFvhNzinZQm8DGnaburhGA=
github.com/googleapis/gax-go/v2 v2.14.1 h1:hb0FFeiPaQskmvakKu5EbCbpntQn48jyHuvrkurSS/Q=
github.com/googleapis/gax-go/v2 v2.14.1/go.mod h1:Hb/NubMaVM88SrNkvl8X/o8XWwDJEPqouaLeN2IUxoA=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA=
go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.59.0 h1:CV7UdSGJt/Ao6Gp4CXckLxVRRsRgDHoI8XjbL3PDl8s=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.59.0/go.mod h1:FRmFuRJfag1IZ2dPkHnEoSFVgTVPUd2qf5Vi69hLb8I=
go.opentelemetry.io/otel v1.34.0 h1:zRLXxLCgL1WyKsPVrgbSdMN4c0FMkDAskSTQP+0hdUY=
go.opentelemetry.io/otel v1.34.0/go.mod h1:OWFPOQ+h4G8xpyjgqo4SxJYdDQ/qmRH+wivy7zzx9oI=
go.opentelemetry.io/otel/metric v1.34.0 h1:+eTR3U0MyfWjRDhmFMxe2SsW64QrZ84AOhvqS7Y+PoQ=
go.opentelemetry.io/otel/metric v1.34.0/go.mod h1:CEDrp0fy2D0MvkXE+dPV7cMi8tWZwX3dmaIhwPOaqHE=
go.opentelemetry.io/otel/sdk v1.34.0 h1:95zS4k/2GOy069d321O8jWgYsW3MzVV+KuSPKp7Wr1A=
go.opentelemetry.io/otel/sdk v1.34.0/go.mod h1:0e/pNiaMAqaykJGKbi+tSjWfNNHMTxoC9qANsCzbyxU=
go.opentelemetry.io/otel/sdk/metric v1.32.0 h1:rZvFnvmvawYb0alrYkjraqJq0Z4ZUJAiyYCU9snn1CU=
go.opentelemetry.io/otel/sdk/metric v1.32.0/go.mod h1:PWeZlq0zt9YkYAp3gjKZ0eicRYvOh1Gd+X99x6GHpCQ=
go.opentelemetry.io/otel/trace v1.34.0 h1:+ouXS2V8Rd4hp4580a8q23bg0azF2nI8cqLYnC8mh/k=
go.opentelemetry.io/otel/trace v1.34.0/go.mod h1:Svm7lSjQD7kG7KJ/MUHPVXSDGz2OX4h0M2jHBhmSfRE=
golang.org/x/crypto v0.33.0 h1:IOBPskki6Lysi0lo9qQvbxiQ+FvsCC/YWOecCHAixus=
golang.org/x/crypto v0.33.0/go.mod h1:bVdXmD7IV/4GdElGPozy6U7lWdRXA4qyRVGJV57uQ5M=
golang.org/x/net v0.35.0 h1:T5GQRQb2y08kTAByq9L4/bz8cipCdA8FbRTXewonqY8=
golang.org/x/net v0.35.0/go.mod h1:EglIi67kWsHKlRzzVMUD93VMSWGFOMSZgxFjparz1Qk=
golang.org/x/oauth2 v0.26.0 h1:afQXWNNaeC4nvZ0Ed9XvCCzXM6UHJG7iCg0W4fPqSBE=
golang.org/x/oauth2 v0.26.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI=
golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w=
golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc=
golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM=
golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY=
google.golang.org/api v0.223.0 h1:JUTaWEriXmEy5AhvdMgksGGPEFsYfUKaPEYXd4c3Wvc=
google.golang.org/api v0.223.0/go.mod h1:C+RS7Z+dDwds2b+zoAk5hN/eSfsiCn0UDrYof/M4d2M=
google.golang.org/genproto/googleapis/api v0.0.0-20241209162323-e6fa225c2576 h1:CkkIfIt50+lT6NHAVoRYEyAvQGFM7xEwXUUywFvEb3Q=
google.golang.org/genproto/googleapis/api v0.0.0-20241209162323-e6fa225c2576/go.mod h1:1R3kvZ1dtP3+4p4d3G8uJ8rFk/fWlScl38vanWACI08=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250219182151-9fdb1cabc7b2 h1:DMTIbak9GhdaSxEjvVzAeNZvyc03I61duqNbnm3SU0M=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250219182151-9fdb1cabc7b2/go.mod h1:LuRYeWDFV6WOn90g357N17oMCaxpgCnbi/44qJvDn2I=
google.golang.org/grpc v1.70.0 h1:pWFv03aZoHzlRKHWicjsZytKAiYCtNS0dHbXnIdq7jQ=
google.golang.org/grpc v1.70.0/go.mod h1:ofIJqVKDXx/JiXrwr2IG4/zwdH9txy3IlF40RmcJSQw=
google.golang.org/protobuf v1.36.5 h1:tPhr+woSbjfYvY6/GPufUoYizxw1cF/yFoxJ2fmpwlM=
google.golang.org/protobuf v1.36.5/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
60 changes: 60 additions & 0 deletions search/google/googlecustomsearch/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
package main

import (
"context"
"encoding/json"
"fmt"
"os"

googlecustomsearch "google.golang.org/api/customsearch/v1"
"google.golang.org/api/option"
)

// exitError aborts the program when err is non-nil: it prints the failure to
// standard output and exits with status 1. A nil err is a no-op.
func exitError(err error) {
	if err == nil {
		return
	}

	fmt.Printf("google custom search failed: %v\n", err)
	os.Exit(1)
}

// result is a single search hit reduced to the three fields this tool reports.
// The fields carry no JSON tags, so encoding/json emits them under their Go
// names in declaration order.
type result struct {
	Title, Link, Snippet string
}

// main performs one search and prints the JSON-encoded results to standard
// output. A search error is passed to exitError, which reports it and exits.
func main() {
	out, err := search(context.Background())
	exitError(err)

	fmt.Println(out)
}

// search runs the query from the QUERY environment variable against the Google
// Custom Search engine identified by GOOGLE_CSE_ID, authenticating with the
// API key in GOOGLE_CSE_API_KEY.
//
// On success it returns a JSON array with one object per hit, each holding the
// hit's Title, Link and Snippet. Zero hits yield "[]".
//
// Every failure (missing configuration, client construction, the API call
// itself, JSON encoding) is returned as an error; search never terminates the
// process on its own. ctx is attached to the API request, so cancelling it or
// letting its deadline expire aborts the call.
func search(ctx context.Context) (string, error) {
	const (
		apiKeyEnv = "GOOGLE_CSE_API_KEY"
		cseIDEnv  = "GOOGLE_CSE_ID"
		queryEnv  = "QUERY"
	)

	// Fail fast with a precise message rather than sending an incomplete
	// request over the network.
	for _, name := range []string{apiKeyEnv, cseIDEnv, queryEnv} {
		if os.Getenv(name) == "" {
			return "", fmt.Errorf("required environment variable %s is not set", name)
		}
	}

	client, err := googlecustomsearch.NewService(ctx, option.WithAPIKey(os.Getenv(apiKeyEnv)))
	if err != nil {
		return "", fmt.Errorf("creating custom search client: %w", err)
	}

	resp, err := client.Cse.List().
		Cx(os.Getenv(cseIDEnv)).
		Q(os.Getenv(queryEnv)).
		Context(ctx).
		Do()
	if err != nil {
		return "", fmt.Errorf("querying custom search engine: %w", err)
	}

	// make (not a nil slice) so that an empty result set encodes as [] and
	// not as null.
	results := make([]result, len(resp.Items))
	for i, item := range resp.Items {
		results[i] = result{
			Title:   item.Title,
			Link:    item.Link,
			Snippet: item.Snippet,
		}
	}

	resJSON, err := json.Marshal(results)
	if err != nil {
		return "", fmt.Errorf("encoding search results: %w", err)
	}

	return string(resJSON), nil
}
28 changes: 28 additions & 0 deletions search/google/googlecustomsearch/tool.gpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
---
Name: Google Custom Search
Description: Search the Web using a Google Custom Search Engine.
Credential: ./credential
Share Context: ../../../time
Param: query: The search query
Tools: Search, ../../../website-scraper

Search the Web using the Google Custom Search Engine for the query ${QUERY}.
Skim through the results and decide if you need to scrape any of the URLs to get more information.
Scrape all relevant items using the provided link.
Then construct an answer based on the information you have gathered.
Return your answer with the sources you have used in the following JSON format (do not use any syntax highlighting, just return the JSON):
{
"answer": "Your answer here",
"sources": ["https://www.example.com", "https://www.example2.com"]
}

---
Name: Search
Description: Search the Web using a Google Custom Search Engine.
Credential: ./credential
Share Context: ../../../time
Param: query: The search query

#!${GPTSCRIPT_TOOL_DIR}/bin/gptscript-go-tool search


42 changes: 6 additions & 36 deletions website-cleaner/main.go
Original file line number Diff line number Diff line change
@@ -1,31 +1,15 @@
package main

import (
"bytes"
"context"
"fmt"
"os"

"github.com/PuerkitoBio/goquery"
"github.com/gptscript-ai/go-gptscript"
"github.com/obot-platform/tools/website-cleaner/pkg/clean"
"github.com/sirupsen/logrus"

md "github.com/JohannesKaufmann/html-to-markdown/v2/converter"
"github.com/JohannesKaufmann/html-to-markdown/v2/plugin/base"
"github.com/JohannesKaufmann/html-to-markdown/v2/plugin/commonmark"
)

var tagsToRemove = []string{
"script, style, noscript, meta, head",
"header", "footer", "nav", "aside", ".header", ".top", ".navbar", "#header",
".footer", ".bottom", "#footer", ".sidebar", ".side", ".aside", "#sidebar",
".modal", ".popup", "#modal", ".overlay", ".ad", ".ads", ".advert", "#ad",
".lang-selector", ".language", "#language-selector", ".social", ".social-media",
".social-links", "#social", ".menu", ".navigation", "#nav", ".breadcrumbs",
"#breadcrumbs", "#search-form", ".search", "#search", ".share", "#share",
".widget", "#widget", ".cookie", "#cookie",
}

func main() {
input := os.Getenv("INPUT")
output := os.Getenv("OUTPUT")
Expand Down Expand Up @@ -61,35 +45,21 @@ func main() {

originalSize := len(inputFile)

doc, err := goquery.NewDocumentFromReader(bytes.NewBuffer(inputFile))
if err != nil {
logOut.WithError(fmt.Errorf("failed to parse html input: %v", err)).Error()
os.Exit(0)
}

// Clean HTML programmatically
for _, tag := range tagsToRemove {
doc.Find(tag).Remove()
}

// transform to Markdown
converter := md.NewConverter(md.WithPlugins(base.NewBasePlugin(), commonmark.NewCommonmarkPlugin()))
html, err := doc.Html()
cleaned, err := clean.Clean(inputFile)
if err != nil {
logOut.WithError(fmt.Errorf("failed to get html from document: %v", err)).Error()
logOut.WithError(fmt.Errorf("failed to clean html: %v", err)).Error()
os.Exit(0)
}

sanitizedHTMLSize := len(html)

markdown, err := converter.ConvertString(html)
// transform to Markdown
markdown, err := clean.ToMarkdown(cleaned)
if err != nil {
logOut.WithError(fmt.Errorf("failed to convert html to markdown: %v", err)).Error()
os.Exit(0)
}

markdownSize := len(markdown)
logErr.Infof("[%s] Original HTML size: %d, Sanitized HTML size: %d, Converted Markdown size: %d", input, originalSize, sanitizedHTMLSize, markdownSize)
logErr.Infof("[%s] Original HTML size: %d, Converted Markdown size: %d", input, originalSize, markdownSize)

if err := gptscriptClient.WriteFileInWorkspace(ctx, output, []byte(markdown)); err != nil {
logOut.WithError(fmt.Errorf("failed to write output file %q: %v", output, err)).Error()
Expand Down
42 changes: 42 additions & 0 deletions website-cleaner/pkg/clean/clean.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
package clean

import (
"bytes"
"fmt"

md "github.com/JohannesKaufmann/html-to-markdown/v2/converter"
"github.com/JohannesKaufmann/html-to-markdown/v2/plugin/base"
"github.com/JohannesKaufmann/html-to-markdown/v2/plugin/commonmark"
"github.com/PuerkitoBio/goquery"
)

// tagsToRemove lists the selectors whose matching elements Clean strips from a
// document before it is converted. Each entry is passed as-is to goquery's
// Find, so a single entry may itself be a comma-separated selector group.
var tagsToRemove = []string{
	// Non-content elements.
	"script, style, noscript, meta, head",
	// Structural page chrome.
	"header", "footer", "nav", "aside",
	// Headers.
	".header", ".top", ".navbar", "#header",
	// Footers.
	".footer", ".bottom", "#footer",
	// Sidebars.
	".sidebar", ".side", ".aside", "#sidebar",
	// Modals and overlays.
	".modal", ".popup", "#modal", ".overlay",
	// Advertising.
	".ad", ".ads", ".advert", "#ad",
	// Language pickers.
	".lang-selector", ".language", "#language-selector",
	// Social media.
	".social", ".social-media", ".social-links", "#social",
	// Menus and navigation.
	".menu", ".navigation", "#nav",
	// Breadcrumbs.
	".breadcrumbs", "#breadcrumbs",
	// Search boxes.
	"#search-form", ".search", "#search",
	// Share buttons.
	".share", "#share",
	// Widgets.
	".widget", "#widget",
	// Cookie banners.
	".cookie", "#cookie",
}

// Clean parses in as an HTML document, removes every element matched by a
// selector in tagsToRemove, and returns the HTML that goquery renders for what
// is left of the document.
//
// It returns an error if the input cannot be parsed or the cleaned document
// cannot be rendered. The underlying error is wrapped, so callers can inspect
// it with errors.Is and errors.As.
func Clean(in []byte) ([]byte, error) {
	// The input is only read, so a bytes.Reader is sufficient.
	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(in))
	if err != nil {
		return nil, fmt.Errorf("failed to parse html input: %w", err)
	}

	for _, selector := range tagsToRemove {
		doc.Find(selector).Remove()
	}

	html, err := doc.Html()
	if err != nil {
		return nil, fmt.Errorf("failed to get html from document: %w", err)
	}

	return []byte(html), nil
}

// ToMarkdown converts the HTML in in to Markdown using an html-to-markdown
// converter configured with the base and commonmark plugins. It returns
// whatever bytes and error the converter produces.
func ToMarkdown(in []byte) ([]byte, error) {
	plugins := md.WithPlugins(
		base.NewBasePlugin(),
		commonmark.NewCommonmarkPlugin(),
	)
	conv := md.NewConverter(plugins)

	src := bytes.NewBuffer(in)
	return conv.ConvertReader(src)
}
30 changes: 30 additions & 0 deletions website-scraper/go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
module github.com/obot-platform/tools/website-scraper

replace github.com/obot-platform/tools/website-cleaner => ../website-cleaner

go 1.24.0

require (
github.com/PuerkitoBio/goquery v1.10.2
github.com/gocolly/colly v1.2.0
github.com/obot-platform/tools/website-cleaner v0.0.0-00010101000000-000000000000
)

require (
github.com/JohannesKaufmann/dom v0.1.1-0.20240706125338-ff9f3b772364 // indirect
github.com/JohannesKaufmann/html-to-markdown/v2 v2.2.1 // indirect
github.com/andybalholm/cascadia v1.3.3 // indirect
github.com/antchfx/htmlquery v1.3.4 // indirect
github.com/antchfx/xmlquery v1.4.4 // indirect
github.com/antchfx/xpath v1.3.3 // indirect
github.com/gobwas/glob v0.2.3 // indirect
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
github.com/golang/protobuf v1.5.2 // indirect
github.com/kennygrant/sanitize v1.2.4 // indirect
github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d // indirect
github.com/temoto/robotstxt v1.1.2 // indirect
golang.org/x/net v0.35.0 // indirect
golang.org/x/text v0.22.0 // indirect
google.golang.org/appengine v1.6.8 // indirect
google.golang.org/protobuf v1.26.0 // indirect
)
Loading

0 comments on commit 0c1ed3f

Please sign in to comment.