Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add featurestore support for cold start recall #69

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ require (
require (
fortio.org/assert v1.2.1
github.com/alibabacloud-go/opensearch-util v1.0.1
github.com/aliyun/aliyun-pai-featurestore-go-sdk/v2 v2.3.1-0.20250108002338-26adc33508e8
github.com/aliyun/aliyun-pai-featurestore-go-sdk/v2 v2.3.2-0.20250123235701-85862d61a9b0
github.com/aliyun/aliyun-pairec-config-go-sdk/v2 v2.0.8-0.20241121075700-d79108b1097e
github.com/aliyun/credentials-go v1.3.1
github.com/apache/calcite-avatica-go/v5 v5.0.0
Expand All @@ -60,6 +60,7 @@ require (
github.com/alibabacloud-go/tea-utils/v2 v2.0.6 // indirect
github.com/alibabacloud-go/tea-xml v1.1.3 // indirect
github.com/aliyun/alibaba-cloud-sdk-go v1.63.0 // indirect
github.com/aliyun/aliyun-odps-go-sdk/arrow v0.0.1 // indirect
github.com/antlr4-go/antlr/v4 v4.13.0 // indirect
github.com/clbanning/mxj/v2 v2.5.5 // indirect
github.com/expr-lang/expr v1.16.9 // indirect
Expand All @@ -76,10 +77,12 @@ require (
github.com/jmespath/go-jmespath v0.4.0 // indirect
github.com/milvus-io/milvus-proto/go-api v0.0.0-20230105121931-9f9303dcc729 // indirect
github.com/opentracing/opentracing-go v1.2.1-0.20220228012449-10b1cf09e00b // indirect
github.com/pierrec/lz4/v4 v4.1.11 // indirect
github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a // indirect
github.com/rogpeppe/go-internal v1.11.0 // indirect
github.com/xinsnake/go-http-digest-auth-client v0.6.0 // indirect
golang.org/x/crypto v0.18.0 // indirect
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
gopkg.in/ini.v1 v1.67.0 // indirect
modernc.org/mathutil v1.6.0 // indirect
modernc.org/strutil v1.2.0 // indirect
Expand Down Expand Up @@ -107,7 +110,7 @@ require (
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/pierrec/lz4 v2.6.0+incompatible // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pkg/errors v0.9.1
github.com/prometheus/client_model v0.3.0 // indirect
github.com/prometheus/common v0.42.0
github.com/prometheus/procfs v0.9.0 // indirect
Expand All @@ -121,7 +124,7 @@ require (
github.com/valyala/fasthttp v1.43.0 // indirect
go.uber.org/atomic v1.9.0 // indirect
golang.org/x/net v0.20.0
golang.org/x/sys v0.16.0 // indirect
golang.org/x/sys v0.16.0
golang.org/x/text v0.14.0 // indirect
google.golang.org/genproto v0.0.0-20220616135557-88e70c0c3a90 // indirect
gopkg.in/natefinch/lumberjack.v2 v2.0.0 // indirect
Expand Down
9 changes: 7 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,10 @@ github.com/aliyun/aliyun-igraph-go-sdk v0.0.0-20221208132745-defc68e1b227 h1:NBH
github.com/aliyun/aliyun-igraph-go-sdk v0.0.0-20221208132745-defc68e1b227/go.mod h1:k+nixj5x99h1pIn/7RyAxeMtig9PvnCFdDNzHduLqWU=
github.com/aliyun/aliyun-log-go-sdk v0.1.27 h1:fXtaOAcdR3DsqN9GZkfJue8B2Dba0TV+8Ahwq4o+y5g=
github.com/aliyun/aliyun-log-go-sdk v0.1.27/go.mod h1:aBG0R+MWRTgvlIODQkz+a3/RM9bQYKsmSbKdbIx4vpc=
github.com/aliyun/aliyun-pai-featurestore-go-sdk/v2 v2.3.1-0.20250108002338-26adc33508e8 h1:IEwhwW2s/2DD6xh1RnyHW3OuK1p7eXbqDMA5hz67Hmk=
github.com/aliyun/aliyun-pai-featurestore-go-sdk/v2 v2.3.1-0.20250108002338-26adc33508e8/go.mod h1:kUGlPd5k1G1PFtt97rt4B5O1nHp7lhq7wQJWh5BQOVY=
github.com/aliyun/aliyun-odps-go-sdk/arrow v0.0.1 h1:jSMfOm0fpc83+YersEbXhYG7/4o1z0u698FiacPD77o=
github.com/aliyun/aliyun-odps-go-sdk/arrow v0.0.1/go.mod h1:ki4zr1YK8ehW1rQxMFoAVTnjZeRNcSbVNyJxJZqR9i4=
github.com/aliyun/aliyun-pai-featurestore-go-sdk/v2 v2.3.2-0.20250123235701-85862d61a9b0 h1:axiyfRxHzSB1fG6RKag5q5k5cI6Fu6gXWInqf2raCC8=
github.com/aliyun/aliyun-pai-featurestore-go-sdk/v2 v2.3.2-0.20250123235701-85862d61a9b0/go.mod h1:mLX4xsN69N26u0CCwsVXtATyrKNfChNdL/Q986zG0f0=
github.com/aliyun/aliyun-pairec-config-go-sdk/v2 v2.0.8-0.20241121075700-d79108b1097e h1:sux0dDaqjww6ITWg/idpacUtwSLkR6Bz2Q2mGM82B+M=
github.com/aliyun/aliyun-pairec-config-go-sdk/v2 v2.0.8-0.20241121075700-d79108b1097e/go.mod h1:nYJ7Wp+sW/X3NAZBeJUyBGifcLTqIkNumq3lExpEG30=
github.com/aliyun/aliyun-tablestore-go-sdk v1.7.7 h1:+d/mcgaxx1jaWtFN2WrBHy4XeM9IK5gmZvbbpVkhqHE=
Expand Down Expand Up @@ -416,6 +418,8 @@ github.com/pierrec/lz4 v1.0.2-0.20190131084431-473cd7ce01a1/go.mod h1:3/3N9NVKO0
github.com/pierrec/lz4 v2.0.5+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY=
github.com/pierrec/lz4 v2.6.0+incompatible h1:Ix9yFKn1nSPBLFl/yZknTp8TU5G4Ps0JDmguYK6iH1A=
github.com/pierrec/lz4 v2.6.0+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY=
github.com/pierrec/lz4/v4 v4.1.11 h1:LVs17FAZJFOjgmJXl9Tf13WfLUvZq7/RjfEJrnwZ9OE=
github.com/pierrec/lz4/v4 v4.1.11/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
Expand Down Expand Up @@ -728,6 +732,7 @@ golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gonum.org/v1/gonum v0.0.0-20180816165407-929014505bf4/go.mod h1:Y+Yx5eoAFn32cQvJDxZx5Dpnq+c3wtXuadVZAcxbbBo=
gonum.org/v1/gonum v0.8.2/go.mod h1:oe/vMfY3deqTw+1EZJhuvEW2iwGF1bW9wwu7XCu0+v0=
Expand Down
2 changes: 2 additions & 0 deletions module/cold_start_recall_dao.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ type ColdStartRecallDao interface {
func NewColdStartRecallDao(config recconf.RecallConfig) ColdStartRecallDao {
if config.ColdStartDaoConf.AdapterType == recconf.DaoConf_Adapter_Hologres {
return NewColdStartRecallHologresDao(config)
} else if config.ColdStartDaoConf.AdapterType == recconf.DataSource_Type_FeatureStore {
return NewColdStartRecallFeatureStoreDao(config)
}

panic("not found ColdStartRecallDao implement")
Expand Down
153 changes: 153 additions & 0 deletions module/cold_start_recall_featurestore_dao.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
package module

import (
"fmt"
"math/rand"
"strings"
"time"

"github.com/alibaba/pairec/v2/context"
"github.com/alibaba/pairec/v2/log"
"github.com/alibaba/pairec/v2/persist/fs"
"github.com/alibaba/pairec/v2/recconf"
"github.com/alibaba/pairec/v2/utils"
)

type ColdStartRecallFeatureStoreDao struct {
fsClient *fs.FSClient
recallCount int
timeInterval int
recallName string
table string
whereClause string
ch chan string
itemIds []string
lastScanTime time.Time // last scan data time
}

func NewColdStartRecallFeatureStoreDao(config recconf.RecallConfig) *ColdStartRecallFeatureStoreDao {
fsclient, err := fs.GetFeatureStoreClient(config.ColdStartDaoConf.FeatureStoreName)
if err != nil {
log.Error(fmt.Sprintf("error=%v", err))
return nil
}

dao := &ColdStartRecallFeatureStoreDao{
fsClient: fsclient,
recallCount: config.RecallCount,
table: config.ColdStartDaoConf.FeatureStoreViewName,
recallName: config.Name,
timeInterval: config.ColdStartDaoConf.TimeInterval,
whereClause: config.ColdStartDaoConf.WhereClause,
ch: make(chan string, 1000),
itemIds: make([]string, 0, 1024),
}
featureView := dao.fsClient.GetProject().GetFeatureView(dao.table)
if featureView == nil {
panic(fmt.Sprintf("featureView not found, table:%s", dao.table))
}
go dao.initItemData()
if featureView.GetType() == "Stream" {
go dao.loopIterateData()
}
return dao
}
func (d *ColdStartRecallFeatureStoreDao) initItemData() {
featureView := d.fsClient.GetProject().GetFeatureView(d.table)
if featureView == nil {
log.Error(fmt.Sprintf("module=ColdStartRecallFeatureStoreDao\trecallName=%s\terror=featureView not found, table:%s", d.recallName, d.table))
return
}
where := d.whereClause
createTime := time.Now().Add(time.Duration(-1*d.timeInterval) * time.Second)
where = strings.ReplaceAll(where, "${time}", utils.ToString(createTime.Unix(), "0"))
var (
ids []string
err error
)
if featureView.GetType() == "Batch" {
ids, err = featureView.ScanAndIterateData(where, nil)
} else {
ids, err = featureView.ScanAndIterateData(where, d.ch)
}
d.lastScanTime = time.Now()
if err != nil {
log.Error(fmt.Sprintf("module=ColdStartRecallFeatureStoreDao\terror=%v", err))
return
}

d.itemIds = ids
}
func (d *ColdStartRecallFeatureStoreDao) loopIterateData() {
ticker := time.NewTicker(time.Minute)
var ids []string
appendItems := func() {
newItemIds := make([]string, len(d.itemIds))
copy(newItemIds, d.itemIds)
m := make(map[string]bool)
for _, id := range newItemIds {
m[id] = true
}
for _, id := range ids {
if _, ok := m[id]; !ok {
newItemIds = append(newItemIds, id)
}
}
ids = ids[:0]
d.itemIds = newItemIds
}
for id := range d.ch {
ids = append(ids, id)
select {
case <-ticker.C:
if len(ids) > 0 {
appendItems()
}
default:
if len(ids) > 1000 {
appendItems()
}
}
}
}

func (d *ColdStartRecallFeatureStoreDao) ListItemsByUser(user *User, context *context.RecommendContext) (ret []*Item) {
featureView := d.fsClient.GetProject().GetFeatureView(d.table)
if featureView == nil {
log.Error(fmt.Sprintf("module=ColdStartRecallFeatureStoreDao\trecallName=%s\terror=featureView not found, table:%s", d.recallName, d.table))
return
}

for _, itemId := range d.itemIds {
item := NewItem(itemId)
item.RetrieveId = d.recallName
ret = append(ret, item)
}

go func() {
if time.Since(d.lastScanTime) <= time.Duration(30)*time.Minute {
return
}
d.lastScanTime = time.Now()
where := d.whereClause
createTime := time.Now().Add(time.Duration(-1*d.timeInterval) * time.Second)
where = strings.ReplaceAll(where, "${time}", utils.ToString(createTime.Unix(), "0"))
ids, err := featureView.ScanAndIterateData(where, nil)
if err != nil {
log.Error(fmt.Sprintf("module=ColdStartRecallFeatureStoreDao\terror=%v", err))
return
}

d.itemIds = ids

}()

rand.Shuffle(len(ret), func(i, j int) {
ret[i], ret[j] = ret[j], ret[i]
})
if len(ret) > d.recallCount {
ret = ret[:d.recallCount]
}
return

}