diff --git a/api/agent/v1alpha2/agent_types.go b/api/agent/v1alpha2/agent_types.go index e936a412..6df4d37c 100644 --- a/api/agent/v1alpha2/agent_types.go +++ b/api/agent/v1alpha2/agent_types.go @@ -17,6 +17,7 @@ package v1alpha2 import ( "sort" + "github.com/pkg/errors" "go.githedgehog.com/fabric/api/meta" vpcapi "go.githedgehog.com/fabric/api/vpc/v1alpha2" wiringapi "go.githedgehog.com/fabric/api/wiring/v1alpha2" @@ -126,7 +127,9 @@ type AgentStatus struct { LastAppliedTime metav1.Time `json:"lastAppliedTime,omitempty"` // Generation of the last successful configuration application LastAppliedGen int64 `json:"lastAppliedGen,omitempty"` - // Information about the switch and NOS + // Detailed switch state updated with each heartbeat + State SwitchState `json:"state,omitempty"` + // Information about the switch and NOS updated with each heartbeat NOSInfo NOSInfo `json:"nosInfo,omitempty"` // Status updates from the agent StatusUpdates []ApplyStatusUpdate `json:"statusUpdates,omitempty"` @@ -134,6 +137,198 @@ type AgentStatus struct { Conditions []metav1.Condition `json:"conditions"` } +type SwitchState struct { + // Switch interfaces state (incl. physical, management and port channels) + Interfaces map[string]SwitchStateInterface `json:"interfaces,omitempty"` // TODO add LLDP to interface? + // Switch transceivers state (port -> transceiver) + Transceivers map[string]SwitchStateTransceiver `json:"transceivers,omitempty"` // TODO move to interface? 
+ // State of all BGP neighbors (VRF -> neighbor address -> state) + BGPNeighbors map[string]map[string]SwitchStateBGPNeighbor `json:"bgpNeighbors,omitempty"` + + // TODO add FAN info, PSU info, Temperature info +} + +type SwitchStateInterface struct { + Enabled bool `json:"enabled,omitempty"` + AdminStatus AdminStatus `json:"adminStatus,omitempty"` + OperStatus OperStatus `json:"operStatus,omitempty"` + MAC string `json:"mac,omitempty"` + LastChange metav1.Time `json:"lastChanged,omitempty"` + Counters SwitchStateInterfaceCounters `json:"counters,omitempty"` +} + +type SwitchStateInterfaceCounters struct { + InBitsPerSecond float64 `json:"inBitsPerSecond,omitempty"` + InDiscards uint64 `json:"inDiscards,omitempty"` + InErrors uint64 `json:"inErrors,omitempty"` + InPktsPerSecond float64 `json:"inPktsPerSecond,omitempty"` + InUtilization uint8 `json:"inUtilization,omitempty"` + LastClear metav1.Time `json:"lastClear,omitempty"` + OutBitsPerSecond float64 `json:"outBitsPerSecond,omitempty"` + OutDiscards uint64 `json:"outDiscards,omitempty"` + OutErrors uint64 `json:"outErrors,omitempty"` + OutPktsPerSecond float64 `json:"outPktsPerSecond,omitempty"` + OutUtilization uint8 `json:"outUtilization,omitempty"` +} + +type AdminStatus string + +const ( + AdminStatusUnset AdminStatus = "" + AdminStatusUp AdminStatus = "up" + AdminStatusDown AdminStatus = "down" + AdminStatusTesting AdminStatus = "testing" +) + +func (a AdminStatus) ID() (uint8, error) { + switch a { + case AdminStatusUnset: + return 0, nil + case AdminStatusUp: + return 1, nil + case AdminStatusDown: + return 2, nil + case AdminStatusTesting: + return 3, nil + default: + return 0, errors.Errorf("unknown AdminStatus %s", a) + } +} + +type OperStatus string + +const ( + OperStatusUnset OperStatus = "" + OperStatusUp OperStatus = "up" + OperStatusDown OperStatus = "down" + OperStatusTesting OperStatus = "testing" + OperStatusUnknown OperStatus = "unknown" + OperStatusDormant OperStatus = "dormant" + 
OperStatusNotPresent OperStatus = "notPresent" + OperStatusLowerLayerDown OperStatus = "lowerLayerDown" +) + +func (o OperStatus) ID() (uint8, error) { + switch o { + case OperStatusUnset: + return 0, nil + case OperStatusUp: + return 1, nil + case OperStatusDown: + return 2, nil + case OperStatusTesting: + return 3, nil + case OperStatusUnknown: + return 4, nil + case OperStatusDormant: + return 5, nil + case OperStatusNotPresent: + return 6, nil + case OperStatusLowerLayerDown: + return 7, nil + default: + return 0, errors.Errorf("unknown OperStatus %s", o) + } +} + +type SwitchStateTransceiver struct { + CableClass string `json:"cable-class,omitempty"` + Temperature float64 `json:"temperature,omitempty"` + Voltage float64 `json:"voltage,omitempty"` + + // TODO add vendor, serial number, etc + // Vendor *string `json:"vendor,omitempty" module:"openconfig-platform-diagnostics"` + // VendorPart *string `json:"vendor-part,omitempty" module:"openconfig-platform-diagnostics"` +} + +type SwitchStateBGPNeighbor struct { + ConnectionsDropped uint64 `json:"connectionsDropped,omitempty"` + Enabled bool `json:"enabled,omitempty"` + EstablishedTransitions uint64 `json:"establishedTransitions,omitempty"` + LastEstablished metav1.Time `json:"lastEstablished,omitempty"` + LastRead metav1.Time `json:"lastRead,omitempty"` + LastResetReason string `json:"lastResetReason,omitempty"` + LastResetTime metav1.Time `json:"lastResetTime,omitempty"` + LastWrite metav1.Time `json:"lastWrite,omitempty"` + LocalAS uint32 `json:"localAS,omitempty"` + Messages BGPMessages `json:"messages,omitempty"` + PeerAS uint32 `json:"peerAS,omitempty"` + PeerGroup string `json:"peerGroup,omitempty"` + PeerPort uint16 `json:"peerPort,omitempty"` + PeerType BGPPeerType `json:"peerType,omitempty"` + RemoteRouterID string `json:"remoteRouterID,omitempty"` + SessionState BGPNeighborSessionState `json:"sessionState,omitempty"` + ShutdownMessage string `json:"shutdownMessage,omitempty"` +} + +type 
BGPNeighborSessionState string + +const ( + BGPNeighborSessionStateUnset BGPNeighborSessionState = "" + BGPNeighborSessionStateIdle BGPNeighborSessionState = "idle" + BGPNeighborSessionStateConnect BGPNeighborSessionState = "connect" + BGPNeighborSessionStateActive BGPNeighborSessionState = "active" + BGPNeighborSessionStateOpenSent BGPNeighborSessionState = "openSent" + BGPNeighborSessionStateOpenConfirm BGPNeighborSessionState = "openConfirm" + BGPNeighborSessionStateEstablished BGPNeighborSessionState = "established" +) + +func (b BGPNeighborSessionState) ID() (uint8, error) { + switch b { + case BGPNeighborSessionStateUnset: + return 0, nil + case BGPNeighborSessionStateIdle: + return 1, nil + case BGPNeighborSessionStateConnect: + return 2, nil + case BGPNeighborSessionStateActive: + return 3, nil + case BGPNeighborSessionStateOpenSent: + return 4, nil + case BGPNeighborSessionStateOpenConfirm: + return 5, nil + case BGPNeighborSessionStateEstablished: + return 6, nil + default: + return 0, errors.Errorf("unknown BGPNeighborSessionState %s", b) + } +} + +type BGPPeerType string + +const ( + BGPPeerTypeUnset BGPPeerType = "" + BGPPeerTypeInternal BGPPeerType = "internal" + BGPPeerTypeExternal BGPPeerType = "external" +) + +func (b BGPPeerType) ID() (uint8, error) { + switch b { + case BGPPeerTypeUnset: + return 0, nil + case BGPPeerTypeInternal: + return 1, nil + case BGPPeerTypeExternal: + return 2, nil + default: + return 0, errors.Errorf("unknown BGPPeerType %s", b) + } +} + +type BGPMessages struct { + Received BGPMessagesCounters `json:"received,omitempty"` + Sent BGPMessagesCounters `json:"sent,omitempty"` +} + +type BGPMessagesCounters struct { + Capability uint64 `json:"capability,omitempty"` + Keepalive uint64 `json:"keepalive,omitempty"` + Notification uint64 `json:"notification,omitempty"` + Open uint64 `json:"open,omitempty"` + RouteRefresh uint64 `json:"routeRefresh,omitempty"` + Update uint64 `json:"update,omitempty"` +} + // NOSInfo contains 
information about the switch and NOS received from the switch itself by the agent type NOSInfo struct { // ASIC name, such as "broadcom" or "vs" diff --git a/api/agent/v1alpha2/zz_generated.deepcopy.go b/api/agent/v1alpha2/zz_generated.deepcopy.go index b33162be..8dadfc4e 100644 --- a/api/agent/v1alpha2/zz_generated.deepcopy.go +++ b/api/agent/v1alpha2/zz_generated.deepcopy.go @@ -265,6 +265,7 @@ func (in *AgentStatus) DeepCopyInto(out *AgentStatus) { in.LastHeartbeat.DeepCopyInto(&out.LastHeartbeat) in.LastAttemptTime.DeepCopyInto(&out.LastAttemptTime) in.LastAppliedTime.DeepCopyInto(&out.LastAppliedTime) + in.State.DeepCopyInto(&out.State) out.NOSInfo = in.NOSInfo if in.StatusUpdates != nil { in, out := &in.StatusUpdates, &out.StatusUpdates @@ -320,6 +321,38 @@ func (in *ApplyStatusUpdate) DeepCopy() *ApplyStatusUpdate { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *BGPMessages) DeepCopyInto(out *BGPMessages) { + *out = *in + out.Received = in.Received + out.Sent = in.Sent +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new BGPMessages. +func (in *BGPMessages) DeepCopy() *BGPMessages { + if in == nil { + return nil + } + out := new(BGPMessages) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *BGPMessagesCounters) DeepCopyInto(out *BGPMessagesCounters) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new BGPMessagesCounters. +func (in *BGPMessagesCounters) DeepCopy() *BGPMessagesCounters { + if in == nil { + return nil + } + out := new(BGPMessagesCounters) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *Catalog) DeepCopyInto(out *Catalog) { *out = *in @@ -612,6 +645,121 @@ func (in *NOSInfo) DeepCopy() *NOSInfo { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *SwitchState) DeepCopyInto(out *SwitchState) { + *out = *in + if in.Interfaces != nil { + in, out := &in.Interfaces, &out.Interfaces + *out = make(map[string]SwitchStateInterface, len(*in)) + for key, val := range *in { + (*out)[key] = *val.DeepCopy() + } + } + if in.Transceivers != nil { + in, out := &in.Transceivers, &out.Transceivers + *out = make(map[string]SwitchStateTransceiver, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + if in.BGPNeighbors != nil { + in, out := &in.BGPNeighbors, &out.BGPNeighbors + *out = make(map[string]map[string]SwitchStateBGPNeighbor, len(*in)) + for key, val := range *in { + var outVal map[string]SwitchStateBGPNeighbor + if val == nil { + (*out)[key] = nil + } else { + inVal := (*in)[key] + in, out := &inVal, &outVal + *out = make(map[string]SwitchStateBGPNeighbor, len(*in)) + for key, val := range *in { + (*out)[key] = *val.DeepCopy() + } + } + (*out)[key] = outVal + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SwitchState. +func (in *SwitchState) DeepCopy() *SwitchState { + if in == nil { + return nil + } + out := new(SwitchState) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *SwitchStateBGPNeighbor) DeepCopyInto(out *SwitchStateBGPNeighbor) { + *out = *in + in.LastEstablished.DeepCopyInto(&out.LastEstablished) + in.LastRead.DeepCopyInto(&out.LastRead) + in.LastResetTime.DeepCopyInto(&out.LastResetTime) + in.LastWrite.DeepCopyInto(&out.LastWrite) + out.Messages = in.Messages +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SwitchStateBGPNeighbor. +func (in *SwitchStateBGPNeighbor) DeepCopy() *SwitchStateBGPNeighbor { + if in == nil { + return nil + } + out := new(SwitchStateBGPNeighbor) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *SwitchStateInterface) DeepCopyInto(out *SwitchStateInterface) { + *out = *in + in.LastChange.DeepCopyInto(&out.LastChange) + in.Counters.DeepCopyInto(&out.Counters) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SwitchStateInterface. +func (in *SwitchStateInterface) DeepCopy() *SwitchStateInterface { + if in == nil { + return nil + } + out := new(SwitchStateInterface) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *SwitchStateInterfaceCounters) DeepCopyInto(out *SwitchStateInterfaceCounters) { + *out = *in + in.LastClear.DeepCopyInto(&out.LastClear) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SwitchStateInterfaceCounters. +func (in *SwitchStateInterfaceCounters) DeepCopy() *SwitchStateInterfaceCounters { + if in == nil { + return nil + } + out := new(SwitchStateInterfaceCounters) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *SwitchStateTransceiver) DeepCopyInto(out *SwitchStateTransceiver) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SwitchStateTransceiver. +func (in *SwitchStateTransceiver) DeepCopy() *SwitchStateTransceiver { + if in == nil { + return nil + } + out := new(SwitchStateTransceiver) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *UserCreds) DeepCopyInto(out *UserCreds) { *out = *in diff --git a/config/crd/bases/agent.githedgehog.com_agents.yaml b/config/crd/bases/agent.githedgehog.com_agents.yaml index 5aaf9138..2eff787f 100644 --- a/config/crd/bases/agent.githedgehog.com_agents.yaml +++ b/config/crd/bases/agent.githedgehog.com_agents.yaml @@ -1272,7 +1272,8 @@ spec: format: date-time type: string nosInfo: - description: Information about the switch and NOS + description: Information about the switch and NOS updated with each + heartbeat properties: asicVersion: description: ASIC name, such as "broadcom" or "vs" @@ -1328,6 +1329,164 @@ spec: runID: description: ID of the agent run, used to track NOS reboots type: string + state: + description: Detailed switch state updated with each heartbeat + properties: + bgpNeighbors: + additionalProperties: + additionalProperties: + properties: + connectionsDropped: + format: int64 + type: integer + enabled: + type: boolean + establishedTransitions: + format: int64 + type: integer + lastEstablished: + format: date-time + type: string + lastRead: + format: date-time + type: string + lastResetReason: + type: string + lastResetTime: + format: date-time + type: string + lastWrite: + format: date-time + type: string + localAS: + format: int32 + type: integer + messages: + properties: + received: + properties: + capability: + format: int64 + type: integer + keepalive: + format: int64 + type: integer + notification: + format: int64 + type: integer + 
open: + format: int64 + type: integer + routeRefresh: + format: int64 + type: integer + update: + format: int64 + type: integer + type: object + sent: + properties: + capability: + format: int64 + type: integer + keepalive: + format: int64 + type: integer + notification: + format: int64 + type: integer + open: + format: int64 + type: integer + routeRefresh: + format: int64 + type: integer + update: + format: int64 + type: integer + type: object + type: object + peerAS: + format: int32 + type: integer + peerGroup: + type: string + peerPort: + type: integer + peerType: + type: string + remoteRouterID: + type: string + sessionState: + type: string + shutdownMessage: + type: string + type: object + type: object + description: State of all BGP neighbors (VRF -> neighbor address + -> state) + type: object + interfaces: + additionalProperties: + properties: + adminStatus: + type: string + counters: + properties: + inBitsPerSecond: + type: number + inDiscards: + format: int64 + type: integer + inErrors: + format: int64 + type: integer + inPktsPerSecond: + type: number + inUtilization: + type: integer + lastClear: + format: date-time + type: string + outBitsPerSecond: + type: number + outDiscards: + format: int64 + type: integer + outErrors: + format: int64 + type: integer + outPktsPerSecond: + type: number + outUtilization: + type: integer + type: object + enabled: + type: boolean + lastChanged: + format: date-time + type: string + mac: + type: string + operStatus: + type: string + type: object + description: Switch interfaces state (incl. 
physical, management + and port channels) + type: object + transceivers: + additionalProperties: + properties: + cable-class: + type: string + temperature: + type: number + voltage: + type: number + type: object + description: Switch transceivers state (port -> transceiver) + type: object + type: object statusUpdates: description: Status updates from the agent items: diff --git a/docs/api.md b/docs/api.md index a8613206..42352012 100644 --- a/docs/api.md +++ b/docs/api.md @@ -17,6 +17,19 @@ for the switch and control node agents. Not intended to be modified by the user. +#### AdminStatus + +_Underlying type:_ _string_ + + + + + +_Appears in:_ +- [SwitchStateInterface](#switchstateinterface) + + + #### Agent @@ -62,7 +75,8 @@ _Appears in:_ | `lastAttemptGen` _integer_ | Generation of the last attempt to apply configuration | | | | `lastAppliedTime` _[Time](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.29/#time-v1-meta)_ | Time of the last successful configuration application | | | | `lastAppliedGen` _integer_ | Generation of the last successful configuration application | | | -| `nosInfo` _[NOSInfo](#nosinfo)_ | Information about the switch and NOS | | | +| `state` _[SwitchState](#switchstate)_ | Detailed switch state updated with each heartbeat | | | +| `nosInfo` _[NOSInfo](#nosinfo)_ | Information about the switch and NOS updated with each heartbeat | | | | `conditions` _[Condition](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.29/#condition-v1-meta) array_ | Conditions of the agent, includes readiness marker for use with kubectl wait | | | @@ -97,6 +111,82 @@ _Appears in:_ | `upTime` _string_ | Switch uptime, such as "21:21:27 up 1 day, 23:26, 0 users, load average: 1.92, 1.99, 2.00 " | | | +#### OperStatus + +_Underlying type:_ _string_ + + + + + +_Appears in:_ +- [SwitchStateInterface](#switchstateinterface) + + + +#### SwitchState + + + + + + + +_Appears in:_ +- [AgentStatus](#agentstatus) + +| Field | Description | 
Default | Validation | +| --- | --- | --- | --- | +| `interfaces` _object (keys:string, values:[SwitchStateInterface](#switchstateinterface))_ | | | | + + +#### SwitchStateInterface + + + + + + + +_Appears in:_ +- [SwitchState](#switchstate) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `enabled` _boolean_ | | | | +| `adminStatus` _[AdminStatus](#adminstatus)_ | | | | +| `operStatus` _[OperStatus](#operstatus)_ | | | | +| `mac` _string_ | | | | +| `lastChanged` _integer_ | | | | +| `counters` _[SwitchStateInterfaceCounters](#switchstateinterfacecounters)_ | | | | + + +#### SwitchStateInterfaceCounters + + + + + + + +_Appears in:_ +- [SwitchStateInterface](#switchstateinterface) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `inBitsPerSecond` _float_ | | | | +| `inDiscards` _integer_ | | | | +| `inErrors` _integer_ | | | | +| `inPktsPerSecond` _float_ | | | | +| `inUtilization` _integer_ | | | | +| `lastClear` _integer_ | | | | +| `outBitsPerSecond` _float_ | | | | +| `outDiscards` _integer_ | | | | +| `outErrors` _integer_ | | | | +| `outPktsPerSecond` _float_ | | | | +| `outUtilization` _integer_ | | | | + + diff --git a/go.mod b/go.mod index 9de4e201..625f6c16 100644 --- a/go.mod +++ b/go.mod @@ -8,6 +8,7 @@ require ( github.com/bits-and-blooms/bitset v1.13.0 github.com/coredhcp/coredhcp v0.0.0-20231020075302-1cd0fca8759a github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc + github.com/go-chi/chi/v5 v5.0.12 github.com/google/uuid v1.6.0 github.com/insomniacslk/dhcp v0.0.0-20231206064809-8c70d406f6d2 github.com/lmittmann/tint v1.0.4 @@ -20,6 +21,7 @@ require ( github.com/openconfig/ygot v0.29.18 github.com/pkg/errors v0.9.1 github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 + github.com/prometheus/client_golang v1.18.0 github.com/samber/slog-multi v1.0.2 github.com/sirupsen/logrus v1.9.3 github.com/stretchr/testify v1.9.0 @@ -173,7 +175,6 @@ require ( 
github.com/pierrec/lz4/v4 v4.1.18 // indirect github.com/pjbgf/sha1cd v0.3.0 // indirect github.com/pkg/sftp v1.13.6 // indirect - github.com/prometheus/client_golang v1.18.0 // indirect github.com/prometheus/client_model v0.5.0 // indirect github.com/prometheus/common v0.45.0 // indirect github.com/prometheus/procfs v0.12.0 // indirect diff --git a/go.sum b/go.sum index d73c0950..3236a4d2 100644 --- a/go.sum +++ b/go.sum @@ -325,6 +325,8 @@ github.com/gin-gonic/gin v1.7.3/go.mod h1:jD2toBW3GZUr5UMcdrwQA10I7RuaFOl/SGeDjX github.com/gliderlabs/ssh v0.3.5 h1:OcaySEmAQJgyYcArR+gGGTHCyE7nvhEMTlYY+Dp8CpY= github.com/gliderlabs/ssh v0.3.5/go.mod h1:8XB4KraRrX39qHhT6yxPsHedjA08I/uBVwj4xC+/+z4= github.com/go-asn1-ber/asn1-ber v1.3.1/go.mod h1:hEBeB/ic+5LoWskz+yKT7vGhhPYkProFKoKdwZRWMe0= +github.com/go-chi/chi/v5 v5.0.12 h1:9euLV5sTrTNTRUU9POmDUvfxyj6LAABLUcEWO+JJb4s= +github.com/go-chi/chi/v5 v5.0.12/go.mod h1:DslCQbL2OYiznFReuXYUmQ2hGd1aDpCnlMNITLSKoi8= github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 h1:+zs/tPmkDkHx3U66DAb0lQFJrpS6731Oaa12ikc+DiI= github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376/go.mod h1:an3vInlBmSxCcxctByoQdvwPiA7DTK7jaaFDBTtu0ic= github.com/go-git/go-billy/v5 v5.5.0 h1:yEY4yhzCDuMGSv83oGxiBotRzhwhNr8VZyphhiu+mTU= diff --git a/pkg/agent/agent.go b/pkg/agent/agent.go index 295c88c1..039d9baa 100644 --- a/pkg/agent/agent.go +++ b/pkg/agent/agent.go @@ -32,6 +32,7 @@ import ( "go.githedgehog.com/fabric/pkg/agent/dozer" "go.githedgehog.com/fabric/pkg/agent/dozer/bcm" "go.githedgehog.com/fabric/pkg/agent/dozer/bcm/gnmi" + "go.githedgehog.com/fabric/pkg/agent/switchstate" "go.githedgehog.com/fabric/pkg/util/kubeutil" "go.githedgehog.com/fabric/pkg/util/uefiutil" apimeta "k8s.io/apimachinery/pkg/api/meta" @@ -63,9 +64,22 @@ type Service struct { name string installID string runID string + + reg *switchstate.Registry } func (svc *Service) Run(ctx context.Context, getClient func() (*gnmi.Client, error)) error { + svc.reg = 
switchstate.NewRegistry() + + if !svc.ApplyOnce && !svc.DryRun { + go func() { + if err := svc.reg.ServeMetrics(); err != nil { + slog.Error("Failed to serve metrics", "err", err) + panic(err) + } + }() + } + if svc.Basedir == "" { return errors.New("basedir is required") } @@ -142,6 +156,13 @@ func (svc *Service) Run(ctx context.Context, getClient func() (*gnmi.Client, err agent.Status.Conditions = []metav1.Condition{} } + if err := svc.processor.UpdateSwitchState(ctx, svc.reg); err != nil { + return errors.Wrapf(err, "failed to update switch state") + } + if st := svc.reg.GetSwitchState(); st != nil { + agent.Status.State = *st + } + nosInfo, err := svc.processor.Info(ctx) if err != nil { return errors.Wrap(err, "failed to get initial NOS info") @@ -180,7 +201,12 @@ func (svc *Service) Run(ctx context.Context, getClient func() (*gnmi.Client, err if err != nil { return errors.Wrapf(err, "failed to get heartbeat NOS info") } + + if err := svc.processor.UpdateSwitchState(ctx, svc.reg); err != nil { + return errors.Wrapf(err, "failed to update switch state") + } agent.Status.NOSInfo = *nosInfo + agent.Status.LastHeartbeat = metav1.Time{Time: time.Now()} err = kube.Status().Update(ctx, agent) diff --git a/pkg/agent/dozer/bcm/state.go b/pkg/agent/dozer/bcm/state.go new file mode 100644 index 00000000..5bc28c45 --- /dev/null +++ b/pkg/agent/dozer/bcm/state.go @@ -0,0 +1,705 @@ +// Copyright 2023 Hedgehog +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package bcm + +import ( + "context" + "log/slog" + "strings" + "time" + + "github.com/pkg/errors" + "go.githedgehog.com/fabric-bcm-ygot/pkg/oc" + agentapi "go.githedgehog.com/fabric/api/agent/v1alpha2" + "go.githedgehog.com/fabric/pkg/agent/switchstate" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func (p *BroadcomProcessor) UpdateSwitchState(ctx context.Context, reg *switchstate.Registry) error { + start := time.Now() + + swState := &agentapi.SwitchState{ + Interfaces: map[string]agentapi.SwitchStateInterface{}, + Transceivers: map[string]agentapi.SwitchStateTransceiver{}, + BGPNeighbors: map[string]map[string]agentapi.SwitchStateBGPNeighbor{}, + } + + if err := p.updateInterfaceMetrics(ctx, reg, swState); err != nil { + return errors.Wrapf(err, "failed to update interface metrics") + } + + if err := p.updateTransceiverMetrics(ctx, reg, swState); err != nil { + return errors.Wrapf(err, "failed to update transceiver metrics") + } + + if err := p.updateBGPNeighborMetrics(ctx, reg, swState); err != nil { + return errors.Wrapf(err, "failed to update bgp neighbor metrics") + } + + reg.SaveSwitchState(swState) + + slog.Debug("Switch state updated", "took", time.Since(start)) + + return nil +} + +func (p *BroadcomProcessor) updateInterfaceMetrics(ctx context.Context, reg *switchstate.Registry, swState *agentapi.SwitchState) error { + ifaces := &oc.OpenconfigInterfaces_Interfaces{} + err := p.client.Get(ctx, "/openconfig-interfaces:interfaces/interface", ifaces) + if err != nil { + return errors.Wrapf(err, "failed to get interfaces") + } + + for ifaceName, iface := range ifaces.Interface { + if !isManagement(ifaceName) && !isPhysical(ifaceName) && !isPortChannel(ifaceName) { + continue + } + + if iface.State == nil { + continue + } + + st := iface.State + + adminStatus, err := mapAdminStatus(st.AdminStatus) + if err != nil { + return errors.Wrapf(err, "failed to map admin status") + } + adminStatusID, err := adminStatus.ID() + if err != nil { + return 
errors.Wrapf(err, "failed to get admin status ID") + } + + operStatus, err := mapOperStatus(st.OperStatus) + if err != nil { + return errors.Wrapf(err, "failed to map oper status") + } + operStatusID, err := operStatus.ID() + if err != nil { + return errors.Wrapf(err, "failed to get oper status ID") + } + + reg.InterfaceMetrics.Enabled.WithLabelValues(ifaceName).Set(boolToFloat64(st.Enabled)) + reg.InterfaceMetrics.AdminStatus.WithLabelValues(ifaceName).Set(float64(adminStatusID)) + reg.InterfaceMetrics.OperStatus.WithLabelValues(ifaceName).Set(float64(operStatusID)) + + if st.RateInterval != nil { + reg.InterfaceMetrics.RateInterval.WithLabelValues(ifaceName).Set(float64(*st.RateInterval)) + } + + ifState := agentapi.SwitchStateInterface{} + if st.Enabled != nil { + ifState.Enabled = *st.Enabled + } + ifState.AdminStatus = adminStatus + ifState.OperStatus = operStatus + if st.MacAddress != nil { + ifState.MAC = *st.MacAddress + } + if st.LastChange != nil { + reg.InterfaceMetrics.LastChange.WithLabelValues(ifaceName).Set(float64(*st.LastChange)) + if *st.LastChange != 0 { + ifState.LastChange = metav1.Time{Time: time.Unix(int64(*st.LastChange), 0)} + } + } + + if st.Counters != nil { + if st.Counters.InBitsPerSecond != nil { + reg.InterfaceCounters.InBitsPerSecond.WithLabelValues(ifaceName).Set(float64(*st.Counters.InBitsPerSecond)) + ifState.Counters.InBitsPerSecond = *st.Counters.InBitsPerSecond + } + + if st.Counters.InBroadcastPkts != nil { + reg.InterfaceCounters.InBroadcastPkts.WithLabelValues(ifaceName).Set(float64(*st.Counters.InBroadcastPkts)) + } + + if st.Counters.InDiscards != nil { + reg.InterfaceCounters.InDiscards.WithLabelValues(ifaceName).Set(float64(*st.Counters.InDiscards)) + ifState.Counters.InDiscards = *st.Counters.InDiscards + } + + if st.Counters.InErrors != nil { + reg.InterfaceCounters.InErrors.WithLabelValues(ifaceName).Set(float64(*st.Counters.InErrors)) + ifState.Counters.InErrors = *st.Counters.InErrors + } + + if 
st.Counters.InMulticastPkts != nil { + reg.InterfaceCounters.InMulticastPkts.WithLabelValues(ifaceName).Set(float64(*st.Counters.InMulticastPkts)) + } + + if st.Counters.InOctets != nil { + reg.InterfaceCounters.InOctets.WithLabelValues(ifaceName).Set(float64(*st.Counters.InOctets)) + } + + if st.Counters.InOctetsPerSecond != nil { + reg.InterfaceCounters.InOctetsPerSecond.WithLabelValues(ifaceName).Set(float64(*st.Counters.InOctetsPerSecond)) + } + + if st.Counters.InPkts != nil { + reg.InterfaceCounters.InPkts.WithLabelValues(ifaceName).Set(float64(*st.Counters.InPkts)) + } + + if st.Counters.InPktsPerSecond != nil { + reg.InterfaceCounters.InPktsPerSecond.WithLabelValues(ifaceName).Set(float64(*st.Counters.InPktsPerSecond)) + ifState.Counters.InPktsPerSecond = *st.Counters.InPktsPerSecond + } + + if st.Counters.InUnicastPkts != nil { + reg.InterfaceCounters.InUnicastPkts.WithLabelValues(ifaceName).Set(float64(*st.Counters.InUnicastPkts)) + } + + if st.Counters.InUtilization != nil { + reg.InterfaceCounters.InUtilization.WithLabelValues(ifaceName).Set(float64(*st.Counters.InUtilization)) + ifState.Counters.InUtilization = *st.Counters.InUtilization + } + + if st.Counters.LastClear != nil { + reg.InterfaceCounters.LastClear.WithLabelValues(ifaceName).Set(float64(*st.Counters.LastClear)) + if *st.Counters.LastClear != 0 { + ifState.Counters.LastClear = metav1.Time{Time: time.Unix(int64(*st.Counters.LastClear), 0)} + } + } + + if st.Counters.OutBitsPerSecond != nil { + reg.InterfaceCounters.OutBitsPerSecond.WithLabelValues(ifaceName).Set(float64(*st.Counters.OutBitsPerSecond)) + ifState.Counters.OutBitsPerSecond = *st.Counters.OutBitsPerSecond + } + + if st.Counters.OutBroadcastPkts != nil { + reg.InterfaceCounters.OutBroadcastPkts.WithLabelValues(ifaceName).Set(float64(*st.Counters.OutBroadcastPkts)) + } + + if st.Counters.OutDiscards != nil { + reg.InterfaceCounters.OutDiscards.WithLabelValues(ifaceName).Set(float64(*st.Counters.OutDiscards)) + 
ifState.Counters.OutDiscards = *st.Counters.OutDiscards + } + + if st.Counters.OutErrors != nil { + reg.InterfaceCounters.OutErrors.WithLabelValues(ifaceName).Set(float64(*st.Counters.OutErrors)) + ifState.Counters.OutErrors = *st.Counters.OutErrors + } + + if st.Counters.OutMulticastPkts != nil { + reg.InterfaceCounters.OutMulticastPkts.WithLabelValues(ifaceName).Set(float64(*st.Counters.OutMulticastPkts)) + } + + if st.Counters.OutOctets != nil { + reg.InterfaceCounters.OutOctets.WithLabelValues(ifaceName).Set(float64(*st.Counters.OutOctets)) + } + + if st.Counters.OutOctetsPerSecond != nil { + reg.InterfaceCounters.OutOctetsPerSecond.WithLabelValues(ifaceName).Set(float64(*st.Counters.OutOctetsPerSecond)) + } + + if st.Counters.OutPkts != nil { + reg.InterfaceCounters.OutPkts.WithLabelValues(ifaceName).Set(float64(*st.Counters.OutPkts)) + } + + if st.Counters.OutPktsPerSecond != nil { + reg.InterfaceCounters.OutPktsPerSecond.WithLabelValues(ifaceName).Set(float64(*st.Counters.OutPktsPerSecond)) + ifState.Counters.OutPktsPerSecond = *st.Counters.OutPktsPerSecond + } + + if st.Counters.OutUnicastPkts != nil { + reg.InterfaceCounters.OutUnicastPkts.WithLabelValues(ifaceName).Set(float64(*st.Counters.OutUnicastPkts)) + } + + if st.Counters.OutUtilization != nil { + reg.InterfaceCounters.OutUtilization.WithLabelValues(ifaceName).Set(float64(*st.Counters.OutUtilization)) + ifState.Counters.OutUtilization = *st.Counters.OutUtilization + } + } + + swState.Interfaces[ifaceName] = ifState + } + + return nil +} + +func (p *BroadcomProcessor) updateTransceiverMetrics(ctx context.Context, reg *switchstate.Registry, swState *agentapi.SwitchState) error { + dev := &oc.Device{} + if err := p.client.Get(ctx, "/transceiver-dom", dev); err != nil { + return errors.Wrapf(err, "failed to get transceivers") + } + if dev.TransceiverDom == nil { + return errors.Errorf("transceiver-dom not found") + } + + for transceiverName, transceiver := range dev.TransceiverDom.TransceiverDomInfo { 
+ if !strings.HasPrefix(transceiverName, "Ethernet") { + continue + } + if transceiver.State == nil { + continue + } + + ocSt := transceiver.State + st := agentapi.SwitchStateTransceiver{} + + if ocSt.CableClass != nil { + st.CableClass = *ocSt.CableClass + } + + if ocSt.Temperature != nil { + reg.TransceiverMetrics.Temperature.WithLabelValues(transceiverName).Set(*ocSt.Temperature) + st.Temperature = *ocSt.Temperature + } + + if ocSt.Voltage != nil { + reg.TransceiverMetrics.Voltage.WithLabelValues(transceiverName).Set(*ocSt.Voltage) + st.Voltage = *ocSt.Voltage + } + + if ocSt.AlarmRxPowerHi != nil { + reg.TransceiverMetrics.AlarmRxPowerHi.WithLabelValues(transceiverName).Set(*ocSt.AlarmRxPowerHi) + } + + if ocSt.AlarmRxPowerLo != nil { + reg.TransceiverMetrics.AlarmRxPowerLo.WithLabelValues(transceiverName).Set(*ocSt.AlarmRxPowerLo) + } + + if ocSt.AlarmTempHi != nil { + reg.TransceiverMetrics.AlarmTempHi.WithLabelValues(transceiverName).Set(*ocSt.AlarmTempHi) + } + + if ocSt.AlarmTempLo != nil { + reg.TransceiverMetrics.AlarmTempLo.WithLabelValues(transceiverName).Set(*ocSt.AlarmTempLo) + } + + if ocSt.AlarmTxBiasHi != nil { + reg.TransceiverMetrics.AlarmTxBiasHi.WithLabelValues(transceiverName).Set(*ocSt.AlarmTxBiasHi) + } + + if ocSt.AlarmTxBiasLo != nil { + reg.TransceiverMetrics.AlarmTxBiasLo.WithLabelValues(transceiverName).Set(*ocSt.AlarmTxBiasLo) + } + + if ocSt.AlarmTxPowerHi != nil { + reg.TransceiverMetrics.AlarmTxPowerHi.WithLabelValues(transceiverName).Set(*ocSt.AlarmTxPowerHi) + } + + if ocSt.AlarmTxPowerLo != nil { + reg.TransceiverMetrics.AlarmTxPowerLo.WithLabelValues(transceiverName).Set(*ocSt.AlarmTxPowerLo) + } + + if ocSt.AlarmVoltHi != nil { + reg.TransceiverMetrics.AlarmVoltHi.WithLabelValues(transceiverName).Set(*ocSt.AlarmVoltHi) + } + + if ocSt.AlarmVoltLo != nil { + reg.TransceiverMetrics.AlarmVoltLo.WithLabelValues(transceiverName).Set(*ocSt.AlarmVoltLo) + } + + if ocSt.Rx1Power != nil { + 
reg.TransceiverMetrics.Rx1Power.WithLabelValues(transceiverName).Set(*ocSt.Rx1Power) + } + + if ocSt.Rx2Power != nil { + reg.TransceiverMetrics.Rx2Power.WithLabelValues(transceiverName).Set(*ocSt.Rx2Power) + } + + if ocSt.Rx3Power != nil { + reg.TransceiverMetrics.Rx3Power.WithLabelValues(transceiverName).Set(*ocSt.Rx3Power) + } + + if ocSt.Rx4Power != nil { + reg.TransceiverMetrics.Rx4Power.WithLabelValues(transceiverName).Set(*ocSt.Rx4Power) + } + + if ocSt.Rx5Power != nil { + reg.TransceiverMetrics.Rx5Power.WithLabelValues(transceiverName).Set(*ocSt.Rx5Power) + } + + if ocSt.Rx6Power != nil { + reg.TransceiverMetrics.Rx6Power.WithLabelValues(transceiverName).Set(*ocSt.Rx6Power) + } + + if ocSt.Rx7Power != nil { + reg.TransceiverMetrics.Rx7Power.WithLabelValues(transceiverName).Set(*ocSt.Rx7Power) + } + + if ocSt.Rx8Power != nil { + reg.TransceiverMetrics.Rx8Power.WithLabelValues(transceiverName).Set(*ocSt.Rx8Power) + } + + if ocSt.Tx1Bias != nil { + reg.TransceiverMetrics.Tx1Bias.WithLabelValues(transceiverName).Set(*ocSt.Tx1Bias) + } + + if ocSt.Tx2Bias != nil { + reg.TransceiverMetrics.Tx2Bias.WithLabelValues(transceiverName).Set(*ocSt.Tx2Bias) + } + + if ocSt.Tx3Bias != nil { + reg.TransceiverMetrics.Tx3Bias.WithLabelValues(transceiverName).Set(*ocSt.Tx3Bias) + } + + if ocSt.Tx4Bias != nil { + reg.TransceiverMetrics.Tx4Bias.WithLabelValues(transceiverName).Set(*ocSt.Tx4Bias) + } + + if ocSt.Tx5Bias != nil { + reg.TransceiverMetrics.Tx5Bias.WithLabelValues(transceiverName).Set(*ocSt.Tx5Bias) + } + + if ocSt.Tx6Bias != nil { + reg.TransceiverMetrics.Tx6Bias.WithLabelValues(transceiverName).Set(*ocSt.Tx6Bias) + } + + if ocSt.Tx7Bias != nil { + reg.TransceiverMetrics.Tx7Bias.WithLabelValues(transceiverName).Set(*ocSt.Tx7Bias) + } + + if ocSt.Tx8Bias != nil { + reg.TransceiverMetrics.Tx8Bias.WithLabelValues(transceiverName).Set(*ocSt.Tx8Bias) + } + + if ocSt.Tx1Power != nil { + reg.TransceiverMetrics.Tx1Power.WithLabelValues(transceiverName).Set(*ocSt.Tx1Power) + 
} + + if ocSt.Tx2Power != nil { + reg.TransceiverMetrics.Tx2Power.WithLabelValues(transceiverName).Set(*ocSt.Tx2Power) + } + + if ocSt.Tx3Power != nil { + reg.TransceiverMetrics.Tx3Power.WithLabelValues(transceiverName).Set(*ocSt.Tx3Power) + } + + if ocSt.Tx4Power != nil { + reg.TransceiverMetrics.Tx4Power.WithLabelValues(transceiverName).Set(*ocSt.Tx4Power) + } + + if ocSt.Tx5Power != nil { + reg.TransceiverMetrics.Tx5Power.WithLabelValues(transceiverName).Set(*ocSt.Tx5Power) + } + + if ocSt.Tx6Power != nil { + reg.TransceiverMetrics.Tx6Power.WithLabelValues(transceiverName).Set(*ocSt.Tx6Power) + } + + if ocSt.Tx7Power != nil { + reg.TransceiverMetrics.Tx7Power.WithLabelValues(transceiverName).Set(*ocSt.Tx7Power) + } + + if ocSt.Tx8Power != nil { + reg.TransceiverMetrics.Tx8Power.WithLabelValues(transceiverName).Set(*ocSt.Tx8Power) + } + + if ocSt.WarningRxPowerHi != nil { + reg.TransceiverMetrics.WarningRxPowerHi.WithLabelValues(transceiverName).Set(*ocSt.WarningRxPowerHi) + } + + if ocSt.WarningRxPowerLo != nil { + reg.TransceiverMetrics.WarningRxPowerLo.WithLabelValues(transceiverName).Set(*ocSt.WarningRxPowerLo) + } + + if ocSt.WarningTempHi != nil { + reg.TransceiverMetrics.WarningTempHi.WithLabelValues(transceiverName).Set(*ocSt.WarningTempHi) + } + + if ocSt.WarningTempLo != nil { + reg.TransceiverMetrics.WarningTempLo.WithLabelValues(transceiverName).Set(*ocSt.WarningTempLo) + } + + if ocSt.WarningTxBiasHi != nil { + reg.TransceiverMetrics.WarningTxBiasHi.WithLabelValues(transceiverName).Set(*ocSt.WarningTxBiasHi) + } + + if ocSt.WarningTxBiasLo != nil { + reg.TransceiverMetrics.WarningTxBiasLo.WithLabelValues(transceiverName).Set(*ocSt.WarningTxBiasLo) + } + + if ocSt.WarningTxPowerHi != nil { + reg.TransceiverMetrics.WarningTxPowerHi.WithLabelValues(transceiverName).Set(*ocSt.WarningTxPowerHi) + } + + if ocSt.WarningTxPowerLo != nil { + reg.TransceiverMetrics.WarningTxPowerLo.WithLabelValues(transceiverName).Set(*ocSt.WarningTxPowerLo) + } + + if 
ocSt.WarningVoltHi != nil { + reg.TransceiverMetrics.WarningVoltHi.WithLabelValues(transceiverName).Set(*ocSt.WarningVoltHi) + } + + if ocSt.WarningVoltLo != nil { + reg.TransceiverMetrics.WarningVoltLo.WithLabelValues(transceiverName).Set(*ocSt.WarningVoltLo) + } + + swState.Transceivers[transceiverName] = st + } + + return nil +} + +func (p *BroadcomProcessor) updateBGPNeighborMetrics(ctx context.Context, reg *switchstate.Registry, swState *agentapi.SwitchState) error { + vrfs := &oc.OpenconfigNetworkInstance_NetworkInstances{} + if err := p.client.Get(ctx, "/network-instances/network-instance", vrfs); err != nil { + return errors.Wrapf(err, "failed to get vrfs") + } + + for vrfName, vrf := range vrfs.NetworkInstance { + if vrf.Protocols == nil || vrf.Protocols.Protocol == nil { + continue + } + + bgpProto := vrf.Protocols.Protocol[oc.OpenconfigNetworkInstance_NetworkInstances_NetworkInstance_Protocols_Protocol_Key{ + Identifier: oc.OpenconfigPolicyTypes_INSTALL_PROTOCOL_TYPE_BGP, + Name: "bgp", + }] + if bgpProto == nil || bgpProto.Bgp == nil || bgpProto.Bgp.Neighbors == nil { + continue + } + + vrfSt := map[string]agentapi.SwitchStateBGPNeighbor{} + + for neighborAddress, neighbor := range bgpProto.Bgp.Neighbors.Neighbor { + if neighbor.State == nil { + continue + } + + ocSt := neighbor.State + st := agentapi.SwitchStateBGPNeighbor{} + + if ocSt.Enabled != nil { + reg.BGPNeighborMetrics.Enabled.WithLabelValues(vrfName, neighborAddress).Set(boolToFloat64(ocSt.Enabled)) + st.Enabled = *ocSt.Enabled + } + + if ocSt.ConnectionsDropped != nil { + reg.BGPNeighborMetrics.ConnectionsDropped.WithLabelValues(vrfName, neighborAddress).Set(float64(*ocSt.ConnectionsDropped)) + st.ConnectionsDropped = *ocSt.ConnectionsDropped + } + + if ocSt.EstablishedTransitions != nil { + reg.BGPNeighborMetrics.EstablishedTransitions.WithLabelValues(vrfName, neighborAddress).Set(float64(*ocSt.EstablishedTransitions)) + st.EstablishedTransitions = *ocSt.EstablishedTransitions + } + + if 
ocSt.LastEstablished != nil { + if *ocSt.LastEstablished != 0 { + st.LastEstablished = metav1.Time{Time: time.Unix(int64(*ocSt.LastEstablished), 0)} + } + } + + if ocSt.LastRead != nil { + if *ocSt.LastRead != 0 { + st.LastRead = metav1.Time{Time: time.Unix(int64(*ocSt.LastRead), 0)} + } + } + + if ocSt.LastResetReason != nil { + st.LastResetReason = *ocSt.LastResetReason + } + + if ocSt.LastResetTime != nil { + if *ocSt.LastResetTime != 0 { + st.LastResetTime = metav1.Time{Time: time.Unix(int64(*ocSt.LastResetTime), 0)} + } + } + + if ocSt.LastWrite != nil { + if *ocSt.LastWrite != 0 { + st.LastWrite = metav1.Time{Time: time.Unix(int64(*ocSt.LastWrite), 0)} + } + } + + if ocSt.LocalAs != nil { + st.LocalAS = *ocSt.LocalAs + } + + if ocSt.PeerAs != nil { + st.PeerAS = *ocSt.PeerAs + } + + if ocSt.PeerGroup != nil { + st.PeerGroup = *ocSt.PeerGroup + } + + if ocSt.PeerPort != nil { + st.PeerPort = *ocSt.PeerPort + } + + peerType, err := mapBGPPeerType(ocSt.PeerType) + if err != nil { + return errors.Wrapf(err, "failed to map bgp peer type") + } + st.PeerType = peerType + + peerTypeID, err := peerType.ID() + if err != nil { + return errors.Wrapf(err, "failed to get peer type ID") + } + reg.BGPNeighborMetrics.PeerType.WithLabelValues(vrfName, neighborAddress).Set(float64(peerTypeID)) + + if ocSt.RemoteRouterId != nil { + st.RemoteRouterID = *ocSt.RemoteRouterId + } + + sessionState, err := mapBGPNeighborSessionState(ocSt.SessionState) + if err != nil { + return errors.Wrapf(err, "failed to map bgp neighbor session state") + } + st.SessionState = sessionState + + sessionStateID, err := sessionState.ID() + if err != nil { + return errors.Wrapf(err, "failed to get session state ID") + } + reg.BGPNeighborMetrics.SessionState.WithLabelValues(vrfName, neighborAddress).Set(float64(sessionStateID)) + + if ocSt.ShutdownMessage != nil { + st.ShutdownMessage = *ocSt.ShutdownMessage + } + + if ocSt.Messages != nil { + messages := agentapi.BGPMessages{} + if ocSt.Messages.Received 
!= nil { + ocR := ocSt.Messages.Received + messages.Received = agentapi.BGPMessagesCounters{ + Capability: unptrUint64(ocR.CAPABILITY), + Keepalive: unptrUint64(ocR.KEEPALIVE), + Notification: unptrUint64(ocR.NOTIFICATION), + Open: unptrUint64(ocR.OPEN), + RouteRefresh: unptrUint64(ocR.ROUTE_REFRESH), + Update: unptrUint64(ocR.UPDATE), + } + + reg.BGPNeighborMetrics.Messages.Received.Capability.WithLabelValues(vrfName, neighborAddress).Set(float64(unptrUint64(ocR.CAPABILITY))) + reg.BGPNeighborMetrics.Messages.Received.Keepalive.WithLabelValues(vrfName, neighborAddress).Set(float64(unptrUint64(ocR.KEEPALIVE))) + reg.BGPNeighborMetrics.Messages.Received.Notification.WithLabelValues(vrfName, neighborAddress).Set(float64(unptrUint64(ocR.NOTIFICATION))) + reg.BGPNeighborMetrics.Messages.Received.Open.WithLabelValues(vrfName, neighborAddress).Set(float64(unptrUint64(ocR.OPEN))) + reg.BGPNeighborMetrics.Messages.Received.RouteRefresh.WithLabelValues(vrfName, neighborAddress).Set(float64(unptrUint64(ocR.ROUTE_REFRESH))) + reg.BGPNeighborMetrics.Messages.Received.Update.WithLabelValues(vrfName, neighborAddress).Set(float64(unptrUint64(ocR.UPDATE))) + } + + if ocSt.Messages.Sent != nil { + ocS := ocSt.Messages.Sent + messages.Sent = agentapi.BGPMessagesCounters{ + Capability: unptrUint64(ocS.CAPABILITY), + Keepalive: unptrUint64(ocS.KEEPALIVE), + Notification: unptrUint64(ocS.NOTIFICATION), + Open: unptrUint64(ocS.OPEN), + RouteRefresh: unptrUint64(ocS.ROUTE_REFRESH), + Update: unptrUint64(ocS.UPDATE), + } + + reg.BGPNeighborMetrics.Messages.Sent.Capability.WithLabelValues(vrfName, neighborAddress).Set(float64(unptrUint64(ocS.CAPABILITY))) + reg.BGPNeighborMetrics.Messages.Sent.Keepalive.WithLabelValues(vrfName, neighborAddress).Set(float64(unptrUint64(ocS.KEEPALIVE))) + reg.BGPNeighborMetrics.Messages.Sent.Notification.WithLabelValues(vrfName, neighborAddress).Set(float64(unptrUint64(ocS.NOTIFICATION))) + reg.BGPNeighborMetrics.Messages.Sent.Open.WithLabelValues(vrfName, 
// boolToFloat64 turns an optional boolean into a gauge value: 1 when the
// pointer is set and points at true, 0 when it is nil or points at false.
func boolToFloat64(b *bool) float64 {
	if b == nil || !*b {
		return 0
	}

	return 1
}

// unptrUint64 dereferences an optional counter, yielding 0 for a nil pointer.
func unptrUint64(u *uint64) uint64 {
	var val uint64
	if u != nil {
		val = *u
	}

	return val
}
oc.OpenconfigInterfaces_Interfaces_Interface_State_OperStatus_UNKNOWN: + return agentapi.OperStatusUnknown, nil + case oc.OpenconfigInterfaces_Interfaces_Interface_State_OperStatus_DORMANT: + return agentapi.OperStatusDormant, nil + case oc.OpenconfigInterfaces_Interfaces_Interface_State_OperStatus_NOT_PRESENT: + return agentapi.OperStatusNotPresent, nil + case oc.OpenconfigInterfaces_Interfaces_Interface_State_OperStatus_LOWER_LAYER_DOWN: + return agentapi.OperStatusLowerLayerDown, nil + default: + return agentapi.OperStatusUnset, errors.Errorf("unknown oper status from gnmi: %d", in) + } +} + +func mapBGPPeerType(in oc.E_OpenconfigBgp_PeerType) (agentapi.BGPPeerType, error) { + switch in { + case oc.OpenconfigBgp_PeerType_UNSET: + return agentapi.BGPPeerTypeUnset, nil + case oc.OpenconfigBgp_PeerType_INTERNAL: + return agentapi.BGPPeerTypeInternal, nil + case oc.OpenconfigBgp_PeerType_EXTERNAL: + return agentapi.BGPPeerTypeExternal, nil + default: + return agentapi.BGPPeerTypeInternal, errors.Errorf("unknown bgp peer type from gnmi: %d", in) + } +} + +func mapBGPNeighborSessionState(in oc.E_OpenconfigBgp_Bgp_Neighbors_Neighbor_State_SessionState) (agentapi.BGPNeighborSessionState, error) { + switch in { + case oc.OpenconfigBgp_Bgp_Neighbors_Neighbor_State_SessionState_UNSET: + return agentapi.BGPNeighborSessionStateUnset, nil + case oc.OpenconfigBgp_Bgp_Neighbors_Neighbor_State_SessionState_IDLE: + return agentapi.BGPNeighborSessionStateIdle, nil + case oc.OpenconfigBgp_Bgp_Neighbors_Neighbor_State_SessionState_CONNECT: + return agentapi.BGPNeighborSessionStateConnect, nil + case oc.OpenconfigBgp_Bgp_Neighbors_Neighbor_State_SessionState_ACTIVE: + return agentapi.BGPNeighborSessionStateActive, nil + case oc.OpenconfigBgp_Bgp_Neighbors_Neighbor_State_SessionState_OPENSENT: + return agentapi.BGPNeighborSessionStateOpenSent, nil + case oc.OpenconfigBgp_Bgp_Neighbors_Neighbor_State_SessionState_OPENCONFIRM: + return agentapi.BGPNeighborSessionStateOpenConfirm, nil + 
case oc.OpenconfigBgp_Bgp_Neighbors_Neighbor_State_SessionState_ESTABLISHED: + return agentapi.BGPNeighborSessionStateEstablished, nil + default: + return agentapi.BGPNeighborSessionStateUnset, errors.Errorf("unknown bgp neighbor session state from gnmi: %d", in) + } +} diff --git a/pkg/agent/dozer/dozer.go b/pkg/agent/dozer/dozer.go index c4b7775d..fe3043f2 100644 --- a/pkg/agent/dozer/dozer.go +++ b/pkg/agent/dozer/dozer.go @@ -22,6 +22,7 @@ import ( "github.com/pkg/errors" "github.com/pmezard/go-difflib/difflib" agentapi "go.githedgehog.com/fabric/api/agent/v1alpha2" + "go.githedgehog.com/fabric/pkg/agent/switchstate" "sigs.k8s.io/yaml" ) @@ -33,6 +34,7 @@ type Processor interface { CalculateActions(ctx context.Context, actual, desired *Spec) ([]Action, error) ApplyActions(ctx context.Context, actions []Action) ([]string, error) // warnings Info(ctx context.Context) (*agentapi.NOSInfo, error) + UpdateSwitchState(ctx context.Context, reg *switchstate.Registry) error Reboot(ctx context.Context, force bool) error Reinstall(ctx context.Context) error FactoryReset(ctx context.Context) error diff --git a/pkg/agent/switchstate/switchstate.go b/pkg/agent/switchstate/switchstate.go new file mode 100644 index 00000000..ee1caec9 --- /dev/null +++ b/pkg/agent/switchstate/switchstate.go @@ -0,0 +1,337 @@ +// Copyright 2023 Hedgehog +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
// Registry bundles a Prometheus registry, the gauge vectors registered on it
// and the most recently saved switch state.
type Registry struct {
	// reg is the Prometheus registry the gauge vectors below are registered
	// on; ServeMetrics exposes it over HTTP.
	reg *prometheus.Registry

	// stateSync guards state: GetSwitchState takes the read lock and
	// SaveSwitchState the write lock.
	stateSync sync.RWMutex
	// state is the pointer last stored by SaveSwitchState (nil until then).
	state *agentapi.SwitchState

	// Gauge vectors grouped by the kind of object they describe.
	InterfaceMetrics   InterfaceMetrics
	InterfaceCounters  InterfaceCounters
	TransceiverMetrics TransceiverMetrics
	BGPNeighborMetrics BGPNeighborMetrics
}
*prometheus.GaugeVec + AlarmTxBiasHi *prometheus.GaugeVec + AlarmTxBiasLo *prometheus.GaugeVec + AlarmTxPowerHi *prometheus.GaugeVec + AlarmTxPowerLo *prometheus.GaugeVec + AlarmVoltHi *prometheus.GaugeVec + AlarmVoltLo *prometheus.GaugeVec + Rx1Power *prometheus.GaugeVec + Rx2Power *prometheus.GaugeVec + Rx3Power *prometheus.GaugeVec + Rx4Power *prometheus.GaugeVec + Rx5Power *prometheus.GaugeVec + Rx6Power *prometheus.GaugeVec + Rx7Power *prometheus.GaugeVec + Rx8Power *prometheus.GaugeVec + Temperature *prometheus.GaugeVec + Tx1Bias *prometheus.GaugeVec + Tx1Power *prometheus.GaugeVec + Tx2Bias *prometheus.GaugeVec + Tx2Power *prometheus.GaugeVec + Tx3Bias *prometheus.GaugeVec + Tx3Power *prometheus.GaugeVec + Tx4Bias *prometheus.GaugeVec + Tx4Power *prometheus.GaugeVec + Tx5Bias *prometheus.GaugeVec + Tx5Power *prometheus.GaugeVec + Tx6Bias *prometheus.GaugeVec + Tx6Power *prometheus.GaugeVec + Tx7Bias *prometheus.GaugeVec + Tx7Power *prometheus.GaugeVec + Tx8Bias *prometheus.GaugeVec + Tx8Power *prometheus.GaugeVec + Voltage *prometheus.GaugeVec + WarningRxPowerHi *prometheus.GaugeVec + WarningRxPowerLo *prometheus.GaugeVec + WarningTempHi *prometheus.GaugeVec + WarningTempLo *prometheus.GaugeVec + WarningTxBiasHi *prometheus.GaugeVec + WarningTxBiasLo *prometheus.GaugeVec + WarningTxPowerHi *prometheus.GaugeVec + WarningTxPowerLo *prometheus.GaugeVec + WarningVoltHi *prometheus.GaugeVec + WarningVoltLo *prometheus.GaugeVec +} + +type BGPNeighborMetrics struct { + ConnectionsDropped *prometheus.GaugeVec + Enabled *prometheus.GaugeVec `json:"enabled,omitempty"` + EstablishedTransitions *prometheus.GaugeVec + PeerType *prometheus.GaugeVec + SessionState *prometheus.GaugeVec + Messages BGPNeighborMetricsMessages `json:"messages,omitempty"` +} + +type BGPNeighborMetricsMessages struct { + Received BGPNeighborMetricsMessagesCounters + Sent BGPNeighborMetricsMessagesCounters +} + +type BGPNeighborMetricsMessagesCounters struct { + Capability *prometheus.GaugeVec + 
Keepalive *prometheus.GaugeVec + Notification *prometheus.GaugeVec + Open *prometheus.GaugeVec + RouteRefresh *prometheus.GaugeVec + Update *prometheus.GaugeVec +} + +func NewRegistry() *Registry { + reg := prometheus.NewRegistry() + autoreg := promauto.With(reg) + + labels := prometheus.Labels{} + + newInterfaceGaugeVec := func(name string, help string) *prometheus.GaugeVec { + return autoreg.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: MetricNamespace, + Subsystem: MetricSubsystem, + Name: name, + Help: help, + ConstLabels: labels, + }, []string{"interface"}) + } + + newTransceiverGaugeVec := func(name string, help string) *prometheus.GaugeVec { + return autoreg.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: MetricNamespace, + Subsystem: MetricSubsystem, + Name: name, + Help: help, + ConstLabels: labels, + }, []string{"transceiver"}) + } + + newBGPNeighborGaugeVec := func(name string, help string) *prometheus.GaugeVec { + return autoreg.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: MetricNamespace, + Subsystem: MetricSubsystem, + Name: name, + Help: help, + ConstLabels: labels, + }, []string{"vrf", "neighbor"}) + } + + r := &Registry{ + reg: reg, + + InterfaceMetrics: InterfaceMetrics{ + Enabled: newInterfaceGaugeVec("interface_enabled", "Whether the interface is enabled"), + AdminStatus: newInterfaceGaugeVec("interface_admin_status", "Admin status of the interface"), + OperStatus: newInterfaceGaugeVec("interface_oper_status", "Operational status of the interface"), + LastChange: newInterfaceGaugeVec("interface_last_change", "Time of last change in interface status"), + RateInterval: newInterfaceGaugeVec("interface_rate_interval", "Rate interval for interface counters"), + }, + InterfaceCounters: InterfaceCounters{ + InBitsPerSecond: newInterfaceGaugeVec("interface_in_bits_per_second", "Incoming bits per second"), + InBroadcastPkts: newInterfaceGaugeVec("interface_in_broadcast_pkts", "Incoming broadcast packets"), + InDiscards: 
newInterfaceGaugeVec("interface_in_discards", "Incoming discards"), + InErrors: newInterfaceGaugeVec("interface_in_errors", "Incoming errors"), + InMulticastPkts: newInterfaceGaugeVec("interface_in_multicast_pkts", "Incoming multicast packets"), + InOctets: newInterfaceGaugeVec("interface_in_octets", "Incoming octets"), + InOctetsPerSecond: newInterfaceGaugeVec("interface_in_octets_per_second", "Incoming octets per second"), + InPkts: newInterfaceGaugeVec("interface_in_pkts", "Incoming packets"), + InPktsPerSecond: newInterfaceGaugeVec("interface_in_pkts_per_second", "Incoming packets per second"), + InUnicastPkts: newInterfaceGaugeVec("interface_in_unicast_pkts", "Incoming unicast packets"), + InUtilization: newInterfaceGaugeVec("interface_in_utilization", "Incoming utilization"), + LastClear: newInterfaceGaugeVec("interface_last_clear", "Time of last counter clear"), + OutBitsPerSecond: newInterfaceGaugeVec("interface_out_bits_per_second", "Outgoing bits per second"), + OutBroadcastPkts: newInterfaceGaugeVec("interface_out_broadcast_pkts", "Outgoing broadcast packets"), + OutDiscards: newInterfaceGaugeVec("interface_out_discards", "Outgoing discards"), + OutErrors: newInterfaceGaugeVec("interface_out_errors", "Outgoing errors"), + OutMulticastPkts: newInterfaceGaugeVec("interface_out_multicast_pkts", "Outgoing multicast packets"), + OutOctets: newInterfaceGaugeVec("interface_out_octets", "Outgoing octets"), + OutOctetsPerSecond: newInterfaceGaugeVec("interface_out_octets_per_second", "Outgoing octets per second"), + OutPkts: newInterfaceGaugeVec("interface_out_pkts", "Outgoing packets"), + OutPktsPerSecond: newInterfaceGaugeVec("interface_out_pkts_per_second", "Outgoing packets per second"), + OutUnicastPkts: newInterfaceGaugeVec("interface_out_unicast_pkts", "Outgoing unicast packets"), + OutUtilization: newInterfaceGaugeVec("interface_out_utilization", "Outgoing utilization"), + }, + TransceiverMetrics: TransceiverMetrics{ + AlarmRxPowerHi: 
newTransceiverGaugeVec("transceiver_alarm_rx_power_hi", "Alarm rx power hi"), + AlarmRxPowerLo: newTransceiverGaugeVec("transceiver_alarm_rx_power_lo", "Alarm rx power lo"), + AlarmTempHi: newTransceiverGaugeVec("transceiver_alarm_temp_hi", "Alarm temp hi"), + AlarmTempLo: newTransceiverGaugeVec("transceiver_alarm_temp_lo", "Alarm temp lo"), + AlarmTxBiasHi: newTransceiverGaugeVec("transceiver_alarm_tx_bias_hi", "Alarm tx bias hi"), + AlarmTxBiasLo: newTransceiverGaugeVec("transceiver_alarm_tx_bias_lo", "Alarm tx bias lo"), + AlarmTxPowerHi: newTransceiverGaugeVec("transceiver_alarm_tx_power_hi", "Alarm tx power hi"), + AlarmTxPowerLo: newTransceiverGaugeVec("transceiver_alarm_tx_power_lo", "Alarm tx power lo"), + AlarmVoltHi: newTransceiverGaugeVec("transceiver_alarm_volt_hi", "Alarm volt hi"), + AlarmVoltLo: newTransceiverGaugeVec("transceiver_alarm_volt_lo", "Alarm volt lo"), + Rx1Power: newTransceiverGaugeVec("transceiver_rx1_power", "Rx1 power"), + Rx2Power: newTransceiverGaugeVec("transceiver_rx2_power", "Rx2 power"), + Rx3Power: newTransceiverGaugeVec("transceiver_rx3_power", "Rx3 power"), + Rx4Power: newTransceiverGaugeVec("transceiver_rx4_power", "Rx4 power"), + Rx5Power: newTransceiverGaugeVec("transceiver_rx5_power", "Rx5 power"), + Rx6Power: newTransceiverGaugeVec("transceiver_rx6_power", "Rx6 power"), + Rx7Power: newTransceiverGaugeVec("transceiver_rx7_power", "Rx7 power"), + Rx8Power: newTransceiverGaugeVec("transceiver_rx8_power", "Rx8 power"), + Temperature: newTransceiverGaugeVec("transceiver_temperature", "Temperature"), + Tx1Bias: newTransceiverGaugeVec("transceiver_tx1_bias", "Tx1 bias"), + Tx1Power: newTransceiverGaugeVec("transceiver_tx1_power", "Tx1 power"), + Tx2Bias: newTransceiverGaugeVec("transceiver_tx2_bias", "Tx2 bias"), + Tx2Power: newTransceiverGaugeVec("transceiver_tx2_power", "Tx2 power"), + Tx3Bias: newTransceiverGaugeVec("transceiver_tx3_bias", "Tx3 bias"), + Tx3Power: newTransceiverGaugeVec("transceiver_tx3_power", "Tx3 power"), 
+ Tx4Bias: newTransceiverGaugeVec("transceiver_tx4_bias", "Tx4 bias"), + Tx4Power: newTransceiverGaugeVec("transceiver_tx4_power", "Tx4 power"), + Tx5Bias: newTransceiverGaugeVec("transceiver_tx5_bias", "Tx5 bias"), + Tx5Power: newTransceiverGaugeVec("transceiver_tx5_power", "Tx5 power"), + Tx6Bias: newTransceiverGaugeVec("transceiver_tx6_bias", "Tx6 bias"), + Tx6Power: newTransceiverGaugeVec("transceiver_tx6_power", "Tx6 power"), + Tx7Bias: newTransceiverGaugeVec("transceiver_tx7_bias", "Tx7 bias"), + Tx7Power: newTransceiverGaugeVec("transceiver_tx7_power", "Tx7 power"), + Tx8Bias: newTransceiverGaugeVec("transceiver_tx8_bias", "Tx8 bias"), + Tx8Power: newTransceiverGaugeVec("transceiver_tx8_power", "Tx8 power"), + Voltage: newTransceiverGaugeVec("transceiver_voltage", "Voltage"), + WarningRxPowerHi: newTransceiverGaugeVec("transceiver_warning_rx_power_hi", "Warning rx power hi"), + WarningRxPowerLo: newTransceiverGaugeVec("transceiver_warning_rx_power_lo", "Warning rx power lo"), + WarningTempHi: newTransceiverGaugeVec("transceiver_warning_temp_hi", "Warning temp hi"), + WarningTempLo: newTransceiverGaugeVec("transceiver_warning_temp_lo", "Warning temp lo"), + WarningTxBiasHi: newTransceiverGaugeVec("transceiver_warning_tx_bias_hi", "Warning tx bias hi"), + WarningTxBiasLo: newTransceiverGaugeVec("transceiver_warning_tx_bias_lo", "Warning tx bias lo"), + WarningTxPowerHi: newTransceiverGaugeVec("transceiver_warning_tx_power_hi", "Warning tx power hi"), + WarningTxPowerLo: newTransceiverGaugeVec("transceiver_warning_tx_power_lo", "Warning tx power lo"), + WarningVoltHi: newTransceiverGaugeVec("transceiver_warning_volt_hi", "Warning volt hi"), + WarningVoltLo: newTransceiverGaugeVec("transceiver_warning_volt_lo", "Warning volt lo"), + }, + BGPNeighborMetrics: BGPNeighborMetrics{ + ConnectionsDropped: newBGPNeighborGaugeVec("bgp_neighbor_connections_dropped", "Number of dropped BGP connections"), + Enabled: newBGPNeighborGaugeVec("bgp_neighbor_enabled", "Whether 
the BGP neighbor is enabled"), + EstablishedTransitions: newBGPNeighborGaugeVec("bgp_neighbor_established_transitions", "Number of established BGP neighbor transitions"), + PeerType: newBGPNeighborGaugeVec("bgp_neighbor_peer_type", "Type of BGP peer"), + SessionState: newBGPNeighborGaugeVec("bgp_neighbor_session_state", "State of BGP session"), + Messages: BGPNeighborMetricsMessages{ + Received: BGPNeighborMetricsMessagesCounters{ + Capability: newBGPNeighborGaugeVec("bgp_neighbor_messages_received_capability", "Number of received BGP capability messages"), + Keepalive: newBGPNeighborGaugeVec("bgp_neighbor_messages_received_keepalive", "Number of received BGP keepalive messages"), + Notification: newBGPNeighborGaugeVec("bgp_neighbor_messages_received_notification", "Number of received BGP notification messages"), + Open: newBGPNeighborGaugeVec("bgp_neighbor_messages_received_open", "Number of received BGP open messages"), + RouteRefresh: newBGPNeighborGaugeVec("bgp_neighbor_messages_received_route_refresh", "Number of received BGP route refresh messages"), + Update: newBGPNeighborGaugeVec("bgp_neighbor_messages_received_update", "Number of received BGP update messages"), + }, + Sent: BGPNeighborMetricsMessagesCounters{ + Capability: newBGPNeighborGaugeVec("bgp_neighbor_messages_sent_capability", "Number of sent BGP capability messages"), + Keepalive: newBGPNeighborGaugeVec("bgp_neighbor_messages_sent_keepalive", "Number of sent BGP keepalive messages"), + Notification: newBGPNeighborGaugeVec("bgp_neighbor_messages_sent_notification", "Number of sent BGP notification messages"), + Open: newBGPNeighborGaugeVec("bgp_neighbor_messages_sent_open", "Number of sent BGP open messages"), + RouteRefresh: newBGPNeighborGaugeVec("bgp_neighbor_messages_sent_route_refresh", "Number of sent BGP route refresh messages"), + Update: newBGPNeighborGaugeVec("bgp_neighbor_messages_sent_update", "Number of sent BGP update messages"), + }, + }, + }, + } + + return r +} + +func (r 
*Registry) GetSwitchState() *agentapi.SwitchState { + r.stateSync.RLock() + defer r.stateSync.RUnlock() + + return r.state +} + +func (r *Registry) SaveSwitchState(state *agentapi.SwitchState) { + r.stateSync.Lock() + defer r.stateSync.Unlock() + + r.state = state +} + +func (r *Registry) ServeMetrics() error { + router := chi.NewRouter() + router.Use(middleware.Recoverer) + router.Use(middleware.Heartbeat("/ping")) + + router.Handle("/metrics", promhttp.HandlerFor(r.reg, promhttp.HandlerOpts{ + Registry: r.reg, + // TODO Timeout: , + // TODO ErrorLog: , + })) + + server := &http.Server{ + Handler: router, + Addr: fmt.Sprintf("127.0.0.1:%d", 2112), // TODO configurable + ReadHeaderTimeout: 30 * time.Second, + // TODO any other timeouts? + } + + return errors.Wrapf(server.ListenAndServe(), "failed to start metrics server") +}