Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Vlab show tech #300

Merged
merged 7 commits into from
Jan 23, 2025
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Previous commit
Next commit
Add flag to enable show tech collect
Signed-off-by: Sergei Lukianov <me@slukjanov.name>
Frostman committed Jan 23, 2025
commit 799ebfe8043cb6eedaf706cf334150f2cbb711b7
8 changes: 8 additions & 0 deletions .github/workflows/ci-hw.yaml
Original file line number Diff line number Diff line change
@@ -69,11 +69,19 @@ jobs:
- name: hhfab init and vlab up
env:
HHFAB_REG_REPO: 127.0.0.1:30000
HHFAB_VLAB_COLLECT: true
run: |
source "./lab-ci/envs/$KUBE_NODE/source.sh"
bin/hhfab init -v --dev --include-onie=${{ matrix.includeonie }} -w "./lab-ci/envs/$KUBE_NODE/wiring.yaml"
bin/hhfab vlab up -v --ready switch-reinstall ${{ inputs.skip_ready && '' || '--ready setup-vpcs --ready test-connectivity' }} ${{ inputs.keep && '' || '--ready exit' }} --mode=${{ matrix.buildmode }}

- name: Upload show-tech artifacts
uses: actions/upload-artifact@v4
if: ${{ always() }}
with:
name: show-tech-hlab-${{ matrix.fabricmode }}-${{ matrix.includeonie }}-${{ matrix.buildmode }}
path: show-tech-output

- name: Dump local registry logs
if: ${{ always() }}
run: |
4 changes: 4 additions & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
@@ -239,6 +239,7 @@ jobs:
- name: hhfab init and vlab up
env:
HHFAB_REG_REPO: 127.0.0.1:30000
HHFAB_VLAB_COLLECT: true
run: |
bin/hhfab init -v --dev -m ${{ matrix.fabricmode }} --include-onie=${{ matrix.includeonie }}
bin/hhfab vlab gen -v
@@ -304,6 +305,7 @@ jobs:
- name: Initial VLAB with old version
env:
HHFAB_REG_REPO: 127.0.0.1:30000
HHFAB_VLAB_COLLECT: true
run: |
mkdir old
curl -fsSL https://i.hhdev.io/hhfab | USE_SUDO=false INSTALL_DIR=./old VERSION="${{ matrix.fromversion }}" bash
@@ -326,6 +328,7 @@ jobs:
- name: hhfab vlab up --upgrade
env:
HHFAB_REG_REPO: 127.0.0.1:30000
HHFAB_VLAB_COLLECT: true
run: |
bin/hhfab vlab up -v --ready setup-vpcs --ready test-connectivity --ready exit --upgrade

@@ -412,6 +415,7 @@ jobs:
- name: hhfab init and vlab up
env:
HHFAB_REG_REPO: 127.0.0.1:30000
HHFAB_VLAB_COLLECT: true
run: |
source "./lab-ci/envs/$KUBE_NODE/source.sh"
bin/hhfab init -v --dev --include-onie=${{ matrix.includeonie }} -w "./lab-ci/envs/$KUBE_NODE/wiring.yaml"
8 changes: 8 additions & 0 deletions cmd/hhfab/main.go
Original file line number Diff line number Diff line change
@@ -56,6 +56,7 @@ const (
FlagNameControlUpgrade = "control-upgrade"
FlagNameFailFast = "fail-fast"
FlagNameReady = "ready"
FlagNameCollectShowTech = "collect-show-tech"
)

func main() {
@@ -636,6 +637,12 @@ func Run(ctx context.Context) error {
Aliases: []string{"r"},
Usage: "run commands on all VMs ready (one of: " + strings.Join(onReadyCommands, ", ") + ")",
},
&cli.BoolFlag{
Name: FlagNameCollectShowTech,
Aliases: []string{"collect"},
Usage: "collect show-tech from all devices at exit or error",
EnvVars: []string{"HHFAB_VLAB_COLLECT"},
},
),
Before: before(false),
Action: func(c *cli.Context) error {
@@ -651,6 +658,7 @@ func Run(ctx context.Context) error {
ControlUpgrade: c.Bool(FlagNameControlUpgrade),
FailFast: c.Bool(FlagNameFailFast),
OnReady: c.StringSlice(FlagNameReady),
CollectShowTech: c.Bool(FlagNameCollectShowTech),
},
}); err != nil {
return fmt.Errorf("running VLAB: %w", err)
55 changes: 37 additions & 18 deletions pkg/hhfab/vlabrunner.go
Original file line number Diff line number Diff line change
@@ -86,6 +86,7 @@ type VLABRunOpts struct {
ControlUpgrade bool
FailFast bool
OnReady []string
CollectShowTech bool
}

type OnReady string
@@ -375,10 +376,13 @@ func (c *Config) VLABRun(ctx context.Context, vlab *VLAB, opts VLABRunOpts) erro

if err := execCmd(ctx, true, vmDir, VLABCmdQemuSystem, args, "vm", vm.Name); err != nil {
slog.Warn("Failed running VM", "vm", vm.Name, "type", vm.Type, "err", err)
if err := c.VLABShowTech(ctx, vlab); err != nil {
slog.Warn("Failed to collect show-tech diagnostics", "err", err)

return fmt.Errorf("getting show-tech: %w", err)
if opts.CollectShowTech {
if err := c.VLABShowTech(ctx, vlab); err != nil {
slog.Warn("Failed to collect show-tech diagnostics", "err", err)

return fmt.Errorf("getting show-tech: %w", err)
}
}

if opts.FailFast {
@@ -394,10 +398,13 @@ func (c *Config) VLABRun(ctx context.Context, vlab *VLAB, opts VLABRunOpts) erro
group.Go(func() error {
if err := c.vmPostProcess(ctx, vlab, d, vm, opts); err != nil {
slog.Warn("Failed to post-process VM", "vm", vm.Name, "type", vm.Type, "err", err)
if err := c.VLABShowTech(ctx, vlab); err != nil {
slog.Warn("Failed to collect show-tech diagnostics", "err", err)

return fmt.Errorf("getting show-tech: %w", err)
if opts.CollectShowTech {
if err := c.VLABShowTech(ctx, vlab); err != nil {
slog.Warn("Failed to collect show-tech diagnostics", "err", err)

return fmt.Errorf("getting show-tech: %w", err)
}
}

if opts.FailFast {
@@ -445,10 +452,13 @@ func (c *Config) VLABRun(ctx context.Context, vlab *VLAB, opts VLABRunOpts) erro
WaitReady: false,
}); err != nil {
slog.Warn("Failed to reinstall switches", "err", err)
if err := c.VLABShowTech(ctx, vlab); err != nil {
slog.Warn("Failed to collect show-tech diagnostics", "err", err)

return fmt.Errorf("getting show-tech: %w", err)
if opts.CollectShowTech {
if err := c.VLABShowTech(ctx, vlab); err != nil {
slog.Warn("Failed to collect show-tech diagnostics", "err", err)

return fmt.Errorf("getting show-tech: %w", err)
}
}

return fmt.Errorf("reinstalling switches: %w", err)
@@ -465,10 +475,13 @@ func (c *Config) VLABRun(ctx context.Context, vlab *VLAB, opts VLABRunOpts) erro
TimeServers: []string{"219.239.35.0"},
}); err != nil {
slog.Warn("Failed to setup VPCs", "err", err)
if err := c.VLABShowTech(ctx, vlab); err != nil {
slog.Warn("Failed to collect show-tech diagnostics", "err", err)

return fmt.Errorf("getting show-tech: %w", err)
if opts.CollectShowTech {
if err := c.VLABShowTech(ctx, vlab); err != nil {
slog.Warn("Failed to collect show-tech diagnostics", "err", err)

return fmt.Errorf("getting show-tech: %w", err)
}
}

return fmt.Errorf("setting up VPCs: %w", err)
@@ -482,20 +495,26 @@ func (c *Config) VLABRun(ctx context.Context, vlab *VLAB, opts VLABRunOpts) erro
CurlsCount: 3,
}); err != nil {
slog.Warn("Failed to test connectivity", "err", err)
if err := c.VLABShowTech(ctx, vlab); err != nil {
slog.Warn("Failed to collect show-tech diagnostics", "err", err)

return fmt.Errorf("getting show-tech: %w", err)
if opts.CollectShowTech {
if err := c.VLABShowTech(ctx, vlab); err != nil {
slog.Warn("Failed to collect show-tech diagnostics", "err", err)

return fmt.Errorf("getting show-tech: %w", err)
}
}

return fmt.Errorf("testing connectivity: %w", err)
}
case OnReadyExit:
if err := c.VLABShowTech(ctx, vlab); err != nil {
slog.Warn("Failed to collect show-tech diagnostics", "err", err)
if opts.CollectShowTech {
if err := c.VLABShowTech(ctx, vlab); err != nil {
slog.Warn("Failed to collect show-tech diagnostics", "err", err)

return fmt.Errorf("getting show-tech: %w", err)
return fmt.Errorf("getting show-tech: %w", err)
}
}

// TODO seems like some graceful shutdown logic isn't working in CI and we're getting stuck w/o this
if os.Getenv("GITHUB_ACTIONS") == "true" {
slog.Warn("Immediately exiting b/c running in GHA")