From d92710d0b5c54a816fc06785553c13af8a13131c Mon Sep 17 00:00:00 2001 From: Pavel Boldyrev <627562+bpg@users.noreply.github.com> Date: Thu, 21 Nov 2024 22:28:10 -0500 Subject: [PATCH] fix(vm): add retries to VM `update` operation (#1650) * fix(vm): add retries to VM `update` operation Signed-off-by: Pavel Boldyrev <627562+bpg@users.noreply.github.com> --- fwprovider/vm/concurrency_test.go | 87 +++++++++++++++++++++++++++++++ proxmox/nodes/vms/vms.go | 62 ++++++++++++++-------- 2 files changed, 128 insertions(+), 21 deletions(-) create mode 100644 fwprovider/vm/concurrency_test.go diff --git a/fwprovider/vm/concurrency_test.go b/fwprovider/vm/concurrency_test.go new file mode 100644 index 00000000..755cbc02 --- /dev/null +++ b/fwprovider/vm/concurrency_test.go @@ -0,0 +1,87 @@ +/* + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. + */ + +package vm_test + +import ( + "context" + "sync" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/bpg/terraform-provider-proxmox/fwprovider/test" + "github.com/bpg/terraform-provider-proxmox/proxmox/cluster" + "github.com/bpg/terraform-provider-proxmox/proxmox/nodes/vms" + "github.com/bpg/terraform-provider-proxmox/utils" +) + +func TestBatchCreate(t *testing.T) { + t.Parallel() + + const ( + numVMs = 30 + ) + + if utils.GetAnyStringEnv("TF_ACC") == "" { + t.Skip("Acceptance tests are disabled") + } + + te := test.InitEnvironment(t) + + ctx := context.Background() + + gen := cluster.NewIDGenerator(te.ClusterClient(), cluster.IDGeneratorConfig{RandomIDs: false}) + + sourceID, err := gen.NextID(ctx) + require.NoError(t, err) + + err = te.NodeClient().VM(0).CreateVM(ctx, &vms.CreateRequestBody{VMID: sourceID}) + + require.NoError(t, err, "failed to create VM %d", sourceID) + + ids := make([]int, numVMs) + + t.Cleanup(func() { + _ = te.NodeClient().VM(sourceID).DeleteVM(ctx) //nolint:errcheck + + var wg sync.WaitGroup + for _, id := range ids { + wg.Add(1) + + go func() { + defer wg.Done() + + if id > 0 { + _ = te.NodeClient().VM(id).DeleteVM(ctx) //nolint:errcheck + } + }() + } + + wg.Wait() + }) + + var wg sync.WaitGroup + + for i := range numVMs { + wg.Add(1) + + go func() { + defer wg.Done() + + id := 999900 + i + if err == nil { + err = te.NodeClient().VM(sourceID).CloneVM(ctx, 5, &vms.CloneRequestBody{VMIDNew: id}) + ids[i] = id + } + + assert.NoError(t, err) + }() + } + + wg.Wait() +} diff --git a/proxmox/nodes/vms/vms.go b/proxmox/nodes/vms/vms.go index 752886fe..cfdc4480 100644 --- a/proxmox/nodes/vms/vms.go +++ b/proxmox/nodes/vms/vms.go @@ -35,19 +35,25 @@ func (c *Client) CloneVM(ctx context.Context, retries int, d *CloneRequestBody) retries = 1 } - err = retry.Do(func() error { - err = c.DoRequest(ctx, http.MethodPost, c.ExpandPath("clone"), d, resBody) - if err != nil { - return fmt.Errorf("error cloning VM: %w", err) - } + err = retry.Do( + func() error { + err = c.DoRequest(ctx, http.MethodPost, c.ExpandPath("clone"), d, resBody) + if err != nil { + return fmt.Errorf("error cloning VM: %w", err) + } - if resBody.Data == nil { - return api.ErrNoDataObjectInResponse - } + if resBody.Data == nil { + return api.ErrNoDataObjectInResponse + } - // ignoring warnings as per https://www.mail-archive.com/pve-devel@lists.proxmox.com/msg17724.html - return c.Tasks().WaitForTask(ctx, *resBody.Data, tasks.WithIgnoreWarnings()) - }, retry.Attempts(uint(retries)), retry.Delay(10*time.Second)) + // ignoring warnings as per https://www.mail-archive.com/pve-devel@lists.proxmox.com/msg17724.html + return c.Tasks().WaitForTask(ctx, *resBody.Data, tasks.WithIgnoreWarnings()) + }, + retry.Context(ctx), + retry.Attempts(uint(retries)), + retry.Delay(10*time.Second), + retry.LastErrorOnly(false), + ) if err != nil { return fmt.Errorf("error waiting for VM clone: %w", err) } @@ -79,6 +85,9 @@ func (c *Client) CreateVMAsync(ctx context.Context, d *CreateRequestBody) (*stri return c.DoRequest(ctx, http.MethodPost, c.basePath(), d, resBody) }, retry.Context(ctx), + retry.Attempts(3), + retry.Delay(1*time.Second), + retry.LastErrorOnly(false), retry.OnRetry(func(n uint, err error) { tflog.Warn(ctx, "retrying VM creation", map[string]interface{}{ "attempt": n, @@ -92,8 +101,6 @@ func (c *Client) CreateVMAsync(ctx context.Context, d *CreateRequestBody) (*stri }) } }), - retry.LastErrorOnly(false), - retry.Attempts(3), ) if err != nil { return nil, fmt.Errorf("error creating VM: %w", err) @@ -131,10 +138,12 @@ func (c *Client) DeleteVMAsync(ctx context.Context) (*string, error) { return c.DoRequest(ctx, http.MethodDelete, c.ExpandPath("?destroy-unreferenced-disks=1&purge=1"), nil, resBody) }, retry.Context(ctx), + retry.Attempts(3), + retry.Delay(1*time.Second), + retry.LastErrorOnly(true), retry.RetryIf(func(err error) bool { return !errors.Is(err, api.ErrResourceDoesNotExist) }), - retry.LastErrorOnly(true), ) if err != nil { return nil, fmt.Errorf("error deleting VM: %w", err) @@ -460,7 +469,18 @@ func (c *Client) StopVMAsync(ctx context.Context) (*string, error) { // UpdateVM updates a virtual machine. func (c *Client) UpdateVM(ctx context.Context, d *UpdateRequestBody) error { - err := c.DoRequest(ctx, http.MethodPut, c.ExpandPath("config"), d, nil) + err := retry.Do( + func() error { + return c.DoRequest(ctx, http.MethodPut, c.ExpandPath("config"), d, nil) + }, + retry.Context(ctx), + retry.Attempts(3), + retry.Delay(1*time.Second), + retry.LastErrorOnly(true), + retry.RetryIf(func(err error) bool { + return strings.Contains(err.Error(), "got timeout") + }), + ) if err != nil { return fmt.Errorf("error updating VM: %w", err) } @@ -595,12 +615,12 @@ func (c *Client) WaitForVMConfigUnlock(ctx context.Context, ignoreErrorResponse return nil }, retry.Context(ctx), - retry.RetryIf(func(err error) bool { - return errors.Is(err, stillLocked) || ignoreErrorResponse - }), retry.UntilSucceeded(), retry.Delay(1*time.Second), retry.LastErrorOnly(true), + retry.RetryIf(func(err error) bool { + return errors.Is(err, stillLocked) || ignoreErrorResponse + }), ) if errors.Is(err, context.DeadlineExceeded) { @@ -634,12 +654,12 @@ func (c *Client) WaitForVMStatus(ctx context.Context, status string) error { return nil }, retry.Context(ctx), - retry.RetryIf(func(err error) bool { - return errors.Is(err, unexpectedStatus) - }), retry.UntilSucceeded(), retry.Delay(1*time.Second), retry.LastErrorOnly(true), + retry.RetryIf(func(err error) bool { + return errors.Is(err, unexpectedStatus) + }), ) if errors.Is(err, context.DeadlineExceeded) {