From c8345090d9a5befed26d6b1ff9c726a12affa338 Mon Sep 17 00:00:00 2001 From: Stephen J Day Date: Mon, 20 Apr 2015 18:43:19 -0700 Subject: [PATCH] Attempt to deal with eventual consistency by retrying Rather than accept the result of a layer validation, we retry up to three times, backing off 100ms after each try. The thought is that we allow s3 files to make their way into the correct location, increasing the likelihood the verification can proceed, if possible. Signed-off-by: Stephen J Day --- registry/storage/layerwriter.go | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/registry/storage/layerwriter.go b/registry/storage/layerwriter.go index 0305d0117..fe1185978 100644 --- a/registry/storage/layerwriter.go +++ b/registry/storage/layerwriter.go @@ -46,16 +46,37 @@ func (lw *layerWriter) StartedAt() time.Time { // uploaded layer. The final size and checksum are validated against the // contents of the uploaded layer. The checksum should be provided in the // format <algorithm>:<hex digest>. -func (lw *layerWriter) Finish(digest digest.Digest) (distribution.Layer, error) { +func (lw *layerWriter) Finish(dgst digest.Digest) (distribution.Layer, error) { ctxu.GetLogger(lw.layerStore.repository.ctx).Debug("(*layerWriter).Finish") if err := lw.bufferedFileWriter.Close(); err != nil { return nil, err } - canonical, err := lw.validateLayer(digest) - if err != nil { + var ( + canonical digest.Digest + err error + ) + + // HACK(stevvooe): To deal with s3's lack of consistency, attempt to retry + // validation on failure. Up to three retries are made, backing off 100ms each + // time. + for retries := 0; ; retries++ { + canonical, err = lw.validateLayer(dgst) + if err == nil { + break + } + + ctxu.GetLoggerWithField(lw.layerStore.repository.ctx, "retries", retries).
+ Errorf("error validating layer: %v", err) + + if retries < 3 { + time.Sleep(100 * time.Millisecond) + continue + } + return nil, err + } if err := lw.moveLayer(canonical); err != nil { @@ -64,7 +85,7 @@ func (lw *layerWriter) Finish(digest digest.Digest) (distribution.Layer, error) } // Link the layer blob into the repository. - if err := lw.linkLayer(canonical, digest); err != nil { + if err := lw.linkLayer(canonical, dgst); err != nil { return nil, err }