From ff03381d4906d9d90dd92b1ad1050a63147464c7 Mon Sep 17 00:00:00 2001 From: Brian Bland Date: Mon, 8 Feb 2016 14:29:21 -0800 Subject: [PATCH 1/7] Adds new storagedriver.FileWriter interface Updates registry storage code to use this for better resumable writes. Implements this interface for the following drivers: + Inmemory + Filesystem + S3 + Azure Signed-off-by: Brian Bland --- Godeps/Godeps.json | 2 +- .../Azure/azure-sdk-for-go/storage/blob.go | 164 ++++- .../azure-sdk-for-go/storage/blob_test.go | 625 ---------------- .../Azure/azure-sdk-for-go/storage/client.go | 45 +- .../azure-sdk-for-go/storage/client_test.go | 156 ---- .../Azure/azure-sdk-for-go/storage/file.go | 91 +++ .../Azure/azure-sdk-for-go/storage/queue.go | 78 +- .../azure-sdk-for-go/storage/queue_test.go | 91 --- .../azure-sdk-for-go/storage/util_test.go | 69 -- blobs.go | 9 +- cmd/registry/main.go | 6 +- cmd/registry/rados.go | 2 +- registry/client/blob_writer.go | 18 +- registry/handlers/blobupload.go | 49 +- registry/storage/blob_test.go | 11 +- registry/storage/blobwriter.go | 49 +- registry/storage/blobwriter_resumable.go | 45 +- registry/storage/driver/azure/azure.go | 164 ++++- registry/storage/driver/azure/blockblob.go | 24 - .../storage/driver/azure/blockblob_test.go | 155 ---- registry/storage/driver/azure/blockid.go | 60 -- registry/storage/driver/azure/blockid_test.go | 74 -- registry/storage/driver/azure/randomwriter.go | 208 ------ .../storage/driver/azure/randomwriter_test.go | 339 --------- .../storage/driver/azure/zerofillwriter.go | 49 -- .../driver/azure/zerofillwriter_test.go | 126 ---- registry/storage/driver/base/base.go | 24 +- registry/storage/driver/filesystem/driver.go | 146 +++- registry/storage/driver/inmemory/driver.go | 126 +++- registry/storage/driver/s3-aws/s3.go | 670 ++++++++---------- registry/storage/driver/s3-goamz/s3.go | 549 ++++++-------- registry/storage/driver/storagedriver.go | 30 +- .../storage/driver/testsuites/testsuites.go | 247 +++---- registry/storage/filereader.go | 2 +- registry/storage/filewriter.go | 135 ---- registry/storage/filewriter_test.go | 226 ------ registry/storage/linkedblobstore.go | 21 +- 37 files changed, 1429 insertions(+), 3456 deletions(-) delete mode 100644 Godeps/_workspace/src/github.com/Azure/azure-sdk-for-go/storage/blob_test.go delete mode 100644 Godeps/_workspace/src/github.com/Azure/azure-sdk-for-go/storage/client_test.go create mode 100644 Godeps/_workspace/src/github.com/Azure/azure-sdk-for-go/storage/file.go delete mode 100644 Godeps/_workspace/src/github.com/Azure/azure-sdk-for-go/storage/queue_test.go delete mode 100644 Godeps/_workspace/src/github.com/Azure/azure-sdk-for-go/storage/util_test.go delete mode 100644 registry/storage/driver/azure/blockblob.go delete mode 100644 registry/storage/driver/azure/blockblob_test.go delete mode 100644 registry/storage/driver/azure/blockid.go delete mode 100644 registry/storage/driver/azure/blockid_test.go delete mode 100644 registry/storage/driver/azure/randomwriter.go delete mode 100644 registry/storage/driver/azure/randomwriter_test.go delete mode 100644 registry/storage/driver/azure/zerofillwriter.go delete mode 100644 registry/storage/driver/azure/zerofillwriter_test.go delete mode 100644 registry/storage/filewriter.go delete mode 100644 registry/storage/filewriter_test.go diff --git a/Godeps/Godeps.json b/Godeps/Godeps.json index f6411400..74b6ae5e 100644 --- a/Godeps/Godeps.json +++ b/Godeps/Godeps.json @@ -43,7 +43,7 @@ }, { "ImportPath": "github.com/Azure/azure-sdk-for-go/storage", - "Rev": "97d9593768bbbbd316f9c055dfc5f780933cd7fc" + "Rev": "95361a2573b1fa92a00c5fc2707a80308483c6f9" }, { "ImportPath": "github.com/Sirupsen/logrus", diff --git a/Godeps/_workspace/src/github.com/Azure/azure-sdk-for-go/storage/blob.go b/Godeps/_workspace/src/github.com/Azure/azure-sdk-for-go/storage/blob.go index a6e2eb8a..dc7bfdca 100644 --- a/Godeps/_workspace/src/github.com/Azure/azure-sdk-for-go/storage/blob.go +++ b/Godeps/_workspace/src/github.com/Azure/azure-sdk-for-go/storage/blob.go @@ -167,8 +167,9 @@ type BlobType string // Types of page blobs const ( - BlobTypeBlock BlobType = "BlockBlob" - BlobTypePage BlobType = "PageBlob" + BlobTypeBlock BlobType = "BlockBlob" + BlobTypePage BlobType = "PageBlob" + BlobTypeAppend BlobType = "AppendBlob" ) // PageWriteType defines the type updates that are going to be @@ -330,7 +331,6 @@ func (b BlobStorageClient) createContainer(name string, access ContainerAccessTy uri := b.client.getEndpoint(blobServiceName, pathForContainer(name), url.Values{"restype": {"container"}}) headers := b.client.getStandardHeaders() - headers["Content-Length"] = "0" if access != "" { headers["x-ms-blob-public-access"] = string(access) } @@ -541,17 +541,102 @@ func (b BlobStorageClient) GetBlobProperties(container, name string) (*BlobPrope }, nil } +// SetBlobMetadata replaces the metadata for the specified blob. +// +// Some keys may be converted to Camel-Case before sending. All keys +// are returned in lower case by GetBlobMetadata. HTTP header names +// are case-insensitive so case munging should not matter to other +// applications either. +// +// See https://msdn.microsoft.com/en-us/library/azure/dd179414.aspx +func (b BlobStorageClient) SetBlobMetadata(container, name string, metadata map[string]string) error { + params := url.Values{"comp": {"metadata"}} + uri := b.client.getEndpoint(blobServiceName, pathForBlob(container, name), params) + headers := b.client.getStandardHeaders() + for k, v := range metadata { + headers[userDefinedMetadataHeaderPrefix+k] = v + } + + resp, err := b.client.exec("PUT", uri, headers, nil) + if err != nil { + return err + } + defer resp.body.Close() + + return checkRespCode(resp.statusCode, []int{http.StatusOK}) +} + +// GetBlobMetadata returns all user-defined metadata for the specified blob. +// +// All metadata keys will be returned in lower case. (HTTP header +// names are case-insensitive.) +// +// See https://msdn.microsoft.com/en-us/library/azure/dd179414.aspx +func (b BlobStorageClient) GetBlobMetadata(container, name string) (map[string]string, error) { + params := url.Values{"comp": {"metadata"}} + uri := b.client.getEndpoint(blobServiceName, pathForBlob(container, name), params) + headers := b.client.getStandardHeaders() + + resp, err := b.client.exec("GET", uri, headers, nil) + if err != nil { + return nil, err + } + defer resp.body.Close() + + if err := checkRespCode(resp.statusCode, []int{http.StatusOK}); err != nil { + return nil, err + } + + metadata := make(map[string]string) + for k, v := range resp.headers { + // Can't trust CanonicalHeaderKey() to munge case + // reliably. "_" is allowed in identifiers: + // https://msdn.microsoft.com/en-us/library/azure/dd179414.aspx + // https://msdn.microsoft.com/library/aa664670(VS.71).aspx + // http://tools.ietf.org/html/rfc7230#section-3.2 + // ...but "_" is considered invalid by + // CanonicalMIMEHeaderKey in + // https://golang.org/src/net/textproto/reader.go?s=14615:14659#L542 + // so k can be "X-Ms-Meta-Foo" or "x-ms-meta-foo_bar". + k = strings.ToLower(k) + if len(v) == 0 || !strings.HasPrefix(k, strings.ToLower(userDefinedMetadataHeaderPrefix)) { + continue + } + // metadata["foo"] = content of the last X-Ms-Meta-Foo header + k = k[len(userDefinedMetadataHeaderPrefix):] + metadata[k] = v[len(v)-1] + } + return metadata, nil +} + // CreateBlockBlob initializes an empty block blob with no blocks. // // See https://msdn.microsoft.com/en-us/library/azure/dd179451.aspx func (b BlobStorageClient) CreateBlockBlob(container, name string) error { + return b.CreateBlockBlobFromReader(container, name, 0, nil, nil) +} + +// CreateBlockBlobFromReader initializes a block blob using data from +// reader. Size must be the number of bytes read from reader. To +// create an empty blob, use size==0 and reader==nil. +// +// The API rejects requests with size > 64 MiB (but this limit is not +// checked by the SDK). To write a larger blob, use CreateBlockBlob, +// PutBlock, and PutBlockList. +// +// See https://msdn.microsoft.com/en-us/library/azure/dd179451.aspx +func (b BlobStorageClient) CreateBlockBlobFromReader(container, name string, size uint64, blob io.Reader, extraHeaders map[string]string) error { path := fmt.Sprintf("%s/%s", container, name) uri := b.client.getEndpoint(blobServiceName, path, url.Values{}) headers := b.client.getStandardHeaders() headers["x-ms-blob-type"] = string(BlobTypeBlock) - headers["Content-Length"] = fmt.Sprintf("%v", 0) + headers["Content-Length"] = fmt.Sprintf("%d", size) - resp, err := b.client.exec("PUT", uri, headers, nil) + for k, v := range extraHeaders { + headers[k] = v + } + + resp, err := b.client.exec("PUT", uri, headers, blob) if err != nil { return err } @@ -562,26 +647,37 @@ func (b BlobStorageClient) CreateBlockBlob(container, name string) error { // PutBlock saves the given data chunk to the specified block blob with // given ID. // +// The API rejects chunks larger than 4 MiB (but this limit is not +// checked by the SDK). +// // See https://msdn.microsoft.com/en-us/library/azure/dd135726.aspx func (b BlobStorageClient) PutBlock(container, name, blockID string, chunk []byte) error { - return b.PutBlockWithLength(container, name, blockID, uint64(len(chunk)), bytes.NewReader(chunk)) + return b.PutBlockWithLength(container, name, blockID, uint64(len(chunk)), bytes.NewReader(chunk), nil) } // PutBlockWithLength saves the given data stream of exactly specified size to // the block blob with given ID. It is an alternative to PutBlocks where data // comes as stream but the length is known in advance. // +// The API rejects requests with size > 4 MiB (but this limit is not +// checked by the SDK). +// // See https://msdn.microsoft.com/en-us/library/azure/dd135726.aspx -func (b BlobStorageClient) PutBlockWithLength(container, name, blockID string, size uint64, blob io.Reader) error { +func (b BlobStorageClient) PutBlockWithLength(container, name, blockID string, size uint64, blob io.Reader, extraHeaders map[string]string) error { uri := b.client.getEndpoint(blobServiceName, pathForBlob(container, name), url.Values{"comp": {"block"}, "blockid": {blockID}}) headers := b.client.getStandardHeaders() headers["x-ms-blob-type"] = string(BlobTypeBlock) headers["Content-Length"] = fmt.Sprintf("%v", size) + for k, v := range extraHeaders { + headers[k] = v + } + resp, err := b.client.exec("PUT", uri, headers, blob) if err != nil { return err } + defer resp.body.Close() return checkRespCode(resp.statusCode, []int{http.StatusCreated}) } @@ -628,13 +724,16 @@ func (b BlobStorageClient) GetBlockList(container, name string, blockType BlockL // be created using this method before writing pages. // // See https://msdn.microsoft.com/en-us/library/azure/dd179451.aspx -func (b BlobStorageClient) PutPageBlob(container, name string, size int64) error { +func (b BlobStorageClient) PutPageBlob(container, name string, size int64, extraHeaders map[string]string) error { path := fmt.Sprintf("%s/%s", container, name) uri := b.client.getEndpoint(blobServiceName, path, url.Values{}) headers := b.client.getStandardHeaders() headers["x-ms-blob-type"] = string(BlobTypePage) headers["x-ms-blob-content-length"] = fmt.Sprintf("%v", size) - headers["Content-Length"] = fmt.Sprintf("%v", 0) + + for k, v := range extraHeaders { + headers[k] = v + } resp, err := b.client.exec("PUT", uri, headers, nil) if err != nil { @@ -700,6 +799,48 @@ func (b BlobStorageClient) GetPageRanges(container, name string) (GetPageRangesR return out, err } +// PutAppendBlob initializes an empty append blob with specified name. An +// append blob must be created using this method before appending blocks. +// +// See https://msdn.microsoft.com/en-us/library/azure/dd179451.aspx +func (b BlobStorageClient) PutAppendBlob(container, name string, extraHeaders map[string]string) error { + path := fmt.Sprintf("%s/%s", container, name) + uri := b.client.getEndpoint(blobServiceName, path, url.Values{}) + headers := b.client.getStandardHeaders() + headers["x-ms-blob-type"] = string(BlobTypeAppend) + + for k, v := range extraHeaders { + headers[k] = v + } + + resp, err := b.client.exec("PUT", uri, headers, nil) + if err != nil { + return err + } + defer resp.body.Close() + + return checkRespCode(resp.statusCode, []int{http.StatusCreated}) +} + +// AppendBlock appends a block to an append blob. +// +// See https://msdn.microsoft.com/en-us/library/azure/mt427365.aspx +func (b BlobStorageClient) AppendBlock(container, name string, chunk []byte) error { + path := fmt.Sprintf("%s/%s", container, name) + uri := b.client.getEndpoint(blobServiceName, path, url.Values{"comp": {"appendblock"}}) + headers := b.client.getStandardHeaders() + headers["x-ms-blob-type"] = string(BlobTypeAppend) + headers["Content-Length"] = fmt.Sprintf("%v", len(chunk)) + + resp, err := b.client.exec("PUT", uri, headers, bytes.NewReader(chunk)) + if err != nil { + return err + } + defer resp.body.Close() + + return checkRespCode(resp.statusCode, []int{http.StatusCreated}) +} + // CopyBlob starts a blob copy operation and waits for the operation to // complete. sourceBlob parameter must be a canonical URL to the blob (can be // obtained using GetBlobURL method.) There is no SLA on blob copy and therefore @@ -719,7 +860,6 @@ func (b BlobStorageClient) startBlobCopy(container, name, sourceBlob string) (st uri := b.client.getEndpoint(blobServiceName, pathForBlob(container, name), url.Values{}) headers := b.client.getStandardHeaders() - headers["Content-Length"] = "0" headers["x-ms-copy-source"] = sourceBlob resp, err := b.client.exec("PUT", uri, headers, nil) @@ -850,6 +990,10 @@ func (b BlobStorageClient) GetBlobSASURI(container, name string, expiry time.Tim func blobSASStringToSign(signedVersion, canonicalizedResource, signedExpiry, signedPermissions string) (string, error) { var signedStart, signedIdentifier, rscc, rscd, rsce, rscl, rsct string + if signedVersion >= "2015-02-21" { + canonicalizedResource = "/blob" + canonicalizedResource + } + // reference: http://msdn.microsoft.com/en-us/library/azure/dn140255.aspx if signedVersion >= "2013-08-15" { return fmt.Sprintf("%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s", signedPermissions, signedStart, signedExpiry, canonicalizedResource, signedIdentifier, signedVersion, rscc, rscd, rsce, rscl, rsct), nil diff --git a/Godeps/_workspace/src/github.com/Azure/azure-sdk-for-go/storage/blob_test.go b/Godeps/_workspace/src/github.com/Azure/azure-sdk-for-go/storage/blob_test.go deleted file mode 100644 index 14a2f6b2..00000000 --- a/Godeps/_workspace/src/github.com/Azure/azure-sdk-for-go/storage/blob_test.go +++ /dev/null @@ -1,625 +0,0 @@ -package storage - -import ( - "bytes" - "crypto/rand" - "encoding/base64" - "fmt" - "io/ioutil" - "net/http" - "net/url" - "sort" - "sync" - "testing" - "time" - - chk "gopkg.in/check.v1" -) - -type StorageBlobSuite struct{} - -var _ = chk.Suite(&StorageBlobSuite{}) - -const testContainerPrefix = "zzzztest-" - -func getBlobClient(c *chk.C) BlobStorageClient { - return getBasicClient(c).GetBlobService() -} - -func (s *StorageBlobSuite) Test_pathForContainer(c *chk.C) { - c.Assert(pathForContainer("foo"), chk.Equals, "/foo") -} - -func (s *StorageBlobSuite) Test_pathForBlob(c *chk.C) { - c.Assert(pathForBlob("foo", "blob"), chk.Equals, "/foo/blob") -} - -func (s *StorageBlobSuite) Test_blobSASStringToSign(c *chk.C) { - _, err := blobSASStringToSign("2012-02-12", "CS", "SE", "SP") - c.Assert(err, chk.NotNil) // not implemented SAS for versions earlier than 2013-08-15 - - out, err := blobSASStringToSign("2013-08-15", "CS", "SE", "SP") - c.Assert(err, chk.IsNil) - c.Assert(out, chk.Equals, "SP\n\nSE\nCS\n\n2013-08-15\n\n\n\n\n") -} - -func (s *StorageBlobSuite) TestGetBlobSASURI(c *chk.C) { - api, err := NewClient("foo", "YmFy", DefaultBaseURL, "2013-08-15", true) - c.Assert(err, chk.IsNil) - cli := api.GetBlobService() - expiry := time.Time{} - - expectedParts := url.URL{ - Scheme: "https", - Host: "foo.blob.core.windows.net", - Path: "container/name", - RawQuery: url.Values{ - "sv": {"2013-08-15"}, - "sig": {"/OXG7rWh08jYwtU03GzJM0DHZtidRGpC6g69rSGm3I0="}, - "sr": {"b"}, - "sp": {"r"}, - "se": {"0001-01-01T00:00:00Z"}, - }.Encode()} - - u, err := cli.GetBlobSASURI("container", "name", expiry, "r") - c.Assert(err, chk.IsNil) - sasParts, err := url.Parse(u) - c.Assert(err, chk.IsNil) - c.Assert(expectedParts.String(), chk.Equals, sasParts.String()) - c.Assert(expectedParts.Query(), chk.DeepEquals, sasParts.Query()) -} - -func (s *StorageBlobSuite) TestBlobSASURICorrectness(c *chk.C) { - cli := getBlobClient(c) - cnt := randContainer() - blob := randString(20) - body := []byte(randString(100)) - expiry := time.Now().UTC().Add(time.Hour) - permissions := "r" - - c.Assert(cli.CreateContainer(cnt, ContainerAccessTypePrivate), chk.IsNil) - defer cli.DeleteContainer(cnt) - - c.Assert(cli.putSingleBlockBlob(cnt, blob, body), chk.IsNil) - - sasURI, err := cli.GetBlobSASURI(cnt, blob, expiry, permissions) - c.Assert(err, chk.IsNil) - - resp, err := http.Get(sasURI) - c.Assert(err, chk.IsNil) - - blobResp, err := ioutil.ReadAll(resp.Body) - defer resp.Body.Close() - c.Assert(err, chk.IsNil) - - c.Assert(resp.StatusCode, chk.Equals, http.StatusOK) - c.Assert(len(blobResp), chk.Equals, len(body)) -} - -func (s *StorageBlobSuite) TestListContainersPagination(c *chk.C) { - cli := getBlobClient(c) - c.Assert(deleteTestContainers(cli), chk.IsNil) - - const n = 5 - const pageSize = 2 - - // Create test containers - created := []string{} - for i := 0; i < n; i++ { - name := randContainer() - c.Assert(cli.CreateContainer(name, ContainerAccessTypePrivate), chk.IsNil) - created = append(created, name) - } - sort.Strings(created) - - // Defer test container deletions - defer func() { - var wg sync.WaitGroup - for _, cnt := range created { - wg.Add(1) - go func(name string) { - c.Assert(cli.DeleteContainer(name), chk.IsNil) - wg.Done() - }(cnt) - } - wg.Wait() - }() - - // Paginate results - seen := []string{} - marker := "" - for { - resp, err := cli.ListContainers(ListContainersParameters{ - Prefix: testContainerPrefix, - MaxResults: pageSize, - Marker: marker}) - c.Assert(err, chk.IsNil) - - containers := resp.Containers - if len(containers) > pageSize { - c.Fatalf("Got a bigger page. Expected: %d, got: %d", pageSize, len(containers)) - } - - for _, c := range containers { - seen = append(seen, c.Name) - } - - marker = resp.NextMarker - if marker == "" || len(containers) == 0 { - break - } - } - - c.Assert(seen, chk.DeepEquals, created) -} - -func (s *StorageBlobSuite) TestContainerExists(c *chk.C) { - cnt := randContainer() - cli := getBlobClient(c) - ok, err := cli.ContainerExists(cnt) - c.Assert(err, chk.IsNil) - c.Assert(ok, chk.Equals, false) - - c.Assert(cli.CreateContainer(cnt, ContainerAccessTypeBlob), chk.IsNil) - defer cli.DeleteContainer(cnt) - - ok, err = cli.ContainerExists(cnt) - c.Assert(err, chk.IsNil) - c.Assert(ok, chk.Equals, true) -} - -func (s *StorageBlobSuite) TestCreateDeleteContainer(c *chk.C) { - cnt := randContainer() - cli := getBlobClient(c) - c.Assert(cli.CreateContainer(cnt, ContainerAccessTypePrivate), chk.IsNil) - c.Assert(cli.DeleteContainer(cnt), chk.IsNil) -} - -func (s *StorageBlobSuite) TestCreateContainerIfNotExists(c *chk.C) { - cnt := randContainer() - cli := getBlobClient(c) - - // First create - ok, err := cli.CreateContainerIfNotExists(cnt, ContainerAccessTypePrivate) - c.Assert(err, chk.IsNil) - c.Assert(ok, chk.Equals, true) - - // Second create, should not give errors - ok, err = cli.CreateContainerIfNotExists(cnt, ContainerAccessTypePrivate) - c.Assert(err, chk.IsNil) - defer cli.DeleteContainer(cnt) - c.Assert(ok, chk.Equals, false) -} - -func (s *StorageBlobSuite) TestDeleteContainerIfExists(c *chk.C) { - cnt := randContainer() - cli := getBlobClient(c) - - // Nonexisting container - c.Assert(cli.DeleteContainer(cnt), chk.NotNil) - - ok, err := cli.DeleteContainerIfExists(cnt) - c.Assert(err, chk.IsNil) - c.Assert(ok, chk.Equals, false) - - // Existing container - c.Assert(cli.CreateContainer(cnt, ContainerAccessTypePrivate), chk.IsNil) - ok, err = cli.DeleteContainerIfExists(cnt) - c.Assert(err, chk.IsNil) - c.Assert(ok, chk.Equals, true) -} - -func (s *StorageBlobSuite) TestBlobExists(c *chk.C) { - cnt := randContainer() - blob := randString(20) - cli := getBlobClient(c) - - c.Assert(cli.CreateContainer(cnt, ContainerAccessTypeBlob), chk.IsNil) - defer cli.DeleteContainer(cnt) - c.Assert(cli.putSingleBlockBlob(cnt, blob, []byte("Hello!")), chk.IsNil) - defer cli.DeleteBlob(cnt, blob) - - ok, err := cli.BlobExists(cnt, blob+".foo") - c.Assert(err, chk.IsNil) - c.Assert(ok, chk.Equals, false) - - ok, err = cli.BlobExists(cnt, blob) - c.Assert(err, chk.IsNil) - c.Assert(ok, chk.Equals, true) -} - -func (s *StorageBlobSuite) TestGetBlobURL(c *chk.C) { - api, err := NewBasicClient("foo", "YmFy") - c.Assert(err, chk.IsNil) - cli := api.GetBlobService() - - c.Assert(cli.GetBlobURL("c", "nested/blob"), chk.Equals, "https://foo.blob.core.windows.net/c/nested/blob") - c.Assert(cli.GetBlobURL("", "blob"), chk.Equals, "https://foo.blob.core.windows.net/$root/blob") - c.Assert(cli.GetBlobURL("", "nested/blob"), chk.Equals, "https://foo.blob.core.windows.net/$root/nested/blob") -} - -func (s *StorageBlobSuite) TestBlobCopy(c *chk.C) { - if testing.Short() { - c.Skip("skipping blob copy in short mode, no SLA on async operation") - } - - cli := getBlobClient(c) - cnt := randContainer() - src := randString(20) - dst := randString(20) - body := []byte(randString(1024)) - - c.Assert(cli.CreateContainer(cnt, ContainerAccessTypePrivate), chk.IsNil) - defer cli.deleteContainer(cnt) - - c.Assert(cli.putSingleBlockBlob(cnt, src, body), chk.IsNil) - defer cli.DeleteBlob(cnt, src) - - c.Assert(cli.CopyBlob(cnt, dst, cli.GetBlobURL(cnt, src)), chk.IsNil) - defer cli.DeleteBlob(cnt, dst) - - blobBody, err := cli.GetBlob(cnt, dst) - c.Assert(err, chk.IsNil) - - b, err := ioutil.ReadAll(blobBody) - defer blobBody.Close() - c.Assert(err, chk.IsNil) - c.Assert(b, chk.DeepEquals, body) -} - -func (s *StorageBlobSuite) TestDeleteBlobIfExists(c *chk.C) { - cnt := randContainer() - blob := randString(20) - - cli := getBlobClient(c) - c.Assert(cli.DeleteBlob(cnt, blob), chk.NotNil) - - ok, err := cli.DeleteBlobIfExists(cnt, blob) - c.Assert(err, chk.IsNil) - c.Assert(ok, chk.Equals, false) -} - -func (s *StorageBlobSuite) TestGetBlobProperties(c *chk.C) { - cnt := randContainer() - blob := randString(20) - contents := randString(64) - - cli := getBlobClient(c) - c.Assert(cli.CreateContainer(cnt, ContainerAccessTypePrivate), chk.IsNil) - defer cli.DeleteContainer(cnt) - - // Nonexisting blob - _, err := cli.GetBlobProperties(cnt, blob) - c.Assert(err, chk.NotNil) - - // Put the blob - c.Assert(cli.putSingleBlockBlob(cnt, blob, []byte(contents)), chk.IsNil) - - // Get blob properties - props, err := cli.GetBlobProperties(cnt, blob) - c.Assert(err, chk.IsNil) - - c.Assert(props.ContentLength, chk.Equals, int64(len(contents))) - c.Assert(props.BlobType, chk.Equals, BlobTypeBlock) -} - -func (s *StorageBlobSuite) TestListBlobsPagination(c *chk.C) { - cli := getBlobClient(c) - cnt := randContainer() - - c.Assert(cli.CreateContainer(cnt, ContainerAccessTypePrivate), chk.IsNil) - defer cli.DeleteContainer(cnt) - - blobs := []string{} - const n = 5 - const pageSize = 2 - for i := 0; i < n; i++ { - name := randString(20) - c.Assert(cli.putSingleBlockBlob(cnt, name, []byte("Hello, world!")), chk.IsNil) - blobs = append(blobs, name) - } - sort.Strings(blobs) - - // Paginate - seen := []string{} - marker := "" - for { - resp, err := cli.ListBlobs(cnt, ListBlobsParameters{ - MaxResults: pageSize, - Marker: marker}) - c.Assert(err, chk.IsNil) - - for _, v := range resp.Blobs { - seen = append(seen, v.Name) - } - - marker = resp.NextMarker - if marker == "" || len(resp.Blobs) == 0 { - break - } - } - - // Compare - c.Assert(seen, chk.DeepEquals, blobs) -} - -func (s *StorageBlobSuite) TestPutEmptyBlockBlob(c *chk.C) { - cli := getBlobClient(c) - cnt := randContainer() - - c.Assert(cli.CreateContainer(cnt, ContainerAccessTypePrivate), chk.IsNil) - defer cli.deleteContainer(cnt) - - blob := randString(20) - c.Assert(cli.putSingleBlockBlob(cnt, blob, []byte{}), chk.IsNil) - - props, err := cli.GetBlobProperties(cnt, blob) - c.Assert(err, chk.IsNil) - c.Assert(props.ContentLength, chk.Not(chk.Equals), 0) -} - -func (s *StorageBlobSuite) TestGetBlobRange(c *chk.C) { - cnt := randContainer() - blob := randString(20) - body := "0123456789" - - cli := getBlobClient(c) - c.Assert(cli.CreateContainer(cnt, ContainerAccessTypeBlob), chk.IsNil) - defer cli.DeleteContainer(cnt) - - c.Assert(cli.putSingleBlockBlob(cnt, blob, []byte(body)), chk.IsNil) - defer cli.DeleteBlob(cnt, blob) - - // Read 1-3 - for _, r := range []struct { - rangeStr string - expected string - }{ - {"0-", body}, - {"1-3", body[1 : 3+1]}, - {"3-", body[3:]}, - } { - resp, err := cli.GetBlobRange(cnt, blob, r.rangeStr) - c.Assert(err, chk.IsNil) - blobBody, err := ioutil.ReadAll(resp) - c.Assert(err, chk.IsNil) - - str := string(blobBody) - c.Assert(str, chk.Equals, r.expected) - } -} - -func (s *StorageBlobSuite) TestPutBlock(c *chk.C) { - cli := getBlobClient(c) - cnt := randContainer() - c.Assert(cli.CreateContainer(cnt, ContainerAccessTypePrivate), chk.IsNil) - defer cli.deleteContainer(cnt) - - blob := randString(20) - chunk := []byte(randString(1024)) - blockID := base64.StdEncoding.EncodeToString([]byte("foo")) - c.Assert(cli.PutBlock(cnt, blob, blockID, chunk), chk.IsNil) -} - -func (s *StorageBlobSuite) TestGetBlockList_PutBlockList(c *chk.C) { - cli := getBlobClient(c) - cnt := randContainer() - c.Assert(cli.CreateContainer(cnt, ContainerAccessTypePrivate), chk.IsNil) - defer cli.deleteContainer(cnt) - - blob := randString(20) - chunk := []byte(randString(1024)) - blockID := base64.StdEncoding.EncodeToString([]byte("foo")) - - // Put one block - c.Assert(cli.PutBlock(cnt, blob, blockID, chunk), chk.IsNil) - defer cli.deleteBlob(cnt, blob) - - // Get committed blocks - committed, err := cli.GetBlockList(cnt, blob, BlockListTypeCommitted) - c.Assert(err, chk.IsNil) - - if len(committed.CommittedBlocks) > 0 { - c.Fatal("There are committed blocks") - } - - // Get uncommitted blocks - uncommitted, err := cli.GetBlockList(cnt, blob, BlockListTypeUncommitted) - c.Assert(err, chk.IsNil) - - c.Assert(len(uncommitted.UncommittedBlocks), chk.Equals, 1) - // Commit block list - c.Assert(cli.PutBlockList(cnt, blob, []Block{{blockID, BlockStatusUncommitted}}), chk.IsNil) - - // Get all blocks - all, err := cli.GetBlockList(cnt, blob, BlockListTypeAll) - c.Assert(err, chk.IsNil) - c.Assert(len(all.CommittedBlocks), chk.Equals, 1) - c.Assert(len(all.UncommittedBlocks), chk.Equals, 0) - - // Verify the block - thatBlock := all.CommittedBlocks[0] - c.Assert(thatBlock.Name, chk.Equals, blockID) - c.Assert(thatBlock.Size, chk.Equals, int64(len(chunk))) -} - -func (s *StorageBlobSuite) TestCreateBlockBlob(c *chk.C) { - cli := getBlobClient(c) - cnt := randContainer() - c.Assert(cli.CreateContainer(cnt, ContainerAccessTypePrivate), chk.IsNil) - defer cli.deleteContainer(cnt) - - blob := randString(20) - c.Assert(cli.CreateBlockBlob(cnt, blob), chk.IsNil) - - // Verify - blocks, err := cli.GetBlockList(cnt, blob, BlockListTypeAll) - c.Assert(err, chk.IsNil) - c.Assert(len(blocks.CommittedBlocks), chk.Equals, 0) - c.Assert(len(blocks.UncommittedBlocks), chk.Equals, 0) -} - -func (s *StorageBlobSuite) TestPutPageBlob(c *chk.C) { - cli := getBlobClient(c) - cnt := randContainer() - c.Assert(cli.CreateContainer(cnt, ContainerAccessTypePrivate), chk.IsNil) - defer cli.deleteContainer(cnt) - - blob := randString(20) - size := int64(10 * 1024 * 1024) - c.Assert(cli.PutPageBlob(cnt, blob, size), chk.IsNil) - - // Verify - props, err := cli.GetBlobProperties(cnt, blob) - c.Assert(err, chk.IsNil) - c.Assert(props.ContentLength, chk.Equals, size) - c.Assert(props.BlobType, chk.Equals, BlobTypePage) -} - -func (s *StorageBlobSuite) TestPutPagesUpdate(c *chk.C) { - cli := getBlobClient(c) - cnt := randContainer() - c.Assert(cli.CreateContainer(cnt, ContainerAccessTypePrivate), chk.IsNil) - defer cli.deleteContainer(cnt) - - blob := randString(20) - size := int64(10 * 1024 * 1024) // larger than we'll use - c.Assert(cli.PutPageBlob(cnt, blob, size), chk.IsNil) - - chunk1 := []byte(randString(1024)) - chunk2 := []byte(randString(512)) - - // Append chunks - c.Assert(cli.PutPage(cnt, blob, 0, int64(len(chunk1)-1), PageWriteTypeUpdate, chunk1), chk.IsNil) - c.Assert(cli.PutPage(cnt, blob, int64(len(chunk1)), int64(len(chunk1)+len(chunk2)-1), PageWriteTypeUpdate, chunk2), chk.IsNil) - - // Verify contents - out, err := cli.GetBlobRange(cnt, blob, fmt.Sprintf("%v-%v", 0, len(chunk1)+len(chunk2)-1)) - c.Assert(err, chk.IsNil) - defer out.Close() - blobContents, err := ioutil.ReadAll(out) - c.Assert(err, chk.IsNil) - c.Assert(blobContents, chk.DeepEquals, append(chunk1, chunk2...)) - out.Close() - - // Overwrite first half of chunk1 - chunk0 := []byte(randString(512)) - c.Assert(cli.PutPage(cnt, blob, 0, int64(len(chunk0)-1), PageWriteTypeUpdate, chunk0), chk.IsNil) - - // Verify contents - out, err = cli.GetBlobRange(cnt, blob, fmt.Sprintf("%v-%v", 0, len(chunk1)+len(chunk2)-1)) - c.Assert(err, chk.IsNil) - defer out.Close() - blobContents, err = ioutil.ReadAll(out) - c.Assert(err, chk.IsNil) - c.Assert(blobContents, chk.DeepEquals, append(append(chunk0, chunk1[512:]...), chunk2...)) -} - -func (s *StorageBlobSuite) TestPutPagesClear(c *chk.C) { - cli := getBlobClient(c) - cnt := randContainer() - c.Assert(cli.CreateContainer(cnt, ContainerAccessTypePrivate), chk.IsNil) - defer cli.deleteContainer(cnt) - - blob := randString(20) - size := int64(10 * 1024 * 1024) // larger than we'll use - c.Assert(cli.PutPageBlob(cnt, blob, size), chk.IsNil) - - // Put 0-2047 - chunk := []byte(randString(2048)) - c.Assert(cli.PutPage(cnt, blob, 0, 2047, PageWriteTypeUpdate, chunk), chk.IsNil) - - // Clear 512-1023 - c.Assert(cli.PutPage(cnt, blob, 512, 1023, PageWriteTypeClear, nil), chk.IsNil) - - // Verify contents - out, err := cli.GetBlobRange(cnt, blob, "0-2047") - c.Assert(err, chk.IsNil) - contents, err := ioutil.ReadAll(out) - c.Assert(err, chk.IsNil) - defer out.Close() - c.Assert(contents, chk.DeepEquals, append(append(chunk[:512], make([]byte, 512)...), chunk[1024:]...)) -} - -func (s *StorageBlobSuite) TestGetPageRanges(c *chk.C) { - cli := getBlobClient(c) - cnt := randContainer() - c.Assert(cli.CreateContainer(cnt, ContainerAccessTypePrivate), chk.IsNil) - defer cli.deleteContainer(cnt) - - blob := randString(20) - size := int64(10 * 1024 * 1024) // larger than we'll use - c.Assert(cli.PutPageBlob(cnt, blob, size), chk.IsNil) - - // Get page ranges on empty blob - out, err := cli.GetPageRanges(cnt, blob) - c.Assert(err, chk.IsNil) - c.Assert(len(out.PageList), chk.Equals, 0) - - // Add 0-512 page - c.Assert(cli.PutPage(cnt, blob, 0, 511, PageWriteTypeUpdate, []byte(randString(512))), chk.IsNil) - - out, err = cli.GetPageRanges(cnt, blob) - c.Assert(err, chk.IsNil) - c.Assert(len(out.PageList), chk.Equals, 1) - - // Add 1024-2048 - c.Assert(cli.PutPage(cnt, blob, 1024, 2047, PageWriteTypeUpdate, []byte(randString(1024))), chk.IsNil) - - out, err = cli.GetPageRanges(cnt, blob) - c.Assert(err, chk.IsNil) - c.Assert(len(out.PageList), chk.Equals, 2) -} - -func deleteTestContainers(cli BlobStorageClient) error { - for { - resp, err := cli.ListContainers(ListContainersParameters{Prefix: testContainerPrefix}) - if err != nil { - return err - } - if len(resp.Containers) == 0 { - break - } - for _, c := range resp.Containers { - err = cli.DeleteContainer(c.Name) - if err != nil { - return err - } - } - } - return nil -} - -func (b BlobStorageClient) putSingleBlockBlob(container, name string, chunk []byte) error { - if len(chunk) > MaxBlobBlockSize { - return fmt.Errorf("storage: provided chunk (%d bytes) cannot fit into single-block blob (max %d bytes)", len(chunk), MaxBlobBlockSize) - } - - uri := b.client.getEndpoint(blobServiceName, pathForBlob(container, name), url.Values{}) - headers := b.client.getStandardHeaders() - headers["x-ms-blob-type"] = string(BlobTypeBlock) - headers["Content-Length"] = fmt.Sprintf("%v", len(chunk)) - - resp, err := b.client.exec("PUT", uri, headers, bytes.NewReader(chunk)) - if err != nil { - return err - } - return checkRespCode(resp.statusCode, []int{http.StatusCreated}) -} - -func randContainer() string { - return testContainerPrefix + randString(32-len(testContainerPrefix)) -} - -func randString(n int) string { - if n <= 0 { - panic("negative number") - } - const alphanum = "0123456789abcdefghijklmnopqrstuvwxyz" - var bytes = make([]byte, n) - rand.Read(bytes) - for i, b := range bytes { - bytes[i] = alphanum[b%byte(len(alphanum))] - } - return string(bytes) -} diff --git a/Godeps/_workspace/src/github.com/Azure/azure-sdk-for-go/storage/client.go b/Godeps/_workspace/src/github.com/Azure/azure-sdk-for-go/storage/client.go index 6c171050..37eb3e6d 100644 --- a/Godeps/_workspace/src/github.com/Azure/azure-sdk-for-go/storage/client.go +++ b/Godeps/_workspace/src/github.com/Azure/azure-sdk-for-go/storage/client.go @@ -5,6 +5,7 @@ import ( "bytes" "encoding/base64" "encoding/xml" + "errors" "fmt" "io" "io/ioutil" @@ -12,6 +13,7 @@ import ( "net/url" "regexp" "sort" + "strconv" "strings" ) @@ -22,13 +24,14 @@ const ( // DefaultAPIVersion is the Azure Storage API version string used when a // basic client is created. - DefaultAPIVersion = "2014-02-14" + DefaultAPIVersion = "2015-02-21" defaultUseHTTPS = true blobServiceName = "blob" tableServiceName = "table" queueServiceName = "queue" + fileServiceName = "file" ) // Client is the object that needs to be constructed to perform @@ -79,6 +82,11 @@ func (e UnexpectedStatusCodeError) Error() string { return fmt.Sprintf("storage: status code from service response is %s; was expecting %s", got, strings.Join(expected, " or ")) } +// Got is the actual status code returned by Azure. +func (e UnexpectedStatusCodeError) Got() int { + return e.got +} + // NewBasicClient constructs a Client with given storage service name and // key. func NewBasicClient(accountName, accountKey string) (Client, error) { @@ -154,6 +162,12 @@ func (c Client) GetQueueService() QueueServiceClient { return QueueServiceClient{c} } +// GetFileService returns a FileServiceClient which can operate on the file +// service of the storage account. +func (c Client) GetFileService() FileServiceClient { + return FileServiceClient{c} +} + func (c Client) createAuthorizationHeader(canonicalizedString string) string { signature := c.computeHmac256(canonicalizedString) return fmt.Sprintf("%s %s:%s", "SharedKey", c.accountName, signature) @@ -252,18 +266,22 @@ func (c Client) buildCanonicalizedResource(uri string) (string, error) { } func (c Client) buildCanonicalizedString(verb string, headers map[string]string, canonicalizedResource string) string { + contentLength := headers["Content-Length"] + if contentLength == "0" { + contentLength = "" + } canonicalizedString := fmt.Sprintf("%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s", verb, headers["Content-Encoding"], headers["Content-Language"], - headers["Content-Length"], + contentLength, headers["Content-MD5"], headers["Content-Type"], headers["Date"], - headers["If-Modified-Singe"], + headers["If-Modified-Since"], headers["If-Match"], headers["If-None-Match"], - headers["If-Unmodified-Singe"], + headers["If-Unmodified-Since"], headers["Range"], c.buildCanonicalizedHeader(headers), canonicalizedResource) @@ -283,6 +301,20 @@ func (c Client) exec(verb, url string, headers map[string]string, body io.Reader } req, err := http.NewRequest(verb, url, body) + if err != nil { + return nil, errors.New("azure/storage: error creating request: " + err.Error()) + } + + if clstr, ok := headers["Content-Length"]; ok { + // content length header is being signed, but completely ignored by golang. + // instead we have to use the ContentLength property on the request struct + // (see https://golang.org/src/net/http/request.go?s=18140:18370#L536 and + // https://golang.org/src/net/http/transfer.go?s=1739:2467#L49) + req.ContentLength, err = strconv.ParseInt(clstr, 10, 64) + if err != nil { + return nil, err + } + } for k, v := range headers { req.Header.Add(k, v) } @@ -293,7 +325,7 @@ func (c Client) exec(verb, url string, headers map[string]string, body io.Reader } statusCode := resp.StatusCode - if statusCode >= 400 && statusCode <= 505 { + if statusCode >= 400 && statusCode <= 505 && statusCode != 404 { var respBody []byte respBody, err = readResponseBody(resp) if err != nil { @@ -344,7 +376,8 @@ func serviceErrFromXML(body []byte, statusCode int, requestID string) (AzureStor } func (e AzureStorageServiceError) Error() string { - return fmt.Sprintf("storage: service returned error: StatusCode=%d, ErrorCode=%s, ErrorMessage=%s, RequestId=%s", e.StatusCode, e.Code, e.Message, e.RequestID) + return fmt.Sprintf("storage: service returned error: StatusCode=%d, ErrorCode=%s, ErrorMessage=%s, RequestId=%s, QueryParameterName=%s, QueryParameterValue=%s", + e.StatusCode, e.Code, e.Message, e.RequestID, e.QueryParameterName, e.QueryParameterValue) } // checkRespCode returns UnexpectedStatusError if the given response code is not diff --git a/Godeps/_workspace/src/github.com/Azure/azure-sdk-for-go/storage/client_test.go b/Godeps/_workspace/src/github.com/Azure/azure-sdk-for-go/storage/client_test.go deleted file mode 100644 index 5bc52110..00000000 --- a/Godeps/_workspace/src/github.com/Azure/azure-sdk-for-go/storage/client_test.go +++ /dev/null @@ -1,156 +0,0 @@ -package storage - -import ( - "encoding/base64" - "net/url" - "os" - "testing" - - chk "gopkg.in/check.v1" -) - -// Hook up gocheck to testing -func Test(t *testing.T) { chk.TestingT(t) } - -type StorageClientSuite struct{} - -var _ = chk.Suite(&StorageClientSuite{}) - -// getBasicClient returns a test client from storage credentials in the env -func getBasicClient(c *chk.C) Client { - name := os.Getenv("ACCOUNT_NAME") - if name == "" { - c.Fatal("ACCOUNT_NAME not set, need an empty storage account to test") - } - key := os.Getenv("ACCOUNT_KEY") - if key == "" { - c.Fatal("ACCOUNT_KEY not set") - } - cli, err := NewBasicClient(name, key) - c.Assert(err, chk.IsNil) - return cli -} - -func (s *StorageClientSuite) TestGetBaseURL_Basic_Https(c *chk.C) { - cli, err := NewBasicClient("foo", "YmFy") - c.Assert(err, chk.IsNil) - c.Assert(cli.apiVersion, chk.Equals, DefaultAPIVersion) - c.Assert(err, chk.IsNil) - c.Assert(cli.getBaseURL("table"), chk.Equals, "https://foo.table.core.windows.net") -} - -func (s *StorageClientSuite) TestGetBaseURL_Custom_NoHttps(c *chk.C) { - apiVersion := "2015-01-01" // a non existing one - cli, err := NewClient("foo", "YmFy", "core.chinacloudapi.cn", apiVersion, false) - c.Assert(err, chk.IsNil) - c.Assert(cli.apiVersion, chk.Equals, apiVersion) - c.Assert(cli.getBaseURL("table"), chk.Equals, "http://foo.table.core.chinacloudapi.cn") -} - -func (s *StorageClientSuite) TestGetEndpoint_None(c *chk.C) { - cli, err := NewBasicClient("foo", "YmFy") - c.Assert(err, chk.IsNil) - output := cli.getEndpoint(blobServiceName, "", url.Values{}) - c.Assert(output, chk.Equals, "https://foo.blob.core.windows.net/") -} - -func (s *StorageClientSuite) TestGetEndpoint_PathOnly(c *chk.C) { - cli, err := NewBasicClient("foo", "YmFy") - c.Assert(err, chk.IsNil) - output := cli.getEndpoint(blobServiceName, "path", url.Values{}) - c.Assert(output, chk.Equals, "https://foo.blob.core.windows.net/path") -} - -func (s *StorageClientSuite) TestGetEndpoint_ParamsOnly(c *chk.C) { - cli, err := NewBasicClient("foo", "YmFy") - c.Assert(err, chk.IsNil) - params := url.Values{} - params.Set("a", "b") - params.Set("c", "d") - output := cli.getEndpoint(blobServiceName, "", params) - c.Assert(output, chk.Equals, "https://foo.blob.core.windows.net/?a=b&c=d") -} - -func (s *StorageClientSuite) TestGetEndpoint_Mixed(c *chk.C) { - cli, err := NewBasicClient("foo", "YmFy") - c.Assert(err, chk.IsNil) - params := url.Values{} - params.Set("a", "b") - params.Set("c", "d") - output := cli.getEndpoint(blobServiceName, "path", params) - c.Assert(output, chk.Equals, "https://foo.blob.core.windows.net/path?a=b&c=d") -} - -func (s *StorageClientSuite) Test_getStandardHeaders(c *chk.C) { - cli, err := NewBasicClient("foo", "YmFy") - c.Assert(err, chk.IsNil) - - headers := cli.getStandardHeaders() - c.Assert(len(headers), chk.Equals, 2) - c.Assert(headers["x-ms-version"], chk.Equals, cli.apiVersion) - if _, ok := headers["x-ms-date"]; !ok { - c.Fatal("Missing date header") - } -} - -func (s *StorageClientSuite) Test_buildCanonicalizedResource(c *chk.C) { - cli, err := NewBasicClient("foo", "YmFy") - c.Assert(err, chk.IsNil) - - type test struct{ url, expected string } - tests := []test{ - {"https://foo.blob.core.windows.net/path?a=b&c=d", "/foo/path\na:b\nc:d"}, - {"https://foo.blob.core.windows.net/?comp=list", "/foo/\ncomp:list"}, - {"https://foo.blob.core.windows.net/cnt/blob", "/foo/cnt/blob"}, - } - - for _, i := range tests { - out, err := cli.buildCanonicalizedResource(i.url) - c.Assert(err, chk.IsNil) - c.Assert(out, chk.Equals, i.expected) - } -} - -func (s *StorageClientSuite) Test_buildCanonicalizedHeader(c *chk.C) { - cli, err := NewBasicClient("foo", "YmFy") - c.Assert(err, chk.IsNil) - - type test struct { - headers map[string]string - expected string - } - tests := []test{ - {map[string]string{}, ""}, - {map[string]string{"x-ms-foo": "bar"}, "x-ms-foo:bar"}, - {map[string]string{"foo:": "bar"}, ""}, - {map[string]string{"foo:": "bar", "x-ms-foo": "bar"}, "x-ms-foo:bar"}, - {map[string]string{ - "x-ms-version": "9999-99-99", - "x-ms-blob-type": "BlockBlob"}, "x-ms-blob-type:BlockBlob\nx-ms-version:9999-99-99"}} - - for _, i := range tests { - c.Assert(cli.buildCanonicalizedHeader(i.headers), chk.Equals, i.expected) - } -} - -func (s *StorageClientSuite) TestReturnsStorageServiceError(c *chk.C) { - // attempt to delete a nonexisting container - _, err := getBlobClient(c).deleteContainer(randContainer()) - c.Assert(err, chk.NotNil) - - v, ok := err.(AzureStorageServiceError) - c.Check(ok, chk.Equals, true) - c.Assert(v.StatusCode, chk.Equals, 404) - c.Assert(v.Code, chk.Equals, "ContainerNotFound") - c.Assert(v.Code, chk.Not(chk.Equals), "") -} - -func (s *StorageClientSuite) Test_createAuthorizationHeader(c *chk.C) { - key := base64.StdEncoding.EncodeToString([]byte("bar")) - cli, err := NewBasicClient("foo", key) - c.Assert(err, chk.IsNil) - - canonicalizedString := `foobarzoo` - expected := `SharedKey foo:h5U0ATVX6SpbFX1H6GNuxIMeXXCILLoIvhflPtuQZ30=` - c.Assert(cli.createAuthorizationHeader(canonicalizedString), chk.Equals, expected) -} diff --git a/Godeps/_workspace/src/github.com/Azure/azure-sdk-for-go/storage/file.go b/Godeps/_workspace/src/github.com/Azure/azure-sdk-for-go/storage/file.go new file mode 100644 index 00000000..ede4e21b --- /dev/null +++ b/Godeps/_workspace/src/github.com/Azure/azure-sdk-for-go/storage/file.go @@ -0,0 +1,91 @@ +package storage + +import ( + "fmt" + "net/http" + "net/url" +) + +// FileServiceClient contains operations for Microsoft Azure File Service. +type FileServiceClient struct { + client Client +} + +// pathForFileShare returns the URL path segment for a File Share resource +func pathForFileShare(name string) string { + return fmt.Sprintf("/%s", name) +} + +// CreateShare operation creates a new share under the specified account. If the +// share with the same name already exists, the operation fails. +// +// See https://msdn.microsoft.com/en-us/library/azure/dn167008.aspx +func (f FileServiceClient) CreateShare(name string) error { + resp, err := f.createShare(name) + if err != nil { + return err + } + defer resp.body.Close() + return checkRespCode(resp.statusCode, []int{http.StatusCreated}) +} + +// CreateShareIfNotExists creates a new share under the specified account if +// it does not exist. Returns true if container is newly created or false if +// container already exists. +// +// See https://msdn.microsoft.com/en-us/library/azure/dn167008.aspx +func (f FileServiceClient) CreateShareIfNotExists(name string) (bool, error) { + resp, err := f.createShare(name) + if resp != nil { + defer resp.body.Close() + if resp.statusCode == http.StatusCreated || resp.statusCode == http.StatusConflict { + return resp.statusCode == http.StatusCreated, nil + } + } + return false, err +} + +// CreateShare creates a Azure File Share and returns its response +func (f FileServiceClient) createShare(name string) (*storageResponse, error) { + uri := f.client.getEndpoint(fileServiceName, pathForFileShare(name), url.Values{"restype": {"share"}}) + headers := f.client.getStandardHeaders() + return f.client.exec("PUT", uri, headers, nil) +} + +// DeleteShare operation marks the specified share for deletion. The share +// and any files contained within it are later deleted during garbage +// collection. +// +// See https://msdn.microsoft.com/en-us/library/azure/dn689090.aspx +func (f FileServiceClient) DeleteShare(name string) error { + resp, err := f.deleteShare(name) + if err != nil { + return err + } + defer resp.body.Close() + return checkRespCode(resp.statusCode, []int{http.StatusAccepted}) +} + +// DeleteShareIfExists operation marks the specified share for deletion if it +// exists. The share and any files contained within it are later deleted during +// garbage collection. Returns true if share existed and deleted with this call, +// false otherwise. +// +// See https://msdn.microsoft.com/en-us/library/azure/dn689090.aspx +func (f FileServiceClient) DeleteShareIfExists(name string) (bool, error) { + resp, err := f.deleteShare(name) + if resp != nil { + defer resp.body.Close() + if resp.statusCode == http.StatusAccepted || resp.statusCode == http.StatusNotFound { + return resp.statusCode == http.StatusAccepted, nil + } + } + return false, err +} + +// deleteShare makes the call to Delete Share operation endpoint and returns +// the response +func (f FileServiceClient) deleteShare(name string) (*storageResponse, error) { + uri := f.client.getEndpoint(fileServiceName, pathForFileShare(name), url.Values{"restype": {"share"}}) + return f.client.exec("DELETE", uri, f.client.getStandardHeaders(), nil) +} diff --git a/Godeps/_workspace/src/github.com/Azure/azure-sdk-for-go/storage/queue.go b/Godeps/_workspace/src/github.com/Azure/azure-sdk-for-go/storage/queue.go index fa017f4c..3ecf4aca 100644 --- a/Godeps/_workspace/src/github.com/Azure/azure-sdk-for-go/storage/queue.go +++ b/Godeps/_workspace/src/github.com/Azure/azure-sdk-for-go/storage/queue.go @@ -6,6 +6,13 @@ import ( "net/http" "net/url" "strconv" + "strings" +) + +const ( + // casing is per Golang's http.Header canonicalizing the header names. + approximateMessagesCountHeader = "X-Ms-Approximate-Messages-Count" + userDefinedMetadataHeaderPrefix = "X-Ms-Meta-" ) // QueueServiceClient contains operations for Microsoft Azure Queue Storage @@ -111,13 +118,82 @@ type PeekMessageResponse struct { MessageText string `xml:"MessageText"` } +// QueueMetadataResponse represents user defined metadata and queue +// properties on a specific queue. +// +// See https://msdn.microsoft.com/en-us/library/azure/dd179384.aspx +type QueueMetadataResponse struct { + ApproximateMessageCount int + UserDefinedMetadata map[string]string +} + +// SetMetadata operation sets user-defined metadata on the specified queue. +// Metadata is associated with the queue as name-value pairs. +// +// See https://msdn.microsoft.com/en-us/library/azure/dd179348.aspx +func (c QueueServiceClient) SetMetadata(name string, metadata map[string]string) error { + uri := c.client.getEndpoint(queueServiceName, pathForQueue(name), url.Values{"comp": []string{"metadata"}}) + headers := c.client.getStandardHeaders() + for k, v := range metadata { + headers[userDefinedMetadataHeaderPrefix+k] = v + } + + resp, err := c.client.exec("PUT", uri, headers, nil) + if err != nil { + return err + } + defer resp.body.Close() + + return checkRespCode(resp.statusCode, []int{http.StatusNoContent}) +} + +// GetMetadata operation retrieves user-defined metadata and queue +// properties on the specified queue. Metadata is associated with +// the queue as name-values pairs. +// +// See https://msdn.microsoft.com/en-us/library/azure/dd179384.aspx +// +// Because the way Golang's http client (and http.Header in particular) +// canonicalize header names, the returned metadata names would always +// be all lower case. +func (c QueueServiceClient) GetMetadata(name string) (QueueMetadataResponse, error) { + qm := QueueMetadataResponse{} + qm.UserDefinedMetadata = make(map[string]string) + uri := c.client.getEndpoint(queueServiceName, pathForQueue(name), url.Values{"comp": []string{"metadata"}}) + headers := c.client.getStandardHeaders() + resp, err := c.client.exec("GET", uri, headers, nil) + if err != nil { + return qm, err + } + defer resp.body.Close() + + for k, v := range resp.headers { + if len(v) != 1 { + return qm, fmt.Errorf("Unexpected number of values (%d) in response header '%s'", len(v), k) + } + + value := v[0] + + if k == approximateMessagesCountHeader { + qm.ApproximateMessageCount, err = strconv.Atoi(value) + if err != nil { + return qm, fmt.Errorf("Unexpected value in response header '%s': '%s' ", k, value) + } + } else if strings.HasPrefix(k, userDefinedMetadataHeaderPrefix) { + name := strings.TrimPrefix(k, userDefinedMetadataHeaderPrefix) + qm.UserDefinedMetadata[strings.ToLower(name)] = value + } + } + + return qm, checkRespCode(resp.statusCode, []int{http.StatusOK}) +} + // CreateQueue operation creates a queue under the given account. // // See https://msdn.microsoft.com/en-us/library/azure/dd179342.aspx func (c QueueServiceClient) CreateQueue(name string) error { uri := c.client.getEndpoint(queueServiceName, pathForQueue(name), url.Values{}) headers := c.client.getStandardHeaders() - headers["Content-Length"] = "0" resp, err := c.client.exec("PUT", uri, headers, nil) if err != nil { return err diff --git a/Godeps/_workspace/src/github.com/Azure/azure-sdk-for-go/storage/queue_test.go b/Godeps/_workspace/src/github.com/Azure/azure-sdk-for-go/storage/queue_test.go deleted file mode 100644 index 5c7bad93..00000000 --- a/Godeps/_workspace/src/github.com/Azure/azure-sdk-for-go/storage/queue_test.go +++ /dev/null @@ -1,91 +0,0 @@ -package storage - -import ( - chk "gopkg.in/check.v1" -) - -type StorageQueueSuite struct{} - -var _ = chk.Suite(&StorageQueueSuite{}) - -func getQueueClient(c *chk.C) QueueServiceClient { - return getBasicClient(c).GetQueueService() -} - -func (s *StorageQueueSuite) Test_pathForQueue(c *chk.C) { - c.Assert(pathForQueue("q"), chk.Equals, "/q") -} - -func (s *StorageQueueSuite) Test_pathForQueueMessages(c *chk.C) { - c.Assert(pathForQueueMessages("q"), chk.Equals, "/q/messages") -} - -func (s *StorageQueueSuite) Test_pathForMessage(c *chk.C) { - c.Assert(pathForMessage("q", "m"), chk.Equals, "/q/messages/m") -} - -func (s *StorageQueueSuite) TestCreateQueue_DeleteQueue(c *chk.C) { - cli := getQueueClient(c) - name := randString(20) - c.Assert(cli.CreateQueue(name), chk.IsNil) - c.Assert(cli.DeleteQueue(name), chk.IsNil) -} - -func (s *StorageQueueSuite) TestQueueExists(c *chk.C) { - cli := getQueueClient(c) - ok, err := cli.QueueExists("nonexistent-queue") - c.Assert(err, chk.IsNil) - c.Assert(ok, chk.Equals, false) - - name := randString(20) - c.Assert(cli.CreateQueue(name), chk.IsNil) - defer cli.DeleteQueue(name) - - ok, err = cli.QueueExists(name) - c.Assert(err, chk.IsNil) - c.Assert(ok, chk.Equals, true) -} - -func (s *StorageQueueSuite) TestPostMessage_PeekMessage_DeleteMessage(c *chk.C) { - q := randString(20) - cli := getQueueClient(c) - c.Assert(cli.CreateQueue(q), chk.IsNil) - defer cli.DeleteQueue(q) - - msg := randString(64 * 1024) // exercise max length - c.Assert(cli.PutMessage(q, msg, PutMessageParameters{}), chk.IsNil) - r, err := cli.PeekMessages(q, PeekMessagesParameters{}) - c.Assert(err, chk.IsNil) - c.Assert(len(r.QueueMessagesList), chk.Equals, 1) - c.Assert(r.QueueMessagesList[0].MessageText, chk.Equals, msg) -} - -func (s *StorageQueueSuite) TestGetMessages(c *chk.C) { - q := randString(20) - cli := getQueueClient(c) - c.Assert(cli.CreateQueue(q), chk.IsNil) - defer cli.DeleteQueue(q) - - n := 4 - for i := 0; i < n; i++ { - c.Assert(cli.PutMessage(q, randString(10), PutMessageParameters{}), chk.IsNil) - } - - r, err := cli.GetMessages(q, GetMessagesParameters{NumOfMessages: n}) - c.Assert(err, chk.IsNil) - c.Assert(len(r.QueueMessagesList), chk.Equals, n) -} - -func (s *StorageQueueSuite) TestDeleteMessages(c *chk.C) { - q := randString(20) - cli := getQueueClient(c) - c.Assert(cli.CreateQueue(q), chk.IsNil) - defer cli.DeleteQueue(q) - - c.Assert(cli.PutMessage(q, "message", PutMessageParameters{}), chk.IsNil) - r, err := cli.GetMessages(q, GetMessagesParameters{VisibilityTimeout: 1}) - c.Assert(err, chk.IsNil) - c.Assert(len(r.QueueMessagesList), chk.Equals, 1) - m := r.QueueMessagesList[0] - c.Assert(cli.DeleteMessage(q, m.MessageID, m.PopReceipt), chk.IsNil) -} diff --git a/Godeps/_workspace/src/github.com/Azure/azure-sdk-for-go/storage/util_test.go b/Godeps/_workspace/src/github.com/Azure/azure-sdk-for-go/storage/util_test.go deleted file mode 100644 index 9bf82dcc..00000000 --- a/Godeps/_workspace/src/github.com/Azure/azure-sdk-for-go/storage/util_test.go +++ /dev/null @@ -1,69 +0,0 @@ -package storage - -import ( - "encoding/xml" - "io/ioutil" - "net/url" - "strings" - "time" - - chk "gopkg.in/check.v1" -) - -func (s *StorageClientSuite) Test_timeRfc1123Formatted(c *chk.C) { - now := time.Now().UTC() - expectedLayout := "Mon, 02 Jan 2006 15:04:05 GMT" - c.Assert(timeRfc1123Formatted(now), chk.Equals, now.Format(expectedLayout)) -} - -func (s *StorageClientSuite) Test_mergeParams(c *chk.C) { - v1 := url.Values{ - "k1": {"v1"}, - "k2": {"v2"}} - v2 := url.Values{ - "k1": {"v11"}, - "k3": {"v3"}} - out := mergeParams(v1, v2) - c.Assert(out.Get("k1"), chk.Equals, "v1") - c.Assert(out.Get("k2"), chk.Equals, "v2") - c.Assert(out.Get("k3"), chk.Equals, "v3") - c.Assert(out["k1"], chk.DeepEquals, []string{"v1", "v11"}) -} - -func (s *StorageClientSuite) Test_prepareBlockListRequest(c *chk.C) { - empty := []Block{} - expected := `` - c.Assert(prepareBlockListRequest(empty), chk.DeepEquals, expected) - - blocks := []Block{{"foo", BlockStatusLatest}, {"bar", BlockStatusUncommitted}} - expected = `foobar` - c.Assert(prepareBlockListRequest(blocks), chk.DeepEquals, expected) -} - -func (s *StorageClientSuite) Test_xmlUnmarshal(c *chk.C) { - xml := ` - - myblob - ` - var blob Blob - body := ioutil.NopCloser(strings.NewReader(xml)) - c.Assert(xmlUnmarshal(body, &blob), chk.IsNil) - c.Assert(blob.Name, chk.Equals, "myblob") -} - -func (s *StorageClientSuite) Test_xmlMarshal(c *chk.C) { - type t struct { - XMLName xml.Name `xml:"S"` - Name string `xml:"Name"` - } - - b := t{Name: "myblob"} - expected := `myblob` - r, i, err := xmlMarshal(b) - c.Assert(err, chk.IsNil) - o, err := ioutil.ReadAll(r) - c.Assert(err, chk.IsNil) - out := string(o) - c.Assert(out, chk.Equals, expected) - c.Assert(i, chk.Equals, len(expected)) -} diff --git a/blobs.go b/blobs.go index e80800f8..1765e9f7 100644 --- a/blobs.go +++ b/blobs.go @@ -189,9 +189,11 @@ type BlobCreateOption interface { // BlobWriteService.Resume. If supported by the store, a writer can be // recovered with the id. type BlobWriter interface { - io.WriteSeeker + io.WriteCloser io.ReaderFrom - io.Closer + + // Size returns the number of bytes written to this blob. + Size() int64 // ID returns the identifier for this writer. The ID can be used with the // Blob service to later resume the write. @@ -216,9 +218,6 @@ type BlobWriter interface { // result in a no-op. This allows use of Cancel in a defer statement, // increasing the assurance that it is correctly called. Cancel(ctx context.Context) error - - // Get a reader to the blob being written by this BlobWriter - Reader() (io.ReadCloser, error) } // BlobService combines the operations to access, read and write blobs. This diff --git a/cmd/registry/main.go b/cmd/registry/main.go index 686af12b..afa4ae20 100644 --- a/cmd/registry/main.go +++ b/cmd/registry/main.go @@ -10,13 +10,13 @@ import ( _ "github.com/docker/distribution/registry/proxy" _ "github.com/docker/distribution/registry/storage/driver/azure" _ "github.com/docker/distribution/registry/storage/driver/filesystem" - _ "github.com/docker/distribution/registry/storage/driver/gcs" + // _ "github.com/docker/distribution/registry/storage/driver/gcs" _ "github.com/docker/distribution/registry/storage/driver/inmemory" _ "github.com/docker/distribution/registry/storage/driver/middleware/cloudfront" - _ "github.com/docker/distribution/registry/storage/driver/oss" + // _ "github.com/docker/distribution/registry/storage/driver/oss" _ "github.com/docker/distribution/registry/storage/driver/s3-aws" _ "github.com/docker/distribution/registry/storage/driver/s3-goamz" - _ "github.com/docker/distribution/registry/storage/driver/swift" + // _ "github.com/docker/distribution/registry/storage/driver/swift" ) func main() { diff --git a/cmd/registry/rados.go b/cmd/registry/rados.go index e7ea770a..a2a938c3 100644 --- a/cmd/registry/rados.go +++ b/cmd/registry/rados.go @@ -2,4 +2,4 @@ package main -import _ "github.com/docker/distribution/registry/storage/driver/rados" +// import _ "github.com/docker/distribution/registry/storage/driver/rados" diff --git a/registry/client/blob_writer.go b/registry/client/blob_writer.go index 21a018dc..e3ffcb00 100644 --- a/registry/client/blob_writer.go +++ b/registry/client/blob_writer.go @@ -6,7 +6,6 @@ import ( "io" "io/ioutil" "net/http" - "os" "time" "github.com/docker/distribution" @@ -104,21 +103,8 @@ func (hbu *httpBlobUpload) Write(p []byte) (n int, err error) { } -func (hbu *httpBlobUpload) Seek(offset int64, whence int) (int64, error) { - newOffset := hbu.offset - - switch whence { - case os.SEEK_CUR: - newOffset += int64(offset) - case os.SEEK_END: - newOffset += int64(offset) - case os.SEEK_SET: - newOffset = int64(offset) - } - - hbu.offset = newOffset - - return hbu.offset, nil +func (hbu *httpBlobUpload) Size() int64 { + return hbu.offset } func (hbu *httpBlobUpload) ID() string { diff --git a/registry/handlers/blobupload.go b/registry/handlers/blobupload.go index e9f0f513..892393aa 100644 --- a/registry/handlers/blobupload.go +++ b/registry/handlers/blobupload.go @@ -4,7 +4,6 @@ import ( "fmt" "net/http" "net/url" - "os" "github.com/docker/distribution" ctxu "github.com/docker/distribution/context" @@ -76,28 +75,14 @@ func blobUploadDispatcher(ctx *Context, r *http.Request) http.Handler { } buh.Upload = upload - if state.Offset > 0 { - // Seek the blob upload to the correct spot if it's non-zero. - // These error conditions should be rare and demonstrate really - // problems. We basically cancel the upload and tell the client to - // start over. - if nn, err := upload.Seek(buh.State.Offset, os.SEEK_SET); err != nil { - defer upload.Close() - ctxu.GetLogger(ctx).Infof("error seeking blob upload: %v", err) - return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - buh.Errors = append(buh.Errors, v2.ErrorCodeBlobUploadInvalid.WithDetail(err)) - upload.Cancel(buh) - }) - } else if nn != buh.State.Offset { - defer upload.Close() - ctxu.GetLogger(ctx).Infof("seek to wrong offest: %d != %d", nn, buh.State.Offset) - return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - buh.Errors = append(buh.Errors, v2.ErrorCodeBlobUploadInvalid.WithDetail(err)) - upload.Cancel(buh) - }) - } + if size := upload.Size(); size != buh.State.Offset { + defer upload.Close() + ctxu.GetLogger(ctx).Infof("upload resumed at wrong offest: %d != %d", size, buh.State.Offset) + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + buh.Errors = append(buh.Errors, v2.ErrorCodeBlobUploadInvalid.WithDetail(err)) + upload.Cancel(buh) + }) } - return closeResources(handler, buh.Upload) } @@ -239,10 +224,7 @@ func (buh *blobUploadHandler) PutBlobUploadComplete(w http.ResponseWriter, r *ht return } - size := buh.State.Offset - if offset, err := buh.Upload.Seek(0, os.SEEK_CUR); err == nil { - size = offset - } + size := buh.Upload.Size() desc, err := buh.Upload.Commit(buh, distribution.Descriptor{ Digest: dgst, @@ -308,21 +290,10 @@ func (buh *blobUploadHandler) CancelBlobUpload(w http.ResponseWriter, r *http.Re // uploads always start at a 0 offset. This allows disabling resumable push by // always returning a 0 offset on check status. func (buh *blobUploadHandler) blobUploadResponse(w http.ResponseWriter, r *http.Request, fresh bool) error { - - var offset int64 - if !fresh { - var err error - offset, err = buh.Upload.Seek(0, os.SEEK_CUR) - if err != nil { - ctxu.GetLogger(buh).Errorf("unable get current offset of blob upload: %v", err) - return err - } - } - // TODO(stevvooe): Need a better way to manage the upload state automatically. buh.State.Name = buh.Repository.Named().Name() buh.State.UUID = buh.Upload.ID() - buh.State.Offset = offset + buh.State.Offset = buh.Upload.Size() buh.State.StartedAt = buh.Upload.StartedAt() token, err := hmacKey(buh.Config.HTTP.Secret).packUploadState(buh.State) @@ -341,7 +312,7 @@ func (buh *blobUploadHandler) blobUploadResponse(w http.ResponseWriter, r *http. return err } - endRange := offset + endRange := buh.Upload.Size() if endRange > 0 { endRange = endRange - 1 } diff --git a/registry/storage/blob_test.go b/registry/storage/blob_test.go index 1e5b408c..3698a415 100644 --- a/registry/storage/blob_test.go +++ b/registry/storage/blob_test.go @@ -41,10 +41,7 @@ func TestWriteSeek(t *testing.T) { } contents := []byte{1, 2, 3} blobUpload.Write(contents) - offset, err := blobUpload.Seek(0, os.SEEK_CUR) - if err != nil { - t.Fatalf("unexpected error in blobUpload.Seek: %s", err) - } + offset := blobUpload.Size() if offset != int64(len(contents)) { t.Fatalf("unexpected value for blobUpload offset: %v != %v", offset, len(contents)) } @@ -113,11 +110,7 @@ func TestSimpleBlobUpload(t *testing.T) { t.Fatalf("layer data write incomplete") } - offset, err := blobUpload.Seek(0, os.SEEK_CUR) - if err != nil { - t.Fatalf("unexpected error seeking layer upload: %v", err) - } - + offset := blobUpload.Size() if offset != nn { t.Fatalf("blobUpload not updated with correct offset: %v != %v", offset, nn) } diff --git a/registry/storage/blobwriter.go b/registry/storage/blobwriter.go index f2ca7388..7f280d36 100644 --- a/registry/storage/blobwriter.go +++ b/registry/storage/blobwriter.go @@ -21,6 +21,7 @@ var ( // layerWriter is used to control the various aspects of resumable // layer upload. It implements the LayerUpload interface. type blobWriter struct { + ctx context.Context blobStore *linkedBlobStore id string @@ -28,9 +29,9 @@ type blobWriter struct { digester digest.Digester written int64 // track the contiguous write - // implementes io.WriteSeeker, io.ReaderFrom and io.Closer to satisfy - // LayerUpload Interface - fileWriter + fileWriter storagedriver.FileWriter + driver storagedriver.StorageDriver + path string resumableDigestEnabled bool } @@ -51,7 +52,7 @@ func (bw *blobWriter) StartedAt() time.Time { func (bw *blobWriter) Commit(ctx context.Context, desc distribution.Descriptor) (distribution.Descriptor, error) { context.GetLogger(ctx).Debug("(*blobWriter).Commit") - if err := bw.fileWriter.Close(); err != nil { + if err := bw.fileWriter.Commit(); err != nil { return distribution.Descriptor{}, err } @@ -84,6 +85,10 @@ func (bw *blobWriter) Commit(ctx context.Context, desc distribution.Descriptor) // the writer and canceling the operation. func (bw *blobWriter) Cancel(ctx context.Context) error { context.GetLogger(ctx).Debug("(*blobWriter).Rollback") + if err := bw.fileWriter.Cancel(); err != nil { + return err + } + if err := bw.removeResources(ctx); err != nil { return err } @@ -92,15 +97,19 @@ func (bw *blobWriter) Cancel(ctx context.Context) error { return nil } +func (bw *blobWriter) Size() int64 { + return bw.fileWriter.Size() +} + func (bw *blobWriter) Write(p []byte) (int, error) { // Ensure that the current write offset matches how many bytes have been // written to the digester. If not, we need to update the digest state to // match the current write position. - if err := bw.resumeDigestAt(bw.blobStore.ctx, bw.offset); err != nil && err != errResumableDigestNotAvailable { + if err := bw.resumeDigest(bw.blobStore.ctx); err != nil && err != errResumableDigestNotAvailable { return 0, err } - n, err := io.MultiWriter(&bw.fileWriter, bw.digester.Hash()).Write(p) + n, err := io.MultiWriter(bw.fileWriter, bw.digester.Hash()).Write(p) bw.written += int64(n) return n, err @@ -110,21 +119,17 @@ func (bw *blobWriter) ReadFrom(r io.Reader) (n int64, err error) { // Ensure that the current write offset matches how many bytes have been // written to the digester. If not, we need to update the digest state to // match the current write position. - if err := bw.resumeDigestAt(bw.blobStore.ctx, bw.offset); err != nil && err != errResumableDigestNotAvailable { + if err := bw.resumeDigest(bw.blobStore.ctx); err != nil && err != errResumableDigestNotAvailable { return 0, err } - nn, err := bw.fileWriter.ReadFrom(io.TeeReader(r, bw.digester.Hash())) + nn, err := io.Copy(io.MultiWriter(bw.fileWriter, bw.digester.Hash()), r) bw.written += nn return nn, err } func (bw *blobWriter) Close() error { - if bw.err != nil { - return bw.err - } - if err := bw.storeHashState(bw.blobStore.ctx); err != nil { return err } @@ -148,8 +153,10 @@ func (bw *blobWriter) validateBlob(ctx context.Context, desc distribution.Descri } } + var size int64 + // Stat the on disk file - if fi, err := bw.fileWriter.driver.Stat(ctx, bw.path); err != nil { + if fi, err := bw.driver.Stat(ctx, bw.path); err != nil { switch err := err.(type) { case storagedriver.PathNotFoundError: // NOTE(stevvooe): We really don't care if the file is @@ -165,23 +172,23 @@ func (bw *blobWriter) validateBlob(ctx context.Context, desc distribution.Descri return distribution.Descriptor{}, fmt.Errorf("unexpected directory at upload location %q", bw.path) } - bw.size = fi.Size() + size = fi.Size() } if desc.Size > 0 { - if desc.Size != bw.size { + if desc.Size != size { return distribution.Descriptor{}, distribution.ErrBlobInvalidLength } } else { // if provided 0 or negative length, we can assume caller doesn't know or // care about length. - desc.Size = bw.size + desc.Size = size } // TODO(stevvooe): This section is very meandering. Need to be broken down // to be a lot more clear. - if err := bw.resumeDigestAt(ctx, bw.size); err == nil { + if err := bw.resumeDigest(ctx); err == nil { canonical = bw.digester.Digest() if canonical.Algorithm() == desc.Digest.Algorithm() { @@ -206,7 +213,7 @@ func (bw *blobWriter) validateBlob(ctx context.Context, desc distribution.Descri // the same, we don't need to read the data from the backend. This is // because we've written the entire file in the lifecycle of the // current instance. - if bw.written == bw.size && digest.Canonical == desc.Digest.Algorithm() { + if bw.written == size && digest.Canonical == desc.Digest.Algorithm() { canonical = bw.digester.Digest() verified = desc.Digest == canonical } @@ -223,7 +230,7 @@ func (bw *blobWriter) validateBlob(ctx context.Context, desc distribution.Descri } // Read the file from the backend driver and validate it. - fr, err := newFileReader(ctx, bw.fileWriter.driver, bw.path, desc.Size) + fr, err := newFileReader(ctx, bw.driver, bw.path, desc.Size) if err != nil { return distribution.Descriptor{}, err } @@ -357,7 +364,7 @@ func (bw *blobWriter) Reader() (io.ReadCloser, error) { // todo(richardscothern): Change to exponential backoff, i=0.5, e=2, n=4 try := 1 for try <= 5 { - _, err := bw.fileWriter.driver.Stat(bw.ctx, bw.path) + _, err := bw.driver.Stat(bw.ctx, bw.path) if err == nil { break } @@ -371,7 +378,7 @@ func (bw *blobWriter) Reader() (io.ReadCloser, error) { } } - readCloser, err := bw.fileWriter.driver.ReadStream(bw.ctx, bw.path, 0) + readCloser, err := bw.driver.Reader(bw.ctx, bw.path, 0) if err != nil { return nil, err } diff --git a/registry/storage/blobwriter_resumable.go b/registry/storage/blobwriter_resumable.go index 5ae29c54..ff5482c3 100644 --- a/registry/storage/blobwriter_resumable.go +++ b/registry/storage/blobwriter_resumable.go @@ -4,8 +4,6 @@ package storage import ( "fmt" - "io" - "os" "path" "strconv" @@ -19,24 +17,18 @@ import ( _ "github.com/stevvooe/resumable/sha512" ) -// resumeDigestAt attempts to restore the state of the internal hash function -// by loading the most recent saved hash state less than or equal to the given -// offset. Any unhashed bytes remaining less than the given offset are hashed -// from the content uploaded so far. -func (bw *blobWriter) resumeDigestAt(ctx context.Context, offset int64) error { +// resumeDigest attempts to restore the state of the internal hash function +// by loading the most recent saved hash state equal to the current size of the blob. +func (bw *blobWriter) resumeDigest(ctx context.Context) error { if !bw.resumableDigestEnabled { return errResumableDigestNotAvailable } - if offset < 0 { - return fmt.Errorf("cannot resume hash at negative offset: %d", offset) - } - h, ok := bw.digester.Hash().(resumable.Hash) if !ok { return errResumableDigestNotAvailable } - + offset := bw.fileWriter.Size() if offset == int64(h.Len()) { // State of digester is already at the requested offset. return nil @@ -49,24 +41,12 @@ func (bw *blobWriter) resumeDigestAt(ctx context.Context, offset int64) error { return fmt.Errorf("unable to get stored hash states with offset %d: %s", offset, err) } - // Find the highest stored hashState with offset less than or equal to + // Find the highest stored hashState with offset equal to // the requested offset. for _, hashState := range hashStates { if hashState.offset == offset { hashStateMatch = hashState break // Found an exact offset match. - } else if hashState.offset < offset && hashState.offset > hashStateMatch.offset { - // This offset is closer to the requested offset. - hashStateMatch = hashState - } else if hashState.offset > offset { - // Remove any stored hash state with offsets higher than this one - // as writes to this resumed hasher will make those invalid. This - // is probably okay to skip for now since we don't expect anyone to - // use the API in this way. For that reason, we don't treat an - // an error here as a fatal error, but only log it. - if err := bw.driver.Delete(ctx, hashState.path); err != nil { - logrus.Errorf("unable to delete stale hash state %q: %s", hashState.path, err) - } } } @@ -86,20 +66,7 @@ func (bw *blobWriter) resumeDigestAt(ctx context.Context, offset int64) error { // Mind the gap. if gapLen := offset - int64(h.Len()); gapLen > 0 { - // Need to read content from the upload to catch up to the desired offset. - fr, err := newFileReader(ctx, bw.driver, bw.path, bw.size) - if err != nil { - return err - } - defer fr.Close() - - if _, err = fr.Seek(int64(h.Len()), os.SEEK_SET); err != nil { - return fmt.Errorf("unable to seek to layer reader offset %d: %s", h.Len(), err) - } - - if _, err := io.CopyN(h, fr, gapLen); err != nil { - return err - } + return errResumableDigestNotAvailable } return nil diff --git a/registry/storage/driver/azure/azure.go b/registry/storage/driver/azure/azure.go index cbb95981..70771375 100644 --- a/registry/storage/driver/azure/azure.go +++ b/registry/storage/driver/azure/azure.go @@ -3,6 +3,7 @@ package azure import ( + "bufio" "bytes" "fmt" "io" @@ -26,6 +27,7 @@ const ( paramAccountKey = "accountkey" paramContainer = "container" paramRealm = "realm" + maxChunkSize = 4 * 1024 * 1024 ) type driver struct { @@ -117,18 +119,21 @@ func (d *driver) PutContent(ctx context.Context, path string, contents []byte) e if _, err := d.client.DeleteBlobIfExists(d.container, path); err != nil { return err } - if err := d.client.CreateBlockBlob(d.container, path); err != nil { + writer, err := d.Writer(ctx, path, false) + if err != nil { return err } - bs := newAzureBlockStorage(d.client) - bw := newRandomBlobWriter(&bs, azure.MaxBlobBlockSize) - _, err := bw.WriteBlobAt(d.container, path, 0, bytes.NewReader(contents)) - return err + defer writer.Close() + _, err = writer.Write(contents) + if err != nil { + return err + } + return writer.Commit() } -// ReadStream retrieves an io.ReadCloser for the content stored at "path" with a +// Reader retrieves an io.ReadCloser for the content stored at "path" with a // given byte offset. -func (d *driver) ReadStream(ctx context.Context, path string, offset int64) (io.ReadCloser, error) { +func (d *driver) Reader(ctx context.Context, path string, offset int64) (io.ReadCloser, error) { if ok, err := d.client.BlobExists(d.container, path); err != nil { return nil, err } else if !ok { @@ -153,25 +158,38 @@ func (d *driver) ReadStream(ctx context.Context, path string, offset int64) (io. return resp, nil } -// WriteStream stores the contents of the provided io.ReadCloser at a location -// designated by the given path. -func (d *driver) WriteStream(ctx context.Context, path string, offset int64, reader io.Reader) (int64, error) { - if blobExists, err := d.client.BlobExists(d.container, path); err != nil { - return 0, err - } else if !blobExists { - err := d.client.CreateBlockBlob(d.container, path) +// Writer returns a FileWriter which will store the content written to it +// at the location designated by "path" after the call to Commit. +func (d *driver) Writer(ctx context.Context, path string, append bool) (storagedriver.FileWriter, error) { + blobExists, err := d.client.BlobExists(d.container, path) + if err != nil { + return nil, err + } + var size int64 + if blobExists { + if append { + blobProperties, err := d.client.GetBlobProperties(d.container, path) + if err != nil { + return nil, err + } + size = blobProperties.ContentLength + } else { + err := d.client.DeleteBlob(d.container, path) + if err != nil { + return nil, err + } + } + } else { + if append { + return nil, storagedriver.PathNotFoundError{Path: path} + } + err := d.client.PutAppendBlob(d.container, path, nil) if err != nil { - return 0, err + return nil, err } } - if offset < 0 { - return 0, storagedriver.InvalidOffsetError{Path: path, Offset: offset} - } - bs := newAzureBlockStorage(d.client) - bw := newRandomBlobWriter(&bs, azure.MaxBlobBlockSize) - zw := newZeroFillWriter(&bw) - return zw.Write(d.container, path, offset, reader) + return d.newWriter(path, size), nil } // Stat retrieves the FileInfo for the given path, including the current size @@ -236,6 +254,9 @@ func (d *driver) List(ctx context.Context, path string) ([]string, error) { } list := directDescendants(blobs, path) + if path != "" && len(list) == 0 { + return nil, storagedriver.PathNotFoundError{Path: path} + } return list, nil } @@ -361,6 +382,101 @@ func (d *driver) listBlobs(container, virtPath string) ([]string, error) { } func is404(err error) bool { - e, ok := err.(azure.AzureStorageServiceError) - return ok && e.StatusCode == http.StatusNotFound + statusCodeErr, ok := err.(azure.UnexpectedStatusCodeError) + return ok && statusCodeErr.Got() == http.StatusNotFound +} + +type writer struct { + driver *driver + path string + size int64 + bw *bufio.Writer + closed bool + committed bool + cancelled bool +} + +func (d *driver) newWriter(path string, size int64) storagedriver.FileWriter { + return &writer{ + driver: d, + path: path, + size: size, + bw: bufio.NewWriterSize(&blockWriter{ + client: d.client, + container: d.container, + path: path, + }, maxChunkSize), + } +} + +func (w *writer) Write(p []byte) (int, error) { + if w.closed { + return 0, fmt.Errorf("already closed") + } else if w.committed { + return 0, fmt.Errorf("already committed") + } else if w.cancelled { + return 0, fmt.Errorf("already cancelled") + } + + n, err := w.bw.Write(p) + w.size += int64(n) + return n, err +} + +func (w *writer) Size() int64 { + return w.size +} + +func (w *writer) Close() error { + if w.closed { + return fmt.Errorf("already closed") + } + w.closed = true + return w.bw.Flush() +} + +func (w *writer) Cancel() error { + if w.closed { + return fmt.Errorf("already closed") + } else if w.committed { + return fmt.Errorf("already committed") + } + w.cancelled = true + return w.driver.client.DeleteBlob(w.driver.container, w.path) +} + +func (w *writer) Commit() error { + if w.closed { + return fmt.Errorf("already closed") + } else if w.committed { + return fmt.Errorf("already committed") + } else if w.cancelled { + return fmt.Errorf("already cancelled") + } + w.committed = true + return w.bw.Flush() +} + +type blockWriter struct { + client azure.BlobStorageClient + container string + path string +} + +func (bw *blockWriter) Write(p []byte) (int, error) { + n := 0 + for offset := 0; offset < len(p); offset += maxChunkSize { + chunkSize := maxChunkSize + if offset+chunkSize > len(p) { + chunkSize = len(p) - offset + } + err := bw.client.AppendBlock(bw.container, bw.path, p[offset:offset+chunkSize]) + if err != nil { + return n, err + } + + n += chunkSize + } + + return n, nil } diff --git a/registry/storage/driver/azure/blockblob.go b/registry/storage/driver/azure/blockblob.go deleted file mode 100644 index 1c1df899..00000000 --- a/registry/storage/driver/azure/blockblob.go +++ /dev/null @@ -1,24 +0,0 @@ -package azure - -import ( - "fmt" - "io" - - azure "github.com/Azure/azure-sdk-for-go/storage" -) - -// azureBlockStorage is adaptor between azure.BlobStorageClient and -// blockStorage interface. -type azureBlockStorage struct { - azure.BlobStorageClient -} - -func (b *azureBlockStorage) GetSectionReader(container, blob string, start, length int64) (io.ReadCloser, error) { - return b.BlobStorageClient.GetBlobRange(container, blob, fmt.Sprintf("%v-%v", start, start+length-1)) -} - -func newAzureBlockStorage(b azure.BlobStorageClient) azureBlockStorage { - a := azureBlockStorage{} - a.BlobStorageClient = b - return a -} diff --git a/registry/storage/driver/azure/blockblob_test.go b/registry/storage/driver/azure/blockblob_test.go deleted file mode 100644 index 7ce47195..00000000 --- a/registry/storage/driver/azure/blockblob_test.go +++ /dev/null @@ -1,155 +0,0 @@ -package azure - -import ( - "bytes" - "fmt" - "io" - "io/ioutil" - - azure "github.com/Azure/azure-sdk-for-go/storage" -) - -type StorageSimulator struct { - blobs map[string]*BlockBlob -} - -type BlockBlob struct { - blocks map[string]*DataBlock - blockList []string -} - -type DataBlock struct { - data []byte - committed bool -} - -func (s *StorageSimulator) path(container, blob string) string { - return fmt.Sprintf("%s/%s", container, blob) -} - -func (s *StorageSimulator) BlobExists(container, blob string) (bool, error) { - _, ok := s.blobs[s.path(container, blob)] - return ok, nil -} - -func (s *StorageSimulator) GetBlob(container, blob string) (io.ReadCloser, error) { - bb, ok := s.blobs[s.path(container, blob)] - if !ok { - return nil, fmt.Errorf("blob not found") - } - - var readers []io.Reader - for _, bID := range bb.blockList { - readers = append(readers, bytes.NewReader(bb.blocks[bID].data)) - } - return ioutil.NopCloser(io.MultiReader(readers...)), nil -} - -func (s *StorageSimulator) GetSectionReader(container, blob string, start, length int64) (io.ReadCloser, error) { - r, err := s.GetBlob(container, blob) - if err != nil { - return nil, err - } - b, err := ioutil.ReadAll(r) - if err != nil { - return nil, err - } - return ioutil.NopCloser(bytes.NewReader(b[start : start+length])), nil -} - -func (s *StorageSimulator) CreateBlockBlob(container, blob string) error { - path := s.path(container, blob) - bb := &BlockBlob{ - blocks: make(map[string]*DataBlock), - blockList: []string{}, - } - s.blobs[path] = bb - return nil -} - -func (s *StorageSimulator) PutBlock(container, blob, blockID string, chunk []byte) error { - path := s.path(container, blob) - bb, ok := s.blobs[path] - if !ok { - return fmt.Errorf("blob not found") - } - data := make([]byte, len(chunk)) - copy(data, chunk) - bb.blocks[blockID] = &DataBlock{data: data, committed: false} // add block to blob - return nil -} - -func (s *StorageSimulator) GetBlockList(container, blob string, blockType azure.BlockListType) (azure.BlockListResponse, error) { - resp := azure.BlockListResponse{} - bb, ok := s.blobs[s.path(container, blob)] - if !ok { - return resp, fmt.Errorf("blob not found") - } - - // Iterate committed blocks (in order) - if blockType == azure.BlockListTypeAll || blockType == azure.BlockListTypeCommitted { - for _, blockID := range bb.blockList { - b := bb.blocks[blockID] - block := azure.BlockResponse{ - Name: blockID, - Size: int64(len(b.data)), - } - resp.CommittedBlocks = append(resp.CommittedBlocks, block) - } - - } - - // Iterate uncommitted blocks (in no order) - if blockType == azure.BlockListTypeAll || blockType == azure.BlockListTypeCommitted { - for blockID, b := range bb.blocks { - block := azure.BlockResponse{ - Name: blockID, - Size: int64(len(b.data)), - } - if !b.committed { - resp.UncommittedBlocks = append(resp.UncommittedBlocks, block) - } - } - } - return resp, nil -} - -func (s *StorageSimulator) PutBlockList(container, blob string, blocks []azure.Block) error { - bb, ok := s.blobs[s.path(container, blob)] - if !ok { - return fmt.Errorf("blob not found") - } - - var blockIDs []string - for _, v := range blocks { - bl, ok := bb.blocks[v.ID] - if !ok { // check if block ID exists - return fmt.Errorf("Block id '%s' not found", v.ID) - } - bl.committed = true - blockIDs = append(blockIDs, v.ID) - } - - // Mark all other blocks uncommitted - for k, b := range bb.blocks { - inList := false - for _, v := range blockIDs { - if k == v { - inList = true - break - } - } - if !inList { - b.committed = false - } - } - - bb.blockList = blockIDs - return nil -} - -func NewStorageSimulator() StorageSimulator { - return StorageSimulator{ - blobs: make(map[string]*BlockBlob), - } -} diff --git a/registry/storage/driver/azure/blockid.go b/registry/storage/driver/azure/blockid.go deleted file mode 100644 index 776c7cd5..00000000 --- a/registry/storage/driver/azure/blockid.go +++ /dev/null @@ -1,60 +0,0 @@ -package azure - -import ( - "encoding/base64" - "fmt" - "math/rand" - "sync" - "time" - - azure "github.com/Azure/azure-sdk-for-go/storage" -) - -type blockIDGenerator struct { - pool map[string]bool - r *rand.Rand - m sync.Mutex -} - -// Generate returns an unused random block id and adds the generated ID -// to list of used IDs so that the same block name is not used again. -func (b *blockIDGenerator) Generate() string { - b.m.Lock() - defer b.m.Unlock() - - var id string - for { - id = toBlockID(int(b.r.Int())) - if !b.exists(id) { - break - } - } - b.pool[id] = true - return id -} - -func (b *blockIDGenerator) exists(id string) bool { - _, used := b.pool[id] - return used -} - -func (b *blockIDGenerator) Feed(blocks azure.BlockListResponse) { - b.m.Lock() - defer b.m.Unlock() - - for _, bl := range append(blocks.CommittedBlocks, blocks.UncommittedBlocks...) { - b.pool[bl.Name] = true - } -} - -func newBlockIDGenerator() *blockIDGenerator { - return &blockIDGenerator{ - pool: make(map[string]bool), - r: rand.New(rand.NewSource(time.Now().UnixNano()))} -} - -// toBlockId converts given integer to base64-encoded block ID of a fixed length. -func toBlockID(i int) string { - s := fmt.Sprintf("%029d", i) // add zero padding for same length-blobs - return base64.StdEncoding.EncodeToString([]byte(s)) -} diff --git a/registry/storage/driver/azure/blockid_test.go b/registry/storage/driver/azure/blockid_test.go deleted file mode 100644 index aab70202..00000000 --- a/registry/storage/driver/azure/blockid_test.go +++ /dev/null @@ -1,74 +0,0 @@ -package azure - -import ( - "math" - "testing" - - azure "github.com/Azure/azure-sdk-for-go/storage" -) - -func Test_blockIdGenerator(t *testing.T) { - r := newBlockIDGenerator() - - for i := 1; i <= 10; i++ { - if expected := i - 1; len(r.pool) != expected { - t.Fatalf("rand pool had wrong number of items: %d, expected:%d", len(r.pool), expected) - } - if id := r.Generate(); id == "" { - t.Fatal("returned empty id") - } - if expected := i; len(r.pool) != expected { - t.Fatalf("rand pool has wrong number of items: %d, expected:%d", len(r.pool), expected) - } - } -} - -func Test_blockIdGenerator_Feed(t *testing.T) { - r := newBlockIDGenerator() - if expected := 0; len(r.pool) != expected { - t.Fatalf("rand pool had wrong number of items: %d, expected:%d", len(r.pool), expected) - } - - // feed empty list - blocks := azure.BlockListResponse{} - r.Feed(blocks) - if expected := 0; len(r.pool) != expected { - t.Fatalf("rand pool had wrong number of items: %d, expected:%d", len(r.pool), expected) - } - - // feed blocks - blocks = azure.BlockListResponse{ - CommittedBlocks: []azure.BlockResponse{ - {"1", 1}, - {"2", 2}, - }, - UncommittedBlocks: []azure.BlockResponse{ - {"3", 3}, - }} - r.Feed(blocks) - if expected := 3; len(r.pool) != expected { - t.Fatalf("rand pool had wrong number of items: %d, expected:%d", len(r.pool), expected) - } - - // feed same block IDs with committed/uncommitted place changed - blocks = azure.BlockListResponse{ - CommittedBlocks: []azure.BlockResponse{ - {"3", 3}, - }, - UncommittedBlocks: []azure.BlockResponse{ - {"1", 1}, - }} - r.Feed(blocks) - if expected := 3; len(r.pool) != expected { - t.Fatalf("rand pool had wrong number of items: %d, expected:%d", len(r.pool), expected) - } -} - -func Test_toBlockId(t *testing.T) { - min := 0 - max := math.MaxInt64 - - if len(toBlockID(min)) != len(toBlockID(max)) { - t.Fatalf("different-sized blockIDs are returned") - } -} diff --git a/registry/storage/driver/azure/randomwriter.go b/registry/storage/driver/azure/randomwriter.go deleted file mode 100644 index f18692d0..00000000 --- a/registry/storage/driver/azure/randomwriter.go +++ /dev/null @@ -1,208 +0,0 @@ -package azure - -import ( - "fmt" - "io" - "io/ioutil" - - azure "github.com/Azure/azure-sdk-for-go/storage" -) - -// blockStorage is the interface required from a block storage service -// client implementation -type blockStorage interface { - CreateBlockBlob(container, blob string) error - GetBlob(container, blob string) (io.ReadCloser, error) - GetSectionReader(container, blob string, start, length int64) (io.ReadCloser, error) - PutBlock(container, blob, blockID string, chunk []byte) error - GetBlockList(container, blob string, blockType azure.BlockListType) (azure.BlockListResponse, error) - PutBlockList(container, blob string, blocks []azure.Block) error -} - -// randomBlobWriter enables random access semantics on Azure block blobs -// by enabling writing arbitrary length of chunks to arbitrary write offsets -// within the blob. Normally, Azure Blob Storage does not support random -// access semantics on block blobs; however, this writer can download, split and -// reupload the overlapping blocks and discards those being overwritten entirely. -type randomBlobWriter struct { - bs blockStorage - blockSize int -} - -func newRandomBlobWriter(bs blockStorage, blockSize int) randomBlobWriter { - return randomBlobWriter{bs: bs, blockSize: blockSize} -} - -// WriteBlobAt writes the given chunk to the specified position of an existing blob. -// The offset must be equals to size of the blob or smaller than it. -func (r *randomBlobWriter) WriteBlobAt(container, blob string, offset int64, chunk io.Reader) (int64, error) { - rand := newBlockIDGenerator() - - blocks, err := r.bs.GetBlockList(container, blob, azure.BlockListTypeCommitted) - if err != nil { - return 0, err - } - rand.Feed(blocks) // load existing block IDs - - // Check for write offset for existing blob - size := getBlobSize(blocks) - if offset < 0 || offset > size { - return 0, fmt.Errorf("wrong offset for Write: %v", offset) - } - - // Upload the new chunk as blocks - blockList, nn, err := r.writeChunkToBlocks(container, blob, chunk, rand) - if err != nil { - return 0, err - } - - // For non-append operations, existing blocks may need to be splitted - if offset != size { - // Split the block on the left end (if any) - leftBlocks, err := r.blocksLeftSide(container, blob, offset, rand) - if err != nil { - return 0, err - } - blockList = append(leftBlocks, blockList...) - - // Split the block on the right end (if any) - rightBlocks, err := r.blocksRightSide(container, blob, offset, nn, rand) - if err != nil { - return 0, err - } - blockList = append(blockList, rightBlocks...) - } else { - // Use existing block list - var existingBlocks []azure.Block - for _, v := range blocks.CommittedBlocks { - existingBlocks = append(existingBlocks, azure.Block{ID: v.Name, Status: azure.BlockStatusCommitted}) - } - blockList = append(existingBlocks, blockList...) - } - // Put block list - return nn, r.bs.PutBlockList(container, blob, blockList) -} - -func (r *randomBlobWriter) GetSize(container, blob string) (int64, error) { - blocks, err := r.bs.GetBlockList(container, blob, azure.BlockListTypeCommitted) - if err != nil { - return 0, err - } - return getBlobSize(blocks), nil -} - -// writeChunkToBlocks writes given chunk to one or multiple blocks within specified -// blob and returns their block representations. Those blocks are not committed, yet -func (r *randomBlobWriter) writeChunkToBlocks(container, blob string, chunk io.Reader, rand *blockIDGenerator) ([]azure.Block, int64, error) { - var newBlocks []azure.Block - var nn int64 - - // Read chunks of at most size N except the last chunk to - // maximize block size and minimize block count. - buf := make([]byte, r.blockSize) - for { - n, err := io.ReadFull(chunk, buf) - if err == io.EOF { - break - } - nn += int64(n) - data := buf[:n] - blockID := rand.Generate() - if err := r.bs.PutBlock(container, blob, blockID, data); err != nil { - return newBlocks, nn, err - } - newBlocks = append(newBlocks, azure.Block{ID: blockID, Status: azure.BlockStatusUncommitted}) - } - return newBlocks, nn, nil -} - -// blocksLeftSide returns the blocks that are going to be at the left side of -// the writeOffset: [0, writeOffset) by identifying blocks that will remain -// the same and splitting blocks and reuploading them as needed. -func (r *randomBlobWriter) blocksLeftSide(container, blob string, writeOffset int64, rand *blockIDGenerator) ([]azure.Block, error) { - var left []azure.Block - bx, err := r.bs.GetBlockList(container, blob, azure.BlockListTypeAll) - if err != nil { - return left, err - } - - o := writeOffset - elapsed := int64(0) - for _, v := range bx.CommittedBlocks { - blkSize := int64(v.Size) - if o >= blkSize { // use existing block - left = append(left, azure.Block{ID: v.Name, Status: azure.BlockStatusCommitted}) - o -= blkSize - elapsed += blkSize - } else if o > 0 { // current block needs to be splitted - start := elapsed - size := o - part, err := r.bs.GetSectionReader(container, blob, start, size) - if err != nil { - return left, err - } - newBlockID := rand.Generate() - - data, err := ioutil.ReadAll(part) - if err != nil { - return left, err - } - if err = r.bs.PutBlock(container, blob, newBlockID, data); err != nil { - return left, err - } - left = append(left, azure.Block{ID: newBlockID, Status: azure.BlockStatusUncommitted}) - break - } - } - return left, nil -} - -// blocksRightSide returns the blocks that are going to be at the right side of -// the written chunk: [writeOffset+size, +inf) by identifying blocks that will remain -// the same and splitting blocks and reuploading them as needed. -func (r *randomBlobWriter) blocksRightSide(container, blob string, writeOffset int64, chunkSize int64, rand *blockIDGenerator) ([]azure.Block, error) { - var right []azure.Block - - bx, err := r.bs.GetBlockList(container, blob, azure.BlockListTypeAll) - if err != nil { - return nil, err - } - - re := writeOffset + chunkSize - 1 // right end of written chunk - var elapsed int64 - for _, v := range bx.CommittedBlocks { - var ( - bs = elapsed // left end of current block - be = elapsed + int64(v.Size) - 1 // right end of current block - ) - - if bs > re { // take the block as is - right = append(right, azure.Block{ID: v.Name, Status: azure.BlockStatusCommitted}) - } else if be > re { // current block needs to be splitted - part, err := r.bs.GetSectionReader(container, blob, re+1, be-(re+1)+1) - if err != nil { - return right, err - } - newBlockID := rand.Generate() - - data, err := ioutil.ReadAll(part) - if err != nil { - return right, err - } - if err = r.bs.PutBlock(container, blob, newBlockID, data); err != nil { - return right, err - } - right = append(right, azure.Block{ID: newBlockID, Status: azure.BlockStatusUncommitted}) - } - elapsed += int64(v.Size) - } - return right, nil -} - -func getBlobSize(blocks azure.BlockListResponse) int64 { - var n int64 - for _, v := range blocks.CommittedBlocks { - n += int64(v.Size) - } - return n -} diff --git a/registry/storage/driver/azure/randomwriter_test.go b/registry/storage/driver/azure/randomwriter_test.go deleted file mode 100644 index 32c2509e..00000000 --- a/registry/storage/driver/azure/randomwriter_test.go +++ /dev/null @@ -1,339 +0,0 @@ -package azure - -import ( - "bytes" - "io" - "io/ioutil" - "math/rand" - "reflect" - "strings" - "testing" - - azure "github.com/Azure/azure-sdk-for-go/storage" -) - -func TestRandomWriter_writeChunkToBlocks(t *testing.T) { - s := NewStorageSimulator() - rw := newRandomBlobWriter(&s, 3) - rand := newBlockIDGenerator() - c := []byte("AAABBBCCCD") - - if err := rw.bs.CreateBlockBlob("a", "b"); err != nil { - t.Fatal(err) - } - bw, nn, err := rw.writeChunkToBlocks("a", "b", bytes.NewReader(c), rand) - if err != nil { - t.Fatal(err) - } - if expected := int64(len(c)); nn != expected { - t.Fatalf("wrong nn:%v, expected:%v", nn, expected) - } - if expected := 4; len(bw) != expected { - t.Fatal("unexpected written block count") - } - - bx, err := s.GetBlockList("a", "b", azure.BlockListTypeAll) - if err != nil { - t.Fatal(err) - } - if expected := 0; len(bx.CommittedBlocks) != expected { - t.Fatal("unexpected committed block count") - } - if expected := 4; len(bx.UncommittedBlocks) != expected { - t.Fatalf("unexpected uncommitted block count: %d -- %#v", len(bx.UncommittedBlocks), bx) - } - - if err := rw.bs.PutBlockList("a", "b", bw); err != nil { - t.Fatal(err) - } - - r, err := rw.bs.GetBlob("a", "b") - if err != nil { - t.Fatal(err) - } - assertBlobContents(t, r, c) -} - -func TestRandomWriter_blocksLeftSide(t *testing.T) { - blob := "AAAAABBBBBCCC" - cases := []struct { - offset int64 - expectedBlob string - expectedPattern []azure.BlockStatus - }{ - {0, "", []azure.BlockStatus{}}, // write to beginning, discard all - {13, blob, []azure.BlockStatus{azure.BlockStatusCommitted, azure.BlockStatusCommitted, azure.BlockStatusCommitted}}, // write to end, no change - {1, "A", []azure.BlockStatus{azure.BlockStatusUncommitted}}, // write at 1 - {5, "AAAAA", []azure.BlockStatus{azure.BlockStatusCommitted}}, // write just after first block - {6, "AAAAAB", []azure.BlockStatus{azure.BlockStatusCommitted, azure.BlockStatusUncommitted}}, // split the second block - {9, "AAAAABBBB", []azure.BlockStatus{azure.BlockStatusCommitted, azure.BlockStatusUncommitted}}, // write just after first block - } - - for _, c := range cases { - s := NewStorageSimulator() - rw := newRandomBlobWriter(&s, 5) - rand := newBlockIDGenerator() - - if err := rw.bs.CreateBlockBlob("a", "b"); err != nil { - t.Fatal(err) - } - bw, _, err := rw.writeChunkToBlocks("a", "b", strings.NewReader(blob), rand) - if err != nil { - t.Fatal(err) - } - if err := rw.bs.PutBlockList("a", "b", bw); err != nil { - t.Fatal(err) - } - bx, err := rw.blocksLeftSide("a", "b", c.offset, rand) - if err != nil { - t.Fatal(err) - } - - bs := []azure.BlockStatus{} - for _, v := range bx { - bs = append(bs, v.Status) - } - - if !reflect.DeepEqual(bs, c.expectedPattern) { - t.Logf("Committed blocks %v", bw) - t.Fatalf("For offset %v: Expected pattern: %v, Got: %v\n(Returned: %v)", c.offset, c.expectedPattern, bs, bx) - } - if rw.bs.PutBlockList("a", "b", bx); err != nil { - t.Fatal(err) - } - r, err := rw.bs.GetBlob("a", "b") - if err != nil { - t.Fatal(err) - } - cout, err := ioutil.ReadAll(r) - if err != nil { - t.Fatal(err) - } - outBlob := string(cout) - if outBlob != c.expectedBlob { - t.Fatalf("wrong blob contents: %v, expected: %v", outBlob, c.expectedBlob) - } - } -} - -func TestRandomWriter_blocksRightSide(t *testing.T) { - blob := "AAAAABBBBBCCC" - cases := []struct { - offset int64 - size int64 - expectedBlob string - expectedPattern []azure.BlockStatus - }{ - {0, 100, "", []azure.BlockStatus{}}, // overwrite the entire blob - {0, 3, "AABBBBBCCC", []azure.BlockStatus{azure.BlockStatusUncommitted, azure.BlockStatusCommitted, azure.BlockStatusCommitted}}, // split first block - {4, 1, "BBBBBCCC", []azure.BlockStatus{azure.BlockStatusCommitted, azure.BlockStatusCommitted}}, // write to last char of first block - {1, 6, "BBBCCC", []azure.BlockStatus{azure.BlockStatusUncommitted, azure.BlockStatusCommitted}}, // overwrite splits first and second block, last block remains - {3, 8, "CC", []azure.BlockStatus{azure.BlockStatusUncommitted}}, // overwrite a block in middle block, split end block - {10, 1, "CC", []azure.BlockStatus{azure.BlockStatusUncommitted}}, // overwrite first byte of rightmost block - {11, 2, "", []azure.BlockStatus{}}, // overwrite the rightmost index - {13, 20, "", []azure.BlockStatus{}}, // append to the end - } - - for _, c := range cases { - s := NewStorageSimulator() - rw := newRandomBlobWriter(&s, 5) - rand := newBlockIDGenerator() - - if err := rw.bs.CreateBlockBlob("a", "b"); err != nil { - t.Fatal(err) - } - bw, _, err := rw.writeChunkToBlocks("a", "b", strings.NewReader(blob), rand) - if err != nil { - t.Fatal(err) - } - if err := rw.bs.PutBlockList("a", "b", bw); err != nil { - t.Fatal(err) - } - bx, err := rw.blocksRightSide("a", "b", c.offset, c.size, rand) - if err != nil { - t.Fatal(err) - } - - bs := []azure.BlockStatus{} - for _, v := range bx { - bs = append(bs, v.Status) - } - - if !reflect.DeepEqual(bs, c.expectedPattern) { - t.Logf("Committed blocks %v", bw) - t.Fatalf("For offset %v-size:%v: Expected pattern: %v, Got: %v\n(Returned: %v)", c.offset, c.size, c.expectedPattern, bs, bx) - } - if rw.bs.PutBlockList("a", "b", bx); err != nil { - t.Fatal(err) - } - r, err := rw.bs.GetBlob("a", "b") - if err != nil { - t.Fatal(err) - } - cout, err := ioutil.ReadAll(r) - if err != nil { - t.Fatal(err) - } - outBlob := string(cout) - if outBlob != c.expectedBlob { - t.Fatalf("For offset %v-size:%v: wrong blob contents: %v, expected: %v", c.offset, c.size, outBlob, c.expectedBlob) - } - } -} - -func TestRandomWriter_Write_NewBlob(t *testing.T) { - var ( - s = NewStorageSimulator() - rw = newRandomBlobWriter(&s, 1024*3) // 3 KB blocks - blob = randomContents(1024 * 7) // 7 KB blob - ) - if err := rw.bs.CreateBlockBlob("a", "b"); err != nil { - t.Fatal(err) - } - - if _, err := rw.WriteBlobAt("a", "b", 10, bytes.NewReader(blob)); err == nil { - t.Fatal("expected error, got nil") - } - if _, err := rw.WriteBlobAt("a", "b", 100000, bytes.NewReader(blob)); err == nil { - t.Fatal("expected error, got nil") - } - if nn, err := rw.WriteBlobAt("a", "b", 0, bytes.NewReader(blob)); err != nil { - t.Fatal(err) - } else if expected := int64(len(blob)); expected != nn { - t.Fatalf("wrong written bytes count: %v, expected: %v", nn, expected) - } - if out, err := rw.bs.GetBlob("a", "b"); err != nil { - t.Fatal(err) - } else { - assertBlobContents(t, out, blob) - } - if bx, err := rw.bs.GetBlockList("a", "b", azure.BlockListTypeCommitted); err != nil { - t.Fatal(err) - } else if len(bx.CommittedBlocks) != 3 { - t.Fatalf("got wrong number of committed blocks: %v", len(bx.CommittedBlocks)) - } - - // Replace first 512 bytes - leftChunk := randomContents(512) - blob = append(leftChunk, blob[512:]...) - if nn, err := rw.WriteBlobAt("a", "b", 0, bytes.NewReader(leftChunk)); err != nil { - t.Fatal(err) - } else if expected := int64(len(leftChunk)); expected != nn { - t.Fatalf("wrong written bytes count: %v, expected: %v", nn, expected) - } - if out, err := rw.bs.GetBlob("a", "b"); err != nil { - t.Fatal(err) - } else { - assertBlobContents(t, out, blob) - } - if bx, err := rw.bs.GetBlockList("a", "b", azure.BlockListTypeCommitted); err != nil { - t.Fatal(err) - } else if expected := 4; len(bx.CommittedBlocks) != expected { - t.Fatalf("got wrong number of committed blocks: %v, expected: %v", len(bx.CommittedBlocks), expected) - } - - // Replace last 512 bytes with 1024 bytes - rightChunk := randomContents(1024) - offset := int64(len(blob) - 512) - blob = append(blob[:offset], rightChunk...) - if nn, err := rw.WriteBlobAt("a", "b", offset, bytes.NewReader(rightChunk)); err != nil { - t.Fatal(err) - } else if expected := int64(len(rightChunk)); expected != nn { - t.Fatalf("wrong written bytes count: %v, expected: %v", nn, expected) - } - if out, err := rw.bs.GetBlob("a", "b"); err != nil { - t.Fatal(err) - } else { - assertBlobContents(t, out, blob) - } - if bx, err := rw.bs.GetBlockList("a", "b", azure.BlockListTypeCommitted); err != nil { - t.Fatal(err) - } else if expected := 5; len(bx.CommittedBlocks) != expected { - t.Fatalf("got wrong number of committed blocks: %v, expected: %v", len(bx.CommittedBlocks), expected) - } - - // Replace 2K-4K (overlaps 2 blocks from L/R) - newChunk := randomContents(1024 * 2) - offset = 1024 * 2 - blob = append(append(blob[:offset], newChunk...), blob[offset+int64(len(newChunk)):]...) - if nn, err := rw.WriteBlobAt("a", "b", offset, bytes.NewReader(newChunk)); err != nil { - t.Fatal(err) - } else if expected := int64(len(newChunk)); expected != nn { - t.Fatalf("wrong written bytes count: %v, expected: %v", nn, expected) - } - if out, err := rw.bs.GetBlob("a", "b"); err != nil { - t.Fatal(err) - } else { - assertBlobContents(t, out, blob) - } - if bx, err := rw.bs.GetBlockList("a", "b", azure.BlockListTypeCommitted); err != nil { - t.Fatal(err) - } else if expected := 6; len(bx.CommittedBlocks) != expected { - t.Fatalf("got wrong number of committed blocks: %v, expected: %v\n%v", len(bx.CommittedBlocks), expected, bx.CommittedBlocks) - } - - // Replace the entire blob - newBlob := randomContents(1024 * 30) - if nn, err := rw.WriteBlobAt("a", "b", 0, bytes.NewReader(newBlob)); err != nil { - t.Fatal(err) - } else if expected := int64(len(newBlob)); expected != nn { - t.Fatalf("wrong written bytes count: %v, expected: %v", nn, expected) - } - if out, err := rw.bs.GetBlob("a", "b"); err != nil { - t.Fatal(err) - } else { - assertBlobContents(t, out, newBlob) - } - if bx, err := rw.bs.GetBlockList("a", "b", azure.BlockListTypeCommitted); err != nil { - t.Fatal(err) - } else if expected := 10; len(bx.CommittedBlocks) != expected { - t.Fatalf("got wrong number of committed blocks: %v, expected: %v\n%v", len(bx.CommittedBlocks), expected, bx.CommittedBlocks) - } else if expected, size := int64(1024*30), getBlobSize(bx); size != expected { - t.Fatalf("committed block size does not indicate blob size") - } -} - -func Test_getBlobSize(t *testing.T) { - // with some committed blocks - if expected, size := int64(151), getBlobSize(azure.BlockListResponse{ - CommittedBlocks: []azure.BlockResponse{ - {"A", 100}, - {"B", 50}, - {"C", 1}, - }, - UncommittedBlocks: []azure.BlockResponse{ - {"D", 200}, - }}); expected != size { - t.Fatalf("wrong blob size: %v, expected: %v", size, expected) - } - - // with no committed blocks - if expected, size := int64(0), getBlobSize(azure.BlockListResponse{ - UncommittedBlocks: []azure.BlockResponse{ - {"A", 100}, - {"B", 50}, - {"C", 1}, - {"D", 200}, - }}); expected != size { - t.Fatalf("wrong blob size: %v, expected: %v", size, expected) - } -} - -func assertBlobContents(t *testing.T, r io.Reader, expected []byte) { - out, err := ioutil.ReadAll(r) - if err != nil { - t.Fatal(err) - } - - if !reflect.DeepEqual(out, expected) { - t.Fatalf("wrong blob contents. size: %v, expected: %v", len(out), len(expected)) - } -} - -func randomContents(length int64) []byte { - b := make([]byte, length) - for i := range b { - b[i] = byte(rand.Intn(2 << 8)) - } - return b -} diff --git a/registry/storage/driver/azure/zerofillwriter.go b/registry/storage/driver/azure/zerofillwriter.go deleted file mode 100644 index 095489d2..00000000 --- a/registry/storage/driver/azure/zerofillwriter.go +++ /dev/null @@ -1,49 +0,0 @@ -package azure - -import ( - "bytes" - "io" -) - -type blockBlobWriter interface { - GetSize(container, blob string) (int64, error) - WriteBlobAt(container, blob string, offset int64, chunk io.Reader) (int64, error) -} - -// zeroFillWriter enables writing to an offset outside a block blob's size -// by offering the chunk to the underlying writer as a contiguous data with -// the gap in between filled with NUL (zero) bytes. -type zeroFillWriter struct { - blockBlobWriter -} - -func newZeroFillWriter(b blockBlobWriter) zeroFillWriter { - w := zeroFillWriter{} - w.blockBlobWriter = b - return w -} - -// Write writes the given chunk to the specified existing blob even though -// offset is out of blob's size. The gaps are filled with zeros. Returned -// written number count does not include zeros written. -func (z *zeroFillWriter) Write(container, blob string, offset int64, chunk io.Reader) (int64, error) { - size, err := z.blockBlobWriter.GetSize(container, blob) - if err != nil { - return 0, err - } - - var reader io.Reader - var zeroPadding int64 - if offset <= size { - reader = chunk - } else { - zeroPadding = offset - size - offset = size // adjust offset to be the append index - zeros := bytes.NewReader(make([]byte, zeroPadding)) - reader = io.MultiReader(zeros, chunk) - } - - nn, err := z.blockBlobWriter.WriteBlobAt(container, blob, offset, reader) - nn -= zeroPadding - return nn, err -} diff --git a/registry/storage/driver/azure/zerofillwriter_test.go b/registry/storage/driver/azure/zerofillwriter_test.go deleted file mode 100644 index 49361791..00000000 --- a/registry/storage/driver/azure/zerofillwriter_test.go +++ /dev/null @@ -1,126 +0,0 @@ -package azure - -import ( - "bytes" - "testing" -) - -func Test_zeroFillWrite_AppendNoGap(t *testing.T) { - s := NewStorageSimulator() - bw := newRandomBlobWriter(&s, 1024*1) - zw := newZeroFillWriter(&bw) - if err := s.CreateBlockBlob("a", "b"); err != nil { - t.Fatal(err) - } - - firstChunk := randomContents(1024*3 + 512) - if nn, err := zw.Write("a", "b", 0, bytes.NewReader(firstChunk)); err != nil { - t.Fatal(err) - } else if expected := int64(len(firstChunk)); expected != nn { - t.Fatalf("wrong written bytes count: %v, expected: %v", nn, expected) - } - if out, err := s.GetBlob("a", "b"); err != nil { - t.Fatal(err) - } else { - assertBlobContents(t, out, firstChunk) - } - - secondChunk := randomContents(256) - if nn, err := zw.Write("a", "b", int64(len(firstChunk)), bytes.NewReader(secondChunk)); err != nil { - t.Fatal(err) - } else if expected := int64(len(secondChunk)); expected != nn { - t.Fatalf("wrong written bytes count: %v, expected: %v", nn, expected) - } - if out, err := s.GetBlob("a", "b"); err != nil { - t.Fatal(err) - } else { - assertBlobContents(t, out, append(firstChunk, secondChunk...)) - } - -} - -func Test_zeroFillWrite_StartWithGap(t *testing.T) { - s := NewStorageSimulator() - bw := newRandomBlobWriter(&s, 1024*2) - zw := newZeroFillWriter(&bw) - if err := s.CreateBlockBlob("a", "b"); err != nil { - t.Fatal(err) - } - - chunk := randomContents(1024 * 5) - padding := int64(1024*2 + 256) - if nn, err := zw.Write("a", "b", padding, bytes.NewReader(chunk)); err != nil { - t.Fatal(err) - } else if expected := int64(len(chunk)); expected != nn { - t.Fatalf("wrong written bytes count: %v, expected: %v", nn, expected) - } - if out, err := s.GetBlob("a", "b"); err != nil { - t.Fatal(err) - } else { - assertBlobContents(t, out, append(make([]byte, padding), chunk...)) - } -} - -func Test_zeroFillWrite_AppendWithGap(t *testing.T) { - s := NewStorageSimulator() - bw := newRandomBlobWriter(&s, 1024*2) - zw := newZeroFillWriter(&bw) - if err := s.CreateBlockBlob("a", "b"); err != nil { - t.Fatal(err) - } - - firstChunk := randomContents(1024*3 + 512) - if _, err := zw.Write("a", "b", 0, bytes.NewReader(firstChunk)); err != nil { - t.Fatal(err) - } - if out, err := s.GetBlob("a", "b"); err != nil { - t.Fatal(err) - } else { - assertBlobContents(t, out, firstChunk) - } - - secondChunk := randomContents(256) - padding := int64(1024 * 4) - if nn, err := zw.Write("a", "b", int64(len(firstChunk))+padding, bytes.NewReader(secondChunk)); err != nil { - t.Fatal(err) - } else if expected := int64(len(secondChunk)); expected != nn { - t.Fatalf("wrong written bytes count: %v, expected: %v", nn, expected) - } - if out, err := s.GetBlob("a", "b"); err != nil { - t.Fatal(err) - } else { - assertBlobContents(t, out, append(firstChunk, append(make([]byte, padding), secondChunk...)...)) - } -} - -func Test_zeroFillWrite_LiesWithinSize(t *testing.T) { - s := NewStorageSimulator() - bw := newRandomBlobWriter(&s, 1024*2) - zw := newZeroFillWriter(&bw) - if err := s.CreateBlockBlob("a", "b"); err != nil { - t.Fatal(err) - } - - firstChunk := randomContents(1024 * 3) - if _, err := zw.Write("a", "b", 0, bytes.NewReader(firstChunk)); err != nil { - t.Fatal(err) - } - if out, err := s.GetBlob("a", "b"); err != nil { - t.Fatal(err) - } else { - assertBlobContents(t, out, firstChunk) - } - - // in this case, zerofill won't be used - secondChunk := randomContents(256) - if nn, err := zw.Write("a", "b", 0, bytes.NewReader(secondChunk)); err != nil { - t.Fatal(err) - } else if expected := int64(len(secondChunk)); expected != nn { - t.Fatalf("wrong written bytes count: %v, expected: %v", nn, expected) - } - if out, err := s.GetBlob("a", "b"); err != nil { - t.Fatal(err) - } else { - assertBlobContents(t, out, append(secondChunk, firstChunk[len(secondChunk):]...)) - } -} diff --git a/registry/storage/driver/base/base.go b/registry/storage/driver/base/base.go index c816d2d6..064bda60 100644 --- a/registry/storage/driver/base/base.go +++ b/registry/storage/driver/base/base.go @@ -102,10 +102,10 @@ func (base *Base) PutContent(ctx context.Context, path string, content []byte) e return base.setDriverName(base.StorageDriver.PutContent(ctx, path, content)) } -// ReadStream wraps ReadStream of underlying storage driver. -func (base *Base) ReadStream(ctx context.Context, path string, offset int64) (io.ReadCloser, error) { +// Reader wraps Reader of underlying storage driver. +func (base *Base) Reader(ctx context.Context, path string, offset int64) (io.ReadCloser, error) { ctx, done := context.WithTrace(ctx) - defer done("%s.ReadStream(%q, %d)", base.Name(), path, offset) + defer done("%s.Reader(%q, %d)", base.Name(), path, offset) if offset < 0 { return nil, storagedriver.InvalidOffsetError{Path: path, Offset: offset, DriverName: base.StorageDriver.Name()} @@ -115,25 +115,21 @@ func (base *Base) ReadStream(ctx context.Context, path string, offset int64) (io return nil, storagedriver.InvalidPathError{Path: path, DriverName: base.StorageDriver.Name()} } - rc, e := base.StorageDriver.ReadStream(ctx, path, offset) + rc, e := base.StorageDriver.Reader(ctx, path, offset) return rc, base.setDriverName(e) } -// WriteStream wraps WriteStream of underlying storage driver. -func (base *Base) WriteStream(ctx context.Context, path string, offset int64, reader io.Reader) (nn int64, err error) { +// Writer wraps Writer of underlying storage driver. +func (base *Base) Writer(ctx context.Context, path string, append bool) (storagedriver.FileWriter, error) { ctx, done := context.WithTrace(ctx) - defer done("%s.WriteStream(%q, %d)", base.Name(), path, offset) - - if offset < 0 { - return 0, storagedriver.InvalidOffsetError{Path: path, Offset: offset, DriverName: base.StorageDriver.Name()} - } + defer done("%s.Writer(%q, %v)", base.Name(), path, append) if !storagedriver.PathRegexp.MatchString(path) { - return 0, storagedriver.InvalidPathError{Path: path, DriverName: base.StorageDriver.Name()} + return nil, storagedriver.InvalidPathError{Path: path, DriverName: base.StorageDriver.Name()} } - i64, e := base.StorageDriver.WriteStream(ctx, path, offset, reader) - return i64, base.setDriverName(e) + writer, e := base.StorageDriver.Writer(ctx, path, append) + return writer, base.setDriverName(e) } // Stat wraps Stat of underlying storage driver. diff --git a/registry/storage/driver/filesystem/driver.go b/registry/storage/driver/filesystem/driver.go index 5b495818..3bbdc637 100644 --- a/registry/storage/driver/filesystem/driver.go +++ b/registry/storage/driver/filesystem/driver.go @@ -1,6 +1,7 @@ package filesystem import ( + "bufio" "bytes" "fmt" "io" @@ -78,7 +79,7 @@ func (d *driver) Name() string { // GetContent retrieves the content stored at "path" as a []byte. func (d *driver) GetContent(ctx context.Context, path string) ([]byte, error) { - rc, err := d.ReadStream(ctx, path, 0) + rc, err := d.Reader(ctx, path, 0) if err != nil { return nil, err } @@ -94,16 +95,22 @@ func (d *driver) GetContent(ctx context.Context, path string) ([]byte, error) { // PutContent stores the []byte content at a location designated by "path". func (d *driver) PutContent(ctx context.Context, subPath string, contents []byte) error { - if _, err := d.WriteStream(ctx, subPath, 0, bytes.NewReader(contents)); err != nil { + writer, err := d.Writer(ctx, subPath, false) + if err != nil { return err } - - return os.Truncate(d.fullPath(subPath), int64(len(contents))) + defer writer.Close() + _, err = io.Copy(writer, bytes.NewReader(contents)) + if err != nil { + writer.Cancel() + return err + } + return writer.Commit() } -// ReadStream retrieves an io.ReadCloser for the content stored at "path" with a +// Reader retrieves an io.ReadCloser for the content stored at "path" with a // given byte offset. -func (d *driver) ReadStream(ctx context.Context, path string, offset int64) (io.ReadCloser, error) { +func (d *driver) Reader(ctx context.Context, path string, offset int64) (io.ReadCloser, error) { file, err := os.OpenFile(d.fullPath(path), os.O_RDONLY, 0644) if err != nil { if os.IsNotExist(err) { @@ -125,40 +132,36 @@ func (d *driver) ReadStream(ctx context.Context, path string, offset int64) (io. return file, nil } -// WriteStream stores the contents of the provided io.Reader at a location -// designated by the given path. -func (d *driver) WriteStream(ctx context.Context, subPath string, offset int64, reader io.Reader) (nn int64, err error) { - // TODO(stevvooe): This needs to be a requirement. - // if !path.IsAbs(subPath) { - // return fmt.Errorf("absolute path required: %q", subPath) - // } - +func (d *driver) Writer(ctx context.Context, subPath string, append bool) (storagedriver.FileWriter, error) { fullPath := d.fullPath(subPath) parentDir := path.Dir(fullPath) if err := os.MkdirAll(parentDir, 0777); err != nil { - return 0, err + return nil, err } fp, err := os.OpenFile(fullPath, os.O_WRONLY|os.O_CREATE, 0666) if err != nil { - // TODO(stevvooe): A few missing conditions in storage driver: - // 1. What if the path is already a directory? - // 2. Should number 1 be exposed explicitly in storagedriver? - // 2. Can this path not exist, even if we create above? - return 0, err - } - defer fp.Close() - - nn, err = fp.Seek(offset, os.SEEK_SET) - if err != nil { - return 0, err + return nil, err } - if nn != offset { - return 0, fmt.Errorf("bad seek to %v, expected %v in fp=%v", offset, nn, fp) + var offset int64 + + if !append { + err := fp.Truncate(0) + if err != nil { + fp.Close() + return nil, err + } + } else { + n, err := fp.Seek(0, os.SEEK_END) + if err != nil { + fp.Close() + return nil, err + } + offset = int64(n) } - return io.Copy(fp, reader) + return newFileWriter(fp, offset), nil } // Stat retrieves the FileInfo for the given path, including the current size @@ -286,3 +289,88 @@ func (fi fileInfo) ModTime() time.Time { func (fi fileInfo) IsDir() bool { return fi.FileInfo.IsDir() } + +type fileWriter struct { + file *os.File + size int64 + bw *bufio.Writer + closed bool + committed bool + cancelled bool +} + +func newFileWriter(file *os.File, size int64) *fileWriter { + return &fileWriter{ + file: file, + size: size, + bw: bufio.NewWriter(file), + } +} + +func (fw *fileWriter) Write(p []byte) (int, error) { + if fw.closed { + return 0, fmt.Errorf("already closed") + } else if fw.committed { + return 0, fmt.Errorf("already committed") + } else if fw.cancelled { + return 0, fmt.Errorf("already cancelled") + } + n, err := fw.bw.Write(p) + fw.size += int64(n) + return n, err +} + +func (fw *fileWriter) Size() int64 { + return fw.size +} + +func (fw *fileWriter) Close() error { + if fw.closed { + return fmt.Errorf("already closed") + } + + if err := fw.bw.Flush(); err != nil { + return err + } + + if err := fw.file.Sync(); err != nil { + return err + } + + if err := fw.file.Close(); err != nil { + return err + } + fw.closed = true + return nil +} + +func (fw *fileWriter) Cancel() error { + if fw.closed { + return fmt.Errorf("already closed") + } + + fw.cancelled = true + fw.file.Close() + return os.Remove(fw.file.Name()) +} + +func (fw *fileWriter) Commit() error { + if fw.closed { + return fmt.Errorf("already closed") + } else if fw.committed { + return fmt.Errorf("already committed") + } else if fw.cancelled { + return fmt.Errorf("already cancelled") + } + + if err := fw.bw.Flush(); err != nil { + return err + } + + if err := fw.file.Sync(); err != nil { + return err + } + + fw.committed = true + return nil +} diff --git a/registry/storage/driver/inmemory/driver.go b/registry/storage/driver/inmemory/driver.go index b5735c0a..eb2fd1cf 100644 --- a/registry/storage/driver/inmemory/driver.go +++ b/registry/storage/driver/inmemory/driver.go @@ -1,7 +1,6 @@ package inmemory import ( - "bytes" "fmt" "io" "io/ioutil" @@ -74,7 +73,7 @@ func (d *driver) GetContent(ctx context.Context, path string) ([]byte, error) { d.mutex.RLock() defer d.mutex.RUnlock() - rc, err := d.ReadStream(ctx, path, 0) + rc, err := d.Reader(ctx, path, 0) if err != nil { return nil, err } @@ -88,7 +87,9 @@ func (d *driver) PutContent(ctx context.Context, p string, contents []byte) erro d.mutex.Lock() defer d.mutex.Unlock() - f, err := d.root.mkfile(p) + normalized := normalize(p) + + f, err := d.root.mkfile(normalized) if err != nil { // TODO(stevvooe): Again, we need to clarify when this is not a // directory in StorageDriver API. @@ -101,9 +102,9 @@ func (d *driver) PutContent(ctx context.Context, p string, contents []byte) erro return nil } -// ReadStream retrieves an io.ReadCloser for the content stored at "path" with a +// Reader retrieves an io.ReadCloser for the content stored at "path" with a // given byte offset. -func (d *driver) ReadStream(ctx context.Context, path string, offset int64) (io.ReadCloser, error) { +func (d *driver) Reader(ctx context.Context, path string, offset int64) (io.ReadCloser, error) { d.mutex.RLock() defer d.mutex.RUnlock() @@ -111,10 +112,10 @@ func (d *driver) ReadStream(ctx context.Context, path string, offset int64) (io. return nil, storagedriver.InvalidOffsetError{Path: path, Offset: offset} } - path = normalize(path) - found := d.root.find(path) + normalized := normalize(path) + found := d.root.find(normalized) - if found.path() != path { + if found.path() != normalized { return nil, storagedriver.PathNotFoundError{Path: path} } @@ -125,46 +126,24 @@ func (d *driver) ReadStream(ctx context.Context, path string, offset int64) (io. return ioutil.NopCloser(found.(*file).sectionReader(offset)), nil } -// WriteStream stores the contents of the provided io.ReadCloser at a location -// designated by the given path. -func (d *driver) WriteStream(ctx context.Context, path string, offset int64, reader io.Reader) (nn int64, err error) { +// Writer returns a FileWriter which will store the content written to it +// at the location designated by "path" after the call to Commit. +func (d *driver) Writer(ctx context.Context, path string, append bool) (storagedriver.FileWriter, error) { d.mutex.Lock() defer d.mutex.Unlock() - if offset < 0 { - return 0, storagedriver.InvalidOffsetError{Path: path, Offset: offset} - } - normalized := normalize(path) f, err := d.root.mkfile(normalized) if err != nil { - return 0, fmt.Errorf("not a file") + return nil, fmt.Errorf("not a file") } - // Unlock while we are reading from the source, in case we are reading - // from the same mfs instance. This can be fixed by a more granular - // locking model. - d.mutex.Unlock() - d.mutex.RLock() // Take the readlock to block other writers. - var buf bytes.Buffer - - nn, err = buf.ReadFrom(reader) - if err != nil { - // TODO(stevvooe): This condition is odd and we may need to clarify: - // we've read nn bytes from reader but have written nothing to the - // backend. What is the correct return value? Really, the caller needs - // to know that the reader has been advanced and reattempting the - // operation is incorrect. - d.mutex.RUnlock() - d.mutex.Lock() - return nn, err + if !append { + f.truncate() } - d.mutex.RUnlock() - d.mutex.Lock() - f.WriteAt(buf.Bytes(), offset) - return nn, err + return d.newWriter(f), nil } // Stat returns info about the provided path. @@ -173,7 +152,7 @@ func (d *driver) Stat(ctx context.Context, path string) (storagedriver.FileInfo, defer d.mutex.RUnlock() normalized := normalize(path) - found := d.root.find(path) + found := d.root.find(normalized) if found.path() != normalized { return nil, storagedriver.PathNotFoundError{Path: path} @@ -260,3 +239,74 @@ func (d *driver) Delete(ctx context.Context, path string) error { func (d *driver) URLFor(ctx context.Context, path string, options map[string]interface{}) (string, error) { return "", storagedriver.ErrUnsupportedMethod{} } + +type writer struct { + d *driver + f *file + closed bool + committed bool + cancelled bool +} + +func (d *driver) newWriter(f *file) storagedriver.FileWriter { + return &writer{ + d: d, + f: f, + } +} + +func (w *writer) Write(p []byte) (int, error) { + if w.closed { + return 0, fmt.Errorf("already closed") + } else if w.committed { + return 0, fmt.Errorf("already committed") + } else if w.cancelled { + return 0, fmt.Errorf("already cancelled") + } + + w.d.mutex.Lock() + defer w.d.mutex.Unlock() + + return w.f.WriteAt(p, int64(len(w.f.data))) +} + +func (w *writer) Size() int64 { + w.d.mutex.RLock() + defer w.d.mutex.RUnlock() + + return int64(len(w.f.data)) +} + +func (w *writer) Close() error { + if w.closed { + return fmt.Errorf("already closed") + } + w.closed = true + return nil +} + +func (w *writer) Cancel() error { + if w.closed { + return fmt.Errorf("already closed") + } else if w.committed { + return fmt.Errorf("already committed") + } + w.cancelled = true + + w.d.mutex.Lock() + defer w.d.mutex.Unlock() + + return w.d.root.delete(w.f.path()) +} + +func (w *writer) Commit() error { + if w.closed { + return fmt.Errorf("already closed") + } else if w.committed { + return fmt.Errorf("already committed") + } else if w.cancelled { + return fmt.Errorf("already cancelled") + } + w.committed = true + return nil +} diff --git a/registry/storage/driver/s3-aws/s3.go b/registry/storage/driver/s3-aws/s3.go index 0e113680..eb617d73 100644 --- a/registry/storage/driver/s3-aws/s3.go +++ b/registry/storage/driver/s3-aws/s3.go @@ -20,10 +20,8 @@ import ( "reflect" "strconv" "strings" - "sync" "time" - "github.com/Sirupsen/logrus" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/aws/awserr" "github.com/aws/aws-sdk-go/aws/credentials" @@ -103,9 +101,6 @@ type driver struct { Encrypt bool RootDirectory string StorageClass string - - pool sync.Pool // pool []byte buffers used for WriteStream - zeros []byte // shared, zero-valued buffer used for WriteStream } type baseEmbed struct { @@ -302,11 +297,6 @@ func New(params DriverParameters) (*Driver, error) { Encrypt: params.Encrypt, RootDirectory: params.RootDirectory, StorageClass: params.StorageClass, - zeros: make([]byte, params.ChunkSize), - } - - d.pool.New = func() interface{} { - return make([]byte, d.ChunkSize) } return &Driver{ @@ -326,7 +316,7 @@ func (d *driver) Name() string { // GetContent retrieves the content stored at "path" as a []byte. func (d *driver) GetContent(ctx context.Context, path string) ([]byte, error) { - reader, err := d.ReadStream(ctx, path, 0) + reader, err := d.Reader(ctx, path, 0) if err != nil { return nil, err } @@ -347,9 +337,9 @@ func (d *driver) PutContent(ctx context.Context, path string, contents []byte) e return parseError(path, err) } -// ReadStream retrieves an io.ReadCloser for the content stored at "path" with a +// Reader retrieves an io.ReadCloser for the content stored at "path" with a // given byte offset. -func (d *driver) ReadStream(ctx context.Context, path string, offset int64) (io.ReadCloser, error) { +func (d *driver) Reader(ctx context.Context, path string, offset int64) (io.ReadCloser, error) { resp, err := d.S3.GetObject(&s3.GetObjectInput{ Bucket: aws.String(d.Bucket), Key: aws.String(d.s3Path(path)), @@ -366,372 +356,52 @@ func (d *driver) ReadStream(ctx context.Context, path string, offset int64) (io. return resp.Body, nil } -// WriteStream stores the contents of the provided io.Reader at a -// location designated by the given path. The driver will know it has -// received the full contents when the reader returns io.EOF. The number -// of successfully READ bytes will be returned, even if an error is -// returned. May be used to resume writing a stream by providing a nonzero -// offset. Offsets past the current size will write from the position -// beyond the end of the file. -func (d *driver) WriteStream(ctx context.Context, path string, offset int64, reader io.Reader) (totalRead int64, err error) { - var partNumber int64 = 1 - bytesRead := 0 - var putErrChan chan error - parts := []*s3.CompletedPart{} - done := make(chan struct{}) // stopgap to free up waiting goroutines - - resp, err := d.S3.CreateMultipartUpload(&s3.CreateMultipartUploadInput{ - Bucket: aws.String(d.Bucket), - Key: aws.String(d.s3Path(path)), - ContentType: d.getContentType(), - ACL: d.getACL(), - ServerSideEncryption: d.getEncryptionMode(), - StorageClass: d.getStorageClass(), - }) - if err != nil { - return 0, err - } - - uploadID := resp.UploadId - - buf := d.getbuf() - - // We never want to leave a dangling multipart upload, our only consistent state is - // when there is a whole object at path. This is in order to remain consistent with - // the stat call. - // - // Note that if the machine dies before executing the defer, we will be left with a dangling - // multipart upload, which will eventually be cleaned up, but we will lose all of the progress - // made prior to the machine crashing. - defer func() { - if putErrChan != nil { - if putErr := <-putErrChan; putErr != nil { - err = putErr - } - } - - if len(parts) > 0 { - _, err := d.S3.CompleteMultipartUpload(&s3.CompleteMultipartUploadInput{ - Bucket: aws.String(d.Bucket), - Key: aws.String(d.s3Path(path)), - UploadId: uploadID, - MultipartUpload: &s3.CompletedMultipartUpload{ - Parts: parts, - }, - }) - if err != nil { - // TODO (brianbland): log errors here - d.S3.AbortMultipartUpload(&s3.AbortMultipartUploadInput{ - Bucket: aws.String(d.Bucket), - Key: aws.String(d.s3Path(path)), - UploadId: uploadID, - }) - } - } - - d.putbuf(buf) // needs to be here to pick up new buf value - close(done) // free up any waiting goroutines - }() - - // Fills from 0 to total from current - fromSmallCurrent := func(total int64) error { - current, err := d.ReadStream(ctx, path, 0) - if err != nil { - return err - } - - bytesRead = 0 - for int64(bytesRead) < total { - //The loop should very rarely enter a second iteration - nn, err := current.Read(buf[bytesRead:total]) - bytesRead += nn - if err != nil { - if err != io.EOF { - return err - } - - break - } - - } - return nil - } - - // Fills from parameter to chunkSize from reader - fromReader := func(from int64) error { - bytesRead = 0 - for from+int64(bytesRead) < d.ChunkSize { - nn, err := reader.Read(buf[from+int64(bytesRead):]) - totalRead += int64(nn) - bytesRead += nn - - if err != nil { - if err != io.EOF { - return err - } - - break - } - } - - if putErrChan == nil { - putErrChan = make(chan error) - } else { - if putErr := <-putErrChan; putErr != nil { - putErrChan = nil - return putErr - } - } - - go func(bytesRead int, from int64, buf []byte) { - defer d.putbuf(buf) // this buffer gets dropped after this call - - // DRAGONS(stevvooe): There are few things one might want to know - // about this section. First, the putErrChan is expecting an error - // and a nil or just a nil to come through the channel. This is - // covered by the silly defer below. The other aspect is the s3 - // retry backoff to deal with RequestTimeout errors. Even though - // the underlying s3 library should handle it, it doesn't seem to - // be part of the shouldRetry function (see AdRoll/goamz/s3). - defer func() { - select { - case putErrChan <- nil: // for some reason, we do this no matter what. - case <-done: - return // ensure we don't leak the goroutine - } - }() - - if bytesRead <= 0 { - return - } - - resp, err := d.S3.UploadPart(&s3.UploadPartInput{ - Bucket: aws.String(d.Bucket), - Key: aws.String(d.s3Path(path)), - PartNumber: aws.Int64(partNumber), - UploadId: uploadID, - Body: bytes.NewReader(buf[0 : int64(bytesRead)+from]), - }) - if err != nil { - logrus.Errorf("error putting part, aborting: %v", err) - select { - case putErrChan <- err: - case <-done: - return // don't leak the goroutine - } - } - - // parts and partNumber are safe, because this function is the - // only one modifying them and we force it to be executed - // serially. - parts = append(parts, &s3.CompletedPart{ - ETag: resp.ETag, - PartNumber: aws.Int64(partNumber), - }) - partNumber++ - }(bytesRead, from, buf) - - buf = d.getbuf() // use a new buffer for the next call - return nil - } - - if offset > 0 { - resp, err := d.S3.HeadObject(&s3.HeadObjectInput{ - Bucket: aws.String(d.Bucket), - Key: aws.String(d.s3Path(path)), +// Writer returns a FileWriter which will store the content written to it +// at the location designated by "path" after the call to Commit. +func (d *driver) Writer(ctx context.Context, path string, append bool) (storagedriver.FileWriter, error) { + key := d.s3Path(path) + if !append { + // TODO (brianbland): cancel other uploads at this path + resp, err := d.S3.CreateMultipartUpload(&s3.CreateMultipartUploadInput{ + Bucket: aws.String(d.Bucket), + Key: aws.String(key), + ContentType: d.getContentType(), + ACL: d.getACL(), + ServerSideEncryption: d.getEncryptionMode(), + StorageClass: d.getStorageClass(), }) if err != nil { - if s3Err, ok := err.(awserr.Error); !ok || s3Err.Code() != "NoSuchKey" { - return 0, err - } - } - - currentLength := int64(0) - if err == nil && resp.ContentLength != nil { - currentLength = *resp.ContentLength - } - - if currentLength >= offset { - if offset < d.ChunkSize { - // chunkSize > currentLength >= offset - if err = fromSmallCurrent(offset); err != nil { - return totalRead, err - } - - if err = fromReader(offset); err != nil { - return totalRead, err - } - - if totalRead+offset < d.ChunkSize { - return totalRead, nil - } - } else { - // currentLength >= offset >= chunkSize - resp, err := d.S3.UploadPartCopy(&s3.UploadPartCopyInput{ - Bucket: aws.String(d.Bucket), - Key: aws.String(d.s3Path(path)), - PartNumber: aws.Int64(partNumber), - UploadId: uploadID, - CopySource: aws.String(d.Bucket + "/" + d.s3Path(path)), - CopySourceRange: aws.String("bytes=0-" + strconv.FormatInt(offset-1, 10)), - }) - if err != nil { - return 0, err - } - - parts = append(parts, &s3.CompletedPart{ - ETag: resp.CopyPartResult.ETag, - PartNumber: aws.Int64(partNumber), - }) - partNumber++ - } - } else { - // Fills between parameters with 0s but only when to - from <= chunkSize - fromZeroFillSmall := func(from, to int64) error { - bytesRead = 0 - for from+int64(bytesRead) < to { - nn, err := bytes.NewReader(d.zeros).Read(buf[from+int64(bytesRead) : to]) - bytesRead += nn - if err != nil { - return err - } - } - - return nil - } - - // Fills between parameters with 0s, making new parts - fromZeroFillLarge := func(from, to int64) error { - bytesRead64 := int64(0) - for to-(from+bytesRead64) >= d.ChunkSize { - resp, err := d.S3.UploadPart(&s3.UploadPartInput{ - Bucket: aws.String(d.Bucket), - Key: aws.String(d.s3Path(path)), - PartNumber: aws.Int64(partNumber), - UploadId: uploadID, - Body: bytes.NewReader(d.zeros), - }) - if err != nil { - return err - } - bytesRead64 += d.ChunkSize - - parts = append(parts, &s3.CompletedPart{ - ETag: resp.ETag, - PartNumber: aws.Int64(partNumber), - }) - partNumber++ - } - - return fromZeroFillSmall(0, (to-from)%d.ChunkSize) - } - - // currentLength < offset - if currentLength < d.ChunkSize { - if offset < d.ChunkSize { - // chunkSize > offset > currentLength - if err = fromSmallCurrent(currentLength); err != nil { - return totalRead, err - } - - if err = fromZeroFillSmall(currentLength, offset); err != nil { - return totalRead, err - } - - if err = fromReader(offset); err != nil { - return totalRead, err - } - - if totalRead+offset < d.ChunkSize { - return totalRead, nil - } - } else { - // offset >= chunkSize > currentLength - if err = fromSmallCurrent(currentLength); err != nil { - return totalRead, err - } - - if err = fromZeroFillSmall(currentLength, d.ChunkSize); err != nil { - return totalRead, err - } - - resp, err := d.S3.UploadPart(&s3.UploadPartInput{ - Bucket: aws.String(d.Bucket), - Key: aws.String(d.s3Path(path)), - PartNumber: aws.Int64(partNumber), - UploadId: uploadID, - Body: bytes.NewReader(buf), - }) - if err != nil { - return totalRead, err - } - - parts = append(parts, &s3.CompletedPart{ - ETag: resp.ETag, - PartNumber: aws.Int64(partNumber), - }) - partNumber++ - - //Zero fill from chunkSize up to offset, then some reader - if err = fromZeroFillLarge(d.ChunkSize, offset); err != nil { - return totalRead, err - } - - if err = fromReader(offset % d.ChunkSize); err != nil { - return totalRead, err - } - - if totalRead+(offset%d.ChunkSize) < d.ChunkSize { - return totalRead, nil - } - } - } else { - // offset > currentLength >= chunkSize - resp, err := d.S3.UploadPartCopy(&s3.UploadPartCopyInput{ - Bucket: aws.String(d.Bucket), - Key: aws.String(d.s3Path(path)), - PartNumber: aws.Int64(partNumber), - UploadId: uploadID, - CopySource: aws.String(d.Bucket + "/" + d.s3Path(path)), - }) - if err != nil { - return 0, err - } - - parts = append(parts, &s3.CompletedPart{ - ETag: resp.CopyPartResult.ETag, - PartNumber: aws.Int64(partNumber), - }) - partNumber++ - - //Zero fill from currentLength up to offset, then some reader - if err = fromZeroFillLarge(currentLength, offset); err != nil { - return totalRead, err - } - - if err = fromReader((offset - currentLength) % d.ChunkSize); err != nil { - return totalRead, err - } - - if totalRead+((offset-currentLength)%d.ChunkSize) < d.ChunkSize { - return totalRead, nil - } - } - + return nil, err } + return d.newWriter(key, *resp.UploadId, nil), nil + } + resp, err := d.S3.ListMultipartUploads(&s3.ListMultipartUploadsInput{ + Bucket: aws.String(d.Bucket), + Prefix: aws.String(key), + }) + if err != nil { + return nil, parseError(path, err) } - for { - if err = fromReader(0); err != nil { - return totalRead, err + for _, multi := range resp.Uploads { + if key != *multi.Key { + continue } - - if int64(bytesRead) < d.ChunkSize { - break + resp, err := d.S3.ListParts(&s3.ListPartsInput{ + Bucket: aws.String(d.Bucket), + Key: aws.String(key), + UploadId: multi.UploadId, + }) + if err != nil { + return nil, parseError(path, err) } + var multiSize int64 + for _, part := range resp.Parts { + multiSize += *part.Size + } + return d.newWriter(key, *multi.UploadId, resp.Parts), nil } - - return totalRead, nil + return nil, storagedriver.PathNotFoundError{Path: path} } // Stat retrieves the FileInfo for the given path, including the current size @@ -971,12 +641,258 @@ func (d *driver) getStorageClass() *string { return aws.String(d.StorageClass) } -// getbuf returns a buffer from the driver's pool with length d.ChunkSize. -func (d *driver) getbuf() []byte { - return d.pool.Get().([]byte) +// writer attempts to upload parts to S3 in a buffered fashion where the last +// part is at least as large as the chunksize, so the multipart upload could be +// cleanly resumed in the future. This is violated if Close is called after less +// than a full chunk is written. +type writer struct { + driver *driver + key string + uploadID string + parts []*s3.Part + size int64 + readyPart []byte + pendingPart []byte + closed bool + committed bool + cancelled bool } -func (d *driver) putbuf(p []byte) { - copy(p, d.zeros) - d.pool.Put(p) +func (d *driver) newWriter(key, uploadID string, parts []*s3.Part) storagedriver.FileWriter { + var size int64 + for _, part := range parts { + size += *part.Size + } + return &writer{ + driver: d, + key: key, + uploadID: uploadID, + parts: parts, + size: size, + } +} + +func (w *writer) Write(p []byte) (int, error) { + if w.closed { + return 0, fmt.Errorf("already closed") + } else if w.committed { + return 0, fmt.Errorf("already committed") + } else if w.cancelled { + return 0, fmt.Errorf("already cancelled") + } + + // If the last written part is smaller than minChunkSize, we need to make a + // new multipart upload :sadface: + if len(w.parts) > 0 && int(*w.parts[len(w.parts)-1].Size) < minChunkSize { + var completedParts []*s3.CompletedPart + for _, part := range w.parts { + completedParts = append(completedParts, &s3.CompletedPart{ + ETag: part.ETag, + PartNumber: part.PartNumber, + }) + } + _, err := w.driver.S3.CompleteMultipartUpload(&s3.CompleteMultipartUploadInput{ + Bucket: aws.String(w.driver.Bucket), + Key: aws.String(w.key), + UploadId: aws.String(w.uploadID), + MultipartUpload: &s3.CompletedMultipartUpload{ + Parts: completedParts, + }, + }) + if err != nil { + w.driver.S3.AbortMultipartUpload(&s3.AbortMultipartUploadInput{ + Bucket: aws.String(w.driver.Bucket), + Key: aws.String(w.key), + UploadId: aws.String(w.uploadID), + }) + return 0, err + } + + resp, err := w.driver.S3.CreateMultipartUpload(&s3.CreateMultipartUploadInput{ + Bucket: aws.String(w.driver.Bucket), + Key: aws.String(w.key), + ContentType: w.driver.getContentType(), + ACL: w.driver.getACL(), + ServerSideEncryption: w.driver.getEncryptionMode(), + StorageClass: w.driver.getStorageClass(), + }) + if err != nil { + return 0, err + } + w.uploadID = *resp.UploadId + + // If the entire written file is smaller than minChunkSize, we need to make + // a new part from scratch :double sad face: + if w.size < minChunkSize { + resp, err := w.driver.S3.GetObject(&s3.GetObjectInput{ + Bucket: aws.String(w.driver.Bucket), + Key: aws.String(w.key), + }) + defer resp.Body.Close() + if err != nil { + return 0, err + } + w.parts = nil + w.readyPart, err = ioutil.ReadAll(resp.Body) + if err != nil { + return 0, err + } + } else { + // Otherwise we can use the old file as the new first part + copyPartResp, err := w.driver.S3.UploadPartCopy(&s3.UploadPartCopyInput{ + Bucket: aws.String(w.driver.Bucket), + CopySource: aws.String(w.driver.Bucket + "/" + w.key), + Key: aws.String(w.key), + PartNumber: aws.Int64(1), + UploadId: resp.UploadId, + }) + if err != nil { + return 0, err + } + w.parts = []*s3.Part{ + { + ETag: copyPartResp.CopyPartResult.ETag, + PartNumber: aws.Int64(1), + Size: aws.Int64(w.size), + }, + } + } + } + + var n int + + for len(p) > 0 { + // If no parts are ready to write, fill up the first part + if neededBytes := int(w.driver.ChunkSize) - len(w.readyPart); neededBytes > 0 { + if len(p) >= neededBytes { + w.readyPart = append(w.readyPart, p[:neededBytes]...) + n += neededBytes + p = p[neededBytes:] + } else { + w.readyPart = append(w.readyPart, p...) + n += len(p) + p = nil + } + } + + if neededBytes := int(w.driver.ChunkSize) - len(w.pendingPart); neededBytes > 0 { + if len(p) >= neededBytes { + w.pendingPart = append(w.pendingPart, p[:neededBytes]...) + n += neededBytes + p = p[neededBytes:] + err := w.flushPart() + if err != nil { + w.size += int64(n) + return n, err + } + } else { + w.pendingPart = append(w.pendingPart, p...) + n += len(p) + p = nil + } + } + } + w.size += int64(n) + return n, nil +} + +func (w *writer) Size() int64 { + return w.size +} + +func (w *writer) Close() error { + if w.closed { + return fmt.Errorf("already closed") + } + w.closed = true + return w.flushPart() +} + +func (w *writer) Cancel() error { + if w.closed { + return fmt.Errorf("already closed") + } else if w.committed { + return fmt.Errorf("already committed") + } + w.cancelled = true + _, err := w.driver.S3.AbortMultipartUpload(&s3.AbortMultipartUploadInput{ + Bucket: aws.String(w.driver.Bucket), + Key: aws.String(w.key), + UploadId: aws.String(w.uploadID), + }) + return err +} + +func (w *writer) Commit() error { + if w.closed { + return fmt.Errorf("already closed") + } else if w.committed { + return fmt.Errorf("already committed") + } else if w.cancelled { + return fmt.Errorf("already cancelled") + } + err := w.flushPart() + if err != nil { + return err + } + w.committed = true + var completedParts []*s3.CompletedPart + for _, part := range w.parts { + completedParts = append(completedParts, &s3.CompletedPart{ + ETag: part.ETag, + PartNumber: part.PartNumber, + }) + } + _, err = w.driver.S3.CompleteMultipartUpload(&s3.CompleteMultipartUploadInput{ + Bucket: aws.String(w.driver.Bucket), + Key: aws.String(w.key), + UploadId: aws.String(w.uploadID), + MultipartUpload: &s3.CompletedMultipartUpload{ + Parts: completedParts, + }, + }) + if err != nil { + w.driver.S3.AbortMultipartUpload(&s3.AbortMultipartUploadInput{ + Bucket: aws.String(w.driver.Bucket), + Key: aws.String(w.key), + UploadId: aws.String(w.uploadID), + }) + return err + } + return nil +} + +// flushPart flushes buffers to write a part to S3. +// Only called by Write (with both buffers full) and Close/Commit (always) +func (w *writer) flushPart() error { + if len(w.readyPart) == 0 && len(w.pendingPart) == 0 { + // nothing to write + return nil + } + if len(w.pendingPart) < int(w.driver.ChunkSize) { + // closing with a small pending part + // combine ready and pending to avoid writing a small part + w.readyPart = append(w.readyPart, w.pendingPart...) + w.pendingPart = nil + } + + partNumber := aws.Int64(int64(len(w.parts) + 1)) + resp, err := w.driver.S3.UploadPart(&s3.UploadPartInput{ + Bucket: aws.String(w.driver.Bucket), + Key: aws.String(w.key), + PartNumber: partNumber, + UploadId: aws.String(w.uploadID), + Body: bytes.NewReader(w.readyPart), + }) + if err != nil { + return err + } + w.parts = append(w.parts, &s3.Part{ + ETag: resp.ETag, + PartNumber: partNumber, + Size: aws.Int64(int64(len(w.readyPart))), + }) + w.readyPart = w.pendingPart + w.pendingPart = nil + return nil } diff --git a/registry/storage/driver/s3-goamz/s3.go b/registry/storage/driver/s3-goamz/s3.go index 9208965b..aa2d31b7 100644 --- a/registry/storage/driver/s3-goamz/s3.go +++ b/registry/storage/driver/s3-goamz/s3.go @@ -21,10 +21,8 @@ import ( "reflect" "strconv" "strings" - "sync" "time" - "github.com/Sirupsen/logrus" "github.com/docker/goamz/aws" "github.com/docker/goamz/s3" @@ -79,9 +77,6 @@ type driver struct { Encrypt bool RootDirectory string StorageClass s3.StorageClass - - pool sync.Pool // pool []byte buffers used for WriteStream - zeros []byte // shared, zero-valued buffer used for WriteStream } type baseEmbed struct { @@ -301,11 +296,6 @@ func New(params DriverParameters) (*Driver, error) { Encrypt: params.Encrypt, RootDirectory: params.RootDirectory, StorageClass: params.StorageClass, - zeros: make([]byte, params.ChunkSize), - } - - d.pool.New = func() interface{} { - return make([]byte, d.ChunkSize) } return &Driver{ @@ -337,9 +327,9 @@ func (d *driver) PutContent(ctx context.Context, path string, contents []byte) e return parseError(path, d.Bucket.Put(d.s3Path(path), contents, d.getContentType(), getPermissions(), d.getOptions())) } -// ReadStream retrieves an io.ReadCloser for the content stored at "path" with a +// Reader retrieves an io.ReadCloser for the content stored at "path" with a // given byte offset. -func (d *driver) ReadStream(ctx context.Context, path string, offset int64) (io.ReadCloser, error) { +func (d *driver) Reader(ctx context.Context, path string, offset int64) (io.ReadCloser, error) { headers := make(http.Header) headers.Add("Range", "bytes="+strconv.FormatInt(offset, 10)+"-") @@ -354,343 +344,37 @@ func (d *driver) ReadStream(ctx context.Context, path string, offset int64) (io. return resp.Body, nil } -// WriteStream stores the contents of the provided io.Reader at a -// location designated by the given path. The driver will know it has -// received the full contents when the reader returns io.EOF. The number -// of successfully READ bytes will be returned, even if an error is -// returned. May be used to resume writing a stream by providing a nonzero -// offset. Offsets past the current size will write from the position -// beyond the end of the file. -func (d *driver) WriteStream(ctx context.Context, path string, offset int64, reader io.Reader) (totalRead int64, err error) { - partNumber := 1 - bytesRead := 0 - var putErrChan chan error - parts := []s3.Part{} - var part s3.Part - done := make(chan struct{}) // stopgap to free up waiting goroutines - - multi, err := d.Bucket.InitMulti(d.s3Path(path), d.getContentType(), getPermissions(), d.getOptions()) +// Writer returns a FileWriter which will store the content written to it +// at the location designated by "path" after the call to Commit. +func (d *driver) Writer(ctx context.Context, path string, append bool) (storagedriver.FileWriter, error) { + key := d.s3Path(path) + if !append { + // TODO (brianbland): cancel other uploads at this path + multi, err := d.Bucket.InitMulti(key, d.getContentType(), getPermissions(), d.getOptions()) + if err != nil { + return nil, err + } + return d.newWriter(key, multi, nil), nil + } + multis, _, err := d.Bucket.ListMulti(key, "") if err != nil { - return 0, err + return nil, parseError(path, err) } - - buf := d.getbuf() - - // We never want to leave a dangling multipart upload, our only consistent state is - // when there is a whole object at path. This is in order to remain consistent with - // the stat call. - // - // Note that if the machine dies before executing the defer, we will be left with a dangling - // multipart upload, which will eventually be cleaned up, but we will lose all of the progress - // made prior to the machine crashing. - defer func() { - if putErrChan != nil { - if putErr := <-putErrChan; putErr != nil { - err = putErr - } + for _, multi := range multis { + if key != multi.Key { + continue } - - if len(parts) > 0 { - if multi == nil { - // Parts should be empty if the multi is not initialized - panic("Unreachable") - } else { - if multi.Complete(parts) != nil { - multi.Abort() - } - } - } - - d.putbuf(buf) // needs to be here to pick up new buf value - close(done) // free up any waiting goroutines - }() - - // Fills from 0 to total from current - fromSmallCurrent := func(total int64) error { - current, err := d.ReadStream(ctx, path, 0) + parts, err := multi.ListParts() if err != nil { - return err + return nil, parseError(path, err) } - - bytesRead = 0 - for int64(bytesRead) < total { - //The loop should very rarely enter a second iteration - nn, err := current.Read(buf[bytesRead:total]) - bytesRead += nn - if err != nil { - if err != io.EOF { - return err - } - - break - } - + var multiSize int64 + for _, part := range parts { + multiSize += part.Size } - return nil + return d.newWriter(key, multi, parts), nil } - - // Fills from parameter to chunkSize from reader - fromReader := func(from int64) error { - bytesRead = 0 - for from+int64(bytesRead) < d.ChunkSize { - nn, err := reader.Read(buf[from+int64(bytesRead):]) - totalRead += int64(nn) - bytesRead += nn - - if err != nil { - if err != io.EOF { - return err - } - - break - } - } - - if putErrChan == nil { - putErrChan = make(chan error) - } else { - if putErr := <-putErrChan; putErr != nil { - putErrChan = nil - return putErr - } - } - - go func(bytesRead int, from int64, buf []byte) { - defer d.putbuf(buf) // this buffer gets dropped after this call - - // DRAGONS(stevvooe): There are few things one might want to know - // about this section. First, the putErrChan is expecting an error - // and a nil or just a nil to come through the channel. This is - // covered by the silly defer below. The other aspect is the s3 - // retry backoff to deal with RequestTimeout errors. Even though - // the underlying s3 library should handle it, it doesn't seem to - // be part of the shouldRetry function (see AdRoll/goamz/s3). - defer func() { - select { - case putErrChan <- nil: // for some reason, we do this no matter what. - case <-done: - return // ensure we don't leak the goroutine - } - }() - - if bytesRead <= 0 { - return - } - - var err error - var part s3.Part - - loop: - for retries := 0; retries < 5; retries++ { - part, err = multi.PutPart(int(partNumber), bytes.NewReader(buf[0:int64(bytesRead)+from])) - if err == nil { - break // success! - } - - // NOTE(stevvooe): This retry code tries to only retry under - // conditions where the s3 package does not. We may add s3 - // error codes to the below if we see others bubble up in the - // application. Right now, the most troubling is - // RequestTimeout, which seems to only triggered when a tcp - // connection to s3 slows to a crawl. If the RequestTimeout - // ends up getting added to the s3 library and we don't see - // other errors, this retry loop can be removed. - switch err := err.(type) { - case *s3.Error: - switch err.Code { - case "RequestTimeout": - // allow retries on only this error. - default: - break loop - } - } - - backoff := 100 * time.Millisecond * time.Duration(retries+1) - logrus.Errorf("error putting part, retrying after %v: %v", err, backoff.String()) - time.Sleep(backoff) - } - - if err != nil { - logrus.Errorf("error putting part, aborting: %v", err) - select { - case putErrChan <- err: - case <-done: - return // don't leak the goroutine - } - } - - // parts and partNumber are safe, because this function is the - // only one modifying them and we force it to be executed - // serially. - parts = append(parts, part) - partNumber++ - }(bytesRead, from, buf) - - buf = d.getbuf() // use a new buffer for the next call - return nil - } - - if offset > 0 { - resp, err := d.Bucket.Head(d.s3Path(path), nil) - if err != nil { - if s3Err, ok := err.(*s3.Error); !ok || s3Err.Code != "NoSuchKey" { - return 0, err - } - } - - currentLength := int64(0) - if err == nil { - currentLength = resp.ContentLength - } - - if currentLength >= offset { - if offset < d.ChunkSize { - // chunkSize > currentLength >= offset - if err = fromSmallCurrent(offset); err != nil { - return totalRead, err - } - - if err = fromReader(offset); err != nil { - return totalRead, err - } - - if totalRead+offset < d.ChunkSize { - return totalRead, nil - } - } else { - // currentLength >= offset >= chunkSize - _, part, err = multi.PutPartCopy(partNumber, - s3.CopyOptions{CopySourceOptions: "bytes=0-" + strconv.FormatInt(offset-1, 10)}, - d.Bucket.Name+"/"+d.s3Path(path)) - if err != nil { - return 0, err - } - - parts = append(parts, part) - partNumber++ - } - } else { - // Fills between parameters with 0s but only when to - from <= chunkSize - fromZeroFillSmall := func(from, to int64) error { - bytesRead = 0 - for from+int64(bytesRead) < to { - nn, err := bytes.NewReader(d.zeros).Read(buf[from+int64(bytesRead) : to]) - bytesRead += nn - if err != nil { - return err - } - } - - return nil - } - - // Fills between parameters with 0s, making new parts - fromZeroFillLarge := func(from, to int64) error { - bytesRead64 := int64(0) - for to-(from+bytesRead64) >= d.ChunkSize { - part, err := multi.PutPart(int(partNumber), bytes.NewReader(d.zeros)) - if err != nil { - return err - } - bytesRead64 += d.ChunkSize - - parts = append(parts, part) - partNumber++ - } - - return fromZeroFillSmall(0, (to-from)%d.ChunkSize) - } - - // currentLength < offset - if currentLength < d.ChunkSize { - if offset < d.ChunkSize { - // chunkSize > offset > currentLength - if err = fromSmallCurrent(currentLength); err != nil { - return totalRead, err - } - - if err = fromZeroFillSmall(currentLength, offset); err != nil { - return totalRead, err - } - - if err = fromReader(offset); err != nil { - return totalRead, err - } - - if totalRead+offset < d.ChunkSize { - return totalRead, nil - } - } else { - // offset >= chunkSize > currentLength - if err = fromSmallCurrent(currentLength); err != nil { - return totalRead, err - } - - if err = fromZeroFillSmall(currentLength, d.ChunkSize); err != nil { - return totalRead, err - } - - part, err = multi.PutPart(int(partNumber), bytes.NewReader(buf)) - if err != nil { - return totalRead, err - } - - parts = append(parts, part) - partNumber++ - - //Zero fill from chunkSize up to offset, then some reader - if err = fromZeroFillLarge(d.ChunkSize, offset); err != nil { - return totalRead, err - } - - if err = fromReader(offset % d.ChunkSize); err != nil { - return totalRead, err - } - - if totalRead+(offset%d.ChunkSize) < d.ChunkSize { - return totalRead, nil - } - } - } else { - // offset > currentLength >= chunkSize - _, part, err = multi.PutPartCopy(partNumber, - s3.CopyOptions{}, - d.Bucket.Name+"/"+d.s3Path(path)) - if err != nil { - return 0, err - } - - parts = append(parts, part) - partNumber++ - - //Zero fill from currentLength up to offset, then some reader - if err = fromZeroFillLarge(currentLength, offset); err != nil { - return totalRead, err - } - - if err = fromReader((offset - currentLength) % d.ChunkSize); err != nil { - return totalRead, err - } - - if totalRead+((offset-currentLength)%d.ChunkSize) < d.ChunkSize { - return totalRead, nil - } - } - - } - } - - for { - if err = fromReader(0); err != nil { - return totalRead, err - } - - if int64(bytesRead) < d.ChunkSize { - break - } - } - - return totalRead, nil + return nil, storagedriver.PathNotFoundError{Path: path} } // Stat retrieves the FileInfo for the given path, including the current size @@ -882,12 +566,181 @@ func (d *driver) getContentType() string { return "application/octet-stream" } -// getbuf returns a buffer from the driver's pool with length d.ChunkSize. -func (d *driver) getbuf() []byte { - return d.pool.Get().([]byte) +// writer attempts to upload parts to S3 in a buffered fashion where the last +// part is at least as large as the chunksize, so the multipart upload could be +// cleanly resumed in the future. This is violated if Close is called after less +// than a full chunk is written. +type writer struct { + driver *driver + key string + multi *s3.Multi + parts []s3.Part + size int64 + readyPart []byte + pendingPart []byte + closed bool + committed bool + cancelled bool } -func (d *driver) putbuf(p []byte) { - copy(p, d.zeros) - d.pool.Put(p) +func (d *driver) newWriter(key string, multi *s3.Multi, parts []s3.Part) storagedriver.FileWriter { + var size int64 + for _, part := range parts { + size += part.Size + } + return &writer{ + driver: d, + key: key, + multi: multi, + parts: parts, + size: size, + } +} + +func (w *writer) Write(p []byte) (int, error) { + if w.closed { + return 0, fmt.Errorf("already closed") + } else if w.committed { + return 0, fmt.Errorf("already committed") + } else if w.cancelled { + return 0, fmt.Errorf("already cancelled") + } + + // If the last written part is smaller than minChunkSize, we need to make a + // new multipart upload :sadface: + if len(w.parts) > 0 && int(w.parts[len(w.parts)-1].Size) < minChunkSize { + err := w.multi.Complete(w.parts) + if err != nil { + w.multi.Abort() + return 0, err + } + + multi, err := w.driver.Bucket.InitMulti(w.key, w.driver.getContentType(), getPermissions(), w.driver.getOptions()) + if err != nil { + return 0, err + } + w.multi = multi + + // If the entire written file is smaller than minChunkSize, we need to make + // a new part from scratch :double sad face: + if w.size < minChunkSize { + contents, err := w.driver.Bucket.Get(w.key) + if err != nil { + return 0, err + } + w.parts = nil + w.readyPart = contents + } else { + // Otherwise we can use the old file as the new first part + _, part, err := multi.PutPartCopy(1, s3.CopyOptions{}, w.driver.Bucket.Name+"/"+w.key) + if err != nil { + return 0, err + } + w.parts = []s3.Part{part} + } + } + + var n int + + for len(p) > 0 { + // If no parts are ready to write, fill up the first part + if neededBytes := int(w.driver.ChunkSize) - len(w.readyPart); neededBytes > 0 { + if len(p) >= neededBytes { + w.readyPart = append(w.readyPart, p[:neededBytes]...) + n += neededBytes + p = p[neededBytes:] + } else { + w.readyPart = append(w.readyPart, p...) + n += len(p) + p = nil + } + } + + if neededBytes := int(w.driver.ChunkSize) - len(w.pendingPart); neededBytes > 0 { + if len(p) >= neededBytes { + w.pendingPart = append(w.pendingPart, p[:neededBytes]...) + n += neededBytes + p = p[neededBytes:] + err := w.flushPart() + if err != nil { + w.size += int64(n) + return n, err + } + } else { + w.pendingPart = append(w.pendingPart, p...) + n += len(p) + p = nil + } + } + } + w.size += int64(n) + return n, nil +} + +func (w *writer) Size() int64 { + return w.size +} + +func (w *writer) Close() error { + if w.closed { + return fmt.Errorf("already closed") + } + w.closed = true + return w.flushPart() +} + +func (w *writer) Cancel() error { + if w.closed { + return fmt.Errorf("already closed") + } else if w.committed { + return fmt.Errorf("already committed") + } + w.cancelled = true + err := w.multi.Abort() + return err +} + +func (w *writer) Commit() error { + if w.closed { + return fmt.Errorf("already closed") + } else if w.committed { + return fmt.Errorf("already committed") + } else if w.cancelled { + return fmt.Errorf("already cancelled") + } + err := w.flushPart() + if err != nil { + return err + } + w.committed = true + err = w.multi.Complete(w.parts) + if err != nil { + w.multi.Abort() + return err + } + return nil +} + +// flushPart flushes buffers to write a part to S3. +// Only called by Write (with both buffers full) and Close/Commit (always) +func (w *writer) flushPart() error { + if len(w.readyPart) == 0 && len(w.pendingPart) == 0 { + // nothing to write + return nil + } + if len(w.pendingPart) < int(w.driver.ChunkSize) { + // closing with a small pending part + // combine ready and pending to avoid writing a small part + w.readyPart = append(w.readyPart, w.pendingPart...) + w.pendingPart = nil + } + + part, err := w.multi.PutPart(len(w.parts)+1, bytes.NewReader(w.readyPart)) + if err != nil { + return err + } + w.parts = append(w.parts, part) + w.readyPart = w.pendingPart + w.pendingPart = nil + return nil } diff --git a/registry/storage/driver/storagedriver.go b/registry/storage/driver/storagedriver.go index 603020f1..2ae9a67e 100644 --- a/registry/storage/driver/storagedriver.go +++ b/registry/storage/driver/storagedriver.go @@ -49,15 +49,14 @@ type StorageDriver interface { // This should primarily be used for small objects. PutContent(ctx context.Context, path string, content []byte) error - // ReadStream retrieves an io.ReadCloser for the content stored at "path" + // Reader retrieves an io.ReadCloser for the content stored at "path" // with a given byte offset. // May be used to resume reading a stream by providing a nonzero offset. - ReadStream(ctx context.Context, path string, offset int64) (io.ReadCloser, error) + Reader(ctx context.Context, path string, offset int64) (io.ReadCloser, error) - // WriteStream stores the contents of the provided io.ReadCloser at a - // location designated by the given path. - // May be used to resume writing a stream by providing a nonzero offset. - WriteStream(ctx context.Context, path string, offset int64, reader io.Reader) (nn int64, err error) + // Writer returns a FileWriter which will store the content written to it + // at the location designated by "path" after the call to Commit. + Writer(ctx context.Context, path string, append bool) (FileWriter, error) // Stat retrieves the FileInfo for the given path, including the current // size in bytes and the creation time. @@ -83,6 +82,25 @@ type StorageDriver interface { URLFor(ctx context.Context, path string, options map[string]interface{}) (string, error) } +// FileWriter provides an abstraction for an opened writable file-like object in +// the storage backend. The FileWriter must flush all content written to it on +// the call to Close, but is only required to make its content readable on a +// call to Commit. +type FileWriter interface { + io.WriteCloser + + // Size returns the number of bytes written to this FileWriter. + Size() int64 + + // Cancel removes any written content from this FileWriter. + Cancel() error + + // Commit flushes all content written to this FileWriter and makes it + // available for future calls to StorageDriver.GetContent and + // StorageDriver.Reader. + Commit() error +} + // PathRegexp is the regular expression which each file path must match. A // file path is absolute, beginning with a slash and containing a positive // number of path components separated by slashes, where each component is diff --git a/registry/storage/driver/testsuites/testsuites.go b/registry/storage/driver/testsuites/testsuites.go index 3ff4e1e6..48d90ed8 100644 --- a/registry/storage/driver/testsuites/testsuites.go +++ b/registry/storage/driver/testsuites/testsuites.go @@ -282,11 +282,19 @@ func (suite *DriverSuite) TestWriteReadLargeStreams(c *check.C) { var fileSize int64 = 5 * 1024 * 1024 * 1024 contents := newRandReader(fileSize) - written, err := suite.StorageDriver.WriteStream(suite.ctx, filename, 0, io.TeeReader(contents, checksum)) + + writer, err := suite.StorageDriver.Writer(suite.ctx, filename, false) + c.Assert(err, check.IsNil) + written, err := io.Copy(writer, io.TeeReader(contents, checksum)) c.Assert(err, check.IsNil) c.Assert(written, check.Equals, fileSize) - reader, err := suite.StorageDriver.ReadStream(suite.ctx, filename, 0) + err = writer.Commit() + c.Assert(err, check.IsNil) + err = writer.Close() + c.Assert(err, check.IsNil) + + reader, err := suite.StorageDriver.Reader(suite.ctx, filename, 0) c.Assert(err, check.IsNil) defer reader.Close() @@ -296,9 +304,9 @@ func (suite *DriverSuite) TestWriteReadLargeStreams(c *check.C) { c.Assert(writtenChecksum.Sum(nil), check.DeepEquals, checksum.Sum(nil)) } -// TestReadStreamWithOffset tests that the appropriate data is streamed when +// TestReaderWithOffset tests that the appropriate data is streamed when // reading with a given offset. -func (suite *DriverSuite) TestReadStreamWithOffset(c *check.C) { +func (suite *DriverSuite) TestReaderWithOffset(c *check.C) { filename := randomPath(32) defer suite.deletePath(c, firstPart(filename)) @@ -311,7 +319,7 @@ func (suite *DriverSuite) TestReadStreamWithOffset(c *check.C) { err := suite.StorageDriver.PutContent(suite.ctx, filename, append(append(contentsChunk1, contentsChunk2...), contentsChunk3...)) c.Assert(err, check.IsNil) - reader, err := suite.StorageDriver.ReadStream(suite.ctx, filename, 0) + reader, err := suite.StorageDriver.Reader(suite.ctx, filename, 0) c.Assert(err, check.IsNil) defer reader.Close() @@ -320,7 +328,7 @@ func (suite *DriverSuite) TestReadStreamWithOffset(c *check.C) { c.Assert(readContents, check.DeepEquals, append(append(contentsChunk1, contentsChunk2...), contentsChunk3...)) - reader, err = suite.StorageDriver.ReadStream(suite.ctx, filename, chunkSize) + reader, err = suite.StorageDriver.Reader(suite.ctx, filename, chunkSize) c.Assert(err, check.IsNil) defer reader.Close() @@ -329,7 +337,7 @@ func (suite *DriverSuite) TestReadStreamWithOffset(c *check.C) { c.Assert(readContents, check.DeepEquals, append(contentsChunk2, contentsChunk3...)) - reader, err = suite.StorageDriver.ReadStream(suite.ctx, filename, chunkSize*2) + reader, err = suite.StorageDriver.Reader(suite.ctx, filename, chunkSize*2) c.Assert(err, check.IsNil) defer reader.Close() @@ -338,7 +346,7 @@ func (suite *DriverSuite) TestReadStreamWithOffset(c *check.C) { c.Assert(readContents, check.DeepEquals, contentsChunk3) // Ensure we get invalid offest for negative offsets. - reader, err = suite.StorageDriver.ReadStream(suite.ctx, filename, -1) + reader, err = suite.StorageDriver.Reader(suite.ctx, filename, -1) c.Assert(err, check.FitsTypeOf, storagedriver.InvalidOffsetError{}) c.Assert(err.(storagedriver.InvalidOffsetError).Offset, check.Equals, int64(-1)) c.Assert(err.(storagedriver.InvalidOffsetError).Path, check.Equals, filename) @@ -347,7 +355,7 @@ func (suite *DriverSuite) TestReadStreamWithOffset(c *check.C) { // Read past the end of the content and make sure we get a reader that // returns 0 bytes and io.EOF - reader, err = suite.StorageDriver.ReadStream(suite.ctx, filename, chunkSize*3) + reader, err = suite.StorageDriver.Reader(suite.ctx, filename, chunkSize*3) c.Assert(err, check.IsNil) defer reader.Close() @@ -357,7 +365,7 @@ func (suite *DriverSuite) TestReadStreamWithOffset(c *check.C) { c.Assert(n, check.Equals, 0) // Check the N-1 boundary condition, ensuring we get 1 byte then io.EOF. - reader, err = suite.StorageDriver.ReadStream(suite.ctx, filename, chunkSize*3-1) + reader, err = suite.StorageDriver.Reader(suite.ctx, filename, chunkSize*3-1) c.Assert(err, check.IsNil) defer reader.Close() @@ -395,78 +403,51 @@ func (suite *DriverSuite) testContinueStreamAppend(c *check.C, chunkSize int64) contentsChunk1 := randomContents(chunkSize) contentsChunk2 := randomContents(chunkSize) contentsChunk3 := randomContents(chunkSize) - contentsChunk4 := randomContents(chunkSize) - zeroChunk := make([]byte, int64(chunkSize)) fullContents := append(append(contentsChunk1, contentsChunk2...), contentsChunk3...) - nn, err := suite.StorageDriver.WriteStream(suite.ctx, filename, 0, bytes.NewReader(contentsChunk1)) + writer, err := suite.StorageDriver.Writer(suite.ctx, filename, false) + c.Assert(err, check.IsNil) + nn, err := io.Copy(writer, bytes.NewReader(contentsChunk1)) c.Assert(err, check.IsNil) c.Assert(nn, check.Equals, int64(len(contentsChunk1))) - fi, err := suite.StorageDriver.Stat(suite.ctx, filename) - c.Assert(err, check.IsNil) - c.Assert(fi, check.NotNil) - c.Assert(fi.Size(), check.Equals, int64(len(contentsChunk1))) + curSize := writer.Size() + c.Assert(curSize, check.Equals, int64(len(contentsChunk1))) - nn, err = suite.StorageDriver.WriteStream(suite.ctx, filename, fi.Size(), bytes.NewReader(contentsChunk2)) + err = writer.Close() + c.Assert(err, check.IsNil) + + writer, err = suite.StorageDriver.Writer(suite.ctx, filename, true) + c.Assert(err, check.IsNil) + c.Assert(writer.Size(), check.Equals, curSize) + + nn, err = io.Copy(writer, bytes.NewReader(contentsChunk2)) c.Assert(err, check.IsNil) c.Assert(nn, check.Equals, int64(len(contentsChunk2))) - fi, err = suite.StorageDriver.Stat(suite.ctx, filename) - c.Assert(err, check.IsNil) - c.Assert(fi, check.NotNil) - c.Assert(fi.Size(), check.Equals, 2*chunkSize) + curSize = writer.Size() + c.Assert(curSize, check.Equals, 2*chunkSize) - // Test re-writing the last chunk - nn, err = suite.StorageDriver.WriteStream(suite.ctx, filename, fi.Size()-chunkSize, bytes.NewReader(contentsChunk2)) + err = writer.Close() c.Assert(err, check.IsNil) - c.Assert(nn, check.Equals, int64(len(contentsChunk2))) - fi, err = suite.StorageDriver.Stat(suite.ctx, filename) + writer, err = suite.StorageDriver.Writer(suite.ctx, filename, true) c.Assert(err, check.IsNil) - c.Assert(fi, check.NotNil) - c.Assert(fi.Size(), check.Equals, 2*chunkSize) + c.Assert(writer.Size(), check.Equals, curSize) - nn, err = suite.StorageDriver.WriteStream(suite.ctx, filename, fi.Size(), bytes.NewReader(fullContents[fi.Size():])) + nn, err = io.Copy(writer, bytes.NewReader(fullContents[curSize:])) + c.Assert(err, check.IsNil) + c.Assert(nn, check.Equals, int64(len(fullContents[curSize:]))) + + err = writer.Commit() + c.Assert(err, check.IsNil) + err = writer.Close() c.Assert(err, check.IsNil) - c.Assert(nn, check.Equals, int64(len(fullContents[fi.Size():]))) received, err := suite.StorageDriver.GetContent(suite.ctx, filename) c.Assert(err, check.IsNil) c.Assert(received, check.DeepEquals, fullContents) - - // Writing past size of file extends file (no offset error). We would like - // to write chunk 4 one chunk length past chunk 3. It should be successful - // and the resulting file will be 5 chunks long, with a chunk of all - // zeros. - - fullContents = append(fullContents, zeroChunk...) - fullContents = append(fullContents, contentsChunk4...) - - nn, err = suite.StorageDriver.WriteStream(suite.ctx, filename, int64(len(fullContents))-chunkSize, bytes.NewReader(contentsChunk4)) - c.Assert(err, check.IsNil) - c.Assert(nn, check.Equals, chunkSize) - - fi, err = suite.StorageDriver.Stat(suite.ctx, filename) - c.Assert(err, check.IsNil) - c.Assert(fi, check.NotNil) - c.Assert(fi.Size(), check.Equals, int64(len(fullContents))) - - received, err = suite.StorageDriver.GetContent(suite.ctx, filename) - c.Assert(err, check.IsNil) - c.Assert(len(received), check.Equals, len(fullContents)) - c.Assert(received[chunkSize*3:chunkSize*4], check.DeepEquals, zeroChunk) - c.Assert(received[chunkSize*4:chunkSize*5], check.DeepEquals, contentsChunk4) - c.Assert(received, check.DeepEquals, fullContents) - - // Ensure that negative offsets return correct error. - nn, err = suite.StorageDriver.WriteStream(suite.ctx, filename, -1, bytes.NewReader(zeroChunk)) - c.Assert(err, check.NotNil) - c.Assert(err, check.FitsTypeOf, storagedriver.InvalidOffsetError{}) - c.Assert(err.(storagedriver.InvalidOffsetError).Path, check.Equals, filename) - c.Assert(err.(storagedriver.InvalidOffsetError).Offset, check.Equals, int64(-1)) - c.Assert(strings.Contains(err.Error(), suite.Name()), check.Equals, true) } // TestReadNonexistentStream tests that reading a stream for a nonexistent path @@ -474,12 +455,12 @@ func (suite *DriverSuite) testContinueStreamAppend(c *check.C, chunkSize int64) func (suite *DriverSuite) TestReadNonexistentStream(c *check.C) { filename := randomPath(32) - _, err := suite.StorageDriver.ReadStream(suite.ctx, filename, 0) + _, err := suite.StorageDriver.Reader(suite.ctx, filename, 0) c.Assert(err, check.NotNil) c.Assert(err, check.FitsTypeOf, storagedriver.PathNotFoundError{}) c.Assert(strings.Contains(err.Error(), suite.Name()), check.Equals, true) - _, err = suite.StorageDriver.ReadStream(suite.ctx, filename, 64) + _, err = suite.StorageDriver.Reader(suite.ctx, filename, 64) c.Assert(err, check.NotNil) c.Assert(err, check.FitsTypeOf, storagedriver.PathNotFoundError{}) c.Assert(strings.Contains(err.Error(), suite.Name()), check.Equals, true) @@ -800,7 +781,7 @@ func (suite *DriverSuite) TestStatCall(c *check.C) { // TestPutContentMultipleTimes checks that if storage driver can overwrite the content // in the subsequent puts. Validates that PutContent does not have to work -// with an offset like WriteStream does and overwrites the file entirely +// with an offset like Writer does and overwrites the file entirely // rather than writing the data to the [0,len(data)) of the file. func (suite *DriverSuite) TestPutContentMultipleTimes(c *check.C) { filename := randomPath(32) @@ -842,7 +823,7 @@ func (suite *DriverSuite) TestConcurrentStreamReads(c *check.C) { readContents := func() { defer wg.Done() offset := rand.Int63n(int64(len(contents))) - reader, err := suite.StorageDriver.ReadStream(suite.ctx, filename, offset) + reader, err := suite.StorageDriver.Reader(suite.ctx, filename, offset) c.Assert(err, check.IsNil) readContents, err := ioutil.ReadAll(reader) @@ -858,7 +839,7 @@ func (suite *DriverSuite) TestConcurrentStreamReads(c *check.C) { } // TestConcurrentFileStreams checks that multiple *os.File objects can be passed -// in to WriteStream concurrently without hanging. +// in to Writer concurrently without hanging. func (suite *DriverSuite) TestConcurrentFileStreams(c *check.C) { numStreams := 32 @@ -882,53 +863,54 @@ func (suite *DriverSuite) TestConcurrentFileStreams(c *check.C) { wg.Wait() } +// TODO (brianbland): evaluate the relevancy of this test // TestEventualConsistency checks that if stat says that a file is a certain size, then // you can freely read from the file (this is the only guarantee that the driver needs to provide) -func (suite *DriverSuite) TestEventualConsistency(c *check.C) { - if testing.Short() { - c.Skip("Skipping test in short mode") - } - - filename := randomPath(32) - defer suite.deletePath(c, firstPart(filename)) - - var offset int64 - var misswrites int - var chunkSize int64 = 32 - - for i := 0; i < 1024; i++ { - contents := randomContents(chunkSize) - read, err := suite.StorageDriver.WriteStream(suite.ctx, filename, offset, bytes.NewReader(contents)) - c.Assert(err, check.IsNil) - - fi, err := suite.StorageDriver.Stat(suite.ctx, filename) - c.Assert(err, check.IsNil) - - // We are most concerned with being able to read data as soon as Stat declares - // it is uploaded. This is the strongest guarantee that some drivers (that guarantee - // at best eventual consistency) absolutely need to provide. - if fi.Size() == offset+chunkSize { - reader, err := suite.StorageDriver.ReadStream(suite.ctx, filename, offset) - c.Assert(err, check.IsNil) - - readContents, err := ioutil.ReadAll(reader) - c.Assert(err, check.IsNil) - - c.Assert(readContents, check.DeepEquals, contents) - - reader.Close() - offset += read - } else { - misswrites++ - } - } - - if misswrites > 0 { - c.Log("There were " + string(misswrites) + " occurrences of a write not being instantly available.") - } - - c.Assert(misswrites, check.Not(check.Equals), 1024) -} +// func (suite *DriverSuite) TestEventualConsistency(c *check.C) { +// if testing.Short() { +// c.Skip("Skipping test in short mode") +// } +// +// filename := randomPath(32) +// defer suite.deletePath(c, firstPart(filename)) +// +// var offset int64 +// var misswrites int +// var chunkSize int64 = 32 +// +// for i := 0; i < 1024; i++ { +// contents := randomContents(chunkSize) +// read, err := suite.StorageDriver.Writer(suite.ctx, filename, offset, bytes.NewReader(contents)) +// c.Assert(err, check.IsNil) +// +// fi, err := suite.StorageDriver.Stat(suite.ctx, filename) +// c.Assert(err, check.IsNil) +// +// // We are most concerned with being able to read data as soon as Stat declares +// // it is uploaded. This is the strongest guarantee that some drivers (that guarantee +// // at best eventual consistency) absolutely need to provide. +// if fi.Size() == offset+chunkSize { +// reader, err := suite.StorageDriver.Reader(suite.ctx, filename, offset) +// c.Assert(err, check.IsNil) +// +// readContents, err := ioutil.ReadAll(reader) +// c.Assert(err, check.IsNil) +// +// c.Assert(readContents, check.DeepEquals, contents) +// +// reader.Close() +// offset += read +// } else { +// misswrites++ +// } +// } +// +// if misswrites > 0 { +// c.Log("There were " + string(misswrites) + " occurrences of a write not being instantly available.") +// } +// +// c.Assert(misswrites, check.Not(check.Equals), 1024) +// } // BenchmarkPutGetEmptyFiles benchmarks PutContent/GetContent for 0B files func (suite *DriverSuite) BenchmarkPutGetEmptyFiles(c *check.C) { @@ -968,22 +950,22 @@ func (suite *DriverSuite) benchmarkPutGetFiles(c *check.C, size int64) { } } -// BenchmarkStreamEmptyFiles benchmarks WriteStream/ReadStream for 0B files +// BenchmarkStreamEmptyFiles benchmarks Writer/Reader for 0B files func (suite *DriverSuite) BenchmarkStreamEmptyFiles(c *check.C) { suite.benchmarkStreamFiles(c, 0) } -// BenchmarkStream1KBFiles benchmarks WriteStream/ReadStream for 1KB files +// BenchmarkStream1KBFiles benchmarks Writer/Reader for 1KB files func (suite *DriverSuite) BenchmarkStream1KBFiles(c *check.C) { suite.benchmarkStreamFiles(c, 1024) } -// BenchmarkStream1MBFiles benchmarks WriteStream/ReadStream for 1MB files +// BenchmarkStream1MBFiles benchmarks Writer/Reader for 1MB files func (suite *DriverSuite) BenchmarkStream1MBFiles(c *check.C) { suite.benchmarkStreamFiles(c, 1024*1024) } -// BenchmarkStream1GBFiles benchmarks WriteStream/ReadStream for 1GB files +// BenchmarkStream1GBFiles benchmarks Writer/Reader for 1GB files func (suite *DriverSuite) BenchmarkStream1GBFiles(c *check.C) { suite.benchmarkStreamFiles(c, 1024*1024*1024) } @@ -998,11 +980,18 @@ func (suite *DriverSuite) benchmarkStreamFiles(c *check.C, size int64) { for i := 0; i < c.N; i++ { filename := path.Join(parentDir, randomPath(32)) - written, err := suite.StorageDriver.WriteStream(suite.ctx, filename, 0, bytes.NewReader(randomContents(size))) + writer, err := suite.StorageDriver.Writer(suite.ctx, filename, false) + c.Assert(err, check.IsNil) + written, err := io.Copy(writer, bytes.NewReader(randomContents(size))) c.Assert(err, check.IsNil) c.Assert(written, check.Equals, size) - rc, err := suite.StorageDriver.ReadStream(suite.ctx, filename, 0) + err = writer.Commit() + c.Assert(err, check.IsNil) + err = writer.Close() + c.Assert(err, check.IsNil) + + rc, err := suite.StorageDriver.Reader(suite.ctx, filename, 0) c.Assert(err, check.IsNil) rc.Close() } @@ -1083,11 +1072,18 @@ func (suite *DriverSuite) testFileStreams(c *check.C, size int64) { tf.Sync() tf.Seek(0, os.SEEK_SET) - nn, err := suite.StorageDriver.WriteStream(suite.ctx, filename, 0, tf) + writer, err := suite.StorageDriver.Writer(suite.ctx, filename, false) + c.Assert(err, check.IsNil) + nn, err := io.Copy(writer, tf) c.Assert(err, check.IsNil) c.Assert(nn, check.Equals, size) - reader, err := suite.StorageDriver.ReadStream(suite.ctx, filename, 0) + err = writer.Commit() + c.Assert(err, check.IsNil) + err = writer.Close() + c.Assert(err, check.IsNil) + + reader, err := suite.StorageDriver.Reader(suite.ctx, filename, 0) c.Assert(err, check.IsNil) defer reader.Close() @@ -1112,11 +1108,18 @@ func (suite *DriverSuite) writeReadCompare(c *check.C, filename string, contents func (suite *DriverSuite) writeReadCompareStreams(c *check.C, filename string, contents []byte) { defer suite.deletePath(c, firstPart(filename)) - nn, err := suite.StorageDriver.WriteStream(suite.ctx, filename, 0, bytes.NewReader(contents)) + writer, err := suite.StorageDriver.Writer(suite.ctx, filename, false) + c.Assert(err, check.IsNil) + nn, err := io.Copy(writer, bytes.NewReader(contents)) c.Assert(err, check.IsNil) c.Assert(nn, check.Equals, int64(len(contents))) - reader, err := suite.StorageDriver.ReadStream(suite.ctx, filename, 0) + err = writer.Commit() + c.Assert(err, check.IsNil) + err = writer.Close() + c.Assert(err, check.IsNil) + + reader, err := suite.StorageDriver.Reader(suite.ctx, filename, 0) c.Assert(err, check.IsNil) defer reader.Close() diff --git a/registry/storage/filereader.go b/registry/storage/filereader.go index b3a5f520..3b06c817 100644 --- a/registry/storage/filereader.go +++ b/registry/storage/filereader.go @@ -119,7 +119,7 @@ func (fr *fileReader) reader() (io.Reader, error) { } // If we don't have a reader, open one up. - rc, err := fr.driver.ReadStream(fr.ctx, fr.path, fr.offset) + rc, err := fr.driver.Reader(fr.ctx, fr.path, fr.offset) if err != nil { switch err := err.(type) { case storagedriver.PathNotFoundError: diff --git a/registry/storage/filewriter.go b/registry/storage/filewriter.go deleted file mode 100644 index 7c68f346..00000000 --- a/registry/storage/filewriter.go +++ /dev/null @@ -1,135 +0,0 @@ -package storage - -import ( - "bytes" - "fmt" - "io" - "os" - - "github.com/docker/distribution/context" - storagedriver "github.com/docker/distribution/registry/storage/driver" -) - -// fileWriter implements a remote file writer backed by a storage driver. -type fileWriter struct { - driver storagedriver.StorageDriver - - ctx context.Context - - // identifying fields - path string - - // mutable fields - size int64 // size of the file, aka the current end - offset int64 // offset is the current write offset - err error // terminal error, if set, reader is closed -} - -// fileWriterInterface makes the desired io compliant interface that the -// filewriter should implement. -type fileWriterInterface interface { - io.WriteSeeker - io.ReaderFrom - io.Closer -} - -var _ fileWriterInterface = &fileWriter{} - -// newFileWriter returns a prepared fileWriter for the driver and path. This -// could be considered similar to an "open" call on a regular filesystem. -func newFileWriter(ctx context.Context, driver storagedriver.StorageDriver, path string) (*fileWriter, error) { - fw := fileWriter{ - driver: driver, - path: path, - ctx: ctx, - } - - if fi, err := driver.Stat(ctx, path); err != nil { - switch err := err.(type) { - case storagedriver.PathNotFoundError: - // ignore, offset is zero - default: - return nil, err - } - } else { - if fi.IsDir() { - return nil, fmt.Errorf("cannot write to a directory") - } - - fw.size = fi.Size() - } - - return &fw, nil -} - -// Write writes the buffer p at the current write offset. -func (fw *fileWriter) Write(p []byte) (n int, err error) { - nn, err := fw.ReadFrom(bytes.NewReader(p)) - return int(nn), err -} - -// ReadFrom reads reader r until io.EOF writing the contents at the current -// offset. -func (fw *fileWriter) ReadFrom(r io.Reader) (n int64, err error) { - if fw.err != nil { - return 0, fw.err - } - - nn, err := fw.driver.WriteStream(fw.ctx, fw.path, fw.offset, r) - - // We should forward the offset, whether or not there was an error. - // Basically, we keep the filewriter in sync with the reader's head. If an - // error is encountered, the whole thing should be retried but we proceed - // from an expected offset, even if the data didn't make it to the - // backend. - fw.offset += nn - - if fw.offset > fw.size { - fw.size = fw.offset - } - - return nn, err -} - -// Seek moves the write position do the requested offest based on the whence -// argument, which can be os.SEEK_CUR, os.SEEK_END, or os.SEEK_SET. -func (fw *fileWriter) Seek(offset int64, whence int) (int64, error) { - if fw.err != nil { - return 0, fw.err - } - - var err error - newOffset := fw.offset - - switch whence { - case os.SEEK_CUR: - newOffset += int64(offset) - case os.SEEK_END: - newOffset = fw.size + int64(offset) - case os.SEEK_SET: - newOffset = int64(offset) - } - - if newOffset < 0 { - err = fmt.Errorf("cannot seek to negative position") - } else { - // No problems, set the offset. - fw.offset = newOffset - } - - return fw.offset, err -} - -// Close closes the fileWriter for writing. -// Calling it once is valid and correct and it will -// return a nil error. Calling it subsequent times will -// detect that fw.err has been set and will return the error. -func (fw *fileWriter) Close() error { - if fw.err != nil { - return fw.err - } - - fw.err = fmt.Errorf("filewriter@%v: closed", fw.path) - - return nil -} diff --git a/registry/storage/filewriter_test.go b/registry/storage/filewriter_test.go deleted file mode 100644 index d6782cd4..00000000 --- a/registry/storage/filewriter_test.go +++ /dev/null @@ -1,226 +0,0 @@ -package storage - -import ( - "bytes" - "crypto/rand" - "io" - "os" - "testing" - - "github.com/docker/distribution/context" - "github.com/docker/distribution/digest" - storagedriver "github.com/docker/distribution/registry/storage/driver" - "github.com/docker/distribution/registry/storage/driver/inmemory" -) - -// TestSimpleWrite takes the fileWriter through common write operations -// ensuring data integrity. -func TestSimpleWrite(t *testing.T) { - content := make([]byte, 1<<20) - n, err := rand.Read(content) - if err != nil { - t.Fatalf("unexpected error building random data: %v", err) - } - - if n != len(content) { - t.Fatalf("random read did't fill buffer") - } - - dgst, err := digest.FromReader(bytes.NewReader(content)) - if err != nil { - t.Fatalf("unexpected error digesting random content: %v", err) - } - - driver := inmemory.New() - path := "/random" - ctx := context.Background() - - fw, err := newFileWriter(ctx, driver, path) - if err != nil { - t.Fatalf("unexpected error creating fileWriter: %v", err) - } - defer fw.Close() - - n, err = fw.Write(content) - if err != nil { - t.Fatalf("unexpected error writing content: %v", err) - } - - if n != len(content) { - t.Fatalf("unexpected write length: %d != %d", n, len(content)) - } - - fr, err := newFileReader(ctx, driver, path, int64(len(content))) - if err != nil { - t.Fatalf("unexpected error creating fileReader: %v", err) - } - defer fr.Close() - - verifier, err := digest.NewDigestVerifier(dgst) - if err != nil { - t.Fatalf("unexpected error getting digest verifier: %s", err) - } - - io.Copy(verifier, fr) - - if !verifier.Verified() { - t.Fatalf("unable to verify write data") - } - - // Check the seek position is equal to the content length - end, err := fw.Seek(0, os.SEEK_END) - if err != nil { - t.Fatalf("unexpected error seeking: %v", err) - } - - if end != int64(len(content)) { - t.Fatalf("write did not advance offset: %d != %d", end, len(content)) - } - - // Double the content - doubled := append(content, content...) - doubledgst, err := digest.FromReader(bytes.NewReader(doubled)) - if err != nil { - t.Fatalf("unexpected error digesting doubled content: %v", err) - } - - nn, err := fw.ReadFrom(bytes.NewReader(content)) - if err != nil { - t.Fatalf("unexpected error doubling content: %v", err) - } - - if nn != int64(len(content)) { - t.Fatalf("writeat was short: %d != %d", n, len(content)) - } - - fr, err = newFileReader(ctx, driver, path, int64(len(doubled))) - if err != nil { - t.Fatalf("unexpected error creating fileReader: %v", err) - } - defer fr.Close() - - verifier, err = digest.NewDigestVerifier(doubledgst) - if err != nil { - t.Fatalf("unexpected error getting digest verifier: %s", err) - } - - io.Copy(verifier, fr) - - if !verifier.Verified() { - t.Fatalf("unable to verify write data") - } - - // Check that Write updated the offset. - end, err = fw.Seek(0, os.SEEK_END) - if err != nil { - t.Fatalf("unexpected error seeking: %v", err) - } - - if end != int64(len(doubled)) { - t.Fatalf("write did not advance offset: %d != %d", end, len(doubled)) - } - - // Now, we copy from one path to another, running the data through the - // fileReader to fileWriter, rather than the driver.Move command to ensure - // everything is working correctly. - fr, err = newFileReader(ctx, driver, path, int64(len(doubled))) - if err != nil { - t.Fatalf("unexpected error creating fileReader: %v", err) - } - defer fr.Close() - - fw, err = newFileWriter(ctx, driver, "/copied") - if err != nil { - t.Fatalf("unexpected error creating fileWriter: %v", err) - } - defer fw.Close() - - nn, err = io.Copy(fw, fr) - if err != nil { - t.Fatalf("unexpected error copying data: %v", err) - } - - if nn != int64(len(doubled)) { - t.Fatalf("unexpected copy length: %d != %d", nn, len(doubled)) - } - - fr, err = newFileReader(ctx, driver, "/copied", int64(len(doubled))) - if err != nil { - t.Fatalf("unexpected error creating fileReader: %v", err) - } - defer fr.Close() - - verifier, err = digest.NewDigestVerifier(doubledgst) - if err != nil { - t.Fatalf("unexpected error getting digest verifier: %s", err) - } - - io.Copy(verifier, fr) - - if !verifier.Verified() { - t.Fatalf("unable to verify write data") - } -} - -func BenchmarkFileWriter(b *testing.B) { - b.StopTimer() // not sure how long setup above will take - for i := 0; i < b.N; i++ { - // Start basic fileWriter initialization - fw := fileWriter{ - driver: inmemory.New(), - path: "/random", - } - ctx := context.Background() - if fi, err := fw.driver.Stat(ctx, fw.path); err != nil { - switch err := err.(type) { - case storagedriver.PathNotFoundError: - // ignore, offset is zero - default: - b.Fatalf("Failed to initialize fileWriter: %v", err.Error()) - } - } else { - if fi.IsDir() { - b.Fatalf("Cannot write to a directory") - } - - fw.size = fi.Size() - } - - randomBytes := make([]byte, 1<<20) - _, err := rand.Read(randomBytes) - if err != nil { - b.Fatalf("unexpected error building random data: %v", err) - } - // End basic file writer initialization - - b.StartTimer() - for j := 0; j < 100; j++ { - fw.Write(randomBytes) - } - b.StopTimer() - } -} - -func BenchmarkfileWriter(b *testing.B) { - b.StopTimer() // not sure how long setup above will take - ctx := context.Background() - for i := 0; i < b.N; i++ { - bfw, err := newFileWriter(ctx, inmemory.New(), "/random") - - if err != nil { - b.Fatalf("Failed to initialize fileWriter: %v", err.Error()) - } - - randomBytes := make([]byte, 1<<20) - _, err = rand.Read(randomBytes) - if err != nil { - b.Fatalf("unexpected error building random data: %v", err) - } - - b.StartTimer() - for j := 0; j < 100; j++ { - bfw.Write(randomBytes) - } - b.StopTimer() - } -} diff --git a/registry/storage/linkedblobstore.go b/registry/storage/linkedblobstore.go index 76a1c29d..e06f9540 100644 --- a/registry/storage/linkedblobstore.go +++ b/registry/storage/linkedblobstore.go @@ -179,7 +179,7 @@ func (lbs *linkedBlobStore) Create(ctx context.Context, options ...distribution. return nil, err } - return lbs.newBlobUpload(ctx, uuid, path, startedAt) + return lbs.newBlobUpload(ctx, uuid, path, startedAt, false) } func (lbs *linkedBlobStore) Resume(ctx context.Context, id string) (distribution.BlobWriter, error) { @@ -218,7 +218,7 @@ func (lbs *linkedBlobStore) Resume(ctx context.Context, id string) (distribution return nil, err } - return lbs.newBlobUpload(ctx, id, path, startedAt) + return lbs.newBlobUpload(ctx, id, path, startedAt, true) } func (lbs *linkedBlobStore) Delete(ctx context.Context, dgst digest.Digest) error { @@ -312,18 +312,21 @@ func (lbs *linkedBlobStore) mount(ctx context.Context, sourceRepo reference.Name } // newBlobUpload allocates a new upload controller with the given state. -func (lbs *linkedBlobStore) newBlobUpload(ctx context.Context, uuid, path string, startedAt time.Time) (distribution.BlobWriter, error) { - fw, err := newFileWriter(ctx, lbs.driver, path) +func (lbs *linkedBlobStore) newBlobUpload(ctx context.Context, uuid, path string, startedAt time.Time, append bool) (distribution.BlobWriter, error) { + fw, err := lbs.driver.Writer(ctx, path, append) if err != nil { return nil, err } bw := &blobWriter{ - blobStore: lbs, - id: uuid, - startedAt: startedAt, - digester: digest.Canonical.New(), - fileWriter: *fw, + ctx: ctx, + blobStore: lbs, + id: uuid, + startedAt: startedAt, + digester: digest.Canonical.New(), + fileWriter: fw, + driver: lbs.driver, + path: path, resumableDigestEnabled: lbs.resumableDigestEnabled, } From 666273d9f6f02d394d4202cb9d2bf2b1d642974c Mon Sep 17 00:00:00 2001 From: Arthur Baars Date: Fri, 12 Feb 2016 17:49:37 +0000 Subject: [PATCH 2/7] StorageDriver: Testsuite: call Close before getting Size Signed-off-by: Arthur Baars --- registry/storage/driver/testsuites/testsuites.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/registry/storage/driver/testsuites/testsuites.go b/registry/storage/driver/testsuites/testsuites.go index 48d90ed8..de8e3143 100644 --- a/registry/storage/driver/testsuites/testsuites.go +++ b/registry/storage/driver/testsuites/testsuites.go @@ -412,12 +412,12 @@ func (suite *DriverSuite) testContinueStreamAppend(c *check.C, chunkSize int64) c.Assert(err, check.IsNil) c.Assert(nn, check.Equals, int64(len(contentsChunk1))) - curSize := writer.Size() - c.Assert(curSize, check.Equals, int64(len(contentsChunk1))) - err = writer.Close() c.Assert(err, check.IsNil) + curSize := writer.Size() + c.Assert(curSize, check.Equals, int64(len(contentsChunk1))) + writer, err = suite.StorageDriver.Writer(suite.ctx, filename, true) c.Assert(err, check.IsNil) c.Assert(writer.Size(), check.Equals, curSize) @@ -426,12 +426,12 @@ func (suite *DriverSuite) testContinueStreamAppend(c *check.C, chunkSize int64) c.Assert(err, check.IsNil) c.Assert(nn, check.Equals, int64(len(contentsChunk2))) - curSize = writer.Size() - c.Assert(curSize, check.Equals, 2*chunkSize) - err = writer.Close() c.Assert(err, check.IsNil) + curSize = writer.Size() + c.Assert(curSize, check.Equals, 2*chunkSize) + writer, err = suite.StorageDriver.Writer(suite.ctx, filename, true) c.Assert(err, check.IsNil) c.Assert(writer.Size(), check.Equals, curSize) From 7162cb19c6c28faf4c5da9671f7839619ba66ea6 Mon Sep 17 00:00:00 2001 From: Arthur Baars Date: Fri, 12 Feb 2016 13:30:57 +0000 Subject: [PATCH 3/7] Storagedriver: GCS: implement resumable uploads Signed-off-by: Arthur Baars --- cmd/registry/main.go | 2 +- registry/storage/driver/gcs/gcs.go | 534 ++++++++++++++++-------- registry/storage/driver/gcs/gcs_test.go | 102 ++++- 3 files changed, 468 insertions(+), 170 deletions(-) diff --git a/cmd/registry/main.go b/cmd/registry/main.go index afa4ae20..49fd7ba5 100644 --- a/cmd/registry/main.go +++ b/cmd/registry/main.go @@ -10,7 +10,7 @@ import ( _ "github.com/docker/distribution/registry/proxy" _ "github.com/docker/distribution/registry/storage/driver/azure" _ "github.com/docker/distribution/registry/storage/driver/filesystem" - // _ "github.com/docker/distribution/registry/storage/driver/gcs" + _ "github.com/docker/distribution/registry/storage/driver/gcs" _ "github.com/docker/distribution/registry/storage/driver/inmemory" _ "github.com/docker/distribution/registry/storage/driver/middleware/cloudfront" // _ "github.com/docker/distribution/registry/storage/driver/oss" diff --git a/registry/storage/driver/gcs/gcs.go b/registry/storage/driver/gcs/gcs.go index 9d8a8458..14600dee 100644 --- a/registry/storage/driver/gcs/gcs.go +++ b/registry/storage/driver/gcs/gcs.go @@ -7,11 +7,8 @@ // Because gcs is a key, value store the Stat call does not support last modification // time for directories (directories are an abstraction for key, value stores) // -// Keep in mind that gcs guarantees only eventual consistency, so do not assume -// that a successful write will mean immediate access to the data written (although -// in most regions a new object put has guaranteed read after write). The only true -// guarantee is that once you call Stat and receive a certain file size, that much of -// the file is already accessible. +// Note that the contents of incomplete uploads are not accessible even though +// Stat returns their length // // +build include_gcs @@ -25,7 +22,9 @@ import ( "math/rand" "net/http" "net/url" + "regexp" "sort" + "strconv" "strings" "time" @@ -34,7 +33,6 @@ import ( "golang.org/x/oauth2/google" "golang.org/x/oauth2/jwt" "google.golang.org/api/googleapi" - storageapi "google.golang.org/api/storage/v1" "google.golang.org/cloud" "google.golang.org/cloud/storage" @@ -46,8 +44,18 @@ import ( "github.com/docker/distribution/registry/storage/driver/factory" ) -const driverName = "gcs" -const dummyProjectID = "" +const ( + driverName = "gcs" + dummyProjectID = "" + + uploadSessionContentType = "application/x-docker-upload-session" + minChunkSize = 256 * 1024 + maxChunkSize = 20 * minChunkSize + + maxTries = 5 +) + +var rangeHeader = regexp.MustCompile(`^bytes=([0-9])+-([0-9]+)$`) // driverParameters is a struct that encapsulates all of the driver parameters after all values have been set type driverParameters struct { @@ -155,7 +163,17 @@ func (d *driver) Name() string { // GetContent retrieves the content stored at "path" as a []byte. // This should primarily be used for small objects. func (d *driver) GetContent(context ctx.Context, path string) ([]byte, error) { - rc, err := d.ReadStream(context, path, 0) + gcsContext := d.context(context) + name := d.pathToKey(path) + var rc io.ReadCloser + err := retry(func() error { + var err error + rc, err = storage.NewReader(gcsContext, d.bucket, name) + return err + }) + if err == storage.ErrObjectNotExist { + return nil, storagedriver.PathNotFoundError{Path: path} + } if err != nil { return nil, err } @@ -171,25 +189,53 @@ func (d *driver) GetContent(context ctx.Context, path string) ([]byte, error) { // PutContent stores the []byte content at a location designated by "path". // This should primarily be used for small objects. func (d *driver) PutContent(context ctx.Context, path string, contents []byte) error { - wc := storage.NewWriter(d.context(context), d.bucket, d.pathToKey(path)) - wc.ContentType = "application/octet-stream" - defer wc.Close() - _, err := wc.Write(contents) - return err + return retry(func() error { + wc := storage.NewWriter(d.context(context), d.bucket, d.pathToKey(path)) + wc.ContentType = "application/octet-stream" + return putContentsClose(wc, contents) + }) } -// ReadStream retrieves an io.ReadCloser for the content stored at "path" +// Reader retrieves an io.ReadCloser for the content stored at "path" // with a given byte offset. // May be used to resume reading a stream by providing a nonzero offset. -func (d *driver) ReadStream(context ctx.Context, path string, offset int64) (io.ReadCloser, error) { - name := d.pathToKey(path) +func (d *driver) Reader(context ctx.Context, path string, offset int64) (io.ReadCloser, error) { + res, err := getObject(d.client, d.bucket, d.pathToKey(path), offset) + if err != nil { + if res != nil { + if res.StatusCode == http.StatusNotFound { + res.Body.Close() + return nil, storagedriver.PathNotFoundError{Path: path} + } + if res.StatusCode == http.StatusRequestedRangeNotSatisfiable { + res.Body.Close() + obj, err := storageStatObject(d.context(context), d.bucket, d.pathToKey(path)) + if err != nil { + return nil, err + } + if offset == int64(obj.Size) { + return ioutil.NopCloser(bytes.NewReader([]byte{})), nil + } + return nil, storagedriver.InvalidOffsetError{Path: path, Offset: offset} + } + } + return nil, err + } + if res.Header.Get("Content-Type") == uploadSessionContentType { + defer res.Body.Close() + return nil, storagedriver.PathNotFoundError{Path: path} + } + return res.Body, nil +} + +func getObject(client *http.Client, bucket string, name string, offset int64) (*http.Response, error) { // copied from google.golang.org/cloud/storage#NewReader : // to set the additional "Range" header u := &url.URL{ Scheme: "https", Host: "storage.googleapis.com", - Path: fmt.Sprintf("/%s/%s", d.bucket, name), + Path: fmt.Sprintf("/%s/%s", bucket, name), } req, err := http.NewRequest("GET", u.String(), nil) if err != nil { @@ -198,122 +244,253 @@ func (d *driver) ReadStream(context ctx.Context, path string, offset int64) (io. if offset > 0 { req.Header.Set("Range", fmt.Sprintf("bytes=%v-", offset)) } - res, err := d.client.Do(req) + var res *http.Response + err = retry(func() error { + var err error + res, err = client.Do(req) + return err + }) if err != nil { return nil, err } - if res.StatusCode == http.StatusNotFound { - res.Body.Close() - return nil, storagedriver.PathNotFoundError{Path: path} + return res, googleapi.CheckMediaResponse(res) +} + +// Writer returns a FileWriter which will store the content written to it +// at the location designated by "path" after the call to Commit. +func (d *driver) Writer(context ctx.Context, path string, append bool) (storagedriver.FileWriter, error) { + writer := &writer{ + client: d.client, + bucket: d.bucket, + name: d.pathToKey(path), + buffer: make([]byte, maxChunkSize), } - if res.StatusCode == http.StatusRequestedRangeNotSatisfiable { - res.Body.Close() - obj, err := storageStatObject(d.context(context), d.bucket, name) + + if append { + err := writer.init(path) if err != nil { return nil, err } - if offset == int64(obj.Size) { - return ioutil.NopCloser(bytes.NewReader([]byte{})), nil - } - return nil, storagedriver.InvalidOffsetError{Path: path, Offset: offset} } - if res.StatusCode < 200 || res.StatusCode > 299 { - res.Body.Close() - return nil, fmt.Errorf("storage: can't read object %v/%v, status code: %v", d.bucket, name, res.Status) - } - return res.Body, nil + return writer, nil } -// WriteStream stores the contents of the provided io.ReadCloser at a -// location designated by the given path. -// May be used to resume writing a stream by providing a nonzero offset. -// The offset must be no larger than the CurrentSize for this path. -func (d *driver) WriteStream(context ctx.Context, path string, offset int64, reader io.Reader) (totalRead int64, err error) { - if offset < 0 { - return 0, storagedriver.InvalidOffsetError{Path: path, Offset: offset} - } +type writer struct { + client *http.Client + bucket string + name string + size int64 + offset int64 + closed bool + sessionURI string + buffer []byte + buffSize int +} - if offset == 0 { - return d.writeCompletely(context, path, 0, reader) - } - - service, err := storageapi.New(d.client) +// Cancel removes any written content from this FileWriter. +func (w *writer) Cancel() error { + err := w.checkClosed() if err != nil { - return 0, err - } - objService := storageapi.NewObjectsService(service) - var obj *storageapi.Object - err = retry(5, func() error { - o, err := objService.Get(d.bucket, d.pathToKey(path)).Do() - obj = o return err - }) - // obj, err := retry(5, objService.Get(d.bucket, d.pathToKey(path)).Do) + } + w.closed = true + err = storageDeleteObject(cloud.NewContext(dummyProjectID, w.client), w.bucket, w.name) if err != nil { - return 0, err - } - - // cannot append more chunks, so redo from scratch - if obj.ComponentCount >= 1023 { - return d.writeCompletely(context, path, offset, reader) - } - - // skip from reader - objSize := int64(obj.Size) - nn, err := skip(reader, objSize-offset) - if err != nil { - return nn, err - } - - // Size <= offset - partName := fmt.Sprintf("%v#part-%d#", d.pathToKey(path), obj.ComponentCount) - gcsContext := d.context(context) - wc := storage.NewWriter(gcsContext, d.bucket, partName) - wc.ContentType = "application/octet-stream" - - if objSize < offset { - err = writeZeros(wc, offset-objSize) - if err != nil { - wc.CloseWithError(err) - return nn, err + if status, ok := err.(*googleapi.Error); ok { + if status.Code == http.StatusNotFound { + err = nil + } + } + } + return err +} + +func (w *writer) Close() error { + if w.closed { + return nil + } + w.closed = true + + err := w.writeChunk() + if err != nil { + return err + } + + // Copy the remaining bytes from the buffer to the upload session + // Normally buffSize will be smaller than minChunkSize. However, in the + // unlikely event that the upload session failed to start, this number could be higher. + // In this case we can safely clip the remaining bytes to the minChunkSize + if w.buffSize > minChunkSize { + w.buffSize = minChunkSize + } + + // commit the writes by updating the upload session + err = retry(func() error { + wc := storage.NewWriter(cloud.NewContext(dummyProjectID, w.client), w.bucket, w.name) + wc.ContentType = uploadSessionContentType + wc.Metadata = map[string]string{ + "Session-URI": w.sessionURI, + "Offset": strconv.FormatInt(w.offset, 10), + } + return putContentsClose(wc, w.buffer[0:w.buffSize]) + }) + if err != nil { + return err + } + w.size = w.offset + int64(w.buffSize) + w.buffSize = 0 + return nil +} + +func putContentsClose(wc *storage.Writer, contents []byte) error { + size := len(contents) + var nn int + var err error + for nn < size { + n, err := wc.Write(contents[nn:size]) + nn += n + if err != nil { + break } } - n, err := io.Copy(wc, reader) if err != nil { wc.CloseWithError(err) - return nn, err + return err + } + return wc.Close() +} + +// Commit flushes all content written to this FileWriter and makes it +// available for future calls to StorageDriver.GetContent and +// StorageDriver.Reader. +func (w *writer) Commit() error { + + if err := w.checkClosed(); err != nil { + return err + } + w.closed = true + + // no session started yet just perform a simple upload + if w.sessionURI == "" { + err := retry(func() error { + wc := storage.NewWriter(cloud.NewContext(dummyProjectID, w.client), w.bucket, w.name) + wc.ContentType = "application/octet-stream" + return putContentsClose(wc, w.buffer[0:w.buffSize]) + }) + if err != nil { + return err + } + w.size = w.offset + int64(w.buffSize) + w.buffSize = 0 + return nil + } + size := w.offset + int64(w.buffSize) + var nn int + // loop must be performed at least once to ensure the file is committed even when + // the buffer is empty + for { + n, err := putChunk(w.client, w.sessionURI, w.buffer[nn:w.buffSize], w.offset, size) + nn += int(n) + w.offset += n + w.size = w.offset + if err != nil { + w.buffSize = copy(w.buffer, w.buffer[nn:w.buffSize]) + return err + } + if nn == w.buffSize { + break + } + } + w.buffSize = 0 + return nil +} + +func (w *writer) checkClosed() error { + if w.closed { + return fmt.Errorf("Writer already closed") + } + return nil +} + +func (w *writer) writeChunk() error { + var err error + // chunks can be uploaded only in multiples of minChunkSize + // chunkSize is a multiple of minChunkSize less than or equal to buffSize + chunkSize := w.buffSize - (w.buffSize % minChunkSize) + if chunkSize == 0 { + return nil + } + // if their is no sessionURI yet, obtain one by starting the session + if w.sessionURI == "" { + w.sessionURI, err = startSession(w.client, w.bucket, w.name) } - err = wc.Close() if err != nil { - return nn, err + return err } - // wc was closed successfully, so the temporary part exists, schedule it for deletion at the end - // of the function - defer storageDeleteObject(gcsContext, d.bucket, partName) + nn, err := putChunk(w.client, w.sessionURI, w.buffer[0:chunkSize], w.offset, -1) + w.offset += nn + if w.offset > w.size { + w.size = w.offset + } + // shift the remaining bytes to the start of the buffer + w.buffSize = copy(w.buffer, w.buffer[int(nn):w.buffSize]) - req := &storageapi.ComposeRequest{ - Destination: &storageapi.Object{Bucket: obj.Bucket, Name: obj.Name, ContentType: obj.ContentType}, - SourceObjects: []*storageapi.ComposeRequestSourceObjects{ - { - Name: obj.Name, - Generation: obj.Generation, - }, { - Name: partName, - Generation: wc.Object().Generation, - }}, + return err +} + +func (w *writer) Write(p []byte) (int, error) { + err := w.checkClosed() + if err != nil { + return 0, err } - err = retry(5, func() error { _, err := objService.Compose(d.bucket, obj.Name, req).Do(); return err }) - if err == nil { - nn = nn + n + var nn int + for nn < len(p) { + n := copy(w.buffer[w.buffSize:], p[nn:]) + w.buffSize += n + if w.buffSize == cap(w.buffer) { + err = w.writeChunk() + if err != nil { + break + } + } + nn += n } - return nn, err } +// Size returns the number of bytes written to this FileWriter. +func (w *writer) Size() int64 { + return w.size +} + +func (w *writer) init(path string) error { + res, err := getObject(w.client, w.bucket, w.name, 0) + if err != nil { + return err + } + defer res.Body.Close() + if res.Header.Get("Content-Type") != uploadSessionContentType { + return storagedriver.PathNotFoundError{Path: path} + } + offset, err := strconv.ParseInt(res.Header.Get("X-Goog-Meta-Offset"), 10, 64) + if err != nil { + return err + } + buffer, err := ioutil.ReadAll(res.Body) + if err != nil { + return err + } + w.sessionURI = res.Header.Get("X-Goog-Meta-Session-URI") + w.buffSize = copy(w.buffer, buffer) + w.offset = offset + w.size = offset + int64(w.buffSize) + return nil +} + type request func() error -func retry(maxTries int, req request) error { +func retry(req request) error { backoff := time.Second var err error for i := 0; i < maxTries; i++ { @@ -335,53 +512,6 @@ func retry(maxTries int, req request) error { return err } -func (d *driver) writeCompletely(context ctx.Context, path string, offset int64, reader io.Reader) (totalRead int64, err error) { - wc := storage.NewWriter(d.context(context), d.bucket, d.pathToKey(path)) - wc.ContentType = "application/octet-stream" - defer wc.Close() - - // Copy the first offset bytes of the existing contents - // (padded with zeros if needed) into the writer - if offset > 0 { - existing, err := d.ReadStream(context, path, 0) - if err != nil { - return 0, err - } - defer existing.Close() - n, err := io.CopyN(wc, existing, offset) - if err == io.EOF { - err = writeZeros(wc, offset-n) - } - if err != nil { - return 0, err - } - } - return io.Copy(wc, reader) -} - -func skip(reader io.Reader, count int64) (int64, error) { - if count <= 0 { - return 0, nil - } - return io.CopyN(ioutil.Discard, reader, count) -} - -func writeZeros(wc io.Writer, count int64) error { - buf := make([]byte, 32*1024) - for count > 0 { - size := cap(buf) - if int64(size) > count { - size = int(count) - } - n, err := wc.Write(buf[0:size]) - if err != nil { - return err - } - count = count - int64(n) - } - return nil -} - // Stat retrieves the FileInfo for the given path, including the current // size in bytes and the creation time. func (d *driver) Stat(context ctx.Context, path string) (storagedriver.FileInfo, error) { @@ -390,6 +520,9 @@ func (d *driver) Stat(context ctx.Context, path string) (storagedriver.FileInfo, gcsContext := d.context(context) obj, err := storageStatObject(gcsContext, d.bucket, d.pathToKey(path)) if err == nil { + if obj.ContentType == uploadSessionContentType { + return nil, storagedriver.PathNotFoundError{Path: path} + } fi = storagedriver.FileInfoFields{ Path: path, Size: obj.Size, @@ -440,15 +573,10 @@ func (d *driver) List(context ctx.Context, path string) ([]string, error) { } for _, object := range objects.Results { // GCS does not guarantee strong consistency between - // DELETE and LIST operationsCheck that the object is not deleted, - // so filter out any objects with a non-zero time-deleted - if object.Deleted.IsZero() { - name := object.Name - // Ignore objects with names that end with '#' (these are uploaded parts) - if name[len(name)-1] != '#' { - name = d.keyToPath(name) - list = append(list, name) - } + // DELETE and LIST operations. Check that the object is not deleted, + // and filter out any objects with a non-zero time-deleted + if object.Deleted.IsZero() && object.ContentType != uploadSessionContentType { + list = append(list, d.keyToPath(object.Name)) } } for _, subpath := range objects.Prefixes { @@ -474,7 +602,7 @@ func (d *driver) Move(context ctx.Context, sourcePath string, destPath string) e gcsContext := d.context(context) _, err := storageCopyObject(gcsContext, d.bucket, d.pathToKey(sourcePath), d.bucket, d.pathToKey(destPath), nil) if err != nil { - if status := err.(*googleapi.Error); status != nil { + if status, ok := err.(*googleapi.Error); ok { if status.Code == http.StatusNotFound { return storagedriver.PathNotFoundError{Path: sourcePath} } @@ -545,7 +673,7 @@ func (d *driver) Delete(context ctx.Context, path string) error { } err = storageDeleteObject(gcsContext, d.bucket, d.pathToKey(path)) if err != nil { - if status := err.(*googleapi.Error); status != nil { + if status, ok := err.(*googleapi.Error); ok { if status.Code == http.StatusNotFound { return storagedriver.PathNotFoundError{Path: path} } @@ -555,14 +683,14 @@ func (d *driver) Delete(context ctx.Context, path string) error { } func storageDeleteObject(context context.Context, bucket string, name string) error { - return retry(5, func() error { + return retry(func() error { return storage.DeleteObject(context, bucket, name) }) } func storageStatObject(context context.Context, bucket string, name string) (*storage.Object, error) { var obj *storage.Object - err := retry(5, func() error { + err := retry(func() error { var err error obj, err = storage.StatObject(context, bucket, name) return err @@ -572,7 +700,7 @@ func storageStatObject(context context.Context, bucket string, name string) (*st func storageListObjects(context context.Context, bucket string, q *storage.Query) (*storage.Objects, error) { var objs *storage.Objects - err := retry(5, func() error { + err := retry(func() error { var err error objs, err = storage.ListObjects(context, bucket, q) return err @@ -582,7 +710,7 @@ func storageListObjects(context context.Context, bucket string, q *storage.Query func storageCopyObject(context context.Context, srcBucket, srcName string, destBucket, destName string, attrs *storage.ObjectAttrs) (*storage.Object, error) { var obj *storage.Object - err := retry(5, func() error { + err := retry(func() error { var err error obj, err = storage.CopyObject(context, srcBucket, srcName, destBucket, destName, attrs) return err @@ -626,6 +754,80 @@ func (d *driver) URLFor(context ctx.Context, path string, options map[string]int return storage.SignedURL(d.bucket, name, opts) } +func startSession(client *http.Client, bucket string, name string) (uri string, err error) { + u := &url.URL{ + Scheme: "https", + Host: "www.googleapis.com", + Path: fmt.Sprintf("/upload/storage/v1/b/%v/o", bucket), + RawQuery: fmt.Sprintf("uploadType=resumable&name=%v", name), + } + err = retry(func() error { + req, err := http.NewRequest("POST", u.String(), nil) + if err != nil { + return err + } + req.Header.Set("X-Upload-Content-Type", "application/octet-stream") + req.Header.Set("Content-Length", "0") + resp, err := client.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + err = googleapi.CheckMediaResponse(resp) + if err != nil { + return err + } + uri = resp.Header.Get("Location") + return nil + }) + return uri, err +} + +func putChunk(client *http.Client, sessionURI string, chunk []byte, from int64, totalSize int64) (int64, error) { + bytesPut := int64(0) + err := retry(func() error { + req, err := http.NewRequest("PUT", sessionURI, bytes.NewReader(chunk)) + if err != nil { + return err + } + length := int64(len(chunk)) + to := from + length - 1 + size := "*" + if totalSize >= 0 { + size = strconv.FormatInt(totalSize, 10) + } + req.Header.Set("Content-Type", "application/octet-stream") + if from == to+1 { + req.Header.Set("Content-Range", fmt.Sprintf("bytes */%v", size)) + } else { + req.Header.Set("Content-Range", fmt.Sprintf("bytes %v-%v/%v", from, to, size)) + } + req.Header.Set("Content-Length", strconv.FormatInt(length, 10)) + + resp, err := client.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + if totalSize < 0 && resp.StatusCode == 308 { + groups := rangeHeader.FindStringSubmatch(resp.Header.Get("Range")) + end, err := strconv.ParseInt(groups[2], 10, 64) + if err != nil { + return err + } + bytesPut = end - from + 1 + return nil + } + err = googleapi.CheckMediaResponse(resp) + if err != nil { + return err + } + bytesPut = to - from + 1 + return nil + }) + return bytesPut, err +} + func (d *driver) context(context ctx.Context) context.Context { return cloud.WithContext(context, dummyProjectID, d.client) } diff --git a/registry/storage/driver/gcs/gcs_test.go b/registry/storage/driver/gcs/gcs_test.go index 7059b953..4852bf2c 100644 --- a/registry/storage/driver/gcs/gcs_test.go +++ b/registry/storage/driver/gcs/gcs_test.go @@ -85,6 +85,102 @@ func init() { }, skipGCS) } +// Test Committing a FileWriter without having called Write +func TestCommitEmpty(t *testing.T) { + if skipGCS() != "" { + t.Skip(skipGCS()) + } + + validRoot, err := ioutil.TempDir("", "driver-") + if err != nil { + t.Fatalf("unexpected error creating temporary directory: %v", err) + } + defer os.Remove(validRoot) + + driver, err := gcsDriverConstructor(validRoot) + if err != nil { + t.Fatalf("unexpected error creating rooted driver: %v", err) + } + + filename := "/test" + ctx := ctx.Background() + + writer, err := driver.Writer(ctx, filename, false) + defer driver.Delete(ctx, filename) + if err != nil { + t.Fatalf("driver.Writer: unexpected error: %v", err) + } + err = writer.Commit() + if err != nil { + t.Fatalf("writer.Commit: unexpected error: %v", err) + } + err = writer.Close() + if err != nil { + t.Fatalf("writer.Close: unexpected error: %v", err) + } + if writer.Size() != 0 { + t.Fatalf("writer.Size: %d != 0", writer.Size()) + } + readContents, err := driver.GetContent(ctx, filename) + if err != nil { + t.Fatalf("driver.GetContent: unexpected error: %v", err) + } + if len(readContents) != 0 { + t.Fatalf("len(driver.GetContent(..)): %d != 0", len(readContents)) + } +} + +// Test Committing a FileWriter after having written exactly +// defaultChunksize bytes. +func TestCommit(t *testing.T) { + if skipGCS() != "" { + t.Skip(skipGCS()) + } + + validRoot, err := ioutil.TempDir("", "driver-") + if err != nil { + t.Fatalf("unexpected error creating temporary directory: %v", err) + } + defer os.Remove(validRoot) + + driver, err := gcsDriverConstructor(validRoot) + if err != nil { + t.Fatalf("unexpected error creating rooted driver: %v", err) + } + + filename := "/test" + ctx := ctx.Background() + + contents := make([]byte, defaultChunkSize) + writer, err := driver.Writer(ctx, filename, false) + defer driver.Delete(ctx, filename) + if err != nil { + t.Fatalf("driver.Writer: unexpected error: %v", err) + } + _, err = writer.Write(contents) + if err != nil { + t.Fatalf("writer.Write: unexpected error: %v", err) + } + err = writer.Commit() + if err != nil { + t.Fatalf("writer.Commit: unexpected error: %v", err) + } + err = writer.Close() + if err != nil { + t.Fatalf("writer.Close: unexpected error: %v", err) + } + if writer.Size() != int64(len(contents)) { + t.Fatalf("writer.Size: %d != %d", writer.Size(), len(contents)) + } + readContents, err := driver.GetContent(ctx, filename) + if err != nil { + t.Fatalf("driver.GetContent: unexpected error: %v", err) + } + if len(readContents) != len(contents) { + t.Fatalf("len(driver.GetContent(..)): %d != %d", len(readContents), len(contents)) + } +} + func TestRetry(t *testing.T) { if skipGCS() != "" { t.Skip(skipGCS()) @@ -100,7 +196,7 @@ func TestRetry(t *testing.T) { } } - err := retry(2, func() error { + err := retry(func() error { return &googleapi.Error{ Code: 503, Message: "google api error", @@ -108,7 +204,7 @@ func TestRetry(t *testing.T) { }) assertError("googleapi: Error 503: google api error", err) - err = retry(2, func() error { + err = retry(func() error { return &googleapi.Error{ Code: 404, Message: "google api error", @@ -116,7 +212,7 @@ func TestRetry(t *testing.T) { }) assertError("googleapi: Error 404: google api error", err) - err = retry(2, func() error { + err = retry(func() error { return fmt.Errorf("error") }) assertError("error", err) From 307504713f5f6a8846cdc762898446ac87310119 Mon Sep 17 00:00:00 2001 From: Arthur Baars Date: Sun, 14 Feb 2016 18:15:15 +0000 Subject: [PATCH 4/7] Storagedriver: GCS: add chunksize parameter Signed-off-by: Arthur Baars --- docs/configuration.md | 1 + docs/storage-drivers/gcs.md | 11 +++++++ registry/storage/driver/gcs/gcs.go | 38 +++++++++++++++++++++++-- registry/storage/driver/gcs/gcs_test.go | 1 + 4 files changed, 48 insertions(+), 3 deletions(-) diff --git a/docs/configuration.md b/docs/configuration.md index ef01d2f6..f5dc0577 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -86,6 +86,7 @@ information about each option that appears later in this page. bucket: bucketname keyfile: /path/to/keyfile rootdirectory: /gcs/object/name/prefix + chunksize: 5242880 s3: accesskey: awsaccesskey secretkey: awssecretkey diff --git a/docs/storage-drivers/gcs.md b/docs/storage-drivers/gcs.md index 33cc94b3..0aa9b30d 100644 --- a/docs/storage-drivers/gcs.md +++ b/docs/storage-drivers/gcs.md @@ -52,6 +52,17 @@ An implementation of the `storagedriver.StorageDriver` interface which uses Goog This is a prefix that will be applied to all Google Cloud Storage keys to allow you to segment data in your bucket if necessary. + + + + chunksize + + + no (default 5242880) + + + This is the chunk size used for uploading large blobs, must be a multiple of 256*1024. + diff --git a/registry/storage/driver/gcs/gcs.go b/registry/storage/driver/gcs/gcs.go index 14600dee..abe0b9f6 100644 --- a/registry/storage/driver/gcs/gcs.go +++ b/registry/storage/driver/gcs/gcs.go @@ -22,6 +22,7 @@ import ( "math/rand" "net/http" "net/url" + "reflect" "regexp" "sort" "strconv" @@ -50,7 +51,7 @@ const ( uploadSessionContentType = "application/x-docker-upload-session" minChunkSize = 256 * 1024 - maxChunkSize = 20 * minChunkSize + defaultChunkSize = 20 * minChunkSize maxTries = 5 ) @@ -65,6 +66,7 @@ type driverParameters struct { privateKey []byte client *http.Client rootDirectory string + chunkSize int } func init() { @@ -87,6 +89,7 @@ type driver struct { email string privateKey []byte rootDirectory string + chunkSize int } // FromParameters constructs a new Driver with a given parameters map @@ -103,6 +106,31 @@ func FromParameters(parameters map[string]interface{}) (storagedriver.StorageDri rootDirectory = "" } + chunkSize := defaultChunkSize + chunkSizeParam, ok := parameters["chunksize"] + if ok { + switch v := chunkSizeParam.(type) { + case string: + vv, err := strconv.Atoi(v) + if err != nil { + return nil, fmt.Errorf("chunksize parameter must be an integer, %v invalid", chunkSizeParam) + } + chunkSize = vv + case int, uint, int32, uint32, uint64, int64: + chunkSize = int(reflect.ValueOf(v).Convert(reflect.TypeOf(chunkSize)).Int()) + default: + return nil, fmt.Errorf("invalid valud for chunksize: %#v", chunkSizeParam) + } + + if chunkSize < minChunkSize { + return nil, fmt.Errorf("The chunksize %#v parameter should be a number that is larger than or equal to %d", chunkSize, minChunkSize) + } + + if chunkSize%minChunkSize != 0 { + return nil, fmt.Errorf("chunksize should be a multiple of %d", minChunkSize) + } + } + var ts oauth2.TokenSource jwtConf := new(jwt.Config) if keyfile, ok := parameters["keyfile"]; ok { @@ -121,7 +149,6 @@ func FromParameters(parameters map[string]interface{}) (storagedriver.StorageDri if err != nil { return nil, err } - } params := driverParameters{ @@ -130,6 +157,7 @@ func FromParameters(parameters map[string]interface{}) (storagedriver.StorageDri email: jwtConf.Email, privateKey: jwtConf.PrivateKey, client: oauth2.NewClient(context.Background(), ts), + chunkSize: chunkSize, } return New(params) @@ -141,12 +169,16 @@ func New(params driverParameters) (storagedriver.StorageDriver, error) { if rootDirectory != "" { rootDirectory += "/" } + if params.chunkSize <= 0 || params.chunkSize%minChunkSize != 0 { + return nil, fmt.Errorf("Invalid chunksize: %d is not a positive multiple of %d", params.chunkSize, minChunkSize) + } d := &driver{ bucket: params.bucket, rootDirectory: rootDirectory, email: params.email, privateKey: params.privateKey, client: params.client, + chunkSize: params.chunkSize, } return &base.Base{ @@ -263,7 +295,7 @@ func (d *driver) Writer(context ctx.Context, path string, append bool) (storaged client: d.client, bucket: d.bucket, name: d.pathToKey(path), - buffer: make([]byte, maxChunkSize), + buffer: make([]byte, d.chunkSize), } if append { diff --git a/registry/storage/driver/gcs/gcs_test.go b/registry/storage/driver/gcs/gcs_test.go index 4852bf2c..f2808d5f 100644 --- a/registry/storage/driver/gcs/gcs_test.go +++ b/registry/storage/driver/gcs/gcs_test.go @@ -75,6 +75,7 @@ func init() { email: email, privateKey: privateKey, client: oauth2.NewClient(ctx.Background(), ts), + chunkSize: defaultChunkSize, } return New(parameters) From a9bf7a2aaec88cfeb69a92de8c7d5f212cf30c4d Mon Sep 17 00:00:00 2001 From: Li Yi Date: Sun, 21 Feb 2016 08:54:32 +0800 Subject: [PATCH 5/7] Support FileWriter interface for OSS storage driver Change-Id: Ie5533ad85f944800499ca1040fd67bf1378815e0 Signed-off-by: Li Yi --- cmd/registry/main.go | 2 +- registry/storage/driver/oss/oss.go | 523 +++++++++++------------------ 2 files changed, 203 insertions(+), 322 deletions(-) diff --git a/cmd/registry/main.go b/cmd/registry/main.go index 49fd7ba5..5f181003 100644 --- a/cmd/registry/main.go +++ b/cmd/registry/main.go @@ -13,7 +13,7 @@ import ( _ "github.com/docker/distribution/registry/storage/driver/gcs" _ "github.com/docker/distribution/registry/storage/driver/inmemory" _ "github.com/docker/distribution/registry/storage/driver/middleware/cloudfront" - // _ "github.com/docker/distribution/registry/storage/driver/oss" + _ "github.com/docker/distribution/registry/storage/driver/oss" _ "github.com/docker/distribution/registry/storage/driver/s3-aws" _ "github.com/docker/distribution/registry/storage/driver/s3-goamz" // _ "github.com/docker/distribution/registry/storage/driver/swift" diff --git a/registry/storage/driver/oss/oss.go b/registry/storage/driver/oss/oss.go index 1ec04525..7ae70334 100644 --- a/registry/storage/driver/oss/oss.go +++ b/registry/storage/driver/oss/oss.go @@ -20,7 +20,6 @@ import ( "reflect" "strconv" "strings" - "sync" "time" "github.com/docker/distribution/context" @@ -75,9 +74,6 @@ type driver struct { ChunkSize int64 Encrypt bool RootDirectory string - - pool sync.Pool // pool []byte buffers used for WriteStream - zeros []byte // shared, zero-valued buffer used for WriteStream } type baseEmbed struct { @@ -99,8 +95,7 @@ type Driver struct { // - encrypt func FromParameters(parameters map[string]interface{}) (*Driver, error) { // Providing no values for these is valid in case the user is authenticating - // with an IAM on an ec2 instance (in which case the instance credentials will - // be summoned when GetAuth is called) + accessKey, ok := parameters["accesskeyid"] if !ok { return nil, fmt.Errorf("No accesskeyid parameter provided") @@ -220,11 +215,6 @@ func New(params DriverParameters) (*Driver, error) { ChunkSize: params.ChunkSize, Encrypt: params.Encrypt, RootDirectory: params.RootDirectory, - zeros: make([]byte, params.ChunkSize), - } - - d.pool.New = func() interface{} { - return make([]byte, d.ChunkSize) } return &Driver{ @@ -256,9 +246,9 @@ func (d *driver) PutContent(ctx context.Context, path string, contents []byte) e return parseError(path, d.Bucket.Put(d.ossPath(path), contents, d.getContentType(), getPermissions(), d.getOptions())) } -// ReadStream retrieves an io.ReadCloser for the content stored at "path" with a +// Reader retrieves an io.ReadCloser for the content stored at "path" with a // given byte offset. -func (d *driver) ReadStream(ctx context.Context, path string, offset int64) (io.ReadCloser, error) { +func (d *driver) Reader(ctx context.Context, path string, offset int64) (io.ReadCloser, error) { headers := make(http.Header) headers.Add("Range", "bytes="+strconv.FormatInt(offset, 10)+"-") @@ -279,315 +269,37 @@ func (d *driver) ReadStream(ctx context.Context, path string, offset int64) (io. return resp.Body, nil } -// WriteStream stores the contents of the provided io.Reader at a -// location designated by the given path. The driver will know it has -// received the full contents when the reader returns io.EOF. The number -// of successfully READ bytes will be returned, even if an error is -// returned. May be used to resume writing a stream by providing a nonzero -// offset. Offsets past the current size will write from the position -// beyond the end of the file. -func (d *driver) WriteStream(ctx context.Context, path string, offset int64, reader io.Reader) (totalRead int64, err error) { - partNumber := 1 - bytesRead := 0 - var putErrChan chan error - parts := []oss.Part{} - var part oss.Part - done := make(chan struct{}) // stopgap to free up waiting goroutines - - multi, err := d.Bucket.InitMulti(d.ossPath(path), d.getContentType(), getPermissions(), d.getOptions()) +// Writer returns a FileWriter which will store the content written to it +// at the location designated by "path" after the call to Commit. +func (d *driver) Writer(ctx context.Context, path string, append bool) (storagedriver.FileWriter, error) { + key := d.ossPath(path) + if !append { + // TODO (brianbland): cancel other uploads at this path + multi, err := d.Bucket.InitMulti(key, d.getContentType(), getPermissions(), d.getOptions()) + if err != nil { + return nil, err + } + return d.newWriter(key, multi, nil), nil + } + multis, _, err := d.Bucket.ListMulti(key, "") if err != nil { - return 0, err + return nil, parseError(path, err) } - - buf := d.getbuf() - - // We never want to leave a dangling multipart upload, our only consistent state is - // when there is a whole object at path. This is in order to remain consistent with - // the stat call. - // - // Note that if the machine dies before executing the defer, we will be left with a dangling - // multipart upload, which will eventually be cleaned up, but we will lose all of the progress - // made prior to the machine crashing. - defer func() { - if putErrChan != nil { - if putErr := <-putErrChan; putErr != nil { - err = putErr - } + for _, multi := range multis { + if key != multi.Key { + continue } - - if len(parts) > 0 { - if multi == nil { - // Parts should be empty if the multi is not initialized - panic("Unreachable") - } else { - if multi.Complete(parts) != nil { - multi.Abort() - } - } - } - - d.putbuf(buf) // needs to be here to pick up new buf value - close(done) // free up any waiting goroutines - }() - - // Fills from 0 to total from current - fromSmallCurrent := func(total int64) error { - current, err := d.ReadStream(ctx, path, 0) + parts, err := multi.ListParts() if err != nil { - return err + return nil, parseError(path, err) } - - bytesRead = 0 - for int64(bytesRead) < total { - //The loop should very rarely enter a second iteration - nn, err := current.Read(buf[bytesRead:total]) - bytesRead += nn - if err != nil { - if err != io.EOF { - return err - } - - break - } - + var multiSize int64 + for _, part := range parts { + multiSize += part.Size } - return nil + return d.newWriter(key, multi, parts), nil } - - // Fills from parameter to chunkSize from reader - fromReader := func(from int64) error { - bytesRead = 0 - for from+int64(bytesRead) < d.ChunkSize { - nn, err := reader.Read(buf[from+int64(bytesRead):]) - totalRead += int64(nn) - bytesRead += nn - - if err != nil { - if err != io.EOF { - return err - } - - break - } - } - - if putErrChan == nil { - putErrChan = make(chan error) - } else { - if putErr := <-putErrChan; putErr != nil { - putErrChan = nil - return putErr - } - } - - go func(bytesRead int, from int64, buf []byte) { - defer d.putbuf(buf) // this buffer gets dropped after this call - - // DRAGONS(stevvooe): There are few things one might want to know - // about this section. First, the putErrChan is expecting an error - // and a nil or just a nil to come through the channel. This is - // covered by the silly defer below. The other aspect is the OSS - // retry backoff to deal with RequestTimeout errors. Even though - // the underlying OSS library should handle it, it doesn't seem to - // be part of the shouldRetry function (see denverdino/aliyungo/oss). - defer func() { - select { - case putErrChan <- nil: // for some reason, we do this no matter what. - case <-done: - return // ensure we don't leak the goroutine - } - }() - - if bytesRead <= 0 { - return - } - - var err error - var part oss.Part - - part, err = multi.PutPartWithTimeout(int(partNumber), bytes.NewReader(buf[0:int64(bytesRead)+from]), defaultTimeout) - - if err != nil { - logrus.Errorf("error putting part, aborting: %v", err) - select { - case putErrChan <- err: - case <-done: - return // don't leak the goroutine - } - } - - // parts and partNumber are safe, because this function is the - // only one modifying them and we force it to be executed - // serially. - parts = append(parts, part) - partNumber++ - }(bytesRead, from, buf) - - buf = d.getbuf() // use a new buffer for the next call - return nil - } - - if offset > 0 { - resp, err := d.Bucket.Head(d.ossPath(path), nil) - if err != nil { - if ossErr, ok := err.(*oss.Error); !ok || ossErr.StatusCode != http.StatusNotFound { - return 0, err - } - } - - currentLength := int64(0) - if err == nil { - currentLength = resp.ContentLength - } - - if currentLength >= offset { - if offset < d.ChunkSize { - // chunkSize > currentLength >= offset - if err = fromSmallCurrent(offset); err != nil { - return totalRead, err - } - - if err = fromReader(offset); err != nil { - return totalRead, err - } - - if totalRead+offset < d.ChunkSize { - return totalRead, nil - } - } else { - // currentLength >= offset >= chunkSize - _, part, err = multi.PutPartCopy(partNumber, - oss.CopyOptions{CopySourceOptions: "bytes=0-" + strconv.FormatInt(offset-1, 10)}, - d.Bucket.Path(d.ossPath(path))) - if err != nil { - return 0, err - } - - parts = append(parts, part) - partNumber++ - } - } else { - // Fills between parameters with 0s but only when to - from <= chunkSize - fromZeroFillSmall := func(from, to int64) error { - bytesRead = 0 - for from+int64(bytesRead) < to { - nn, err := bytes.NewReader(d.zeros).Read(buf[from+int64(bytesRead) : to]) - bytesRead += nn - if err != nil { - return err - } - } - - return nil - } - - // Fills between parameters with 0s, making new parts - fromZeroFillLarge := func(from, to int64) error { - bytesRead64 := int64(0) - for to-(from+bytesRead64) >= d.ChunkSize { - part, err := multi.PutPartWithTimeout(int(partNumber), bytes.NewReader(d.zeros), defaultTimeout) - if err != nil { - return err - } - bytesRead64 += d.ChunkSize - - parts = append(parts, part) - partNumber++ - } - - return fromZeroFillSmall(0, (to-from)%d.ChunkSize) - } - - // currentLength < offset - if currentLength < d.ChunkSize { - if offset < d.ChunkSize { - // chunkSize > offset > currentLength - if err = fromSmallCurrent(currentLength); err != nil { - return totalRead, err - } - - if err = fromZeroFillSmall(currentLength, offset); err != nil { - return totalRead, err - } - - if err = fromReader(offset); err != nil { - return totalRead, err - } - - if totalRead+offset < d.ChunkSize { - return totalRead, nil - } - } else { - // offset >= chunkSize > currentLength - if err = fromSmallCurrent(currentLength); err != nil { - return totalRead, err - } - - if err = fromZeroFillSmall(currentLength, d.ChunkSize); err != nil { - return totalRead, err - } - - part, err = multi.PutPartWithTimeout(int(partNumber), bytes.NewReader(buf), defaultTimeout) - if err != nil { - return totalRead, err - } - - parts = append(parts, part) - partNumber++ - - //Zero fill from chunkSize up to offset, then some reader - if err = fromZeroFillLarge(d.ChunkSize, offset); err != nil { - return totalRead, err - } - - if err = fromReader(offset % d.ChunkSize); err != nil { - return totalRead, err - } - - if totalRead+(offset%d.ChunkSize) < d.ChunkSize { - return totalRead, nil - } - } - } else { - // offset > currentLength >= chunkSize - _, part, err = multi.PutPartCopy(partNumber, - oss.CopyOptions{}, - d.Bucket.Path(d.ossPath(path))) - if err != nil { - return 0, err - } - - parts = append(parts, part) - partNumber++ - - //Zero fill from currentLength up to offset, then some reader - if err = fromZeroFillLarge(currentLength, offset); err != nil { - return totalRead, err - } - - if err = fromReader((offset - currentLength) % d.ChunkSize); err != nil { - return totalRead, err - } - - if totalRead+((offset-currentLength)%d.ChunkSize) < d.ChunkSize { - return totalRead, nil - } - } - - } - } - - for { - if err = fromReader(0); err != nil { - return totalRead, err - } - - if int64(bytesRead) < d.ChunkSize { - break - } - } - - return totalRead, nil + return nil, storagedriver.PathNotFoundError{Path: path} } // Stat retrieves the FileInfo for the given path, including the current size @@ -778,12 +490,181 @@ func (d *driver) getContentType() string { return "application/octet-stream" } -// getbuf returns a buffer from the driver's pool with length d.ChunkSize. -func (d *driver) getbuf() []byte { - return d.pool.Get().([]byte) +// writer attempts to upload parts to S3 in a buffered fashion where the last +// part is at least as large as the chunksize, so the multipart upload could be +// cleanly resumed in the future. This is violated if Close is called after less +// than a full chunk is written. +type writer struct { + driver *driver + key string + multi *oss.Multi + parts []oss.Part + size int64 + readyPart []byte + pendingPart []byte + closed bool + committed bool + cancelled bool } -func (d *driver) putbuf(p []byte) { - copy(p, d.zeros) - d.pool.Put(p) +func (d *driver) newWriter(key string, multi *oss.Multi, parts []oss.Part) storagedriver.FileWriter { + var size int64 + for _, part := range parts { + size += part.Size + } + return &writer{ + driver: d, + key: key, + multi: multi, + parts: parts, + size: size, + } +} + +func (w *writer) Write(p []byte) (int, error) { + if w.closed { + return 0, fmt.Errorf("already closed") + } else if w.committed { + return 0, fmt.Errorf("already committed") + } else if w.cancelled { + return 0, fmt.Errorf("already cancelled") + } + + // If the last written part is smaller than minChunkSize, we need to make a + // new multipart upload :sadface: + if len(w.parts) > 0 && int(w.parts[len(w.parts)-1].Size) < minChunkSize { + err := w.multi.Complete(w.parts) + if err != nil { + w.multi.Abort() + return 0, err + } + + multi, err := w.driver.Bucket.InitMulti(w.key, w.driver.getContentType(), getPermissions(), w.driver.getOptions()) + if err != nil { + return 0, err + } + w.multi = multi + + // If the entire written file is smaller than minChunkSize, we need to make + // a new part from scratch :double sad face: + if w.size < minChunkSize { + contents, err := w.driver.Bucket.Get(w.key) + if err != nil { + return 0, err + } + w.parts = nil + w.readyPart = contents + } else { + // Otherwise we can use the old file as the new first part + _, part, err := multi.PutPartCopy(1, oss.CopyOptions{}, w.driver.Bucket.Name+"/"+w.key) + if err != nil { + return 0, err + } + w.parts = []oss.Part{part} + } + } + + var n int + + for len(p) > 0 { + // If no parts are ready to write, fill up the first part + if neededBytes := int(w.driver.ChunkSize) - len(w.readyPart); neededBytes > 0 { + if len(p) >= neededBytes { + w.readyPart = append(w.readyPart, p[:neededBytes]...) + n += neededBytes + p = p[neededBytes:] + } else { + w.readyPart = append(w.readyPart, p...) + n += len(p) + p = nil + } + } + + if neededBytes := int(w.driver.ChunkSize) - len(w.pendingPart); neededBytes > 0 { + if len(p) >= neededBytes { + w.pendingPart = append(w.pendingPart, p[:neededBytes]...) + n += neededBytes + p = p[neededBytes:] + err := w.flushPart() + if err != nil { + w.size += int64(n) + return n, err + } + } else { + w.pendingPart = append(w.pendingPart, p...) + n += len(p) + p = nil + } + } + } + w.size += int64(n) + return n, nil +} + +func (w *writer) Size() int64 { + return w.size +} + +func (w *writer) Close() error { + if w.closed { + return fmt.Errorf("already closed") + } + w.closed = true + return w.flushPart() +} + +func (w *writer) Cancel() error { + if w.closed { + return fmt.Errorf("already closed") + } else if w.committed { + return fmt.Errorf("already committed") + } + w.cancelled = true + err := w.multi.Abort() + return err +} + +func (w *writer) Commit() error { + if w.closed { + return fmt.Errorf("already closed") + } else if w.committed { + return fmt.Errorf("already committed") + } else if w.cancelled { + return fmt.Errorf("already cancelled") + } + err := w.flushPart() + if err != nil { + return err + } + w.committed = true + err = w.multi.Complete(w.parts) + if err != nil { + w.multi.Abort() + return err + } + return nil +} + +// flushPart flushes buffers to write a part to S3. +// Only called by Write (with both buffers full) and Close/Commit (always) +func (w *writer) flushPart() error { + if len(w.readyPart) == 0 && len(w.pendingPart) == 0 { + // nothing to write + return nil + } + if len(w.pendingPart) < int(w.driver.ChunkSize) { + // closing with a small pending part + // combine ready and pending to avoid writing a small part + w.readyPart = append(w.readyPart, w.pendingPart...) + w.pendingPart = nil + } + + part, err := w.multi.PutPart(len(w.parts)+1, bytes.NewReader(w.readyPart)) + if err != nil { + return err + } + w.parts = append(w.parts, part) + w.readyPart = w.pendingPart + w.pendingPart = nil + return nil } From 7fd1db93125c2880699a8cb609bf84bbdc4b9773 Mon Sep 17 00:00:00 2001 From: Brian Bland Date: Tue, 8 Mar 2016 15:57:12 -0800 Subject: [PATCH 6/7] Updates Swift driver to support new storagedriver.FileWriter interface Signed-off-by: Brian Bland --- cmd/registry/main.go | 2 +- registry/storage/driver/swift/swift.go | 398 ++++++++++++------------- 2 files changed, 185 insertions(+), 215 deletions(-) diff --git a/cmd/registry/main.go b/cmd/registry/main.go index 5f181003..686af12b 100644 --- a/cmd/registry/main.go +++ b/cmd/registry/main.go @@ -16,7 +16,7 @@ import ( _ "github.com/docker/distribution/registry/storage/driver/oss" _ "github.com/docker/distribution/registry/storage/driver/s3-aws" _ "github.com/docker/distribution/registry/storage/driver/s3-goamz" - // _ "github.com/docker/distribution/registry/storage/driver/swift" + _ "github.com/docker/distribution/registry/storage/driver/swift" ) func main() { diff --git a/registry/storage/driver/swift/swift.go b/registry/storage/driver/swift/swift.go index 86bce794..c4d5a574 100644 --- a/registry/storage/driver/swift/swift.go +++ b/registry/storage/driver/swift/swift.go @@ -16,8 +16,8 @@ package swift import ( + "bufio" "bytes" - "crypto/md5" "crypto/rand" "crypto/sha1" "crypto/tls" @@ -49,6 +49,9 @@ const defaultChunkSize = 20 * 1024 * 1024 // minChunkSize defines the minimum size of a segment const minChunkSize = 1 << 20 +// contentType defines the Content-Type header associated with stored segments +const contentType = "application/octet-stream" + // readAfterWriteTimeout defines the time we wait before an object appears after having been uploaded var readAfterWriteTimeout = 15 * time.Second @@ -282,16 +285,16 @@ func (d *driver) GetContent(ctx context.Context, path string) ([]byte, error) { // PutContent stores the []byte content at a location designated by "path". func (d *driver) PutContent(ctx context.Context, path string, contents []byte) error { - err := d.Conn.ObjectPutBytes(d.Container, d.swiftPath(path), contents, d.getContentType()) + err := d.Conn.ObjectPutBytes(d.Container, d.swiftPath(path), contents, contentType) if err == swift.ObjectNotFound { return storagedriver.PathNotFoundError{Path: path} } return err } -// ReadStream retrieves an io.ReadCloser for the content stored at "path" with a +// Reader retrieves an io.ReadCloser for the content stored at "path" with a // given byte offset. -func (d *driver) ReadStream(ctx context.Context, path string, offset int64) (io.ReadCloser, error) { +func (d *driver) Reader(ctx context.Context, path string, offset int64) (io.ReadCloser, error) { headers := make(swift.Headers) headers["Range"] = "bytes=" + strconv.FormatInt(offset, 10) + "-" @@ -305,224 +308,46 @@ func (d *driver) ReadStream(ctx context.Context, path string, offset int64) (io. return file, err } -// WriteStream stores the contents of the provided io.Reader at a -// location designated by the given path. The driver will know it has -// received the full contents when the reader returns io.EOF. The number -// of successfully READ bytes will be returned, even if an error is -// returned. May be used to resume writing a stream by providing a nonzero -// offset. Offsets past the current size will write from the position -// beyond the end of the file. -func (d *driver) WriteStream(ctx context.Context, path string, offset int64, reader io.Reader) (int64, error) { +// Writer returns a FileWriter which will store the content written to it +// at the location designated by "path" after the call to Commit. +func (d *driver) Writer(ctx context.Context, path string, append bool) (storagedriver.FileWriter, error) { var ( - segments []swift.Object - multi io.Reader - paddingReader io.Reader - currentLength int64 - cursor int64 - segmentPath string + segments []swift.Object + segmentsPath string + err error ) - partNumber := 1 - chunkSize := int64(d.ChunkSize) - zeroBuf := make([]byte, d.ChunkSize) - hash := md5.New() - - getSegment := func() string { - return fmt.Sprintf("%s/%016d", segmentPath, partNumber) - } - - max := func(a int64, b int64) int64 { - if a > b { - return a - } - return b - } - - createManifest := true - info, headers, err := d.Conn.Object(d.Container, d.swiftPath(path)) - if err == nil { - manifest, ok := headers["X-Object-Manifest"] - if !ok { - if segmentPath, err = d.swiftSegmentPath(path); err != nil { - return 0, err - } - if err := d.Conn.ObjectMove(d.Container, d.swiftPath(path), d.Container, getSegment()); err != nil { - return 0, err - } - segments = append(segments, info) - } else { - _, segmentPath = parseManifest(manifest) - if segments, err = d.getAllSegments(segmentPath); err != nil { - return 0, err - } - createManifest = false - } - currentLength = info.Bytes - } else if err == swift.ObjectNotFound { - if segmentPath, err = d.swiftSegmentPath(path); err != nil { - return 0, err + if !append { + segmentsPath, err = d.swiftSegmentPath(path) + if err != nil { + return nil, err } } else { - return 0, err - } - - // First, we skip the existing segments that are not modified by this call - for i := range segments { - if offset < cursor+segments[i].Bytes { - break + info, headers, err := d.Conn.Object(d.Container, d.swiftPath(path)) + if err == swift.ObjectNotFound { + return nil, storagedriver.PathNotFoundError{Path: path} + } else if err != nil { + return nil, err } - cursor += segments[i].Bytes - hash.Write([]byte(segments[i].Hash)) - partNumber++ - } - - // We reached the end of the file but we haven't reached 'offset' yet - // Therefore we add blocks of zeros - if offset >= currentLength { - for offset-currentLength >= chunkSize { - // Insert a block a zero - headers, err := d.Conn.ObjectPut(d.Container, getSegment(), bytes.NewReader(zeroBuf), false, "", d.getContentType(), nil) + manifest, ok := headers["X-Object-Manifest"] + if !ok { + segmentsPath, err = d.swiftSegmentPath(path) if err != nil { - if err == swift.ObjectNotFound { - return 0, storagedriver.PathNotFoundError{Path: getSegment()} - } - return 0, err + return nil, err } - currentLength += chunkSize - partNumber++ - hash.Write([]byte(headers["Etag"])) - } - - cursor = currentLength - paddingReader = bytes.NewReader(zeroBuf) - } else if offset-cursor > 0 { - // Offset is inside the current segment : we need to read the - // data from the beginning of the segment to offset - file, _, err := d.Conn.ObjectOpen(d.Container, getSegment(), false, nil) - if err != nil { - if err == swift.ObjectNotFound { - return 0, storagedriver.PathNotFoundError{Path: getSegment()} + if err := d.Conn.ObjectMove(d.Container, d.swiftPath(path), d.Container, segmentPath(segmentsPath, len(segments))); err != nil { + return nil, err } - return 0, err - } - defer file.Close() - paddingReader = file - } - - readers := []io.Reader{} - if paddingReader != nil { - readers = append(readers, io.LimitReader(paddingReader, offset-cursor)) - } - readers = append(readers, io.LimitReader(reader, chunkSize-(offset-cursor))) - multi = io.MultiReader(readers...) - - writeSegment := func(segment string) (finished bool, bytesRead int64, err error) { - currentSegment, err := d.Conn.ObjectCreate(d.Container, segment, false, "", d.getContentType(), nil) - if err != nil { - if err == swift.ObjectNotFound { - return false, bytesRead, storagedriver.PathNotFoundError{Path: segment} + segments = []swift.Object{info} + } else { + _, segmentsPath = parseManifest(manifest) + if segments, err = d.getAllSegments(segmentsPath); err != nil { + return nil, err } - return false, bytesRead, err - } - - segmentHash := md5.New() - writer := io.MultiWriter(currentSegment, segmentHash) - - n, err := io.Copy(writer, multi) - if err != nil { - return false, bytesRead, err - } - - if n > 0 { - defer func() { - closeError := currentSegment.Close() - if err != nil { - err = closeError - } - hexHash := hex.EncodeToString(segmentHash.Sum(nil)) - hash.Write([]byte(hexHash)) - }() - bytesRead += n - max(0, offset-cursor) - } - - if n < chunkSize { - // We wrote all the data - if cursor+n < currentLength { - // Copy the end of the chunk - headers := make(swift.Headers) - headers["Range"] = "bytes=" + strconv.FormatInt(cursor+n, 10) + "-" + strconv.FormatInt(cursor+chunkSize, 10) - file, _, err := d.Conn.ObjectOpen(d.Container, d.swiftPath(path), false, headers) - if err != nil { - if err == swift.ObjectNotFound { - return false, bytesRead, storagedriver.PathNotFoundError{Path: path} - } - return false, bytesRead, err - } - - _, copyErr := io.Copy(writer, file) - - if err := file.Close(); err != nil { - if err == swift.ObjectNotFound { - return false, bytesRead, storagedriver.PathNotFoundError{Path: path} - } - return false, bytesRead, err - } - - if copyErr != nil { - return false, bytesRead, copyErr - } - } - - return true, bytesRead, nil - } - - multi = io.LimitReader(reader, chunkSize) - cursor += chunkSize - partNumber++ - - return false, bytesRead, nil - } - - finished := false - read := int64(0) - bytesRead := int64(0) - for finished == false { - finished, read, err = writeSegment(getSegment()) - bytesRead += read - if err != nil { - return bytesRead, err } } - for ; partNumber < len(segments); partNumber++ { - hash.Write([]byte(segments[partNumber].Hash)) - } - - if createManifest { - if err := d.createManifest(path, d.Container+"/"+segmentPath); err != nil { - return 0, err - } - } - - expectedHash := hex.EncodeToString(hash.Sum(nil)) - waitingTime := readAfterWriteWait - endTime := time.Now().Add(readAfterWriteTimeout) - for { - var infos swift.Object - if infos, _, err = d.Conn.Object(d.Container, d.swiftPath(path)); err == nil { - if strings.Trim(infos.Hash, "\"") == expectedHash { - return bytesRead, nil - } - err = fmt.Errorf("Timeout expired while waiting for segments of %s to show up", path) - } - if time.Now().Add(waitingTime).After(endTime) { - break - } - time.Sleep(waitingTime) - waitingTime *= 2 - } - - return bytesRead, err + return d.newWriter(path, segmentsPath, segments), nil } // Stat retrieves the FileInfo for the given path, including the current size @@ -763,10 +588,6 @@ func (d *driver) swiftSegmentPath(path string) (string, error) { return strings.TrimLeft(strings.TrimRight(d.Prefix+"/segments/"+path[0:3]+"/"+path[3:], "/"), "/"), nil } -func (d *driver) getContentType() string { - return "application/octet-stream" -} - func (d *driver) getAllSegments(path string) ([]swift.Object, error) { segments, err := d.Conn.ObjectsAll(d.Container, &swift.ObjectsOpts{Prefix: path}) if err == swift.ContainerNotFound { @@ -778,7 +599,7 @@ func (d *driver) getAllSegments(path string) ([]swift.Object, error) { func (d *driver) createManifest(path string, segments string) error { headers := make(swift.Headers) headers["X-Object-Manifest"] = segments - manifest, err := d.Conn.ObjectCreate(d.Container, d.swiftPath(path), false, "", d.getContentType(), headers) + manifest, err := d.Conn.ObjectCreate(d.Container, d.swiftPath(path), false, "", contentType, headers) if err != nil { if err == swift.ObjectNotFound { return storagedriver.PathNotFoundError{Path: path} @@ -810,3 +631,152 @@ func generateSecret() (string, error) { } return hex.EncodeToString(secretBytes[:]), nil } + +func segmentPath(segmentsPath string, partNumber int) string { + return fmt.Sprintf("%s/%016d", segmentsPath, partNumber) +} + +type writer struct { + driver *driver + path string + segmentsPath string + size int64 + bw *bufio.Writer + closed bool + committed bool + cancelled bool +} + +func (d *driver) newWriter(path, segmentsPath string, segments []swift.Object) storagedriver.FileWriter { + var size int64 + for _, segment := range segments { + size += segment.Bytes + } + return &writer{ + driver: d, + path: path, + segmentsPath: segmentsPath, + size: size, + bw: bufio.NewWriterSize(&segmentWriter{ + conn: d.Conn, + container: d.Container, + segmentsPath: segmentsPath, + segmentNumber: len(segments) + 1, + maxChunkSize: d.ChunkSize, + }, d.ChunkSize), + } +} + +func (w *writer) Write(p []byte) (int, error) { + if w.closed { + return 0, fmt.Errorf("already closed") + } else if w.committed { + return 0, fmt.Errorf("already committed") + } else if w.cancelled { + return 0, fmt.Errorf("already cancelled") + } + + n, err := w.bw.Write(p) + w.size += int64(n) + return n, err +} + +func (w *writer) Size() int64 { + return w.size +} + +func (w *writer) Close() error { + if w.closed { + return fmt.Errorf("already closed") + } + + if err := w.bw.Flush(); err != nil { + return err + } + + if !w.committed && !w.cancelled { + if err := w.driver.createManifest(w.path, w.driver.Container+"/"+w.segmentsPath); err != nil { + return err + } + } + w.closed = true + + return nil +} + +func (w *writer) Cancel() error { + if w.closed { + return fmt.Errorf("already closed") + } else if w.committed { + return fmt.Errorf("already committed") + } + w.cancelled = true + return w.driver.Delete(context.Background(), w.path) +} + +func (w *writer) Commit() error { + if w.closed { + return fmt.Errorf("already closed") + } else if w.committed { + return fmt.Errorf("already committed") + } else if w.cancelled { + return fmt.Errorf("already cancelled") + } + + if err := w.bw.Flush(); err != nil { + return err + } + + if err := w.driver.createManifest(w.path, w.driver.Container+"/"+w.segmentsPath); err != nil { + return err + } + + w.committed = true + + var err error + waitingTime := readAfterWriteWait + endTime := time.Now().Add(readAfterWriteTimeout) + for { + var info swift.Object + if info, _, err = w.driver.Conn.Object(w.driver.Container, w.driver.swiftPath(w.path)); err == nil { + if info.Bytes == w.size { + break + } + err = fmt.Errorf("Timeout expired while waiting for segments of %s to show up", w.path) + } + if time.Now().Add(waitingTime).After(endTime) { + break + } + time.Sleep(waitingTime) + waitingTime *= 2 + } + + return err +} + +type segmentWriter struct { + conn swift.Connection + container string + segmentsPath string + segmentNumber int + maxChunkSize int +} + +func (sw *segmentWriter) Write(p []byte) (int, error) { + n := 0 + for offset := 0; offset < len(p); offset += sw.maxChunkSize { + chunkSize := sw.maxChunkSize + if offset+chunkSize > len(p) { + chunkSize = len(p) - offset + } + _, err := sw.conn.ObjectPut(sw.container, segmentPath(sw.segmentsPath, sw.segmentNumber), bytes.NewReader(p[offset:offset+chunkSize]), false, "", contentType, nil) + if err != nil { + return n, err + } + + sw.segmentNumber++ + n += chunkSize + } + + return n, nil +} From 5967d333425a8dd5d36c5bb456098839654d38af Mon Sep 17 00:00:00 2001 From: Brian Bland Date: Thu, 10 Mar 2016 16:46:43 -0800 Subject: [PATCH 7/7] Removes ceph rados driver in favor of Swift API gateway support Signed-off-by: Brian Bland --- CONTRIBUTING.md | 4 +- Dockerfile | 4 +- Godeps/Godeps.json | 5 - .../src/github.com/noahdesu/go-ceph/LICENSE | 21 - .../github.com/noahdesu/go-ceph/rados/conn.go | 300 -------- .../github.com/noahdesu/go-ceph/rados/doc.go | 4 - .../noahdesu/go-ceph/rados/ioctx.go | 547 -------------- .../noahdesu/go-ceph/rados/rados.go | 54 -- .../noahdesu/go-ceph/rados/rados_test.go | 703 ------------------ circle.yml | 7 +- cmd/registry/rados.go | 5 - contrib/ceph/ci-setup.sh | 122 --- docs/building.md | 5 - docs/configuration.md | 14 - docs/introduction.md | 2 +- docs/storage-drivers/oss.md | 0 docs/storage-drivers/rados.md | 83 --- docs/storagedrivers.md | 1 - registry/storage/driver/rados/doc.go | 3 - registry/storage/driver/rados/rados.go | 632 ---------------- registry/storage/driver/rados/rados_test.go | 40 - 21 files changed, 6 insertions(+), 2550 deletions(-) delete mode 100644 Godeps/_workspace/src/github.com/noahdesu/go-ceph/LICENSE delete mode 100644 Godeps/_workspace/src/github.com/noahdesu/go-ceph/rados/conn.go delete mode 100644 Godeps/_workspace/src/github.com/noahdesu/go-ceph/rados/doc.go delete mode 100644 Godeps/_workspace/src/github.com/noahdesu/go-ceph/rados/ioctx.go delete mode 100644 Godeps/_workspace/src/github.com/noahdesu/go-ceph/rados/rados.go delete mode 100644 Godeps/_workspace/src/github.com/noahdesu/go-ceph/rados/rados_test.go delete mode 100644 cmd/registry/rados.go delete mode 100755 contrib/ceph/ci-setup.sh mode change 100755 => 100644 docs/storage-drivers/oss.md delete mode 100644 docs/storage-drivers/rados.md delete mode 100644 registry/storage/driver/rados/doc.go delete mode 100644 registry/storage/driver/rados/rados.go delete mode 100644 registry/storage/driver/rados/rados_test.go diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 545137f6..7cc7aedf 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -76,7 +76,7 @@ Some simple rules to ensure quick merge: You are heavily encouraged to first discuss what you want to do. You can do so on the irc channel, or by opening an issue that clearly describes the use case you want to fulfill, or the problem you are trying to solve. If this is a major new feature, you should then submit a proposal that describes your technical solution and reasoning. -If you did discuss it first, this will likely be greenlighted very fast. It's advisable to address all feedback on this proposal before starting actual work. +If you did discuss it first, this will likely be greenlighted very fast. It's advisable to address all feedback on this proposal before starting actual work. Then you should submit your implementation, clearly linking to the issue (and possible proposal). @@ -90,7 +90,7 @@ It's mandatory to: Complying to these simple rules will greatly accelerate the review process, and will ensure you have a pleasant experience in contributing code to the Registry. -Have a look at a great, successful contribution: the [Ceph driver PR](https://github.com/docker/distribution/pull/443) +Have a look at a great, successful contribution: the [Swift driver PR](https://github.com/docker/distribution/pull/493) ## Coding Style diff --git a/Dockerfile b/Dockerfile index bf4e0d7f..0ab9629e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,12 +1,12 @@ FROM golang:1.5.3 RUN apt-get update && \ - apt-get install -y librados-dev apache2-utils && \ + apt-get install -y apache2-utils && \ rm -rf /var/lib/apt/lists/* ENV DISTRIBUTION_DIR /go/src/github.com/docker/distribution ENV GOPATH $DISTRIBUTION_DIR/Godeps/_workspace:$GOPATH -ENV DOCKER_BUILDTAGS include_rados include_oss include_gcs +ENV DOCKER_BUILDTAGS include_oss include_gcs WORKDIR $DISTRIBUTION_DIR COPY . $DISTRIBUTION_DIR diff --git a/Godeps/Godeps.json b/Godeps/Godeps.json index 74b6ae5e..cd38be77 100644 --- a/Godeps/Godeps.json +++ b/Godeps/Godeps.json @@ -164,11 +164,6 @@ "ImportPath": "github.com/ncw/swift", "Rev": "c54732e87b0b283d1baf0a18db689d0aea460ba3" }, - { - "ImportPath": "github.com/noahdesu/go-ceph/rados", - "Comment": "v.0.3.0-29-gb15639c", - "Rev": "b15639c44c05368348355229070361395d9152ee" - }, { "ImportPath": "github.com/spf13/cobra", "Rev": "312092086bed4968099259622145a0c9ae280064" diff --git a/Godeps/_workspace/src/github.com/noahdesu/go-ceph/LICENSE b/Godeps/_workspace/src/github.com/noahdesu/go-ceph/LICENSE deleted file mode 100644 index 08d70bfc..00000000 --- a/Godeps/_workspace/src/github.com/noahdesu/go-ceph/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -The MIT License (MIT) - -Copyright (c) 2014 Noah Watkins - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/Godeps/_workspace/src/github.com/noahdesu/go-ceph/rados/conn.go b/Godeps/_workspace/src/github.com/noahdesu/go-ceph/rados/conn.go deleted file mode 100644 index af3cfebe..00000000 --- a/Godeps/_workspace/src/github.com/noahdesu/go-ceph/rados/conn.go +++ /dev/null @@ -1,300 +0,0 @@ -package rados - -// #cgo LDFLAGS: -lrados -// #include -// #include -import "C" - -import "unsafe" -import "bytes" - -// ClusterStat represents Ceph cluster statistics. -type ClusterStat struct { - Kb uint64 - Kb_used uint64 - Kb_avail uint64 - Num_objects uint64 -} - -// Conn is a connection handle to a Ceph cluster. -type Conn struct { - cluster C.rados_t -} - -// PingMonitor sends a ping to a monitor and returns the reply. -func (c *Conn) PingMonitor(id string) (string, error) { - c_id := C.CString(id) - defer C.free(unsafe.Pointer(c_id)) - - var strlen C.size_t - var strout *C.char - - ret := C.rados_ping_monitor(c.cluster, c_id, &strout, &strlen) - defer C.rados_buffer_free(strout) - - if ret == 0 { - reply := C.GoStringN(strout, (C.int)(strlen)) - return reply, nil - } else { - return "", RadosError(int(ret)) - } -} - -// Connect establishes a connection to a RADOS cluster. It returns an error, -// if any. -func (c *Conn) Connect() error { - ret := C.rados_connect(c.cluster) - if ret == 0 { - return nil - } else { - return RadosError(int(ret)) - } -} - -// Shutdown disconnects from the cluster. -func (c *Conn) Shutdown() { - C.rados_shutdown(c.cluster) -} - -// ReadConfigFile configures the connection using a Ceph configuration file. -func (c *Conn) ReadConfigFile(path string) error { - c_path := C.CString(path) - defer C.free(unsafe.Pointer(c_path)) - ret := C.rados_conf_read_file(c.cluster, c_path) - if ret == 0 { - return nil - } else { - return RadosError(int(ret)) - } -} - -// ReadDefaultConfigFile configures the connection using a Ceph configuration -// file located at default locations. -func (c *Conn) ReadDefaultConfigFile() error { - ret := C.rados_conf_read_file(c.cluster, nil) - if ret == 0 { - return nil - } else { - return RadosError(int(ret)) - } -} - -func (c *Conn) OpenIOContext(pool string) (*IOContext, error) { - c_pool := C.CString(pool) - defer C.free(unsafe.Pointer(c_pool)) - ioctx := &IOContext{} - ret := C.rados_ioctx_create(c.cluster, c_pool, &ioctx.ioctx) - if ret == 0 { - return ioctx, nil - } else { - return nil, RadosError(int(ret)) - } -} - -// ListPools returns the names of all existing pools. -func (c *Conn) ListPools() (names []string, err error) { - buf := make([]byte, 4096) - for { - ret := int(C.rados_pool_list(c.cluster, - (*C.char)(unsafe.Pointer(&buf[0])), C.size_t(len(buf)))) - if ret < 0 { - return nil, RadosError(int(ret)) - } - - if ret > len(buf) { - buf = make([]byte, ret) - continue - } - - tmp := bytes.SplitAfter(buf[:ret-1], []byte{0}) - for _, s := range tmp { - if len(s) > 0 { - name := C.GoString((*C.char)(unsafe.Pointer(&s[0]))) - names = append(names, name) - } - } - - return names, nil - } -} - -// SetConfigOption sets the value of the configuration option identified by -// the given name. -func (c *Conn) SetConfigOption(option, value string) error { - c_opt, c_val := C.CString(option), C.CString(value) - defer C.free(unsafe.Pointer(c_opt)) - defer C.free(unsafe.Pointer(c_val)) - ret := C.rados_conf_set(c.cluster, c_opt, c_val) - if ret < 0 { - return RadosError(int(ret)) - } else { - return nil - } -} - -// GetConfigOption returns the value of the Ceph configuration option -// identified by the given name. -func (c *Conn) GetConfigOption(name string) (value string, err error) { - buf := make([]byte, 4096) - c_name := C.CString(name) - defer C.free(unsafe.Pointer(c_name)) - ret := int(C.rados_conf_get(c.cluster, c_name, - (*C.char)(unsafe.Pointer(&buf[0])), C.size_t(len(buf)))) - // FIXME: ret may be -ENAMETOOLONG if the buffer is not large enough. We - // can handle this case, but we need a reliable way to test for - // -ENAMETOOLONG constant. Will the syscall/Errno stuff in Go help? - if ret == 0 { - value = C.GoString((*C.char)(unsafe.Pointer(&buf[0]))) - return value, nil - } else { - return "", RadosError(ret) - } -} - -// WaitForLatestOSDMap blocks the caller until the latest OSD map has been -// retrieved. -func (c *Conn) WaitForLatestOSDMap() error { - ret := C.rados_wait_for_latest_osdmap(c.cluster) - if ret < 0 { - return RadosError(int(ret)) - } else { - return nil - } -} - -// GetClusterStat returns statistics about the cluster associated with the -// connection. -func (c *Conn) GetClusterStats() (stat ClusterStat, err error) { - c_stat := C.struct_rados_cluster_stat_t{} - ret := C.rados_cluster_stat(c.cluster, &c_stat) - if ret < 0 { - return ClusterStat{}, RadosError(int(ret)) - } else { - return ClusterStat{ - Kb: uint64(c_stat.kb), - Kb_used: uint64(c_stat.kb_used), - Kb_avail: uint64(c_stat.kb_avail), - Num_objects: uint64(c_stat.num_objects), - }, nil - } -} - -// ParseCmdLineArgs configures the connection from command line arguments. -func (c *Conn) ParseCmdLineArgs(args []string) error { - // add an empty element 0 -- Ceph treats the array as the actual contents - // of argv and skips the first element (the executable name) - argc := C.int(len(args) + 1) - argv := make([]*C.char, argc) - - // make the first element a string just in case it is ever examined - argv[0] = C.CString("placeholder") - defer C.free(unsafe.Pointer(argv[0])) - - for i, arg := range args { - argv[i+1] = C.CString(arg) - defer C.free(unsafe.Pointer(argv[i+1])) - } - - ret := C.rados_conf_parse_argv(c.cluster, argc, &argv[0]) - if ret < 0 { - return RadosError(int(ret)) - } else { - return nil - } -} - -// ParseDefaultConfigEnv configures the connection from the default Ceph -// environment variable(s). -func (c *Conn) ParseDefaultConfigEnv() error { - ret := C.rados_conf_parse_env(c.cluster, nil) - if ret == 0 { - return nil - } else { - return RadosError(int(ret)) - } -} - -// GetFSID returns the fsid of the cluster as a hexadecimal string. The fsid -// is a unique identifier of an entire Ceph cluster. -func (c *Conn) GetFSID() (fsid string, err error) { - buf := make([]byte, 37) - ret := int(C.rados_cluster_fsid(c.cluster, - (*C.char)(unsafe.Pointer(&buf[0])), C.size_t(len(buf)))) - // FIXME: the success case isn't documented correctly in librados.h - if ret == 36 { - fsid = C.GoString((*C.char)(unsafe.Pointer(&buf[0]))) - return fsid, nil - } else { - return "", RadosError(int(ret)) - } -} - -// GetInstanceID returns a globally unique identifier for the cluster -// connection instance. -func (c *Conn) GetInstanceID() uint64 { - // FIXME: are there any error cases for this? - return uint64(C.rados_get_instance_id(c.cluster)) -} - -// MakePool creates a new pool with default settings. -func (c *Conn) MakePool(name string) error { - c_name := C.CString(name) - defer C.free(unsafe.Pointer(c_name)) - ret := int(C.rados_pool_create(c.cluster, c_name)) - if ret == 0 { - return nil - } else { - return RadosError(ret) - } -} - -// DeletePool deletes a pool and all the data inside the pool. -func (c *Conn) DeletePool(name string) error { - c_name := C.CString(name) - defer C.free(unsafe.Pointer(c_name)) - ret := int(C.rados_pool_delete(c.cluster, c_name)) - if ret == 0 { - return nil - } else { - return RadosError(ret) - } -} - -// MonCommand sends a command to one of the monitors -func (c *Conn) MonCommand(args []byte) (buffer []byte, info string, err error) { - argv := make([]*C.char, len(args)) - for i, _ := range args { - argv[i] = (*C.char)(unsafe.Pointer(&args[i])) - } - - var ( - outs, outbuf *C.char - outslen, outbuflen C.size_t - ) - inbuf := C.CString("") - defer C.free(unsafe.Pointer(inbuf)) - - ret := C.rados_mon_command(c.cluster, - &argv[0], C.size_t(len(args)), - inbuf, // bulk input (e.g. crush map) - C.size_t(0), // length inbuf - &outbuf, // buffer - &outbuflen, // buffer length - &outs, // status string - &outslen) - - if outslen > 0 { - info = C.GoStringN(outs, C.int(outslen)) - C.free(unsafe.Pointer(outs)) - } - if outbuflen > 0 { - buffer = C.GoBytes(unsafe.Pointer(outbuf), C.int(outbuflen)) - C.free(unsafe.Pointer(outbuf)) - } - if ret != 0 { - err = RadosError(int(ret)) - return nil, info, err - } - - return -} diff --git a/Godeps/_workspace/src/github.com/noahdesu/go-ceph/rados/doc.go b/Godeps/_workspace/src/github.com/noahdesu/go-ceph/rados/doc.go deleted file mode 100644 index 14babe93..00000000 --- a/Godeps/_workspace/src/github.com/noahdesu/go-ceph/rados/doc.go +++ /dev/null @@ -1,4 +0,0 @@ -/* -Set of wrappers around librados API. -*/ -package rados diff --git a/Godeps/_workspace/src/github.com/noahdesu/go-ceph/rados/ioctx.go b/Godeps/_workspace/src/github.com/noahdesu/go-ceph/rados/ioctx.go deleted file mode 100644 index ef67b4fb..00000000 --- a/Godeps/_workspace/src/github.com/noahdesu/go-ceph/rados/ioctx.go +++ /dev/null @@ -1,547 +0,0 @@ -package rados - -// #cgo LDFLAGS: -lrados -// #include -// #include -import "C" - -import "unsafe" -import "time" - -// PoolStat represents Ceph pool statistics. -type PoolStat struct { - // space used in bytes - Num_bytes uint64 - // space used in KB - Num_kb uint64 - // number of objects in the pool - Num_objects uint64 - // number of clones of objects - Num_object_clones uint64 - // num_objects * num_replicas - Num_object_copies uint64 - Num_objects_missing_on_primary uint64 - // number of objects found on no OSDs - Num_objects_unfound uint64 - // number of objects replicated fewer times than they should be - // (but found on at least one OSD) - Num_objects_degraded uint64 - Num_rd uint64 - Num_rd_kb uint64 - Num_wr uint64 - Num_wr_kb uint64 -} - -// ObjectStat represents an object stat information -type ObjectStat struct { - // current length in bytes - Size uint64 - // last modification time - ModTime time.Time -} - -// IOContext represents a context for performing I/O within a pool. -type IOContext struct { - ioctx C.rados_ioctx_t -} - -// Pointer returns a uintptr representation of the IOContext. -func (ioctx *IOContext) Pointer() uintptr { - return uintptr(ioctx.ioctx) -} - -// Write writes len(data) bytes to the object with key oid starting at byte -// offset offset. It returns an error, if any. -func (ioctx *IOContext) Write(oid string, data []byte, offset uint64) error { - c_oid := C.CString(oid) - defer C.free(unsafe.Pointer(c_oid)) - - ret := C.rados_write(ioctx.ioctx, c_oid, - (*C.char)(unsafe.Pointer(&data[0])), - (C.size_t)(len(data)), - (C.uint64_t)(offset)) - - if ret == 0 { - return nil - } else { - return RadosError(int(ret)) - } -} - -// Read reads up to len(data) bytes from the object with key oid starting at byte -// offset offset. It returns the number of bytes read and an error, if any. -func (ioctx *IOContext) Read(oid string, data []byte, offset uint64) (int, error) { - if len(data) == 0 { - return 0, nil - } - - c_oid := C.CString(oid) - defer C.free(unsafe.Pointer(c_oid)) - - ret := C.rados_read( - ioctx.ioctx, - c_oid, - (*C.char)(unsafe.Pointer(&data[0])), - (C.size_t)(len(data)), - (C.uint64_t)(offset)) - - if ret >= 0 { - return int(ret), nil - } else { - return 0, RadosError(int(ret)) - } -} - -// Delete deletes the object with key oid. It returns an error, if any. -func (ioctx *IOContext) Delete(oid string) error { - c_oid := C.CString(oid) - defer C.free(unsafe.Pointer(c_oid)) - - ret := C.rados_remove(ioctx.ioctx, c_oid) - - if ret == 0 { - return nil - } else { - return RadosError(int(ret)) - } -} - -// Truncate resizes the object with key oid to size size. If the operation -// enlarges the object, the new area is logically filled with zeroes. If the -// operation shrinks the object, the excess data is removed. It returns an -// error, if any. -func (ioctx *IOContext) Truncate(oid string, size uint64) error { - c_oid := C.CString(oid) - defer C.free(unsafe.Pointer(c_oid)) - - ret := C.rados_trunc(ioctx.ioctx, c_oid, (C.uint64_t)(size)) - - if ret == 0 { - return nil - } else { - return RadosError(int(ret)) - } -} - -// Destroy informs librados that the I/O context is no longer in use. -// Resources associated with the context may not be freed immediately, and the -// context should not be used again after calling this method. -func (ioctx *IOContext) Destroy() { - C.rados_ioctx_destroy(ioctx.ioctx) -} - -// Stat returns a set of statistics about the pool associated with this I/O -// context. -func (ioctx *IOContext) GetPoolStats() (stat PoolStat, err error) { - c_stat := C.struct_rados_pool_stat_t{} - ret := C.rados_ioctx_pool_stat(ioctx.ioctx, &c_stat) - if ret < 0 { - return PoolStat{}, RadosError(int(ret)) - } else { - return PoolStat{ - Num_bytes: uint64(c_stat.num_bytes), - Num_kb: uint64(c_stat.num_kb), - Num_objects: uint64(c_stat.num_objects), - Num_object_clones: uint64(c_stat.num_object_clones), - Num_object_copies: uint64(c_stat.num_object_copies), - Num_objects_missing_on_primary: uint64(c_stat.num_objects_missing_on_primary), - Num_objects_unfound: uint64(c_stat.num_objects_unfound), - Num_objects_degraded: uint64(c_stat.num_objects_degraded), - Num_rd: uint64(c_stat.num_rd), - Num_rd_kb: uint64(c_stat.num_rd_kb), - Num_wr: uint64(c_stat.num_wr), - Num_wr_kb: uint64(c_stat.num_wr_kb), - }, nil - } -} - -// GetPoolName returns the name of the pool associated with the I/O context. -func (ioctx *IOContext) GetPoolName() (name string, err error) { - buf := make([]byte, 128) - for { - ret := C.rados_ioctx_get_pool_name(ioctx.ioctx, - (*C.char)(unsafe.Pointer(&buf[0])), C.unsigned(len(buf))) - if ret == -34 { // FIXME - buf = make([]byte, len(buf)*2) - continue - } else if ret < 0 { - return "", RadosError(ret) - } - name = C.GoStringN((*C.char)(unsafe.Pointer(&buf[0])), ret) - return name, nil - } -} - -// ObjectListFunc is the type of the function called for each object visited -// by ListObjects. -type ObjectListFunc func(oid string) - -// ListObjects lists all of the objects in the pool associated with the I/O -// context, and called the provided listFn function for each object, passing -// to the function the name of the object. -func (ioctx *IOContext) ListObjects(listFn ObjectListFunc) error { - var ctx C.rados_list_ctx_t - ret := C.rados_objects_list_open(ioctx.ioctx, &ctx) - if ret < 0 { - return RadosError(ret) - } - defer func() { C.rados_objects_list_close(ctx) }() - - for { - var c_entry *C.char - ret := C.rados_objects_list_next(ctx, &c_entry, nil) - if ret == -2 { // FIXME - return nil - } else if ret < 0 { - return RadosError(ret) - } - listFn(C.GoString(c_entry)) - } - - panic("invalid state") -} - -// Stat returns the size of the object and its last modification time -func (ioctx *IOContext) Stat(object string) (stat ObjectStat, err error) { - var c_psize C.uint64_t - var c_pmtime C.time_t - c_object := C.CString(object) - defer C.free(unsafe.Pointer(c_object)) - - ret := C.rados_stat( - ioctx.ioctx, - c_object, - &c_psize, - &c_pmtime) - - if ret < 0 { - return ObjectStat{}, RadosError(int(ret)) - } else { - return ObjectStat{ - Size: uint64(c_psize), - ModTime: time.Unix(int64(c_pmtime), 0), - }, nil - } -} - -// GetXattr gets an xattr with key `name`, it returns the length of -// the key read or an error if not successful -func (ioctx *IOContext) GetXattr(object string, name string, data []byte) (int, error) { - c_object := C.CString(object) - c_name := C.CString(name) - defer C.free(unsafe.Pointer(c_object)) - defer C.free(unsafe.Pointer(c_name)) - - ret := C.rados_getxattr( - ioctx.ioctx, - c_object, - c_name, - (*C.char)(unsafe.Pointer(&data[0])), - (C.size_t)(len(data))) - - if ret >= 0 { - return int(ret), nil - } else { - return 0, RadosError(int(ret)) - } -} - -// Sets an xattr for an object with key `name` with value as `data` -func (ioctx *IOContext) SetXattr(object string, name string, data []byte) error { - c_object := C.CString(object) - c_name := C.CString(name) - defer C.free(unsafe.Pointer(c_object)) - defer C.free(unsafe.Pointer(c_name)) - - ret := C.rados_setxattr( - ioctx.ioctx, - c_object, - c_name, - (*C.char)(unsafe.Pointer(&data[0])), - (C.size_t)(len(data))) - - if ret == 0 { - return nil - } else { - return RadosError(int(ret)) - } -} - -// function that lists all the xattrs for an object, since xattrs are -// a k-v pair, this function returns a map of k-v pairs on -// success, error code on failure -func (ioctx *IOContext) ListXattrs(oid string) (map[string][]byte, error) { - c_oid := C.CString(oid) - defer C.free(unsafe.Pointer(c_oid)) - - var it C.rados_xattrs_iter_t - - ret := C.rados_getxattrs(ioctx.ioctx, c_oid, &it) - if ret < 0 { - return nil, RadosError(ret) - } - defer func() { C.rados_getxattrs_end(it) }() - m := make(map[string][]byte) - for { - var c_name, c_val *C.char - var c_len C.size_t - defer C.free(unsafe.Pointer(c_name)) - defer C.free(unsafe.Pointer(c_val)) - - ret := C.rados_getxattrs_next(it, &c_name, &c_val, &c_len) - if ret < 0 { - return nil, RadosError(int(ret)) - } - // rados api returns a null name,val & 0-length upon - // end of iteration - if c_name == nil { - return m, nil // stop iteration - } - m[C.GoString(c_name)] = C.GoBytes(unsafe.Pointer(c_val), (C.int)(c_len)) - } -} - -// Remove an xattr with key `name` from object `oid` -func (ioctx *IOContext) RmXattr(oid string, name string) error { - c_oid := C.CString(oid) - c_name := C.CString(name) - defer C.free(unsafe.Pointer(c_oid)) - defer C.free(unsafe.Pointer(c_name)) - - ret := C.rados_rmxattr( - ioctx.ioctx, - c_oid, - c_name) - - if ret == 0 { - return nil - } else { - return RadosError(int(ret)) - } -} - -// Append the map `pairs` to the omap `oid` -func (ioctx *IOContext) SetOmap(oid string, pairs map[string][]byte) error { - c_oid := C.CString(oid) - defer C.free(unsafe.Pointer(c_oid)) - - var s C.size_t - var c *C.char - ptrSize := unsafe.Sizeof(c) - - c_keys := C.malloc(C.size_t(len(pairs)) * C.size_t(ptrSize)) - c_values := C.malloc(C.size_t(len(pairs)) * C.size_t(ptrSize)) - c_lengths := C.malloc(C.size_t(len(pairs)) * C.size_t(unsafe.Sizeof(s))) - - defer C.free(unsafe.Pointer(c_keys)) - defer C.free(unsafe.Pointer(c_values)) - defer C.free(unsafe.Pointer(c_lengths)) - - i := 0 - for key, value := range pairs { - // key - c_key_ptr := (**C.char)(unsafe.Pointer(uintptr(c_keys) + uintptr(i) * ptrSize)) - *c_key_ptr = C.CString(key) - defer C.free(unsafe.Pointer(*c_key_ptr)) - - // value and its length - c_value_ptr := (**C.char)(unsafe.Pointer(uintptr(c_values) + uintptr(i) * ptrSize)) - - var c_length C.size_t - if len(value) > 0 { - *c_value_ptr = (*C.char)(unsafe.Pointer(&value[0])) - c_length = C.size_t(len(value)) - } else { - *c_value_ptr = nil - c_length = C.size_t(0) - } - - c_length_ptr := (*C.size_t)(unsafe.Pointer(uintptr(c_lengths) + uintptr(i) * ptrSize)) - *c_length_ptr = c_length - - i++ - } - - op := C.rados_create_write_op() - C.rados_write_op_omap_set( - op, - (**C.char)(c_keys), - (**C.char)(c_values), - (*C.size_t)(c_lengths), - C.size_t(len(pairs))) - - ret := C.rados_write_op_operate(op, ioctx.ioctx, c_oid, nil, 0) - C.rados_release_write_op(op) - - if ret == 0 { - return nil - } else { - return RadosError(int(ret)) - } -} - -// OmapListFunc is the type of the function called for each omap key -// visited by ListOmapValues -type OmapListFunc func(key string, value []byte) - -// Iterate on a set of keys and their values from an omap -// `startAfter`: iterate only on the keys after this specified one -// `filterPrefix`: iterate only on the keys beginning with this prefix -// `maxReturn`: iterate no more than `maxReturn` key/value pairs -// `listFn`: the function called at each iteration -func (ioctx *IOContext) ListOmapValues(oid string, startAfter string, filterPrefix string, maxReturn int64, listFn OmapListFunc) error { - c_oid := C.CString(oid) - c_start_after := C.CString(startAfter) - c_filter_prefix := C.CString(filterPrefix) - c_max_return := C.uint64_t(maxReturn) - - defer C.free(unsafe.Pointer(c_oid)) - defer C.free(unsafe.Pointer(c_start_after)) - defer C.free(unsafe.Pointer(c_filter_prefix)) - - op := C.rados_create_read_op() - - var c_iter C.rados_omap_iter_t - var c_prval C.int - C.rados_read_op_omap_get_vals( - op, - c_start_after, - c_filter_prefix, - c_max_return, - &c_iter, - &c_prval, - ) - - ret := C.rados_read_op_operate(op, ioctx.ioctx, c_oid, 0) - - if int(c_prval) != 0 { - return RadosError(int(c_prval)) - } else if int(ret) != 0 { - return RadosError(int(ret)) - } - - for { - var c_key *C.char - var c_val *C.char - var c_len C.size_t - - ret = C.rados_omap_get_next(c_iter, &c_key, &c_val, &c_len) - - if int(ret) != 0 { - return RadosError(int(ret)) - } - - if c_key == nil { - break - } - - listFn(C.GoString(c_key), C.GoBytes(unsafe.Pointer(c_val), C.int(c_len))) - } - - C.rados_omap_get_end(c_iter) - C.rados_release_read_op(op) - - return nil -} - -// Fetch a set of keys and their values from an omap and returns then as a map -// `startAfter`: retrieve only the keys after this specified one -// `filterPrefix`: retrieve only the keys beginning with this prefix -// `maxReturn`: retrieve no more than `maxReturn` key/value pairs -func (ioctx *IOContext) GetOmapValues(oid string, startAfter string, filterPrefix string, maxReturn int64) (map[string][]byte, error) { - omap := map[string][]byte{} - - err := ioctx.ListOmapValues( - oid, startAfter, filterPrefix, maxReturn, - func(key string, value []byte) { - omap[key] = value - }, - ) - - return omap, err -} - -// Fetch all the keys and their values from an omap and returns then as a map -// `startAfter`: retrieve only the keys after this specified one -// `filterPrefix`: retrieve only the keys beginning with this prefix -// `iteratorSize`: internal number of keys to fetch during a read operation -func (ioctx *IOContext) GetAllOmapValues(oid string, startAfter string, filterPrefix string, iteratorSize int64) (map[string][]byte, error) { - omap := map[string][]byte{} - omapSize := 0 - - for { - err := ioctx.ListOmapValues( - oid, startAfter, filterPrefix, iteratorSize, - func (key string, value []byte) { - omap[key] = value - startAfter = key - }, - ) - - if err != nil { - return omap, err - } - - // End of omap - if len(omap) == omapSize { - break - } - - omapSize = len(omap) - } - - return omap, nil -} - -// Remove the specified `keys` from the omap `oid` -func (ioctx *IOContext) RmOmapKeys(oid string, keys []string) error { - c_oid := C.CString(oid) - defer C.free(unsafe.Pointer(c_oid)) - - var c *C.char - ptrSize := unsafe.Sizeof(c) - - c_keys := C.malloc(C.size_t(len(keys)) * C.size_t(ptrSize)) - defer C.free(unsafe.Pointer(c_keys)) - - i := 0 - for _, key := range keys { - c_key_ptr := (**C.char)(unsafe.Pointer(uintptr(c_keys) + uintptr(i) * ptrSize)) - *c_key_ptr = C.CString(key) - defer C.free(unsafe.Pointer(*c_key_ptr)) - i++ - } - - op := C.rados_create_write_op() - C.rados_write_op_omap_rm_keys( - op, - (**C.char)(c_keys), - C.size_t(len(keys))) - - ret := C.rados_write_op_operate(op, ioctx.ioctx, c_oid, nil, 0) - C.rados_release_write_op(op) - - if ret == 0 { - return nil - } else { - return RadosError(int(ret)) - } -} - -// Clear the omap `oid` -func (ioctx *IOContext) CleanOmap(oid string) error { - c_oid := C.CString(oid) - defer C.free(unsafe.Pointer(c_oid)) - - op := C.rados_create_write_op() - C.rados_write_op_omap_clear(op) - - ret := C.rados_write_op_operate(op, ioctx.ioctx, c_oid, nil, 0) - C.rados_release_write_op(op) - - if ret == 0 { - return nil - } else { - return RadosError(int(ret)) - } -} diff --git a/Godeps/_workspace/src/github.com/noahdesu/go-ceph/rados/rados.go b/Godeps/_workspace/src/github.com/noahdesu/go-ceph/rados/rados.go deleted file mode 100644 index 935bc248..00000000 --- a/Godeps/_workspace/src/github.com/noahdesu/go-ceph/rados/rados.go +++ /dev/null @@ -1,54 +0,0 @@ -package rados - -// #cgo LDFLAGS: -lrados -// #include -// #include -import "C" - -import ( - "fmt" - "unsafe" -) - -type RadosError int - -func (e RadosError) Error() string { - return fmt.Sprintf("rados: ret=%d", e) -} - -// Version returns the major, minor, and patch components of the version of -// the RADOS library linked against. -func Version() (int, int, int) { - var c_major, c_minor, c_patch C.int - C.rados_version(&c_major, &c_minor, &c_patch) - return int(c_major), int(c_minor), int(c_patch) -} - -// NewConn creates a new connection object. It returns the connection and an -// error, if any. -func NewConn() (*Conn, error) { - conn := &Conn{} - ret := C.rados_create(&conn.cluster, nil) - - if ret == 0 { - return conn, nil - } else { - return nil, RadosError(int(ret)) - } -} - -// NewConnWithUser creates a new connection object with a custom username. -// It returns the connection and an error, if any. -func NewConnWithUser(user string) (*Conn, error) { - c_user := C.CString(user) - defer C.free(unsafe.Pointer(c_user)) - - conn := &Conn{} - ret := C.rados_create(&conn.cluster, c_user) - - if ret == 0 { - return conn, nil - } else { - return nil, RadosError(int(ret)) - } -} diff --git a/Godeps/_workspace/src/github.com/noahdesu/go-ceph/rados/rados_test.go b/Godeps/_workspace/src/github.com/noahdesu/go-ceph/rados/rados_test.go deleted file mode 100644 index a31c1872..00000000 --- a/Godeps/_workspace/src/github.com/noahdesu/go-ceph/rados/rados_test.go +++ /dev/null @@ -1,703 +0,0 @@ -package rados_test - -import "testing" - -//import "bytes" -import "github.com/noahdesu/go-ceph/rados" -import "github.com/stretchr/testify/assert" -import "os" -import "os/exec" -import "io" -import "io/ioutil" -import "time" -import "net" -import "fmt" -import "sort" -import "encoding/json" - -func GetUUID() string { - out, _ := exec.Command("uuidgen").Output() - return string(out[:36]) -} - -func TestVersion(t *testing.T) { - var major, minor, patch = rados.Version() - assert.False(t, major < 0 || major > 1000, "invalid major") - assert.False(t, minor < 0 || minor > 1000, "invalid minor") - assert.False(t, patch < 0 || patch > 1000, "invalid patch") -} - -func TestGetSetConfigOption(t *testing.T) { - conn, _ := rados.NewConn() - - // rejects invalid options - err := conn.SetConfigOption("wefoijweojfiw", "welfkwjelkfj") - assert.Error(t, err, "Invalid option") - - // verify SetConfigOption changes a values - log_file_val, err := conn.GetConfigOption("log_file") - assert.NotEqual(t, log_file_val, "/dev/null") - - err = conn.SetConfigOption("log_file", "/dev/null") - assert.NoError(t, err, "Invalid option") - - log_file_val, err = conn.GetConfigOption("log_file") - assert.Equal(t, log_file_val, "/dev/null") -} - -func TestParseDefaultConfigEnv(t *testing.T) { - conn, _ := rados.NewConn() - - log_file_val, _ := conn.GetConfigOption("log_file") - assert.NotEqual(t, log_file_val, "/dev/null") - - err := os.Setenv("CEPH_ARGS", "--log-file /dev/null") - assert.NoError(t, err) - - err = conn.ParseDefaultConfigEnv() - assert.NoError(t, err) - - log_file_val, _ = conn.GetConfigOption("log_file") - assert.Equal(t, log_file_val, "/dev/null") -} - -func TestParseCmdLineArgs(t *testing.T) { - conn, _ := rados.NewConn() - conn.ReadDefaultConfigFile() - - mon_host_val, _ := conn.GetConfigOption("mon_host") - assert.NotEqual(t, mon_host_val, "1.1.1.1") - - args := []string{"--mon-host", "1.1.1.1"} - err := conn.ParseCmdLineArgs(args) - assert.NoError(t, err) - - mon_host_val, _ = conn.GetConfigOption("mon_host") - assert.Equal(t, mon_host_val, "1.1.1.1") -} - -func TestGetClusterStats(t *testing.T) { - conn, _ := rados.NewConn() - conn.ReadDefaultConfigFile() - conn.Connect() - - poolname := GetUUID() - err := conn.MakePool(poolname) - assert.NoError(t, err) - - pool, err := conn.OpenIOContext(poolname) - assert.NoError(t, err) - - // grab current stats - prev_stat, err := conn.GetClusterStats() - fmt.Printf("prev_stat: %+v\n", prev_stat) - assert.NoError(t, err) - - // make some changes to the cluster - buf := make([]byte, 1<<20) - for i := 0; i < 10; i++ { - objname := GetUUID() - pool.Write(objname, buf, 0) - } - - // wait a while for the stats to change - for i := 0; i < 30; i++ { - stat, err := conn.GetClusterStats() - assert.NoError(t, err) - - // wait for something to change - if stat == prev_stat { - fmt.Printf("curr_stat: %+v (trying again...)\n", stat) - time.Sleep(time.Second) - } else { - // success - fmt.Printf("curr_stat: %+v (change detected)\n", stat) - conn.Shutdown() - return - } - } - - pool.Destroy() - conn.Shutdown() - t.Error("Cluster stats aren't changing") -} - -func TestGetFSID(t *testing.T) { - conn, _ := rados.NewConn() - conn.ReadDefaultConfigFile() - conn.Connect() - - fsid, err := conn.GetFSID() - assert.NoError(t, err) - assert.NotEqual(t, fsid, "") - - conn.Shutdown() -} - -func TestGetInstanceID(t *testing.T) { - conn, _ := rados.NewConn() - conn.ReadDefaultConfigFile() - conn.Connect() - - id := conn.GetInstanceID() - assert.NotEqual(t, id, 0) - - conn.Shutdown() -} - -func TestMakeDeletePool(t *testing.T) { - conn, _ := rados.NewConn() - conn.ReadDefaultConfigFile() - conn.Connect() - - // get current list of pool - pools, err := conn.ListPools() - assert.NoError(t, err) - - // check that new pool name is unique - new_name := GetUUID() - for _, poolname := range pools { - if new_name == poolname { - t.Error("Random pool name exists!") - return - } - } - - // create pool - err = conn.MakePool(new_name) - assert.NoError(t, err) - - // get updated list of pools - pools, err = conn.ListPools() - assert.NoError(t, err) - - // verify that the new pool name exists - found := false - for _, poolname := range pools { - if new_name == poolname { - found = true - } - } - - if !found { - t.Error("Cannot find newly created pool") - } - - // delete the pool - err = conn.DeletePool(new_name) - assert.NoError(t, err) - - // verify that it is gone - - // get updated list of pools - pools, err = conn.ListPools() - assert.NoError(t, err) - - // verify that the new pool name exists - found = false - for _, poolname := range pools { - if new_name == poolname { - found = true - } - } - - if found { - t.Error("Deleted pool still exists") - } - - conn.Shutdown() -} - -func TestPingMonitor(t *testing.T) { - conn, _ := rados.NewConn() - conn.ReadDefaultConfigFile() - conn.Connect() - - // mon id that should work with vstart.sh - reply, err := conn.PingMonitor("a") - if err == nil { - assert.NotEqual(t, reply, "") - return - } - - // mon id that should work with micro-osd.sh - reply, err = conn.PingMonitor("0") - if err == nil { - assert.NotEqual(t, reply, "") - return - } - - // try to use a hostname as the monitor id - mon_addr, _ := conn.GetConfigOption("mon_host") - hosts, _ := net.LookupAddr(mon_addr) - for _, host := range hosts { - reply, err := conn.PingMonitor(host) - if err == nil { - assert.NotEqual(t, reply, "") - return - } - } - - t.Error("Could not find a valid monitor id") - - conn.Shutdown() -} - -func TestReadConfigFile(t *testing.T) { - conn, _ := rados.NewConn() - - // check current log_file value - log_file_val, err := conn.GetConfigOption("log_file") - assert.NoError(t, err) - assert.NotEqual(t, log_file_val, "/dev/null") - - // create a temporary ceph.conf file that changes the log_file conf - // option. - file, err := ioutil.TempFile("/tmp", "go-rados") - assert.NoError(t, err) - - _, err = io.WriteString(file, "[global]\nlog_file = /dev/null\n") - assert.NoError(t, err) - - // parse the config file - err = conn.ReadConfigFile(file.Name()) - assert.NoError(t, err) - - // check current log_file value - log_file_val, err = conn.GetConfigOption("log_file") - assert.NoError(t, err) - assert.Equal(t, log_file_val, "/dev/null") - - // cleanup - file.Close() - os.Remove(file.Name()) -} - -func TestWaitForLatestOSDMap(t *testing.T) { - conn, _ := rados.NewConn() - conn.ReadDefaultConfigFile() - conn.Connect() - - err := conn.WaitForLatestOSDMap() - assert.NoError(t, err) - - conn.Shutdown() -} - -func TestReadWrite(t *testing.T) { - conn, _ := rados.NewConn() - conn.ReadDefaultConfigFile() - conn.Connect() - - // make pool - pool_name := GetUUID() - err := conn.MakePool(pool_name) - assert.NoError(t, err) - - pool, err := conn.OpenIOContext(pool_name) - assert.NoError(t, err) - - bytes_in := []byte("input data") - err = pool.Write("obj", bytes_in, 0) - assert.NoError(t, err) - - bytes_out := make([]byte, len(bytes_in)) - n_out, err := pool.Read("obj", bytes_out, 0) - - assert.Equal(t, n_out, len(bytes_in)) - assert.Equal(t, bytes_in, bytes_out) - - pool.Destroy() -} - -func TestObjectStat(t *testing.T) { - conn, _ := rados.NewConn() - conn.ReadDefaultConfigFile() - conn.Connect() - - pool_name := GetUUID() - err := conn.MakePool(pool_name) - assert.NoError(t, err) - - pool, err := conn.OpenIOContext(pool_name) - assert.NoError(t, err) - - bytes_in := []byte("input data") - err = pool.Write("obj", bytes_in, 0) - assert.NoError(t, err) - - stat, err := pool.Stat("obj") - assert.Equal(t, uint64(len(bytes_in)), stat.Size) - assert.NotNil(t, stat.ModTime) - - pool.Destroy() - conn.Shutdown() -} - -func TestGetPoolStats(t *testing.T) { - conn, _ := rados.NewConn() - conn.ReadDefaultConfigFile() - conn.Connect() - - poolname := GetUUID() - err := conn.MakePool(poolname) - assert.NoError(t, err) - - pool, err := conn.OpenIOContext(poolname) - assert.NoError(t, err) - - // grab current stats - prev_stat, err := pool.GetPoolStats() - fmt.Printf("prev_stat: %+v\n", prev_stat) - assert.NoError(t, err) - - // make some changes to the cluster - buf := make([]byte, 1<<20) - for i := 0; i < 10; i++ { - objname := GetUUID() - pool.Write(objname, buf, 0) - } - - // wait a while for the stats to change - for i := 0; i < 30; i++ { - stat, err := pool.GetPoolStats() - assert.NoError(t, err) - - // wait for something to change - if stat == prev_stat { - fmt.Printf("curr_stat: %+v (trying again...)\n", stat) - time.Sleep(time.Second) - } else { - // success - fmt.Printf("curr_stat: %+v (change detected)\n", stat) - conn.Shutdown() - return - } - } - - pool.Destroy() - conn.Shutdown() - t.Error("Pool stats aren't changing") -} - -func TestGetPoolName(t *testing.T) { - conn, _ := rados.NewConn() - conn.ReadDefaultConfigFile() - conn.Connect() - - poolname := GetUUID() - err := conn.MakePool(poolname) - assert.NoError(t, err) - - ioctx, err := conn.OpenIOContext(poolname) - assert.NoError(t, err) - - poolname_ret, err := ioctx.GetPoolName() - assert.NoError(t, err) - - assert.Equal(t, poolname, poolname_ret) - - ioctx.Destroy() - conn.Shutdown() -} - -func TestMonCommand(t *testing.T) { - conn, _ := rados.NewConn() - conn.ReadDefaultConfigFile() - conn.Connect() - - command, err := json.Marshal(map[string]string{"prefix": "df", "format": "json"}) - assert.NoError(t, err) - - buf, info, err := conn.MonCommand(command) - assert.NoError(t, err) - assert.Equal(t, info, "") - - var message map[string]interface{} - err = json.Unmarshal(buf, &message) - assert.NoError(t, err) - - conn.Shutdown() -} - -func TestObjectIterator(t *testing.T) { - conn, _ := rados.NewConn() - conn.ReadDefaultConfigFile() - conn.Connect() - - poolname := GetUUID() - err := conn.MakePool(poolname) - assert.NoError(t, err) - - ioctx, err := conn.OpenIOContext(poolname) - assert.NoError(t, err) - - objectList := []string{} - err = ioctx.ListObjects(func(oid string) { - objectList = append(objectList, oid) - }) - assert.NoError(t, err) - assert.True(t, len(objectList) == 0) - - createdList := []string{} - for i := 0; i < 200; i++ { - oid := GetUUID() - bytes_in := []byte("input data") - err = ioctx.Write(oid, bytes_in, 0) - assert.NoError(t, err) - createdList = append(createdList, oid) - } - assert.True(t, len(createdList) == 200) - - err = ioctx.ListObjects(func(oid string) { - objectList = append(objectList, oid) - }) - assert.NoError(t, err) - assert.Equal(t, len(objectList), len(createdList)) - - sort.Strings(objectList) - sort.Strings(createdList) - - assert.Equal(t, objectList, createdList) -} - -func TestNewConnWithUser(t *testing.T) { - _, err := rados.NewConnWithUser("admin") - assert.Equal(t, err, nil) -} - -func TestReadWriteXattr(t *testing.T) { - conn, _ := rados.NewConn() - conn.ReadDefaultConfigFile() - conn.Connect() - - // make pool - pool_name := GetUUID() - err := conn.MakePool(pool_name) - assert.NoError(t, err) - - pool, err := conn.OpenIOContext(pool_name) - assert.NoError(t, err) - - bytes_in := []byte("input data") - err = pool.Write("obj", bytes_in, 0) - assert.NoError(t, err) - - my_xattr_in := []byte("my_value") - err = pool.SetXattr("obj", "my_key", my_xattr_in) - assert.NoError(t, err) - - my_xattr_out := make([]byte, len(my_xattr_in)) - n_out, err := pool.GetXattr("obj", "my_key", my_xattr_out) - - assert.Equal(t, n_out, len(my_xattr_in)) - assert.Equal(t, my_xattr_in, my_xattr_out) - - pool.Destroy() -} - -func TestListXattrs(t *testing.T) { - conn, _ := rados.NewConn() - conn.ReadDefaultConfigFile() - conn.Connect() - - // make pool - pool_name := GetUUID() - err := conn.MakePool(pool_name) - assert.NoError(t, err) - - pool, err := conn.OpenIOContext(pool_name) - assert.NoError(t, err) - - bytes_in := []byte("input data") - err = pool.Write("obj", bytes_in, 0) - assert.NoError(t, err) - - input_xattrs := make(map[string][]byte) - for i := 0; i < 200; i++ { - name := fmt.Sprintf("key_%d", i) - data := []byte(GetUUID()) - err = pool.SetXattr("obj", name, data) - assert.NoError(t, err) - input_xattrs[name] = data - } - - output_xattrs := make(map[string][]byte) - output_xattrs, err = pool.ListXattrs("obj") - assert.NoError(t, err) - assert.Equal(t, len(input_xattrs), len(output_xattrs)) - assert.Equal(t, input_xattrs, output_xattrs) - - pool.Destroy() -} - -func TestRmXattr(t *testing.T) { - conn, _ := rados.NewConn() - conn.ReadDefaultConfigFile() - conn.Connect() - - pool_name := GetUUID() - err := conn.MakePool(pool_name) - assert.NoError(t, err) - - pool, err := conn.OpenIOContext(pool_name) - assert.NoError(t, err) - - bytes_in := []byte("input data") - err = pool.Write("obj", bytes_in, 0) - assert.NoError(t, err) - - key := "key1" - val := []byte("val1") - err = pool.SetXattr("obj", key, val) - assert.NoError(t, err) - - key = "key2" - val = []byte("val2") - err = pool.SetXattr("obj", key, val) - assert.NoError(t, err) - - xattr_list := make(map[string][]byte) - xattr_list, err = pool.ListXattrs("obj") - assert.NoError(t, err) - assert.Equal(t, len(xattr_list), 2) - - pool.RmXattr("obj", "key2") - xattr_list, err = pool.ListXattrs("obj") - assert.NoError(t, err) - assert.Equal(t, len(xattr_list), 1) - - found := false - for key, _ = range xattr_list { - if key == "key2" { - found = true - } - - } - - if found { - t.Error("Deleted pool still exists") - } - - pool.Destroy() -} - -func TestReadWriteOmap(t *testing.T) { - conn, _ := rados.NewConn() - conn.ReadDefaultConfigFile() - conn.Connect() - - pool_name := GetUUID() - err := conn.MakePool(pool_name) - assert.NoError(t, err) - - pool, err := conn.OpenIOContext(pool_name) - assert.NoError(t, err) - - // Set - orig := map[string][]byte{ - "key1": []byte("value1"), - "key2": []byte("value2"), - "prefixed-key3": []byte("value3"), - "empty": []byte(""), - } - - err = pool.SetOmap("obj", orig) - assert.NoError(t, err) - - // List - remaining := map[string][]byte{} - for k, v := range orig { - remaining[k] = v - } - - err = pool.ListOmapValues("obj", "", "", 4, func(key string, value []byte) { - assert.Equal(t, remaining[key], value) - delete(remaining, key) - }) - assert.NoError(t, err) - assert.Equal(t, 0, len(remaining)) - - // Get (with a fixed number of keys) - fetched, err := pool.GetOmapValues("obj", "", "", 4) - assert.NoError(t, err) - assert.Equal(t, orig, fetched) - - // Get All (with an iterator size bigger than the map size) - fetched, err = pool.GetAllOmapValues("obj", "", "", 100) - assert.NoError(t, err) - assert.Equal(t, orig, fetched) - - // Get All (with an iterator size smaller than the map size) - fetched, err = pool.GetAllOmapValues("obj", "", "", 1) - assert.NoError(t, err) - assert.Equal(t, orig, fetched) - - // Remove - err = pool.RmOmapKeys("obj", []string{"key1", "prefixed-key3"}) - assert.NoError(t, err) - - fetched, err = pool.GetOmapValues("obj", "", "", 4) - assert.NoError(t, err) - assert.Equal(t, map[string][]byte{ - "key2": []byte("value2"), - "empty": []byte(""), - }, fetched) - - // Clear - err = pool.CleanOmap("obj") - assert.NoError(t, err) - - fetched, err = pool.GetOmapValues("obj", "", "", 4) - assert.NoError(t, err) - assert.Equal(t, map[string][]byte{}, fetched) - - pool.Destroy() -} - -func TestReadFilterOmap(t *testing.T) { - conn, _ := rados.NewConn() - conn.ReadDefaultConfigFile() - conn.Connect() - - pool_name := GetUUID() - err := conn.MakePool(pool_name) - assert.NoError(t, err) - - pool, err := conn.OpenIOContext(pool_name) - assert.NoError(t, err) - - orig := map[string][]byte{ - "key1": []byte("value1"), - "prefixed-key3": []byte("value3"), - "key2": []byte("value2"), - } - - err = pool.SetOmap("obj", orig) - assert.NoError(t, err) - - // filter by prefix - fetched, err := pool.GetOmapValues("obj", "", "prefixed", 4) - assert.NoError(t, err) - assert.Equal(t, map[string][]byte{ - "prefixed-key3": []byte("value3"), - }, fetched) - - // "start_after" a key - fetched, err = pool.GetOmapValues("obj", "key1", "", 4) - assert.NoError(t, err) - assert.Equal(t, map[string][]byte{ - "prefixed-key3": []byte("value3"), - "key2": []byte("value2"), - }, fetched) - - // maxReturn - fetched, err = pool.GetOmapValues("obj", "", "key", 1) - assert.NoError(t, err) - assert.Equal(t, map[string][]byte{ - "key1": []byte("value1"), - }, fetched) - - pool.Destroy() -} - diff --git a/circle.yml b/circle.yml index e1995d4b..f658d111 100644 --- a/circle.yml +++ b/circle.yml @@ -3,9 +3,6 @@ machine: pre: # Install gvm - bash < <(curl -s -S -L https://raw.githubusercontent.com/moovweb/gvm/1.0.22/binscripts/gvm-installer) - # Install ceph to test rados driver & create pool - - sudo -i ~/distribution/contrib/ceph/ci-setup.sh - - ceph osd pool create docker-distribution 1 # Install codecov for coverage - pip install --user codecov @@ -19,11 +16,9 @@ machine: BASE_DIR: src/github.com/$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME # Trick circle brainflat "no absolute path" behavior BASE_STABLE: ../../../$HOME/.gvm/pkgsets/stable/global/$BASE_DIR - DOCKER_BUILDTAGS: "include_rados include_oss include_gcs" + DOCKER_BUILDTAGS: "include_oss include_gcs" # Workaround Circle parsing dumb bugs and/or YAML wonkyness CIRCLE_PAIN: "mode: set" - # Ceph config - RADOS_POOL: "docker-distribution" hosts: # Not used yet diff --git a/cmd/registry/rados.go b/cmd/registry/rados.go deleted file mode 100644 index a2a938c3..00000000 --- a/cmd/registry/rados.go +++ /dev/null @@ -1,5 +0,0 @@ -// +build include_rados - -package main - -// import _ "github.com/docker/distribution/registry/storage/driver/rados" diff --git a/contrib/ceph/ci-setup.sh b/contrib/ceph/ci-setup.sh deleted file mode 100755 index 14832760..00000000 --- a/contrib/ceph/ci-setup.sh +++ /dev/null @@ -1,122 +0,0 @@ -#! /bin/bash -# -# Ceph cluster setup in Circle CI -# - -set -x -set -e -set -u - -NODE=$(hostname) -CEPHDIR=/tmp/ceph - -mkdir cluster -pushd cluster - -# Install -retries=0 -until [ $retries -ge 5 ]; do - pip install ceph-deploy && break - retries=$[$retries+1] - sleep 30 -done - -retries=0 -until [ $retries -ge 5 ]; do - # apt-get can get stuck and hold the lock in some circumstances - # so preemptively kill it - kill `pgrep apt-get` || true - ceph-deploy install --release hammer $NODE && break - retries=$[$retries+1] - sleep 30 -done - -retries=0 -until [ $retries -ge 5 ]; do - ceph-deploy pkg --install librados-dev $NODE && break - retries=$[$retries+1] - sleep 30 -done - -echo $(ip route get 1 | awk '{print $NF;exit}') $(hostname) >> /etc/hosts -ssh-keygen -t rsa -f ~/.ssh/id_rsa -q -N "" -cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys -ssh-keyscan $NODE >> ~/.ssh/known_hosts -ceph-deploy new $NODE - -cat >> ceph.conf <driver's reference documentation. - - rados - Uses Ceph Object Storage. - See the driver's reference documentation. - - s3 Uses Amazon's Simple Storage Service (S3). diff --git a/docs/introduction.md b/docs/introduction.md index aefefc34..e8b05d8c 100644 --- a/docs/introduction.md +++ b/docs/introduction.md @@ -19,7 +19,7 @@ Users interact with a registry by using docker push and pull commands. > Example: `docker pull registry-1.docker.io/distribution/registry:2.1`. -Storage itself is delegated to drivers. The default storage driver is the local posix filesystem, which is suitable for development or small deployments. Additional cloud-based storage drivers like S3, Microsoft Azure, Ceph Rados, OpenStack Swift and Aliyun OSS are also supported. People looking into using other storage backends may do so by writing their own driver implementing the [Storage API](storagedrivers.md). +Storage itself is delegated to drivers. The default storage driver is the local posix filesystem, which is suitable for development or small deployments. Additional cloud-based storage drivers like S3, Microsoft Azure, OpenStack Swift and Aliyun OSS are also supported. People looking into using other storage backends may do so by writing their own driver implementing the [Storage API](storagedrivers.md). Since securing access to your hosted images is paramount, the Registry natively supports TLS and basic authentication. diff --git a/docs/storage-drivers/oss.md b/docs/storage-drivers/oss.md old mode 100755 new mode 100644 diff --git a/docs/storage-drivers/rados.md b/docs/storage-drivers/rados.md deleted file mode 100644 index 12643b2a..00000000 --- a/docs/storage-drivers/rados.md +++ /dev/null @@ -1,83 +0,0 @@ - - - -# Ceph RADOS storage driver - -An implementation of the `storagedriver.StorageDriver` interface which uses -[Ceph RADOS Object Storage][rados] for storage backend. - -## Parameters - - - - - - - - - - - - - - - - - - - - - - - -
ParameterRequiredDescription
- poolname - - yes - - Ceph pool name. -
- username - - no - - Ceph cluster user to connect as (i.e. admin, not client.admin). -
- chunksize - - no - - Size of the written RADOS objects. Default value is 4MB (4194304). -
- - -The following parameters must be used to configure the storage driver -(case-sensitive): - -* `poolname`: Name of the Ceph pool -* `username` *optional*: The user to connect as (i.e. admin, not client.admin) -* `chunksize` *optional*: Size of the written RADOS objects. Default value is -4MB (4194304). - -This drivers loads the [Ceph client configuration][rados-config] from the -following regular paths (the first found is used): - -* `$CEPH_CONF` (environment variable) -* `/etc/ceph/ceph.conf` -* `~/.ceph/config` -* `ceph.conf` (in the current working directory) - -## Developing - -To include this driver when building Docker Distribution, use the build tag -`include_rados`. Please see the [building documentation][building] for details. - -[rados]: http://ceph.com/docs/master/rados/ -[rados-config]: http://ceph.com/docs/master/rados/configuration/ceph-conf/ -[building]: https://github.com/docker/distribution/blob/master/docs/building.md#optional-build-tags diff --git a/docs/storagedrivers.md b/docs/storagedrivers.md index 158ad999..ab475c32 100644 --- a/docs/storagedrivers.md +++ b/docs/storagedrivers.md @@ -22,7 +22,6 @@ This storage driver package comes bundled with several drivers: - [filesystem](storage-drivers/filesystem.md): A local storage driver configured to use a directory tree in the local filesystem. - [s3](storage-drivers/s3.md): A driver storing objects in an Amazon Simple Storage Solution (S3) bucket. - [azure](storage-drivers/azure.md): A driver storing objects in [Microsoft Azure Blob Storage](http://azure.microsoft.com/en-us/services/storage/). -- [rados](storage-drivers/rados.md): A driver storing objects in a [Ceph Object Storage](http://ceph.com/docs/master/rados/) pool. - [swift](storage-drivers/swift.md): A driver storing objects in [Openstack Swift](http://docs.openstack.org/developer/swift/). - [oss](storage-drivers/oss.md): A driver storing objects in [Aliyun OSS](http://www.aliyun.com/product/oss). - [gcs](storage-drivers/gcs.md): A driver storing objects in a [Google Cloud Storage](https://cloud.google.com/storage/) bucket. diff --git a/registry/storage/driver/rados/doc.go b/registry/storage/driver/rados/doc.go deleted file mode 100644 index 655c68a3..00000000 --- a/registry/storage/driver/rados/doc.go +++ /dev/null @@ -1,3 +0,0 @@ -// Package rados implements the rados storage driver backend. Support can be -// enabled by including the "include_rados" build tag. -package rados diff --git a/registry/storage/driver/rados/rados.go b/registry/storage/driver/rados/rados.go deleted file mode 100644 index c2be528e..00000000 --- a/registry/storage/driver/rados/rados.go +++ /dev/null @@ -1,632 +0,0 @@ -// +build include_rados - -package rados - -import ( - "bytes" - "encoding/binary" - "fmt" - "io" - "io/ioutil" - "path" - "strconv" - - log "github.com/Sirupsen/logrus" - "github.com/docker/distribution/context" - storagedriver "github.com/docker/distribution/registry/storage/driver" - "github.com/docker/distribution/registry/storage/driver/base" - "github.com/docker/distribution/registry/storage/driver/factory" - "github.com/docker/distribution/uuid" - "github.com/noahdesu/go-ceph/rados" -) - -const driverName = "rados" - -// Prefix all the stored blob -const objectBlobPrefix = "blob:" - -// Stripes objects size to 4M -const defaultChunkSize = 4 << 20 -const defaultXattrTotalSizeName = "total-size" - -// Max number of keys fetched from omap at each read operation -const defaultKeysFetched = 1 - -//DriverParameters A struct that encapsulates all of the driver parameters after all values have been set -type DriverParameters struct { - poolname string - username string - chunksize uint64 -} - -func init() { - factory.Register(driverName, &radosDriverFactory{}) -} - -// radosDriverFactory implements the factory.StorageDriverFactory interface -type radosDriverFactory struct{} - -func (factory *radosDriverFactory) Create(parameters map[string]interface{}) (storagedriver.StorageDriver, error) { - return FromParameters(parameters) -} - -type driver struct { - Conn *rados.Conn - Ioctx *rados.IOContext - chunksize uint64 -} - -type baseEmbed struct { - base.Base -} - -// Driver is a storagedriver.StorageDriver implementation backed by Ceph RADOS -// Objects are stored at absolute keys in the provided bucket. -type Driver struct { - baseEmbed -} - -// FromParameters constructs a new Driver with a given parameters map -// Required parameters: -// - poolname: the ceph pool name -func FromParameters(parameters map[string]interface{}) (*Driver, error) { - - pool, ok := parameters["poolname"] - if !ok { - return nil, fmt.Errorf("No poolname parameter provided") - } - - username, ok := parameters["username"] - if !ok { - username = "" - } - - chunksize := uint64(defaultChunkSize) - chunksizeParam, ok := parameters["chunksize"] - if ok { - chunksize, ok = chunksizeParam.(uint64) - if !ok { - return nil, fmt.Errorf("The chunksize parameter should be a number") - } - } - - params := DriverParameters{ - fmt.Sprint(pool), - fmt.Sprint(username), - chunksize, - } - - return New(params) -} - -// New constructs a new Driver -func New(params DriverParameters) (*Driver, error) { - var conn *rados.Conn - var err error - - if params.username != "" { - log.Infof("Opening connection to pool %s using user %s", params.poolname, params.username) - conn, err = rados.NewConnWithUser(params.username) - } else { - log.Infof("Opening connection to pool %s", params.poolname) - conn, err = rados.NewConn() - } - - if err != nil { - return nil, err - } - - err = conn.ReadDefaultConfigFile() - if err != nil { - return nil, err - } - - err = conn.Connect() - if err != nil { - return nil, err - } - - log.Infof("Connected") - - ioctx, err := conn.OpenIOContext(params.poolname) - - log.Infof("Connected to pool %s", params.poolname) - - if err != nil { - return nil, err - } - - d := &driver{ - Ioctx: ioctx, - Conn: conn, - chunksize: params.chunksize, - } - - return &Driver{ - baseEmbed: baseEmbed{ - Base: base.Base{ - StorageDriver: d, - }, - }, - }, nil -} - -// Implement the storagedriver.StorageDriver interface - -func (d *driver) Name() string { - return driverName -} - -// GetContent retrieves the content stored at "path" as a []byte. -func (d *driver) GetContent(ctx context.Context, path string) ([]byte, error) { - rc, err := d.ReadStream(ctx, path, 0) - if err != nil { - return nil, err - } - defer rc.Close() - - p, err := ioutil.ReadAll(rc) - if err != nil { - return nil, err - } - - return p, nil -} - -// PutContent stores the []byte content at a location designated by "path". -func (d *driver) PutContent(ctx context.Context, path string, contents []byte) error { - if _, err := d.WriteStream(ctx, path, 0, bytes.NewReader(contents)); err != nil { - return err - } - - return nil -} - -// ReadStream retrieves an io.ReadCloser for the content stored at "path" with a -// given byte offset. -type readStreamReader struct { - driver *driver - oid string - size uint64 - offset uint64 -} - -func (r *readStreamReader) Read(b []byte) (n int, err error) { - // Determine the part available to read - bufferOffset := uint64(0) - bufferSize := uint64(len(b)) - - // End of the object, read less than the buffer size - if bufferSize > r.size-r.offset { - bufferSize = r.size - r.offset - } - - // Fill `b` - for bufferOffset < bufferSize { - // Get the offset in the object chunk - chunkedOid, chunkedOffset := r.driver.getChunkNameFromOffset(r.oid, r.offset) - - // Determine the best size to read - bufferEndOffset := bufferSize - if bufferEndOffset-bufferOffset > r.driver.chunksize-chunkedOffset { - bufferEndOffset = bufferOffset + (r.driver.chunksize - chunkedOffset) - } - - // Read the chunk - n, err = r.driver.Ioctx.Read(chunkedOid, b[bufferOffset:bufferEndOffset], chunkedOffset) - - if err != nil { - return int(bufferOffset), err - } - - bufferOffset += uint64(n) - r.offset += uint64(n) - } - - // EOF if the offset is at the end of the object - if r.offset == r.size { - return int(bufferOffset), io.EOF - } - - return int(bufferOffset), nil -} - -func (r *readStreamReader) Close() error { - return nil -} - -func (d *driver) ReadStream(ctx context.Context, path string, offset int64) (io.ReadCloser, error) { - // get oid from filename - oid, err := d.getOid(path) - - if err != nil { - return nil, err - } - - // get object stat - stat, err := d.Stat(ctx, path) - - if err != nil { - return nil, err - } - - if offset > stat.Size() { - return nil, storagedriver.InvalidOffsetError{Path: path, Offset: offset} - } - - return &readStreamReader{ - driver: d, - oid: oid, - size: uint64(stat.Size()), - offset: uint64(offset), - }, nil -} - -func (d *driver) WriteStream(ctx context.Context, path string, offset int64, reader io.Reader) (totalRead int64, err error) { - buf := make([]byte, d.chunksize) - totalRead = 0 - - oid, err := d.getOid(path) - if err != nil { - switch err.(type) { - // Trying to write new object, generate new blob identifier for it - case storagedriver.PathNotFoundError: - oid = d.generateOid() - err = d.putOid(path, oid) - if err != nil { - return 0, err - } - default: - return 0, err - } - } else { - // Check total object size only for existing ones - totalSize, err := d.getXattrTotalSize(ctx, oid) - if err != nil { - return 0, err - } - - // If offset if after the current object size, fill the gap with zeros - for totalSize < uint64(offset) { - sizeToWrite := d.chunksize - if totalSize-uint64(offset) < sizeToWrite { - sizeToWrite = totalSize - uint64(offset) - } - - chunkName, chunkOffset := d.getChunkNameFromOffset(oid, uint64(totalSize)) - err = d.Ioctx.Write(chunkName, buf[:sizeToWrite], uint64(chunkOffset)) - if err != nil { - return totalRead, err - } - - totalSize += sizeToWrite - } - } - - // Writer - for { - // Align to chunk size - sizeRead := uint64(0) - sizeToRead := uint64(offset+totalRead) % d.chunksize - if sizeToRead == 0 { - sizeToRead = d.chunksize - } - - // Read from `reader` - for sizeRead < sizeToRead { - nn, err := reader.Read(buf[sizeRead:sizeToRead]) - sizeRead += uint64(nn) - - if err != nil { - if err != io.EOF { - return totalRead, err - } - - break - } - } - - // End of file and nothing was read - if sizeRead == 0 { - break - } - - // Write chunk object - chunkName, chunkOffset := d.getChunkNameFromOffset(oid, uint64(offset+totalRead)) - err = d.Ioctx.Write(chunkName, buf[:sizeRead], uint64(chunkOffset)) - - if err != nil { - return totalRead, err - } - - // Update total object size as xattr in the first chunk of the object - err = d.setXattrTotalSize(oid, uint64(offset+totalRead)+sizeRead) - if err != nil { - return totalRead, err - } - - totalRead += int64(sizeRead) - - // End of file - if sizeRead < sizeToRead { - break - } - } - - return totalRead, nil -} - -// Stat retrieves the FileInfo for the given path, including the current size -func (d *driver) Stat(ctx context.Context, path string) (storagedriver.FileInfo, error) { - // get oid from filename - oid, err := d.getOid(path) - - if err != nil { - return nil, err - } - - // the path is a virtual directory? - if oid == "" { - return storagedriver.FileInfoInternal{ - FileInfoFields: storagedriver.FileInfoFields{ - Path: path, - Size: 0, - IsDir: true, - }, - }, nil - } - - // stat first chunk - stat, err := d.Ioctx.Stat(oid + "-0") - - if err != nil { - return nil, err - } - - // get total size of chunked object - totalSize, err := d.getXattrTotalSize(ctx, oid) - - if err != nil { - return nil, err - } - - return storagedriver.FileInfoInternal{ - FileInfoFields: storagedriver.FileInfoFields{ - Path: path, - Size: int64(totalSize), - ModTime: stat.ModTime, - }, - }, nil -} - -// List returns a list of the objects that are direct descendants of the given path. -func (d *driver) List(ctx context.Context, dirPath string) ([]string, error) { - files, err := d.listDirectoryOid(dirPath) - - if err != nil { - return nil, storagedriver.PathNotFoundError{Path: dirPath} - } - - keys := make([]string, 0, len(files)) - for k := range files { - if k != dirPath { - keys = append(keys, path.Join(dirPath, k)) - } - } - - return keys, nil -} - -// Move moves an object stored at sourcePath to destPath, removing the original -// object. -func (d *driver) Move(ctx context.Context, sourcePath string, destPath string) error { - // Get oid - oid, err := d.getOid(sourcePath) - - if err != nil { - return err - } - - // Move reference - err = d.putOid(destPath, oid) - - if err != nil { - return err - } - - // Delete old reference - err = d.deleteOid(sourcePath) - - if err != nil { - return err - } - - return nil -} - -// Delete recursively deletes all objects stored at "path" and its subpaths. -func (d *driver) Delete(ctx context.Context, objectPath string) error { - // Get oid - oid, err := d.getOid(objectPath) - - if err != nil { - return err - } - - // Deleting virtual directory - if oid == "" { - objects, err := d.listDirectoryOid(objectPath) - if err != nil { - return err - } - - for object := range objects { - err = d.Delete(ctx, path.Join(objectPath, object)) - if err != nil { - return err - } - } - } else { - // Delete object chunks - totalSize, err := d.getXattrTotalSize(ctx, oid) - - if err != nil { - return err - } - - for offset := uint64(0); offset < totalSize; offset += d.chunksize { - chunkName, _ := d.getChunkNameFromOffset(oid, offset) - - err = d.Ioctx.Delete(chunkName) - if err != nil { - return err - } - } - - // Delete reference - err = d.deleteOid(objectPath) - if err != nil { - return err - } - } - - return nil -} - -// URLFor returns a URL which may be used to retrieve the content stored at the given path. -// May return an UnsupportedMethodErr in certain StorageDriver implementations. -func (d *driver) URLFor(ctx context.Context, path string, options map[string]interface{}) (string, error) { - return "", storagedriver.ErrUnsupportedMethod{} -} - -// Generate a blob identifier -func (d *driver) generateOid() string { - return objectBlobPrefix + uuid.Generate().String() -} - -// Reference a object and its hierarchy -func (d *driver) putOid(objectPath string, oid string) error { - directory := path.Dir(objectPath) - base := path.Base(objectPath) - createParentReference := true - - // After creating this reference, skip the parents referencing since the - // hierarchy already exists - if oid == "" { - firstReference, err := d.Ioctx.GetOmapValues(directory, "", "", 1) - if (err == nil) && (len(firstReference) > 0) { - createParentReference = false - } - } - - oids := map[string][]byte{ - base: []byte(oid), - } - - // Reference object - err := d.Ioctx.SetOmap(directory, oids) - if err != nil { - return err - } - - // Esure parent virtual directories - if createParentReference { - return d.putOid(directory, "") - } - - return nil -} - -// Get the object identifier from an object name -func (d *driver) getOid(objectPath string) (string, error) { - directory := path.Dir(objectPath) - base := path.Base(objectPath) - - files, err := d.Ioctx.GetOmapValues(directory, "", base, 1) - - if (err != nil) || (files[base] == nil) { - return "", storagedriver.PathNotFoundError{Path: objectPath} - } - - return string(files[base]), nil -} - -// List the objects of a virtual directory -func (d *driver) listDirectoryOid(path string) (list map[string][]byte, err error) { - return d.Ioctx.GetAllOmapValues(path, "", "", defaultKeysFetched) -} - -// Remove a file from the files hierarchy -func (d *driver) deleteOid(objectPath string) error { - // Remove object reference - directory := path.Dir(objectPath) - base := path.Base(objectPath) - err := d.Ioctx.RmOmapKeys(directory, []string{base}) - - if err != nil { - return err - } - - // Remove virtual directory if empty (no more references) - firstReference, err := d.Ioctx.GetOmapValues(directory, "", "", 1) - - if err != nil { - return err - } - - if len(firstReference) == 0 { - // Delete omap - err := d.Ioctx.Delete(directory) - - if err != nil { - return err - } - - // Remove reference on parent omaps - if directory != "" { - return d.deleteOid(directory) - } - } - - return nil -} - -// Takes an offset in an chunked object and return the chunk name and a new -// offset in this chunk object -func (d *driver) getChunkNameFromOffset(oid string, offset uint64) (string, uint64) { - chunkID := offset / d.chunksize - chunkedOid := oid + "-" + strconv.FormatInt(int64(chunkID), 10) - chunkedOffset := offset % d.chunksize - return chunkedOid, chunkedOffset -} - -// Set the total size of a chunked object `oid` -func (d *driver) setXattrTotalSize(oid string, size uint64) error { - // Convert uint64 `size` to []byte - xattr := make([]byte, binary.MaxVarintLen64) - binary.LittleEndian.PutUint64(xattr, size) - - // Save the total size as a xattr in the first chunk - return d.Ioctx.SetXattr(oid+"-0", defaultXattrTotalSizeName, xattr) -} - -// Get the total size of the chunked object `oid` stored as xattr -func (d *driver) getXattrTotalSize(ctx context.Context, oid string) (uint64, error) { - // Fetch xattr as []byte - xattr := make([]byte, binary.MaxVarintLen64) - xattrLength, err := d.Ioctx.GetXattr(oid+"-0", defaultXattrTotalSizeName, xattr) - - if err != nil { - return 0, err - } - - if xattrLength != len(xattr) { - context.GetLogger(ctx).Errorf("object %s xattr length mismatch: %d != %d", oid, xattrLength, len(xattr)) - return 0, storagedriver.PathNotFoundError{Path: oid} - } - - // Convert []byte as uint64 - totalSize := binary.LittleEndian.Uint64(xattr) - - return totalSize, nil -} diff --git a/registry/storage/driver/rados/rados_test.go b/registry/storage/driver/rados/rados_test.go deleted file mode 100644 index ce367fb5..00000000 --- a/registry/storage/driver/rados/rados_test.go +++ /dev/null @@ -1,40 +0,0 @@ -// +build include_rados - -package rados - -import ( - "os" - "testing" - - storagedriver "github.com/docker/distribution/registry/storage/driver" - "github.com/docker/distribution/registry/storage/driver/testsuites" - - "gopkg.in/check.v1" -) - -// Hook up gocheck into the "go test" runner. -func Test(t *testing.T) { check.TestingT(t) } - -func init() { - poolname := os.Getenv("RADOS_POOL") - username := os.Getenv("RADOS_USER") - - driverConstructor := func() (storagedriver.StorageDriver, error) { - parameters := DriverParameters{ - poolname, - username, - defaultChunkSize, - } - - return New(parameters) - } - - skipCheck := func() string { - if poolname == "" { - return "RADOS_POOL must be set to run Rado tests" - } - return "" - } - - testsuites.RegisterSuite(driverConstructor, skipCheck) -}