Skip to content

Commit

Permalink
feat: added Head command (peak#730)
Browse files Browse the repository at this point in the history
This pull request adds the `head` command to the program. Closes peak#682.

The `head` command checks whether an object or bucket exists without
downloading any data. For an object, it retrieves the metadata without
returning the object itself. This operation is useful for users who are
only interested in an object's metadata.

- Implemented the head command functionality.
- Added end-to-end tests to ensure the correctness of the head command
implementation.

Usage:

Check if a bucket exists
```
s5cmd head s3://bucket-name
```

Check if a file exists and retrieve its metadata

```
s5cmd head s3://bucket-name/object
```

---------

Co-authored-by: S.Burak Yaşar <burakyasar@peak.com>
  • Loading branch information
4o4x and 4o4x authored Jul 26, 2024
1 parent 89218dc commit b5e5143
Show file tree
Hide file tree
Showing 10 changed files with 660 additions and 23 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

#### Features
- Added prefix and wildcard support to `cat` command. ([#716](https://github.com/peak/s5cmd/issues/716))
- Added `head` command. ([#730](https://github.com/peak/s5cmd/pull/730))

## v2.2.2 - 13 Sep 2023

Expand Down
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,14 @@ While executing the commands, `s5cmd` detects the region according to the follow
### Examples
#### Check if a bucket exists
s5cmd head s3://bucket/
#### Print a remote object's metadata
s5cmd head s3://bucket/object.gz
#### Download a single S3 object
s5cmd cp s3://bucket/object.gz .
Expand Down
1 change: 1 addition & 0 deletions command/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ func Commands() []*cli.Command {
NewVersionCommand(),
NewBucketVersionCommand(),
NewPresignCommand(),
NewHeadCommand(),
}
}

Expand Down
206 changes: 206 additions & 0 deletions command/head.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
package command

import (
"context"
"fmt"
"time"

"github.com/peak/s5cmd/v2/log"
"github.com/peak/s5cmd/v2/log/stat"
"github.com/peak/s5cmd/v2/storage"
"github.com/peak/s5cmd/v2/storage/url"
"github.com/peak/s5cmd/v2/strutil"
"github.com/urfave/cli/v2"
)

// headHelpTemplate is the help text template of the head command. It is a
// raw string, so every byte between the backticks (including line breaks) is
// part of the rendered help output. NewHeadCommand assigns it to the
// command's CustomHelpTemplate.
var headHelpTemplate = `Name:
{{.HelpName}} - {{.Usage}}
Usage:
{{.HelpName}} [options] source
Options:
{{range .VisibleFlags}}{{.}}
{{end}}
Examples:
1. Print a remote object's metadata
> s5cmd {{.HelpName}} s3://bucket/prefix/object
2. Check if a remote bucket exists
> s5cmd {{.HelpName}} s3://bucket
3. Print a remote object's metadata with version ID
> s5cmd {{.HelpName}} --version-id VERSION_ID s3://bucket/prefix/object
4. Print a remote object's metadata with raw input
> s5cmd {{.HelpName}} --raw 's3://bucket/prefix/file*.txt'
`

// NewHeadCommand builds the "head" CLI command.
//
// The command accepts the --version-id and --raw flags. Its Before hook runs
// validateHeadCommand and prints any validation error; its Action parses the
// first positional argument into a URL and delegates to Head.Run.
func NewHeadCommand() *cli.Command {
	flags := []cli.Flag{
		&cli.StringFlag{
			Name:  "version-id",
			Usage: "use the specified version of an object",
		},
		&cli.BoolFlag{
			Name:  "raw",
			Usage: "disable the wildcard operations, useful with filenames that contains glob characters",
		},
	}

	// validate is the Before hook: a validation failure is printed and then
	// returned to the caller.
	validate := func(c *cli.Context) error {
		if err := validateHeadCommand(c); err != nil {
			printError(commandFromContext(c), c.Command.Name, err)
			return err
		}
		return nil
	}

	// run is the Action hook. The result is named so that the function
	// returned by stat.Collect, which is deferred, can observe the final
	// error through the pointer it was given.
	run := func(c *cli.Context) (err error) {
		defer stat.Collect(c.Command.FullName(), &err)()

		var (
			name    = c.Command.Name
			cmdline = commandFromContext(c)
		)

		// err here is the named result, not a new variable: only source is
		// newly declared by this short variable declaration.
		source, err := url.New(
			c.Args().Get(0),
			url.WithVersion(c.String("version-id")),
			url.WithRaw(c.Bool("raw")),
		)
		if err != nil {
			printError(cmdline, name, err)
			return err
		}

		head := Head{
			src:         source,
			op:          name,
			fullCommand: cmdline,
			storageOpts: NewStorageOpts(c),
		}
		return head.Run(c.Context)
	}

	command := &cli.Command{
		Name:     "head",
		HelpName: "head",
		Usage:    "print remote object metadata",

		CustomHelpTemplate: headHelpTemplate,

		Flags:  flags,
		Before: validate,
		Action: run,
	}

	command.BashComplete = getBashCompleteFn(command, true, false)
	return command
}

// Head holds the parameters of a single head operation; see Run.
type Head struct {
	// src is the bucket or object URL that is queried.
	src *url.URL

	// op and fullCommand are passed to printError when a step fails.
	op          string
	fullCommand string

	// storageOpts is handed to storage.NewRemoteClient when the client is
	// created.
	storageOpts storage.Options
}

// Run executes the head operation against h.src.
//
// When h.src is a bucket URL, HeadBucket is called and a HeadBucketMessage is
// logged on success. Otherwise HeadObject is called and a HeadObjectMessage
// built from the returned object and metadata is logged. Any error is printed
// via printError and returned to the caller.
func (h Head) Run(ctx context.Context) error {
	// report prints the error together with the command context and hands it
	// back, so that every failure path is a single return statement.
	report := func(err error) error {
		printError(h.fullCommand, h.op, err)
		return err
	}

	client, err := storage.NewRemoteClient(ctx, h.src, h.storageOpts)
	if err != nil {
		return report(err)
	}

	if h.src.IsBucket() {
		if err := client.HeadBucket(ctx, h.src); err != nil {
			return report(err)
		}

		log.Info(HeadBucketMessage{
			Bucket: h.src.String(),
		})
		return nil
	}

	obj, meta, err := client.HeadObject(ctx, h.src)
	if err != nil {
		return report(err)
	}

	log.Info(HeadObjectMessage{
		Key:                  obj.URL.String(),
		ContentType:          meta.ContentType,
		ServerSideEncryption: meta.EncryptionMethod,
		LastModified:         obj.ModTime,
		ContentLength:        obj.Size,
		StorageClass:         string(obj.StorageClass),
		VersionID:            obj.VersionID,
		ETag:                 obj.Etag,
		Metadata:             meta.UserDefined,
	})
	return nil
}

// HeadObjectMessage is the message logged by Head.Run after a successful
// object lookup.
//
// Every field except Metadata is tagged omitempty, so with encoding/json
// semantics zero values (including a size of 0 and a nil LastModified) are
// left out of the output, while Metadata is always present.
type HeadObjectMessage struct {
	// Key is the string form of the object's URL.
	Key string `json:"key,omitempty"`

	ContentType          string `json:"content_type,omitempty"`
	ServerSideEncryption string `json:"server_side_encryption,omitempty"`

	// LastModified is a pointer so that an unknown time can be omitted.
	LastModified *time.Time `json:"last_modified,omitempty"`

	// ContentLength is emitted under the "size" key.
	ContentLength int64 `json:"size,omitempty"`

	StorageClass string `json:"storage_class,omitempty"`
	VersionID    string `json:"version_id,omitempty"`
	ETag         string `json:"etag,omitempty"`

	// Metadata is populated from the metadata's UserDefined field in
	// Head.Run; it is the only field without omitempty.
	Metadata map[string]string `json:"metadata"`
}

// String returns the same text as JSON, so the message is rendered as JSON
// wherever its string form is used.
func (m HeadObjectMessage) String() string {
	rendered := m.JSON()
	return rendered
}

// JSON returns the result of passing the message to strutil.JSON.
func (m HeadObjectMessage) JSON() string {
	encoded := strutil.JSON(m)
	return encoded
}

// HeadBucketMessage is the message logged by Head.Run after a successful
// bucket lookup.
type HeadBucketMessage struct {
	// Bucket is the string form of the bucket URL; it has no omitempty tag,
	// so the "bucket" key is always emitted.
	Bucket string `json:"bucket"`
}

// String returns the same text as JSON, so the message is rendered as JSON
// wherever its string form is used.
func (m HeadBucketMessage) String() string {
	rendered := m.JSON()
	return rendered
}

// JSON returns the result of passing the message to strutil.JSON.
func (m HeadBucketMessage) JSON() string {
	encoded := strutil.JSON(m)
	return encoded
}

// validateHeadCommand checks the arguments and flags of the head command
// before it runs.
//
// It returns an error when:
//   - the number of positional arguments is not exactly one,
//   - the argument cannot be parsed as a URL,
//   - the URL is a prefix, is not remote, or contains glob characters
//     without --raw,
//   - one of the versioning checks (checkVersinoningURLRemote,
//     checkVersioningWithGoogleEndpoint) fails.
//
// It returns nil when all checks pass.
func validateHeadCommand(c *cli.Context) error {
	// Exactly one target is expected. The missing-argument case must be
	// rejected here as well: otherwise an empty string is handed to url.New
	// and the user never sees this message, which describes that very case.
	if c.Args().Len() != 1 {
		return fmt.Errorf("object or bucket name is required")
	}

	srcurl, err := url.New(c.Args().Get(0), url.WithVersion(c.String("version-id")),
		url.WithRaw(c.Bool("raw")))
	if err != nil {
		return err
	}

	if srcurl.IsPrefix() {
		return fmt.Errorf("target have to be a object or a bucket")
	}

	if !srcurl.IsRemote() {
		return fmt.Errorf("target should be remote object or bucket")
	}

	// Glob characters are only accepted when --raw is given.
	if srcurl.IsWildcard() && !srcurl.IsRaw() {
		return fmt.Errorf("remote source %q can not contain glob characters", srcurl)
	}

	if err := checkVersinoningURLRemote(srcurl); err != nil {
		return err
	}

	if err := checkVersioningWithGoogleEndpoint(c); err != nil {
		return err
	}

	return nil
}
2 changes: 1 addition & 1 deletion command/ls.go
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ func (l ListMessage) String() string {
return l.Object.URL.String()
}
var etag string
// date and storage fiels
// date and storage fields
var listFormat = "%19s %2s"

// align etag
Expand Down
Loading

0 comments on commit b5e5143

Please sign in to comment.