How to copy/move all objects in Amazon S3 from one prefix to another using the AWS SDK for Node.js

Solution 1

You will need to make one AWS.S3.listObjects() call to list your objects with a specific prefix. But you are correct in that you will need to make one copyObject() call for every object that you want to copy from one bucket/prefix to the same or another bucket/prefix.

You can also use a utility library like async to manage your requests.

var AWS = require('aws-sdk');
var async = require('async');
var bucketName = 'foo';
var oldPrefix = 'abc/';
var newPrefix = 'xyz/';
var s3 = new AWS.S3({params: {Bucket: bucketName}, region: 'us-west-2'});

var done = function(err, data) {
  if (err) console.log(err);
  else console.log(data);
};

s3.listObjects({Prefix: oldPrefix}, function(err, data) {
  if (data.Contents.length) {
    async.each(data.Contents, function(file, cb) {
      var params = {
        Bucket: bucketName,
        CopySource: bucketName + '/' + file.Key,
        Key: file.Key.replace(oldPrefix, newPrefix)
      };
      s3.copyObject(params, function(copyErr, copyData){
        if (copyErr) {
          console.log(copyErr);
        }
        else {
          console.log('Copied: ', params.Key);
          cb();
        }
      });
    }, done);
  }
});

Hope this helps!

Solution 2

Here is a code snippet that does it the async/await way:

const AWS = require('aws-sdk');
AWS.config.update({
  credentials: new AWS.Credentials(....), // credential parameters
});
AWS.config.setPromisesDependency(require('bluebird'));
const s3 = new AWS.S3();

... ...

const bucketName = 'bucketName';        // example bucket
const folderToMove = 'folderToMove/';   // old folder name
const destinationFolder = 'destinationFolder/'; // new destination folder 
try {
    const listObjectsResponse = await s3.listObjects({
        Bucket: bucketName,
        Prefix: folderToMove,
        Delimiter: '/',
    }).promise();

    const folderContentInfo = listObjectsResponse.Contents;
    const folderPrefix = listObjectsResponse.Prefix;

    await Promise.all(
      folderContentInfo.map(async (fileInfo) => {
        await s3.copyObject({
          Bucket: bucketName,
          CopySource: `${bucketName}/${fileInfo.Key}`,  // old file Key
          Key: `${destinationFolder}${fileInfo.Key.replace(folderPrefix, '')}`, // new file Key (destinationFolder already ends with '/')
        }).promise();
    
        await s3.deleteObject({
          Bucket: bucketName,
          Key: fileInfo.Key,
        }).promise();
      })
    );
} catch (err) {
  console.error(err); // error handling
}
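
Note that await is only valid inside an async function, so the elided context ("... ...") above has to supply one. A minimal hypothetical wrapper (the function name below is just an example) could look like this:

// Hypothetical wrapper; the try/catch block from the snippet above goes inside it
async function moveFolder() {
  // ... listObjects / copyObject / deleteObject logic from above ...
}

moveFolder();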

Solution 3

An update to the original code that copies folders recursively. One limitation is that the code does not handle more than 1000 objects per Prefix, and of course there is a depth limitation if your folders are very deep.

import AWS from 'aws-sdk';

AWS.config.update({ region: 'ap-southeast-1' });

/**
 * Copy an S3 "folder" (prefix) recursively
 * @param {string} bucket the bucket name
 * @param {string} source the source prefix, must end with '/'
 * @param {string} dest the destination prefix, must end with '/'
 * @returns {Promise} resolves once all objects and sub-folders have been copied
 */
export default async function s3CopyFolder(bucket, source, dest) {
  // sanity check: source and dest must end with '/'
  if (!source.endsWith('/') || !dest.endsWith('/')) {
    return Promise.reject(new Error('source and dest must end with a forward slash'));
  }

  const s3 = new AWS.S3();

  // plan, list through the source, if got continuation token, recursive
  const listResponse = await s3.listObjectsV2({
    Bucket: bucket,
    Prefix: source,
    Delimiter: '/',
  }).promise();

  // copy objects
  await Promise.all(
    listResponse.Contents.map(async (file) => {
      await s3.copyObject({
        Bucket: bucket,
        CopySource: `${bucket}/${file.Key}`,
        Key: `${dest}${file.Key.replace(listResponse.Prefix, '')}`,
      }).promise();
    }),
  );

  // recursive copy sub-folders
  await Promise.all(
    listResponse.CommonPrefixes.map(async (folder) => {
      await s3CopyFolder(
        bucket,
        `${folder.Prefix}`,
        `${dest}${folder.Prefix.replace(listResponse.Prefix, '')}`,
      );
    }),
  );

  return Promise.resolve('ok');
}
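
A hypothetical invocation of the function above (the file path, bucket name, and prefixes below are placeholders, not from the original answer) might look like this:

// Hypothetical usage of the exported s3CopyFolder function
import s3CopyFolder from './s3CopyFolder';

(async () => {
  // copies everything under photos/2021/ into archive/2021/, recursing into sub-folders
  await s3CopyFolder('my-bucket', 'photos/2021/', 'archive/2021/');
})();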

Solution 4

A small change to Aditya Manohar's code that improves the error handling in the s3.copyObject callback and actually finishes the "move" by removing the source files after the copy requests have been executed:

const AWS = require('aws-sdk');
const async = require('async');
const bucketName = 'foo';
const oldPrefix = 'abc/';
const newPrefix = 'xyz/';

const s3 = new AWS.S3({
    params: {
        Bucket: bucketName
    },
    region: 'us-west-2'
});


// 1) List all the objects in the source "directory"
s3.listObjects({
    Prefix: oldPrefix
}, function (err, data) {

    if (data.Contents.length) {

        // Build up the parameters for the delete statement
        let paramsS3Delete = {
            Bucket: bucketName,
            Delete: {
                Objects: []
            }
        };

        // Expand the array with all the keys that we have found in the ListObjects function call, so that we can remove all the keys at once after we have copied all the keys
        data.Contents.forEach(function (content) {
            paramsS3Delete.Delete.Objects.push({
                Key: content.Key
            });
        });

        // 2) Copy all the source files to the destination
        async.each(data.Contents, function (file, cb) {
            var params = {
                CopySource: bucketName + '/' + file.Key,
                Key: file.Key.replace(oldPrefix, newPrefix)
            };
            s3.copyObject(params, function (copyErr, copyData) {

                if (copyErr) {
                    console.log(copyErr);
                } else {
                    console.log('Copied: ', params.Key);
                }
                cb();
            });
        }, function (asyncError, asyncData) {
            // All the requests for the file copy have finished
            if (asyncError) {
                return console.log(asyncError);
            } else {
                console.log(asyncData);

                // 3) Now remove the source files - that way we effectively moved all the content
                s3.deleteObjects(paramsS3Delete, (deleteError, deleteData) => {
                    if (deleteError) return console.log(deleteError);

                    return console.log(deleteData);
                })

            }
        });
    }
});

Note that I have moved the cb() callback outside the if-then-else block. That way, even when an error occurs, the async module will still fire the final callback.
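
If you would rather have copy failures reported to that final callback instead of only logged, async.each stops iterating and forwards the error as soon as the iteratee callback is invoked with one. A minimal variation (a sketch, not the behaviour of the answer above) would be:

s3.copyObject(params, function (copyErr) {
    if (copyErr) {
        // async.each aborts and passes copyErr to the final callback as asyncError
        return cb(copyErr);
    }
    console.log('Copied: ', params.Key);
    cb();
});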

Solution 5

None of the above handle large directories, as the list-objects-v2 command returns no more than 1000 results at a time, providing a continuation token to access additional "pages".

Here is a solution using the modern v3 SDK:

const copyAll = async ({
  s3Client,
  sourceBucket,
  targetBucket = sourceBucket,
  sourcePrefix,
  targetPrefix,
  concurrency = 1,
  deleteSource = false,
}) => {
  let ContinuationToken;

  const copyFile = async (sourceKey) => {
    const targetKey = sourceKey.replace(sourcePrefix, targetPrefix);

    await s3Client.send(
      new CopyObjectCommand({
        Bucket: targetBucket,
        Key: targetKey,
        CopySource: `${sourceBucket}/${sourceKey}`,
      }),
    );

    if (deleteSource) {
      await s3Client.send(
        new DeleteObjectCommand({
          Bucket: sourceBucket,
          Key: sourceKey,
        }),
      );
    }
  };

  do {
    const { Contents = [], NextContinuationToken } = await s3Client.send(
      new ListObjectsV2Command({
        Bucket: sourceBucket,
        Prefix: sourcePrefix,
        ContinuationToken,
      }),
    );

    const sourceKeys = Contents.map(({ Key }) => Key);

    await Promise.all(
      new Array(concurrency).fill(null).map(async () => {
        while (sourceKeys.length) {
          await copyFile(sourceKeys.pop());
        }
      }),
    );

    ContinuationToken = NextContinuationToken;
  } while (ContinuationToken);
};

If the Promise.all part is unclear, it's just a poor man's "thread pool", allowing you to copy multiple files concurrently, which can dramatically speed things up. These copies don't use any local bandwidth since the content is copied within AWS, so I had no issues with a concurrency value of 20 or more. For clarity, it's just a parallelized version of:

const sourceKeys = Contents.map(({ Key }) => Key);

while (sourceKeys.length) {
  await copyFile(sourceKeys.pop());
}
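
For completeness, here is one hypothetical way to wire it up in the same file (bucket names, prefixes, region, and concurrency are placeholders, not part of the original answer); as noted in the comments below, the command classes come from the @aws-sdk/client-s3 package:

import {
  S3Client,
  CopyObjectCommand,
  DeleteObjectCommand,
  ListObjectsV2Command,
} from '@aws-sdk/client-s3';

// ... copyAll as defined above ...

const s3Client = new S3Client({ region: 'us-west-2' });

// Move everything under abc/ to xyz/ within the same bucket, copying up to 20 objects at a time
await copyAll({
  s3Client,
  sourceBucket: 'foo',
  sourcePrefix: 'abc/',
  targetPrefix: 'xyz/',
  concurrency: 20,
  deleteSource: true,
});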

Comments

  • Yousaf
    Yousaf about 2 years

    How do I copy all objects from one prefix to another? I have tried all possible ways to copy all objects in one shot from one prefix to another, but the only way that seems to work is by looping over a list of objects and copying them one by one. This is really inefficient. If I have hundreds of files in a folder, will I have to make 100 calls?

    var params = {
             Bucket: bucket,
             CopySource: bucket+'/'+oldDirName+'/filename.txt',
             Key: newDirName+'/filename.txt',
     };
    s3.copyObject(params, function(err, data) {
      if (err) {
          callback.apply(this, [{
              type: "error",
              message: "Error while renaming Directory",
              data: err
          }]);
      } else {
          callback.apply(this, [{
              type: "success",
              message: "Directory renamed successfully",
              data: data
          }]);
      }
    });
    
  • Yousaf
    Yousaf almost 8 years
    By the way, I have used the AWS CLI, which is more efficient for this kind of thing.
  • oliverbytes
    oliverbytes over 7 years
    What about moving an object? Do I have to call copy and then delete the object?
  • Raghavendra
    Raghavendra about 7 years
    Is there a way to move or copy multiple files in one request, as we are doing above with delete?
  • Guppie70
    Guppie70 about 7 years
    @Raghavendra: not really sure what you are looking for. If you want to copy instead of moving the files, then just skip step (3), "s3.deleteObjects()". If you want to avoid multiple HTTP requests for each file, then I believe the only way is to rely on the AWS CLI. The AWS CLI has the cp command, which allows you to copy multiple files or a complete "directory" in one go: docs.aws.amazon.com/cli/latest/reference/s3/cp.html
  • Raghavendra
    Raghavendra about 7 years
    These methods do not allow deleting multiple specific files; they take a prefix pattern to copy or delete, whereas I have a set of individual files.
  • bpavlov
    bpavlov over 6 years
    In your code, isn't asyncData always undefined? Isn't asyncError also always undefined?
  • Guppie70
    Guppie70 over 6 years
    @bpavlov: I guess that you are right about the asyncData object. But according to the async documentation (caolan.github.io/async/docs.html#each) the asyncError object will be filled whenever an error occurred in the iteration routine - which in this case is the s3.copyObject() logic. So when nothing goes wrong in the s3.copyObject() logic, then the asyncError object will be empty. Hope this helps!
  • Yousaf
    Yousaf about 6 years
    The best way that worked for me was the AWS CLI for bulk operations, which is capable of moving/syncing folders.
  • Ahmet Cetin
    Ahmet Cetin almost 6 years
    Small thing, but instead of the done function, you can pass console.log as the callback as well :)
  • Kristoffer Berg - EcodeAS
    Kristoffer Berg - EcodeAS about 5 years
    With this code, wouldn't each file promise (await) be queued before handling the next? @PeterPeng's solution does the same with promises, but it does it all in parallel. Not sure though.
  • anguila
    anguila about 5 years
    the only recursive solution
  • agbb
    agbb about 4 years
    You could probably use new RegExp('^' + oldPrefix) instead of just oldPrefix, to ensure what you are replacing is an actual prefix and not some other part of the Key string.
  • frarugi87
    frarugi87 about 4 years
    this doesn't include the continuation token handling as suggested in the description though :)
  • Dzun Ho
    Dzun Ho about 2 years
    You will need to add package "@aws-sdk/client-s3" to use "S3, CopyObjectCommand, ListObjectsV2Command"
  • Eric Haynes
    Eric Haynes about 2 years
    Ah, yes. I didn't include the import in the snippet above. FYI it's documented here: docs.aws.amazon.com/AWSJavaScriptSDK/v3/latest/clients/…