How to copy/move all objects in Amazon S3 from one prefix to other using the AWS SDK for Node.js
Solution 1
You will need to make one AWS.S3.listObjects() call to list your objects with a specific prefix. But you are correct in that you will need to make one call for every object that you want to copy from one bucket/prefix to the same or another bucket/prefix.
You can also use a utility library like async to manage your requests.
var AWS = require('aws-sdk');
var async = require('async');

var bucketName = 'foo';
var oldPrefix = 'abc/';
var newPrefix = 'xyz/';
var s3 = new AWS.S3({params: {Bucket: bucketName}, region: 'us-west-2'});

// Final callback for async.each: logs either the first error or the result.
var done = function(err, data) {
  if (err) console.log(err);
  else console.log(data);
};

s3.listObjects({Prefix: oldPrefix}, function(err, data) {
  // Guard: when the listing itself fails, `data` is undefined — report and stop.
  if (err) return done(err);
  if (data.Contents.length) {
    async.each(data.Contents, function(file, cb) {
      var params = {
        Bucket: bucketName,
        CopySource: bucketName + '/' + file.Key,
        // Rewrite only the prefix portion of the key.
        Key: file.Key.replace(oldPrefix, newPrefix)
      };
      s3.copyObject(params, function(copyErr, copyData){
        if (copyErr) {
          console.log(copyErr);
          // Bug fix: the original never called cb() on error, so async.each
          // stalled and `done` never fired. Propagate the error instead.
          return cb(copyErr);
        }
        console.log('Copied: ', params.Key);
        cb();
      });
    }, done);
  }
});
Hope this helps!
Solution 2
Here is a code snippet that does it in the "async await" way:
const AWS = require('aws-sdk');
AWS.config.update({
credentials: new AWS.Credentials(....), // credential parameters
});
AWS.config.setPromisesDependency(require('bluebird'));
const s3 = new AWS.S3();
... ...
const bucketName = 'bucketName'; // example bucket
const folderToMove = 'folderToMove/'; // old folder name
const destinationFolder = 'destinationFolder/'; // new destination folder
try {
const listObjectsResponse = await s3.listObjects({
Bucket: bucketName,
Prefix: folderToMove,
Delimiter: '/',
}).promise();
const folderContentInfo = listObjectsResponse.Contents;
const folderPrefix = listObjectsResponse.Prefix;
await Promise.all(
folderContentInfo.map(async (fileInfo) => {
await s3.copyObject({
Bucket: bucketName,
CopySource: `${bucketName}/${fileInfo.Key}`, // old file Key
Key: `${destinationFolder}/${fileInfo.Key.replace(folderPrefix, '')}`, // new file Key
}).promise();
await s3.deleteObject({
Bucket: bucketName,
Key: fileInfo.Key,
}).promise();
})
);
} catch (err) {
console.error(err); // error handling
}
Solution 3
A further update on the original code, which copies folders recursively. One limitation is that the code does not handle more than 1000 objects per prefix, and of course there is a depth limitation if your folders are very deep.
import AWS from 'aws-sdk';
AWS.config.update({ region: 'ap-southeast-1' });
/**
 * Recursively copy an S3 "folder" (prefix) to another prefix within the
 * same bucket, paginating through listings so prefixes with more than
 * 1000 objects are fully handled.
 * @param {string} bucket the bucket containing both prefixes
 * @param {string} source the source prefix; must end with '/'
 * @param {string} dest the destination prefix; must end with '/'
 * @returns {promise} resolves with 'ok' once every object has been copied
 */
export default async function s3CopyFolder(bucket, source, dest) {
  // sanity check: source and dest must end with '/'
  if (!source.endsWith('/') || !dest.endsWith('/')) {
    return Promise.reject(new Error('source or dest must ends with fwd slash'));
  }
  const s3 = new AWS.S3();

  let continuationToken;
  do {
    // List one page (up to 1000 keys) of the source prefix.
    const listResponse = await s3.listObjectsV2({
      Bucket: bucket,
      Prefix: source,
      Delimiter: '/',
      ContinuationToken: continuationToken,
    }).promise();

    // Copy the objects found on this page.
    await Promise.all(
      listResponse.Contents.map(async (file) => {
        await s3.copyObject({
          Bucket: bucket,
          CopySource: `${bucket}/${file.Key}`,
          Key: `${dest}${file.Key.replace(listResponse.Prefix, '')}`,
        }).promise();
      }),
    );

    // Recursively copy the sub-folders reported for this page.
    // CommonPrefixes can be absent when there are none, hence the fallback.
    await Promise.all(
      (listResponse.CommonPrefixes || []).map(async (folder) => {
        await s3CopyFolder(
          bucket,
          `${folder.Prefix}`,
          `${dest}${folder.Prefix.replace(listResponse.Prefix, '')}`,
        );
      }),
    );

    // Keep paginating while the listing was truncated (bug fix: the
    // original never followed the continuation token, silently dropping
    // everything past the first 1000 keys).
    continuationToken = listResponse.IsTruncated
      ? listResponse.NextContinuationToken
      : undefined;
  } while (continuationToken);

  return Promise.resolve('ok');
}
Solution 4
A small change to the code of Aditya Manohar that improves the error handling in the s3.copyObject function and will actually finish the "move" request by removing the source files after the copy requests have been executed:
const AWS = require('aws-sdk');
const async = require('async');

const bucketName = 'foo';
const oldPrefix = 'abc/';
const newPrefix = 'xyz/';
const s3 = new AWS.S3({
  params: {
    Bucket: bucketName
  },
  region: 'us-west-2'
});

// 1) List all the objects in the source "directory"
s3.listObjects({
  Prefix: oldPrefix
}, function (err, data) {
  // Guard: when the listing fails, `data` is undefined — report and stop.
  if (err) return console.log(err);
  if (data.Contents.length) {
    // Build up the parameters for the delete statement
    let paramsS3Delete = {
      Bucket: bucketName,
      Delete: {
        Objects: []
      }
    };
    // Collect every key returned by listObjects so that all of them can be
    // removed with a single deleteObjects call once the copies finish.
    data.Contents.forEach(function (content) {
      paramsS3Delete.Delete.Objects.push({
        Key: content.Key
      });
    });
    // 2) Copy all the source files to the destination
    async.each(data.Contents, function (file, cb) {
      var params = {
        CopySource: bucketName + '/' + file.Key,
        Key: file.Key.replace(oldPrefix, newPrefix)
      };
      s3.copyObject(params, function (copyErr, copyData) {
        if (copyErr) {
          // Bug fix: log the copy error, not the (null) listing error `err`.
          console.log(copyErr);
        } else {
          console.log('Copied: ', params.Key);
        }
        // cb() sits outside the if/else on purpose: the final callback of
        // async.each must fire even when an individual copy fails.
        cb();
      });
    }, function (asyncError, asyncData) {
      // All the requests for the file copy have finished
      if (asyncError) {
        return console.log(asyncError);
      } else {
        console.log(asyncData);
        // 3) Now remove the source files - that way we effectively moved all the content
        s3.deleteObjects(paramsS3Delete, (deleteError, deleteData) => {
          if (deleteError) return console.log(deleteError);
          return console.log(deleteData);
        })
      }
    });
  }
});
Note that I have moved the cb()
callback function outside of the if-then-else block. That way, even when an error occurs, the async module will still fire the done()
function.
Solution 5
None of the above handle large directories, as the list-objects-v2
command returns no more than 1000 results at a time, providing a continuation token to access additional "pages".
Here is a solution using the modern, v3 sdk:
/**
 * Copy every object under `sourcePrefix` to `targetPrefix`, following
 * ListObjectsV2 continuation tokens so directories of any size are handled.
 * When `deleteSource` is true each source object is removed after its copy
 * succeeds (i.e. a "move"). `concurrency` workers drain a shared queue of
 * keys so several transfers can be in flight at once.
 */
const copyAll = async ({
  s3Client,
  sourceBucket,
  targetBucket = sourceBucket,
  sourcePrefix,
  targetPrefix,
  concurrency = 1,
  deleteSource = false,
}) => {
  // Transfer a single object; when moving, also remove the original.
  const transferOne = async (sourceKey) => {
    const targetKey = sourceKey.replace(sourcePrefix, targetPrefix);
    await s3Client.send(
      new CopyObjectCommand({
        Bucket: targetBucket,
        Key: targetKey,
        CopySource: `${sourceBucket}/${sourceKey}`,
      }),
    );
    if (deleteSource) {
      await s3Client.send(
        new DeleteObjectCommand({
          Bucket: sourceBucket,
          Key: sourceKey,
        }),
      );
    }
  };

  let ContinuationToken;
  do {
    // Fetch the next page (up to 1000 keys) of the source listing.
    const page = await s3Client.send(
      new ListObjectsV2Command({
        Bucket: sourceBucket,
        Prefix: sourcePrefix,
        ContinuationToken,
      }),
    );
    const queue = (page.Contents ?? []).map(({ Key }) => Key);

    // Poor man's thread pool: N workers pull keys off the shared queue
    // until it is empty.
    await Promise.all(
      Array.from({ length: concurrency }, async () => {
        while (queue.length) {
          await transferOne(queue.pop());
        }
      }),
    );
    ContinuationToken = page.NextContinuationToken;
  } while (ContinuationToken);
};
If the Promise.all
part is unclear, it's just a poor man's "thread pool", allowing you to copy multiple files concurrently, which can dramatically speed things up. These don't use any bandwidth since the content is copied within AWS, so I had no issues with a value of 20 or more for concurrency
. For clarity, it's just parallelized version of:
// Sequential equivalent of the pooled worker loop, shown for clarity:
// drain the queue of listed keys one at a time (assumes `Contents` and
// `copyFile` from the surrounding snippet are in scope).
const sourceKeys = Contents.map(({ Key }) => Key);
while (sourceKeys.length) {
await copyFile(sourceKeys.pop());
}
Related videos on Youtube
Yousaf
Updated on April 22, 2022Comments
-
Yousaf about 2 years
How do I copy all objects from one prefix to other? I have tried all possible ways to copy all objects in one shot from one prefix to other, but the only way that seems to work is by looping over a list of objects and copying them one by one. This is really inefficient. If I have hundreds of files in a folder, will I have to make 100 calls?
var params = { Bucket: bucket, CopySource: bucket+'/'+oldDirName+'/filename.txt', Key: newDirName+'/filename.txt', }; s3.copyObject(params, function(err, data) { if (err) { callback.apply(this, [{ type: "error", message: "Error while renaming Directory", data: err }]); } else { callback.apply(this, [{ type: "success", message: "Directory renamed successfully", data: data }]); } });
-
Yousaf almost 8 yearsBy the way i have used aws cli which is more efficient in doing this kind of stuff
-
oliverbytes over 7 yearsHow about to move an object? do I have to call copy then delete object?
-
Raghavendra about 7 yearsis there a way to move or copy multiple files at one request as we are doing above with delete?
-
Guppie70 about 7 years@Raghavendra: not really sure what you are looking for. If you want copy instead of moving the files, then just skip step (3) "s3.deleteObjects()". If you want to avoid multiple HTTP requests for each file, then I believe that the only way is to rely on the AWS CLI. The AWS CLI has the cp() method that allows you to copy multiple files or a complete "directory" in one go: docs.aws.amazon.com/cli/latest/reference/s3/cp.html
-
Raghavendra about 7 yearsthese methods do not allow delete multiple they take a pattern prefix to copy or delete i have a set of files
-
bpavlov over 6 yearsIn your code asyncData is always undefined? asyncError is also always undefined?
-
Guppie70 over 6 years@bpavlov: I guess that you are right about the asyncData object. But according to the async documentation (caolan.github.io/async/docs.html#each) the
asyncError
object will be filled whenever an error occurred in the iteration routine - which in this case is thes3.copyObject()
logic. So when nothing goes wrong in thes3.copyObject()
logic, then theasyncError
object will be empty. Hope this helps! -
Yousaf about 6 yearsThe best way that worked for me was AWS-CLI for bulk operations which is capable of moving/syncing folders.
-
Ahmet Cetin almost 6 yearssmall thing but instead of done function, you can pass console.log as a function as well :)
-
Kristoffer Berg - EcodeAS about 5 yearsWith this code - wouldn't each file promise (await) be queued before handling the next? Using @PeterPeng solution, it do the same with promises, but it do it all in parallel. Not sure tho..
-
anguila about 5 yearsthe only recursive solution
-
agbb about 4 yearsYou could probably use
new Regex("^" + oldPrefix)
instead of justoldPrefix
, to ensure what you are replacing is an actual prefix and not any other part of the Key string. -
frarugi87 about 4 yearsthis doesn't include the continuation token handling as suggested in the description though :)
-
Dzun Ho about 2 yearsYou will need to add package "@aws-sdk/client-s3" to use "S3, CopyObjectCommand, ListObjectsV2Command"
-
Eric Haynes about 2 yearsAh, yes. I didn't include the import in the snippet above. FYI it's documented here: docs.aws.amazon.com/AWSJavaScriptSDK/v3/latest/clients/…