Upload entire directory tree to S3 using AWS sdk in node js

Solution 1

An old-school recursive approach I whipped up in a hurry. It only uses core Node modules and the standard AWS SDK.

var AWS = require('aws-sdk');
var path = require("path");
var fs = require('fs');

const uploadDir = function(s3Path, bucketName) {
    // Note: despite the name, s3Path is the *local* directory to upload.
    let s3 = new AWS.S3();

    // Synchronously walk the directory tree, invoking callback for each file.
    function walkSync(currentDirPath, callback) {
        fs.readdirSync(currentDirPath).forEach(function (name) {
            var filePath = path.join(currentDirPath, name);
            var stat = fs.statSync(filePath);
            if (stat.isFile()) {
                callback(filePath, stat);
            } else if (stat.isDirectory()) {
                walkSync(filePath, callback);
            }
        });
    }

    walkSync(s3Path, function(filePath, stat) {
        // Strip the local directory prefix (plus its trailing separator)
        // so the S3 key mirrors the file's relative path.
        let bucketPath = filePath.substring(s3Path.length + 1);
        let params = {Bucket: bucketName, Key: bucketPath, Body: fs.readFileSync(filePath) };
        s3.putObject(params, function(err, data) {
            if (err) {
                console.log(err);
            } else {
                console.log('Successfully uploaded ' + bucketPath + ' to ' + bucketName);
            }
        });

    });
};

uploadDir("path to your folder", "your bucket name");

Special thanks to Ali from this post for helping get the filenames.
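
One caveat: putObject with fs.readFileSync buffers each whole file into memory. If the tree contains large files, a variation using s3.upload with a read stream (the SDK splits large stream bodies into multipart uploads automatically) may be safer. A minimal sketch, assuming the same walkSync, s3, and bucketName as above:

walkSync(s3Path, function (filePath, stat) {
    let bucketPath = filePath.substring(s3Path.length + 1);
    // s3.upload streams the body and handles multipart for large files,
    // unlike putObject, which needs the whole body in memory at once.
    s3.upload(
        { Bucket: bucketName, Key: bucketPath, Body: fs.createReadStream(filePath) },
        function (err, data) {
            if (err) console.log(err);
            else console.log('Successfully uploaded ' + bucketPath);
        }
    );
});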

Solution 2

Here is a cleaned-up, debugged, working version of @Jim's solution:

const fs = require('fs');
const path = require('path');
const AWS = require('aws-sdk');

const s3 = new AWS.S3();

function uploadArtifactsToS3() {
  // `config` is assumed to be defined elsewhere in the calling module.
  const artifactFolder = `logs/${config.log}/test-results`;
  const testResultsPath = './test-results';

  const walkSync = (currentDirPath, callback) => {
    fs.readdirSync(currentDirPath).forEach((name) => {
      const filePath = path.join(currentDirPath, name);
      const stat = fs.statSync(filePath);
      if (stat.isFile()) {
        callback(filePath, stat);
      } else if (stat.isDirectory()) {
        walkSync(filePath, callback);
      }
    });
  };

  walkSync(testResultsPath, async (filePath) => {
    // path.join strips the leading './', so this slice yields the path
    // relative to testResultsPath.
    let bucketPath = filePath.substring(testResultsPath.length - 1);
    let params = {
      Bucket: process.env.SOURCE_BUCKET,
      Key: `${artifactFolder}/${bucketPath}`,
      Body: fs.readFileSync(filePath)
    };
    try {
      await s3.putObject(params).promise();
      console.log(`Successfully uploaded ${bucketPath} to s3 bucket`);
    } catch (error) {
      console.error(`error in uploading ${bucketPath} to s3 bucket`);
      throw new Error(`error in uploading ${bucketPath} to s3 bucket`);
    }
  });
}
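
Note that walkSync discards the promise returned by its async callback, so the throw above surfaces as an unhandled rejection and uploadArtifactsToS3 returns before the uploads finish. A minimal sketch of one way to make failures propagate, assuming the same walkSync, s3, and config: collect the upload promises and await them together:

async function uploadArtifactsToS3() {
  const artifactFolder = `logs/${config.log}/test-results`;
  const testResultsPath = './test-results';

  const uploads = [];
  walkSync(testResultsPath, (filePath) => {
    const bucketPath = filePath.substring(testResultsPath.length - 1);
    uploads.push(
      s3.putObject({
        Bucket: process.env.SOURCE_BUCKET,
        Key: `${artifactFolder}/${bucketPath}`,
        Body: fs.readFileSync(filePath),
      }).promise()
    );
  });
  // Rejections now reach the caller instead of being swallowed.
  await Promise.all(uploads);
}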

Solution 3

async/await + TypeScript

If you need a solution that uses modern JavaScript syntax and is compatible with TypeScript, I came up with the following code. The recursive getFiles is borrowed from this answer (after all these years, recursion still gives me a headache, lol).

import { promises as fs, createReadStream } from 'fs';
import * as path from 'path';
import { S3 } from 'aws-sdk';

async function uploadDir(s3Path: string, bucketName: string) {
  const s3 = new S3();

  // Recursive getFiles from
  // https://stackoverflow.com/a/45130990/831465
  // Returns a flat list of absolute paths of every file under dir.
  async function getFiles(dir: string): Promise<string | string[]> {
    const dirents = await fs.readdir(dir, { withFileTypes: true });
    const files = await Promise.all(
      dirents.map((dirent) => {
        const res = path.resolve(dir, dirent.name);
        return dirent.isDirectory() ? getFiles(res) : res;
      })
    );
    // concat flattens the nested arrays produced by the recursion.
    return Array.prototype.concat(...files);
  }

  const files = (await getFiles(s3Path)) as string[];
  const uploads = files.map((filePath) =>
    s3
      .putObject({
        // The key is the file's path relative to the uploaded directory.
        Key: path.relative(s3Path, filePath),
        Bucket: bucketName,
        Body: createReadStream(filePath),
      })
      .promise()
  );
  return Promise.all(uploads);
}

// Top-level await requires an ES module context; otherwise wrap the
// call in an async function.
await uploadDir(path.resolve('./my-path'), 'bucketname');
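
One caveat, also raised in the comments at the bottom of the page: on Windows, path.relative produces backslash-separated paths, which end up as literal backslashes in the S3 keys. A small normalization inside the files.map callback keeps the keys POSIX-style:

// Normalize Windows separators so S3 keys always use forward slashes.
const key = path.relative(s3Path, filePath).split(path.sep).join(path.posix.sep);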

Solution 4

I was just contemplating this problem the other day, and was thinking something like this:

...    
var async = require('async'),
    fs = require('fs'),
    path = require("path");

var directoryName = './test',
    directoryPath = path.resolve(directoryName);

// bucket, s3Path, s3FilePermissions and S3 are the same variables
// used in the question's snippet.
var files = fs.readdirSync(directoryPath);
async.map(files, function (f, cb) {
    var filePath = path.join(directoryPath, f);

    var options = {
        Bucket: bucket,
        // Include the filename in the key so uploads don't overwrite each other.
        Key: s3Path + '/' + f,
        Body: fs.readFileSync(filePath),
        ACL: s3FilePermissions
    };

    S3.putObject(options, cb);

}, function (err, results) {
    if (err) console.error(err);
    console.log(results);
});
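
Two caveats: fs.readdirSync is not recursive, so only top-level files are uploaded, and async.map starts every upload at once. Since the async library is already in play, a sketch using async.eachLimit bounds the concurrency, which also helps with the "EMFILE: too many open files" error mentioned in the comments below (the limit of 10 is an arbitrary choice):

// Upload at most 10 files at a time; files, directoryPath, bucket,
// s3Path and s3FilePermissions are the same variables as above.
async.eachLimit(files, 10, function (f, cb) {
    var filePath = path.join(directoryPath, f);
    S3.putObject({
        Bucket: bucket,
        Key: s3Path + '/' + f,
        Body: fs.readFileSync(filePath),
        ACL: s3FilePermissions
    }, cb);
}, function (err) {
    if (err) console.error(err);
    else console.log('all uploads complete');
});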

Solution 5

Here's a version that wraps the upload in a Promise, so you can perform an action once all uploads are complete via Promise.all().then(...).

const path = require('path');
const fs = require('fs');
const AWS = require('aws-sdk');
const s3 = new AWS.S3();

const directoryToUpload = 'directory-name-here';
const bucketName = 'name-of-s3-bucket-here';

// get file paths
const filePaths = [];
const getFilePaths = (dir) => {
  fs.readdirSync(dir).forEach(function (name) {
    const filePath = path.join(dir, name);
    const stat = fs.statSync(filePath);
    if (stat.isFile()) {
      filePaths.push(filePath);
    } else if (stat.isDirectory()) {
      getFilePaths(filePath);
    }
  });
};
getFilePaths(directoryToUpload);

// upload to S3
const uploadToS3 = (dir, filePath) => {
  return new Promise((resolve, reject) => {
    // Derive the S3 key from the path relative to the upload directory.
    // (The parameter is named filePath to avoid shadowing the 'path'
    // module imported above.)
    const key = filePath.split(`${dir}/`)[1];
    const params = {
      Bucket: bucketName,
      Key: key,
      Body: fs.readFileSync(filePath),
    };
    s3.putObject(params, (err) => {
      if (err) {
        reject(err);
      } else {
        console.log(`uploaded ${params.Key} to ${params.Bucket}`);
        resolve(filePath);
      }
    });
  });
};

const uploadPromises = filePaths.map((filePath) =>
  uploadToS3(directoryToUpload, filePath)
);
Promise.all(uploadPromises)
  .then((result) => {
    console.log('uploads complete');
    console.log(result);
  })
  .catch((err) => console.error(err));
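
All of the answers above target AWS SDK v2 (the aws-sdk package). For projects on SDK v3, the same per-file upload looks roughly like the following sketch, assuming @aws-sdk/client-s3 is installed; uploadAllV3 is a hypothetical helper, and filePaths can come from any of the directory walkers above:

const { S3Client, PutObjectCommand } = require('@aws-sdk/client-s3');
const fs = require('fs');
const path = require('path');

// Region and credentials are picked up from the environment.
const client = new S3Client({});

async function uploadAllV3(dir, bucketName, filePaths) {
  await Promise.all(
    filePaths.map((filePath) =>
      client.send(
        new PutObjectCommand({
          Bucket: bucketName,
          // Normalize separators so keys are POSIX-style on Windows too.
          Key: path.relative(dir, filePath).split(path.sep).join(path.posix.sep),
          Body: fs.readFileSync(filePath),
        })
      )
    )
  );
}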
Author: LifeQuery
Updated on January 24, 2021

Comments

  • LifeQuery
    LifeQuery over 3 years

I currently upload single objects to S3 like so:

    var options = {
            Bucket: bucket,
            Key: s3Path,
            Body: body,
            ACL: s3FilePermissions
    };
    
    S3.putObject(options,
    function (err, data) {
        //console.log(data);
    });
    

But when I have a large resources folder, for example, I use the AWS CLI tool.
    I was wondering: is there a native way to do the same thing with the AWS SDK (upload entire folders to S3)?

  • Hitesh Joshi
    Hitesh Joshi almost 9 years
    This doesn't work. It just hangs on progress. No error, no progress.
  • unboundev
    unboundev almost 9 years
    Are you sure the client is initialized properly? I've edited the answer to include client initialization.
  • Hitesh Joshi
    Hitesh Joshi almost 9 years
Yes, otherwise it would show an error. So no error, the client is initialized properly, but the sync won't happen.
  • Barlas Apaydin
    Barlas Apaydin almost 8 years
fs.readFileSync(filePath) returns Error: EISDIR: illegal operation on a directory, read, as here: stackoverflow.com/questions/25883775/…
  • dsw88
    dsw88 almost 7 years
This library doesn't currently seem to support uploading directories that contain files larger than 1 MB. Until that's supported, this isn't really a great solution for a generic directory sync to S3. Other than that deal-breaking issue, it was a pretty nice library to use.
  • albanx
    albanx over 5 years
    old school is always the best
  • Black-Hole
    Black-Hole almost 5 years
When using this library, please note that it has not been updated in many years! There are a lot of unfixed bugs, and I have wasted 5 hours on it. Someone has re-issued a fixed version: npmjs.com/package/@auth0/s3
  • Martin Bartlett
    Martin Bartlett about 4 years
    Careful - this breaks when there are a huge number of folders - Error processing event: Error: EMFILE: too many open files, scandir '/tmp/diaas-uiGPwkBx/standard-launcher'
  • Arsal Imam
    Arsal Imam over 3 years
    how would you be able to manage the callback?
  • Jim Chertkov
    Jim Chertkov about 3 years
    Ha, I came back to this post years later looking to see if anyone had updated the answers with a clean async version. @ofhouse's answer looks legit, but this is easier to follow. Thanks for taking the time to post this!
  • Omar Dulaimi
    Omar Dulaimi almost 3 years
    @BarlasApaydin That's because readFileSync is getting a directory; not a file, to read.
  • ParkerD
    ParkerD over 2 years
    And now the above package has also not been updated in years.
  • OhadR
    OhadR over 2 years
Good idea. Before upvoting, I noticed that all files are uploaded flat to the root folder, and each file's key in S3 contains the full path (as a string). Is this on purpose?
  • OhadR
    OhadR over 2 years
For Windows, add something like: const key = path.relative(s3Path, filePath); const posixKey = key.split(path.sep).join(path.posix.sep);