From 4aba460a6309ece485603bab3017742b1443e5b7 Mon Sep 17 00:00:00 2001
From: MICHAEL JACKSON <mjijackson@gmail.com>
Date: Sun, 18 Jun 2017 16:54:09 +1000
Subject: [PATCH] Fix extraction race condition

This is a fix for a subtle bug that caused unpkg to incorrectly send a
404 response for valid filenames on the initial request for a package.
It also prevents the same package from being downloaded from the
registry by multiple concurrent requests.

Fixes #38
Closes #39
---
 server/middleware/RegistryUtils.js | 135 +++++++++++++++++------------
 server/middleware/index.js         |  11 +--
 2 files changed, 85 insertions(+), 61 deletions(-)

diff --git a/server/middleware/RegistryUtils.js b/server/middleware/RegistryUtils.js
index a5aa752..e422d95 100644
--- a/server/middleware/RegistryUtils.js
+++ b/server/middleware/RegistryUtils.js
@@ -4,7 +4,7 @@ const mkdirp = require('mkdirp')
 const tar = require('tar-fs')
 const RegistryCache = require('./RegistryCache')
 
-const getPackageInfoFromRegistry = (registryURL, packageName) => {
+const fetchPackageInfoFromRegistry = (registryURL, packageName) => {
   let encodedPackageName
   if (packageName.charAt(0) === '@') {
     encodedPackageName = `@${encodeURIComponent(packageName.substring(1))}`
@@ -23,34 +23,35 @@ const getPackageInfoFromRegistry = (registryURL, packageName) => {
 
 const PackageNotFound = 'PackageNotFound'
 
-const getPackageInfo = (registryURL, packageName, callback) => {
-  RegistryCache.get(packageName, (error, value) => {
-    if (error) {
-      callback(error)
-    } else if (value) {
-      callback(null, value === PackageNotFound ? null : value)
-    } else {
-      getPackageInfoFromRegistry(registryURL, packageName).then(value => {
-        if (value == null) {
-          // Keep 404s in the cache for 5 minutes. This prevents us
-          // from making unnecessary requests to the registry for
-          // bad package names. In the worst case, a brand new
-          // package's info will be available within 5 minutes.
-          RegistryCache.set(packageName, PackageNotFound, 300)
-        } else {
-          // Keep package.json in the cache for a minute.
-          RegistryCache.set(packageName, value, 60)
-        }
+const fetchPackageInfo = (registryURL, packageName) =>
+  new Promise((resolve, reject) => {
+    RegistryCache.get(packageName, (error, value) => {
+      if (error) {
+        reject(error)
+      } else if (value) {
+        resolve(value === PackageNotFound ? null : value)
+      } else {
+        fetchPackageInfoFromRegistry(registryURL, packageName).then(value => {
+          if (value == null) {
+            // Keep 404s in the cache for 5 minutes. This prevents us
+            // from making unnecessary requests to the registry for
+            // bad package names. In the worst case, a brand new
+            // package's info will be available within 5 minutes.
+            RegistryCache.set(packageName, PackageNotFound, 300)
+          } else {
+            // Keep package.json in the cache for a minute.
+            RegistryCache.set(packageName, value, 60)
+          }
 
-        callback(null, value)
-      }, error => {
-        // Do not cache errors.
-        RegistryCache.del(packageName)
-        callback(error)
-      })
-    }
+          resolve(value)
+        }, error => {
+          // Do not cache errors.
+          RegistryCache.del(packageName)
+          reject(error)
+        })
+      }
+    })
   })
-}
 
 const normalizeTarHeader = (header) => {
   // Most packages have header names that look like "package/index.js"
@@ -61,34 +62,60 @@ const normalizeTarHeader = (header) => {
   return header
 }
 
-const getPackage = (tarballURL, outputDir, callback) => {
-  mkdirp(outputDir, (error) => {
-    if (error) {
-      callback(error)
-    } else {
-      let callbackWasCalled = false
-
-      fetch(tarballURL).then(response => {
-        response.body
-          .pipe(gunzip())
-          .pipe(
-            tar.extract(outputDir, {
-              dmode: 0o666, // All dirs should be writable
-              fmode: 0o444, // All files should be readable
-              map: normalizeTarHeader
-            })
-          )
-          .on('finish', callback)
-          .on('error', (error) => {
-            if (callbackWasCalled) // LOL node streams
-              return
-
-            callbackWasCalled = true
-            callback(error)
-          })
-      })
-    }
+const fetchAndExtractPackage = (tarballURL, outputDir) =>
+  new Promise((resolve, reject) => {
+    mkdirp(outputDir, (error) => {
+      if (error) {
+        reject(error)
+      } else {
+        fetch(tarballURL).then(response => {
+          response.body
+            .pipe(gunzip())
+            .pipe(
+              tar.extract(outputDir, {
+                dmode: 0o666, // All dirs should be writable
+                fmode: 0o444, // All files should be readable
+                map: normalizeTarHeader
+              })
+            )
+            .on('finish', resolve)
+            .on('error', reject)
+        }, reject)
+      }
+    })
   })
+
+const runCache = {}
+
+// A helper that prevents running multiple async operations
+// identified by the same key concurrently. Instead, the operation
+// is performed only once the first time it is requested and all
+// subsequent calls get that same result until it is completed.
+const runOnce = (key, perform) => {
+  let promise = runCache[key]
+
+  if (!promise) {
+    promise = runCache[key] = perform()
+
+    // Clear the cache when we're done.
+    promise.then(() => {
+      delete runCache[key]
+    }, () => {
+      delete runCache[key]
+    })
+  }
+
+  return promise
+}
+
+const getPackageInfo = (registryURL, packageName, callback) => {
+  runOnce(registryURL + packageName, () => fetchPackageInfo(registryURL, packageName))
+    .then(info => callback(null, info), callback)
+}
+
+const getPackage = (tarballURL, outputDir, callback) => {
+  runOnce(tarballURL + outputDir, () => fetchAndExtractPackage(tarballURL, outputDir))
+    .then(() => callback(null), callback)
 }
 
 module.exports = {
diff --git a/server/middleware/index.js b/server/middleware/index.js
index 381de74..b62ac09 100644
--- a/server/middleware/index.js
+++ b/server/middleware/index.js
@@ -22,14 +22,11 @@ const oneMinute = 60
 const oneDay = oneMinute * 60 * 24
 const oneYear = oneDay * 365
 
-const checkLocalCache = (dir, callback) =>
-  statFile(joinPaths(dir, 'package.json'), (error, stats) => {
+const checkLocalCache = (dir, filename, callback) =>
+  statFile(joinPaths(dir, filename || 'package.json'), (error, stats) => {
     callback(stats && stats.isFile())
   })
 
-const createTempPath = (name) =>
-  joinPaths(tmpdir(), `unpkg-${name}`)
-
 const ResolveExtensions = [ '', '.js', '.json' ]
 
 /**
@@ -110,9 +107,9 @@ const createRequestHandler = (options = {}) => {
     // Step 1: Fetch the package from the registry and store a local copy.
     // Redirect if the URL does not specify an exact version number.
     const fetchPackage = (next) => {
-      const packageDir = createTempPath(displayName)
+      const packageDir = joinPaths(tmpdir(), `unpkg-${displayName}`)
 
-      checkLocalCache(packageDir, (isCached) => {
+      checkLocalCache(packageDir, filename, (isCached) => {
         if (isCached)
           return next(packageDir) // Best case: we already have this package on disk.