From 7661950de37895a71e13b001d4850f52acddc2c4 Mon Sep 17 00:00:00 2001
From: MICHAEL JACKSON <mjijackson@gmail.com>
Date: Thu, 10 Aug 2017 10:12:50 -0700
Subject: [PATCH] Break middleware out into separate files

---
 server/middleware/PackageUtils.js   |  16 +-
 server/middleware/checkBlacklist.js |  14 ++
 server/middleware/fetchPackage.js   |  70 ++++++++
 server/middleware/findFile.js       | 120 +++++++++++++
 server/middleware/index.js          | 252 ++--------------------------
 server/middleware/parseURL.js       |  30 ++++
 server/middleware/serveFile.js      |  45 +++++
 7 files changed, 302 insertions(+), 245 deletions(-)
 create mode 100644 server/middleware/checkBlacklist.js
 create mode 100644 server/middleware/fetchPackage.js
 create mode 100644 server/middleware/findFile.js
 create mode 100644 server/middleware/parseURL.js
 create mode 100644 server/middleware/serveFile.js

diff --git a/server/middleware/PackageUtils.js b/server/middleware/PackageUtils.js
index 7ed5810..2925e3f 100644
--- a/server/middleware/PackageUtils.js
+++ b/server/middleware/PackageUtils.js
@@ -25,16 +25,16 @@ const parsePackageURL = (url) => {
     return null
 
   const packageName = match[1]
-  const version = decodeParam(match[2]) || 'latest'
+  const packageVersion = decodeParam(match[2]) || 'latest'
   const filename = decodeParam(match[3])
 
-  return {        // If the URL is /@scope/name@version/path.js?main=browser:
-    pathname,     // /@scope/name@version/path.js
-    search,       // ?main=browser
-    query,        // { main: 'browser' }
-    packageName,  // @scope/name
-    version,      // version
-    filename      // /path.js
+  return {          // If the URL is /@scope/name@version/file.js?main=browser:
+    pathname,       // /@scope/name@version/file.js
+    search,         // ?main=browser
+    query,          // { main: 'browser' }
+    packageName,    // @scope/name
+    packageVersion, // version
+    filename        // /file.js
   }
 }
 
diff --git a/server/middleware/checkBlacklist.js b/server/middleware/checkBlacklist.js
new file mode 100644
index 0000000..61eba20
--- /dev/null
+++ b/server/middleware/checkBlacklist.js
@@ -0,0 +1,14 @@
+/**
+ * Answer 403 for packages named on the blacklist; hand every other request to next().
+ */
+function checkBlacklist(blacklist) {
+  return function (req, res, next) {
+    if (!blacklist.includes(req.packageName)) {
+      next()
+    } else {
+      res.status(403).send(`Package ${req.packageName} is blacklisted`)
+    }
+  }
+}
+
+module.exports = checkBlacklist
diff --git a/server/middleware/fetchPackage.js b/server/middleware/fetchPackage.js
new file mode 100644
index 0000000..563b73f
--- /dev/null
+++ b/server/middleware/fetchPackage.js
@@ -0,0 +1,70 @@
+const fs = require('fs')
+const path = require('path')
+const tmpdir = require('os-tmpdir')
+const { maxSatisfying: maxSatisfyingVersion } = require('semver')
+const { createPackageURL } = require('./PackageUtils')
+const { getPackageInfo, getPackage } = require('./RegistryUtils')
+
+// Calls back with a truthy value only when dir already contains a package.json file.
+function checkLocalCache(dir, callback) {
+  fs.stat(path.join(dir, 'package.json'), (statError, info) => callback(info && info.isFile()))
+}
+
+// Builds the local cache directory path for the given package spec under tmpdir().
+function createTempPath(name) {
+  return path.join(tmpdir(), `unpkg-${name}`)
+}
+
+/**
+ * Fetch the package from the registry and store a local copy on disk (req.packageDir).
+ * Redirect if the URL does not specify an exact version number.
+ */
+function fetchPackage(registryURL) {
+  return function (req, res, next) {
+    req.packageDir = createTempPath(req.packageSpec)
+
+    // TODO: fix race condition! (see #38)
+    // TODO: ensure req.packageInfo is always populated so we can re-use later
+    checkLocalCache(req.packageDir, function (isCached) {
+      if (isCached)
+        return next() // Best case: we already have this package on disk.
+
+      // Fetch package info from NPM.
+      getPackageInfo(registryURL, req.packageName, function (error, packageInfo) {
+        if (error)
+          return res.status(500).send(error.message || error)
+
+        if (packageInfo == null || packageInfo.versions == null)
+          return res.status(404).send(`Cannot find package "${req.packageName}"`)
+
+        const { versions, 'dist-tags': tags = {} } = packageInfo
+        // Own-property checks: a spec like "constructor" must not match inherited keys.
+        if (Object.prototype.hasOwnProperty.call(versions, req.packageVersion)) {
+          // A valid request for a package we haven't downloaded yet.
+          const packageConfig = versions[req.packageVersion]
+          const tarballURL = packageConfig.dist.tarball
+
+          getPackage(tarballURL, req.packageDir, function (error) {
+            if (error) {
+              res.status(500).send(error.message || error)
+            } else {
+              next()
+            }
+          })
+        } else if (Object.prototype.hasOwnProperty.call(tags, req.packageVersion)) {
+          res.redirect(createPackageURL(req.packageName, tags[req.packageVersion], req.filename, req.search))
+        } else {
+          const maxVersion = maxSatisfyingVersion(Object.keys(versions), req.packageVersion)
+
+          if (maxVersion) {
+            res.redirect(createPackageURL(req.packageName, maxVersion, req.filename, req.search))
+          } else {
+            res.status(404).send(`Cannot find package ${req.packageSpec}`)
+          }
+        }
+      })
+    })
+  }
+}
+
+module.exports = fetchPackage
diff --git a/server/middleware/findFile.js b/server/middleware/findFile.js
new file mode 100644
index 0000000..7e7e169
--- /dev/null
+++ b/server/middleware/findFile.js
@@ -0,0 +1,120 @@
+const fs = require('fs')
+const path = require('path')
+
+const ResolveExtensions = [ '', '.js', '.json' ]
+
+/**
+ * Resolves a path like "lib/file" into "lib/file", "lib/file.js" or "lib/file.json" (tried in
+ * that order, like require('lib/file')): callback(null, file, stats), or callback() if none.
+ */
+function resolveFile(base, useIndex, callback) {
+  ResolveExtensions.reduceRight(function (next, ext) {
+    const file = base + ext
+    // With useIndex, a directory match is retried as <dir>/index before moving on to next().
+    return function () {
+      fs.stat(file, function (error, stats) {
+        if (error) {
+          if (error.code === 'ENOENT' || error.code === 'ENOTDIR') {
+            next()
+          } else {
+            callback(error)
+          }
+        } else if (useIndex && stats.isDirectory()) {
+          resolveFile(path.join(file, 'index'), false, function (error, indexFile, indexStats) {
+            if (error) {
+              callback(error)
+            } else if (indexFile) {
+              callback(null, indexFile, indexStats)
+            } else {
+              next()
+            }
+          })
+        } else {
+          callback(null, file, stats)
+        }
+      })
+    }
+  }, callback)()
+}
+
+/**
+ * Determine which file we're going to serve and get its stats (sets req.file and req.stats).
+ * Redirect if the request targets a directory with no trailing slash.
+ */
+function findFile() {
+  return function (req, res, next) {
+    if (req.filename) {
+      const base = path.join(req.packageDir, req.filename)
+
+      // Based on the URL, figure out which file they want.
+      resolveFile(base, false, function (error, file, stats) {
+        if (error)
+          console.error(error)
+        // A resolve error leaves file undefined, so the client sees it as a 404 below.
+        if (file == null) {
+          res.status(404).send(`Cannot find file "${req.filename}" in package ${req.packageSpec}`)
+        } else if (stats.isDirectory() && req.pathname[req.pathname.length - 1] !== '/') {
+          // Append / to directory URLs.
+          res.redirect(`${req.pathname}/${req.search}`)
+        } else {
+          req.file = file.replace(req.packageDir, '')
+          req.stats = stats
+          next()
+        }
+      })
+    } else {
+      // No filename in the URL. Try to figure out which file they want by
+      // checking package.json's "unpkg", "browser", and "main" fields.
+      fs.readFile(path.join(req.packageDir, 'package.json'), 'utf8', function (error, data) {
+        if (error) {
+          console.error(error)
+          return res.status(500).send(`Cannot read ${req.packageSpec}/package.json`)
+        }
+
+        let packageConfig
+        try {
+          packageConfig = JSON.parse(data)
+        } catch (error) {
+          return res.status(500).send(`Cannot parse ${req.packageSpec}/package.json: ${error.message}`)
+        }
+
+        let mainFilename
+        const queryMain = req.query && req.query.main
+
+        if (queryMain) {
+          // Only string fields are usable; this also rejects inherited keys like "toString".
+          if (typeof packageConfig[queryMain] !== 'string')
+            return res.status(404).send(`Cannot find field "${queryMain}" in ${req.packageSpec}/package.json`)
+          mainFilename = packageConfig[queryMain]
+        } else {
+          if (typeof packageConfig.unpkg === 'string') {
+            // The "unpkg" field allows packages to explicitly declare the
+            // file to serve at the bare URL (see #59).
+            mainFilename = packageConfig.unpkg
+          } else if (typeof packageConfig.browser === 'string') {
+            // Fall back to the "browser" field if declared (only support strings).
+            mainFilename = packageConfig.browser
+          } else {
+            // If there is no main, use "index" (same as npm).
+            mainFilename = packageConfig.main || 'index'
+          }
+        }
+
+        resolveFile(path.join(req.packageDir, mainFilename), true, function (error, file, stats) {
+          if (error)
+            console.error(error)
+          // A resolve error leaves file undefined, so the client sees it as a 404 below.
+          if (file == null) {
+            res.status(404).send(`Cannot find main file "${mainFilename}" in package ${req.packageSpec}`)
+          } else {
+            req.file = file.replace(req.packageDir, '')
+            req.stats = stats
+            next()
+          }
+        })
+      })
+    }
+  }
+}
+
+module.exports = findFile
diff --git a/server/middleware/index.js b/server/middleware/index.js
index 83f4c62..81a1a84 100644
--- a/server/middleware/index.js
+++ b/server/middleware/index.js
@@ -1,71 +1,9 @@
-const tmpdir = require('os-tmpdir')
-const { join: joinPaths } = require('path')
-const { stat: statFile, readFile } = require('fs')
-const { maxSatisfying: maxSatisfyingVersion } = require('semver')
-const { parsePackageURL, createPackageURL } = require('./PackageUtils')
-const { getPackageInfo, getPackage } = require('./RegistryUtils')
-const { generateDirectoryIndexHTML } = require('./IndexUtils')
-const { generateMetadata } = require('./MetadataUtils')
-const { getFileType } = require('./FileUtils')
-const {
-  sendNotFoundError,
-  sendInvalidURLError,
-  sendServerError,
-  sendRedirect,
-  sendFile,
-  sendText,
-  sendJSON,
-  sendHTML
-} = require('./ResponseUtils')
-
-const oneMinute = 60
-const oneDay = oneMinute * 60 * 24
-const oneYear = oneDay * 365
-
-const checkLocalCache = (dir, callback) =>
-  statFile(joinPaths(dir, 'package.json'), (error, stats) => {
-    callback(stats && stats.isFile())
-  })
-
-const createTempPath = (name) =>
-  joinPaths(tmpdir(), `unpkg-${name}`)
-
-const ResolveExtensions = [ '', '.js', '.json' ]
-
-/**
- * Resolves a path like "lib/file" into "lib/file.js" or
- * "lib/file.json" depending on which one is available, similar
- * to how require('lib/file') does.
- */
-const resolveFile = (path, useIndex, callback) => {
-  ResolveExtensions.reduceRight((next, ext) => {
-    const file = path + ext
-
-    return () => {
-      statFile(file, (error, stats) => {
-        if (error) {
-          if (error.code === 'ENOENT' || error.code === 'ENOTDIR') {
-            next()
-          } else {
-            callback(error)
-          }
-        } else if (useIndex && stats.isDirectory()) {
-          resolveFile(joinPaths(file, 'index'), false, (error, indexFile, indexStats) => {
-            if (error) {
-              callback(error)
-            } else if (indexFile) {
-              callback(null, indexFile, indexStats)
-            } else {
-              next()
-            }
-          })
-        } else {
-          callback(null, file, stats)
-        }
-      })
-    }
-  }, callback)()
-}
+const express = require('express')
+const parseURL = require('./parseURL')
+const checkBlacklist = require('./checkBlacklist')
+const fetchPackage = require('./fetchPackage')
+const findFile = require('./findFile')
+const serveFile = require('./serveFile')
 
 /**
  * Creates and returns a function that can be used in the "request"
@@ -93,177 +31,17 @@ const createRequestHandler = (options = {}) => {
   const maximumDepth = options.maximumDepth || Number.MAX_VALUE
   const blacklist = options.blacklist || []
 
-  const handleRequest = (req, res) => {
-    let url
-    try {
-      url = parsePackageURL(req.url)
-    } catch (error) {
-      return sendInvalidURLError(res, req.url)
-    }
+  const app = express.Router()
 
-    if (url == null)
-      return sendInvalidURLError(res, req.url)
+  app.use(
+    parseURL(),
+    checkBlacklist(blacklist),
+    fetchPackage(registryURL),
+    findFile(),
+    serveFile(registryURL, autoIndex, maximumDepth)
+  )
 
-    const { pathname, search, query, packageName, version, filename } = url
-    const displayName = `${packageName}@${version}`
-
-    const isBlacklisted = blacklist.indexOf(packageName) !== -1
-
-    if (isBlacklisted)
-      return sendText(res, 403, `Package ${packageName} is blacklisted`)
-
-    // Step 1: Fetch the package from the registry and store a local copy.
-    // Redirect if the URL does not specify an exact version number.
-    const fetchPackage = (next) => {
-      const packageDir = createTempPath(displayName)
-
-      checkLocalCache(packageDir, (isCached) => {
-        if (isCached)
-          return next(packageDir) // Best case: we already have this package on disk.
-
-        // Fetch package info from NPM registry.
-        getPackageInfo(registryURL, packageName, (error, packageInfo) => {
-          if (error)
-            return sendServerError(res, error)
-
-          if (packageInfo == null || packageInfo.versions == null)
-            return sendNotFoundError(res, `package "${packageName}"`)
-
-          const { versions, 'dist-tags': tags } = packageInfo
-
-          if (version in versions) {
-            // A valid request for a package we haven't downloaded yet.
-            const packageConfig = versions[version]
-            const tarballURL = packageConfig.dist.tarball
-
-            getPackage(tarballURL, packageDir, (error) => {
-              if (error) {
-                sendServerError(res, error)
-              } else {
-                next(packageDir)
-              }
-            })
-          } else if (version in tags) {
-            sendRedirect(res, createPackageURL(packageName, tags[version], filename, search))
-          } else {
-            const maxVersion = maxSatisfyingVersion(Object.keys(versions), version)
-
-            if (maxVersion) {
-              sendRedirect(res, createPackageURL(packageName, maxVersion, filename, search))
-            } else {
-              sendNotFoundError(res, `package ${displayName}`)
-            }
-          }
-        })
-      })
-    }
-
-    // Step 2: Determine which file we're going to serve and get its stats.
-    // Redirect if the request targets a directory with no trailing slash.
-    const findFile = (packageDir, next) => {
-      if (filename) {
-        const path = joinPaths(packageDir, filename)
-
-        // Based on the URL, figure out which file they want.
-        resolveFile(path, false, (error, file, stats) => {
-          if (error) {
-            sendServerError(res, error)
-          } else if (file == null) {
-            sendNotFoundError(res, `file "${filename}" in package ${displayName}`)
-          } else if (stats.isDirectory() && pathname[pathname.length - 1] !== '/') {
-            // Append `/` to directory URLs
-            sendRedirect(res, pathname + '/' + search)
-          } else {
-            next(file.replace(packageDir, ''), stats)
-          }
-        })
-      } else {
-        // No filename in the URL. Try to serve the package's "main" file.
-        readFile(joinPaths(packageDir, 'package.json'), 'utf8', (error, data) => {
-          if (error)
-            return sendServerError(res, error)
-
-          let packageConfig
-          try {
-            packageConfig = JSON.parse(data)
-          } catch (error) {
-            return sendText(res, 500, `Error parsing ${displayName}/package.json: ${error.message}`)
-          }
-
-          let mainFilename
-          const queryMain = query && query.main
-
-          if (queryMain) {
-            if (!(queryMain in packageConfig))
-              return sendNotFoundError(res, `field "${queryMain}" in ${displayName}/package.json`)
-
-            mainFilename = packageConfig[queryMain]
-          } else {
-            if (typeof packageConfig.unpkg === 'string') {
-              // The "unpkg" field allows packages to explicitly declare the
-              // file to serve at the bare URL (see #59).
-              mainFilename = packageConfig.unpkg
-            } else if (typeof packageConfig.browser === 'string') {
-              // Fall back to the "browser" field if declared (only support strings).
-              mainFilename = packageConfig.browser
-            } else {
-              // If there is no main, use "index" (same as npm).
-              mainFilename = packageConfig.main || 'index'
-            }
-          }
-
-          resolveFile(joinPaths(packageDir, mainFilename), true, (error, file, stats) => {
-            if (error) {
-              sendServerError(res, error)
-            } else if (file == null) {
-              sendNotFoundError(res, `main file "${mainFilename}" in package ${displayName}`)
-            } else {
-              next(file.replace(packageDir, ''), stats)
-            }
-          })
-        })
-      }
-    }
-
-    // Step 3: Send the file, JSON metadata, or HTML directory listing.
-    const serveFile = (baseDir, path, stats) => {
-      if (query.json != null) {
-        generateMetadata(baseDir, path, stats, maximumDepth, (error, metadata) => {
-          if (metadata) {
-            sendJSON(res, metadata, oneYear)
-          } else {
-            sendServerError(res, `unable to generate JSON metadata for ${displayName}${filename}`)
-          }
-        })
-      } else if (stats.isFile()) {
-        sendFile(res, joinPaths(baseDir, path), stats, oneYear)
-      } else if (autoIndex && stats.isDirectory()) {
-        getPackageInfo(registryURL, packageName, (error, packageInfo) => {
-          if (error) {
-            sendServerError(res, `unable to generate index page for ${displayName}${filename}`)
-          } else {
-            generateDirectoryIndexHTML(packageInfo, version, baseDir, path, (error, html) => {
-              if (html) {
-                sendHTML(res, html)
-              } else {
-                sendServerError(res, `unable to generate index page for ${displayName}${filename}`)
-              }
-            })
-          }
-        })
-      } else {
-        sendInvalidURLError(res, `${displayName}${filename} is a ${getFileType(stats)}`)
-      }
-    }
-
-    fetchPackage(packageDir => {
-      findFile(packageDir, (file, stats) => {
-        serveFile(packageDir, file, stats)
-      })
-    })
-  }
-
-  return handleRequest
+  return app
 }
 
 module.exports = createRequestHandler
diff --git a/server/middleware/parseURL.js b/server/middleware/parseURL.js
new file mode 100644
index 0000000..5dcc6b1
--- /dev/null
+++ b/server/middleware/parseURL.js
@@ -0,0 +1,30 @@
+const { parsePackageURL } = require('./PackageUtils')
+
+/**
+ * Parse and validate the URL, copying its parts onto req; responds 403 when it is invalid.
+ */
+function parseURL() {
+  return function (req, res, next) {
+    let parsed = null
+    try {
+      parsed = parsePackageURL(req.url)
+    } catch (error) {
+      parsed = null
+    }
+    if (parsed == null)
+      return res.status(403).send(`Invalid URL: ${req.url}`)
+
+    const { packageName, packageVersion, pathname, filename, search, query } = parsed
+    req.packageName = packageName
+    req.packageVersion = packageVersion
+    req.packageSpec = `${packageName}@${packageVersion}`
+    req.pathname = pathname
+    req.filename = filename
+    req.search = search
+    req.query = query
+
+    next()
+  }
+}
+
+module.exports = parseURL
diff --git a/server/middleware/serveFile.js b/server/middleware/serveFile.js
new file mode 100644
index 0000000..922e36d
--- /dev/null
+++ b/server/middleware/serveFile.js
@@ -0,0 +1,45 @@
+const path = require('path')
+const { getPackageInfo } = require('./RegistryUtils')
+const { generateMetadata } = require('./MetadataUtils')
+const { generateDirectoryIndexHTML } = require('./IndexUtils')
+const { sendFile } = require('./ResponseUtils')
+
+/**
+ * Send the file, JSON metadata (?json), or HTML directory listing (when autoIndex is set).
+ */
+function serveFile(registryURL, autoIndex, maximumDepth) {
+  return function (req, res, next) {
+    // TODO: change query param from "json" => "meta"
+    if (req.query.json != null) {
+      generateMetadata(req.packageDir, req.file, req.stats, maximumDepth, function (error, metadata) {
+        if (metadata) {
+          res.set('Cache-Control', 'public, max-age=31536000').send(metadata)
+        } else {
+          res.status(500).send(`Cannot generate JSON metadata for ${req.packageSpec}${req.filename}`)
+        }
+      })
+    } else if (req.stats.isFile()) {
+      // TODO: use res.sendFile instead of our own custom function?
+      sendFile(res, path.join(req.packageDir, req.file), req.stats, 31536000)
+    } else if (autoIndex && req.stats.isDirectory()) {
+      // TODO: re-use packageInfo from fetchPackage middleware
+      getPackageInfo(registryURL, req.packageName, function (error, packageInfo) {
+        if (error) {
+          res.status(500).send(`Cannot generate index page for ${req.packageSpec}${req.filename}`)
+        } else {
+          generateDirectoryIndexHTML(packageInfo, req.packageVersion, req.packageDir, req.file, function (error, html) {
+            if (html) {
+              res.send(html)
+            } else {
+              res.status(500).send(`Cannot generate index page for ${req.packageSpec}${req.filename}`)
+            }
+          })
+        }
+      })
+    } else {
+      res.status(403).send(`Cannot serve ${req.packageSpec}${req.filename}; it's not a file`)
+    }
+  }
+}
+
+module.exports = serveFile