From cb8061f3e14aade9ebaa3f99059cfdc548cc895a Mon Sep 17 00:00:00 2001 From: MICHAEL JACKSON Date: Thu, 10 Aug 2017 20:43:20 -0700 Subject: [PATCH] Prevent multiple concurrent requests for package info --- server/PackageInfo.js | 68 ++++++++++++++++++++++++++++++ server/PackageInfoCache.js | 29 +++++++++++++ server/createMutex.js | 21 +++++++++ server/index.js | 1 - server/middleware/RegistryCache.js | 27 ------------ server/middleware/RegistryUtils.js | 54 +----------------------- server/middleware/fetchPackage.js | 15 ++++--- server/middleware/index.js | 7 ++- server/middleware/serveFile.js | 9 ++-- 9 files changed, 135 insertions(+), 96 deletions(-) create mode 100644 server/PackageInfo.js create mode 100644 server/PackageInfoCache.js create mode 100644 server/createMutex.js delete mode 100644 server/middleware/RegistryCache.js diff --git a/server/PackageInfo.js b/server/PackageInfo.js new file mode 100644 index 0000000..a804e05 --- /dev/null +++ b/server/PackageInfo.js @@ -0,0 +1,68 @@ +require('isomorphic-fetch') +const PackageInfoCache = require('./PackageInfoCache') +const createMutex = require('./createMutex') + +const RegistryURL = process.env.NPM_REGISTRY_URL || 'https://registry.npmjs.org' + +function fetchPackageInfo(packageName) { + console.log(`Fetching info for package ${packageName} from NPM`) + + let encodedPackageName + if (packageName.charAt(0) === '@') { + encodedPackageName = `@${encodeURIComponent(packageName.substring(1))}` + } else { + encodedPackageName = encodeURIComponent(packageName) + } + + const url = `${RegistryURL}/${encodedPackageName}` + + return fetch(url, { + headers: { 'Accept': 'application/json' } + }).then(function (response) { + return response.status === 404 ? null : response.json() + }) +} + +const PackageNotFound = 'PackageNotFound' + +// This mutex prevents multiple concurrent requests to +// the registry for the same package info. +const fetchMutex = createMutex(function (packageName, callback) { + fetchPackageInfo(packageName).then(function (value) { + if (value == null) { + // Cache 404s for 5 minutes. This prevents us from making + // unnecessary requests to the registry for bad package names. + // In the worst case, a brand new package's info will be + // available within 5 minutes. + PackageInfoCache.set(packageName, PackageNotFound, 300, function () { + callback(null, value) + }) + } else { + // Cache valid package info for one minute. + PackageInfoCache.set(packageName, value, 60, function () { + callback(null, value) + }) + } + }, function (error) { + // Do not cache errors. + PackageInfoCache.del(packageName, function () { + callback(error) + }) + }) +}) + +function getPackageInfo(packageName, callback) { + PackageInfoCache.get(packageName, function (error, value) { + if (error) { + callback(error) + } else if (value) { + callback(null, value === PackageNotFound ? null : value) + } else { + fetchMutex(packageName, callback) + } + }) +} + +module.exports = { + get: getPackageInfo +} diff --git a/server/PackageInfoCache.js b/server/PackageInfoCache.js new file mode 100644 index 0000000..8793a86 --- /dev/null +++ b/server/PackageInfoCache.js @@ -0,0 +1,29 @@ +const redis = require('redis') + +const RedisURL = process.env.OPENREDIS_URL || 'redis://localhost:6379' + +const db = redis.createClient(RedisURL) + +function createKey(packageName) { + return 'packageInfo-' + packageName +} + +function set(packageName, value, expiry, callback) { + db.setex(createKey(packageName), expiry, JSON.stringify(value), callback) +} + +function get(packageName, callback) { + db.get(createKey(packageName), function (error, value) { + callback(error, value && JSON.parse(value)) + }) +} + +function del(packageName, callback) { + db.del(createKey(packageName), callback) +} + +module.exports = { + set, + get, + del +} diff --git a/server/createMutex.js b/server/createMutex.js new file mode 100644 index 0000000..50b290f --- /dev/null +++ b/server/createMutex.js @@ -0,0 +1,21 @@ +function createMutex(doWork) { + const mutex = {} + + return function (key, callback) { + if (mutex[key]) { + mutex[key].push(callback) + } else { + mutex[key] = [ function () { + delete mutex[key] + }, callback ] + + doWork(key, function (error, value) { + mutex[key].forEach(function (callback) { + callback(error, value) + }) + }) + } + } +} + +module.exports = createMutex diff --git a/server/index.js b/server/index.js index fea1d6c..2836e21 100644 --- a/server/index.js +++ b/server/index.js @@ -102,7 +102,6 @@ const defaultServerConfig = { publicDir: 'public', // for the middleware - registryURL: process.env.REGISTRY_URL || 'https://registry.npmjs.org', autoIndex: !process.env.DISABLE_INDEX, blacklist: require('./PackageBlacklist').blacklist } diff --git a/server/middleware/RegistryCache.js b/server/middleware/RegistryCache.js deleted file mode 100644 index ae64732..0000000 --- a/server/middleware/RegistryCache.js +++ /dev/null @@ -1,27 +0,0 @@ -const redis = require('redis') - -const RedisURL = process.env.OPENREDIS_URL || 'redis://localhost:6379' - -const db = redis.createClient(RedisURL) - -const createKey = (key) => 'registryCache-' + key - -const set = (key, value, expiry) => { - db.setex(createKey(key), expiry, JSON.stringify(value)) -} - -const get = (key, callback) => { - db.get(createKey(key), (error, value) => { - callback(error, value && JSON.parse(value)) - }) -} - -const del = (key) => { - db.del(createKey(key)) -} - -module.exports = { - set, - get, - del -} diff --git a/server/middleware/RegistryUtils.js b/server/middleware/RegistryUtils.js index a5aa752..4ae4b36 100644 --- a/server/middleware/RegistryUtils.js +++ b/server/middleware/RegistryUtils.js @@ -2,57 +2,8 @@ require('isomorphic-fetch') const gunzip = require('gunzip-maybe') const mkdirp = require('mkdirp') const tar = require('tar-fs') -const RegistryCache = require('./RegistryCache') -const getPackageInfoFromRegistry = (registryURL, packageName) => { - let encodedPackageName - if (packageName.charAt(0) === '@') { - encodedPackageName = `@${encodeURIComponent(packageName.substring(1))}` - } else { - encodedPackageName = encodeURIComponent(packageName) - } - - const url = `${registryURL}/${encodedPackageName}` - - return fetch(url, { - headers: { 'Accept': 'application/json' } - }).then(response => ( - response.status === 404 ? null : response.json() - )) -} - -const PackageNotFound = 'PackageNotFound' - -const getPackageInfo = (registryURL, packageName, callback) => { - RegistryCache.get(packageName, (error, value) => { - if (error) { - callback(error) - } else if (value) { - callback(null, value === PackageNotFound ? null : value) - } else { - getPackageInfoFromRegistry(registryURL, packageName).then(value => { - if (value == null) { - // Keep 404s in the cache for 5 minutes. This prevents us - // from making unnecessary requests to the registry for - // bad package names. In the worst case, a brand new - // package's info will be available within 5 minutes. - RegistryCache.set(packageName, PackageNotFound, 300) - } else { - // Keep package.json in the cache for a minute. - RegistryCache.set(packageName, value, 60) - } - - callback(null, value) - }, error => { - // Do not cache errors. - RegistryCache.del(packageName) - callback(error) - }) - } - }) -} - -const normalizeTarHeader = (header) => { +function normalizeTarHeader(header) { // Most packages have header names that look like "package/index.js" // so we shorten that to just "index.js" here. A few packages use a // prefix other than "package/". e.g. the firebase package uses the @@ -61,7 +12,7 @@ const normalizeTarHeader = (header) => { return header } -const getPackage = (tarballURL, outputDir, callback) => { +function getPackage(tarballURL, outputDir, callback) { mkdirp(outputDir, (error) => { if (error) { callback(error) @@ -92,6 +43,5 @@ const getPackage = (tarballURL, outputDir, callback) => { } module.exports = { - getPackageInfo, getPackage } diff --git a/server/middleware/fetchPackage.js b/server/middleware/fetchPackage.js index 563b73f..95bb163 100644 --- a/server/middleware/fetchPackage.js +++ b/server/middleware/fetchPackage.js @@ -2,8 +2,9 @@ const fs = require('fs') const path = require('path') const tmpdir = require('os-tmpdir') const { maxSatisfying: maxSatisfyingVersion } = require('semver') +const PackageInfo = require('../PackageInfo') const { createPackageURL } = require('./PackageUtils') -const { getPackageInfo, getPackage } = require('./RegistryUtils') +const { getPackage } = require('./RegistryUtils') function checkLocalCache(dir, callback) { fs.stat(path.join(dir, 'package.json'), function (error, stats) { @@ -19,20 +20,20 @@ function createTempPath(name) { * Fetch the package from the registry and store a local copy on disk. * Redirect if the URL does not specify an exact req.packageVersion number. */ -function fetchPackage(registryURL) { +function fetchPackage() { return function (req, res, next) { req.packageDir = createTempPath(req.packageSpec) // TODO: fix race condition! (see #38) - // TODO: ensure req.packageInfo is always populated so we can re-use later checkLocalCache(req.packageDir, function (isCached) { if (isCached) return next() // Best case: we already have this package on disk. - // Fetch package info from NPM. - getPackageInfo(registryURL, req.packageName, function (error, packageInfo) { - if (error) - return res.status(500).send(error.message || error) + PackageInfo.get(req.packageName, function (error, packageInfo) { + if (error) { + console.error(error) + return res.status(500).send(`Cannot get info for package "${req.packageName}"`) + } if (packageInfo == null || packageInfo.versions == null) return res.status(404).send(`Cannot find package "${req.packageName}"`) diff --git a/server/middleware/index.js b/server/middleware/index.js index 81a1a84..aa59bd5 100644 --- a/server/middleware/index.js +++ b/server/middleware/index.js @@ -25,8 +25,7 @@ const serveFile = require('./serveFile') * /history@latest/umd/History.min.js (redirects to version) * /history@^1/umd/History.min.js (redirects to max satisfying version) */ -const createRequestHandler = (options = {}) => { - const registryURL = options.registryURL || 'https://registry.npmjs.org' +function createRequestHandler(options = {}) { const autoIndex = options.autoIndex !== false const maximumDepth = options.maximumDepth || Number.MAX_VALUE const blacklist = options.blacklist || [] @@ -36,9 +35,9 @@ const createRequestHandler = (options = {}) => { app.use( parseURL(), checkBlacklist(blacklist), - fetchPackage(registryURL), + fetchPackage(), findFile(), - serveFile(registryURL, autoIndex, maximumDepth) + serveFile(autoIndex, maximumDepth) ) return app diff --git a/server/middleware/serveFile.js b/server/middleware/serveFile.js index 922e36d..8df1543 100644 --- a/server/middleware/serveFile.js +++ b/server/middleware/serveFile.js @@ -1,5 +1,5 @@ const path = require('path') -const { getPackageInfo } = require('./RegistryUtils') +const PackageInfo = require('../PackageInfo') const { generateMetadata } = require('./MetadataUtils') const { generateDirectoryIndexHTML } = require('./IndexUtils') const { sendFile } = require('./ResponseUtils') @@ -7,9 +7,9 @@ const { sendFile } = require('./ResponseUtils') /** * Send the file, JSON metadata, or HTML directory listing. */ -function serveFile(registryURL, autoIndex, maximumDepth) { +function serveFile(autoIndex, maximumDepth) { return function (req, res, next) { - // TODO: change query param from "json" => "meta" + // TODO: change query param from "json" to "meta" if (req.query.json != null) { generateMetadata(req.packageDir, req.file, req.stats, maximumDepth, function (error, metadata) { if (metadata) { @@ -22,8 +22,7 @@ function serveFile(registryURL, autoIndex, maximumDepth) { // TODO: use res.sendFile instead of our own custom function? sendFile(res, path.join(req.packageDir, req.file), req.stats, 31536000) } else if (autoIndex && req.stats.isDirectory()) { - // TODO: re-use packageInfo from fetchPackage middleware - getPackageInfo(registryURL, req.packageName, function (error, packageInfo) { + PackageInfo.get(req.packageName, function (error, packageInfo) { if (error) { res.status(500).send(`Cannot generate index page for ${req.packageSpec}${req.filename}`) } else {