Prevent multiple concurrent requests for package info

This commit is contained in:
MICHAEL JACKSON 2017-08-10 20:43:20 -07:00
parent 7661950de3
commit cb8061f3e1
9 changed files with 135 additions and 96 deletions

68
server/PackageInfo.js Normal file
View File

@ -0,0 +1,68 @@
require('isomorphic-fetch')
const PackageInfoCache = require('./PackageInfoCache')
const createMutex = require('./createMutex')
const RegistryURL = process.env.NPM_REGISTRY_URL || 'https://registry.npmjs.org'
function fetchPackageInfo(packageName) {
console.log(`Fetching info for package ${packageName} from NPM`)
let encodedPackageName
if (packageName.charAt(0) === '@') {
encodedPackageName = `@${encodeURIComponent(packageName.substring(1))}`
} else {
encodedPackageName = encodeURIComponent(packageName)
}
const url = `${RegistryURL}/${encodedPackageName}`
return fetch(url, {
headers: { 'Accept': 'application/json' }
}).then(function (response) {
return response.status === 404 ? null : response.json()
})
}
const PackageNotFound = 'PackageNotFound'
// This mutex prevents multiple concurrent requests to
// the registry for the same package info.
const fetchMutex = createMutex(function (packageName, callback) {
fetchPackageInfo(packageName).then(function (value) {
if (value == null) {
// Cache 404s for 5 minutes. This prevents us from making
// unnecessary requests to the registry for bad package names.
// In the worst case, a brand new package's info will be
// available within 5 minutes.
PackageInfoCache.set(packageName, PackageNotFound, 300, function () {
callback(null, value)
})
} else {
// Cache valid package info for one minute.
PackageInfoCache.set(packageName, value, 60, function () {
callback(null, value)
})
}
}, function (error) {
// Do not cache errors.
PackageInfoCache.del(packageName, function () {
callback(error)
})
})
})
function getPackageInfo(packageName, callback) {
PackageInfoCache.get(packageName, function (error, value) {
if (error) {
callback(error)
} else if (value) {
callback(null, value === PackageNotFound ? null : value)
} else {
fetchMutex(packageName, callback)
}
})
}
module.exports = {
get: getPackageInfo
}

View File

@ -0,0 +1,29 @@
const redis = require('redis')
const RedisURL = process.env.OPENREDIS_URL || 'redis://localhost:6379'
const db = redis.createClient(RedisURL)
function createKey(packageName) {
return 'packageInfo-' + packageName
}
function set(packageName, value, expiry, callback) {
db.setex(createKey(packageName), expiry, JSON.stringify(value), callback)
}
function get(packageName, callback) {
db.get(createKey(packageName), function (error, value) {
callback(error, value && JSON.parse(value))
})
}
function del(packageName, callback) {
db.del(createKey(packageName), callback)
}
module.exports = {
set,
get,
del
}

21
server/createMutex.js Normal file
View File

@ -0,0 +1,21 @@
function createMutex(doWork) {
const mutex = {}
return function (key, callback) {
if (mutex[key]) {
mutex[key].push(callback)
} else {
mutex[key] = [ function () {
delete mutex[key]
}, callback ]
doWork(key, function (error, value) {
mutex[key].forEach(function (callback) {
callback(error, value)
})
})
}
}
}
module.exports = createMutex

View File

@ -102,7 +102,6 @@ const defaultServerConfig = {
publicDir: 'public',
// for the middleware
registryURL: process.env.REGISTRY_URL || 'https://registry.npmjs.org',
autoIndex: !process.env.DISABLE_INDEX,
blacklist: require('./PackageBlacklist').blacklist
}

View File

@ -1,27 +0,0 @@
const redis = require('redis')
const RedisURL = process.env.OPENREDIS_URL || 'redis://localhost:6379'
const db = redis.createClient(RedisURL)
const createKey = (key) => 'registryCache-' + key
const set = (key, value, expiry) => {
db.setex(createKey(key), expiry, JSON.stringify(value))
}
const get = (key, callback) => {
db.get(createKey(key), (error, value) => {
callback(error, value && JSON.parse(value))
})
}
const del = (key) => {
db.del(createKey(key))
}
module.exports = {
set,
get,
del
}

View File

@ -2,57 +2,8 @@ require('isomorphic-fetch')
const gunzip = require('gunzip-maybe')
const mkdirp = require('mkdirp')
const tar = require('tar-fs')
const RegistryCache = require('./RegistryCache')
const getPackageInfoFromRegistry = (registryURL, packageName) => {
let encodedPackageName
if (packageName.charAt(0) === '@') {
encodedPackageName = `@${encodeURIComponent(packageName.substring(1))}`
} else {
encodedPackageName = encodeURIComponent(packageName)
}
const url = `${registryURL}/${encodedPackageName}`
return fetch(url, {
headers: { 'Accept': 'application/json' }
}).then(response => (
response.status === 404 ? null : response.json()
))
}
const PackageNotFound = 'PackageNotFound'
const getPackageInfo = (registryURL, packageName, callback) => {
RegistryCache.get(packageName, (error, value) => {
if (error) {
callback(error)
} else if (value) {
callback(null, value === PackageNotFound ? null : value)
} else {
getPackageInfoFromRegistry(registryURL, packageName).then(value => {
if (value == null) {
// Keep 404s in the cache for 5 minutes. This prevents us
// from making unnecessary requests to the registry for
// bad package names. In the worst case, a brand new
// package's info will be available within 5 minutes.
RegistryCache.set(packageName, PackageNotFound, 300)
} else {
// Keep package.json in the cache for a minute.
RegistryCache.set(packageName, value, 60)
}
callback(null, value)
}, error => {
// Do not cache errors.
RegistryCache.del(packageName)
callback(error)
})
}
})
}
const normalizeTarHeader = (header) => {
function normalizeTarHeader(header) {
// Most packages have header names that look like "package/index.js"
// so we shorten that to just "index.js" here. A few packages use a
// prefix other than "package/". e.g. the firebase package uses the
@ -61,7 +12,7 @@ const normalizeTarHeader = (header) => {
return header
}
const getPackage = (tarballURL, outputDir, callback) => {
function getPackage(tarballURL, outputDir, callback) {
mkdirp(outputDir, (error) => {
if (error) {
callback(error)
@ -92,6 +43,5 @@ const getPackage = (tarballURL, outputDir, callback) => {
}
module.exports = {
getPackageInfo,
getPackage
}

View File

@ -2,8 +2,9 @@ const fs = require('fs')
const path = require('path')
const tmpdir = require('os-tmpdir')
const { maxSatisfying: maxSatisfyingVersion } = require('semver')
const PackageInfo = require('../PackageInfo')
const { createPackageURL } = require('./PackageUtils')
const { getPackageInfo, getPackage } = require('./RegistryUtils')
const { getPackage } = require('./RegistryUtils')
function checkLocalCache(dir, callback) {
fs.stat(path.join(dir, 'package.json'), function (error, stats) {
@ -19,20 +20,20 @@ function createTempPath(name) {
* Fetch the package from the registry and store a local copy on disk.
* Redirect if the URL does not specify an exact req.packageVersion number.
*/
function fetchPackage(registryURL) {
function fetchPackage() {
return function (req, res, next) {
req.packageDir = createTempPath(req.packageSpec)
// TODO: fix race condition! (see #38)
// TODO: ensure req.packageInfo is always populated so we can re-use later
checkLocalCache(req.packageDir, function (isCached) {
if (isCached)
return next() // Best case: we already have this package on disk.
// Fetch package info from NPM.
getPackageInfo(registryURL, req.packageName, function (error, packageInfo) {
if (error)
return res.status(500).send(error.message || error)
PackageInfo.get(req.packageName, function (error, packageInfo) {
if (error) {
console.error(error)
return res.status(500).send(`Cannot get info for package "${req.packageName}"`)
}
if (packageInfo == null || packageInfo.versions == null)
return res.status(404).send(`Cannot find package "${req.packageName}"`)

View File

@ -25,8 +25,7 @@ const serveFile = require('./serveFile')
* /history@latest/umd/History.min.js (redirects to version)
* /history@^1/umd/History.min.js (redirects to max satisfying version)
*/
const createRequestHandler = (options = {}) => {
const registryURL = options.registryURL || 'https://registry.npmjs.org'
function createRequestHandler(options = {}) {
const autoIndex = options.autoIndex !== false
const maximumDepth = options.maximumDepth || Number.MAX_VALUE
const blacklist = options.blacklist || []
@ -36,9 +35,9 @@ const createRequestHandler = (options = {}) => {
app.use(
parseURL(),
checkBlacklist(blacklist),
fetchPackage(registryURL),
fetchPackage(),
findFile(),
serveFile(registryURL, autoIndex, maximumDepth)
serveFile(autoIndex, maximumDepth)
)
return app

View File

@ -1,5 +1,5 @@
const path = require('path')
const { getPackageInfo } = require('./RegistryUtils')
const PackageInfo = require('../PackageInfo')
const { generateMetadata } = require('./MetadataUtils')
const { generateDirectoryIndexHTML } = require('./IndexUtils')
const { sendFile } = require('./ResponseUtils')
@ -7,9 +7,9 @@ const { sendFile } = require('./ResponseUtils')
/**
* Send the file, JSON metadata, or HTML directory listing.
*/
function serveFile(registryURL, autoIndex, maximumDepth) {
function serveFile(autoIndex, maximumDepth) {
return function (req, res, next) {
// TODO: change query param from "json" => "meta"
// TODO: change query param from "json" to "meta"
if (req.query.json != null) {
generateMetadata(req.packageDir, req.file, req.stats, maximumDepth, function (error, metadata) {
if (metadata) {
@ -22,8 +22,7 @@ function serveFile(registryURL, autoIndex, maximumDepth) {
// TODO: use res.sendFile instead of our own custom function?
sendFile(res, path.join(req.packageDir, req.file), req.stats, 31536000)
} else if (autoIndex && req.stats.isDirectory()) {
// TODO: re-use packageInfo from fetchPackage middleware
getPackageInfo(registryURL, req.packageName, function (error, packageInfo) {
PackageInfo.get(req.packageName, function (error, packageInfo) {
if (error) {
res.status(500).send(`Cannot generate index page for ${req.packageSpec}${req.filename}`)
} else {