Move tarball download/extraction out of the fetchPackage middleware into a
new server/PackageCache module serialized through createMutex (which now
takes a separate payload argument), and turn the parseURL, fetchPackage and
findFile middleware factories into plain (req, res, next) functions.

Review notes (free text ahead of the first file header):
  * Line breaks and indentation were missing from the patch as received and
    have been reconstructed assuming the project's two-space style. If a
    hunk's context does not match the tree, retry with
    `git apply --ignore-whitespace`.
  * Only added lines were amended; removed and context lines are untouched.
    The post-image blob ids on the "index" lines of PackageCache.js and
    findFile.js therefore no longer match the amended content.
  * PackageCache.js: reject non-2xx registry responses, attach an error
    handler to every stream in the pipe chain, and extract into a staging
    directory that is renamed into place only on success so a failed
    download is never mistaken for a cached package. Staging directories
    left behind by failed downloads are not cleaned up here.
  * findFile.js: read the query from req.query (set by parseURL) instead of
    an undeclared `query` identifier, and only accept ?main=<field> when
    that package.json field holds a string.

diff --git a/server/PackageCache.js b/server/PackageCache.js
new file mode 100644
index 0000000..3ca7770
--- /dev/null
+++ b/server/PackageCache.js
@@ -0,0 +1,103 @@
+require('isomorphic-fetch')
+const fs = require('fs')
+const path = require('path')
+const tmpdir = require('os-tmpdir')
+const gunzip = require('gunzip-maybe')
+const mkdirp = require('mkdirp')
+const tar = require('tar-fs')
+const createMutex = require('./createMutex')
+
+function createTempPath(name, version) {
+  return path.join(tmpdir(), `unpkg-${name}-${version}`)
+}
+
+function normalizeTarHeader(header) {
+  // Most packages have header names that look like "package/index.js"
+  // so we shorten that to just "index.js" here. A few packages use a
+  // prefix other than "package/". e.g. the firebase package uses the
+  // "firebase_npm/" prefix. So we just strip the first dir name.
+  header.name = header.name.replace(/^[^\/]+\//, '')
+  return header
+}
+
+function extractResponse(response, outputDir) {
+  return new Promise(function (resolve, reject) {
+    const extract = tar.extract(outputDir, {
+      dmode: 0o666, // All dirs should be writable
+      fmode: 0o444, // All files should be readable
+      map: normalizeTarHeader
+    })
+
+    // pipe() does not forward errors downstream, so every stage needs
+    // its own handler or a failed download/gunzip would never settle.
+    response.body
+      .on('error', reject)
+      .pipe(gunzip())
+      .on('error', reject)
+      .pipe(extract)
+      .on('finish', resolve)
+      .on('error', reject)
+  })
+}
+
+function fetchAndExtract(tarballURL, outputDir) {
+  console.log(`Fetching ${tarballURL} and extracting to ${outputDir}`)
+
+  return fetch(tarballURL).then(function (response) {
+    // fetch() resolves for any HTTP status, so reject registry errors
+    // here instead of trying to untar an error page.
+    if (!response.ok)
+      throw new Error(`Cannot fetch ${tarballURL} (HTTP ${response.status})`)
+
+    return extractResponse(response, outputDir)
+  })
+}
+
+const fetchMutex = createMutex(function (payload, callback) {
+  const { tarballURL, outputDir } = payload
+
+  fs.access(outputDir, function (error) {
+    if (!error) {
+      // Best case: we already have this package cached on disk!
+      return callback()
+    }
+
+    if (error.code !== 'ENOENT' && error.code !== 'ENOTDIR')
+      return callback(error)
+
+    // ENOENT or ENOTDIR are to be expected when we haven't yet
+    // fetched a package for the first time. Extract into a staging
+    // dir and rename it into place only after extraction succeeds,
+    // so a failed download is never mistaken for a cached package.
+    const stagingDir = `${outputDir}.partial-${process.pid}-${Date.now()}`
+
+    mkdirp(stagingDir, function (error) {
+      if (error)
+        return callback(error)
+
+      fetchAndExtract(tarballURL, stagingDir).then(function () {
+        fs.rename(stagingDir, outputDir, function (error) {
+          // outputDir showing up in the meantime presumably means
+          // another process cached the same package; use its copy.
+          if (error && (error.code === 'ENOTEMPTY' || error.code === 'EEXIST'))
+            callback()
+          else
+            callback(error)
+        })
+      }, callback)
+    })
+  })
+})
+
+function getPackage(packageConfig, callback) {
+  const tarballURL = packageConfig.dist.tarball
+  const outputDir = createTempPath(packageConfig.name, packageConfig.version)
+
+  fetchMutex(tarballURL, { tarballURL, outputDir }, function (error) {
+    callback(error, outputDir)
+  })
+}
+
+module.exports = {
+  get: getPackage
+}
diff --git a/server/PackageInfo.js b/server/PackageInfo.js
index a804e05..6c5a532 100644
--- a/server/PackageInfo.js
+++ b/server/PackageInfo.js
@@ -58,7 +58,7 @@ function getPackageInfo(packageName, callback) {
     } else if (value) {
       callback(null, value === PackageNotFound ? null : value)
     } else {
-      fetchMutex(packageName, callback)
+      fetchMutex(packageName, packageName, callback)
     }
   })
 }
diff --git a/server/createMutex.js b/server/createMutex.js
index 50b290f..d635e4e 100644
--- a/server/createMutex.js
+++ b/server/createMutex.js
@@ -1,7 +1,7 @@
 function createMutex(doWork) {
   const mutex = {}
 
-  return function (key, callback) {
+  return function (key, payload, callback) {
     if (mutex[key]) {
       mutex[key].push(callback)
     } else {
@@ -9,7 +9,7 @@ function createMutex(doWork) {
         delete mutex[key]
       }, callback ]
 
-      doWork(key, function (error, value) {
+      doWork(payload, function (error, value) {
         mutex[key].forEach(function (callback) {
           callback(error, value)
         })
diff --git a/server/middleware/RegistryUtils.js b/server/middleware/RegistryUtils.js
deleted file mode 100644
index 4ae4b36..0000000
--- a/server/middleware/RegistryUtils.js
+++ /dev/null
@@ -1,47 +0,0 @@
-require('isomorphic-fetch')
-const gunzip = require('gunzip-maybe')
-const mkdirp = require('mkdirp')
-const tar = require('tar-fs')
-
-function normalizeTarHeader(header) {
-  // Most packages have header names that look like "package/index.js"
-  // so we shorten that to just "index.js" here. A few packages use a
-  // prefix other than "package/". e.g. the firebase package uses the
-  // "firebase_npm/" prefix. So we just strip the first dir name.
-  header.name = header.name.replace(/^[^\/]+\//, '')
-  return header
-}
-
-function getPackage(tarballURL, outputDir, callback) {
-  mkdirp(outputDir, (error) => {
-    if (error) {
-      callback(error)
-    } else {
-      let callbackWasCalled = false
-
-      fetch(tarballURL).then(response => {
-        response.body
-          .pipe(gunzip())
-          .pipe(
-            tar.extract(outputDir, {
-              dmode: 0o666, // All dirs should be writable
-              fmode: 0o444, // All files should be readable
-              map: normalizeTarHeader
-            })
-          )
-          .on('finish', callback)
-          .on('error', (error) => {
-            if (callbackWasCalled) // LOL node streams
-              return
-
-            callbackWasCalled = true
-            callback(error)
-          })
-      })
-    }
-  })
-}
-
-module.exports = {
-  getPackage
-}
diff --git a/server/middleware/fetchPackage.js b/server/middleware/fetchPackage.js
index 95bb163..f5805e2 100644
--- a/server/middleware/fetchPackage.js
+++ b/server/middleware/fetchPackage.js
@@ -1,71 +1,51 @@
-const fs = require('fs')
-const path = require('path')
-const tmpdir = require('os-tmpdir')
 const { maxSatisfying: maxSatisfyingVersion } = require('semver')
+const PackageCache = require('../PackageCache')
 const PackageInfo = require('../PackageInfo')
 const { createPackageURL } = require('./PackageUtils')
-const { getPackage } = require('./RegistryUtils')
-
-function checkLocalCache(dir, callback) {
-  fs.stat(path.join(dir, 'package.json'), function (error, stats) {
-    callback(stats && stats.isFile())
-  })
-}
-
-function createTempPath(name) {
-  return path.join(tmpdir(), `unpkg-${name}`)
-}
 
 /**
  * Fetch the package from the registry and store a local copy on disk.
  * Redirect if the URL does not specify an exact req.packageVersion number.
  */
-function fetchPackage() {
-  return function (req, res, next) {
-    req.packageDir = createTempPath(req.packageSpec)
+function fetchPackage(req, res, next) {
+  PackageInfo.get(req.packageName, function (error, packageInfo) {
+    if (error) {
+      console.error(error)
+      return res.status(500).send(`Cannot get info for package "${req.packageName}"`)
+    }
 
-    // TODO: fix race condition! (see #38)
-    checkLocalCache(req.packageDir, function (isCached) {
-      if (isCached)
-        return next() // Best case: we already have this package on disk.
+    if (packageInfo == null || packageInfo.versions == null)
+      return res.status(404).send(`Cannot find package "${req.packageName}"`)
 
-      PackageInfo.get(req.packageName, function (error, packageInfo) {
+    req.packageInfo = packageInfo
+
+    const { versions, 'dist-tags': tags } = req.packageInfo
+
+    if (req.packageVersion in versions) {
+      // A valid request for a package we haven't downloaded yet.
+      req.packageConfig = versions[req.packageVersion]
+
+      PackageCache.get(req.packageConfig, function (error, outputDir) {
         if (error) {
           console.error(error)
-          return res.status(500).send(`Cannot get info for package "${req.packageName}"`)
-        }
-
-        if (packageInfo == null || packageInfo.versions == null)
-          return res.status(404).send(`Cannot find package "${req.packageName}"`)
-
-        const { versions, 'dist-tags': tags } = packageInfo
-
-        if (req.packageVersion in versions) {
-          // A valid request for a package we haven't downloaded yet.
-          const packageConfig = versions[req.packageVersion]
-          const tarballURL = packageConfig.dist.tarball
-
-          getPackage(tarballURL, req.packageDir, function (error) {
-            if (error) {
-              res.status(500).send(error.message || error)
-            } else {
-              next()
-            }
-          })
-        } else if (req.packageVersion in tags) {
-          res.redirect(createPackageURL(req.packageName, tags[req.packageVersion], req.filename, req.search))
+          res.status(500).send(`Cannot fetch package ${req.packageSpec}`)
         } else {
-          const maxVersion = maxSatisfyingVersion(Object.keys(versions), req.packageVersion)
-
-          if (maxVersion) {
-            res.redirect(createPackageURL(req.packageName, maxVersion, req.filename, req.search))
-          } else {
-            res.status(404).send(`Cannot find package ${req.packageSpec}`)
-          }
+          req.packageDir = outputDir
+          next()
         }
       })
-    })
-  }
+    } else if (req.packageVersion in tags) {
+      res.redirect(createPackageURL(req.packageName, tags[req.packageVersion], req.filename, req.search))
+    } else {
+      const maxVersion = maxSatisfyingVersion(Object.keys(versions), req.packageVersion)
+
+      if (maxVersion) {
+        res.redirect(createPackageURL(req.packageName, maxVersion, req.filename, req.search))
+      } else {
+        res.status(404).send(`Cannot find package ${req.packageSpec}`)
+      }
+    }
+  })
 }
 
 module.exports = fetchPackage
diff --git a/server/middleware/findFile.js b/server/middleware/findFile.js
index 7e7e169..ab48c82 100644
--- a/server/middleware/findFile.js
+++ b/server/middleware/findFile.js
@@ -41,79 +41,79 @@ function resolveFile(base, useIndex, callback) {
  * Determine which file we're going to serve and get its stats.
  * Redirect if the request targets a directory with no trailing slash.
  */
-function findFile() {
-  return function (req, res, next) {
-    if (req.filename) {
-      const base = path.join(req.packageDir, req.filename)
+function findFile(req, res, next) {
+  if (req.filename) {
+    const base = path.join(req.packageDir, req.filename)
 
-      // Based on the URL, figure out which file they want.
-      resolveFile(base, false, function (error, file, stats) {
+    // Based on the URL, figure out which file they want.
+    resolveFile(base, false, function (error, file, stats) {
+      if (error)
+        console.error(error)
+
+      if (file == null) {
+        res.status(404).send(`Cannot find file "${req.filename}" in package ${req.packageSpec}`)
+      } else if (stats.isDirectory() && req.pathname[req.pathname.length - 1] !== '/') {
+        // Append / to directory URLs.
+        res.redirect(`${req.pathname}/${req.search}`)
+      } else {
+        req.file = file.replace(req.packageDir, '')
+        req.stats = stats
+        next()
+      }
+    })
+  } else {
+    // No filename in the URL. Try to figure out which file they want by
+    // checking package.json's "unpkg", "browser", and "main" fields.
+    fs.readFile(path.join(req.packageDir, 'package.json'), 'utf8', function (error, data) {
+      if (error) {
+        console.error(error)
+        return res.status(500).send(`Cannot read ${req.packageSpec}/package.json`)
+      }
+
+      let packageConfig
+      try {
+        packageConfig = JSON.parse(data)
+      } catch (error) {
+        return res.status(500).send(`Cannot parse ${req.packageSpec}/package.json: ${error.message}`)
+      }
+
+      let mainFilename
+      const queryMain = req.query && req.query.main
+
+      if (queryMain) {
+        // The field name is untrusted input: require a string value so a
+        // non-string field (e.g. ?main=dependencies) cannot make path.join() throw.
+        if (typeof packageConfig[queryMain] !== 'string')
+          return res.status(404).send(`Cannot find field "${queryMain}" in ${req.packageSpec}/package.json`)
+
+        mainFilename = packageConfig[queryMain]
+      } else {
+        if (typeof packageConfig.unpkg === 'string') {
+          // The "unpkg" field allows packages to explicitly declare the
+          // file to serve at the bare URL (see #59).
+          mainFilename = packageConfig.unpkg
+        } else if (typeof packageConfig.browser === 'string') {
+          // Fall back to the "browser" field if declared (only support strings).
+          mainFilename = packageConfig.browser
+        } else {
+          // If there is no main, use "index" (same as npm).
+          mainFilename = packageConfig.main || 'index'
+        }
+      }
+
+      resolveFile(path.join(req.packageDir, mainFilename), true, function (error, file, stats) {
         if (error)
           console.error(error)
 
         if (file == null) {
-          res.status(404).send(`Cannot find file "${req.filename}" in package ${req.packageSpec}`)
-        } else if (stats.isDirectory() && req.pathname[req.pathname.length - 1] !== '/') {
-          // Append / to directory URLs.
-          res.redirect(`${req.pathname}/${req.search}`)
+          res.status(404).send(`Cannot find main file "${mainFilename}" in package ${req.packageSpec}`)
         } else {
           req.file = file.replace(req.packageDir, '')
           req.stats = stats
           next()
         }
       })
-    } else {
-      // No filename in the URL. Try to figure out which file they want by
-      // checking package.json's "unpkg", "browser", and "main" fields.
-      fs.readFile(path.join(req.packageDir, 'package.json'), 'utf8', function (error, data) {
-        if (error) {
-          console.error(error)
-          return res.status(500).send(`Cannot read ${req.packageSpec}/package.json`)
-        }
-
-        let packageConfig
-        try {
-          packageConfig = JSON.parse(data)
-        } catch (error) {
-          return res.status(500).send(`Cannot parse ${req.packageSpec}/package.json: ${error.message}`)
-        }
-
-        let mainFilename
-        const queryMain = query && query.main
-
-        if (queryMain) {
-          if (!(queryMain in packageConfig))
-            return res.status(404).send(`Cannot find field "${queryMain}" in ${req.packageSpec}/package.json`)
-
-          mainFilename = packageConfig[queryMain]
-        } else {
-          if (typeof packageConfig.unpkg === 'string') {
-            // The "unpkg" field allows packages to explicitly declare the
-            // file to serve at the bare URL (see #59).
-            mainFilename = packageConfig.unpkg
-          } else if (typeof packageConfig.browser === 'string') {
-            // Fall back to the "browser" field if declared (only support strings).
-            mainFilename = packageConfig.browser
-          } else {
-            // If there is no main, use "index" (same as npm).
-            mainFilename = packageConfig.main || 'index'
-          }
-        }
-
-        resolveFile(path.join(req.packageDir, mainFilename), true, function (error, file, stats) {
-          if (error)
-            console.error(error)
-
-          if (file == null) {
-            res.status(404).send(`Cannot find main file "${mainFilename}" in package ${req.packageSpec}`)
-          } else {
-            req.file = file.replace(req.packageDir, '')
-            req.stats = stats
-            next()
-          }
-        })
-      })
-    }
+    })
   }
 }
 
diff --git a/server/middleware/index.js b/server/middleware/index.js
index aa59bd5..ffcb6a8 100644
--- a/server/middleware/index.js
+++ b/server/middleware/index.js
@@ -33,10 +33,10 @@ function createRequestHandler(options = {}) {
   const app = express.Router()
 
   app.use(
-    parseURL(),
+    parseURL,
     checkBlacklist(blacklist),
-    fetchPackage(),
-    findFile(),
+    fetchPackage,
+    findFile,
     serveFile(autoIndex, maximumDepth)
   )
 
diff --git a/server/middleware/parseURL.js b/server/middleware/parseURL.js
index 5dcc6b1..a2f911d 100644
--- a/server/middleware/parseURL.js
+++ b/server/middleware/parseURL.js
@@ -3,28 +3,26 @@ const { parsePackageURL } = require('./PackageUtils')
 /**
  * Parse and validate the URL.
  */
-function parseURL() {
-  return function (req, res, next) {
-    let url
-    try {
-      url = parsePackageURL(req.url)
-    } catch (error) {
-      return res.status(403).send(`Invalid URL: ${req.url}`)
-    }
-
-    if (url == null)
-      return res.status(403).send(`Invalid URL: ${req.url}`)
-
-    req.packageName = url.packageName
-    req.packageVersion = url.packageVersion
-    req.packageSpec = `${req.packageName}@${req.packageVersion}`
-    req.pathname = url.pathname
-    req.filename = url.filename
-    req.search = url.search
-    req.query = url.query
-
-    next()
+function parseURL(req, res, next) {
+  let url
+  try {
+    url = parsePackageURL(req.url)
+  } catch (error) {
+    return res.status(403).send(`Invalid URL: ${req.url}`)
   }
+
+  if (url == null)
+    return res.status(403).send(`Invalid URL: ${req.url}`)
+
+  req.packageName = url.packageName
+  req.packageVersion = url.packageVersion
+  req.packageSpec = `${req.packageName}@${req.packageVersion}`
+  req.pathname = url.pathname
+  req.filename = url.filename
+  req.search = url.search
+  req.query = url.query
+
+  next()
 }
 
 module.exports = parseURL
diff --git a/server/middleware/serveFile.js b/server/middleware/serveFile.js
index 8df1543..38d23ea 100644
--- a/server/middleware/serveFile.js
+++ b/server/middleware/serveFile.js
@@ -1,5 +1,4 @@
 const path = require('path')
-const PackageInfo = require('../PackageInfo')
 const { generateMetadata } = require('./MetadataUtils')
 const { generateDirectoryIndexHTML } = require('./IndexUtils')
 const { sendFile } = require('./ResponseUtils')
@@ -22,17 +21,11 @@ function serveFile(autoIndex, maximumDepth) {
       // TODO: use res.sendFile instead of our own custom function?
       sendFile(res, path.join(req.packageDir, req.file), req.stats, 31536000)
     } else if (autoIndex && req.stats.isDirectory()) {
-      PackageInfo.get(req.packageName, function (error, packageInfo) {
-        if (error) {
-          res.status(500).send(`Cannot generate index page for ${req.packageSpec}${req.filename}`)
+      generateDirectoryIndexHTML(req.packageInfo, req.packageVersion, req.packageDir, req.file, function (error, html) {
+        if (html) {
+          res.send(html)
         } else {
-          generateDirectoryIndexHTML(packageInfo, req.packageVersion, req.packageDir, req.file, function (error, html) {
-            if (html) {
-              res.send(html)
-            } else {
-              res.status(500).send(`Cannot generate index page for ${req.packageSpec}${req.filename}`)
-            }
-          })
+          res.status(500).send(`Cannot generate index page for ${req.packageSpec}${req.filename}`)
         }
       })
     } else {