Finer-grained caching of data from npm

This commit is contained in:
Michael Jackson 2019-07-10 16:27:19 -07:00
parent ce9206f59e
commit 40bd9dbec4
5 changed files with 247 additions and 160 deletions

View File

@ -20,4 +20,19 @@ describe('A request for a JavaScript file', () => {
done();
});
});
describe('from a scoped package', () => {
  // The package name in this URL is scoped ("@scope/name"), so it contains
  // a slash in addition to the one that separates it from the file path.
  it('returns 200', done => {
    const scopedFileURL = '/@babel/core@7.5.4/lib/index.js';

    request(server)
      .get(scopedFileURL)
      .end((err, res) => {
        expect(res.statusCode).toBe(200);

        // Both the media type and the charset must be advertised.
        const contentType = res.headers['content-type'];
        expect(contentType).toMatch(/\bapplication\/javascript\b/);
        expect(contentType).toMatch(/\bcharset=utf-8\b/);

        done();
      });
  });
});
});

View File

@ -1,11 +1,11 @@
import { renderToString, renderToStaticMarkup } from 'react-dom/server';
import semver from 'semver';
import AutoIndexApp from '../client/autoIndex/App.js';
import MainTemplate from './utils/MainTemplate.js';
import getScripts from './utils/getScripts.js';
import { createElement, createHTML } from './utils/markupHelpers.js';
import { getAvailableVersions } from '../utils/npm.js';
const doctype = '<!DOCTYPE html>';
const globalURLs =
@ -21,15 +21,12 @@ const globalURLs =
'react-dom': '/react-dom@16.7.0/umd/react-dom.development.js'
};
function byVersion(a, b) {
return semver.lt(a, b) ? -1 : semver.gt(a, b) ? 1 : 0;
}
export default function serveAutoIndexPage(req, res) {
export default async function serveAutoIndexPage(req, res) {
const availableVersions = await getAvailableVersions(req.packageName);
const data = {
packageName: req.packageName,
packageVersion: req.packageVersion,
availableVersions: Object.keys(req.packageInfo.versions).sort(byVersion),
availableVersions: availableVersions,
filename: req.filename,
entry: req.entry,
entries: req.entries

View File

@ -1,48 +1,20 @@
import semver from 'semver';
import addLeadingSlash from '../utils/addLeadingSlash.js';
import createPackageURL from '../utils/createPackageURL.js';
import createSearch from '../utils/createSearch.js';
import { getPackageInfo as getNpmPackageInfo } from '../utils/npm.js';
function tagRedirect(req, res) {
const version = req.packageInfo['dist-tags'][req.packageVersion];
import { getPackageConfig, resolveVersion } from '../utils/npm.js';
function semverRedirect(req, res, newVersion) {
res
.set({
'Cache-Control': 'public, s-maxage=600, max-age=60', // 10 mins on CDN, 1 min on clients
'Cache-Tag': 'redirect, tag-redirect'
'Cache-Tag': 'redirect, semver-redirect'
})
.redirect(
302,
createPackageURL(req.packageName, version, req.filename, req.search)
createPackageURL(req.packageName, newVersion, req.filename, req.search)
);
}
function semverRedirect(req, res) {
const maxVersion = semver.maxSatisfying(
Object.keys(req.packageInfo.versions),
req.packageVersion
);
if (maxVersion) {
res
.set({
'Cache-Control': 'public, s-maxage=600, max-age=60', // 10 mins on CDN, 1 min on clients
'Cache-Tag': 'redirect, semver-redirect'
})
.redirect(
302,
createPackageURL(req.packageName, maxVersion, req.filename, req.search)
);
} else {
res
.status(404)
.type('text')
.send(`Cannot find package ${req.packageSpec}`);
}
}
function filenameRedirect(req, res) {
let filename;
if (req.query.module != null) {
@ -110,40 +82,35 @@ function filenameRedirect(req, res) {
}
/**
* Fetch the package metadata and tarball from npm. Redirect to the exact
* version if the request targets a tag or uses a semver version, or to the
* exact filename if the request omits the filename.
* Fetch the package config. Redirect to the exact version if the request
* targets a tag or uses semver, or to the exact filename if the request
* omits the filename.
*/
export default async function fetchPackage(req, res, next) {
let packageInfo;
try {
packageInfo = await getNpmPackageInfo(req.packageName);
} catch (error) {
console.error(error);
const version = await resolveVersion(req.packageName, req.packageVersion);
return res
.status(500)
.type('text')
.send(`Cannot get info for package "${req.packageName}"`);
}
if (packageInfo == null || packageInfo.versions == null) {
if (!version) {
return res
.status(404)
.type('text')
.send(`Cannot find package "${req.packageName}"`);
.send(`Cannot find package ${req.packageSpec}`);
}
req.packageInfo = packageInfo;
req.packageConfig = req.packageInfo.versions[req.packageVersion];
if (version !== req.packageVersion) {
return semverRedirect(req, res, version);
}
req.packageConfig = await getPackageConfig(
req.packageName,
req.packageVersion
);
if (!req.packageConfig) {
// Redirect to a fully-resolved version.
if (req.packageVersion in req.packageInfo['dist-tags']) {
return tagRedirect(req, res);
} else {
return semverRedirect(req, res);
}
// TODO: Log why.
return res
.status(500)
.type('text')
.send(`Cannot get config for package ${req.packageSpec}`);
}
if (!req.filename) {

View File

@ -1,9 +1,11 @@
import path from 'path';
import gunzip from 'gunzip-maybe';
import tar from 'tar-stream';
import addLeadingSlash from '../utils/addLeadingSlash.js';
import createPackageURL from '../utils/createPackageURL.js';
import createSearch from '../utils/createSearch.js';
import { fetchPackage as fetchNpmPackage } from '../utils/npm.js';
import { getPackage } from '../utils/npm.js';
import getIntegrity from '../utils/getIntegrity.js';
import getContentType from '../utils/getContentType.js';
@ -54,7 +56,7 @@ function stripLeadingSegment(name) {
* Follows node's resolution algorithm.
* https://nodejs.org/api/modules.html#modules_all_together
*/
function searchEntries(tarballStream, entryName, wantsIndex) {
function searchEntries(stream, entryName, wantsIndex) {
return new Promise((resolve, reject) => {
const jsEntryName = `${entryName}.js`;
const jsonEntryName = `${entryName}.json`;
@ -66,7 +68,9 @@ function searchEntries(tarballStream, entryName, wantsIndex) {
foundEntry = entries[''] = { name: '', type: 'directory' };
}
tarballStream
stream
.pipe(gunzip())
.pipe(tar.extract())
.on('error', reject)
.on('entry', (header, stream, next) => {
const entry = {
@ -173,9 +177,9 @@ export default async function findFile(req, res, next) {
.replace(trailingSlash, '')
.replace(leadingSlash, '');
const tarballStream = await fetchNpmPackage(req.packageConfig);
const stream = await getPackage(req.packageName, req.packageVersion);
const { entries, foundEntry } = await searchEntries(
tarballStream,
stream,
entryName,
wantsIndex
);

View File

@ -1,8 +1,7 @@
import url from 'url';
import https from 'https';
import gunzip from 'gunzip-maybe';
import tar from 'tar-stream';
import LRUCache from 'lru-cache';
import semver from 'semver';
import debug from './debug.js';
import bufferStream from './bufferStream.js';
@ -14,80 +13,9 @@ const agent = new https.Agent({
keepAlive: true
});
function parseJSON(res) {
return bufferStream(res).then(JSON.parse);
}
export function fetchPackageInfo(packageName) {
function get(options) {
return new Promise((accept, reject) => {
const encodedPackageName =
packageName.charAt(0) === '@'
? `@${encodeURIComponent(packageName.substring(1))}`
: encodeURIComponent(packageName);
const infoURL = `${npmRegistryURL}/${encodedPackageName}`;
debug('Fetching package info for %s from %s', packageName, infoURL);
const { hostname, pathname } = url.parse(infoURL);
const options = {
agent: agent,
hostname: hostname,
path: pathname,
headers: {
Accept: 'application/json'
}
};
https
.get(options, async res => {
if (res.statusCode === 200) {
accept(parseJSON(res));
} else if (res.statusCode === 404) {
accept(null);
} else {
const data = await bufferStream(res);
const content = data.toString('utf-8');
const error = new Error(
`Failed to fetch info for ${packageName}\nstatus: ${res.statusCode}\ndata: ${content}`
);
reject(error);
}
})
.on('error', reject);
});
}
export function fetchPackage(packageConfig) {
return new Promise((accept, reject) => {
const tarballURL = packageConfig.dist.tarball;
debug('Fetching package for %s from %s', packageConfig.name, tarballURL);
const { hostname, pathname } = url.parse(tarballURL);
const options = {
agent: agent,
hostname: hostname,
path: pathname
};
https
.get(options, async res => {
if (res.statusCode === 200) {
accept(res.pipe(gunzip()).pipe(tar.extract()));
} else {
const data = await bufferStream(res);
const spec = `${packageConfig.name}@${packageConfig.version}`;
const content = data.toString('utf-8');
const error = new Error(
`Failed to fetch tarball for ${spec}\nstatus: ${res.statusCode}\ndata: ${content}`
);
reject(error);
}
})
.on('error', reject);
https.get(options, accept).on('error', reject);
});
}
@ -103,27 +31,203 @@ const cache = new LRUCache({
const notFound = '';
export async function getPackageInfo(packageName) {
const key = `npmPackageInfo-${packageName}`;
const value = cache.get(key);
/**
 * Percent-encodes a package name so it can be placed in a registry URL.
 *
 * For a scoped name the leading "@" is kept literally and everything after
 * it is encoded, including the "/" between the scope and the name
 * (e.g. "@babel/core" becomes "@babel%2Fcore"). Unscoped names are encoded
 * as a whole.
 *
 * @param {string} packageName
 * @returns {string}
 */
function encodePackageName(packageName) {
  if (packageName.charAt(0) !== '@') {
    return encodeURIComponent(packageName);
  }

  const withoutScopeMarker = packageName.substring(1);
  return `@${encodeURIComponent(withoutScopeMarker)}`;
}
if (value != null) {
return value === notFound ? null : JSON.parse(value);
async function fetchPackageInfo(packageName) {
const name = encodePackageName(packageName);
const infoURL = `${npmRegistryURL}/${name}`;
debug('Fetching package info for %s from %s', packageName, infoURL);
const { hostname, pathname } = url.parse(infoURL);
const options = {
agent: agent,
hostname: hostname,
path: pathname,
headers: {
Accept: 'application/json'
}
};
const res = await get(options);
if (res.statusCode === 200) {
return bufferStream(res).then(JSON.parse);
}
const info = await fetchPackageInfo(packageName);
if (info == null) {
// Cache 404s for 5 minutes. This prevents us from making
// unnecessary requests to the registry for bad package names.
// In the worst case, a brand new package's info will be
// available within 5 minutes.
cache.set(key, notFound, oneMinute * 5);
if (res.statusCode === 404) {
return null;
}
// Cache valid package info for 1 minute. In the worst case,
// new versions won't be available for 1 minute.
cache.set(key, JSON.stringify(info), oneMinute);
return info;
const data = await bufferStream(res);
const content = data.toString('utf-8');
throw new Error(
`Failed to fetch info for ${packageName}\nstatus: ${res.statusCode}\ndata: ${content}`
);
}
/**
 * Fetches the registry document for a package and reduces it to the two
 * pieces needed for version resolution: the list of version strings and the
 * dist-tag map.
 *
 * @param {string} packageName
 * @returns {Promise<{versions: string[], tags: Object}|null>} null when
 *   fetchPackageInfo yields no document, or when the document has no
 *   `versions` map to resolve against.
 */
async function fetchVersionsAndTags(packageName) {
  const info = await fetchPackageInfo(packageName);

  // A document without `versions` (presumably e.g. a package with nothing
  // published) is reported as "not found" rather than letting
  // Object.keys(undefined) throw a TypeError.
  if (!info || !info.versions) {
    return null;
  }

  return {
    versions: Object.keys(info.versions),
    // Always hand back a map so callers can look tags up without first
    // checking that it exists.
    tags: info['dist-tags'] || {}
  };
}
/**
 * Cached front for fetchVersionsAndTags.
 *
 * Entries are stored as strings under the key `versions-<packageName>`:
 * a found result as JSON (kept for `oneMinute`), a miss as the `notFound`
 * sentinel (kept for `5 * oneMinute`).
 *
 * @param {string} packageName
 * @returns {Promise<Object|null>} the versions/tags object, or null when
 *   the package was not found.
 */
async function getVersionsAndTags(packageName) {
  const cacheKey = `versions-${packageName}`;
  const cached = cache.get(cacheKey);

  if (cached != null) {
    if (cached === notFound) {
      return null;
    }
    return JSON.parse(cached);
  }

  const fresh = await fetchVersionsAndTags(packageName);
  const wasFound = fresh != null;

  // Misses are remembered longer than hits.
  if (wasFound) {
    cache.set(cacheKey, JSON.stringify(fresh), oneMinute);
  } else {
    cache.set(cacheKey, notFound, 5 * oneMinute);
  }

  return wasFound ? fresh : null;
}
/**
 * Array#sort comparator that orders version strings ascending, using
 * semver.lt / semver.gt for the comparison.
 *
 * @param {string} a
 * @param {string} b
 * @returns {number} -1 when a < b, 1 when a > b, 0 otherwise.
 */
function byVersion(a, b) {
  if (semver.lt(a, b)) {
    return -1;
  }

  if (semver.gt(a, b)) {
    return 1;
  }

  return 0;
}
/**
 * Lists every known version of a package in ascending semver order.
 * Resolves to an empty array when the package cannot be found.
 *
 * @param {string} packageName
 * @returns {Promise<string[]>}
 */
export async function getAvailableVersions(packageName) {
  const found = await getVersionsAndTags(packageName);

  if (!found) {
    return [];
  }

  const { versions } = found;
  versions.sort(byVersion);
  return versions;
}
/**
 * Resolves a dist-tag, exact version or semver range to a concrete version
 * of the package.
 *
 * The lookup goes through getVersionsAndTags, so repeated calls do not
 * necessarily hit the registry.
 *
 * @param {string} packageName
 * @param {string} range - a dist-tag name, an exact version, or a semver
 *   range.
 * @returns {Promise<string|null>} the resolved version; null when the
 *   package is unknown, otherwise whatever semver.maxSatisfying yields when
 *   the request is not an exact match (null if nothing satisfies it).
 */
export async function resolveVersion(packageName, range) {
  const versionsAndTags = await getVersionsAndTags(packageName);

  if (!versionsAndTags) {
    return null;
  }

  const { versions, tags } = versionsAndTags;

  // Only the package's own dist-tags count. A plain `in` check would also
  // match inherited keys such as "constructor" or "toString" and replace the
  // requested range with a function, and would throw if `tags` is absent.
  const isTag =
    tags != null && Object.prototype.hasOwnProperty.call(tags, range);
  const requested = isTag ? tags[range] : range;

  // Exact matches skip range evaluation entirely.
  return versions.includes(requested)
    ? requested
    : semver.maxSatisfying(versions, requested);
}
// Keys that sometimes show up in package info documents but are not
// needed here. The list is probably not exhaustive.
const packageConfigExcludeKeys = [
  'browserify',
  'bugs',
  'directories',
  'engines',
  'files',
  'homepage',
  'keywords',
  'maintainers',
  'scripts'
];

/**
 * Returns a shallow copy of a package config document without the keys
 * listed in packageConfigExcludeKeys and without any key that starts with
 * an underscore. The input object is not modified.
 *
 * @param {Object} doc
 * @returns {Object}
 */
function cleanPackageConfig(doc) {
  const cleaned = {};

  for (const key of Object.keys(doc)) {
    const isUnderscored = key.startsWith('_');

    if (isUnderscored || packageConfigExcludeKeys.includes(key)) {
      continue;
    }

    cleaned[key] = doc[key];
  }

  return cleaned;
}
/**
 * Fetches the registry document for a package and returns the cleaned-up
 * config of one specific version.
 *
 * @param {string} packageName
 * @param {string} version - exact version key to look up in the document's
 *   `versions` map.
 * @returns {Promise<Object|null>} null when there is no document, the
 *   document has no `versions` map, or the version is not one of its own
 *   keys.
 */
async function fetchPackageConfig(packageName, version) {
  const info = await fetchPackageInfo(packageName);
  const versions = info ? info.versions : null;

  // Own-property check: `version in versions` would also accept inherited
  // keys like "constructor", and throws when `versions` is missing.
  if (
    !versions ||
    !Object.prototype.hasOwnProperty.call(versions, version)
  ) {
    return null;
  }

  return cleanPackageConfig(versions[version]);
}
/**
 * Returns metadata about one version of a package, mostly the same as its
 * package.json. Output is cached to avoid over-fetching from the registry.
 *
 * Found configs are cached as JSON for `oneMinute`; misses are cached as the
 * `notFound` sentinel for `5 * oneMinute`.
 *
 * @param {string} packageName
 * @param {string} version
 * @returns {Promise<Object|null>} the config, or null when
 *   fetchPackageConfig finds none.
 */
export async function getPackageConfig(packageName, version) {
  // "@" separates name and version unambiguously. With "-" as the separator,
  // ("foo-1.0.0", "1.0.0") and ("foo", "1.0.0-1.0.0") produced the same key
  // and could be served each other's config.
  const cacheKey = `config-${packageName}@${version}`;
  const cacheValue = cache.get(cacheKey);

  if (cacheValue != null) {
    return cacheValue === notFound ? null : JSON.parse(cacheValue);
  }

  const value = await fetchPackageConfig(packageName, version);

  if (value == null) {
    cache.set(cacheKey, notFound, 5 * oneMinute);
    return null;
  }

  cache.set(cacheKey, JSON.stringify(value), oneMinute);
  return value;
}
/**
 * Requests the tarball of one version of a package from the registry.
 *
 * The URL is built as `<registry>/<name>/-/<basename>-<version>.tgz`, where
 * the basename is the part after the "/" for scoped names.
 *
 * @param {string} packageName
 * @param {string} version
 * @returns {Promise<Object>} the response as handed back by `get`, returned
 *   untouched when its status is 200.
 * @throws {Error} for any other status; the message includes the status
 *   code and the buffered response body.
 */
export async function getPackage(packageName, version) {
  let tarballName = packageName;
  if (packageName.startsWith('@')) {
    tarballName = packageName.split('/')[1];
  }

  const tarballURL = `${npmRegistryURL}/${packageName}/-/${tarballName}-${version}.tgz`;

  debug('Fetching package for %s from %s', packageName, tarballURL);

  const parsedURL = url.parse(tarballURL);
  const res = await get({
    agent: agent,
    hostname: parsedURL.hostname,
    path: parsedURL.pathname
  });

  if (res.statusCode !== 200) {
    // Read the whole body so it can be reported in the error.
    const data = await bufferStream(res);
    const spec = `${packageName}@${version}`;
    const content = data.toString('utf-8');

    throw new Error(
      `Failed to fetch tarball for ${spec}\nstatus: ${res.statusCode}\ndata: ${content}`
    );
  }

  return res;
}