Simplify log ingestion timer
This commit is contained in:
parent
995780bcbb
commit
fe4ccec4e2
2
Procfile
2
Procfile
|
@ -1,2 +1,2 @@
|
||||||
web: node server.js
|
web: node server.js
|
||||||
ingest_logs: node modules/ingestLogs.js
|
ingest_logs: node modules/ingestLogsEveryMinute.js
|
||||||
|
|
|
@ -30,7 +30,7 @@ services:
|
||||||
|
|
||||||
worker:
|
worker:
|
||||||
build: .
|
build: .
|
||||||
command: node_modules/.bin/nodemon --ignore modules/client modules/ingestLogs.js
|
command: node_modules/.bin/nodemon --ignore modules/client modules/ingestLogsEveryMinute.js
|
||||||
env_file: .env
|
env_file: .env
|
||||||
environment:
|
environment:
|
||||||
- DATA_URL=redis://data:6379
|
- DATA_URL=redis://data:6379
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
const parseURL = require("url").parse;
|
const parseURL = require("url").parse;
|
||||||
const startOfDay = require("date-fns/start_of_day");
|
const startOfDay = require("date-fns/start_of_day");
|
||||||
const startOfMinute = require("date-fns/start_of_minute");
|
|
||||||
const addDays = require("date-fns/add_days");
|
const addDays = require("date-fns/add_days");
|
||||||
|
|
||||||
const db = require("./utils/data");
|
const db = require("./utils/data");
|
||||||
|
@ -18,15 +17,12 @@ const domainNames = [
|
||||||
//"npmcdn.com" // We don't have log data on npmcdn.com yet :/
|
//"npmcdn.com" // We don't have log data on npmcdn.com yet :/
|
||||||
];
|
];
|
||||||
|
|
||||||
/**
|
let cachedZones;
|
||||||
* The window of time to download in a single fetch.
|
|
||||||
*/
|
|
||||||
const logWindowSeconds = 30;
|
|
||||||
|
|
||||||
/**
|
const oneSecond = 1000;
|
||||||
* The minimum time to wait between fetches.
|
const oneMinute = oneSecond * 60;
|
||||||
*/
|
const oneHour = oneMinute * 60;
|
||||||
const minInterval = 15000;
|
const oneDay = oneHour * 24;
|
||||||
|
|
||||||
function getSeconds(date) {
|
function getSeconds(date) {
|
||||||
return Math.floor(date.getTime() / 1000);
|
return Math.floor(date.getTime() / 1000);
|
||||||
|
@ -40,11 +36,6 @@ function toSeconds(ms) {
|
||||||
return Math.floor(ms / 1000);
|
return Math.floor(ms / 1000);
|
||||||
}
|
}
|
||||||
|
|
||||||
const oneSecond = 1000;
|
|
||||||
const oneMinute = oneSecond * 60;
|
|
||||||
const oneHour = oneMinute * 60;
|
|
||||||
const oneDay = oneHour * 24;
|
|
||||||
|
|
||||||
function computeCounters(stream) {
|
function computeCounters(stream) {
|
||||||
return new Promise((resolve, reject) => {
|
return new Promise((resolve, reject) => {
|
||||||
const counters = {};
|
const counters = {};
|
||||||
|
@ -144,7 +135,10 @@ function processLogs(stream) {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
function ingestLogs(zone, startSeconds, endSeconds) {
|
function ingestLogsForZone(zone, startDate, endDate) {
|
||||||
|
const startSeconds = toSeconds(startDate);
|
||||||
|
const endSeconds = toSeconds(endDate);
|
||||||
|
|
||||||
const startFetchTime = Date.now();
|
const startFetchTime = Date.now();
|
||||||
const fields = [
|
const fields = [
|
||||||
"EdgeStartTimestamp",
|
"EdgeStartTimestamp",
|
||||||
|
@ -186,66 +180,20 @@ function ingestLogs(zone, startSeconds, endSeconds) {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
function startZone(zone) {
|
function getZones(domainNames) {
|
||||||
const suffix = zone.name.replace(".", "-");
|
return Promise.all(domainNames.map(CloudflareAPI.getZones)).then(results =>
|
||||||
const startSecondsKey = `ingestLogs-start-${suffix}`;
|
results.reduce((memo, zones) => memo.concat(zones))
|
||||||
|
);
|
||||||
function takeATurn() {
|
|
||||||
const now = Date.now();
|
|
||||||
|
|
||||||
// Cloudflare keeps logs around for 7 days.
|
|
||||||
// https://support.cloudflare.com/hc/en-us/articles/216672448-Enterprise-Log-Share-Logpull-REST-API
|
|
||||||
const minSeconds = toSeconds(startOfMinute(now - oneDay * 5));
|
|
||||||
|
|
||||||
db.get(startSecondsKey, (error, value) => {
|
|
||||||
let startSeconds = value && parseInt(value, 10);
|
|
||||||
|
|
||||||
if (startSeconds == null) {
|
|
||||||
startSeconds = minSeconds;
|
|
||||||
} else if (startSeconds < minSeconds) {
|
|
||||||
console.warn(
|
|
||||||
"warning: Dropped logs for %s from %s to %s!",
|
|
||||||
zone.name,
|
|
||||||
stringifySeconds(startSeconds),
|
|
||||||
stringifySeconds(minSeconds)
|
|
||||||
);
|
|
||||||
|
|
||||||
startSeconds = minSeconds;
|
|
||||||
}
|
|
||||||
|
|
||||||
const endSeconds = startSeconds + logWindowSeconds;
|
|
||||||
|
|
||||||
// The log for a request is typically available within thirty (30) minutes
|
|
||||||
// of the request taking place under normal conditions. We deliver logs
|
|
||||||
// ordered by the time that the logs were created, i.e. the timestamp of
|
|
||||||
// the request when it was received by the edge. Given the order of
|
|
||||||
// delivery, we recommend waiting a full thirty minutes to ingest a full
|
|
||||||
// set of logs. This will help ensure that any congestion in the log
|
|
||||||
// pipeline has passed and a full set of logs can be ingested.
|
|
||||||
// https://support.cloudflare.com/hc/en-us/articles/216672448-Enterprise-Log-Share-REST-API
|
|
||||||
const maxSeconds = toSeconds(now - oneMinute * 30);
|
|
||||||
|
|
||||||
if (endSeconds < maxSeconds) {
|
|
||||||
ingestLogs(zone, startSeconds, endSeconds).then(
|
|
||||||
() => {
|
|
||||||
db.set(startSecondsKey, endSeconds);
|
|
||||||
setTimeout(takeATurn, minInterval);
|
|
||||||
},
|
|
||||||
error => {
|
|
||||||
console.error(error.stack);
|
|
||||||
process.exit(1);
|
|
||||||
}
|
|
||||||
);
|
|
||||||
} else {
|
|
||||||
setTimeout(takeATurn, (startSeconds - maxSeconds) * 1000);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
takeATurn();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Promise.all(domainNames.map(CloudflareAPI.getZones)).then(results => {
|
function ingestLogs(startDate, endDate) {
|
||||||
const zones = results.reduce((memo, zones) => memo.concat(zones));
|
return Promise.resolve(cachedZones || getZones(domainNames)).then(zones => {
|
||||||
zones.forEach(startZone);
|
if (!cachedZones) cachedZones = zones;
|
||||||
});
|
|
||||||
|
return Promise.all(
|
||||||
|
zones.map(zone => ingestLogsForZone(zone, startDate, endDate))
|
||||||
|
);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
module.exports = ingestLogs;
|
||||||
|
|
|
@ -0,0 +1,43 @@
|
||||||
|
const addMinutes = require("date-fns/add_minutes");
|
||||||
|
const startOfMinute = require("date-fns/start_of_minute");
|
||||||
|
|
||||||
|
const ingestLogs = require("./ingestLogs");
|
||||||
|
|
||||||
|
// Durations, expressed in milliseconds.
const oneSecond = 1000;
const oneMinute = 60 * oneSecond;

// Promise for the most recently started ingestion; shutdown() waits on it.
let currentWorkload;
// Handle returned by setInterval(); shutdown() clears it.
let timer;
|
||||||
|
|
||||||
|
/**
 * Starts ingesting one minute of logs: the whole minute that began at least
 * 31 minutes before "now".
 *
 * The resulting promise is stored in `currentWorkload` so that shutdown() can
 * wait for it. A failed ingestion is logged here instead of being left as an
 * unhandled promise rejection, so the stored promise always fulfills.
 *
 * NOTE(review): each call overwrites `currentWorkload`, so only the most
 * recently started ingestion is tracked if a previous one is still running.
 */
function work() {
  const now = Date.now();

  // The log for a request is typically available within thirty (30) minutes
  // of the request taking place under normal conditions. We deliver logs
  // ordered by the time that the logs were created, i.e. the timestamp of
  // the request when it was received by the edge. Given the order of
  // delivery, we recommend waiting a full thirty minutes to ingest a full
  // set of logs. This will help ensure that any congestion in the log
  // pipeline has passed and a full set of logs can be ingested.
  // https://support.cloudflare.com/hc/en-us/articles/216672448-Enterprise-Log-Share-REST-API
  const start = startOfMinute(now - oneMinute * 31);
  const end = addMinutes(start, 1);

  currentWorkload = ingestLogs(start, end).catch(error => {
    // Report the failure; the next scheduled run is unaffected.
    console.error(error && error.stack ? error.stack : error);
  });
}
|
||||||
|
|
||||||
|
/**
 * Signal handler: stops scheduling new work, waits for the in-flight
 * ingestion (if any) to settle, then exits the process.
 *
 * Exits with the default code when the workload fulfilled. If it rejected,
 * the error is logged and the process exits with code 1, so shutdown never
 * ends in a silent unhandled rejection.
 */
function shutdown() {
  console.log("Shutting down...");

  clearInterval(timer);

  // Promise.resolve() also covers the case where no workload was ever started.
  Promise.resolve(currentWorkload).then(
    () => {
      console.log("Goodbye!");
      process.exit();
    },
    error => {
      console.error(error && error.stack ? error.stack : error);
      process.exit(1);
    }
  );
}
|
||||||
|
|
||||||
|
// Run the first ingestion immediately instead of waiting for the first tick.
work();

// Shut down gracefully on both interrupt and terminate signals.
["SIGINT", "SIGTERM"].forEach(signal => {
  process.on(signal, shutdown);
});

// Repeat once a minute; the handle is kept so shutdown() can cancel it.
timer = setInterval(work, oneMinute);
|
Loading…
Reference in New Issue