From 1135fff77cccf4501f65745f5664272070ed9239 Mon Sep 17 00:00:00 2001
From: keyan
Date: Sat, 3 Feb 2024 20:14:54 -0600
Subject: [PATCH] refine capture svc

---
Review notes (not part of the commit message; ignored by `git am`):
 * This patch had been flattened onto three lines. Line breaks, blank
   context lines and indentation were reconstructed from the hunk headers;
   use `git apply --ignore-whitespace` if context whitespace does not match.
 * capture/index.js: the launch option was spelled `useDataDir`, which is
   not a Puppeteer launch option and was therefore ignored. It is now
   `userDataDir`, so Chrome really uses ./data as its profile directory
   (the directory must be writable by the user the service runs as).
 * capture/index.js: console.timeEnd() only takes the timer label, so the
   extra 'pages at start' arguments were never printed. The value is now
   written with console.timeLog() right before the unchanged
   console.timeEnd(timeLabel). The last index.js hunk header and the
   diffstat were adjusted for this; the post-image blob id on the index.js
   "index" line was not recomputed.

 capture/Dockerfile           |  4 +--
 capture/index.js             | 58 +++++++++++++++++++++++++++++++-----
 copilot/capture/manifest.yml |  4 ++-
 3 files changed, 56 insertions(+), 10 deletions(-)

diff --git a/capture/Dockerfile b/capture/Dockerfile
index 7ce63759..daf27e4d 100644
--- a/capture/Dockerfile
+++ b/capture/Dockerfile
@@ -11,7 +11,7 @@ RUN npm ci
 
 COPY . .
 
-RUN curl http://ftp.de.debian.org/debian/pool/main/f/fonts-noto-color-emoji/fonts-noto-color-emoji_0~20200916-1_all.deb \
-    -o fonts-noto-color-emoji.deb && dpkg -i fonts-noto-color-emoji.deb && rm fonts-noto-color-emoji.deb
+ADD http://ftp.de.debian.org/debian/pool/main/f/fonts-noto-color-emoji/fonts-noto-color-emoji_0~20200916-1_all.deb fonts-noto-color-emoji.deb
+RUN dpkg -i fonts-noto-color-emoji.deb
 CMD [ "node", "index.js" ]
 USER pptruser
\ No newline at end of file
diff --git a/capture/index.js b/capture/index.js
index ce7c6494..3fda5f4e 100644
--- a/capture/index.js
+++ b/capture/index.js
@@ -3,12 +3,50 @@ import puppeteer from 'puppeteer'
 
 const captureUrl = process.env.CAPTURE_URL || 'http://host.docker.internal:3000/'
 const port = process.env.PORT || 5678
-const maxPages = process.env.MAX_PAGES || 5
-const timeout = process.env.TIMEOUT || 10000
+const maxPages = Number(process.env.MAX_PAGES) || 5
+const timeout = Number(process.env.TIMEOUT) || 10000
 const cache = process.env.CACHE || 60000
 const width = process.env.WIDTH || 600
 const height = process.env.HEIGHT || 315
 const deviceScaleFactor = process.env.SCALE_FACTOR || 2
+// from https://www.bannerbear.com/blog/ways-to-speed-up-puppeteer-screenshots/
+const args = [
+  '--autoplay-policy=user-gesture-required',
+  '--disable-background-networking',
+  '--disable-background-timer-throttling',
+  '--disable-backgrounding-occluded-windows',
+  '--disable-breakpad',
+  '--disable-client-side-phishing-detection',
+  '--disable-component-update',
+  '--disable-default-apps',
+  '--disable-dev-shm-usage',
+  '--disable-domain-reliability',
+  '--disable-extensions',
+  '--disable-features=AudioServiceOutOfProcess',
+  '--disable-hang-monitor',
+  '--disable-ipc-flooding-protection',
+  '--disable-notifications',
+  '--disable-offer-store-unmasked-wallet-cards',
+  '--disable-popup-blocking',
+  '--disable-print-preview',
+  '--disable-prompt-on-repost',
+  '--disable-renderer-backgrounding',
+  '--disable-setuid-sandbox',
+  '--disable-speech-api',
+  '--disable-sync',
+  '--hide-scrollbars',
+  '--ignore-gpu-blacklist',
+  '--metrics-recording-only',
+  '--mute-audio',
+  '--no-default-browser-check',
+  '--no-first-run',
+  '--no-pings',
+  '--no-sandbox',
+  '--no-zygote',
+  '--password-store=basic',
+  '--use-gl=swiftshader',
+  '--use-mock-keychain'
+]
 
 let browser
 const app = express()
@@ -21,20 +59,24 @@ app.get('/*', async (req, res) => {
   const url = new URL(req.originalUrl, captureUrl)
   const timeLabel = `${Date.now()}-${url.href}`
 
-  let page
+  let page, pages
 
   try {
     console.time(timeLabel)
     browser ||= await puppeteer.launch({
       headless: 'new',
+      userDataDir: './data',
       executablePath: 'google-chrome-stable',
-      args: ['--no-sandbox', '--disable-setuid-sandbox']
+      args,
+      protocolTimeout: timeout,
+      defaultViewport: { width, height, deviceScaleFactor }
     })
 
-    console.timeLog(timeLabel, 'capturing', 'current pages', (await browser.pages()).length)
+    pages = (await browser.pages()).length
+    console.timeLog(timeLabel, 'capturing', 'current pages', pages)
 
     // limit number of active pages
-    if ((await browser.pages()).length > maxPages + 1) {
+    if (pages > maxPages + 1) {
       console.timeLog(timeLabel, 'too many pages')
       return res.writeHead(503, {
         'Retry-After': 1
@@ -42,10 +84,11 @@ app.get('/*', async (req, res) => {
     }
 
     page = await browser.newPage()
-    await page.setViewport({ width, height, deviceScaleFactor })
     await page.emulateMediaFeatures([{ name: 'prefers-color-scheme', value: 'dark' }])
     await page.goto(url.href, { waitUntil: 'load', timeout })
+    console.timeLog(timeLabel, 'page loaded')
     const file = await page.screenshot({ type: 'png', captureBeyondViewport: false })
+    console.timeLog(timeLabel, 'screenshot complete')
     res.setHeader('Content-Type', 'image/png')
     res.setHeader('Cache-Control', `public, max-age=${cache}, immutable, stale-while-revalidate=${cache * 24}, stale-if-error=${cache * 24}`)
     return res.status(200).end(file)
@@ -53,7 +96,8 @@ app.get('/*', async (req, res) => {
     console.timeLog(timeLabel, 'error', err)
     return res.status(500).end()
   } finally {
+    console.timeLog(timeLabel, 'pages at start', pages)
     console.timeEnd(timeLabel)
     page?.close().catch(console.error)
   }
 })
diff --git a/copilot/capture/manifest.yml b/copilot/capture/manifest.yml
index b4dfa988..d636e649 100644
--- a/copilot/capture/manifest.yml
+++ b/copilot/capture/manifest.yml
@@ -33,6 +33,7 @@ count:
     out: 60s  # Number of seconds to wait before scaling down.
   cpu_percentage: 50  # Percentage of CPU to target for autoscaling.
   memory_percentage: 60  # Percentage of memory to target for autoscaling.
+  response_time: 3s
 exec: true  # Enable running commands in your container.
 network:
   connect: true  # Enable Service Connect for intra-environment traffic between services.
@@ -44,7 +45,8 @@ network:
 #
 variables:  # Pass environment variables as key value pairs.
   CAPTURE_URL: https://stacker.news/
-  MAX_PAGES: 100
+  MAX_PAGES: 10
+  TIMEOUT: 3000
 
 #secrets:  # Pass secrets from AWS Systems Manager (SSM) Parameter Store.
 #  GITHUB_TOKEN: GITHUB_TOKEN  # The key is the name of the environment variable, the value is the name of the SSM parameter.