refine capture svc

This commit is contained in:
keyan 2024-02-03 20:14:54 -06:00
parent 05d866883a
commit 1135fff77c
3 changed files with 56 additions and 11 deletions

View File

@ -11,7 +11,7 @@ RUN npm ci
COPY . . COPY . .
RUN curl http://ftp.de.debian.org/debian/pool/main/f/fonts-noto-color-emoji/fonts-noto-color-emoji_0~20200916-1_all.deb \ ADD http://ftp.de.debian.org/debian/pool/main/f/fonts-noto-color-emoji/fonts-noto-color-emoji_0~20200916-1_all.deb fonts-noto-color-emoji.deb
-o fonts-noto-color-emoji.deb && dpkg -i fonts-noto-color-emoji.deb && rm fonts-noto-color-emoji.deb RUN dpkg -i fonts-noto-color-emoji.deb
CMD [ "node", "index.js" ] CMD [ "node", "index.js" ]
USER pptruser USER pptruser

View File

@ -3,12 +3,50 @@ import puppeteer from 'puppeteer'
const captureUrl = process.env.CAPTURE_URL || 'http://host.docker.internal:3000/' const captureUrl = process.env.CAPTURE_URL || 'http://host.docker.internal:3000/'
const port = process.env.PORT || 5678 const port = process.env.PORT || 5678
const maxPages = process.env.MAX_PAGES || 5 const maxPages = Number(process.env.MAX_PAGES) || 5
const timeout = process.env.TIMEOUT || 10000 const timeout = Number(process.env.TIMEOUT) || 10000
const cache = process.env.CACHE || 60000 const cache = process.env.CACHE || 60000
const width = process.env.WIDTH || 600 const width = process.env.WIDTH || 600
const height = process.env.HEIGHT || 315 const height = process.env.HEIGHT || 315
const deviceScaleFactor = process.env.SCALE_FACTOR || 2 const deviceScaleFactor = process.env.SCALE_FACTOR || 2
// Chrome command-line switches supplied to puppeteer.launch() via its `args`
// option. The set is taken from
// https://www.bannerbear.com/blog/ways-to-speed-up-puppeteer-screenshots/
// and is listed alphabetically, one switch per line.
const args = [
  '--autoplay-policy=user-gesture-required',

  // --disable-* switches
  '--disable-background-networking',
  '--disable-background-timer-throttling',
  '--disable-backgrounding-occluded-windows',
  '--disable-breakpad',
  '--disable-client-side-phishing-detection',
  '--disable-component-update',
  '--disable-default-apps',
  '--disable-dev-shm-usage',
  '--disable-domain-reliability',
  '--disable-extensions',
  '--disable-features=AudioServiceOutOfProcess',
  '--disable-hang-monitor',
  '--disable-ipc-flooding-protection',
  '--disable-notifications',
  '--disable-offer-store-unmasked-wallet-cards',
  '--disable-popup-blocking',
  '--disable-print-preview',
  '--disable-prompt-on-repost',
  '--disable-renderer-backgrounding',
  '--disable-setuid-sandbox',
  '--disable-speech-api',
  '--disable-sync',

  '--hide-scrollbars',
  // NOTE(review): newer Chromium releases appear to spell this switch
  // `--ignore-gpu-blocklist`; confirm whether the installed Chrome still
  // recognises the name below before relying on it.
  '--ignore-gpu-blacklist',
  '--metrics-recording-only',
  '--mute-audio',

  // --no-* switches
  '--no-default-browser-check',
  '--no-first-run',
  '--no-pings',
  '--no-sandbox',
  '--no-zygote',

  '--password-store=basic',
  '--use-gl=swiftshader',
  '--use-mock-keychain'
]
let browser let browser
const app = express() const app = express()
@ -21,20 +59,24 @@ app.get('/*', async (req, res) => {
const url = new URL(req.originalUrl, captureUrl) const url = new URL(req.originalUrl, captureUrl)
const timeLabel = `${Date.now()}-${url.href}` const timeLabel = `${Date.now()}-${url.href}`
let page let page, pages
try { try {
console.time(timeLabel) console.time(timeLabel)
browser ||= await puppeteer.launch({ browser ||= await puppeteer.launch({
headless: 'new', headless: 'new',
useDataDir: './data',
executablePath: 'google-chrome-stable', executablePath: 'google-chrome-stable',
args: ['--no-sandbox', '--disable-setuid-sandbox'] args,
protocolTimeout: timeout,
defaultViewport: { width, height, deviceScaleFactor }
}) })
console.timeLog(timeLabel, 'capturing', 'current pages', (await browser.pages()).length) pages = (await browser.pages()).length
console.timeLog(timeLabel, 'capturing', 'current pages', pages)
// limit number of active pages // limit number of active pages
if ((await browser.pages()).length > maxPages + 1) { if (pages > maxPages + 1) {
console.timeLog(timeLabel, 'too many pages') console.timeLog(timeLabel, 'too many pages')
return res.writeHead(503, { return res.writeHead(503, {
'Retry-After': 1 'Retry-After': 1
@ -42,10 +84,11 @@ app.get('/*', async (req, res) => {
} }
page = await browser.newPage() page = await browser.newPage()
await page.setViewport({ width, height, deviceScaleFactor })
await page.emulateMediaFeatures([{ name: 'prefers-color-scheme', value: 'dark' }]) await page.emulateMediaFeatures([{ name: 'prefers-color-scheme', value: 'dark' }])
await page.goto(url.href, { waitUntil: 'load', timeout }) await page.goto(url.href, { waitUntil: 'load', timeout })
console.timeLog(timeLabel, 'page loaded')
const file = await page.screenshot({ type: 'png', captureBeyondViewport: false }) const file = await page.screenshot({ type: 'png', captureBeyondViewport: false })
console.timeLog(timeLabel, 'screenshot complete')
res.setHeader('Content-Type', 'image/png') res.setHeader('Content-Type', 'image/png')
res.setHeader('Cache-Control', `public, max-age=${cache}, immutable, stale-while-revalidate=${cache * 24}, stale-if-error=${cache * 24}`) res.setHeader('Cache-Control', `public, max-age=${cache}, immutable, stale-while-revalidate=${cache * 24}, stale-if-error=${cache * 24}`)
return res.status(200).end(file) return res.status(200).end(file)
@ -53,7 +96,7 @@ app.get('/*', async (req, res) => {
console.timeLog(timeLabel, 'error', err) console.timeLog(timeLabel, 'error', err)
return res.status(500).end() return res.status(500).end()
} finally { } finally {
console.timeEnd(timeLabel) console.timeEnd(timeLabel, 'pages at start', pages)
page?.close().catch(console.error) page?.close().catch(console.error)
} }
}) })

View File

@ -33,6 +33,7 @@ count:
out: 60s # Number of seconds to wait before scaling down. out: 60s # Number of seconds to wait before scaling down.
cpu_percentage: 50 # Percentage of CPU to target for autoscaling. cpu_percentage: 50 # Percentage of CPU to target for autoscaling.
memory_percentage: 60 # Percentage of memory to target for autoscaling. memory_percentage: 60 # Percentage of memory to target for autoscaling.
response_time: 3s
exec: true # Enable running commands in your container. exec: true # Enable running commands in your container.
network: network:
connect: true # Enable Service Connect for intra-environment traffic between services. connect: true # Enable Service Connect for intra-environment traffic between services.
@ -44,7 +45,8 @@ network:
# #
variables: # Pass environment variables as key value pairs. variables: # Pass environment variables as key value pairs.
CAPTURE_URL: https://stacker.news/ CAPTURE_URL: https://stacker.news/
MAX_PAGES: 100 MAX_PAGES: 10
TIMEOUT: 3000
#secrets: # Pass secrets from AWS Systems Manager (SSM) Parameter Store. #secrets: # Pass secrets from AWS Systems Manager (SSM) Parameter Store.
# GITHUB_TOKEN: GITHUB_TOKEN # The key is the name of the environment variable, the value is the name of the SSM parameter. # GITHUB_TOKEN: GITHUB_TOKEN # The key is the name of the environment variable, the value is the name of the SSM parameter.