commit
5b376bf5b9
|
@ -1,7 +1,7 @@
|
|||
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
|
||||
|
||||
# dependencies
|
||||
/node_modules
|
||||
node_modules/
|
||||
/.pnp
|
||||
.pnp.js
|
||||
.cache
|
||||
|
@ -42,9 +42,6 @@ envbak
|
|||
!.elasticbeanstalk/*.cfg.yml
|
||||
!.elasticbeanstalk/*.global.yml
|
||||
|
||||
# copilot
|
||||
copilot/
|
||||
|
||||
# service worker
|
||||
public/sw.js*
|
||||
sw/precache-manifest.json
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
node_modules
|
|
@ -0,0 +1,15 @@
|
|||
# Puppeteer base image: ships a matching google-chrome-stable and a
# non-root "pptruser" account.
FROM ghcr.io/puppeteer/puppeteer:18.2.1

EXPOSE 5678

# Build steps need root so npm ci can write into /home/pptruser.
USER root
WORKDIR /home/pptruser

# Chrome is already bundled in the base image — skip Puppeteer's browser
# download during npm ci. (ENV key=value is the supported form; the legacy
# "ENV key value" syntax is deprecated.)
ENV PUPPETEER_SKIP_DOWNLOAD=true
# Copy manifests first so the dependency layer is cached across code changes.
COPY ./package*.json ./
RUN npm ci

COPY . .

CMD [ "node", "index.js" ]
# Drop privileges: CMD runs as the unprivileged pptruser.
USER pptruser
|
|
@ -0,0 +1,60 @@
|
|||
import express from 'express'
import puppeteer from 'puppeteer'

// Screenshot microservice: renders pages from CAPTURE_URL in headless Chrome
// and returns them as PNGs. All tunables come from the environment.
// NOTE: process.env values are always strings, so numeric settings must be
// coerced with Number() — otherwise `maxPages + 1` concatenates
// ("5" + 1 === "51") and page.setViewport receives string dimensions.
const captureUrl = process.env.CAPTURE_URL || 'http://host.docker.internal:3000/'
const port = Number(process.env.PORT) || 5678
const maxPages = Number(process.env.MAX_PAGES) || 5
const timeout = Number(process.env.TIMEOUT) || 10000
const cache = Number(process.env.CACHE) || 60000
const width = Number(process.env.WIDTH) || 600
const height = Number(process.env.HEIGHT) || 315
const deviceScaleFactor = Number(process.env.SCALE_FACTOR) || 2

let browser // launched lazily on the first capture request
const app = express()

// Liveness probe for the load balancer.
app.get('/health', (req, res) => {
  res.status(200).end()
})

// Capture any path: the request path is resolved against CAPTURE_URL.
app.get('/*', async (req, res) => {
  browser ||= await puppeteer.launch({
    headless: 'new',
    executablePath: 'google-chrome-stable',
    args: ['--no-sandbox', '--disable-setuid-sandbox']
  })
  const url = new URL(req.originalUrl, captureUrl)
  console.time(url.href)
  console.timeLog(url.href, 'capturing', 'current pages', (await browser.pages()).length)

  // limit number of active pages (+1 accounts for the default about:blank
  // tab every browser starts with)
  if ((await browser.pages()).length > maxPages + 1) {
    console.timeLog(url.href, 'too many pages')
    console.timeEnd(url.href)
    return res.writeHead(503, {
      'Retry-After': 1
    }).end()
  }

  let page
  try {
    page = await browser.newPage()
    await page.setViewport({ width, height, deviceScaleFactor })
    await page.emulateMediaFeatures([{ name: 'prefers-color-scheme', value: 'dark' }])
    await page.goto(url.href, { waitUntil: 'load', timeout })
    const file = await page.screenshot({ type: 'png', captureBeyondViewport: false })
    res.setHeader('Content-Type', 'image/png')
    res.setHeader('Cache-Control', `public, max-age=${cache}, immutable`)
    res.status(200).end(file)
  } catch (err) {
    console.log(err)
    return res.status(500).end()
  } finally {
    console.timeEnd(url.href)
    // page.close() is async — handle rejection so a failed close can't kill
    // the process with an unhandled rejection
    page?.close().catch(console.log)
  }
})

app.listen(port, () =>
  console.log(`Screenshot listen on http://:${port}`)
)
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,16 @@
|
|||
{
|
||||
"name": "capture",
|
||||
"version": "1.0.0",
|
||||
"description": "",
|
||||
"main": "index.js",
|
||||
"scripts": {
|
||||
"test": "echo \"Error: no test specified\" && exit 1"
|
||||
},
|
||||
"author": "",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"express": "^4.18.2",
|
||||
"puppeteer": "^20.8.2"
|
||||
},
|
||||
"type": "module"
|
||||
}
|
|
@ -18,7 +18,7 @@ export function SeoSearch ({ sub }) {
|
|||
description: desc,
|
||||
images: [
|
||||
{
|
||||
url: 'https://stacker.news/api/capture' + router.asPath
|
||||
url: 'https://capture.stacker.news' + router.asPath
|
||||
}
|
||||
],
|
||||
site_name: 'Stacker News'
|
||||
|
@ -81,7 +81,7 @@ export default function Seo ({ sub, item, user }) {
|
|||
description: desc,
|
||||
images: [
|
||||
{
|
||||
url: 'https://stacker.news/api/capture' + pathNoQuery
|
||||
url: 'https://capture.stacker.news' + pathNoQuery
|
||||
}
|
||||
],
|
||||
site_name: 'Stacker News'
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
application: stackernews
|
|
@ -0,0 +1,56 @@
|
|||
# The manifest for the "capture" service.
|
||||
# Read the full specification for the "Load Balanced Web Service" type at:
|
||||
# https://aws.github.io/copilot-cli/docs/manifest/lb-web-service/
|
||||
|
||||
# Your service name will be used in naming your resources like log groups, ECS services, etc.
|
||||
name: capture
|
||||
type: Load Balanced Web Service
|
||||
|
||||
# Distribute traffic to your service.
|
||||
http:
|
||||
# Requests to this path will be forwarded to your service.
|
||||
# To match all requests you can use the "/" path.
|
||||
path: '/'
|
||||
# You can specify a custom health check path. The default is "/".
|
||||
# healthcheck: '/'
|
||||
|
||||
# Configuration for your containers and service.
|
||||
image:
|
||||
# Docker build arguments. For additional overrides: https://aws.github.io/copilot-cli/docs/manifest/lb-web-service/#image-build
|
||||
build: capture/Dockerfile
|
||||
# Port exposed through your container to route traffic to it.
|
||||
port: 5678
|
||||
|
||||
cpu: 1024 # Number of CPU units for the task.
|
||||
memory: 2048 # Amount of memory in MiB used by the task.
|
||||
platform: linux/x86_64 # See https://aws.github.io/copilot-cli/docs/manifest/lb-web-service/#platform
|
||||
count:
|
||||
range:
|
||||
min: 1 # Minimum number of tasks to run for the service.
|
||||
max: 3 # Maximum number of tasks to run for the service.
|
||||
cooldown:
|
||||
in: 60s # Cooldown in seconds before scaling in (removing tasks).
|
||||
out: 60s # Cooldown in seconds before scaling out (adding tasks).
|
||||
cpu_percentage: 50 # Percentage of CPU to target for autoscaling.
|
||||
memory_percentage: 60 # Percentage of memory to target for autoscaling.
|
||||
exec: true # Enable running commands in your container.
|
||||
network:
|
||||
connect: true # Enable Service Connect for intra-environment traffic between services.
|
||||
|
||||
# storage:
|
||||
# readonly_fs: true # Limit to read-only access to mounted root filesystems.
|
||||
|
||||
# Optional fields for more advanced use-cases.
|
||||
#
|
||||
variables: # Pass environment variables as key value pairs.
|
||||
CAPTURE_URL: https://stacker.news/
|
||||
|
||||
#secrets: # Pass secrets from AWS Systems Manager (SSM) Parameter Store.
|
||||
# GITHUB_TOKEN: GITHUB_TOKEN # The key is the name of the environment variable, the value is the name of the SSM parameter.
|
||||
|
||||
# You can override any of the values defined above by environment.
|
||||
#environments:
|
||||
# test:
|
||||
# count: 2 # Number of tasks to run for the "test" environment.
|
||||
# deployment: # The deployment strategy for the "test" environment.
|
||||
# rolling: 'recreate' # Stops existing tasks before new ones are started for faster deployments.
|
|
@ -0,0 +1,21 @@
|
|||
# The manifest for the "capture" environment.
|
||||
# Read the full specification for the "Environment" type at:
|
||||
# https://aws.github.io/copilot-cli/docs/manifest/environment/
|
||||
|
||||
# Your environment name will be used in naming your resources like VPC, cluster, etc.
|
||||
name: capture
|
||||
type: Environment
|
||||
|
||||
# Import your own VPC and subnets or configure how they should be created.
|
||||
# network:
|
||||
# vpc:
|
||||
# id:
|
||||
|
||||
# Configure the load balancers in your environment, once created.
|
||||
# http:
|
||||
# public:
|
||||
# private:
|
||||
|
||||
# Configure observability for your environment resources.
|
||||
observability:
|
||||
container_insights: false
|
|
@ -0,0 +1,21 @@
|
|||
# The manifest for the "imgproxy" environment.
|
||||
# Read the full specification for the "Environment" type at:
|
||||
# https://aws.github.io/copilot-cli/docs/manifest/environment/
|
||||
|
||||
# Your environment name will be used in naming your resources like VPC, cluster, etc.
|
||||
name: imgproxy
|
||||
type: Environment
|
||||
|
||||
# Import your own VPC and subnets or configure how they should be created.
|
||||
# network:
|
||||
# vpc:
|
||||
# id:
|
||||
|
||||
# Configure the load balancers in your environment, once created.
|
||||
# http:
|
||||
# public:
|
||||
# private:
|
||||
|
||||
# Configure observability for your environment resources.
|
||||
observability:
|
||||
container_insights: true
|
|
@ -0,0 +1,63 @@
|
|||
# The manifest for the "imgproxy" service.
|
||||
# Read the full specification for the "Load Balanced Web Service" type at:
|
||||
# https://aws.github.io/copilot-cli/docs/manifest/lb-web-service/
|
||||
|
||||
# Your service name will be used in naming your resources like log groups, ECS services, etc.
|
||||
name: imgproxy
|
||||
type: Load Balanced Web Service
|
||||
|
||||
# Distribute traffic to your service.
|
||||
http:
|
||||
# Requests to this path will be forwarded to your service.
|
||||
# To match all requests you can use the "/" path.
|
||||
path: '/'
|
||||
# You can specify a custom health check path. The default is "/".
|
||||
healthcheck: '/health'
|
||||
|
||||
# Configuration for your containers and service.
|
||||
image:
|
||||
location: ${PRIVATE_REPO}/imgproxy:v3.21.0-ml-amd64
|
||||
# Port exposed through your container to route traffic to it.
|
||||
port: 8080
|
||||
|
||||
cpu: 2048 # 2 vCPUs see https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-cpu-memory-error.html
|
||||
memory: 4096
|
||||
count:
|
||||
range:
|
||||
min: 1 # Minimum number of tasks to run for the service.
|
||||
max: 4 # Maximum number of tasks to run for the service.
|
||||
cooldown:
|
||||
in: 60s # Cooldown in seconds before scaling in (removing tasks).
|
||||
out: 60s # Cooldown in seconds before scaling out (adding tasks).
|
||||
cpu_percentage: 50 # Percentage of CPU to target for autoscaling.
|
||||
memory_percentage: 60 # Percentage of memory to target for autoscaling.
|
||||
exec: true # Enable running commands in your container.
|
||||
network:
|
||||
connect: true # Enable Service Connect for intra-environment traffic between services.
|
||||
|
||||
# storage:
|
||||
# readonly_fs: true # Limit to read-only access to mounted root filesystems.
|
||||
|
||||
# Optional fields for more advanced use-cases.
|
||||
variables: # Pass environment variables as key value pairs.
|
||||
IMGPROXY_ENABLE_WEBP_DETECTION: 1
|
||||
IMGPROXY_ENABLE_AVIF_DETECTION: 1
|
||||
IMGPROXY_MAX_ANIMATION_FRAMES: 2000
|
||||
IMGPROXY_MAX_SRC_RESOLUTION: 50
|
||||
IMGPROXY_MAX_ANIMATION_FRAME_RESOLUTION: 200
|
||||
IMGPROXY_READ_TIMEOUT: 10
|
||||
IMGPROXY_WRITE_TIMEOUT: 10
|
||||
IMGPROXY_DOWNLOAD_TIMEOUT: 9
|
||||
IMGPROXY_WORKERS: 4
|
||||
|
||||
secrets: # Pass secrets from AWS Systems Manager (SSM) Parameter Store.
|
||||
IMGPROXY_KEY: /copilot/${COPILOT_APPLICATION_NAME}/${COPILOT_ENVIRONMENT_NAME}/secrets/imgproxy_key
|
||||
IMGPROXY_SALT: /copilot/${COPILOT_APPLICATION_NAME}/${COPILOT_ENVIRONMENT_NAME}/secrets/imgproxy_salt
|
||||
IMGPROXY_LICENSE_KEY: /copilot/${COPILOT_APPLICATION_NAME}/${COPILOT_ENVIRONMENT_NAME}/secrets/imgproxy_license_key
|
||||
|
||||
# You can override any of the values defined above by environment.
|
||||
#environments:
|
||||
# test:
|
||||
# count: 2 # Number of tasks to run for the "test" environment.
|
||||
# deployment: # The deployment strategy for the "test" environment.
|
||||
# rolling: 'recreate' # Stops existing tasks before new ones are started for faster deployments.
|
File diff suppressed because it is too large
Load Diff
|
@ -57,7 +57,6 @@
|
|||
"nprogress": "^0.2.0",
|
||||
"opentimestamps": "^0.4.9",
|
||||
"page-metadata-parser": "^1.1.4",
|
||||
"pageres": "^7.1.0",
|
||||
"pg-boss": "^9.0.3",
|
||||
"piexifjs": "^1.0.6",
|
||||
"prisma": "^5.4.2",
|
||||
|
|
|
@ -1,78 +0,0 @@
|
|||
import path from 'path'
import AWS from 'aws-sdk'
import { PassThrough } from 'stream'
import { datePivot } from '../../../lib/time'
const { spawn } = require('child_process')
const encodeS3URI = require('node-s3-url-encode')

const bucketName = 'sn-capture'
const bucketRegion = 'us-east-1'
const contentType = 'image/png'
const bucketUrl = 'https://sn-capture.s3.amazonaws.com/'
// dev captures live under a separate prefix so they don't mix with prod objects
const s3PathPrefix = process.env.NODE_ENV === 'development' ? 'dev/' : ''
// process-wide flag: at most one capture subprocess runs at a time
let capturing = false

AWS.config.update({
  region: bucketRegion
})

// Serve a screenshot of the requested site path. If a sufficiently fresh
// (< 15 min) copy exists in S3, redirect to it; otherwise spawn the capture
// script and stream its PNG output to the client while simultaneously
// uploading the same bytes to S3 for future requests.
export default async function handler (req, res) {
  return new Promise(resolve => {
    // catch-all route: req.query.path holds the split URL path segments
    const joinedPath = path.join(...(req.query.path || []))
    const searchQ = req.query.q ? `?q=${req.query.q}` : ''
    // the PUT key keeps the raw query string; the GET key is URL-encoded for
    // use in the redirect Location header
    const s3PathPUT = s3PathPrefix + (joinedPath === '.' ? '_' : joinedPath) + searchQ
    const s3PathGET = s3PathPrefix + (joinedPath === '.' ? '_' : joinedPath) + encodeS3URI(searchQ)
    const url = process.env.PUBLIC_URL + '/' + joinedPath + searchQ
    const aws = new AWS.S3({ apiVersion: '2006-03-01' })

    // check to see if we have a recent version of the object
    aws.headObject({
      Bucket: bucketName,
      Key: s3PathPUT,
      // HEAD fails (412/304 → rejection) unless modified within last 15 min
      IfModifiedSince: datePivot(new Date(), { minutes: -15 })
    }).promise().then(() => {
      // this path is cached so return it
      res.writeHead(302, { Location: bucketUrl + s3PathGET }).end()
      resolve()
    }).catch(() => {
      // we don't have it cached, so capture it and cache it
      if (capturing) {
        // another capture is already in flight — tell the client to retry
        return res.writeHead(503, {
          'Retry-After': 1
        }).end()
      }

      capturing = true
      // tee the subprocess output: one copy to the client, one to S3
      const pass = new PassThrough()
      aws.upload({
        Bucket: bucketName,
        Key: s3PathPUT,
        ACL: 'public-read',
        Body: pass,
        ContentType: contentType
      }).promise().catch(console.log)

      res.setHeader('Content-Type', contentType)
      const capture = spawn(
        'node', ['./spawn/capture.js', url], { maxBuffer: 1024 * 1024 * 5 })

      capture.on('close', code => {
        // non-zero exit means the capture script failed
        if (code !== 0) {
          res.status(500).end()
        } else {
          res.status(200).end()
        }
        pass.end()
        capture.removeAllListeners()
        capturing = false
        resolve()
      })
      capture.on('error', err => console.log('error', err))
      capture.stderr.on('data', data => console.log('error stderr', data.toString()))
      capture.stdout.on('data', data => {
        // forward PNG bytes to both the HTTP response and the S3 upload
        res.write(data)
        pass.write(data)
      })
    })
  })
}
|
|
@ -1,17 +0,0 @@
|
|||
#!/usr/bin/node

import Pageres from 'pageres'

// Capture a 600x315 screenshot of the URL passed as argv[2] and stream the
// PNG to stdout. Exits 0 once the write flushes, 1 on any failure.
async function captureUrl () {
  try {
    const pageres = new Pageres({
      crop: true,
      scale: 2,
      timeout: 10,
      launchOptions: { args: ['--single-process'] }
    })
    pageres.source(process.argv[2], ['600x315'])
    const streams = await pageres.run()
    process.stdout.write(streams[0], () => process.exit(0))
  } catch (err) {
    console.log(err)
    process.exit(1)
  }
}

captureUrl()
|
|
@ -1,3 +0,0 @@
|
|||
{
|
||||
"type": "module"
|
||||
}
|
Loading…
Reference in New Issue