const WebSocket = require('ws') // You might need to install this: npm install ws
const { nip19 } = require('nostr-tools') // Used for encoding/decoding npub and note1 identifiers
const fs = require('fs')
const path = require('path')
// ANSI color codes
const colors = {
  reset: '\x1b[0m',
  bright: '\x1b[1m',
  dim: '\x1b[2m',
  underscore: '\x1b[4m',
  blink: '\x1b[5m',
  reverse: '\x1b[7m',
  hidden: '\x1b[8m',
  fg: {
    black: '\x1b[30m',
    red: '\x1b[31m',
    green: '\x1b[32m',
    yellow: '\x1b[33m',
    blue: '\x1b[34m',
    magenta: '\x1b[35m',
    cyan: '\x1b[36m',
    white: '\x1b[37m',
    gray: '\x1b[90m',
    crimson: '\x1b[38m'
  },
  bg: {
    black: '\x1b[40m',
    red: '\x1b[41m',
    green: '\x1b[42m',
    yellow: '\x1b[43m',
    blue: '\x1b[44m',
    magenta: '\x1b[45m',
    cyan: '\x1b[46m',
    white: '\x1b[47m',
    gray: '\x1b[100m',
    crimson: '\x1b[48m'
  }
}
// Default configuration
let config = {
  userPubkeys: [],
  ignorePubkeys: [],
  timeIntervalHours: 12,
  verbosity: 'normal', // Can be 'minimal', 'normal', or 'debug'
  relayUrls: [
    'wss://relay.nostr.band',
    'wss://relay.primal.net',
    'wss://relay.damus.io'
  ],
  batchSize: 100,
  mediaPatterns: [
    {
      type: 'extensions',
      patterns: ['\\.jpg$', '\\.jpeg$', '\\.png$', '\\.gif$', '\\.bmp$', '\\.webp$', '\\.tiff$', '\\.ico$',
        '\\.mp4$', '\\.webm$', '\\.mov$', '\\.avi$', '\\.mkv$', '\\.flv$', '\\.wmv$',
        '\\.mp3$', '\\.wav$', '\\.ogg$', '\\.flac$', '\\.aac$', '\\.m4a$']
    },
    {
      type: 'domains',
      patterns: [
        'nostr\\.build\\/[ai]\\/\\w+',
        'i\\.imgur\\.com\\/\\w+',
        'i\\.ibb\\.co\\/\\w+\\/',
        'tenor\\.com\\/view\\/',
        'giphy\\.com\\/gifs\\/',
        'soundcloud\\.com\\/',
        'spotify\\.com\\/',
        'fountain\\.fm\\/'
      ]
    }
  ]
}
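
// A config file can override any of the defaults above (see loadConfig and
// main below). A minimal sketch of nostr-link-extract.config.json -- the npub
// values are placeholders, not real keys:
//
// {
//   "userPubkeys": ["npub1yourkeyhere..."],
//   "ignorePubkeys": ["npub1mutedkeyhere..."],
//   "timeIntervalHours": 24,
//   "verbosity": "debug",
//   "relayUrls": ["wss://relay.damus.io"],
//   "batchSize": 50
// }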
/**
 * Logger utility that respects the configured verbosity level
 */
const logger = {
  // Always show error messages
  error: (message) => {
    console.error(`${colors.fg.red}Error: ${message}${colors.reset}`)
  },
  // Minimal essential info - always show regardless of verbosity
  info: (message) => {
    console.log(`${colors.fg.green}${message}${colors.reset}`)
  },
  // Progress updates - show in normal and debug modes
  progress: (message) => {
    if (config.verbosity !== 'minimal') {
      console.log(`${colors.fg.blue}${message}${colors.reset}`)
    }
  },
  // Detailed debug info - only show in debug mode
  debug: (message) => {
    if (config.verbosity === 'debug') {
      console.log(`${colors.fg.gray}${message}${colors.reset}`)
    }
  },
  // Results info - formatted differently for clarity
  result: (message) => {
    console.log(`${colors.bright}${colors.fg.green}${message}${colors.reset}`)
  }
}
/**
 * Load configuration from a JSON file
 * @param {String} configPath - Path to the config file
 * @returns {Object} - Configuration object
 */
function loadConfig (configPath) {
  try {
    const configData = fs.readFileSync(configPath, 'utf8')
    const loadedConfig = JSON.parse(configData)
    // Merge with the default config so all properties exist
    return { ...config, ...loadedConfig }
  } catch (error) {
    logger.error(`Error loading config file: ${error.message}`)
    logger.info('Using default configuration')
    return config
  }
}
/**
 * Checks if a URL is a media file or hosted on a media platform based on configured patterns
 * @param {String} url - URL to check
 * @returns {Boolean} - true if it's likely a media URL
 */
function isMediaUrl (url) {
  // Check for media patterns from config
  if (config.mediaPatterns) {
    for (const patternGroup of config.mediaPatterns) {
      for (const pattern of patternGroup.patterns) {
        const regex = new RegExp(pattern, 'i')
        if (regex.test(url)) return true
      }
    }
  }
  return false
}
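
// For example, with the default patterns above:
//   isMediaUrl('https://i.imgur.com/abc123.jpg') // true (extension and domain both match)
//   isMediaUrl('https://example.com/article')    // false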
/**
 * Fetches events from Nostr relays using WebSockets
 * @param {Array} relayUrls - Array of relay URLs
 * @param {Object} filter - Nostr filter object
 * @param {Number} timeoutMs - Timeout in milliseconds
 * @returns {Promise<Array>} - Array of events matching the filter
 */
async function fetchEvents (relayUrls, filter, timeoutMs = 10000) {
  logger.debug(`Fetching events with filter: ${JSON.stringify(filter)}`)

  const events = []
  for (const url of relayUrls) {
    try {
      const ws = new WebSocket(url)
      const relayEvents = await new Promise((resolve, reject) => {
        const timeout = setTimeout(() => {
          ws.close()
          resolve([]) // Resolve with empty array on timeout
        }, timeoutMs)
        const localEvents = []
        ws.on('open', () => {
          // Create a unique request ID
          const requestId = `req${Math.floor(Math.random() * 10000)}`
          // Format and send the request
          const request = JSON.stringify(['REQ', requestId, filter])
          ws.send(request)
          ws.on('message', (data) => {
            try {
              const message = JSON.parse(data.toString())
              // Check if it's an EVENT message
              if (message[0] === 'EVENT' && message[2]) {
                localEvents.push(message[2])
              } else if (message[0] === 'EOSE') {
                clearTimeout(timeout)
                ws.close()
                resolve(localEvents)
              }
            } catch (error) {
              logger.debug(`Error parsing message: ${error.message}`)
            }
          })
        })
        ws.on('error', (error) => {
          logger.debug(`WebSocket error for ${url}: ${error.message}`)
          clearTimeout(timeout)
          resolve([]) // Resolve with empty array on error
        })
        ws.on('close', () => {
          clearTimeout(timeout)
          resolve(localEvents)
        })
      })
      logger.debug(`Got ${relayEvents.length} events from ${url}`)
      events.push(...relayEvents)
    } catch (error) {
      logger.debug(`Error connecting to ${url}: ${error.message}`)
    }
  }
  // Remove duplicates based on event ID
  const uniqueEvents = {}
  events.forEach(event => {
    if (!uniqueEvents[event.id]) {
      uniqueEvents[event.id] = event
    }
  })
  return Object.values(uniqueEvents)
}
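
// Example usage (a sketch; the author value is a placeholder hex pubkey):
//   const notes = await fetchEvents(
//     ['wss://relay.damus.io'],
//     { kinds: [1], authors: ['<64-char hex pubkey>'], since: Math.floor(Date.now() / 1000) - 3600 },
//     15000
//   )
// Under NIP-01, each relay answers the REQ with zero or more EVENT messages
// followed by EOSE ("end of stored events"), which is what the message
// handler above waits for before closing the socket.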
/**
 * Get Nostr notes from followings of specified users that contain external links
 * and were posted within the specified time interval.
 *
 * @param {Array} userPubkeys - Array of Nostr user public keys
 * @param {Number} timeIntervalHours - Number of hours to look back from now
 * @param {Array} relayUrls - Array of Nostr relay URLs
 * @param {Array} ignorePubkeys - Array of pubkeys to ignore (optional)
 * @returns {Promise<Array>} - Array of note objects containing external links within the time interval
 */
async function getNotesWithLinks (userPubkeys, timeIntervalHours, relayUrls, ignorePubkeys = []) {
  // Calculate the cutoff time in seconds (Nostr uses UNIX timestamps)
  const now = Math.floor(Date.now() / 1000)
  const cutoffTime = now - (timeIntervalHours * 60 * 60)
  const allNotesWithLinks = []
  const allFollowedPubkeys = new Set() // To collect all followed pubkeys
  const ignoreSet = new Set(ignorePubkeys) // Convert the ignore list to a Set for efficient lookups
  if (ignoreSet.size > 0) {
    logger.debug(`Ignoring ${ignoreSet.size} author(s) as requested`)
  }

  logger.info(`Fetching follow lists for ${userPubkeys.length} users...`)

  // First get the followings for each user
  for (const pubkey of userPubkeys) {
    try {
      // Skip if this pubkey is in the ignore list
      if (ignoreSet.has(pubkey)) {
        logger.debug(`Skipping user ${pubkey} as it's in the ignore list`)
        continue
      }
      logger.debug(`Fetching follow list for ${pubkey} from ${relayUrls.length} relays...`)

      // Get the most recent contact list (kind 3)
      const followListEvents = await fetchEvents(relayUrls, {
        kinds: [3],
        authors: [pubkey]
      })
      if (followListEvents.length === 0) {
        logger.debug(`No follow list found for user ${pubkey}. Verify this pubkey has contacts on these relays.`)
        continue
      }
      // Find the most recent follow list event
      const latestFollowList = followListEvents.reduce((latest, event) =>
        !latest || event.created_at > latest.created_at ? event : latest, null)
      if (!latestFollowList) {
        logger.debug(`No valid follow list found for user ${pubkey}`)
        continue
      }
      logger.debug(`Found follow list created at: ${new Date(latestFollowList.created_at * 1000).toISOString()}`)

      // Check that the tags property exists
      if (!latestFollowList.tags) {
        logger.debug(`No tags found in follow list for user ${pubkey}`)
        logger.debug(`Follow list data: ${JSON.stringify(latestFollowList, null, 2)}`)
        continue
      }
      // Extract followed pubkeys from the follow list (tag type 'p')
      const followedPubkeys = latestFollowList.tags
        .filter(tag => tag[0] === 'p')
        .map(tag => tag[1])
        .filter(pk => !ignoreSet.has(pk)) // Filter out pubkeys from the ignore list

      if (!followedPubkeys || followedPubkeys.length === 0) {
        logger.debug(`No followed users found for user ${pubkey} (after filtering ignore list)`)
        continue
      }
      // Add all followed pubkeys to our set
      followedPubkeys.forEach(pk => allFollowedPubkeys.add(pk))
      logger.debug(`Added ${followedPubkeys.length} followed users for ${pubkey} (total: ${allFollowedPubkeys.size})`)
    } catch (error) {
      logger.error(`Error processing user ${pubkey}: ${error}`)
    }
  }
  // If we found any followed pubkeys, fetch their notes in batches
  if (allFollowedPubkeys.size > 0) {
    // Convert the Set to an Array for the filter
    const followedPubkeysArray = Array.from(allFollowedPubkeys)
    const batchSize = config.batchSize || 100 // Use config batch size or default to 100
    const totalBatches = Math.ceil(followedPubkeysArray.length / batchSize)

    logger.progress(`Processing ${followedPubkeysArray.length} followed users in ${totalBatches} batches...`)

    // Process in batches
    for (let batchNum = 0; batchNum < totalBatches; batchNum++) {
      const start = batchNum * batchSize
      const end = Math.min(start + batchSize, followedPubkeysArray.length)
      const batch = followedPubkeysArray.slice(start, end)

      logger.progress(`Fetching batch ${batchNum + 1}/${totalBatches} (${batch.length} authors)...`)

      // Fetch notes from the current batch of users
      const notes = await fetchEvents(relayUrls, {
        kinds: [1],
        authors: batch,
        since: cutoffTime
      }, 30000) // Use a longer timeout for this larger query

      logger.debug(`Retrieved ${notes.length} notes from batch ${batchNum + 1}`)

      // Filter notes that have URLs (excluding notes with only media URLs)
      const notesWithUrls = notes.filter(note => {
        // Extract all URLs from the content
        const urlRegex = /(https?:\/\/[^\s]+)/g
        const matches = note.content.match(urlRegex) || []

        if (matches.length === 0) return false // No URLs at all

        // Check if any URL is not a media file
        const hasNonMediaUrl = matches.some(url => !isMediaUrl(url))

        return hasNonMediaUrl
      })
      logger.debug(`Found ${notesWithUrls.length} notes containing non-media URLs in batch ${batchNum + 1}`)

      // Get all unique authors from the filtered notes in this batch
      const authorsWithUrls = new Set(notesWithUrls.map(note => note.pubkey))
      // Fetch metadata for all relevant authors in this batch
      if (authorsWithUrls.size > 0) {
        logger.debug(`Fetching metadata for ${authorsWithUrls.size} authors from batch ${batchNum + 1}...`)
        const allMetadata = await fetchEvents(relayUrls, {
          kinds: [0],
          authors: Array.from(authorsWithUrls)
        })
        // Create a map of author pubkey to their latest metadata
        const metadataByAuthor = {}
        allMetadata.forEach(meta => {
          if (!metadataByAuthor[meta.pubkey] || meta.created_at > metadataByAuthor[meta.pubkey].created_at) {
            metadataByAuthor[meta.pubkey] = meta
          }
        })
        // Attach metadata to notes
        for (const note of notesWithUrls) {
          if (metadataByAuthor[note.pubkey]) {
            try {
              const metadata = JSON.parse(metadataByAuthor[note.pubkey].content)
              note.userMetadata = metadata
            } catch (e) {
              logger.debug(`Error parsing metadata for ${note.pubkey}: ${e.message}`)
            }
          }
        }
      }
      // Add all notes with URLs from this batch to our results
      allNotesWithLinks.push(...notesWithUrls)
      // Show incremental progress during batch processing
      if (allNotesWithLinks.length > 0 && batchNum < totalBatches - 1) {
        logger.progress(`Found ${allNotesWithLinks.length} notes with links so far...`)
      }
    }
    logger.progress(`Completed processing all ${totalBatches} batches`)
  }
  return allNotesWithLinks
}
/**
 * Format the notes for display with colorful output
 *
 * @param {Array} notes - Array of note objects
 * @returns {String} - Formatted string with note information
 */
function formatNoteOutput (notes) {
  const output = []
  for (const note of notes) {
    // Encode the note ID (note1...) and the author pubkey (npub1...) for display
    const noteId = nip19.noteEncode(note.id)
    const pubkey = nip19.npubEncode(note.pubkey)
    // Get the user's display name, or fall back to the npub
    const userName = note.userMetadata
      ? (note.userMetadata.display_name || note.userMetadata.name || pubkey)
      : pubkey
    // Get the timestamp as a readable date
    const timestamp = new Date(note.created_at * 1000).toISOString()
    // Extract URLs from the content, marking media URLs with colors
    const urlRegex = /(https?:\/\/[^\s]+)/g
    const matches = note.content.match(urlRegex) || []
    // Format URLs with colors
    const markedUrls = matches.map(url => {
      const isMedia = isMediaUrl(url)
      if (isMedia) {
        return `${colors.fg.gray}${url}${colors.reset} (media)`
      } else {
        return `${colors.bright}${colors.fg.cyan}${url}${colors.reset}`
      }
    })
    // Format output with colors
    output.push(`${colors.bright}${colors.fg.yellow}Note by ${colors.fg.magenta}${userName}${colors.fg.yellow} at ${timestamp}${colors.reset}`)
    output.push(`${colors.fg.green}Note ID: ${colors.reset}${noteId}`)
    output.push(`${colors.fg.green}Pubkey: ${colors.reset}${pubkey}`)
    // Add links with a heading
    output.push(`${colors.bright}${colors.fg.blue}External URLs:${colors.reset}`)
    markedUrls.forEach(url => {
      output.push(`${url}`)
    })
    // Add content with a heading
    output.push(`${colors.bright}${colors.fg.blue}Note content:${colors.reset}`)
    // Colorize any links in the content when displaying
    let coloredContent = note.content
    for (const url of matches) {
      const isMedia = isMediaUrl(url)
      const colorCode = isMedia ? colors.fg.gray : colors.bright + colors.fg.cyan
      coloredContent = coloredContent.replace(
        new RegExp(escapeRegExp(url), 'g'),
        `${colorCode}${url}${colors.reset}`
      )
    }
    output.push(coloredContent)
    output.push(`${colors.fg.yellow}${'-'.repeat(50)}${colors.reset}`)
  }
  return output.join('\n')
}
/**
 * Escape special characters for use in a regular expression
 * @param {String} string - String to escape
 * @returns {String} - Escaped string
 */
function escapeRegExp (string) {
  return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
}
/**
 * Convert a pubkey from npub to hex format if needed
 * @param {String} key - Pubkey in either npub or hex format
 * @returns {String} - Pubkey in hex format
 */
function normalizeToHexPubkey (key) {
  // If it's an npub, decode it
  if (typeof key === 'string' && key.startsWith('npub1')) {
    try {
      const { type, data } = nip19.decode(key)
      if (type === 'npub') {
        return data
      }
    } catch (e) {
      logger.error(`Error decoding npub ${key}: ${e.message}`)
    }
  }
  // Otherwise assume it's already in hex format
  return key
}
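
// For example (values abridged):
//   normalizeToHexPubkey('npub1...')  // -> 64-character hex pubkey
//   normalizeToHexPubkey('3bf0c6...') // -> returned unchanged (already hex)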
/**
 * Main function to execute the script
 */
async function main () {
  // Load configuration from file
  const configPath = path.join(__dirname, 'nostr-link-extract.config.json')
  logger.info(`Loading configuration from ${configPath}`)
  config = loadConfig(configPath)

  try {
    logger.info(`Starting Nostr link extraction (time interval: ${config.timeIntervalHours} hours)`)
    // Convert any npub-format keys to hex
    const hexUserPubkeys = config.userPubkeys.map(normalizeToHexPubkey)
    const hexIgnorePubkeys = config.ignorePubkeys.map(normalizeToHexPubkey)
    // Log the conversion for clarity (helpful for debugging)
    if (config.userPubkeys.some(key => key.startsWith('npub1'))) {
      logger.debug('Converted user npubs to hex format for the Nostr protocol')
    }
    if (config.ignorePubkeys.some(key => key.startsWith('npub1'))) {
      logger.debug('Converted ignore list npubs to hex format for the Nostr protocol')
    }
    const notesWithLinks = await getNotesWithLinks(
      hexUserPubkeys,
      config.timeIntervalHours,
      config.relayUrls,
      hexIgnorePubkeys
    )

    if (notesWithLinks.length > 0) {
      const formattedOutput = formatNoteOutput(notesWithLinks)
      console.log(formattedOutput)
      logger.result(`Total notes with links: ${notesWithLinks.length}`)
    } else {
      logger.info('No notes with links found in the specified time interval.')
    }
  } catch (error) {
    logger.error(`${error}`)
  }
}
// Execute the script
main()
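
// To run (assumes `ws` and `nostr-tools` are installed, and that the config
// file, if any, sits next to this script):
//   node scripts/nostr-link-extract.js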