#!/usr/bin/env node

const { execSync } = require('child_process')
module.paths.push(execSync('npm config get prefix').toString().trim() + '/lib/node_modules')
const { TwitterApi } = require('twitter-api-v2')
const fs = require('fs')
const path = require('path')
const sqlite3 = require('sqlite3').verbose()
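
// Setup note (illustrative, not part of the original file): the module.paths
// tweak above lets the script fall back to twitter-api-v2 and sqlite3 from the
// global npm prefix, so a typical setup is assumed to look like:
//
//   npm install -g twitter-api-v2 sqlite3
//   ./twitter-link-extractor.js        (script filename assumed here)
//
// The script reads twitter-link-extract.config.json from its own directory and
// persists its cache in twitter-links.db alongside it.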

// ANSI color codes for output formatting
const colors = {
  reset: '\x1b[0m',
  bright: '\x1b[1m',
  fg: {
    green: '\x1b[32m',
    blue: '\x1b[34m',
    yellow: '\x1b[33m',
    red: '\x1b[31m',
    cyan: '\x1b[36m',
    gray: '\x1b[90m'
  }
}

// Add DB utilities for persistent caching
const db = {
  connection: null,

  async init () {
    return new Promise((resolve, reject) => {
      const dbPath = path.join(__dirname, 'twitter-links.db')
      this.connection = new sqlite3.Database(dbPath, (err) => {
        if (err) {
          logger.error(`Error opening database: ${err.message}`)
          reject(err)
          return
        }

        this.connection.run(`
          CREATE TABLE IF NOT EXISTS tweets (
            id TEXT PRIMARY KEY,
            author_id TEXT,
            content TEXT,
            created_at TEXT,
            author_username TEXT,
            author_name TEXT,
            processed_at INTEGER
          )
        `, (err) => {
          if (err) {
            logger.error(`Error creating table: ${err.message}`)
            reject(err)
            return
          }

          // Add the processed_replies and cache_info tables
          this.connection.run(`
            CREATE TABLE IF NOT EXISTS processed_replies (
              tweet_id TEXT PRIMARY KEY,
              processed_at INTEGER
            )
          `, (err) => {
            if (err) {
              logger.error(`Error creating processed_replies table: ${err.message}`)
              reject(err)
              return
            }

            this.connection.run(`
              CREATE TABLE IF NOT EXISTS cache_info (
                key TEXT PRIMARY KEY,
                value TEXT,
                updated_at INTEGER
              )
            `, (err) => {
              if (err) {
                logger.error(`Error creating cache_info table: ${err.message}`)
                reject(err)
                return
              }

              this.connection.run(`
                CREATE TABLE IF NOT EXISTS url_history (
                  url TEXT PRIMARY KEY,
                  first_seen INTEGER,
                  last_seen INTEGER,
                  seen_count INTEGER DEFAULT 1,
                  hosts_sharing INTEGER DEFAULT 1
                )
              `, (err) => {
                if (err) {
                  logger.error(`Error creating url_history table: ${err.message}`)
                  reject(err)
                  return
                }

                resolve()
              })
            })
          })
        })
      })
    })
  },

  async getLatestTweetTimestamp () {
    return new Promise((resolve, reject) => {
      this.connection.get(
        'SELECT MAX(created_at) as latest FROM tweets',
        (err, row) => {
          if (err) {
            reject(err)
            return
          }
          // Add validation to ensure we don't get a future date
          const now = new Date()
          const latestDate = row?.latest ? new Date(row.latest) : new Date(0)

          // If latest is in the future or invalid, return epoch
          if (!latestDate || latestDate > now) {
            resolve('1970-01-01T00:00:00.000Z')
            return
          }

          resolve(latestDate.toISOString())
        }
      )
    })
  },

  async saveTweet (tweet) {
    return new Promise((resolve, reject) => {
      this.connection.run(
        `INSERT OR IGNORE INTO tweets (id, author_id, content, created_at, author_username, author_name, processed_at)
         VALUES (?, ?, ?, ?, ?, ?, ?)`,
        [
          tweet.id,
          tweet.author_id,
          tweet.text,
          tweet.created_at,
          tweet.author_username,
          tweet.author_name,
          Math.floor(Date.now() / 1000)
        ],
        (err) => {
          if (err) {
            reject(err)
            return
          }
          resolve()
        }
      )
    })
  },

  // Add method to load cached tweet IDs
  async loadCachedTweetIds () {
    return new Promise((resolve, reject) => {
      this.connection.all(
        'SELECT id FROM tweets',
        (err, rows) => {
          if (err) {
            reject(err)
            return
          }

          const tweetIds = rows.map(row => row.id)
          resolve(tweetIds)
        }
      )
    })
  },

  // Add method to check if a tweet's replies have been processed
  async isReplyProcessed (tweetId) {
    return new Promise((resolve, reject) => {
      this.connection.get(
        'SELECT tweet_id FROM processed_replies WHERE tweet_id = ?',
        [tweetId],
        (err, row) => {
          if (err) {
            reject(err)
            return
          }
          resolve(!!row) // Return true if we found a record
        }
      )
    })
  },

  // Add method to mark a tweet as having its replies processed
  async markRepliesProcessed (tweetId) {
    return new Promise((resolve, reject) => {
      this.connection.run(
        'INSERT OR REPLACE INTO processed_replies (tweet_id, processed_at) VALUES (?, ?)',
        [tweetId, Math.floor(Date.now() / 1000)],
        (err) => {
          if (err) {
            reject(err)
            return
          }
          resolve()
        }
      )
    })
  },

  // Add method to track API usage
  async recordApiUsage (endpoint, count = 1) {
    const now = Math.floor(Date.now() / 1000)
    const today = new Date().toISOString().split('T')[0]

    return new Promise((resolve, reject) => {
      this.connection.get(
        'SELECT value FROM cache_info WHERE key = ?',
        [`api_usage_${endpoint}_${today}`],
        (err, row) => {
          if (err) {
            reject(err)
            return
          }

          const currentCount = row ? parseInt(row.value, 10) : 0
          const newCount = currentCount + count

          this.connection.run(
            'INSERT OR REPLACE INTO cache_info (key, value, updated_at) VALUES (?, ?, ?)',
            [`api_usage_${endpoint}_${today}`, newCount.toString(), now],
            (err) => {
              if (err) {
                reject(err)
                return
              }
              resolve(newCount)
            }
          )
        }
      )
    })
  },

  // Get today's API usage
  async getApiUsage () {
    const today = new Date().toISOString().split('T')[0]

    return new Promise((resolve, reject) => {
      this.connection.all(
        'SELECT key, value FROM cache_info WHERE key LIKE ?',
        [`api_usage_%_${today}`],
        (err, rows) => {
          if (err) {
            reject(err)
            return
          }

          const usage = {}
          rows.forEach(row => {
            const endpoint = row.key.replace('api_usage_', '').replace(`_${today}`, '')
            usage[endpoint] = parseInt(row.value, 10)
          })

          resolve(usage)
        }
      )
    })
  },

  // Track URL history
  async recordUrl (url, hostname, username) {
    const now = Math.floor(Date.now() / 1000)

    return new Promise((resolve, reject) => {
      // First check if URL exists
      this.connection.get(
        'SELECT url, seen_count, hosts_sharing FROM url_history WHERE url = ?',
        [url],
        (err, row) => {
          if (err) {
            reject(err)
            return
          }

          if (row) {
            // URL exists, update it
            this.connection.run(
              'UPDATE url_history SET last_seen = ?, seen_count = seen_count + 1 WHERE url = ?',
              [now, url],
              (err) => {
                if (err) {
                  reject(err)
                  return
                }
                resolve()
              }
            )
          } else {
            // New URL
            this.connection.run(
              'INSERT INTO url_history (url, first_seen, last_seen, seen_count) VALUES (?, ?, ?, 1)',
              [url, now, now],
              (err) => {
                if (err) {
                  reject(err)
                  return
                }
                resolve()
              }
            )
          }
        }
      )
    })
  },

  async close () {
    return new Promise((resolve, reject) => {
      if (this.connection) {
        this.connection.close((err) => {
          if (err) reject(err)
          else resolve()
        })
      } else {
        resolve()
      }
    })
  }
}
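
// For ad-hoc debugging, the on-disk cache can be inspected with the sqlite3 CLI
// (illustrative queries; table names match the schema created in db.init above):
//
//   sqlite3 twitter-links.db 'SELECT COUNT(*) FROM tweets;'
//   sqlite3 twitter-links.db "SELECT key, value FROM cache_info WHERE key LIKE 'api_usage_%';"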

// Add API efficiency configuration to default config
let config = {
  listIds: [],
  timeIntervalHours: 12,
  verbosity: 'normal',
  bearerToken: '',
  mediaPatterns: [
    {
      type: 'extensions',
      patterns: ['\\.jpg$', '\\.jpeg$', '\\.png$', '\\.gif$', '\\.mp4$', '\\.webm$']
    },
    {
      type: 'domains',
      patterns: [
        'pbs\\.twimg\\.com',
        'i\\.imgur\\.com',
        'youtube\\.com\\/watch',
        'youtu\\.be\\/',
        'vimeo\\.com\\/'
      ]
    }
  ],
  // Add API usage efficiency controls
  apiEfficiency: {
    // Maximum tweets per member to process
    maxTweetsPerMember: 25,
    // Maximum members per list to process
    maxMembersPerList: 200,
    // Maximum replies per tweet to fetch
    maxRepliesPerTweet: 20,
    // Only fetch replies for tweets with links or higher engagement
    fetchRepliesForTweetsWithLinks: true,
    // Get missing root tweets for conversations
    fetchMissingRootTweets: true,
    // Maximum pages to fetch for each pagination (lists, members, tweets, replies)
    maxPagination: {
      listMembers: 2,
      memberTweets: 1,
      listTweets: 2,
      replies: 2
    },
    // Delay between API calls in milliseconds
    delays: {
      betweenLists: 10000,
      betweenMembers: 10000,
      betweenPagination: 5000,
      afterInitialChecks: 15000
    }
  }
}
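
// Illustrative twitter-link-extract.config.json (placeholder values; any keys
// omitted fall back to the defaults above via loadConfig):
//
// {
//   "bearerToken": "AAAA...your-app-bearer-token...",
//   "listIds": ["1234567890123456789"],
//   "timeIntervalHours": 12,
//   "verbosity": "debug"
// }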

// Logger utility
const logger = {
  error: (message) => console.error(`${colors.fg.red}Error: ${message}${colors.reset}`),
  info: (message) => console.log(`${colors.fg.green}${message}${colors.reset}`),
  progress: (message) => {
    if (config.verbosity !== 'minimal') {
      console.log(`${colors.fg.blue}${message}${colors.reset}`)
    }
  },
  debug: (message) => {
    if (config.verbosity === 'debug') {
      console.log(`${colors.fg.gray}${message}${colors.reset}`)
    }
  },
  result: (message) => console.log(`${colors.bright}${colors.fg.green}${message}${colors.reset}`)
}

function loadConfig (configPath) {
  try {
    const configData = fs.readFileSync(configPath, 'utf8')
    const loadedConfig = JSON.parse(configData)
    return { ...config, ...loadedConfig }
  } catch (error) {
    logger.error(`Error loading config file: ${error.message}`)
    logger.info('Using default configuration')
    return config
  }
}

function isMediaUrl (url) {
  if (config.mediaPatterns) {
    for (const patternGroup of config.mediaPatterns) {
      for (const pattern of patternGroup.patterns) {
        const regex = new RegExp(pattern, 'i')
        if (regex.test(url)) return true
      }
    }
  }
  return false
}
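
// Example (illustrative): with the default mediaPatterns above,
// isMediaUrl('https://pbs.twimg.com/media/abc.jpg') returns true, while
// isMediaUrl('https://example.com/article') returns false, so article links
// survive the media filter applied later in formatTweetOutput.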

function checkSystemTime () {
  const localTime = new Date()
  console.log('System time check:')
  console.log(`- Current local time: ${localTime.toISOString()}`)
  console.log(`- Timestamp (ms): ${localTime.getTime()}`)
  console.log(`- Year: ${localTime.getFullYear()}`)

  // Sanity-check the clock by round-tripping it through a UTC string.
  // Note: this does not contact an external time source; it only catches
  // gross serialization problems with the local clock.
  try {
    const httpTime = new Date(new Date().toUTCString())
    console.log(`- UTC round-trip time: ${httpTime.toISOString()}`)
    if (Math.abs(localTime - httpTime) > 60000) { // More than 1 minute difference
      console.log(`WARNING: Your system time might be off by ${Math.abs(localTime - httpTime) / 1000} seconds`)
    }
  } catch (e) {
    console.log(`- Could not check time: ${e.message}`)
  }
}

async function sleep (ms) {
  return new Promise(resolve => setTimeout(resolve, ms))
}

// Add tweet cache to avoid duplicate requests
const tweetCache = {
  tweets: new Map(),

  async initFromDb () {
    try {
      const cachedIds = await db.loadCachedTweetIds()
      logger.info(`Loaded ${cachedIds.length} tweet IDs from database cache`)

      // Mark these as seen in our in-memory cache
      cachedIds.forEach(id => {
        this.tweets.set(id, { id, cached: true })
      })
    } catch (err) {
      logger.error(`Error loading tweet cache from DB: ${err.message}`)
    }
  },

  add (tweets) {
    if (!Array.isArray(tweets)) tweets = [tweets]

    tweets.forEach(tweet => {
      if (tweet && tweet.id) {
        this.tweets.set(tweet.id, tweet)

        // Save to DB for persistence
        if (tweet.text && tweet.created_at) {
          db.saveTweet(tweet).catch(err => {
            logger.error(`Error saving tweet to DB: ${err.message}`)
          })
        }
      }
    })
  },

  get (id) {
    return this.tweets.get(id)
  },

  has (id) {
    return this.tweets.has(id)
  },

  getAll () {
    return Array.from(this.tweets.values())
  },

  size () {
    return this.tweets.size
  }
}

// Track processed tweets to avoid duplicate work
const processedReplies = {}

// Add function to load already processed replies from DB
async function loadProcessedReplies () {
  try {
    // Get tweet IDs from DB for which we've already fetched replies
    const connection = db.connection

    return new Promise((resolve, reject) => {
      connection.all('SELECT tweet_id FROM processed_replies', (err, rows) => {
        if (err) {
          reject(err)
          return
        }

        // Add to our in-memory tracking
        rows.forEach(row => {
          processedReplies[row.tweet_id] = true
        })

        logger.info(`Loaded ${rows.length} previously processed reply records from database`)
        resolve()
      })
    })
  } catch (err) {
    logger.error(`Error loading processed replies: ${err.message}`)
  }
}

// Enhanced API tracking wrapper
async function callTwitterApi (endpoint, apiCall, incrementAmount = 1) {
  // Record API usage
  const usageCount = await db.recordApiUsage(endpoint, incrementAmount)
  logger.debug(`API call to ${endpoint}: usage today = ${usageCount}`)

  // Set thresholds based on Pro tier limits
  // https://docs.x.com/x-api/fundamentals/rate-limits
  const fifteenMinuteLimits = {
    lists: 75, // 75 requests / 15 minutes per app
    tweets: 450, // 450 requests / 15 minutes per app (900 per user)
    users: 500, // Most user endpoints are 300-900 / 15 minutes
    search: 450, // 450 requests / 15 minutes per app
    default: 300
  }

  // Calculate daily limits as approximately 96 fifteen-minute periods per day
  // But using a more conservative factor of 20x to avoid hitting limits
  const dailyLimitFactor = 20
  const dailyLimits = {}

  for (const [key, value] of Object.entries(fifteenMinuteLimits)) {
    dailyLimits[key] = value * dailyLimitFactor
  }

  const limit = dailyLimits[endpoint] || dailyLimits.default
  const warningThreshold = limit * 0.8

  // Warn if approaching limit
  if (usageCount > warningThreshold) {
    logger.error(`WARNING: ${endpoint} API usage at ${usageCount}/${limit} (${Math.round(usageCount / limit * 100)}% of daily limit)`)
  }

  // Emergency stop if exceeded
  if (usageCount >= limit) {
    throw new Error(`EMERGENCY STOP: Daily API limit for ${endpoint} exceeded (${usageCount}/${limit})`)
  }

  try {
    // Make the call
    const result = await apiCall()

    // Check rate limit headers if available
    if (result && result._headers) {
      const remaining = result._headers.get('x-rate-limit-remaining')
      const resetTime = result._headers.get('x-rate-limit-reset')
      const limit = result._headers.get('x-rate-limit-limit')

      if (remaining && limit) {
        const remainingPercent = Math.round((parseInt(remaining) / parseInt(limit)) * 100)
        logger.debug(`Rate limit status for ${endpoint}: ${remaining}/${limit} (${remainingPercent}% remaining)`)

        // If we're below 10% of remaining requests, log a warning
        if (remainingPercent < 10) {
          logger.error(`URGENT: Only ${remainingPercent}% of rate limit remaining for ${endpoint}`)

          if (resetTime) {
            const resetDate = new Date(parseInt(resetTime) * 1000)
            const resetInSeconds = Math.round((resetDate.getTime() - Date.now()) / 1000)
            logger.info(`Rate limit resets in ${resetInSeconds} seconds (${resetDate.toISOString()})`)
          }
        }
      }
    }

    return result
  } catch (error) {
    // Check if this is a rate limit error
    if (error.code === 88 || error.code === 429 ||
      (error.message && (error.message.includes('429') || error.message.includes('Rate limit')))) {
      logger.error(`Rate limit exceeded for ${endpoint}. Backing off.`)

      // If we have rate limit info in the error, use it
      if (error.rateLimit) {
        const resetTime = error.rateLimit.reset
        if (resetTime) {
          const resetDate = new Date(resetTime * 1000)
          const waitTime = Math.max(resetDate.getTime() - Date.now(), 60000) // at least 1 minute
          logger.info(`Rate limit resets at ${resetDate.toISOString()}. Waiting ${Math.round(waitTime / 1000)} seconds.`)
          await sleep(waitTime)
        } else {
          // Default backoff of 5 minutes
          logger.info('No reset time available. Using default 5 minute backoff.')
          await sleep(300000)
        }
      } else {
        // Default backoff of 5 minutes
        logger.info('No rate limit details available. Using default 5 minute backoff.')
        await sleep(300000)
      }

      // Throw a more informative error
      throw new Error(`Rate limit exceeded for ${endpoint}. Try again later or reduce request frequency.`)
    }

    // For other errors, just pass them through
    throw error
  }
}
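
// Usage sketch (illustrative; assumes `client` is an app-auth TwitterApi
// instance and the endpoint label matches a key in fifteenMinuteLimits):
//
//   const res = await callTwitterApi('tweets', () => client.v2.tweets(['20']))
//
// The wrapper records the call against today's quota in cache_info, warns at
// 80% of the derived daily budget, and backs off when a 429 is returned.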

async function getTweetsFromListMembers (client, listIds, sinceTime) {
  const allTweets = []

  // Process one list at a time with significant delays between lists
  for (const listId of listIds) {
    try {
      logger.info(`Getting members of list ${listId}...`)

      // Add delay before starting each list to avoid rate limits
      await sleep(config.apiEfficiency.delays.betweenLists)

      // Use paginated approach to get all list members
      const members = []
      let nextToken
      let paginationCount = 0
      const maxMemberPages = config.apiEfficiency.maxPagination.listMembers

      do {
        // Add delay between pagination requests
        if (paginationCount > 0) await sleep(config.apiEfficiency.delays.betweenPagination)

        try {
          const response = await rateLimitHandler(async () => {
            return client.v2.listMembers(listId, {
              'user.fields': 'username,name',
              max_results: 100,
              pagination_token: nextToken
            })
          }, 5)

          if (response?.data?.length > 0) {
            members.push(...response.data)
            logger.info(`Found ${response.data.length} members in list ${listId}${paginationCount > 0 ? ` (page ${paginationCount + 1})` : ''}`)
          }

          // Check for more pages
          nextToken = response?.meta?.next_token
          paginationCount++
        } catch (memberError) {
          logger.error(`Could not get list members page: ${memberError.message}`)
          break
        }
      } while (nextToken && paginationCount < maxMemberPages)

      if (!members || members.length === 0) {
        logger.error('Couldn\'t parse list members response or no members found')
        continue
      }

      logger.info(`Found total of ${members.length} members in list ${listId}`)

      // Process more members but still keep a reasonable limit
      const memberLimit = Math.min(members.length, config.apiEfficiency.maxMembersPerList)
      const limitedMembers = members.slice(0, memberLimit)

      logger.info(`Processing tweets from ${memberLimit} members...`)

      // Process each member's timeline with longer delays between requests
      for (const member of limitedMembers) {
        try {
          logger.progress(`Getting tweets from @${member.username}...`)

          // Much longer delay between requests to avoid rate limits
          await sleep(config.apiEfficiency.delays.betweenMembers)

          // Use pagination to get more tweets from each member
          const userTweets = []
          let memberNextToken
          let memberPaginationCount = 0
          const maxMemberPages = config.apiEfficiency.maxPagination.memberTweets

          do {
            // Add delay between pagination requests
            if (memberPaginationCount > 0) await sleep(config.apiEfficiency.delays.betweenPagination)

            const response = await client.v2.userTimeline(member.id, {
              max_results: config.apiEfficiency.maxTweetsPerMember,
              'tweet.fields': 'created_at,author_id,conversation_id,entities,public_metrics',
              'user.fields': 'username,name',
              expansions: 'author_id',
              pagination_token: memberNextToken
            })

            if (response?.data?.length > 0) {
              // Filter out tweets we've already seen
              const newTweets = response.data.filter(tweet => !tweetCache.has(tweet.id))

              if (newTweets.length > 0) {
                userTweets.push(...newTweets)
                logger.debug(`Found ${newTweets.length} new tweets from @${member.username}${memberPaginationCount > 0 ? ` (page ${memberPaginationCount + 1})` : ''}`)

                // Add to cache
                tweetCache.add(newTweets)
              } else {
                logger.debug(`No new tweets found for @${member.username} on page ${memberPaginationCount + 1}`)
              }
            }

            // Check for more pages - but only continue if we got new tweets
            memberNextToken = response?.meta?.next_token

            // Stop pagination if we didn't get any new tweets
            if (userTweets.length === 0) {
              memberNextToken = undefined
            }

            memberPaginationCount++
          } while (memberNextToken && memberPaginationCount < maxMemberPages)

          if (userTweets.length > 0) {
            logger.info(`Found ${userTweets.length} new tweets from @${member.username}`)

            // Get the author data from the response
            const author = {
              username: member.username,
              name: member.name || member.username
            }

            // Process tweets with author data
            const processedTweets = userTweets.map(tweet => ({
              ...tweet,
              author_username: author.username,
              author_name: author.name
            }))

            // Filter to tweets from the requested time period
            const filteredTweets = processedTweets.filter(tweet => {
              const tweetDate = new Date(tweet.created_at)
              const cutoffDate = new Date(sinceTime)
              return tweetDate >= cutoffDate
            })

            logger.debug(`${filteredTweets.length} tweets from @${member.username} after date filtering`)
            allTweets.push(...filteredTweets)
          }
        } catch (userError) {
          logger.error(`Error getting tweets for ${member.username}: ${userError.message}`)

          // If we hit rate limits, wait longer
          if (userError.code === 429 || userError.message.includes('429')) {
            const waitTime = 90000 // 90 seconds
            logger.info(`Rate limit hit, waiting ${waitTime / 1000} seconds...`)
            await sleep(waitTime)
          }
        }
      }
    } catch (listError) {
      logger.error(`Error processing list ${listId}: ${listError.message}`)
    }
  }

  return allTweets
}

async function getTweetsFromLists (client, listIds, sinceTime) {
  const allTweets = []

  // Add delay at the start
  await sleep(5000)

  for (const listId of listIds) {
    try {
      logger.progress(`Fetching tweets from list ${listId}...`)

      // Get list info first to confirm access
      try {
        const listInfo = await rateLimitHandler(async () => {
          return client.v2.list(listId)
        }, 5)
        logger.info(`List info: ${listInfo.data.name}`)

        // Add significant delay after getting list info before fetching tweets
        await sleep(config.apiEfficiency.delays.betweenLists)

        // Use pagination to get more tweets from the list
        const listTweets = []
        let listNextToken
        let listPaginationCount = 0
        const maxListPages = config.apiEfficiency.maxPagination.listTweets

        do {
          // Add delay between pagination requests
          if (listPaginationCount > 0) await sleep(config.apiEfficiency.delays.betweenPagination)

          try {
            // Use the standard client method
            const response = await client.v2.listTweets(listId, {
              max_results: 100,
              'tweet.fields': 'created_at,author_id,conversation_id,entities,public_metrics',
              'user.fields': 'username,name',
              expansions: 'author_id',
              pagination_token: listNextToken
            })

            // Add debug logging and proper type checking
            logger.debug(`Response structure: ${JSON.stringify(response?.meta || {})}`)

            // Check if response.data exists and is an array
            const replyData = Array.isArray(response?.data) ? response.data : (response?.data?.data && Array.isArray(response.data.data) ? response.data.data : [])

            if (replyData.length > 0) {
              // Filter out tweets we've already seen
              const newTweets = replyData.filter(tweet => !tweetCache.has(tweet.id))

              if (newTweets.length > 0) {
                logger.info(`Found ${newTweets.length} new tweets in list ${listId}${listPaginationCount > 0 ? ` (page ${listPaginationCount + 1})` : ''}`)

                // Process tweets with better author handling
                const processedTweets = newTweets.map(tweet => {
                  // Find author in includes or set defaults if missing
                  const authorIncludes = response.includes?.users || (response.data?.includes?.users || [])
                  const author = authorIncludes.find(u => u.id === tweet.author_id) || {}

                  return {
                    ...tweet,
                    author_username: author.username || 'unknown_user',
                    author_name: author.name || author.username || 'Unknown User'
                  }
                })

                listTweets.push(...processedTweets)

                // Add to cache
                tweetCache.add(processedTweets)
              } else {
                logger.info(`No new tweets found in list ${listId} on page ${listPaginationCount + 1}`)
              }
            }

            // Check for more pages - but only continue if we got new tweets
            listNextToken = response?.meta?.next_token

            // Stop pagination if we didn't get any new tweets
            if (replyData.length > 0 && replyData.filter(tweet => !tweetCache.has(tweet.id)).length === 0) {
              listNextToken = undefined
            }

            listPaginationCount++
          } catch (err) {
            logger.error(`API call to get list tweets failed: ${err.message}`)
            logger.debug(`Error details: ${err.stack}`)
            break
          }
        } while (listNextToken && listPaginationCount < maxListPages)

        if (listTweets.length > 0) {
          logger.info(`Total new tweets found in list ${listId}: ${listTweets.length}`)

          // Add to our collection
          allTweets.push(...listTweets)
        }
      } catch (error) {
        logger.error(`List access failed: ${error.message}`)
        if (error.message.includes('403')) {
          logger.error('You need Twitter API v2 Essential or higher access for lists endpoints.')
        }
      }
    } catch (error) {
      logger.error(`Error processing list ${listId}: ${error.message}`)
    }
  }

  return allTweets
}

// Add a new function to check if a URL is a Twitter status link
function isTwitterStatusLink (url) {
  return url && (
    url.match(/twitter\.com\/.*\/status\//) ||
    url.match(/x\.com\/.*\/status\//)
  )
}

// Add a function to extract tweet ID from Twitter URL
function extractTweetIdFromUrl (url) {
  if (!isTwitterStatusLink(url)) return null

  const match = url.match(/\/status\/(\d+)/)
  return match ? match[1] : null
}
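
// Example: extractTweetIdFromUrl('https://x.com/jack/status/20') returns '20';
// non-status URLs and non-Twitter hosts return null.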

// Add a function to fetch tweets by IDs
async function fetchTweetsByIds (client, ids) {
  if (!ids.length) return []

  try {
    // Get unique IDs (remove duplicates)
    const uniqueIds = [...new Set(ids)]

    // Split into chunks of 100 (API limitation)
    const chunks = []
    for (let i = 0; i < uniqueIds.length; i += 100) {
      chunks.push(uniqueIds.slice(i, i + 100))
    }

    const allTweets = []

    for (const chunk of chunks) {
      // Add delay between chunk requests
      if (chunks.length > 1) await sleep(15000)

      logger.progress(`Fetching ${chunk.length} referenced tweets...`)

      const response = await rateLimitHandler(async () => {
        return client.v2.tweets(chunk, {
          'tweet.fields': 'created_at,author_id,entities',
          'user.fields': 'username,name',
          expansions: 'author_id'
        })
      }, 3)

      if (response && response.data) {
        // Process the tweets with author data
        const processedTweets = response.data.map(tweet => {
          const author = response.includes?.users?.find(u => u.id === tweet.author_id) || {}
          return {
            ...tweet,
            author_username: author?.username,
            author_name: author?.name
          }
        })

        allTweets.push(...processedTweets)
      }
    }

    return allTweets
  } catch (error) {
    logger.error(`Error fetching referenced tweets: ${error.message}`)
    return []
  }
}

// Add function to check if a tweet contains non-Twitter links
function hasNonTwitterLinks (tweet) {
  if (!tweet.entities?.urls?.length) return false

  return tweet.entities.urls.some(url => {
    return url.expanded_url && !isTwitterStatusLink(url.expanded_url)
  })
}

// Enhance the fetchRepliesForTweets function to use DB tracking
async function fetchRepliesForTweets (client, tweets) {
  const allReplies = []

  // Only fetch replies for tweets that have links or high engagement if configured
  let tweetsToProcess = tweets

  if (config.apiEfficiency.fetchRepliesForTweetsWithLinks) {
    // Filter to tweets with links or high engagement
    tweetsToProcess = tweets.filter(tweet => {
      // Check if tweet has URLs
      const hasLinks = tweet.entities?.urls?.length > 0

      // Check if tweet has high engagement (optional additional criteria)
      const hasHighEngagement = tweet.public_metrics && (
        (tweet.public_metrics.retweet_count >= 5) ||
        (tweet.public_metrics.reply_count >= 3) ||
        (tweet.public_metrics.like_count >= 10)
      )

      return hasLinks || hasHighEngagement
    })

    logger.info(`Filtering ${tweets.length} tweets to ${tweetsToProcess.length} tweets with links or high engagement for reply fetching`)
  }

  const tweetIds = tweetsToProcess.map(t => t.id)

  // Process in smaller batches to avoid rate limits
  const batchSize = 5

  for (let i = 0; i < tweetIds.length; i += batchSize) {
    const batchIds = tweetIds.slice(i, i + batchSize)
    logger.progress(`Fetching replies for batch ${i / batchSize + 1}/${Math.ceil(tweetIds.length / batchSize)}...`)

    // Add delay between batches
    if (i > 0) await sleep(30000)

    for (const tweetId of batchIds) {
      // Skip if we've already processed this tweet
      if (processedReplies[tweetId] || await db.isReplyProcessed(tweetId)) {
        logger.debug(`Skipping replies for tweet ${tweetId} - already processed`)
        processedReplies[tweetId] = true
        continue
      }

      try {
        // Add small delay between individual requests
        await sleep(5000)

        // Search for replies to this tweet using conversation_id with pagination
        const repliesForTweet = []
        let nextToken
        let paginationCount = 0
        const maxPagination = config.apiEfficiency.maxPagination.replies

        do {
          // Add delay between pagination requests
          if (paginationCount > 0) await sleep(config.apiEfficiency.delays.betweenPagination)

          const response = await callTwitterApi(
            'search',
            async () => {
              return await rateLimitHandler(async () => {
                return client.v2.search(`conversation_id:${tweetId}`, {
                  'tweet.fields': 'created_at,author_id,conversation_id,entities',
                  'user.fields': 'username,name',
                  expansions: 'author_id',
                  max_results: config.apiEfficiency.maxRepliesPerTweet,
                  pagination_token: nextToken
                })
              }, 3)
            }
          )

          // Add debug logging for response structure
          logger.debug(`Replies response structure: ${JSON.stringify(response?.meta || {})}`)

          // Check if response.data exists and is an array
          const replyData = Array.isArray(response?.data) ? response.data : (response?.data?.data && Array.isArray(response.data.data) ? response.data.data : [])

          if (replyData.length > 0) {
            // Filter out tweets we've already seen
            const newReplies = replyData.filter(reply => !tweetCache.has(reply.id))

            if (newReplies.length > 0) {
              logger.info(`Found ${newReplies.length} new replies to tweet ${tweetId}${paginationCount > 0 ? ` (page ${paginationCount + 1})` : ''}`)

              // Process the replies with author data (named to avoid shadowing
              // the module-level processedReplies tracker)
              const processedReplyTweets = newReplies.map(reply => {
                const authorIncludes = response.includes?.users || (response.data?.includes?.users || [])
                const author = authorIncludes.find(u => u.id === reply.author_id) || {}
                return {
                  ...reply,
                  author_username: author?.username,
                  author_name: author?.name,
                  is_reply: true,
                  reply_to: tweetId
                }
              })

              repliesForTweet.push(...processedReplyTweets)

              // Add to cache
              tweetCache.add(processedReplyTweets)
            } else {
              logger.debug(`No new replies found for tweet ${tweetId} on page ${paginationCount + 1}`)
            }
          }

          // Check if there are more pages
          nextToken = response?.meta?.next_token

          // Stop pagination if we didn't get any new replies on this page
          if (replyData.length > 0 && replyData.filter(reply => !tweetCache.has(reply.id)).length === 0) {
            nextToken = undefined
          }

          paginationCount++
        } while (nextToken && paginationCount < maxPagination)

        if (repliesForTweet.length > 0) {
          logger.info(`Total new replies found for tweet ${tweetId}: ${repliesForTweet.length}`)
          allReplies.push(...repliesForTweet)
        }

        // Mark as processed in memory and DB
        processedReplies[tweetId] = true
        await db.markRepliesProcessed(tweetId)
      } catch (error) {
        logger.error(`Error fetching replies for tweet ${tweetId}: ${error.message}`)

        // If we hit rate limits, wait longer
        if (error.code === 429 || error.message.includes('429')) {
          const waitTime = 90000 // 90 seconds
          logger.info(`Rate limit hit, waiting ${waitTime / 1000} seconds...`)
          await sleep(waitTime)
        }
      }
    }
  }

  return allReplies
}

// Add function to get the original tweet of a conversation if not already in the dataset
async function fetchConversationRootTweets (client, tweets) {
  // Skip if not enabled
  if (!config.apiEfficiency.fetchMissingRootTweets) {
    logger.info('Skipping root tweet fetching (disabled in config)')
    return []
  }

  // Find tweets that are replies but we don't have their parent in our dataset
  const conversations = {}
  const rootTweetsToFetch = new Set()

  // Group by conversation ID
  tweets.forEach(tweet => {
    const convoId = tweet.conversation_id || tweet.id
    if (!conversations[convoId]) {
      conversations[convoId] = []
    }
    conversations[convoId].push(tweet)
  })

  // For each conversation, check if we have the root tweet
  for (const convoId in conversations) {
    const convoTweets = conversations[convoId]

    // Find if we have a non-reply tweet in this conversation
    const hasRoot = convoTweets.some(t => !t.is_reply)

    // If all tweets are replies, we need to fetch the root
    if (!hasRoot && convoId && !tweetCache.has(convoId)) {
      rootTweetsToFetch.add(convoId)
    }
  }

  // Now fetch the missing root tweets
  if (rootTweetsToFetch.size > 0) {
    logger.info(`Fetching ${rootTweetsToFetch.size} missing root tweets for conversations...`)

    const rootIds = Array.from(rootTweetsToFetch)
    const rootTweets = await fetchTweetsByIds(client, rootIds)

    logger.info(`Found ${rootTweets.length} root tweets`)

    // Add to cache
    tweetCache.add(rootTweets)

    return rootTweets
  }

  return []
}

// Enhance formatTweetOutput function to better handle authors and URLs
function formatTweetOutput (tweets, referencedTweetsMap = {}) {
  // Group tweets by conversation_id to keep replies with their parent tweets
  const conversationGroups = {}

  for (const tweet of tweets) {
    const conversationId = tweet.conversation_id || tweet.id
    if (!conversationGroups[conversationId]) {
      conversationGroups[conversationId] = []
    }
    conversationGroups[conversationId].push(tweet)
  }

  const output = []

  // Track external links for deduplication across the output
  const seenExternalUrls = new Set()

  // Process each conversation group separately
  for (const conversationId in conversationGroups) {
    const conversationTweets = conversationGroups[conversationId]

    // Sort tweets within a conversation: main tweet first, then replies
    conversationTweets.sort((a, b) => {
      // If one is a reply and the other isn't, non-reply comes first
      if (a.is_reply && !b.is_reply) return 1
      if (!a.is_reply && b.is_reply) return -1

      // Otherwise sort by timestamp
      return new Date(a.created_at) - new Date(b.created_at)
    })

    // Track all external URLs in this conversation
    const conversationExternalUrls = []
    let mainTweetAuthor = null

    // Flag to track if this conversation has external links
    let hasExternalLinks = false

    // First pass: collect all external URLs from the conversation
    for (const tweet of conversationTweets) {
      // Ensure author information exists, set a default if missing
      if (!tweet.author_username) {
        tweet.author_username = tweet.author_name || 'unknown_user'
      }

      // Keep track of the main tweet author
      if (!tweet.is_reply && !mainTweetAuthor) {
        mainTweetAuthor = tweet.author_username
      }

      // Process URLs in this tweet
      const timestamp = new Date(tweet.created_at).toISOString()

      // Extract URLs from entities if available, otherwise fall back to regex
      let urls = []
      if (tweet.entities && tweet.entities.urls && Array.isArray(tweet.entities.urls)) {
        // Use entity URLs as the primary source - these are the most reliable and include expanded URLs
        urls = tweet.entities.urls.map(url => ({
          short_url: url.url,
          expanded_url: url.expanded_url || url.url,
          display_url: url.display_url || url.url,
          title: url.title || '',
          description: url.description || ''
        }))
      } else {
        // Fallback to regex extraction if no entities
        const extractedUrls = tweet.text.match(/(https?:\/\/[^\s]+)/g) || []
        urls = extractedUrls.map(url => ({
          short_url: url,
          expanded_url: url,
          display_url: url
        }))
      }

      // Special handling for retweets - ensure we capture URLs even from truncated content
      const isRetweet = tweet.text.startsWith('RT @')

      // For retweets, we want to extract any URLs even from truncated text
      if (isRetweet) {
        // If it's a retweet with a truncated URL at the end (ending with … or ...)
        const endsWithTruncation = tweet.text.match(/https?:\/\/[^\s]*(?:…|\.{3})$/)

        if (endsWithTruncation || urls.length === 0) {
          // Remove the RT @username: prefix to get just the retweeted content
          const rtText = tweet.text.replace(/^RT @[\w\d_]+: /, '')

          // Extract all potential URLs, including truncated ones
          const rtUrlMatches = rtText.match(/(?:https?:\/\/[^\s]*(?:…|\.{3})?)/g) || []

          if (rtUrlMatches.length > 0) {
            // Process any URLs found in the retweet text
            const rtUrls = rtUrlMatches.map(url => {
              // Remove trailing punctuation that might have been included
              const cleanUrl = url.replace(/[.,;:!?…]+$/, '')
              // For truncated URLs, try to find the full version in the entities if available
              const isTruncated = cleanUrl.endsWith('…') || cleanUrl.endsWith('...')
              let expandedUrl = cleanUrl

              // If the URL is truncated and we have entities, try to find a match
              if (isTruncated && tweet.entities?.urls) {
                // Find a matching t.co URL in the entities
                const matchingEntity = tweet.entities.urls.find(u =>
                  cleanUrl.startsWith(u.url.substring(0, Math.min(u.url.length, cleanUrl.length)))
                )

                if (matchingEntity) {
                  expandedUrl = matchingEntity.expanded_url
                }
              }

              return {
                short_url: cleanUrl,
                expanded_url: expandedUrl,
                display_url: cleanUrl,
                is_truncated: isTruncated
              }
            })

            // Add any new URLs not already in our list
            for (const rtUrl of rtUrls) {
              if (!urls.some(u => u.short_url === rtUrl.short_url)) {
                urls.push(rtUrl)
              }
            }
          }
        }
      }

      // Separate external content URLs and Twitter status URLs
      const contentUrls = []
      const twitterStatusUrls = []

      // Track referenced tweets that contain external links
      const referencedTweetsWithLinks = []

      urls.forEach(url => {
        // Make sure expanded_url exists and isn't truncated
        if (url.expanded_url) {
          // Fix truncated URL issue by removing ... at the end if present
          if (url.expanded_url.endsWith('…') || url.expanded_url.endsWith('...')) {
            // For truncated URLs, try to find a full t.co URL in the text that starts with this prefix
            if (tweet.entities?.urls) {
              // Look for a matching full URL in the tweet entities
              const potentialMatch = tweet.entities.urls.find(entityUrl =>
                entityUrl.url.startsWith(url.short_url.replace(/[….]+$/, ''))
              )
              if (potentialMatch) {
                url.expanded_url = potentialMatch.expanded_url || url.expanded_url
              }
            }
          }
        }

        if (isTwitterStatusLink(url.expanded_url)) {
          // Look up the tweet ID in our referenced tweets map
          const tweetId = extractTweetIdFromUrl(url.expanded_url)
          const referencedTweet = tweetId ? referencedTweetsMap[tweetId] : null

          if (referencedTweet) {
            // Check if the referenced tweet has non-Twitter links
            if (hasNonTwitterLinks(referencedTweet)) {
              // Store the referenced tweet for showing its links later
              referencedTweetsWithLinks.push(referencedTweet)
            }

            twitterStatusUrls.push({
              ...url,
              referenced_tweet: referencedTweet,
              has_links: hasNonTwitterLinks(referencedTweet)
            })
          } else {
            twitterStatusUrls.push(url)
          }
        } else {
          // Non-Twitter links go directly to content URLs
          contentUrls.push(url)
          hasExternalLinks = true
        }
      })

      // Add direct external content links from this tweet
      contentUrls.forEach(url => {
        // Skip invalid URLs
        if (!url.expanded_url || url.expanded_url.length < 8) return

        // Ensure the URL isn't truncated
        if (url.expanded_url.endsWith('…') || url.expanded_url.endsWith('...')) {
          // For already identified truncated URLs, we'll mark them but still show them
          url.is_truncated = true
        }

        const isMedia = isMediaUrl(url.expanded_url)
        const isTruncated = !!url.is_truncated

        // Track this URL to avoid duplicates
        try {
          const urlObj = new URL(url.expanded_url)
          const hostname = urlObj.hostname

          // Record URL in database for tracking
          db.recordUrl(url.expanded_url, hostname, tweet.author_username).catch(err => {
            logger.error(`Error recording URL history: ${err.message}`)
          })
        } catch (e) {
          // Invalid URL, just continue
          logger.debug(`Skipping invalid URL: ${url.expanded_url}`)
          return
        }

        conversationExternalUrls.push({
          url: url.expanded_url,
          short_url: url.short_url,
          isMedia,
          isTruncated,
          source: 'direct',
          tweet_id: tweet.id,
          tweet_author: tweet.author_username,
          timestamp,
          is_reply: tweet.is_reply || false
        })
      })

      // Add external links from referenced tweets
      referencedTweetsWithLinks.forEach(referencedTweet => {
        if (referencedTweet.entities?.urls) {
          referencedTweet.entities.urls.forEach(urlEntity => {
            if (!isTwitterStatusLink(urlEntity.expanded_url)) {
              const isMedia = isMediaUrl(urlEntity.expanded_url)
              hasExternalLinks = true

              conversationExternalUrls.push({
                url: urlEntity.expanded_url,
                short_url: urlEntity.url,
                isMedia,
                isTruncated: false,
                source: 'referenced',
                referencedAuthor: referencedTweet.author_username || 'unknown_user',
                tweet_id: tweet.id,
                tweet_author: tweet.author_username,
                timestamp,
                is_reply: tweet.is_reply || false
              })
            }
          })
        }
      })
    }

    // Only proceed if this conversation has external URLs
    if (conversationExternalUrls.length === 0 || !hasExternalLinks) {
      continue
    }

    // Group external URLs by domain
    const urlsByDomain = {}
    conversationExternalUrls.forEach(item => {
      if (item.isMedia) return // Skip media URLs if we're focused on external links

      try {
        const urlObj = new URL(item.url)
        const domain = urlObj.hostname

        if (!urlsByDomain[domain]) {
          urlsByDomain[domain] = []
        }
        urlsByDomain[domain].push(item)
      } catch (e) {
        // If URL parsing fails, just continue
      }
    })

    // Calculate how many unique domains we have
    const uniqueDomains = Object.keys(urlsByDomain)

    // Get the main tweet (the first non-reply, or the first tweet if all are replies)
    const mainTweet = conversationTweets.find(t => !t.is_reply) || conversationTweets[0]

    // Handle potentially invalid timestamps
    let mainTimestamp
    try {
      mainTimestamp = new Date(mainTweet.created_at).toISOString()
    } catch (e) {
      // If date parsing fails, use current date
      mainTimestamp = new Date().toISOString()
      logger.error(`Invalid date found: ${mainTweet.created_at}. Using current time instead.`)
    }

    // Handle undefined authors
    const authorUsername = mainTweet.author_username || 'unknown_user'

    // Output the conversation header
    output.push(`${colors.bright}${colors.fg.yellow}Tweet by @${authorUsername} at ${mainTimestamp}${colors.reset}`)
    output.push(`${colors.fg.green}Tweet ID: ${colors.reset}${mainTweet.id}`)

    if (conversationTweets.length > 1) {
      output.push(`${colors.fg.cyan}Thread with ${conversationTweets.length} tweets and ${uniqueDomains.length} unique domains${colors.reset}`)
    }

    output.push(`${colors.bright}${colors.fg.blue}External URLs:${colors.reset}`)

    // Display all external URLs with appropriate formatting
    // First, deduplicate URLs
    const uniqueExternalUrls = []
    const seenUrlsInConversation = new Set()

    conversationExternalUrls.forEach(item => {
      // Skip media URLs if we're focused on external links
      if (item.isMedia) return

      // Skip if we've seen this URL before
      if (seenUrlsInConversation.has(item.url) || seenExternalUrls.has(item.url)) {
        return
      }

      // Skip invalid or very short URLs
      if (!item.url || item.url.length < 8) return

      seenUrlsInConversation.add(item.url)
      seenExternalUrls.add(item.url)
      uniqueExternalUrls.push(item)
    })

    // Then display them
    uniqueExternalUrls.forEach(item => {
      let urlDisplay = `${colors.bright}${colors.fg.cyan}${item.url}${colors.reset}`

      // Add short URL info if it's a t.co link that got expanded
      if (item.short_url && item.short_url.includes('t.co/') && item.short_url !== item.url) {
        urlDisplay = `${colors.bright}${colors.fg.cyan}${item.url}${colors.reset} (${item.short_url})`
      }

      if (item.isTruncated) {
        urlDisplay += ' (truncated)'
      }

      if (item.source === 'referenced') {
        urlDisplay += ` (via @${item.referencedAuthor || 'unknown'})`
      }

      // Add information if this URL is from a reply
      if (item.is_reply) {
        if (item.tweet_author === mainTweetAuthor) {
          urlDisplay += ` ${colors.fg.yellow}(in self-reply)${colors.reset}`
        } else {
          urlDisplay += ` ${colors.fg.yellow}(in reply by @${item.tweet_author || 'unknown'})${colors.reset}`
        }
      }

      output.push(` • ${urlDisplay}`)
    })

    // Show media URLs separately if there are any
    const mediaUrls = conversationExternalUrls.filter(item => item.isMedia)
    if (mediaUrls.length > 0) {
      output.push(`${colors.fg.gray}Media:${colors.reset}`)
      const uniqueMediaUrls = []
      const seenMediaUrls = new Set()

      mediaUrls.forEach(item => {
        if (!seenMediaUrls.has(item.url)) {
          seenMediaUrls.add(item.url)
          uniqueMediaUrls.push(item)
        }
      })

      // Show at most 3 media URLs to keep output concise
      const displayMediaUrls = uniqueMediaUrls.slice(0, 3)
      displayMediaUrls.forEach(item => {
        output.push(` • ${colors.fg.gray}${item.url}${colors.reset}`)
      })

      if (uniqueMediaUrls.length > 3) {
        output.push(` • ${colors.fg.gray}... and ${uniqueMediaUrls.length - 3} more media files${colors.reset}`)
      }
    }

    // Show the main tweet content
    output.push(`${colors.bright}${colors.fg.blue}Content:${colors.reset}`)
    output.push(mainTweet.text)

    // Optionally show replies content if there are external links in replies
    const repliesWithLinks = conversationTweets.filter(t =>
      t.is_reply &&
      conversationExternalUrls.some(url => url.tweet_id === t.id && !url.isMedia)
    )

    if (repliesWithLinks.length > 0) {
      output.push(`${colors.bright}${colors.fg.blue}Replies with links:${colors.reset}`)

      repliesWithLinks.forEach(reply => {
        // Handle undefined authors
        const replyAuthor = reply.author_username || 'unknown_user'
        output.push(` ${colors.fg.cyan}@${replyAuthor}:${colors.reset} ${reply.text}`)
      })
    }

    output.push(`${colors.fg.yellow}${'-'.repeat(50)}${colors.reset}`)
  }

  return output.join('\n')
}

// Update the rate limit handler to work with the updated callTwitterApi function
async function rateLimitHandler (operation, maxRetries = 3) {
  let retries = 0
  let backoffTime = 30000 // Start with 30 seconds

  while (retries < maxRetries) {
    try {
      return await operation()
    } catch (error) {
      // Check if this is a rate limit error
      const isRateLimit = error.code === 88 || error.code === 429 ||
        (error.message && (error.message.includes('429') ||
          error.message.includes('Rate limit')))

      if (isRateLimit) {
        retries++
        logger.error(`Rate limit hit (attempt ${retries}/${maxRetries}). Waiting ${backoffTime / 1000} seconds...`)

        // Try to get reset time from headers if available
        if (error.rateLimit && error.rateLimit.reset) {
          const resetTime = error.rateLimit.reset * 1000
          const waitTime = resetTime - Date.now()

          if (waitTime > 0) {
            logger.info(`Rate limit resets in ${Math.ceil(waitTime / 1000)} seconds.`)
            backoffTime = Math.min(waitTime + 1000, 120000) // Wait until reset plus 1 second, max 2 minutes
          }
        }

        await sleep(backoffTime)
        backoffTime *= 2 // Exponential backoff
      } else {
        throw error // Not a rate limit error, rethrow
      }
    }
  }

  throw new Error(`Failed after ${maxRetries} retries due to rate limits`)
}
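
// Usage sketch (illustrative): wrap any twitter-api-v2 call that may return a
// 429 so it retries with exponential backoff before giving up:
//
//   const listInfo = await rateLimitHandler(() => client.v2.list(listId), 5)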

// Modify main function to include quota management and DB usage
async function main () {
  // Debug system time
  checkSystemTime()

  // Initialize DB
  await db.init()

  try {
    // Load configuration
    const configPath = path.join(__dirname, 'twitter-link-extract.config.json')
    logger.info(`Loading configuration from ${configPath}`)
    config = loadConfig(configPath)

    if (!config.bearerToken) {
      throw new Error('Twitter Bearer Token is required in config file')
    }

    // Initialize tweet cache from DB
    await tweetCache.initFromDb()

    // Load already processed replies
    await loadProcessedReplies()

    // Check API usage for today
    const apiUsage = await db.getApiUsage()
    logger.info(`Today's API usage: ${JSON.stringify(apiUsage)}`)

    // Create Twitter client
    const client = new TwitterApi(config.bearerToken)

    // Validate the token format
    if (!config.bearerToken.startsWith('AAAA')) {
      logger.error('The bearer token format appears incorrect. It should start with "AAAA"')
    }

    // Test connection
    try {
      // Don't attempt to call me() which requires user context
      // Instead, try to fetch a public tweet which only requires basic access
      await callTwitterApi('tweets', async () => {
        return client.v2.tweets('1722701605825642574') // Public tweet ID
      })
      logger.info('API connection working. Successfully accessed public tweet')

      // Add delay before trying lists to avoid rate limits
      await sleep(10000)

      // Now try lists
      try {
        // Test list lookup separately
        await callTwitterApi('lists', async () => {
          return client.v2.list(config.listIds[0])
        })
        logger.info('List access working')
      } catch (listError) {
        logger.error(`List access failed: ${listError.message}`)
        if (listError.message.includes('403')) {
          logger.error('You need Twitter API v2 Essential or higher access for lists endpoints.')
        }
      }
    } catch (error) {
      throw new Error(`API authentication failed: ${error.message}`)
    }

    // Add delay after initial checks
    await sleep(config.apiEfficiency.delays.afterInitialChecks)

    // More explicit timestamp handling
    const now = new Date()
    const hoursAgo = new Date(now.getTime() - (config.timeIntervalHours * 60 * 60 * 1000))
    const latestStored = new Date(await db.getLatestTweetTimestamp())

    // Use the more recent of: hoursAgo or latestStored
    const startTime = new Date(Math.max(hoursAgo.getTime(), latestStored.getTime()))

    // Ensure we're not in the future
    const finalStartTime = new Date(Math.min(startTime.getTime(), now.getTime()))

    logger.info(`Fetching tweets since ${finalStartTime.toISOString()}`)
    logger.info(`API efficiency settings: max ${config.apiEfficiency.maxMembersPerList} members per list, max ${config.apiEfficiency.maxTweetsPerMember} tweets per member`)

    let tweets = []

    // Skip the search approach and go straight to list methods
    logger.info('Using list-only approach as requested...')

    // Try the direct list tweets approach first with rate limit handling
    try {
      const listTweets = await callTwitterApi('lists', async () => {
        return getTweetsFromLists(client, config.listIds, finalStartTime.toISOString())
      })

      tweets = listTweets
      logger.info(`Found ${tweets.length} tweets from lists directly`)

      // Add to cache
      tweetCache.add(tweets)

      // If direct list approach fails, try member approach
      if (tweets.length === 0) {
        logger.info('No tweets found with direct list approach. Trying list members approach...')

        const memberTweets = await callTwitterApi('users', async () => {
          return getTweetsFromListMembers(client, config.listIds, finalStartTime.toISOString())
        })

        tweets = memberTweets
        logger.info(`Found ${tweets.length} tweets from list members`)

        // Add to cache
        tweetCache.add(tweets)
      }
    } catch (error) {
      logger.error(`List approaches failed: ${error.message}`)

      // Try one more time with the members approach if direct list failed
      if (tweets.length === 0) {
        logger.info('Retrying with list members approach...')
        try {
          tweets = await callTwitterApi('users', async () => {
            return getTweetsFromListMembers(client, config.listIds, finalStartTime.toISOString())
          })

          // Add to cache
          tweetCache.add(tweets)
        } catch (memberError) {
          logger.error(`List members approach also failed: ${memberError.message}`)
        }
      }
    }

    // First level filtering - keep tweets with any URLs
    const tweetsWithLinks = tweets.filter(tweet => {
      const urls = tweet.text.match(/(https?:\/\/[^\s]+)/g) || []
      return urls.length > 0
    })

    logger.info(`Found ${tweetsWithLinks.length} tweets with any kind of links`)

    // Extract Twitter status links that need analysis
    const twitterStatusLinks = []
    for (const tweet of tweetsWithLinks) {
      const urls = tweet.entities?.urls || []
      for (const url of urls) {
        if (url.expanded_url && isTwitterStatusLink(url.expanded_url)) {
          const tweetId = extractTweetIdFromUrl(url.expanded_url)
          if (tweetId && !tweetCache.has(tweetId)) {
            twitterStatusLinks.push(tweetId)
          }
        }
      }
    }

    logger.info(`Found ${twitterStatusLinks.length} new Twitter status links to analyze`)

    // Fetch the referenced tweets
    let referencedTweets = []
    if (twitterStatusLinks.length > 0) {
      referencedTweets = await callTwitterApi('tweets', async () => {
        return fetchTweetsByIds(client, twitterStatusLinks)
      })
      logger.info(`Retrieved ${referencedTweets.length} referenced tweets`)

      // Add to cache
      tweetCache.add(referencedTweets)
    }

    // Create a map for quick lookup
    const referencedTweetsMap = {}
    for (const tweet of referencedTweets) {
      referencedTweetsMap[tweet.id] = tweet
    }

    // After getting tweets from lists, fetch replies as well
    if (tweetsWithLinks.length > 0) {
      logger.info(`Preparing to fetch replies${config.apiEfficiency.fetchRepliesForTweetsWithLinks ? ' for tweets with links' : ''}...`)

      try {
        const replies = await fetchRepliesForTweets(client, tweetsWithLinks)
        logger.info(`Found ${replies.length} new replies to tweets`)

        // Add replies to the main tweets collection
        tweetsWithLinks.push(...replies)

        // Also try to fetch the original tweet for any conversations where we only have replies
        try {
          const rootTweets = await callTwitterApi('tweets', async () => {
            return fetchConversationRootTweets(client, tweetsWithLinks)
          })

          if (rootTweets.length > 0) {
            logger.info(`Adding ${rootTweets.length} root tweets to complete conversations`)
            tweetsWithLinks.push(...rootTweets)
          }
        } catch (rootError) {
          logger.error(`Error fetching root tweets: ${rootError.message}`)
        }

        // Sort all tweets by conversation for better processing
        tweetsWithLinks.sort((a, b) => {
          // First sort by conversation_id
          if (a.conversation_id !== b.conversation_id) {
            return a.conversation_id?.localeCompare(b.conversation_id || '')
          }
          // Then by timestamp
          return new Date(a.created_at) - new Date(b.created_at)
        })
      } catch (replyError) {
        logger.error(`Error fetching replies: ${replyError.message}`)
      }
    }

    // Log cache stats
    logger.info(`Cache statistics: ${tweetCache.size()} unique tweets collected`)

    if (tweetsWithLinks.length > 0) {
      const formattedOutput = formatTweetOutput(tweetsWithLinks, referencedTweetsMap)
      console.log(formattedOutput)

      // Count how many conversations are actually shown
      const shownConversations = formattedOutput.split('-'.repeat(50)).length - 1

      logger.result(`Total conversations with valuable links: ${shownConversations}`)
    } else {
      logger.info('No tweets with links found in the specified time interval.')
    }

    // Show final API usage stats
    const finalApiUsage = await db.getApiUsage()
    logger.info(`Final API usage for today: ${JSON.stringify(finalApiUsage)}`)

    // Make recommendations for next run
    const apiUsagePercentages = Object.entries(finalApiUsage).map(([endpoint, count]) => {
      const limit = { lists: 75, tweets: 1000, users: 500, search: 450 }[endpoint] || 500
      return [endpoint, (count / limit) * 100]
    })

    const highestUsage = apiUsagePercentages.reduce((max, [endpoint, percentage]) =>
      percentage > max[1] ? [endpoint, percentage] : max, ['', 0])

    if (highestUsage[1] > 80) {
      logger.error(`WARNING: ${highestUsage[0]} endpoint at ${Math.round(highestUsage[1])}% of daily limit. Consider reducing runs for today.`)
    } else if (highestUsage[1] > 50) {
      logger.info(`NOTE: ${highestUsage[0]} endpoint at ${Math.round(highestUsage[1])}% of daily limit. Monitor usage if running again today.`)
    } else {
      logger.info('API usage is well within limits. Safe to run again today.')
    }
  } catch (error) {
    logger.error(error.message)
  } finally {
    await db.close()
  }
}

main()