From b3643fd5b0c849e6053f88ced0eb44914105d6c6 Mon Sep 17 00:00:00 2001 From: ksalk Date: Sun, 11 Jan 2026 20:23:38 +0000 Subject: [PATCH] Initial commit: Daily tech newsletter worker A Node.js/TypeScript worker that sends daily AI-generated summaries of tech news from X/Twitter via Nitter RSS feeds. Features: - Fetches tweets from 40+ curated tech accounts via Nitter RSS - Filters and categorizes by topic (AI/ML, SWE, General Tech) - Generates AI summaries using OpenRouter (Claude/GPT-4) - Sends professional HTML email via Brevo SMTP - Runs on cron schedule inside Docker container - Instance rotation for Nitter reliability - Graceful degradation if AI fails Co-Authored-By: Claude Opus 4.5 --- .env.example | 52 +++++ .gitignore | 35 +++ docker/Dockerfile | 42 ++++ docker/docker-compose.yml | 31 +++ package.json | 51 +++++ src/config/accounts.ts | 60 +++++ src/config/index.ts | 89 ++++++++ src/config/topics.ts | 210 ++++++++++++++++++ src/core/NewsletterPipeline.ts | 218 ++++++++++++++++++ src/core/TweetProcessor.ts | 134 ++++++++++++ src/index.ts | 94 ++++++++ src/services/ai/OpenRouterClient.ts | 50 +++++ src/services/ai/SummaryGenerator.ts | 119 ++++++++++ src/services/ai/prompts.ts | 80 +++++++ src/services/email/EmailService.ts | 92 ++++++++ src/services/email/templates.ts | 280 ++++++++++++++++++++++++ src/services/rss/NitterRssFetcher.ts | 164 ++++++++++++++ src/services/rss/types.ts | 25 +++ src/services/scheduler/CronScheduler.ts | 61 ++++++ src/types/index.ts | 108 +++++++++ src/utils/logger.ts | 22 ++ src/utils/retry.ts | 50 +++++ tsconfig.json | 24 ++ 23 files changed, 2091 insertions(+) create mode 100644 .env.example create mode 100644 .gitignore create mode 100644 docker/Dockerfile create mode 100644 docker/docker-compose.yml create mode 100644 package.json create mode 100644 src/config/accounts.ts create mode 100644 src/config/index.ts create mode 100644 src/config/topics.ts create mode 100644 src/core/NewsletterPipeline.ts create mode 100644 
src/core/TweetProcessor.ts create mode 100644 src/index.ts create mode 100644 src/services/ai/OpenRouterClient.ts create mode 100644 src/services/ai/SummaryGenerator.ts create mode 100644 src/services/ai/prompts.ts create mode 100644 src/services/email/EmailService.ts create mode 100644 src/services/email/templates.ts create mode 100644 src/services/rss/NitterRssFetcher.ts create mode 100644 src/services/rss/types.ts create mode 100644 src/services/scheduler/CronScheduler.ts create mode 100644 src/types/index.ts create mode 100644 src/utils/logger.ts create mode 100644 src/utils/retry.ts create mode 100644 tsconfig.json diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..7287644 --- /dev/null +++ b/.env.example @@ -0,0 +1,52 @@ +# ============================================================================= +# APPLICATION +# ============================================================================= +NODE_ENV=production +LOG_LEVEL=info # debug, info, warn, error + +# ============================================================================= +# RSS CONFIGURATION +# ============================================================================= +# Comma-separated list of Nitter instances (will rotate on failure) +NITTER_INSTANCES=https://nitter.poast.org,https://xcancel.com,https://nitter.privacydev.net +RSS_FETCH_TIMEOUT=30000 # 30 seconds +RSS_MAX_TWEETS_PER_ACCOUNT=50 + +# ============================================================================= +# OPENROUTER AI +# ============================================================================= +OPENROUTER_API_KEY=sk-or-v1-your-key-here +OPENROUTER_MODEL=anthropic/claude-3-sonnet-20240229 +# Alternative models: openai/gpt-4-turbo, anthropic/claude-3-opus, google/gemini-pro +OPENROUTER_MAX_TOKENS=2000 +OPENROUTER_SITE_URL=https://your-newsletter.com +OPENROUTER_SITE_NAME=Tech Newsletter + +# ============================================================================= +# BREVO SMTP +# 
============================================================================= +BREVO_SMTP_HOST=smtp-relay.brevo.com +BREVO_SMTP_PORT=587 +BREVO_SMTP_USER=your-brevo-login-email +BREVO_SMTP_KEY=your-brevo-smtp-api-key +EMAIL_FROM_ADDRESS=newsletter@yourdomain.com +EMAIL_FROM_NAME=Daily Tech Digest +# Comma-separated recipient list +EMAIL_RECIPIENTS=you@example.com + +# ============================================================================= +# SCHEDULER +# ============================================================================= +# Cron expression: minute hour day month weekday +# Default: Every day at 7:00 AM +CRON_SCHEDULE=0 7 * * * +CRON_TIMEZONE=Europe/Warsaw + +# ============================================================================= +# FEATURE FLAGS +# ============================================================================= +ENABLE_AI_SUMMARIES=true +INCLUDE_RETWEETS=false +INCLUDE_REPLIES=false +# Set true to skip email sending (for testing) +DRY_RUN=false diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..364e308 --- /dev/null +++ b/.gitignore @@ -0,0 +1,35 @@ +# Dependencies +node_modules/ + +# Build output +dist/ + +# Environment files +.env +.env.local +.env.*.local + +# Logs +logs/ +*.log +npm-debug.log* + +# IDE +.vscode/ +.idea/ +*.swp +*.swo + +# OS +.DS_Store +Thumbs.db + +# Testing +coverage/ + +# Misc +*.tgz +.cache/ + +# Claude Code +.claude/ diff --git a/docker/Dockerfile b/docker/Dockerfile new file mode 100644 index 0000000..0571537 --- /dev/null +++ b/docker/Dockerfile @@ -0,0 +1,42 @@ +# ============================================================================= +# Build Stage +# ============================================================================= +FROM node:20-alpine AS builder + +WORKDIR /app + +# Install dependencies first (better layer caching) +COPY package*.json ./ +RUN npm ci + +# Copy source and build +COPY tsconfig.json ./ +COPY src ./src +RUN npm run build + +# Prune dev 
dependencies +RUN npm prune --production + +# ============================================================================= +# Production Stage +# ============================================================================= +FROM node:20-alpine AS production + +# Security: run as non-root user +RUN addgroup -g 1001 -S nodejs && \ + adduser -S newsletter -u 1001 + +WORKDIR /app + +# Copy built application +COPY --from=builder --chown=newsletter:nodejs /app/node_modules ./node_modules +COPY --from=builder --chown=newsletter:nodejs /app/dist ./dist +COPY --from=builder --chown=newsletter:nodejs /app/package.json ./ + +USER newsletter + +# Set timezone (can be overridden via env) +ENV TZ=Europe/Warsaw + +# Default command +CMD ["node", "dist/index.js"] diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml new file mode 100644 index 0000000..c62d574 --- /dev/null +++ b/docker/docker-compose.yml @@ -0,0 +1,31 @@ +version: '3.8' + +services: + newsletter-worker: + build: + context: .. 
+ dockerfile: docker/Dockerfile + container_name: x-newsletter + restart: unless-stopped + env_file: + - ../.env + environment: + - NODE_ENV=production + - TZ=${CRON_TIMEZONE:-Europe/Warsaw} + volumes: + # Optional: persist logs + - ../logs:/app/logs + logging: + driver: "json-file" + options: + max-size: "10m" + max-file: "3" + # Resource limits + deploy: + resources: + limits: + cpus: '0.5' + memory: 512M + reservations: + cpus: '0.25' + memory: 256M diff --git a/package.json b/package.json new file mode 100644 index 0000000..b317958 --- /dev/null +++ b/package.json @@ -0,0 +1,51 @@ +{ + "name": "x-newsletter", + "version": "1.0.0", + "description": "Daily tech newsletter from X/Twitter via Nitter RSS feeds with AI summaries", + "main": "dist/index.js", + "type": "module", + "scripts": { + "build": "tsc", + "start": "node dist/index.js", + "dev": "tsx src/index.ts", + "run-now": "tsx src/index.ts --run-now", + "dry-run": "tsx src/index.ts --run-now --dry-run" + }, + "keywords": [ + "newsletter", + "twitter", + "x", + "rss", + "ai", + "automation" + ], + "author": "", + "license": "MIT", + "dependencies": { + "axios": "^1.7.9", + "date-fns": "^4.1.0", + "date-fns-tz": "^3.2.0", + "dotenv": "^16.4.7", + "he": "^1.2.0", + "node-cron": "^3.0.3", + "nodemailer": "^6.9.16", + "openai": "^4.77.0", + "pino": "^9.6.0", + "rss-parser": "^3.13.0", + "sanitize-html": "^2.14.0", + "zod": "^3.24.1" + }, + "devDependencies": { + "@types/he": "^1.2.3", + "@types/node": "^22.10.5", + "@types/node-cron": "^3.0.11", + "@types/nodemailer": "^6.4.17", + "@types/sanitize-html": "^2.13.0", + "pino-pretty": "^13.0.0", + "tsx": "^4.19.2", + "typescript": "^5.7.2" + }, + "engines": { + "node": ">=20.0.0" + } +} diff --git a/src/config/accounts.ts b/src/config/accounts.ts new file mode 100644 index 0000000..fee42fc --- /dev/null +++ b/src/config/accounts.ts @@ -0,0 +1,60 @@ +import type { TechAccount } from '../types/index.js'; + +export const TECH_ACCOUNTS: TechAccount[] = [ + // 
=========================================== + // AI / Machine Learning + // =========================================== + { username: 'karpathy', displayName: 'Andrej Karpathy', category: 'ai_ml', priority: 'high' }, + { username: 'ylecun', displayName: 'Yann LeCun', category: 'ai_ml', priority: 'high' }, + { username: 'AndrewYNg', displayName: 'Andrew Ng', category: 'ai_ml', priority: 'high' }, + { username: 'sama', displayName: 'Sam Altman', category: 'ai_ml', priority: 'high' }, + { username: 'demishassabis', displayName: 'Demis Hassabis', category: 'ai_ml', priority: 'high' }, + { username: 'goodfellow_ian', displayName: 'Ian Goodfellow', category: 'ai_ml', priority: 'medium' }, + { username: 'fchollet', displayName: 'François Chollet', category: 'ai_ml', priority: 'high' }, + { username: 'EMostaque', displayName: 'Emad Mostaque', category: 'ai_ml', priority: 'medium' }, + { username: 'DrJimFan', displayName: 'Jim Fan', category: 'ai_ml', priority: 'high' }, + { username: 'realGeorgeHotz', displayName: 'George Hotz', category: 'ai_ml', priority: 'medium' }, + { username: 'claudeai', displayName: 'Claude (Anthropic)', category: 'ai_ml', priority: 'medium' }, // NOTE(review): confirm this handle before relying on it + { username: 'OpenAI', displayName: 'OpenAI', category: 'ai_ml', priority: 'high' }, + { username: 'AnthropicAI', displayName: 'Anthropic', category: 'ai_ml', priority: 'high' }, + { username: 'huggingface', displayName: 'Hugging Face', category: 'ai_ml', priority: 'high' }, + + // =========================================== + // Software Engineering / Dev Tools + // =========================================== + { username: 'ThePrimeagen', displayName: 'ThePrimeagen', category: 'swe', priority: 'high' }, + { username: 'kelseyhightower', displayName: 'Kelsey Hightower', category: 'swe', priority: 'high' }, + { username: 'mitchellh', displayName: 'Mitchell Hashimoto', category: 'swe', priority: 'high' }, + { username: 'tjholowaychuk', displayName: 'TJ Holowaychuk', category: 'swe', priority: 'medium' }, + { username: 
'addyosmani', displayName: 'Addy Osmani', category: 'swe', priority: 'medium' }, + { username: 'sarah_edo', displayName: 'Sarah Drasner', category: 'swe', priority: 'medium' }, + { username: 'dan_abramov', displayName: 'Dan Abramov', category: 'swe', priority: 'high' }, + { username: 'swyx', displayName: 'swyx', category: 'swe', priority: 'medium' }, + { username: 'kentcdodds', displayName: 'Kent C. Dodds', category: 'swe', priority: 'medium' }, + { username: 'tannerlinsley', displayName: 'Tanner Linsley', category: 'swe', priority: 'medium' }, + { username: 'rough__sea', displayName: 'Ryan Dahl', category: 'swe', priority: 'high' }, // NOTE(review): confirm this handle before relying on it + { username: 'vercel', displayName: 'Vercel', category: 'swe', priority: 'medium' }, + { username: 'github', displayName: 'GitHub', category: 'swe', priority: 'medium' }, + + // =========================================== + // General Tech / Startups + // =========================================== + { username: 'levelsio', displayName: 'Pieter Levels', category: 'general_tech', priority: 'high' }, + { username: 'paulg', displayName: 'Paul Graham', category: 'general_tech', priority: 'high' }, + { username: 'naval', displayName: 'Naval Ravikant', category: 'general_tech', priority: 'high' }, + { username: 'elonmusk', displayName: 'Elon Musk', category: 'general_tech', priority: 'medium' }, + { username: 'jason', displayName: 'Jason Calacanis', category: 'general_tech', priority: 'medium' }, + { username: 'balajis', displayName: 'Balaji Srinivasan', category: 'general_tech', priority: 'medium' }, + { username: 'pmarca', displayName: 'Marc Andreessen', category: 'general_tech', priority: 'medium' }, + { username: 'dhh', displayName: 'DHH', category: 'general_tech', priority: 'high' }, + { username: 'benedictevans', displayName: 'Benedict Evans', category: 'general_tech', priority: 'medium' }, + { username: 'jasonfried', displayName: 'Jason Fried', category: 'general_tech', priority: 'medium' }, +]; + +export function 
getAccountsByCategory(category: TechAccount['category']): TechAccount[] { + return TECH_ACCOUNTS.filter((account) => account.category === category); +} + +export function getHighPriorityAccounts(): TechAccount[] { + return TECH_ACCOUNTS.filter((account) => account.priority === 'high'); +} diff --git a/src/config/index.ts b/src/config/index.ts new file mode 100644 index 0000000..05c731f --- /dev/null +++ b/src/config/index.ts @@ -0,0 +1,89 @@ +import { config as dotenvConfig } from 'dotenv'; +import { z } from 'zod'; +import type { AppConfig } from '../types/index.js'; + +dotenvConfig(); + +const envSchema = z.object({ + NODE_ENV: z.enum(['development', 'production', 'test']).default('development'), + LOG_LEVEL: z.enum(['debug', 'info', 'warn', 'error']).default('info'), + + NITTER_INSTANCES: z.string().default('https://nitter.poast.org,https://xcancel.com'), + RSS_FETCH_TIMEOUT: z.coerce.number().default(30000), + RSS_MAX_TWEETS_PER_ACCOUNT: z.coerce.number().default(50), + + OPENROUTER_API_KEY: z.string().min(1, 'OPENROUTER_API_KEY is required'), + OPENROUTER_MODEL: z.string().default('anthropic/claude-3-sonnet-20240229'), + OPENROUTER_MAX_TOKENS: z.coerce.number().default(2000), + OPENROUTER_SITE_URL: z.string().default('https://tech-newsletter.local'), + OPENROUTER_SITE_NAME: z.string().default('Tech Newsletter'), + + BREVO_SMTP_HOST: z.string().default('smtp-relay.brevo.com'), + BREVO_SMTP_PORT: z.coerce.number().default(587), + BREVO_SMTP_USER: z.string().min(1, 'BREVO_SMTP_USER is required'), + BREVO_SMTP_KEY: z.string().min(1, 'BREVO_SMTP_KEY is required'), + EMAIL_FROM_ADDRESS: z.string().email('Invalid EMAIL_FROM_ADDRESS'), + EMAIL_FROM_NAME: z.string().default('Daily Tech Digest'), + EMAIL_RECIPIENTS: z.string().min(1, 'EMAIL_RECIPIENTS is required'), + + CRON_SCHEDULE: z.string().default('0 7 * * *'), + CRON_TIMEZONE: z.string().default('Europe/Warsaw'), + // Flags are parsed from their text: z.coerce.boolean() is Boolean(value), which turns the string "false" into true. Only true/1/yes/on (any case) enable a flag. + ENABLE_AI_SUMMARIES: z.string().default('true').transform((v) => /^(true|1|yes|on)$/i.test(v.trim())), + INCLUDE_RETWEETS: 
z.string().default('false').transform((v) => /^(true|1|yes|on)$/i.test(v.trim())), + INCLUDE_REPLIES: z.string().default('false').transform((v) => /^(true|1|yes|on)$/i.test(v.trim())), + DRY_RUN: z.string().default('false').transform((v) => /^(true|1|yes|on)$/i.test(v.trim())), +}); + +function loadConfig(): AppConfig { + const result = envSchema.safeParse(process.env); + + if (!result.success) { + console.error('Configuration validation failed:'); + for (const error of result.error.errors) { + console.error(` - ${error.path.join('.')}: ${error.message}`); + } + process.exit(1); + } + + const env = result.data; + + return { + rss: { + nitterInstances: env.NITTER_INSTANCES.split(',').map((s) => s.trim()).filter((s) => s.length > 0), // drop empty entries left by a trailing comma + fetchTimeout: env.RSS_FETCH_TIMEOUT, + maxTweetsPerAccount: env.RSS_MAX_TWEETS_PER_ACCOUNT, + }, + ai: { + openRouterApiKey: env.OPENROUTER_API_KEY, + model: env.OPENROUTER_MODEL, + maxTokens: env.OPENROUTER_MAX_TOKENS, + siteUrl: env.OPENROUTER_SITE_URL, + siteName: env.OPENROUTER_SITE_NAME, + }, + email: { + brevoHost: env.BREVO_SMTP_HOST, + brevoPort: env.BREVO_SMTP_PORT, + brevoUser: env.BREVO_SMTP_USER, + brevoApiKey: env.BREVO_SMTP_KEY, + fromEmail: env.EMAIL_FROM_ADDRESS, + fromName: env.EMAIL_FROM_NAME, + recipients: env.EMAIL_RECIPIENTS.split(',').map((s) => s.trim()).filter((s) => s.length > 0), // drop empty entries left by a trailing comma + }, + scheduler: { + cronExpression: env.CRON_SCHEDULE, + timezone: env.CRON_TIMEZONE, + }, + features: { + enableAiSummaries: env.ENABLE_AI_SUMMARIES, + includeRetweets: env.INCLUDE_RETWEETS, + includeReplies: env.INCLUDE_REPLIES, + dryRun: env.DRY_RUN, + }, + logging: { + level: env.LOG_LEVEL, + }, + }; +} + +export const config = loadConfig(); diff --git a/src/config/topics.ts b/src/config/topics.ts new file mode 100644 index 0000000..617ccfc --- /dev/null +++ b/src/config/topics.ts @@ -0,0 +1,210 @@ +import type { TopicConfig, TopicId } from '../types/index.js'; + +export const TOPICS: TopicConfig[] = [ + { + id: 'ai_ml', + name: 'AI & Machine Learning', + keywords: [ + 'gpt', + 'gpt-4', + 'gpt-5', + 'chatgpt', + 'llm', + 'large language model', + 'transformer', + 'neural network', + 'deep learning', + 'machine learning', + 
'ml', + 'artificial intelligence', + 'ai', + 'openai', + 'anthropic', + 'claude', + 'gemini', + 'mistral', + 'llama', + 'fine-tuning', + 'fine tuning', + 'rag', + 'retrieval augmented', + 'embedding', + 'vector database', + 'prompt engineering', + 'prompt', + 'diffusion', + 'stable diffusion', + 'midjourney', + 'dall-e', + 'sora', + 'multimodal', + 'vision model', + 'nlp', + 'natural language', + 'rlhf', + 'reinforcement learning', + 'model training', + 'inference', + 'hugging face', + 'pytorch', + 'tensorflow', + 'jax', + 'agi', + 'superintelligence', + 'ai safety', + 'alignment', + ], + icon: '🤖', + }, + { + id: 'swe', + name: 'Software Engineering', + keywords: [ + 'programming', + 'coding', + 'software', + 'developer', + 'development', + 'typescript', + 'javascript', + 'python', + 'rust', + 'go', + 'golang', + 'java', + 'c++', + 'react', + 'vue', + 'angular', + 'svelte', + 'node', + 'nodejs', + 'deno', + 'bun', + 'next.js', + 'nextjs', + 'remix', + 'api', + 'rest', + 'graphql', + 'grpc', + 'microservices', + 'kubernetes', + 'k8s', + 'docker', + 'container', + 'devops', + 'ci/cd', + 'git', + 'github', + 'gitlab', + 'open source', + 'oss', + 'framework', + 'library', + 'package', + 'npm', + 'testing', + 'tdd', + 'clean code', + 'architecture', + 'design pattern', + 'refactoring', + 'database', + 'postgresql', + 'mysql', + 'mongodb', + 'redis', + 'aws', + 'azure', + 'gcp', + 'cloud', + 'serverless', + 'edge', + 'wasm', + 'webassembly', + 'vim', + 'neovim', + 'vscode', + 'ide', + 'terminal', + 'cli', + ], + icon: '💻', + }, + { + id: 'general_tech', + name: 'Tech & Startups', + keywords: [ + 'startup', + 'founder', + 'entrepreneur', + 'indie hacker', + 'indiehacker', + 'saas', + 'product', + 'launch', + 'shipped', + 'mvp', + 'funding', + 'vc', + 'venture capital', + 'seed', + 'series a', + 'ipo', + 'acquisition', + 'tech news', + 'silicon valley', + 'y combinator', + 'yc', + 'product hunt', + 'hacker news', + 'tech twitter', + 'remote work', + 'wfh', + 'crypto', + 
'blockchain', + 'web3', + 'bitcoin', + 'ethereum', + 'nft', + 'defi', + 'fintech', + 'biotech', + 'climate tech', + 'hardware', + 'robotics', + 'automation', + 'productivity', + 'notion', + 'obsidian', + 'tech layoffs', + 'hiring', + 'tech jobs', + 'career', + 'salary', + 'compensation', + ], + icon: '🚀', + }, +]; + +export function getTopicById(id: TopicId): TopicConfig | undefined { + return TOPICS.find((topic) => topic.id === id); +} + +export function matchTopics(text: string): TopicId[] { + const lowerText = text.toLowerCase(); + const matched: TopicId[] = []; + // Match whole words/phrases (with an optional plural "s") rather than raw substrings, so 'ai' no longer hits "said" and 'go' no longer hits "google". + for (const topic of TOPICS) { + for (const keyword of topic.keywords) { + if (new RegExp(`(?<![a-z0-9])${keyword.toLowerCase().replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}s?(?![a-z0-9])`).test(lowerText)) { // keyword is regex-escaped because entries such as 'c++' and 'next.js' contain metacharacters + matched.push(topic.id); + break; + } + } + } + + return matched; +} diff --git a/src/core/NewsletterPipeline.ts b/src/core/NewsletterPipeline.ts new file mode 100644 index 0000000..79b9318 --- /dev/null +++ b/src/core/NewsletterPipeline.ts @@ -0,0 +1,218 @@ +import { config } from '../config/index.js'; +import { TOPICS } from '../config/topics.js'; +import { logger } from '../utils/logger.js'; +import { NitterRssFetcher } from '../services/rss/NitterRssFetcher.js'; +import { TweetProcessor } from './TweetProcessor.js'; +import { SummaryGenerator } from '../services/ai/SummaryGenerator.js'; +import { EmailService } from '../services/email/EmailService.js'; +import type { + Newsletter, + TopicSummary, + PipelineResult, + PipelineError, + TopicId, +} from '../types/index.js'; + +export class NewsletterPipeline { + private rssFetcher: NitterRssFetcher; + private tweetProcessor: TweetProcessor; + private summaryGenerator: SummaryGenerator; + private emailService: EmailService; + + constructor() { + this.rssFetcher = new NitterRssFetcher(); + this.tweetProcessor = new TweetProcessor(); + this.summaryGenerator = new SummaryGenerator(); + this.emailService = new EmailService(); + } + + async run(): Promise { + const errors: PipelineError[] = []; + const startTime = Date.now(); + + 
logger.info('Starting newsletter pipeline'); + + try { + // Step 1: Fetch RSS feeds + logger.info('Step 1: Fetching RSS feeds'); + const fetchResult = await this.rssFetcher.fetchAll(); + + for (const err of fetchResult.errors) { + errors.push({ + stage: 'rss', + message: `Failed to fetch @${err.account}: ${err.error}`, + }); + } + + if (fetchResult.tweets.length === 0) { + throw new Error('No tweets fetched from any source'); + } + + logger.info({ tweetCount: fetchResult.tweets.length }, 'RSS fetch complete'); + + // Step 2: Process tweets + logger.info('Step 2: Processing tweets'); + const processedTweets = this.tweetProcessor.process(fetchResult.tweets); + const tweetsByTopic = this.tweetProcessor.groupByTopic(processedTweets); + + logger.info( + { + processedCount: processedTweets.length, + topics: Array.from(tweetsByTopic.keys()), + }, + 'Tweet processing complete' + ); + + // Step 3: Generate AI summaries + logger.info('Step 3: Generating AI summaries'); + const topicSummaries = await this.generateSummaries(tweetsByTopic, errors); + + // Step 4: Generate daily insights + logger.info('Step 4: Generating daily insights'); + let insights: string; + try { + if (config.features.enableAiSummaries) { + insights = await this.summaryGenerator.generateDailyInsights(topicSummaries); + } else { + insights = this.createFallbackInsights(topicSummaries); + } + } catch (error) { + logger.error({ error }, 'Failed to generate insights'); + errors.push({ + stage: 'ai', + message: 'Failed to generate daily insights', + details: error, + }); + insights = this.createFallbackInsights(topicSummaries); + } + + // Step 5: Build newsletter + logger.info('Step 5: Building newsletter'); + const newsletter: Newsletter = { + date: new Date(), + insights, + topics: topicSummaries, + totalTweets: processedTweets.length, + errors, + }; + + // Step 6: Send email + logger.info('Step 6: Sending email'); + try { + const sendResult = await this.emailService.sendNewsletter(newsletter); + + if 
(!sendResult.success) { + errors.push({ + stage: 'email', + message: sendResult.error || 'Email send failed', + }); + } + } catch (error) { + logger.error({ error }, 'Email sending failed'); + errors.push({ + stage: 'email', + message: 'Failed to send newsletter email', + details: error, + }); + } + + const duration = Date.now() - startTime; + logger.info( + { + durationMs: duration, + totalTweets: processedTweets.length, + topicCount: topicSummaries.length, + errorCount: errors.length, + }, + 'Newsletter pipeline completed' + ); + + return { + success: errors.filter((e) => e.stage === 'email').length === 0, + newsletter, + errors, + }; + } catch (error) { + logger.error({ error }, 'Newsletter pipeline failed'); + + return { + success: false, + errors: [ + ...errors, + { + stage: 'process', + message: error instanceof Error ? error.message : 'Unknown error', + details: error, + }, + ], + }; + } + } + + private async generateSummaries( + tweetsByTopic: Map, + errors: PipelineError[] + ): Promise { + const summaries: TopicSummary[] = []; + + for (const topic of TOPICS) { + const tweets = tweetsByTopic.get(topic.id) || []; + + try { + if (config.features.enableAiSummaries) { + const summary = await this.summaryGenerator.generateTopicSummary(topic.id, tweets); + summaries.push(summary); + } else { + summaries.push(this.createBasicSummary(topic, tweets)); + } + } catch (error) { + logger.error({ topic: topic.id, error }, 'Failed to generate topic summary'); + errors.push({ + stage: 'ai', + message: `Failed to generate summary for ${topic.name}`, + details: error, + }); + summaries.push(this.createBasicSummary(topic, tweets)); + } + } + + return summaries; + } + + private createBasicSummary( + topic: typeof TOPICS[number], + tweets: import('../types/index.js').ProcessedTweet[] + ): TopicSummary { + const uniqueAuthors = [...new Set(tweets.map((t) => t.authorDisplayName))]; + + return { + topic, + summary: + tweets.length > 0 + ? 
`${tweets.length} posts from ${uniqueAuthors.slice(0, 3).join(', ')}${uniqueAuthors.length > 3 ? ' and others' : ''}.` + : `No ${topic.name} updates today.`, + highlights: tweets.slice(0, 3).map((t) => ({ + tweet: t.content.slice(0, 200), + author: t.author, + context: `From ${t.authorDisplayName}`, + link: t.link, + })), + trends: [], + tweetCount: tweets.length, + }; + } + + private createFallbackInsights(summaries: TopicSummary[]): string { + const activeTopic = summaries.find((s) => s.tweetCount > 0); + if (!activeTopic) { + return 'A quiet day in tech - check back tomorrow!'; + } + + const topicNames = summaries + .filter((s) => s.tweetCount > 0) + .map((s) => s.topic.name) + .join(', '); + + return `Today's tech discourse spans ${topicNames}. Dive into the highlights below!`; + } +} diff --git a/src/core/TweetProcessor.ts b/src/core/TweetProcessor.ts new file mode 100644 index 0000000..e4bbf4a --- /dev/null +++ b/src/core/TweetProcessor.ts @@ -0,0 +1,134 @@ +import { subHours, isAfter } from 'date-fns'; +import { config } from '../config/index.js'; +import { TECH_ACCOUNTS } from '../config/accounts.js'; +import { matchTopics } from '../config/topics.js'; +import { logger } from '../utils/logger.js'; +import type { RawTweet, ProcessedTweet, TopicId } from '../types/index.js'; + +export class TweetProcessor { + private hoursLookback: number = 24; + + process(rawTweets: RawTweet[]): ProcessedTweet[] { + logger.info({ input: rawTweets.length }, 'Processing tweets'); + + let tweets = rawTweets.map((tweet) => this.enrichTweet(tweet)); + + // Filter by time + const cutoff = subHours(new Date(), this.hoursLookback); + tweets = tweets.filter((t) => isAfter(t.timestamp, cutoff)); + logger.debug({ afterTimeFilter: tweets.length }, 'Filtered by time'); + + // Filter retweets if configured + if (!config.features.includeRetweets) { + tweets = tweets.filter((t) => !t.isRetweet); + logger.debug({ afterRtFilter: tweets.length }, 'Filtered retweets'); + } + + // Filter replies 
if configured + if (!config.features.includeReplies) { + tweets = tweets.filter((t) => !t.isReply); + logger.debug({ afterReplyFilter: tweets.length }, 'Filtered replies'); + } + + // Deduplicate by content similarity + tweets = this.deduplicate(tweets); + logger.debug({ afterDedupe: tweets.length }, 'Deduplicated'); + + // Sort by relevance (high priority accounts first, then by time) + tweets = this.sortByRelevance(tweets); + + logger.info({ output: tweets.length }, 'Tweet processing complete'); + + return tweets; + } + + private enrichTweet(tweet: RawTweet): ProcessedTweet { + const content = tweet.content.toLowerCase(); + + // Detect retweets + const isRetweet = content.startsWith('rt @') || content.includes(' rt @'); + + // Detect replies (starts with @mention) + const isReply = tweet.content.trim().startsWith('@'); + + // Get account's default category + const account = TECH_ACCOUNTS.find((a) => a.username === tweet.author); + const accountCategory = account?.category; + + // Match topics from content keywords + const contentTopics = matchTopics(tweet.content); + + // Combine account category with content-matched topics + const topics: TopicId[] = accountCategory + ? [...new Set([accountCategory, ...contentTopics])] + : contentTopics.length > 0 + ? 
contentTopics + : ['general_tech']; + + return { + ...tweet, + topics, + isRetweet, + isReply, + }; + } + + private deduplicate(tweets: ProcessedTweet[]): ProcessedTweet[] { + const seen = new Map(); + + for (const tweet of tweets) { + const normalized = this.normalizeContent(tweet.content); + + if (normalized.length < 20) continue; + + const existing = seen.get(normalized); + if (!existing || tweet.timestamp > existing.timestamp) { + seen.set(normalized, tweet); + } + } + + return Array.from(seen.values()); + } + + private normalizeContent(content: string): string { + return content + .toLowerCase() + .replace(/https?:\/\/\S+/g, '') + .replace(/@\w+/g, '') + .replace(/[^\w\s]/g, '') + .replace(/\s+/g, ' ') + .trim(); + } + + private sortByRelevance(tweets: ProcessedTweet[]): ProcessedTweet[] { + const priorityMap = new Map(); + for (const account of TECH_ACCOUNTS) { + priorityMap.set(account.username, account.priority === 'high' ? 3 : account.priority === 'medium' ? 2 : 1); + } + + return tweets.sort((a, b) => { + const priorityA = priorityMap.get(a.author) || 0; + const priorityB = priorityMap.get(b.author) || 0; + + if (priorityA !== priorityB) { + return priorityB - priorityA; + } + + return b.timestamp.getTime() - a.timestamp.getTime(); + }); + } + + groupByTopic(tweets: ProcessedTweet[]): Map { + const grouped = new Map(); + + for (const tweet of tweets) { + for (const topic of tweet.topics) { + const existing = grouped.get(topic) || []; + existing.push(tweet); + grouped.set(topic, existing); + } + } + + return grouped; + } +} diff --git a/src/index.ts b/src/index.ts new file mode 100644 index 0000000..5391638 --- /dev/null +++ b/src/index.ts @@ -0,0 +1,94 @@ +import { config } from './config/index.js'; +import { logger } from './utils/logger.js'; +import { NewsletterPipeline } from './core/NewsletterPipeline.js'; +import { CronScheduler } from './services/scheduler/CronScheduler.js'; +import { EmailService } from './services/email/EmailService.js'; + +async 
/**
 * Process entry point.
 *
 * With `--run-now` the pipeline runs once and the process exits with 0 on
 * success or 1 on failure. Otherwise the SMTP connection is verified
 * (a failure is only a warning), the pipeline is registered on the configured
 * cron expression, and the process waits until SIGTERM or SIGINT.
 */
async function main(): Promise<void> {
  const args = process.argv.slice(2);
  const runNow = args.includes('--run-now');
  const dryRun = args.includes('--dry-run');

  if (dryRun) {
    // NOTE(review): `config` is imported before this assignment executes. If
    // config/index.ts reads DRY_RUN once at import time, `--dry-run` will not
    // change `config.features.dryRun` (which EmailService checks) — confirm.
    process.env.DRY_RUN = 'true';
  }

  logger.info(
    {
      runNow,
      dryRun: config.features.dryRun || dryRun,
      cronSchedule: config.scheduler.cronExpression,
      timezone: config.scheduler.timezone,
      recipients: config.email.recipients.length,
    },
    'X-Newsletter starting'
  );

  const pipeline = new NewsletterPipeline();

  if (runNow) {
    logger.info('Running newsletter pipeline immediately');

    const result = await pipeline.run();

    if (result.success) {
      logger.info('Newsletter sent successfully');
      process.exit(0);
    } else {
      logger.error({ errors: result.errors }, 'Newsletter pipeline failed');
      process.exit(1);
    }
  }

  // Verify email connection on startup
  const emailService = new EmailService();
  const emailConnected = await emailService.verifyConnection();

  if (!emailConnected) {
    logger.warn('Email service connection could not be verified - will retry on send');
  }

  // Set up scheduled execution
  const scheduler = new CronScheduler();

  scheduler.schedule(
    'daily-newsletter',
    config.scheduler.cronExpression,
    async () => {
      const result = await pipeline.run();

      if (!result.success) {
        logger.error({ errors: result.errors }, 'Scheduled newsletter failed');
      }
    },
    { timezone: config.scheduler.timezone }
  );

  scheduler.start();

  logger.info(
    {
      schedule: config.scheduler.cronExpression,
      timezone: config.scheduler.timezone,
    },
    'Newsletter scheduler started. Waiting for next scheduled run...'
  );

  // Handle graceful shutdown
  const shutdown = (): void => {
    logger.info('Shutting down...');
    scheduler.stop();
    process.exit(0);
  };

  process.on('SIGTERM', shutdown);
  process.on('SIGINT', shutdown);

  // Keep the process running
  await new Promise<never>(() => {});
}

main().catch((error: unknown) => {
  // Log under `err`: pino's default error serializer is bound to that key.
  // Under any other key an Error is JSON-serialized as `{}` and the message
  // and stack of the fatal error are lost.
  logger.error({ err: error }, 'Fatal error');
  process.exit(1);
});
/**
 * Turns the tweets of one topic into a `TopicSummary` using the OpenRouter
 * client, and combines topic summaries into a short cross-topic insight.
 * Both operations fall back to locally built text when the AI call or the
 * parsing of its answer fails.
 */
export class SummaryGenerator {
  private client: OpenRouterClient;

  constructor() {
    this.client = new OpenRouterClient();
  }

  /**
   * Summarizes the tweets of a single topic.
   *
   * @param topicId - Id of a topic listed in TOPICS.
   * @param tweets - Tweets already assigned to that topic.
   * @returns The AI summary, a placeholder summary when `tweets` is empty, or
   *   a fallback summary when the AI call or response parsing fails.
   * @throws Error when `topicId` is not present in TOPICS.
   */
  async generateTopicSummary(
    topicId: TopicId,
    tweets: ProcessedTweet[]
  ): Promise<TopicSummary> {
    const topic = TOPICS.find((t) => t.id === topicId);
    if (!topic) {
      throw new Error(`Unknown topic: ${topicId}`);
    }

    logger.info({ topic: topic.name, tweetCount: tweets.length }, 'Generating topic summary');

    if (tweets.length === 0) {
      return {
        topic,
        summary: `No significant ${topic.name} discussions today.`,
        highlights: [],
        trends: [],
        tweetCount: 0,
      };
    }

    try {
      const prompt = buildSummaryPrompt(topic, tweets);
      const response = await this.client.generateCompletion(prompt);
      const parsed = parseSummaryResponse(response);

      const highlights: TweetHighlight[] = parsed.highlights.map((h) => {
        // The prompt lists tweets as "@username", so the model may echo the
        // "@"; strip it so the profile link below stays valid.
        const author = h.author.replace(/^@+/, '').trim();
        const matchingTweet = this.findSourceTweet(tweets, author, h.tweet);

        return {
          tweet: h.tweet,
          author,
          context: h.context,
          link: matchingTweet?.link || `https://x.com/${author}`,
        };
      });

      return {
        topic,
        summary: parsed.summary,
        highlights,
        trends: parsed.trends,
        tweetCount: tweets.length,
      };
    } catch (error) {
      // `err` is the key pino's default error serializer is bound to.
      logger.error({ topic: topic.name, err: error }, 'Failed to generate AI summary');
      return this.createFallbackSummary(topic, tweets);
    }
  }

  /**
   * Produces the cross-topic insight paragraph.
   *
   * @param topicSummaries - Summaries produced for each topic.
   * @returns The trimmed AI text, a fixed "quiet day" sentence when no topic
   *   has tweets, or a locally built sentence when the AI call fails.
   */
  async generateDailyInsights(topicSummaries: TopicSummary[]): Promise<string> {
    logger.info('Generating daily insights');

    if (topicSummaries.length === 0 || topicSummaries.every((s) => s.tweetCount === 0)) {
      return 'A quiet day in tech - stay tuned for tomorrow\'s updates!';
    }

    try {
      const summaryTexts = topicSummaries
        .filter((s) => s.tweetCount > 0)
        .map((s) => `${s.topic.name}: ${s.summary}`);

      const prompt = buildInsightsPrompt(summaryTexts);
      const response = await this.client.generateCompletion(prompt);

      return response.trim();
    } catch (error) {
      logger.error({ err: error }, 'Failed to generate daily insights');
      return this.createFallbackInsights(topicSummaries);
    }
  }

  /**
   * Finds the tweet a highlight most likely refers to.
   *
   * A text match on the first 50 characters of the quoted tweet wins (same
   * author first, then any author); only when no text matches does the first
   * tweet by the same author get used. Matching on the author alone would
   * link every highlight of an author to that author's first tweet, and an
   * empty quote must not be used for matching because `includes('')` is true
   * for every tweet.
   */
  private findSourceTweet(
    tweets: ProcessedTweet[],
    author: string,
    quotedText: string
  ): ProcessedTweet | undefined {
    const wantedAuthor = author.toLowerCase();
    const byAuthor = tweets.filter((t) => t.author.toLowerCase() === wantedAuthor);
    const snippet = quotedText.slice(0, 50).trim();

    if (snippet !== '') {
      const byText =
        byAuthor.find((t) => t.content.includes(snippet)) ??
        tweets.find((t) => t.content.includes(snippet));
      if (byText) {
        return byText;
      }
    }

    return byAuthor[0];
  }

  /** Builds a summary without AI: a count sentence plus the first three tweets as highlights. */
  private createFallbackSummary(
    topic: typeof TOPICS[number],
    tweets: ProcessedTweet[]
  ): TopicSummary {
    const topTweets = tweets.slice(0, 3);
    const voices = [...new Set(tweets.slice(0, 5).map((t) => t.authorDisplayName))].join(', ');

    return {
      topic,
      summary: `${tweets.length} discussions in ${topic.name} today from voices like ${voices}.`,
      highlights: topTweets.map((t) => ({
        tweet: t.content.slice(0, 200),
        author: t.author,
        context: `Posted by ${t.authorDisplayName}`,
        link: t.link,
      })),
      trends: [],
      tweetCount: tweets.length,
    };
  }

  /** Builds the insight sentence without AI by listing the topics that have tweets. */
  private createFallbackInsights(topicSummaries: TopicSummary[]): string {
    const topics = topicSummaries
      .filter((s) => s.tweetCount > 0)
      .map((s) => s.topic.name);

    if (topics.length === 0) {
      return 'Stay tuned for the next tech update!';
    }

    return `Today's tech discussions span ${topics.join(', ')}. Check out the highlights below!`;
  }
}
@${t.author} (${t.authorDisplayName}): "${t.content.slice(0, 280)}"` + ) + .join('\n'); + + return `You are a tech newsletter editor creating a daily digest about ${topic.name}. + +Analyze these tweets and provide: +1. A concise summary (2-3 sentences) of the key themes and discussions +2. The top 3 most important or interesting tweets with brief context explaining why they matter +3. Any emerging trends or notable patterns + +Tweets: +${tweetList} + +Respond ONLY with valid JSON in this exact format: +{ + "summary": "A 2-3 sentence summary of key themes...", + "highlights": [ + { + "tweet": "The tweet content...", + "author": "username", + "context": "Why this tweet matters..." + } + ], + "trends": ["Trend 1", "Trend 2"] +}`; +} + +export function buildInsightsPrompt(topicSummaries: string[]): string { + return `You are a tech newsletter editor. Based on these topic summaries, provide a brief cross-topic insight (2-3 sentences) highlighting the most important themes of the day and any connections between different areas. + +Topic Summaries: +${topicSummaries.join('\n\n')} + +Respond with just the insight text, no JSON or formatting. 
/** Shape of the JSON object the summary prompt asks the model to return. */
export interface ParsedSummary {
  summary: string;
  highlights: {
    tweet: string;
    author: string;
    context: string;
  }[];
  trends: string[];
}

/** True for plain JSON objects (not null, not arrays). */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}

/** Returns the value when it is a string, otherwise an empty string. */
function asString(value: unknown): string {
  return typeof value === 'string' ? value : '';
}

/**
 * Extracts and validates the JSON summary from a model response.
 *
 * The first `{` through the last `}` of the response is parsed, so JSON
 * wrapped in prose or a Markdown code fence is accepted. Model output is
 * untrusted: `summary` must be a non-empty string, highlight entries that are
 * not objects are skipped, non-string highlight fields become `''`, and
 * non-string trends are dropped, so callers always receive strings.
 *
 * @param response - Raw text returned by the model.
 * @returns At most 3 highlights and at most 5 trends alongside the summary.
 * @throws Error prefixed with "Failed to parse AI response: " when no JSON
 *   object is found, the JSON is malformed, or the structure is invalid.
 */
export function parseSummaryResponse(response: string): ParsedSummary {
  try {
    const jsonMatch = response.match(/\{[\s\S]*\}/);
    if (!jsonMatch) {
      throw new Error('No JSON found in response');
    }

    const parsed: unknown = JSON.parse(jsonMatch[0]);

    if (
      !isRecord(parsed) ||
      typeof parsed.summary !== 'string' ||
      parsed.summary === '' ||
      !Array.isArray(parsed.highlights) ||
      !Array.isArray(parsed.trends)
    ) {
      throw new Error('Invalid response structure');
    }

    return {
      summary: parsed.summary,
      highlights: parsed.highlights
        .filter(isRecord)
        .slice(0, 3)
        .map((h) => ({
          tweet: asString(h.tweet),
          author: asString(h.author),
          context: asString(h.context),
        })),
      trends: parsed.trends
        .filter((t): t is string => typeof t === 'string')
        .slice(0, 5),
    };
  } catch (error) {
    throw new Error(`Failed to parse AI response: ${error instanceof Error ? error.message : 'Unknown error'}`);
  }
}
/**
 * Renders the newsletter to HTML and sends it as a single message addressed
 * to every configured recipient.
 *
 * When `config.features.dryRun` is set nothing is sent and a synthetic
 * success result with `messageId: 'dry-run'` is returned. Otherwise the send
 * is attempted up to 3 times through `withRetry`.
 *
 * @param newsletter - Newsletter content to render and send.
 * @returns Message id plus accepted/rejected addresses reported by the transport.
 * @throws The last transport error once all attempts have failed.
 */
async sendNewsletter(newsletter: Newsletter): Promise<SendResult> {
  const recipients = config.email.recipients;
  const subject = `Daily Tech Digest - ${format(newsletter.date, 'MMM d, yyyy')}`;
  const html = renderNewsletterHtml(newsletter);

  logger.info({ recipients: recipients.length, subject }, 'Sending newsletter');

  if (config.features.dryRun) {
    logger.info('DRY RUN: Skipping email send');
    return {
      success: true,
      messageId: 'dry-run',
      accepted: recipients,
      rejected: [],
    };
  }

  return withRetry(
    async () => {
      const info = await this.transporter.sendMail({
        from: `"${config.email.fromName}" <${config.email.fromEmail}>`,
        to: recipients.join(', '),
        subject,
        html,
      });

      logger.info(
        {
          messageId: info.messageId,
          accepted: info.accepted,
          rejected: info.rejected,
        },
        'Newsletter sent successfully'
      );

      return {
        success: true,
        messageId: info.messageId,
        accepted: info.accepted as string[],
        rejected: info.rejected as string[],
      };
    },
    {
      maxAttempts: 3,
      baseDelay: 5000,
    }
  );
}

/**
 * Checks that the SMTP transport can be reached with the configured credentials.
 *
 * @returns `true` when `transporter.verify()` resolves, `false` when it rejects.
 */
async verifyConnection(): Promise<boolean> {
  try {
    await this.transporter.verify();
    logger.info('Email service connection verified');
    return true;
  } catch (error) {
    // Log under `err` so pino's default error serializer keeps the message
    // and stack; under `error` an Error instance is written as `{}`.
    logger.error({ err: error }, 'Email service connection failed');
    return false;
  }
}
+ ${renderHeader(dateStr)} +
+ ${renderInsights(newsletter.insights)} + ${newsletter.topics.map(renderTopicSection).join('')} +
+ ${renderFooter(newsletter)} +
+ + + `.trim(); +} + +function renderHeader(dateStr: string): string { + return ` +
+

Daily Tech Digest

+
${dateStr}
+
+ `; +} + +function renderInsights(insights: string): string { + return ` +
+

Today's Insights

+

${escapeHtml(insights)}

+
+ `; +} + +function renderTopicSection(topic: TopicSummary): string { + if (topic.tweetCount === 0) { + return ''; + } + + return ` +
+
+ ${topic.topic.icon} +

${escapeHtml(topic.topic.name)}

+ ${topic.tweetCount} tweets +
+
${escapeHtml(topic.summary)}
+ ${topic.highlights.length > 0 ? renderHighlights(topic.highlights) : ''} + ${topic.trends.length > 0 ? renderTrends(topic.trends) : ''} +
+ `; +} + +function renderHighlights(highlights: TweetHighlight[]): string { + return ` +
+ ${highlights.map(renderHighlight).join('')} +
+ `; +} + +function renderHighlight(highlight: TweetHighlight): string { + return ` +
+
@${escapeHtml(highlight.author)}
+
${escapeHtml(highlight.tweet)}
+
${escapeHtml(highlight.context)}
+ View on X → +
+ `; +} + +function renderTrends(trends: string[]): string { + return ` + + `; +} + +function renderFooter(newsletter: Newsletter): string { + const errorNote = newsletter.errors.length > 0 + ? `
Note: ${newsletter.errors.length} data source(s) were unavailable
/** Replacement entity for each character that is significant in HTML text or attribute values. */
const HTML_ESCAPES: Record<string, string> = {
  '&': '&amp;',
  '<': '&lt;',
  '>': '&gt;',
  '"': '&quot;',
  "'": '&#39;',
};

/**
 * Escapes text for safe interpolation into HTML element content or quoted
 * attribute values.
 *
 * The values passed through here (tweet text, author names, AI-generated
 * summaries) come from outside the program, so each of `& < > " '` must be
 * replaced by its entity; mapping a character to itself would leave the
 * email open to markup injection.
 *
 * @param text - Raw, untrusted text.
 * @returns The text with the five special characters replaced by entities.
 */
function escapeHtml(text: string): string {
  return text.replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch] ?? ch);
}
/**
 * Fetches and parses the RSS feed of one account, trying at most one attempt
 * per configured Nitter instance.
 *
 * Every failed attempt that will be retried advances the shared instance
 * pointer exactly once, through `onRetry`. The attempt body must not rotate
 * as well: rotating twice per failure makes a two-instance setup retry the
 * very instance that just failed.
 *
 * @param account - Account whose feed is requested.
 * @returns Tweets parsed from the feed.
 * @throws Error when no instance is configured, or the last attempt's error
 *   (rate limit, timeout, non-200 status, network or parse failure).
 */
private async fetchAccount(account: TechAccount): Promise<RawTweet[]> {
  if (this.instances.length === 0) {
    // With zero attempts allowed the retry helper has no error to report.
    throw new Error('No Nitter instances configured');
  }

  return withRetry(
    async () => {
      const instance = this.getCurrentInstance();
      const url = `${instance}/${account.username}/rss`;

      logger.debug({ url, account: account.username }, 'Fetching RSS feed');

      try {
        const response = await axios.get(url, {
          timeout: this.timeout,
          headers: {
            'User-Agent': 'Mozilla/5.0 (compatible; TechNewsletter/1.0)',
            Accept: 'application/rss+xml, application/xml, text/xml',
          },
          // Let 4xx responses through so 429 can be reported distinctly below.
          validateStatus: (status) => status < 500,
        });

        if (response.status === 429) {
          throw new Error('Rate limited, rotating instance');
        }

        if (response.status !== 200) {
          throw new Error(`HTTP ${response.status}`);
        }

        const feed = await this.parser.parseString(response.data);
        return this.parseFeedItems(feed.items, account);
      } catch (error) {
        if (axios.isAxiosError(error) && error.code === 'ECONNABORTED') {
          throw new Error('Request timeout, rotating instance');
        }
        throw error;
      }
    },
    {
      maxAttempts: this.instances.length,
      baseDelay: 500,
      onRetry: () => this.rotateInstance(),
    }
  );
}
/**
 * Reduces feed item HTML to plain text: all tags are removed, HTML entities
 * are decoded, whitespace is collapsed, and a leading "RT @user:" marker is
 * removed.
 *
 * NOTE(review): because the "RT @user:" prefix is removed here, any later
 * retweet detection that looks for that prefix in `content` will not see it
 * — confirm against the tweet processor.
 */
private cleanContent(html: string): string {
  const stripped = sanitizeHtml(html, {
    allowedTags: [],
    allowedAttributes: {},
  });

  const decoded = he.decode(stripped);

  return decoded
    .replace(/\s+/g, ' ')
    .replace(/^RT @\w+:\s*/i, '')
    .trim();
}

/**
 * Rewrites a link that points at a Nitter mirror so it points at x.com.
 *
 * A link is treated as a mirror link when its hostname equals the hostname of
 * any configured instance, or contains "nitter" or "xcancel". Checking the
 * configured instances covers mirrors whose hostname contains neither word.
 *
 * @param link - Link taken from the feed item; may be empty or malformed.
 * @returns The rewritten link, or the input unchanged when it is not a mirror
 *   link or cannot be parsed as a URL.
 */
private cleanLink(link: string): string {
  try {
    const url = new URL(link);

    const isConfiguredInstance = this.instances.some((instance) => {
      try {
        return new URL(instance).hostname === url.hostname;
      } catch {
        return false;
      }
    });

    if (
      isConfiguredInstance ||
      url.hostname.includes('nitter') ||
      url.hostname.includes('xcancel')
    ) {
      return link.replace(url.origin, 'https://x.com');
    }
    return link;
  } catch {
    return link;
  }
}

/** Returns the base URL of the instance the shared pointer currently selects. */
private getCurrentInstance(): string {
  return this.instances[this.currentInstanceIndex];
}

/** Advances the shared pointer to the next instance, wrapping around at the end of the list. */
private rotateInstance(): void {
  this.currentInstanceIndex = (this.currentInstanceIndex + 1) % this.instances.length;
  logger.debug({ newInstance: this.getCurrentInstance() }, 'Rotated to new Nitter instance');
}
/**
 * Registry of named cron jobs built on node-cron. Jobs are created stopped
 * and only begin firing after `start()`.
 */
export class CronScheduler {
  private jobs: Map<string, ScheduledTask> = new Map();

  /**
   * Registers a task under `name`. The task's duration is logged on success
   * and its error is logged on failure; a failing task never throws out of
   * the cron callback.
   *
   * Registering a name that already exists stops the previous job first.
   * Otherwise its handle would be dropped from the registry while the
   * underlying task, if already started, kept firing with no way to stop it.
   *
   * @param name - Registry key, also used in log entries.
   * @param expression - Cron expression; validated with `cron.validate`.
   * @param task - Async work to run on each tick.
   * @param options - Optional timezone passed through to node-cron.
   * @throws Error when `expression` is not a valid cron expression.
   */
  schedule(
    name: string,
    expression: string,
    task: () => Promise<void>,
    options?: { timezone?: string }
  ): void {
    if (!cron.validate(expression)) {
      throw new Error(`Invalid cron expression: ${expression}`);
    }

    const previous = this.jobs.get(name);
    if (previous) {
      previous.stop();
      logger.warn({ job: name }, 'Replacing previously registered job');
    }

    const job = cron.schedule(
      expression,
      async () => {
        logger.info({ job: name }, 'Running scheduled task');
        const startTime = Date.now();

        try {
          await task();
          const duration = Date.now() - startTime;
          logger.info({ job: name, durationMs: duration }, 'Scheduled task completed');
        } catch (error) {
          // `err` is the key pino's default error serializer is bound to;
          // under `error` an Error instance is written as `{}`.
          logger.error({ job: name, err: error }, 'Scheduled task failed');
        }
      },
      {
        timezone: options?.timezone,
        scheduled: false,
      }
    );

    this.jobs.set(name, job);
    logger.info({ job: name, expression, timezone: options?.timezone }, 'Scheduled job registered');
  }

  /** Starts every registered job. */
  start(): void {
    for (const [name, job] of this.jobs) {
      job.start();
      logger.info({ job: name }, 'Job started');
    }
  }

  /** Stops every registered job; the jobs stay registered. */
  stop(): void {
    for (const [name, job] of this.jobs) {
      job.stop();
      logger.info({ job: name }, 'Job stopped');
    }
  }

  /**
   * Placeholder: the next run time is not computed. Returns `null` both for
   * unknown names and for registered jobs.
   */
  getNextRun(name: string): Date | null {
    const job = this.jobs.get(name);
    if (!job) return null;

    return null;
  }
}
ProcessedTweet extends RawTweet { + topics: TopicId[]; + isRetweet: boolean; + isReply: boolean; +} + +export interface TopicSummary { + topic: TopicConfig; + summary: string; + highlights: TweetHighlight[]; + trends: string[]; + tweetCount: number; +} + +export interface TweetHighlight { + tweet: string; + author: string; + context: string; + link: string; +} + +export interface Newsletter { + date: Date; + insights: string; + topics: TopicSummary[]; + totalTweets: number; + errors: PipelineError[]; +} + +export interface PipelineError { + stage: 'rss' | 'process' | 'ai' | 'email'; + message: string; + details?: unknown; +} + +export interface PipelineResult { + success: boolean; + newsletter?: Newsletter; + errors: PipelineError[]; +} + +export interface RssFetchResult { + tweets: RawTweet[]; + errors: { account: string; error: string }[]; + source: string; +} + +export interface AppConfig { + rss: { + nitterInstances: string[]; + fetchTimeout: number; + maxTweetsPerAccount: number; + }; + ai: { + openRouterApiKey: string; + model: string; + maxTokens: number; + siteUrl: string; + siteName: string; + }; + email: { + brevoHost: string; + brevoPort: number; + brevoUser: string; + brevoApiKey: string; + fromEmail: string; + fromName: string; + recipients: string[]; + }; + scheduler: { + cronExpression: string; + timezone: string; + }; + features: { + enableAiSummaries: boolean; + includeRetweets: boolean; + includeReplies: boolean; + dryRun: boolean; + }; + logging: { + level: string; + }; +} diff --git a/src/utils/logger.ts b/src/utils/logger.ts new file mode 100644 index 0000000..d063ca3 --- /dev/null +++ b/src/utils/logger.ts @@ -0,0 +1,22 @@ +import pino from 'pino'; +import { config } from '../config/index.js'; + +export const logger = pino({ + level: config.logging.level, + transport: + process.env.NODE_ENV === 'development' + ? 
/** Tuning knobs for `withRetry`. */
export interface RetryOptions {
  /** Total number of calls to make, including the first one. Default 3. Must be at least 1. */
  maxAttempts?: number;
  /** Delay in milliseconds before the second attempt; doubles for each later attempt. Default 1000. */
  baseDelay?: number;
  /** Upper bound in milliseconds for the exponential delay, before jitter. Default 30000. */
  maxDelay?: number;
  /** Called after a failed attempt that will be retried, before the delay. */
  onRetry?: (attempt: number, error: Error) => void;
}

/**
 * Runs `fn` until it resolves or `maxAttempts` calls have failed, waiting
 * between attempts with exponential backoff (`baseDelay * 2^(attempt-1)`,
 * capped at `maxDelay`) plus up to 10% random jitter.
 *
 * @param fn - Operation to attempt.
 * @param options - See `RetryOptions`.
 * @returns The value `fn` resolves with.
 * @throws RangeError when `maxAttempts` is not a number >= 1. Without this
 *   check the loop would never run and the function would throw `undefined`.
 * @throws The error of the last attempt (non-Error rejections are wrapped in
 *   an `Error`) once all attempts have failed.
 */
export async function withRetry<T>(
  fn: () => Promise<T>,
  options: RetryOptions = {}
): Promise<T> {
  const { maxAttempts = 3, baseDelay = 1000, maxDelay = 30000, onRetry } = options;

  // Written as a negated `>=` so NaN is rejected as well.
  if (!(maxAttempts >= 1)) {
    throw new RangeError(`maxAttempts must be at least 1, got ${maxAttempts}`);
  }

  for (let attempt = 1; ; attempt++) {
    try {
      return await fn();
    } catch (error) {
      const lastError = error instanceof Error ? error : new Error(String(error));

      // `>=` rather than `===` so a fractional maxAttempts still ends on the
      // last permitted attempt instead of sleeping once more before failing.
      if (attempt + 1 > maxAttempts) {
        logger.error({ attempt, error: lastError.message }, 'All retry attempts exhausted');
        throw lastError;
      }

      const delay = Math.min(baseDelay * Math.pow(2, attempt - 1), maxDelay);
      const jitter = Math.random() * 0.1 * delay;

      logger.warn(
        { attempt, maxAttempts, delay: delay + jitter, error: lastError.message },
        'Retrying after failure'
      );

      if (onRetry) {
        onRetry(attempt, lastError);
      }

      await sleep(delay + jitter);
    }
  }
}

/** Resolves after `ms` milliseconds. */
export function sleep(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms));
}
"forceConsistentCasingInFileNames": true, + "resolveJsonModule": true, + "declaration": true, + "declarationMap": true, + "sourceMap": true, + "noUnusedLocals": true, + "noUnusedParameters": true, + "noImplicitReturns": true, + "noFallthroughCasesInSwitch": true + }, + "include": ["src/**/*"], + "exclude": ["node_modules", "dist"] +}