using System.Net.Http.Json;
using LexWells.Infrastructure.Common.Interfaces;
using NewsArchival.Core.Interfaces;

namespace NewsArchival.Scraper;

/// <summary>
/// Background worker that repeatedly walks the configured hubs, asks the scraper
/// service for each hub's articles, and posts every article the API does not
/// already know about to <c>api/articles</c>.
/// </summary>
/// <param name="logger">Logger used for progress and error reporting.</param>
/// <param name="configuration">Configuration source; hubs are read from <c>ScraperSettings:Hubs</c>.</param>
/// <param name="scraperService">Service that returns the articles for a hub.</param>
/// <param name="robotsService">Service that supplies the crawl delay to wait after each hub.</param>
/// <param name="httpClient">Factory used to create the named <c>NewsApi</c> HTTP client.</param>
public class Worker(
    ILogger<Worker> logger,
    IConfiguration configuration,
    IScraperService scraperService,
    IRobotsService robotsService,
    IHttpClientFactory httpClient) : BackgroundService
{
    /// <summary>Site whose crawl delay is requested from the robots service after each hub.</summary>
    private static readonly Uri CrawlDelaySite = new("https://apnews.com");

    /// <summary>Minimum wall-clock duration of one full pass over all hubs.</summary>
    private static readonly TimeSpan CycleInterval = TimeSpan.FromHours(1);

    /// <summary>
    /// Runs scrape cycles until <paramref name="stoppingToken"/> is cancelled. A failure
    /// while processing one hub is logged and does not prevent the remaining hubs from
    /// being processed.
    /// </summary>
    /// <param name="stoppingToken">Token signalled when the host is shutting down.</param>
    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        var apiClient = httpClient.CreateClient("NewsApi");
        var hubs = configuration.GetSection("ScraperSettings:Hubs").Get<List<string>>() ?? [];

        logger.LogInformation("Scraper Worker started at: {time}", DateTimeOffset.Now);

        while (!stoppingToken.IsCancellationRequested)
        {
            var cycleStartTime = DateTime.UtcNow;

            foreach (var hub in hubs)
            {
                try
                {
                    logger.LogInformation("Processing hub: {Hub}", hub);
                    var articles = await scraperService.GetArticles(hub);

                    foreach (var article in articles)
                    {
                        // Skip anything the API reports as already archived.
                        if (await ArticleExistsAsync(apiClient, article.Id, stoppingToken))
                        {
                            continue;
                        }

                        using var response = await apiClient.PostAsJsonAsync("api/articles", article, stoppingToken);

                        if (response.IsSuccessStatusCode)
                        {
                            logger.LogInformation("Successfully archived: {Title}", article.Title);
                        }
                        else
                        {
                            var error = await response.Content.ReadAsStringAsync(stoppingToken);
                            logger.LogError("API Rejected {Id}: {Status} - {Error}", article.Id, response.StatusCode, error);
                        }
                    }

                    // Wait for the crawl delay reported by the robots service before the next hub.
                    var delayMs = await robotsService.GetCrawlDelayAsync(CrawlDelaySite);
                    await Task.Delay(delayMs, stoppingToken);
                }
                catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
                {
                    // Host shutdown is not a hub failure: stop immediately instead of logging
                    // an error and carrying on with the remaining hubs.
                    return;
                }
                catch (Exception ex)
                {
                    logger.LogError(ex, "Critical error processing hub {Hub}", hub);
                }
            }

            await ApplyGlobalDelay(cycleStartTime, stoppingToken);
        }
    }

    /// <summary>
    /// Asks the API whether an article with the given id is already stored.
    /// </summary>
    /// <param name="client">HTTP client pointed at the news API.</param>
    /// <param name="id">Article identifier; it is URL-escaped before being placed in the query string.</param>
    /// <param name="ct">Token used to cancel the request.</param>
    /// <returns>
    /// <c>true</c> when <c>api/check</c> answers with a success status code; <c>false</c> for
    /// any other status code.
    /// </returns>
    private async Task<bool> ArticleExistsAsync(HttpClient client, string id, CancellationToken ct)
    {
        // Escape the id so characters such as '&', '#', '+' or spaces cannot corrupt the query.
        using var response = await client.GetAsync($"api/check?id={Uri.EscapeDataString(id)}", ct);
        return response.IsSuccessStatusCode;
    }

    /// <summary>
    /// Sleeps for whatever remains of the one-hour cycle interval, measured from
    /// <paramref name="startTime"/>. Returns immediately when the cycle already took
    /// an hour or longer.
    /// </summary>
    /// <param name="startTime">UTC time at which the current cycle began.</param>
    /// <param name="ct">Token used to cancel the wait.</param>
    private async Task ApplyGlobalDelay(DateTime startTime, CancellationToken ct)
    {
        var elapsed = DateTime.UtcNow - startTime;
        var sleepTime = CycleInterval - elapsed;

        if (sleepTime > TimeSpan.Zero)
        {
            logger.LogInformation("Cycle complete. Sleeping for {Minutes} minutes...", sleepTime.TotalMinutes);
            await Task.Delay(sleepTime, ct);
        }
    }
}