using System.Security.Cryptography;
using System.Text;
using AngleSharp;
using LexWells.Infrastructure.Common.Interfaces;
using NewsArchival.Core.Interfaces;
using NewsArchival.Core.Models;

namespace NewsArchival.Scraper.Services;

/// <summary>
/// Abstract base for scraper implementations. Stores the injected collaborators in
/// protected fields for use by derived classes and provides <see cref="GenerateId"/>
/// for deriving a stable identifier from an article URL.
/// </summary>
public abstract class ScraperService(
    ILogger logger,
    ICacheService cache,
    IRobotsService robots,
    IHttpClientFactory httpClient,
    IBrowsingContext browsingContext) : IScraperService
{
    /// <summary>
    /// Base address that relative URLs are resolved against in <see cref="GenerateId"/>.
    /// Created once instead of on every call.
    /// </summary>
    private static readonly Uri DefaultBaseUri = new("https://apnews.com");

    // Primary-constructor parameters are mutable captures; copying them into
    // readonly fields keeps the references fixed for the lifetime of the instance.
    protected readonly ILogger _logger = logger;
    protected readonly ICacheService _cache = cache;
    protected readonly IRobotsService _robots = robots;
    protected readonly IHttpClientFactory _httpClient = httpClient;
    protected readonly IBrowsingContext _browsingContext = browsingContext;

    #region IScraperService

    // NOTE(review): the generic type arguments of this return type appear to have been
    // lost (e.g. stripped as markup); "Task>" is not valid C#. Restore the exact type
    // from the IScraperService declaration - it cannot be determined from this file.
    public abstract Task> GetArticles(string hub);

    #endregion

    /// <summary>
    /// Derives a deterministic identifier from a URL by hashing a normalized form of it.
    /// </summary>
    /// <param name="url">
    /// An absolute URL, or a relative one that is resolved against <c>https://apnews.com</c>.
    /// </param>
    /// <returns>
    /// The literal <c>"unknown"</c> when <paramref name="url"/> is null, empty or whitespace;
    /// otherwise the lowercase hexadecimal SHA-256 digest of the normalized URL.
    /// </returns>
    /// <exception cref="UriFormatException">
    /// <paramref name="url"/> cannot be combined with the base address into a valid URI.
    /// </exception>
    /// <remarks>
    /// Normalization keeps only scheme, host and path: trailing slashes are trimmed from the
    /// path, the query string, fragment and port are not included, and the result is
    /// lowercased, so URLs differing only in those respects produce the same identifier.
    /// </remarks>
    protected static string GenerateId(string url)
    {
        if (string.IsNullOrWhiteSpace(url))
        {
            return "unknown";
        }

        // An absolute url replaces the base entirely; a relative one is resolved against it.
        var uri = new Uri(DefaultBaseUri, url);
        var cleanPath = uri.AbsolutePath.TrimEnd('/');
        var normalizedUrl = $"{uri.Scheme}://{uri.Host}{cleanPath}".ToLowerInvariant();

        var digest = SHA256.HashData(Encoding.UTF8.GetBytes(normalizedUrl));

        // Invariant casing: the identifier must not depend on the current thread culture.
        return Convert.ToHexString(digest).ToLowerInvariant();
    }
}