From daadcb36dcfa8f360a9754dfdd736cd8d87b4c45 Mon Sep 17 00:00:00 2001 From: drew Date: Wed, 1 Apr 2026 13:34:19 -0400 Subject: [PATCH] Initial Commit --- .gitignore | 21 +++ .../DependencyInjection.cs | 24 +++ .../Interfaces/ICacheService.cs | 9 ++ .../Interfaces/IRobotsService.cs | 14 ++ .../LexWells.Infrastructure.Common.csproj | 26 +++ .../Models/RobotsEntry.cs | 7 + .../Services/RedisCacheService.cs | 61 +++++++ .../Services/RobotsService.cs | 151 ++++++++++++++++++ .../DataDependencyInjection.cs | 22 +++ .../Entities/BaseEntity.cs | 9 ++ ...ells.Infrastructure.EntityFramework.csproj | 24 +++ .../LexWellsDbContext.cs | 13 ++ .../UpdateAuditableEntitiesInterceptor.cs | 33 ++++ LexWells.Infrastructure.sln | 48 ++++++ 14 files changed, 462 insertions(+) create mode 100644 .gitignore create mode 100644 LexWells.Infrastructure.Common/DependencyInjection.cs create mode 100644 LexWells.Infrastructure.Common/Interfaces/ICacheService.cs create mode 100644 LexWells.Infrastructure.Common/Interfaces/IRobotsService.cs create mode 100644 LexWells.Infrastructure.Common/LexWells.Infrastructure.Common.csproj create mode 100644 LexWells.Infrastructure.Common/Models/RobotsEntry.cs create mode 100644 LexWells.Infrastructure.Common/Services/RedisCacheService.cs create mode 100644 LexWells.Infrastructure.Common/Services/RobotsService.cs create mode 100644 LexWells.Infrastructure.EntityFramework/DataDependencyInjection.cs create mode 100644 LexWells.Infrastructure.EntityFramework/Entities/BaseEntity.cs create mode 100644 LexWells.Infrastructure.EntityFramework/LexWells.Infrastructure.EntityFramework.csproj create mode 100644 LexWells.Infrastructure.EntityFramework/LexWellsDbContext.cs create mode 100644 LexWells.Infrastructure.EntityFramework/UpdateAuditableEntitiesInterceptor.cs create mode 100644 LexWells.Infrastructure.sln diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c56cb3d --- /dev/null +++ b/.gitignore @@ -0,0 +1,21 @@ +# Build results +[Dd]ebug/ 
+[Rr]elease/ +x64/ +x86/ +[Bb]in/ +[Oo]bj/ + +# IDE - Rider/Visual Studio +.idea/ +*.user +*.userosscache +*.sln.doccache + +# NuGet +*.nupkg +node_modules/ + +# OS files +.DS_Store +Thumbs.db \ No newline at end of file
diff --git a/LexWells.Infrastructure.Common/DependencyInjection.cs b/LexWells.Infrastructure.Common/DependencyInjection.cs
new file mode 100644
index 0000000..84d88d6
--- /dev/null
+++ b/LexWells.Infrastructure.Common/DependencyInjection.cs
@@ -0,0 +1,37 @@
+using LexWells.Infrastructure.Common.Interfaces;
+using LexWells.Infrastructure.Common.Services;
+using Microsoft.Extensions.DependencyInjection;
+
+namespace LexWells.Infrastructure.Common;
+
+/// <summary>
+/// Service-registration helpers for the shared LexWells infrastructure.
+/// </summary>
+public static class DependencyInjection
+{
+    /// <summary>
+    /// Registers the Redis distributed cache, <see cref="ICacheService"/>, the HTTP client
+    /// factory and <see cref="IRobotsService"/>.
+    /// </summary>
+    /// <param name="services">The service collection to add the registrations to.</param>
+    /// <param name="redisConnectionString">Connection string assigned to the Redis cache options.</param>
+    /// <returns>The same <paramref name="services"/> instance, so calls can be chained.</returns>
+    public static IServiceCollection AddLexWellsInfrastructure(this IServiceCollection services, string redisConnectionString)
+    {
+        ArgumentNullException.ThrowIfNull(services);
+        ArgumentException.ThrowIfNullOrWhiteSpace(redisConnectionString);
+
+        services.AddStackExchangeRedisCache(options =>
+        {
+            options.Configuration = redisConnectionString;
+        });
+
+        services.AddSingleton<ICacheService, RedisCacheService>();
+
+        services.AddHttpClient();
+
+        services.AddSingleton<IRobotsService, RobotsService>();
+
+        return services;
+    }
+}
\ No newline at end of file
diff --git a/LexWells.Infrastructure.Common/Interfaces/ICacheService.cs b/LexWells.Infrastructure.Common/Interfaces/ICacheService.cs
new file mode 100644
index 0000000..bfdb38c
--- /dev/null
+++ b/LexWells.Infrastructure.Common/Interfaces/ICacheService.cs
@@ -0,0 +1,17 @@
+namespace LexWells.Infrastructure.Common.Interfaces;
+
+/// <summary>
+/// Key/value cache abstraction with typed reads and writes.
+/// </summary>
+public interface ICacheService
+{
+    /// <summary>Reads the value cached under <paramref name="key"/> as <typeparamref name="T"/>.</summary>
+    Task<T?> GetAsync<T>(string key);
+
+    /// <summary>Reads a cached collection stored as <typeparamref name="TConcrete"/> elements and exposes it as <typeparamref name="T"/>.</summary>
+    Task<IEnumerable<T>?> GetCollectionAsync<T, TConcrete>(string key) where TConcrete : T;
+
+    /// <summary>Caches <paramref name="value"/> under <paramref name="key"/>, optionally with an explicit expiration.</summary>
+    Task SetAsync<T>(string key, T value, TimeSpan? 
expiration = null); + Task RemoveAsync(string key); +}
diff --git a/LexWells.Infrastructure.Common/Interfaces/IRobotsService.cs b/LexWells.Infrastructure.Common/Interfaces/IRobotsService.cs
new file mode 100644
index 0000000..8dd83eb
--- /dev/null
+++ b/LexWells.Infrastructure.Common/Interfaces/IRobotsService.cs
@@ -0,0 +1,23 @@
+namespace LexWells.Infrastructure.Common.Interfaces;
+
+/// <summary>
+/// Answers robots.txt questions (crawl permission and crawl delay) for a URI.
+/// </summary>
+public interface IRobotsService
+{
+    /// <summary>
+    /// Checks if the User-Agent is allowed to access the specific path.
+    /// </summary>
+    /// <param name="uri">The URI whose host and path are checked.</param>
+    /// <param name="userAgent">The robots.txt user-agent name to evaluate the rules for.</param>
+    /// <returns><c>true</c> when crawling the URI is allowed; otherwise <c>false</c>.</returns>
+    Task<bool> CanCrawlAsync(Uri uri, string userAgent = "LexWellsBot");
+
+    /// <summary>
+    /// Gets the crawl delay specified by the host, or a default if none exists.
+    /// </summary>
+    /// <param name="uri">A URI on the host whose crawl delay is requested.</param>
+    /// <param name="userAgent">The robots.txt user-agent name to evaluate the rules for.</param>
+    /// <returns>The delay to observe between requests to the host.</returns>
+    Task<TimeSpan> GetCrawlDelayAsync(Uri uri, string userAgent = "LexWellsBot");
+}
\ No newline at end of file
diff --git a/LexWells.Infrastructure.Common/LexWells.Infrastructure.Common.csproj b/LexWells.Infrastructure.Common/LexWells.Infrastructure.Common.csproj new file mode 100644 index 0000000..b9cdf2e --- /dev/null +++ b/LexWells.Infrastructure.Common/LexWells.Infrastructure.Common.csproj @@ -0,0 +1,26 @@ + + + + net9.0 + enable + enable + + LexWells.Infrastructure.Common + 1.0.0 + Kenneth Wells + LexWells + Shared infrastructure and caching logic for LexWells projects. + + true + /home/drew/source/LocalNuGet + nupkg + + + + + + + + + + diff --git a/LexWells.Infrastructure.Common/Models/RobotsEntry.cs b/LexWells.Infrastructure.Common/Models/RobotsEntry.cs new file mode 100644 index 0000000..731ae35 --- /dev/null +++ b/LexWells.Infrastructure.Common/Models/RobotsEntry.cs @@ -0,0 +1,7 @@ +namespace LexWells.Infrastructure.Common.Models; + +public record RobotsEntry( + bool IsAllowed, + int? CrawlDelay, + string? 
SitemapUrl, + DateTime ExpiresAt); \ No newline at end of file
diff --git a/LexWells.Infrastructure.Common/Services/RedisCacheService.cs b/LexWells.Infrastructure.Common/Services/RedisCacheService.cs
new file mode 100644
index 0000000..8d11f4a
--- /dev/null
+++ b/LexWells.Infrastructure.Common/Services/RedisCacheService.cs
@@ -0,0 +1,90 @@
+using System.Text.Json;
+using LexWells.Infrastructure.Common.Interfaces;
+using Microsoft.Extensions.Caching.Distributed;
+
+namespace LexWells.Infrastructure.Common.Services;
+
+/// <summary>
+/// <see cref="ICacheService"/> backed by an <see cref="IDistributedCache"/>: strings are stored as-is,
+/// every other value is stored as its JSON representation.
+/// </summary>
+public class RedisCacheService : ICacheService
+{
+    private readonly IDistributedCache _cache;
+
+    /// <summary>Creates the service on top of the given distributed cache.</summary>
+    public RedisCacheService(IDistributedCache cache)
+    {
+        _cache = cache;
+    }
+
+    /// <summary>
+    /// Reads a JSON collection stored under <paramref name="key"/>, deserializing each element as
+    /// <typeparamref name="TConcrete"/> and exposing it as <typeparamref name="T"/>.
+    /// </summary>
+    /// <returns>The cached elements, or <c>null</c> when the key holds no data.</returns>
+    public async Task<IEnumerable<T>?> GetCollectionAsync<T, TConcrete>(string key)
+        where TConcrete : T
+    {
+        var cachedData = await _cache.GetStringAsync(key);
+
+        if (string.IsNullOrEmpty(cachedData)) return null;
+
+        var result = JsonSerializer.Deserialize<IEnumerable<TConcrete>>(cachedData);
+        return result?.Cast<T>();
+    }
+
+    /// <summary>
+    /// Reads the value stored under <paramref name="key"/>.
+    /// </summary>
+    /// <returns>
+    /// The cached value, or <c>default</c> when the key is empty or its payload cannot be deserialized.
+    /// </returns>
+    public async Task<T?> GetAsync<T>(string key)
+    {
+        var cachedData = await _cache.GetStringAsync(key);
+
+        // Explicitly return null if Redis has NOTHING
+        if (string.IsNullOrWhiteSpace(cachedData))
+        {
+            return default;
+        }
+
+        // SetAsync stores strings verbatim (not JSON-encoded), so hand them back without parsing.
+        if (typeof(T) == typeof(string))
+        {
+            return (T)(object)cachedData;
+        }
+
+        try
+        {
+            return JsonSerializer.Deserialize<T>(cachedData);
+        }
+        catch (Exception ex) when (ex is JsonException or NotSupportedException)
+        {
+            // A payload that cannot be read as T is treated as a cache miss.
+            return default;
+        }
+    }
+
+    /// <summary>
+    /// Stores <paramref name="value"/> under <paramref name="key"/>; the entry expires after
+    /// <paramref name="expiration"/>, or after one hour when none is given.
+    /// </summary>
+    public async Task SetAsync<T>(string key, T value, TimeSpan? expiration = null)
+    {
+        var options = new DistributedCacheEntryOptions
+        {
+            AbsoluteExpirationRelativeToNow = expiration ?? TimeSpan.FromHours(1)
+        };
+
+        string dataToStore = value is string s ? 
s : JsonSerializer.Serialize(value); + + await _cache.SetStringAsync(key, dataToStore, options); + } + + public async Task RemoveAsync(string key) + { + await _cache.RemoveAsync(key); + } +} \ No newline at end of file
diff --git a/LexWells.Infrastructure.Common/Services/RobotsService.cs b/LexWells.Infrastructure.Common/Services/RobotsService.cs
new file mode 100644
index 0000000..809fc28
--- /dev/null
+++ b/LexWells.Infrastructure.Common/Services/RobotsService.cs
@@ -0,0 +1,254 @@
+using System.Globalization;
+using System.Net;
+using Microsoft.Extensions.Logging;
+using LexWells.Infrastructure.Common.Interfaces;
+using LexWells.Infrastructure.Common.Models;
+
+namespace LexWells.Infrastructure.Common.Services;
+
+/// <summary>
+/// Downloads, caches and evaluates robots.txt rules for crawl targets.
+/// </summary>
+/// <remarks>
+/// The robots.txt document is cached once per origin (scheme, host and port) and its rules are
+/// evaluated on every call, so each verdict reflects the requested path and user-agent.
+/// </remarks>
+public class RobotsService(
+    IHttpClientFactory clientFactory,
+    ICacheService cache,
+    ILogger<RobotsService> logger) : IRobotsService
+{
+    // Versioned prefix: entries under the older "robots_" prefix held per-path verdicts, not documents.
+    private const string CachePrefix = "robots_v2_";
+    // NOTE(review): IRobotsService declares "LexWellsBot" as its default user-agent - confirm which name is intended.
+    private const string DefaultUserAgent = "NewsArchiveBot";
+    private const string RequestUserAgent =
+        "Mozilla/5.0 (compatible; NewsArchiveBot/1.0; +http://lexwells.com/bot)";
+
+    private static readonly TimeSpan SuccessCacheDuration = TimeSpan.FromDays(1);
+    private static readonly TimeSpan FailureCacheDuration = TimeSpan.FromMinutes(10);
+    private static readonly TimeSpan DefaultCrawlDelay = TimeSpan.FromSeconds(1);
+
+    /// <summary>Outcome of downloading robots.txt for one origin; this is the value that gets cached.</summary>
+    private sealed record CachedRobots(string? Content, bool FetchFailed);
+
+    /// <summary>Accumulates the Allow/Disallow/Crawl-delay rules of one user-agent group for a single path.</summary>
+    private sealed class RuleGroup
+    {
+        private int _longestMatch = -1;
+
+        public bool IsAllowed { get; private set; } = true;
+
+        public int? CrawlDelay { get; set; }
+
+        public void Apply(bool allow, string pattern, string path)
+        {
+            // An empty pattern (e.g. "Disallow:") restricts nothing.
+            if (pattern.Length == 0 || !path.StartsWith(pattern, StringComparison.OrdinalIgnoreCase))
+            {
+                return;
+            }
+
+            // The most specific (longest) matching rule wins; Allow wins a tie.
+            if (pattern.Length > _longestMatch || (pattern.Length == _longestMatch && allow))
+            {
+                _longestMatch = pattern.Length;
+                IsAllowed = allow;
+            }
+        }
+    }
+
+    /// <inheritdoc />
+    public async Task<bool> CanCrawlAsync(Uri uri, string userAgent = DefaultUserAgent)
+    {
+        var entry = await GetRobotsAsync(uri, userAgent);
+        return entry.IsAllowed;
+    }
+
+    /// <inheritdoc />
+    public async Task<TimeSpan> GetCrawlDelayAsync(Uri uri, string userAgent = DefaultUserAgent)
+    {
+        var entry = await GetRobotsAsync(uri, userAgent);
+        return entry.CrawlDelay is { } seconds
+            ? TimeSpan.FromSeconds(seconds)
+            : DefaultCrawlDelay;
+    }
+
+    /// <summary>
+    /// Evaluates the (cached or freshly downloaded) robots.txt of the URI's origin for the given
+    /// user-agent and the URI's path.
+    /// </summary>
+    private async Task<RobotsEntry> GetRobotsAsync(Uri uri, string userAgent)
+    {
+        ArgumentNullException.ThrowIfNull(uri);
+        ArgumentNullException.ThrowIfNull(userAgent);
+
+        var robots = await GetRobotsDocumentAsync(uri);
+
+        if (robots.FetchFailed)
+        {
+            // robots.txt could not be read: stay conservative and refuse to crawl for now.
+            return new RobotsEntry(false, null, null, DateTime.UtcNow.Add(FailureCacheDuration));
+        }
+
+        if (string.IsNullOrWhiteSpace(robots.Content))
+        {
+            // A missing or empty robots.txt places no restrictions on crawlers.
+            return new RobotsEntry(true, null, null, DateTime.UtcNow.Add(SuccessCacheDuration));
+        }
+
+        return ParseRobotsContent(robots.Content, userAgent, uri.AbsolutePath);
+    }
+
+    /// <summary>
+    /// Returns the robots.txt document for the URI's origin, consulting the cache first.
+    /// Cache failures are logged and never prevent the lookup.
+    /// </summary>
+    private async Task<CachedRobots> GetRobotsDocumentAsync(Uri uri)
+    {
+        string host = uri.Host;
+        // Scheme and port are part of the key: robots.txt is defined per origin, not per host name.
+        string cacheKey = $"{CachePrefix}{uri.GetLeftPart(UriPartial.Authority)}";
+
+        try
+        {
+            var cached = await cache.GetAsync<CachedRobots>(cacheKey);
+
+            if (cached is not null)
+            {
+                return cached;
+            }
+        }
+        catch (Exception ex)
+        {
+            logger.LogWarning(ex, "Failed to retrieve or deserialize robots cache for {Host}", host);
+        }
+
+        var fetched = await FetchRobotsAsync(uri);
+
+        // Failures are cached briefly so an unreachable host is retried soon but not hammered.
+        var cacheDuration = fetched.FetchFailed ? FailureCacheDuration : SuccessCacheDuration;
+
+        try
+        {
+            await cache.SetAsync(cacheKey, fetched, cacheDuration);
+        }
+        catch (Exception ex)
+        {
+            logger.LogError(ex, "Failed to save robots entry to cache for {Host}", host);
+        }
+
+        return fetched;
+    }
+
+    /// <summary>
+    /// Downloads <c>/robots.txt</c> from the URI's origin. A 404 is reported as "no document";
+    /// any other failure is logged and reported as a failed fetch.
+    /// </summary>
+    private async Task<CachedRobots> FetchRobotsAsync(Uri uri)
+    {
+        var client = clientFactory.CreateClient();
+        // Resolving against the request URI keeps a non-default port, unlike scheme + host alone.
+        var robotsUrl = new Uri(uri, "/robots.txt");
+
+        client.DefaultRequestHeaders.UserAgent.ParseAdd(RequestUserAgent);
+
+        try
+        {
+            var content = await client.GetStringAsync(robotsUrl);
+            return new CachedRobots(content, false);
+        }
+        catch (HttpRequestException ex) when (ex.StatusCode == HttpStatusCode.NotFound)
+        {
+            return new CachedRobots(null, false);
+        }
+        catch (Exception ex)
+        {
+            logger.LogError(ex, "Error fetching robots.txt from {Url}", robotsUrl);
+            return new CachedRobots(null, true);
+        }
+    }
+
+    /// <summary>
+    /// Evaluates robots.txt <paramref name="content"/> for one user-agent and path.
+    /// </summary>
+    /// <remarks>
+    /// Groups naming <paramref name="userAgent"/> take precedence over the <c>*</c> group, consecutive
+    /// <c>User-agent</c> lines share one group, and within the chosen group the longest matching
+    /// <c>Allow</c>/<c>Disallow</c> prefix decides. Wildcards (<c>*</c>, <c>$</c>) in paths are not interpreted.
+    /// </remarks>
+    private static RobotsEntry ParseRobotsContent(string content, string userAgent, string path)
+    {
+        var specificGroup = new RuleGroup();
+        var wildcardGroup = new RuleGroup();
+        bool foundSpecificMatch = false;
+        bool inSpecificGroup = false;
+        bool inWildcardGroup = false;
+        bool previousLineWasUserAgent = false;
+
+        var lines = content.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
+
+        foreach (var line in lines)
+        {
+            // Strip "# comment" text (whole-line or trailing) before looking at the directive.
+            int commentStart = line.IndexOf('#');
+            var cleanLine = (commentStart >= 0 ? line[..commentStart] : line).Trim();
+            if (cleanLine.Length == 0) continue;
+
+            var parts = cleanLine.Split(':', 2);
+            if (parts.Length < 2) continue;
+
+            var key = parts[0].Trim().ToLowerInvariant();
+            var value = parts[1].Trim();
+
+            if (key == "user-agent")
+            {
+                // A User-agent line that follows rules starts a new group; consecutive ones extend it.
+                if (!previousLineWasUserAgent)
+                {
+                    inSpecificGroup = false;
+                    inWildcardGroup = false;
+                }
+
+                if (value.Equals(userAgent, StringComparison.OrdinalIgnoreCase))
+                {
+                    inSpecificGroup = true;
+                    foundSpecificMatch = true;
+                }
+                else if (value == "*")
+                {
+                    inWildcardGroup = true;
+                }
+
+                previousLineWasUserAgent = true;
+                continue;
+            }
+
+            switch (key)
+            {
+                case "disallow":
+                case "allow":
+                    previousLineWasUserAgent = false;
+                    bool allow = key == "allow";
+                    if (inSpecificGroup) specificGroup.Apply(allow, value, path);
+                    if (inWildcardGroup) wildcardGroup.Apply(allow, value, path);
+                    break;
+
+                case "crawl-delay":
+                    previousLineWasUserAgent = false;
+                    // Digits only: negative or non-numeric delays are ignored.
+                    if (int.TryParse(value, NumberStyles.None, CultureInfo.InvariantCulture, out int delay))
+                    {
+                        if (inSpecificGroup) specificGroup.CrawlDelay = delay;
+                        if (inWildcardGroup) wildcardGroup.CrawlDelay = delay;
+                    }
+                    break;
+            }
+        }
+
+        // Rules written for this bot replace the catch-all group entirely.
+        var effective = foundSpecificMatch ? specificGroup : wildcardGroup;
+
+        return new RobotsEntry(effective.IsAllowed, effective.CrawlDelay, null, DateTime.UtcNow.Add(SuccessCacheDuration));
+    }
+}
\ No newline at end of file
diff --git a/LexWells.Infrastructure.EntityFramework/DataDependencyInjection.cs b/LexWells.Infrastructure.EntityFramework/DataDependencyInjection.cs new file mode 100644 index 0000000..a44c1f9 --- /dev/null +++ b/LexWells.Infrastructure.EntityFramework/DataDependencyInjection.cs @@ -0,0 +1,22 @@ +using Microsoft.EntityFrameworkCore; +using Microsoft.Extensions.DependencyInjection; + +namespace LexWells.Infrastructure.EntityFramework; + +public static class DataDependencyInjection +{ + public static IServiceCollection AddLexWellsDatabase<TContext>( + this 
IServiceCollection services,
+        string connectionString) where TContext : DbContext
+    {
+        services.AddDbContext<TContext>(options =>
+        {
+            options.UseSqlite(connectionString, sqliteOptions =>
+            {
+                sqliteOptions.MigrationsAssembly(typeof(TContext).Assembly.FullName);
+            });
+        });
+
+        return services;
+    }
+}
\ No newline at end of file
diff --git a/LexWells.Infrastructure.EntityFramework/Entities/BaseEntity.cs b/LexWells.Infrastructure.EntityFramework/Entities/BaseEntity.cs
new file mode 100644
index 0000000..5ad9cac
--- /dev/null
+++ b/LexWells.Infrastructure.EntityFramework/Entities/BaseEntity.cs
@@ -0,0 +1,16 @@
+namespace LexWells.Infrastructure.EntityFramework.Entities;
+
+/// <summary>
+/// Base type for entities that carry an identifier and audit timestamps.
+/// </summary>
+public abstract class BaseEntity
+{
+    /// <summary>Entity identifier; defaults to an empty string.</summary>
+    public string Id { get; set; } = string.Empty;
+
+    /// <summary>Creation timestamp; initialised to <see cref="DateTime.UtcNow"/> when the instance is constructed.</summary>
+    public DateTime CreatedAt { get; set; } = DateTime.UtcNow;
+
+    /// <summary>Timestamp of the last modification, or <c>null</c> when it has never been set.</summary>
+    public DateTime? UpdatedAt { get; set; }
+}
\ No newline at end of file
diff --git a/LexWells.Infrastructure.EntityFramework/LexWells.Infrastructure.EntityFramework.csproj b/LexWells.Infrastructure.EntityFramework/LexWells.Infrastructure.EntityFramework.csproj new file mode 100644 index 0000000..6f62e5b --- /dev/null +++ b/LexWells.Infrastructure.EntityFramework/LexWells.Infrastructure.EntityFramework.csproj @@ -0,0 +1,24 @@ + + + + net9.0 + enable + enable + + LexWells.Infrastructure.EntityFramework + 1.0.0 + Kenneth Wells + LexWells + Shared EF Core architecture and SQLite helpers for LexWells projects. 
+ + true + /home/drew/source/LocalNuGet + + + + + + + + + \ No newline at end of file diff --git a/LexWells.Infrastructure.EntityFramework/LexWellsDbContext.cs b/LexWells.Infrastructure.EntityFramework/LexWellsDbContext.cs new file mode 100644 index 0000000..2dd323b --- /dev/null +++ b/LexWells.Infrastructure.EntityFramework/LexWellsDbContext.cs @@ -0,0 +1,13 @@ +using Microsoft.EntityFrameworkCore; + +namespace LexWells.Infrastructure.EntityFramework; + +public abstract class LexWellsDbContext(DbContextOptions options) : DbContext(options) +{ + protected override void OnConfiguring(DbContextOptionsBuilder optionsBuilder) + { + optionsBuilder.AddInterceptors(new UpdateAuditableEntitiesInterceptor()); + + base.OnConfiguring(optionsBuilder); + } +}
diff --git a/LexWells.Infrastructure.EntityFramework/UpdateAuditableEntitiesInterceptor.cs b/LexWells.Infrastructure.EntityFramework/UpdateAuditableEntitiesInterceptor.cs
new file mode 100644
index 0000000..33658c7
--- /dev/null
+++ b/LexWells.Infrastructure.EntityFramework/UpdateAuditableEntitiesInterceptor.cs
@@ -0,0 +1,59 @@
+using LexWells.Infrastructure.EntityFramework.Entities;
+using Microsoft.EntityFrameworkCore;
+using Microsoft.EntityFrameworkCore.Diagnostics;
+
+namespace LexWells.Infrastructure.EntityFramework;
+
+/// <summary>
+/// Stamps <see cref="BaseEntity.UpdatedAt"/> on modified entities right before changes are saved.
+/// </summary>
+public class UpdateAuditableEntitiesInterceptor : SaveChangesInterceptor
+{
+    /// <summary>
+    /// Synchronous counterpart of the asynchronous hook, so that <c>SaveChanges()</c>
+    /// callers get the same audit stamping as <c>SaveChangesAsync()</c> callers.
+    /// </summary>
+    public override InterceptionResult<int> SavingChanges(
+        DbContextEventData eventData,
+        InterceptionResult<int> result)
+    {
+        StampModifiedEntities(eventData.Context);
+
+        return base.SavingChanges(eventData, result);
+    }
+
+    /// <summary>
+    /// Sets <see cref="BaseEntity.UpdatedAt"/> to the current UTC time on every tracked
+    /// <see cref="BaseEntity"/> whose state is <see cref="EntityState.Modified"/>.
+    /// </summary>
+    private static void StampModifiedEntities(DbContext? context)
+    {
+        if (context is null)
+        {
+            return;
+        }
+
+        var now = DateTime.UtcNow;
+
+        foreach (var entry in context.ChangeTracker.Entries<BaseEntity>())
+        {
+            if (entry.State == EntityState.Modified)
+            {
+                entry.Entity.UpdatedAt = now;
+            }
+        }
+    }
+
+    /// <summary>
+    /// Stamps modified entities, then continues the asynchronous save pipeline.
+    /// </summary>
+    public override ValueTask<InterceptionResult<int>> SavingChangesAsync(
+        DbContextEventData eventData,
+        InterceptionResult<int> result,
+        CancellationToken cancellationToken = default)
+    {
+        StampModifiedEntities(eventData.Context);
+
+        return base.SavingChangesAsync(eventData, result, 
cancellationToken); + } +} \ No newline at end of file diff --git a/LexWells.Infrastructure.sln b/LexWells.Infrastructure.sln new file mode 100644 index 0000000..47359e7 --- /dev/null +++ b/LexWells.Infrastructure.sln @@ -0,0 +1,48 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.0.31903.59 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LexWells.Infrastructure.Common", "LexWells.Infrastructure.Common\LexWells.Infrastructure.Common.csproj", "{40C2D397-C853-4155-A056-3EF634F90B53}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LexWells.Infrastructure.EntityFramework", "LexWells.Infrastructure.EntityFramework\LexWells.Infrastructure.EntityFramework.csproj", "{1F65E771-C904-4291-AFDE-8A383FA0D970}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 + Release|Any CPU = Release|Any CPU + Release|x64 = Release|x64 + Release|x86 = Release|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {40C2D397-C853-4155-A056-3EF634F90B53}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {40C2D397-C853-4155-A056-3EF634F90B53}.Debug|Any CPU.Build.0 = Debug|Any CPU + {40C2D397-C853-4155-A056-3EF634F90B53}.Debug|x64.ActiveCfg = Debug|Any CPU + {40C2D397-C853-4155-A056-3EF634F90B53}.Debug|x64.Build.0 = Debug|Any CPU + {40C2D397-C853-4155-A056-3EF634F90B53}.Debug|x86.ActiveCfg = Debug|Any CPU + {40C2D397-C853-4155-A056-3EF634F90B53}.Debug|x86.Build.0 = Debug|Any CPU + {40C2D397-C853-4155-A056-3EF634F90B53}.Release|Any CPU.ActiveCfg = Release|Any CPU + {40C2D397-C853-4155-A056-3EF634F90B53}.Release|Any CPU.Build.0 = Release|Any CPU + {40C2D397-C853-4155-A056-3EF634F90B53}.Release|x64.ActiveCfg = Release|Any CPU + {40C2D397-C853-4155-A056-3EF634F90B53}.Release|x64.Build.0 = Release|Any CPU + 
{40C2D397-C853-4155-A056-3EF634F90B53}.Release|x86.ActiveCfg = Release|Any CPU + {40C2D397-C853-4155-A056-3EF634F90B53}.Release|x86.Build.0 = Release|Any CPU + {1F65E771-C904-4291-AFDE-8A383FA0D970}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {1F65E771-C904-4291-AFDE-8A383FA0D970}.Debug|Any CPU.Build.0 = Debug|Any CPU + {1F65E771-C904-4291-AFDE-8A383FA0D970}.Debug|x64.ActiveCfg = Debug|Any CPU + {1F65E771-C904-4291-AFDE-8A383FA0D970}.Debug|x64.Build.0 = Debug|Any CPU + {1F65E771-C904-4291-AFDE-8A383FA0D970}.Debug|x86.ActiveCfg = Debug|Any CPU + {1F65E771-C904-4291-AFDE-8A383FA0D970}.Debug|x86.Build.0 = Debug|Any CPU + {1F65E771-C904-4291-AFDE-8A383FA0D970}.Release|Any CPU.ActiveCfg = Release|Any CPU + {1F65E771-C904-4291-AFDE-8A383FA0D970}.Release|Any CPU.Build.0 = Release|Any CPU + {1F65E771-C904-4291-AFDE-8A383FA0D970}.Release|x64.ActiveCfg = Release|Any CPU + {1F65E771-C904-4291-AFDE-8A383FA0D970}.Release|x64.Build.0 = Release|Any CPU + {1F65E771-C904-4291-AFDE-8A383FA0D970}.Release|x86.ActiveCfg = Release|Any CPU + {1F65E771-C904-4291-AFDE-8A383FA0D970}.Release|x86.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal