Initial Commit

This commit is contained in:
drew 2026-04-01 13:34:19 -04:00
commit daadcb36dc
14 changed files with 462 additions and 0 deletions

21
.gitignore vendored Normal file
View File

@ -0,0 +1,21 @@
# Build results
[Dd]ebug/
[Rr]elease/
x64/
x86/
[Bb]in/
[Oo]bj/
# IDE - Rider/Visual Studio
.idea/
*.user
*.userosscache
*.sln.doccache
# NuGet
*.nupkg
node_modules/
# OS files
.DS_Store
Thumbs.db

View File

@ -0,0 +1,24 @@
using LexWells.Infrastructure.Common.Interfaces;
using LexWells.Infrastructure.Common.Services;
using Microsoft.Extensions.DependencyInjection;
namespace LexWells.Infrastructure.Common;
/// <summary>
/// Service-collection extensions that wire up the shared LexWells infrastructure services.
/// </summary>
public static class DependencyInjection
{
    /// <summary>
    /// Registers the Redis-backed distributed cache, <see cref="ICacheService"/>,
    /// the default <c>HttpClient</c> factory and <see cref="IRobotsService"/>.
    /// </summary>
    /// <param name="services">The collection the registrations are added to.</param>
    /// <param name="redisConnectionString">Assigned to the Redis cache options' <c>Configuration</c>.</param>
    /// <returns>The same <paramref name="services"/> instance, to allow chaining.</returns>
    public static IServiceCollection AddLexWellsInfrastructure(this IServiceCollection services, string redisConnectionString)
    {
        // Every call below returns the collection it was invoked on, so the
        // registrations happen in the same order as individual statements would.
        return services
            .AddStackExchangeRedisCache(redis => redis.Configuration = redisConnectionString)
            .AddSingleton<ICacheService, RedisCacheService>()
            .AddHttpClient()
            .AddSingleton<IRobotsService, RobotsService>();
    }
}

View File

@ -0,0 +1,9 @@
namespace LexWells.Infrastructure.Common.Interfaces;
/// <summary>
/// Abstraction over a key/value cache used by the infrastructure services.
/// </summary>
public interface ICacheService
{
/// <summary>
/// Reads the value stored under <paramref name="key"/>.
/// </summary>
/// <typeparam name="T">Type the cached payload is returned as.</typeparam>
/// <param name="key">Cache key to look up.</param>
/// <returns>The cached value; the nullable return type allows <c>null</c>/<c>default</c> when no usable value is available.</returns>
Task<T?> GetAsync<T>(string key);
/// <summary>
/// Reads a collection stored under <paramref name="key"/>, materialising its elements as
/// <typeparamref name="TConcrete"/> and exposing them as <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">Element type exposed to the caller.</typeparam>
/// <typeparam name="TConcrete">Concrete element type; must be assignable to <typeparamref name="T"/>.</typeparam>
/// <param name="key">Cache key to look up.</param>
/// <returns>The cached elements, or <c>null</c> (permitted by the nullable return type).</returns>
Task<IEnumerable<T>?> GetCollectionAsync<T, TConcrete>(string key) where TConcrete : T;
/// <summary>
/// Stores <paramref name="value"/> under <paramref name="key"/>.
/// </summary>
/// <typeparam name="T">Type of the value being stored.</typeparam>
/// <param name="key">Cache key to write.</param>
/// <param name="value">Value to store.</param>
/// <param name="expiration">Optional lifetime of the entry; when <c>null</c> the implementation chooses one.</param>
Task SetAsync<T>(string key, T value, TimeSpan? expiration = null);
/// <summary>
/// Removes the entry stored under <paramref name="key"/>.
/// </summary>
/// <param name="key">Cache key to remove.</param>
Task RemoveAsync(string key);
}

View File

@ -0,0 +1,14 @@
namespace LexWells.Infrastructure.Common.Interfaces;
/// <summary>
/// Answers robots.txt questions (crawl permission and crawl delay) for a URL.
/// </summary>
/// <remarks>
/// NOTE(review): the optional <c>userAgent</c> defaults declared here ("LexWellsBot") differ from
/// the ones declared on <c>RobotsService</c> ("NewsArchiveBot"). C# takes an optional argument's
/// default from the static type the call is made through, so callers using this interface get
/// "LexWellsBot" - confirm which value is intended.
/// </remarks>
public interface IRobotsService
{
/// <summary>
/// Checks if the User-Agent is allowed to access the specific path.
/// </summary>
/// <param name="uri">URL whose host and path are checked.</param>
/// <param name="userAgent">Bot name to look up in robots.txt.</param>
/// <returns><c>true</c> when crawling <paramref name="uri"/> is permitted.</returns>
Task<bool> CanCrawlAsync(Uri uri, string userAgent = "LexWellsBot");
/// <summary>
/// Gets the crawl delay specified by the host, or a default if none exists.
/// </summary>
/// <param name="uri">URL whose host is consulted.</param>
/// <param name="userAgent">Bot name to look up in robots.txt.</param>
/// <returns>The delay to wait between requests to the host.</returns>
Task<TimeSpan> GetCrawlDelayAsync(Uri uri, string userAgent = "LexWellsBot");
}

View File

@ -0,0 +1,26 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Project file for the LexWells.Infrastructure.Common package. -->
<PropertyGroup>
<TargetFramework>net9.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<!-- NuGet package metadata. -->
<PackageId>LexWells.Infrastructure.Common</PackageId>
<Version>1.0.0</Version>
<Authors>Kenneth Wells</Authors>
<Company>LexWells</Company>
<Description>Shared infrastructure and caching logic for LexWells projects.</Description>
<!-- A package is produced on every build and written to PackageOutputPath. -->
<GeneratePackageOnBuild>true</GeneratePackageOnBuild>
<!-- NOTE(review): absolute, machine-specific path; builds on other machines or CI will write here too. Confirm this is intended. -->
<PackageOutputPath>/home/drew/source/LocalNuGet</PackageOutputPath>
<!-- NOTE(review): PackageFormat does not look like a standard NuGet pack property; confirm something consumes it. -->
<PackageFormat>nupkg</PackageFormat>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Caching.Abstractions" Version="9.0.14" />
<PackageReference Include="Microsoft.Extensions.Caching.StackExchangeRedis" Version="9.0.14" />
<PackageReference Include="Microsoft.Extensions.Http" Version="9.0.14" />
<!-- NOTE(review): the visible cache service serializes with System.Text.Json; verify Newtonsoft.Json is still needed. -->
<PackageReference Include="Newtonsoft.Json" Version="13.0.4" />
</ItemGroup>
</Project>

View File

@ -0,0 +1,7 @@
namespace LexWells.Infrastructure.Common.Models;
/// <summary>
/// Result of evaluating a host's robots.txt for one request.
/// </summary>
/// <param name="IsAllowed">Whether crawling is permitted.</param>
/// <param name="CrawlDelay">Crawl delay taken from robots.txt, or <c>null</c> when none was found; <c>RobotsService</c> interprets it as whole seconds.</param>
/// <param name="SitemapUrl">Sitemap location; the visible <c>RobotsService</c> always passes <c>null</c>.</param>
/// <param name="ExpiresAt">Instant after which the entry is stale; <c>RobotsService</c> compares it against <c>DateTime.UtcNow</c>.</param>
public record RobotsEntry(
bool IsAllowed,
int? CrawlDelay,
string? SitemapUrl,
DateTime ExpiresAt);

View File

@ -0,0 +1,61 @@
using System.Text.Json;
using LexWells.Infrastructure.Common.Interfaces;
using Microsoft.Extensions.Caching.Distributed;
namespace LexWells.Infrastructure.Common.Services;
/// <summary>
/// <see cref="ICacheService"/> implementation that stores values as text in an
/// <see cref="IDistributedCache"/>. Strings are stored verbatim; every other value is
/// stored as JSON produced by <see cref="JsonSerializer"/>.
/// </summary>
public class RedisCacheService : ICacheService
{
    /// <summary>Entry lifetime used when the caller does not supply one.</summary>
    private static readonly TimeSpan DefaultExpiration = TimeSpan.FromHours(1);

    private readonly IDistributedCache _cache;

    /// <summary>Creates the service on top of the given distributed cache.</summary>
    /// <param name="cache">Backing distributed cache.</param>
    public RedisCacheService(IDistributedCache cache)
    {
        _cache = cache;
    }

    /// <summary>
    /// Reads a JSON array stored under <paramref name="key"/>, deserializes it into a
    /// <see cref="HashSet{T}"/> of <typeparamref name="TConcrete"/> and exposes it as
    /// <typeparamref name="T"/>.
    /// </summary>
    /// <param name="key">Cache key to look up.</param>
    /// <returns>The cached elements, or <c>null</c> when the key holds no data.</returns>
    /// <exception cref="JsonException">The cached payload is not valid JSON for the requested type.</exception>
    public async Task<IEnumerable<T>?> GetCollectionAsync<T, TConcrete>(string key)
        where TConcrete : T
    {
        var cachedData = await _cache.GetStringAsync(key);
        if (string.IsNullOrEmpty(cachedData))
        {
            return null;
        }

        var result = JsonSerializer.Deserialize<HashSet<TConcrete>>(cachedData);
        return result?.Cast<T>();
    }

    /// <summary>
    /// Reads the value stored under <paramref name="key"/>.
    /// </summary>
    /// <param name="key">Cache key to look up.</param>
    /// <returns>
    /// The cached value, or <c>default</c> when the key is missing, blank, or holds a
    /// payload that cannot be deserialized as <typeparamref name="T"/>.
    /// </returns>
    public async Task<T?> GetAsync<T>(string key)
    {
        var cachedData = await _cache.GetStringAsync(key);

        // Nothing (or only whitespace) stored under this key.
        if (string.IsNullOrWhiteSpace(cachedData))
        {
            return default;
        }

        // SetAsync writes strings verbatim (not JSON-encoded), so they must be returned
        // verbatim too; running them through the JSON parser would fail and yield null.
        if (typeof(T) == typeof(string))
        {
            return (T)(object)cachedData;
        }

        try
        {
            return JsonSerializer.Deserialize<T>(cachedData);
        }
        catch (Exception ex) when (ex is JsonException or NotSupportedException)
        {
            // Best effort: an unreadable payload is treated as a cache miss.
            return default;
        }
    }

    /// <summary>
    /// Stores <paramref name="value"/> under <paramref name="key"/>.
    /// </summary>
    /// <param name="key">Cache key to write.</param>
    /// <param name="value">Value to store; strings are written as-is, other values as JSON.</param>
    /// <param name="expiration">Lifetime relative to now; defaults to one hour.</param>
    public async Task SetAsync<T>(string key, T value, TimeSpan? expiration = null)
    {
        var options = new DistributedCacheEntryOptions
        {
            AbsoluteExpirationRelativeToNow = expiration ?? DefaultExpiration
        };

        string dataToStore = value is string s ? s : JsonSerializer.Serialize(value);
        await _cache.SetStringAsync(key, dataToStore, options);
    }

    /// <summary>Removes the entry stored under <paramref name="key"/>.</summary>
    /// <param name="key">Cache key to remove.</param>
    public async Task RemoveAsync(string key)
    {
        await _cache.RemoveAsync(key);
    }
}

View File

@ -0,0 +1,151 @@
using System.Net;
using Microsoft.Extensions.Logging;
using LexWells.Infrastructure.Common.Interfaces;
using LexWells.Infrastructure.Common.Models;
namespace LexWells.Infrastructure.Common.Services;
/// <summary>
/// Answers robots.txt questions (crawl permission and crawl delay) for a URL.
/// </summary>
/// <remarks>
/// The robots.txt body is fetched once per scheme/host/port and cached through
/// <see cref="ICacheService"/>. Its rules are evaluated against the requested path and
/// user agent on every call, so a verdict computed for one URL is never reused for a
/// different path or bot on the same host.
/// </remarks>
public class RobotsService(
    IHttpClientFactory clientFactory,
    ICacheService cache,
    ILogger<RobotsService> logger) : IRobotsService
{
    // Distinct from the earlier "robots_" prefix: the cached payload is now the fetched
    // document, not an evaluated RobotsEntry, so old entries must not be read back.
    private const string CachePrefix = "robots_txt_";
    private const string DefaultUserAgent = "NewsArchiveBot";

    /// <summary>How long a fetched (or confirmed missing) robots.txt is trusted.</summary>
    private static readonly TimeSpan SuccessLifetime = TimeSpan.FromDays(1);

    /// <summary>How long a failed fetch is remembered before retrying.</summary>
    private static readonly TimeSpan FailureLifetime = TimeSpan.FromMinutes(10);

    /// <summary>Cached outcome of fetching one site's robots.txt.</summary>
    /// <param name="FetchSucceeded"><c>false</c> when the file could not be retrieved (other than a 404).</param>
    /// <param name="Content">Raw robots.txt text; empty when the site has none.</param>
    /// <param name="ExpiresAt">UTC instant after which the document must be re-fetched.</param>
    private sealed record RobotsDocument(bool FetchSucceeded, string? Content, DateTime ExpiresAt);

    /// <summary>
    /// Checks whether <paramref name="userAgent"/> may crawl the path of <paramref name="uri"/>.
    /// </summary>
    /// <param name="uri">Absolute URL to check.</param>
    /// <param name="userAgent">Bot name matched against robots.txt <c>User-agent</c> lines.</param>
    /// <returns><c>true</c> when allowed; <c>false</c> when disallowed or robots.txt could not be fetched.</returns>
    public async Task<bool> CanCrawlAsync(Uri uri, string userAgent = DefaultUserAgent)
    {
        var entry = await GetRobotsAsync(uri, userAgent);
        return entry.IsAllowed;
    }

    /// <summary>
    /// Gets the crawl delay robots.txt requests for <paramref name="userAgent"/>.
    /// </summary>
    /// <param name="uri">Absolute URL whose site is consulted.</param>
    /// <param name="userAgent">Bot name matched against robots.txt <c>User-agent</c> lines.</param>
    /// <returns>The requested delay, or one second when none is specified.</returns>
    public async Task<TimeSpan> GetCrawlDelayAsync(Uri uri, string userAgent = DefaultUserAgent)
    {
        var entry = await GetRobotsAsync(uri, userAgent);
        return entry.CrawlDelay.HasValue
            ? TimeSpan.FromSeconds(entry.CrawlDelay.Value)
            : TimeSpan.FromSeconds(1);
    }

    /// <summary>Loads the site's robots.txt (cached) and evaluates it for this path and agent.</summary>
    private async Task<RobotsEntry> GetRobotsAsync(Uri uri, string userAgent)
    {
        var document = await GetDocumentAsync(uri);

        // A failed fetch is treated conservatively: do not crawl until it can be retried.
        if (!document.FetchSucceeded)
        {
            return new RobotsEntry(false, null, null, document.ExpiresAt);
        }

        return ParseRobotsContent(document.Content ?? string.Empty, userAgent, uri.AbsolutePath, document.ExpiresAt);
    }

    /// <summary>Returns the robots.txt document for the URL's site, from cache when still fresh.</summary>
    private async Task<RobotsDocument> GetDocumentAsync(Uri uri)
    {
        // robots.txt applies per scheme + host + port; Uri.Authority keeps a non-default port.
        string origin = $"{uri.Scheme}://{uri.Authority}";
        string cacheKey = $"{CachePrefix}{origin}";

        try
        {
            var cached = await cache.GetAsync<RobotsDocument>(cacheKey);
            if (cached is not null && cached.ExpiresAt > DateTime.UtcNow)
            {
                return cached;
            }
        }
        catch (Exception ex)
        {
            // Cache problems must not block crawling decisions; fall through to a fresh fetch.
            logger.LogWarning(ex, "Failed to retrieve or deserialize robots cache for {Host}", uri.Host);
        }

        var document = await FetchRobotsAsync(origin);
        var cacheDuration = document.FetchSucceeded ? SuccessLifetime : FailureLifetime;

        try
        {
            await cache.SetAsync(cacheKey, document, cacheDuration);
        }
        catch (Exception ex)
        {
            logger.LogError(ex, "Failed to save robots entry to cache for {Host}", uri.Host);
        }

        return document;
    }

    /// <summary>Downloads <c>{origin}/robots.txt</c>; a 404 counts as an empty (allow-all) file.</summary>
    private async Task<RobotsDocument> FetchRobotsAsync(string origin)
    {
        var client = clientFactory.CreateClient();
        var robotsUrl = $"{origin}/robots.txt";
        client.DefaultRequestHeaders.UserAgent.ParseAdd(
            "Mozilla/5.0 (compatible; NewsArchiveBot/1.0; +http://lexwells.com/bot)");

        try
        {
            var content = await client.GetStringAsync(robotsUrl);
            return new RobotsDocument(true, content, DateTime.UtcNow.Add(SuccessLifetime));
        }
        catch (HttpRequestException ex) when (ex.StatusCode == HttpStatusCode.NotFound)
        {
            // No robots.txt: everything is allowed.
            return new RobotsDocument(true, string.Empty, DateTime.UtcNow.Add(SuccessLifetime));
        }
        catch (Exception ex)
        {
            logger.LogError(ex, "Error fetching robots.txt from {Url}", robotsUrl);
            return new RobotsDocument(false, null, DateTime.UtcNow.Add(FailureLifetime));
        }
    }

    /// <summary>
    /// Evaluates robots.txt text for one bot and path. Rules from a group naming the bot
    /// replace anything gathered from the <c>*</c> group; within the applicable group the
    /// last matching Allow/Disallow prefix wins.
    /// </summary>
    private static RobotsEntry ParseRobotsContent(string content, string userAgent, string path, DateTime expiresAt)
    {
        bool isAllowed = true;
        int? crawlDelay = null;
        bool foundSpecificMatch = false;
        bool inRelevantSection = false;
        // Consecutive User-agent lines share one group of rules.
        bool previousLineWasUserAgent = false;

        var lines = content.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
        foreach (var rawLine in lines)
        {
            // Drop full-line and trailing comments before looking at the directive.
            int commentStart = rawLine.IndexOf('#');
            var line = (commentStart >= 0 ? rawLine[..commentStart] : rawLine).Trim();
            if (line.Length == 0)
            {
                continue;
            }

            int separator = line.IndexOf(':');
            if (separator < 0)
            {
                continue;
            }

            var key = line[..separator].Trim();
            var value = line[(separator + 1)..].Trim();

            if (key.Equals("user-agent", StringComparison.OrdinalIgnoreCase))
            {
                bool continuesGroup = previousLineWasUserAgent;
                previousLineWasUserAgent = true;

                if (value.Equals(userAgent, StringComparison.OrdinalIgnoreCase))
                {
                    // A group naming this bot overrides whatever the wildcard group said.
                    inRelevantSection = true;
                    foundSpecificMatch = true;
                    isAllowed = true;
                    crawlDelay = null;
                }
                else if (value == "*" && !foundSpecificMatch)
                {
                    inRelevantSection = true;
                }
                else if (!continuesGroup)
                {
                    // A new group for some other bot; when the line merely extends the
                    // current group, the group's relevance is left untouched.
                    inRelevantSection = false;
                }

                continue;
            }

            previousLineWasUserAgent = false;
            if (!inRelevantSection)
            {
                continue;
            }

            if (key.Equals("disallow", StringComparison.OrdinalIgnoreCase))
            {
                if (string.IsNullOrEmpty(value))
                {
                    // An empty Disallow means "nothing is disallowed".
                    isAllowed = true;
                }
                else if (path.StartsWith(value, StringComparison.OrdinalIgnoreCase))
                {
                    isAllowed = false;
                }
            }
            else if (key.Equals("allow", StringComparison.OrdinalIgnoreCase))
            {
                if (path.StartsWith(value, StringComparison.OrdinalIgnoreCase))
                {
                    isAllowed = true;
                }
            }
            else if (key.Equals("crawl-delay", StringComparison.OrdinalIgnoreCase))
            {
                // Invariant parsing; negative delays are ignored because they cannot be waited on.
                if (int.TryParse(
                        value,
                        System.Globalization.NumberStyles.Integer,
                        System.Globalization.CultureInfo.InvariantCulture,
                        out int delay)
                    && delay >= 0)
                {
                    crawlDelay = delay;
                }
            }
        }

        return new RobotsEntry(isAllowed, crawlDelay, null, expiresAt);
    }
}

View File

@ -0,0 +1,22 @@
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.DependencyInjection;
namespace LexWells.Infrastructure.EntityFramework;
/// <summary>
/// Service-collection extensions for registering LexWells EF Core contexts.
/// </summary>
public static class DataDependencyInjection
{
    /// <summary>
    /// Registers <typeparamref name="TContext"/> backed by SQLite, with migrations looked up
    /// in the assembly that declares the context type.
    /// </summary>
    /// <typeparam name="TContext">The <see cref="DbContext"/> type to register.</typeparam>
    /// <param name="services">The collection the context is added to.</param>
    /// <param name="connectionString">SQLite connection string.</param>
    /// <returns>The same <paramref name="services"/> instance, to allow chaining.</returns>
    public static IServiceCollection AddLexWellsDatabase<TContext>(
        this IServiceCollection services,
        string connectionString) where TContext : DbContext
    {
        // Resolved once up front; the value is the same every time the options are built.
        var migrationsAssemblyName = typeof(TContext).Assembly.FullName;

        return services.AddDbContext<TContext>(builder =>
            builder.UseSqlite(
                connectionString,
                sqlite => sqlite.MigrationsAssembly(migrationsAssemblyName)));
    }
}

View File

@ -0,0 +1,9 @@
namespace LexWells.Infrastructure.EntityFramework.Entities;
/// <summary>
/// Base class for persisted entities carrying an identifier and audit timestamps.
/// </summary>
public abstract class BaseEntity
{
/// <summary>Entity identifier; initialised to an empty string.</summary>
public string Id { get; set; } = string.Empty;
/// <summary>UTC instant captured when the object instance is constructed.</summary>
public DateTime CreatedAt { get; set; } = DateTime.UtcNow;
/// <summary>UTC instant of the last update, or <c>null</c> if never set; <c>UpdateAuditableEntitiesInterceptor</c> assigns it for modified entities.</summary>
public DateTime? UpdatedAt { get; set; }
}

View File

@ -0,0 +1,24 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Project file for the LexWells.Infrastructure.EntityFramework package. -->
<PropertyGroup>
<TargetFramework>net9.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<!-- NuGet package metadata. -->
<PackageId>LexWells.Infrastructure.EntityFramework</PackageId>
<Version>1.0.0</Version>
<Authors>Kenneth Wells</Authors>
<Company>LexWells</Company>
<Description>Shared EF Core architecture and SQLite helpers for LexWells projects.</Description>
<!-- A package is produced on every build and written to PackageOutputPath. -->
<GeneratePackageOnBuild>true</GeneratePackageOnBuild>
<!-- NOTE(review): absolute, machine-specific path; builds on other machines or CI will write here too. Confirm this is intended. -->
<PackageOutputPath>/home/drew/source/LocalNuGet</PackageOutputPath>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Microsoft.EntityFrameworkCore" Version="9.0.14" />
<PackageReference Include="Microsoft.EntityFrameworkCore.Sqlite" Version="9.0.14" />
<PackageReference Include="Microsoft.Extensions.DependencyInjection.Abstractions" Version="9.0.14" />
</ItemGroup>
</Project>

View File

@ -0,0 +1,13 @@
using Microsoft.EntityFrameworkCore;
namespace LexWells.Infrastructure.EntityFramework;
/// <summary>
/// Base context for LexWells applications; attaches the audit interceptor to every
/// derived context.
/// </summary>
public abstract class LexWellsDbContext(DbContextOptions options) : DbContext(options)
{
    /// <summary>
    /// Adds a new <see cref="UpdateAuditableEntitiesInterceptor"/> to the options being
    /// built, then defers to the base implementation.
    /// </summary>
    /// <param name="optionsBuilder">Builder for this context's options.</param>
    protected override void OnConfiguring(DbContextOptionsBuilder optionsBuilder)
    {
        var auditInterceptor = new UpdateAuditableEntitiesInterceptor();
        optionsBuilder.AddInterceptors(auditInterceptor);

        base.OnConfiguring(optionsBuilder);
    }
}
}

View File

@ -0,0 +1,33 @@
using LexWells.Infrastructure.EntityFramework.Entities;
using Microsoft.EntityFrameworkCore;
using Microsoft.EntityFrameworkCore.Diagnostics;
namespace LexWells.Infrastructure.EntityFramework;
/// <summary>
/// Save-changes interceptor that sets <see cref="BaseEntity.UpdatedAt"/> on every tracked
/// <see cref="BaseEntity"/> in the <see cref="EntityState.Modified"/> state, for both the
/// synchronous and asynchronous save paths.
/// </summary>
public class UpdateAuditableEntitiesInterceptor : SaveChangesInterceptor
{
    /// <summary>
    /// Stamps modified entities before a synchronous <c>SaveChanges</c>.
    /// </summary>
    /// <param name="eventData">Event data carrying the context being saved.</param>
    /// <param name="result">Interception result passed through to the base implementation.</param>
    /// <returns>The base implementation's result.</returns>
    public override InterceptionResult<int> SavingChanges(
        DbContextEventData eventData,
        InterceptionResult<int> result)
    {
        StampModifiedEntities(eventData.Context);
        return base.SavingChanges(eventData, result);
    }

    /// <summary>
    /// Stamps modified entities before an asynchronous <c>SaveChangesAsync</c>.
    /// </summary>
    /// <param name="eventData">Event data carrying the context being saved.</param>
    /// <param name="result">Interception result passed through to the base implementation.</param>
    /// <param name="cancellationToken">Token forwarded to the base implementation.</param>
    /// <returns>The base implementation's result.</returns>
    public override ValueTask<InterceptionResult<int>> SavingChangesAsync(
        DbContextEventData eventData,
        InterceptionResult<int> result,
        CancellationToken cancellationToken = default)
    {
        StampModifiedEntities(eventData.Context);
        return base.SavingChangesAsync(eventData, result, cancellationToken);
    }

    /// <summary>Sets <c>UpdatedAt</c> to the current UTC time on every modified <see cref="BaseEntity"/>.</summary>
    private static void StampModifiedEntities(DbContext? context)
    {
        if (context is null)
        {
            return;
        }

        // One timestamp per save so all entities written together agree.
        var now = DateTime.UtcNow;
        var modifiedEntries = context.ChangeTracker
            .Entries<BaseEntity>()
            .Where(e => e.State == EntityState.Modified);

        foreach (var entry in modifiedEntries)
        {
            entry.Entity.UpdatedAt = now;
        }
    }
}

View File

@ -0,0 +1,48 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 17
VisualStudioVersion = 17.0.31903.59
MinimumVisualStudioVersion = 10.0.40219.1
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LexWells.Infrastructure.Common", "LexWells.Infrastructure.Common\LexWells.Infrastructure.Common.csproj", "{40C2D397-C853-4155-A056-3EF634F90B53}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LexWells.Infrastructure.EntityFramework", "LexWells.Infrastructure.EntityFramework\LexWells.Infrastructure.EntityFramework.csproj", "{1F65E771-C904-4291-AFDE-8A383FA0D970}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Debug|x64 = Debug|x64
Debug|x86 = Debug|x86
Release|Any CPU = Release|Any CPU
Release|x64 = Release|x64
Release|x86 = Release|x86
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{40C2D397-C853-4155-A056-3EF634F90B53}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{40C2D397-C853-4155-A056-3EF634F90B53}.Debug|Any CPU.Build.0 = Debug|Any CPU
{40C2D397-C853-4155-A056-3EF634F90B53}.Debug|x64.ActiveCfg = Debug|Any CPU
{40C2D397-C853-4155-A056-3EF634F90B53}.Debug|x64.Build.0 = Debug|Any CPU
{40C2D397-C853-4155-A056-3EF634F90B53}.Debug|x86.ActiveCfg = Debug|Any CPU
{40C2D397-C853-4155-A056-3EF634F90B53}.Debug|x86.Build.0 = Debug|Any CPU
{40C2D397-C853-4155-A056-3EF634F90B53}.Release|Any CPU.ActiveCfg = Release|Any CPU
{40C2D397-C853-4155-A056-3EF634F90B53}.Release|Any CPU.Build.0 = Release|Any CPU
{40C2D397-C853-4155-A056-3EF634F90B53}.Release|x64.ActiveCfg = Release|Any CPU
{40C2D397-C853-4155-A056-3EF634F90B53}.Release|x64.Build.0 = Release|Any CPU
{40C2D397-C853-4155-A056-3EF634F90B53}.Release|x86.ActiveCfg = Release|Any CPU
{40C2D397-C853-4155-A056-3EF634F90B53}.Release|x86.Build.0 = Release|Any CPU
{1F65E771-C904-4291-AFDE-8A383FA0D970}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{1F65E771-C904-4291-AFDE-8A383FA0D970}.Debug|Any CPU.Build.0 = Debug|Any CPU
{1F65E771-C904-4291-AFDE-8A383FA0D970}.Debug|x64.ActiveCfg = Debug|Any CPU
{1F65E771-C904-4291-AFDE-8A383FA0D970}.Debug|x64.Build.0 = Debug|Any CPU
{1F65E771-C904-4291-AFDE-8A383FA0D970}.Debug|x86.ActiveCfg = Debug|Any CPU
{1F65E771-C904-4291-AFDE-8A383FA0D970}.Debug|x86.Build.0 = Debug|Any CPU
{1F65E771-C904-4291-AFDE-8A383FA0D970}.Release|Any CPU.ActiveCfg = Release|Any CPU
{1F65E771-C904-4291-AFDE-8A383FA0D970}.Release|Any CPU.Build.0 = Release|Any CPU
{1F65E771-C904-4291-AFDE-8A383FA0D970}.Release|x64.ActiveCfg = Release|Any CPU
{1F65E771-C904-4291-AFDE-8A383FA0D970}.Release|x64.Build.0 = Release|Any CPU
{1F65E771-C904-4291-AFDE-8A383FA0D970}.Release|x86.ActiveCfg = Release|Any CPU
{1F65E771-C904-4291-AFDE-8A383FA0D970}.Release|x86.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal