Initial Commit
This commit is contained in:
commit
daadcb36dc
|
|
@ -0,0 +1,21 @@
|
|||
# Build results
|
||||
[Dd]ebug/
|
||||
[Rr]elease/
|
||||
x64/
|
||||
x86/
|
||||
[Bb]in/
|
||||
[Oo]bj/
|
||||
|
||||
# IDE - Rider/Visual Studio
|
||||
.idea/
|
||||
*.user
|
||||
*.userosscache
|
||||
*.sln.doccache
|
||||
|
||||
# NuGet
|
||||
*.nupkg
|
||||
node_modules/
|
||||
|
||||
# OS files
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
|
|
@ -0,0 +1,24 @@
|
|||
using LexWells.Infrastructure.Common.Interfaces;
|
||||
using LexWells.Infrastructure.Common.Services;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
|
||||
namespace LexWells.Infrastructure.Common;
|
||||
|
||||
/// <summary>
/// Service-registration helpers for the shared LexWells infrastructure package.
/// </summary>
public static class DependencyInjection
{
    /// <summary>
    /// Registers the Redis-backed distributed cache, <see cref="ICacheService"/>,
    /// the default <c>IHttpClientFactory</c> and <see cref="IRobotsService"/>.
    /// </summary>
    /// <param name="services">The service collection to add the registrations to.</param>
    /// <param name="redisConnectionString">Configuration string assigned to the Redis cache options.</param>
    /// <returns>The same <paramref name="services"/> instance so calls can be chained.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="services"/> or <paramref name="redisConnectionString"/> is <c>null</c>.
    /// </exception>
    /// <exception cref="ArgumentException"><paramref name="redisConnectionString"/> is empty or whitespace.</exception>
    public static IServiceCollection AddLexWellsInfrastructure(this IServiceCollection services, string redisConnectionString)
    {
        // Fail at registration time instead of on the first cache call deep inside a request.
        ArgumentNullException.ThrowIfNull(services);
        ArgumentException.ThrowIfNullOrWhiteSpace(redisConnectionString);

        services.AddStackExchangeRedisCache(options =>
        {
            options.Configuration = redisConnectionString;
        });

        services.AddSingleton<ICacheService, RedisCacheService>();

        // RobotsService resolves IHttpClientFactory, so the factory must be registered before it is used.
        services.AddHttpClient();

        services.AddSingleton<IRobotsService, RobotsService>();

        return services;
    }
}
|
||||
|
|
@ -0,0 +1,9 @@
|
|||
namespace LexWells.Infrastructure.Common.Interfaces;
|
||||
|
||||
/// <summary>
/// Asynchronous key/value cache abstraction used by the infrastructure services.
/// </summary>
public interface ICacheService
{
    /// <summary>Reads the value stored under <paramref name="key"/>.</summary>
    /// <typeparam name="T">Type the cached value is returned as.</typeparam>
    /// <param name="key">Cache key to look up.</param>
    /// <returns>The cached value, or <c>default</c> when no usable value is available.</returns>
    Task<T?> GetAsync<T>(string key);

    /// <summary>Reads a cached collection stored under <paramref name="key"/>.</summary>
    /// <typeparam name="T">Element type exposed to the caller.</typeparam>
    /// <typeparam name="TConcrete">Concrete element type, assignable to <typeparamref name="T"/>.</typeparam>
    /// <param name="key">Cache key to look up.</param>
    /// <returns>The cached elements, or <c>null</c> when no collection is available.</returns>
    Task<IEnumerable<T>?> GetCollectionAsync<T, TConcrete>(string key)
        where TConcrete : T;

    /// <summary>Stores <paramref name="value"/> under <paramref name="key"/>.</summary>
    /// <typeparam name="T">Type of the value being stored.</typeparam>
    /// <param name="key">Cache key to write.</param>
    /// <param name="value">Value to store.</param>
    /// <param name="expiration">Optional lifetime of the entry; the implementation chooses one when omitted.</param>
    Task SetAsync<T>(string key, T value, TimeSpan? expiration = null);

    /// <summary>Removes the entry stored under <paramref name="key"/>.</summary>
    /// <param name="key">Cache key to remove.</param>
    Task RemoveAsync(string key);
}
|
||||
|
|
@ -0,0 +1,14 @@
|
|||
namespace LexWells.Infrastructure.Common.Interfaces;
|
||||
|
||||
/// <summary>
/// Answers robots.txt questions for a URL on behalf of a crawler user agent.
/// </summary>
public interface IRobotsService
{
    /// <summary>
    /// Checks if the User-Agent is allowed to access the specific path.
    /// </summary>
    /// <param name="uri">URL whose host and path are checked.</param>
    /// <param name="userAgent">User-agent name the rules are evaluated for.</param>
    /// <returns><c>true</c> when crawling <paramref name="uri"/> is permitted.</returns>
    Task<bool> CanCrawlAsync(
        Uri uri,
        string userAgent = "LexWellsBot");

    /// <summary>
    /// Gets the crawl delay specified by the host, or a default if none exists.
    /// </summary>
    /// <param name="uri">URL identifying the host to query.</param>
    /// <param name="userAgent">User-agent name the rules are evaluated for.</param>
    /// <returns>The delay to observe between requests to the host.</returns>
    Task<TimeSpan> GetCrawlDelayAsync(
        Uri uri,
        string userAgent = "LexWellsBot");
}
|
||||
|
|
@ -0,0 +1,26 @@
|
|||
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <TargetFramework>net9.0</TargetFramework>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>

    <PackageId>LexWells.Infrastructure.Common</PackageId>
    <Version>1.0.0</Version>
    <Authors>Kenneth Wells</Authors>
    <Company>LexWells</Company>
    <Description>Shared infrastructure and caching logic for LexWells projects.</Description>

    <GeneratePackageOnBuild>true</GeneratePackageOnBuild>
    <!--
      The package feed location is machine-specific. Set the LocalNuGetFeed property
      (environment variable, Directory.Build.props or -p:LocalNuGetFeed=...) to redirect
      it; the original path remains the default so existing builds are unaffected.
    -->
    <LocalNuGetFeed Condition="'$(LocalNuGetFeed)' == ''">/home/drew/source/LocalNuGet</LocalNuGetFeed>
    <PackageOutputPath>$(LocalNuGetFeed)</PackageOutputPath>
    <PackageFormat>nupkg</PackageFormat>
  </PropertyGroup>

  <ItemGroup>
    <PackageReference Include="Microsoft.Extensions.Caching.Abstractions" Version="9.0.14" />
    <PackageReference Include="Microsoft.Extensions.Caching.StackExchangeRedis" Version="9.0.14" />
    <PackageReference Include="Microsoft.Extensions.Http" Version="9.0.14" />
    <PackageReference Include="Newtonsoft.Json" Version="13.0.4" />
  </ItemGroup>

</Project>
|
||||
|
|
@ -0,0 +1,7 @@
|
|||
namespace LexWells.Infrastructure.Common.Models;
|
||||
|
||||
/// <summary>
/// Result of evaluating a host's robots.txt for one request.
/// </summary>
/// <param name="IsAllowed">Whether crawling is permitted.</param>
/// <param name="CrawlDelay">Crawl delay in seconds, or <c>null</c> when none was found.</param>
/// <param name="SitemapUrl">Sitemap location, or <c>null</c> when none is known.</param>
/// <param name="ExpiresAt">UTC instant after which this entry should no longer be trusted.</param>
public record RobotsEntry(bool IsAllowed, int? CrawlDelay, string? SitemapUrl, DateTime ExpiresAt);
|
||||
|
|
@ -0,0 +1,61 @@
|
|||
using System.Text.Json;
|
||||
using LexWells.Infrastructure.Common.Interfaces;
|
||||
using Microsoft.Extensions.Caching.Distributed;
|
||||
|
||||
namespace LexWells.Infrastructure.Common.Services;
|
||||
|
||||
/// <summary>
/// <see cref="ICacheService"/> implementation that keeps values as strings in an
/// <see cref="IDistributedCache"/>. Strings are stored verbatim; every other type is
/// stored as System.Text.Json text.
/// </summary>
public class RedisCacheService : ICacheService
{
    // Lifetime applied by SetAsync when the caller does not pass an expiration.
    private static readonly TimeSpan DefaultExpiration = TimeSpan.FromHours(1);

    private readonly IDistributedCache _cache;

    /// <summary>Creates the service on top of the given distributed cache.</summary>
    /// <param name="cache">Backing distributed cache.</param>
    /// <exception cref="ArgumentNullException"><paramref name="cache"/> is <c>null</c>.</exception>
    public RedisCacheService(IDistributedCache cache)
    {
        _cache = cache ?? throw new ArgumentNullException(nameof(cache));
    }

    /// <summary>
    /// Reads a JSON array stored under <paramref name="key"/> and exposes its elements as
    /// <typeparamref name="T"/>.
    /// </summary>
    /// <typeparam name="T">Element type exposed to the caller.</typeparam>
    /// <typeparam name="TConcrete">Concrete element type the JSON is deserialized into.</typeparam>
    /// <param name="key">Cache key to look up.</param>
    /// <returns>
    /// The cached elements, or <c>null</c> when the key is missing, blank, or holds text
    /// that is not valid JSON for the requested type.
    /// </returns>
    public async Task<IEnumerable<T>?> GetCollectionAsync<T, TConcrete>(string key)
        where TConcrete : T
    {
        var cachedData = await _cache.GetStringAsync(key);

        if (string.IsNullOrWhiteSpace(cachedData))
        {
            return null;
        }

        try
        {
            // Materialized as a set, so duplicate elements in the payload collapse.
            var result = JsonSerializer.Deserialize<HashSet<TConcrete>>(cachedData);
            return result?.Cast<T>();
        }
        catch (JsonException)
        {
            // Same policy as GetAsync: an unreadable payload is treated as a cache miss.
            return null;
        }
    }

    /// <summary>Reads the value stored under <paramref name="key"/>.</summary>
    /// <typeparam name="T">Type the cached value is returned as.</typeparam>
    /// <param name="key">Cache key to look up.</param>
    /// <returns>
    /// The cached value, or <c>default</c> when the key is missing, blank, or holds text
    /// that is not valid JSON for <typeparamref name="T"/>.
    /// </returns>
    public async Task<T?> GetAsync<T>(string key)
    {
        var cachedData = await _cache.GetStringAsync(key);

        // Nothing stored (or only whitespace): report a miss.
        if (string.IsNullOrWhiteSpace(cachedData))
        {
            return default;
        }

        // SetAsync writes strings verbatim rather than as JSON, so they must be returned
        // verbatim too; deserializing raw text would fail and look like a miss.
        if (typeof(T) == typeof(string))
        {
            return (T)(object)cachedData;
        }

        try
        {
            return JsonSerializer.Deserialize<T>(cachedData);
        }
        catch (JsonException)
        {
            // Corrupt or incompatible payload: treat as a miss so the caller re-populates it.
            return default;
        }
    }

    /// <summary>Stores <paramref name="value"/> under <paramref name="key"/>.</summary>
    /// <typeparam name="T">Type of the value being stored.</typeparam>
    /// <param name="key">Cache key to write.</param>
    /// <param name="value">Value to store; strings are written as-is, other types as JSON.</param>
    /// <param name="expiration">Lifetime relative to now; one hour when omitted.</param>
    public async Task SetAsync<T>(string key, T value, TimeSpan? expiration = null)
    {
        var options = new DistributedCacheEntryOptions
        {
            AbsoluteExpirationRelativeToNow = expiration ?? DefaultExpiration
        };

        string dataToStore = value is string text ? text : JsonSerializer.Serialize(value);

        await _cache.SetStringAsync(key, dataToStore, options);
    }

    /// <summary>Removes the entry stored under <paramref name="key"/>.</summary>
    /// <param name="key">Cache key to remove.</param>
    public async Task RemoveAsync(string key)
    {
        await _cache.RemoveAsync(key);
    }
}
|
||||
|
|
@ -0,0 +1,151 @@
|
|||
using System.Net;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using LexWells.Infrastructure.Common.Interfaces;
|
||||
using LexWells.Infrastructure.Common.Models;
|
||||
|
||||
namespace LexWells.Infrastructure.Common.Services;
|
||||
|
||||
/// <summary>
/// Answers robots.txt questions (may this URL be crawled, how long to wait between
/// requests) for a given user agent. The robots.txt body is fetched once per origin and
/// cached through <see cref="ICacheService"/>; every call evaluates the requested path
/// and user agent against that cached body, so a verdict computed for one URL is never
/// reused for another.
/// </summary>
public class RobotsService(
    IHttpClientFactory clientFactory,
    ICacheService cache,
    ILogger<RobotsService> logger) : IRobotsService
{
    // New key family: entries under the former "robots_{host}" keys held a verdict for a
    // single path and must not be read back as a whole robots.txt document.
    private const string CachePrefix = "robots_txt_";

    // NOTE(review): IRobotsService declares "LexWellsBot" as its default, so the value a
    // caller gets depends on whether it calls through the interface or this class.
    // Confirm which product token is intended before aligning the two.
    private const string DefaultUserAgent = "NewsArchiveBot";

    private static readonly TimeSpan DefaultCrawlDelay = TimeSpan.FromSeconds(1);

    // A fetched (or confirmed-missing) robots.txt is trusted for a day; a failed fetch is
    // retried after ten minutes.
    private static readonly TimeSpan DocumentLifetime = TimeSpan.FromDays(1);
    private static readonly TimeSpan FailureLifetime = TimeSpan.FromMinutes(10);

    /// <summary>Checks whether <paramref name="userAgent"/> may crawl <paramref name="uri"/>.</summary>
    /// <param name="uri">Absolute URL to check.</param>
    /// <param name="userAgent">User-agent name matched against robots.txt groups.</param>
    /// <returns><c>true</c> when the URL is allowed; <c>false</c> when it is disallowed or robots.txt could not be fetched.</returns>
    public async Task<bool> CanCrawlAsync(Uri uri, string userAgent = DefaultUserAgent)
    {
        var entry = await GetRobotsAsync(uri, userAgent);
        return entry.IsAllowed;
    }

    /// <summary>Gets the crawl delay that applies to <paramref name="userAgent"/> on the URL's host.</summary>
    /// <param name="uri">Absolute URL identifying the host.</param>
    /// <param name="userAgent">User-agent name matched against robots.txt groups.</param>
    /// <returns>The declared crawl delay, or one second when none is declared.</returns>
    public async Task<TimeSpan> GetCrawlDelayAsync(Uri uri, string userAgent = DefaultUserAgent)
    {
        var entry = await GetRobotsAsync(uri, userAgent);
        return entry.CrawlDelay is int seconds
            ? TimeSpan.FromSeconds(seconds)
            : DefaultCrawlDelay;
    }

    // Loads the origin's robots.txt (cache first, network second) and evaluates it for this request.
    private async Task<RobotsEntry> GetRobotsAsync(Uri uri, string userAgent)
    {
        ArgumentNullException.ThrowIfNull(uri);
        ArgumentNullException.ThrowIfNull(userAgent);

        // robots.txt is scoped to scheme + host + port, so a non-default port needs its own
        // document and its own cache entry.
        string origin = $"{uri.Scheme}://{uri.Authority}";
        string cacheKey = $"{CachePrefix}{origin}";

        CachedRobotsDocument? document = null;

        try
        {
            var cached = await cache.GetAsync<CachedRobotsDocument>(cacheKey);

            if (cached is not null && cached.ExpiresAt > DateTime.UtcNow)
            {
                document = cached;
            }
        }
        catch (Exception ex)
        {
            logger.LogWarning(ex, "Failed to retrieve or deserialize robots cache for {Host}", origin);
        }

        if (document is null)
        {
            document = await FetchDocumentAsync(origin);

            var cacheDuration = document.FetchFailed ? FailureLifetime : DocumentLifetime;

            try
            {
                await cache.SetAsync(cacheKey, document, cacheDuration);
            }
            catch (Exception ex)
            {
                // A cache outage must not stop the crawl decision; the document is still usable.
                logger.LogError(ex, "Failed to save robots entry to cache for {Host}", origin);
            }
        }

        if (document.FetchFailed)
        {
            // robots.txt could not be read: stay conservative until the next retry.
            return new RobotsEntry(false, null, null, document.ExpiresAt);
        }

        // Rules may target the query string, so match against path and query together.
        return EvaluateRules(document.Content ?? string.Empty, userAgent, uri.PathAndQuery, document.ExpiresAt);
    }

    // Downloads {origin}/robots.txt and wraps the outcome in a cacheable document.
    private async Task<CachedRobotsDocument> FetchDocumentAsync(string origin)
    {
        var client = clientFactory.CreateClient();
        var robotsUrl = $"{origin}/robots.txt";

        client.DefaultRequestHeaders.UserAgent.ParseAdd(
            "Mozilla/5.0 (compatible; NewsArchiveBot/1.0; +http://lexwells.com/bot)");

        try
        {
            var body = await client.GetStringAsync(robotsUrl);
            return new CachedRobotsDocument
            {
                Content = body,
                ExpiresAt = DateTime.UtcNow.Add(DocumentLifetime)
            };
        }
        catch (HttpRequestException ex) when (ex.StatusCode == HttpStatusCode.NotFound)
        {
            // No robots.txt published: an empty rule set allows everything.
            return new CachedRobotsDocument
            {
                Content = string.Empty,
                ExpiresAt = DateTime.UtcNow.Add(DocumentLifetime)
            };
        }
        catch (Exception ex)
        {
            logger.LogError(ex, "Error fetching robots.txt from {Url}", robotsUrl);
            return new CachedRobotsDocument
            {
                FetchFailed = true,
                ExpiresAt = DateTime.UtcNow.Add(FailureLifetime)
            };
        }
    }

    // Parses robots.txt text and decides access for one user agent and one path.
    // Groups naming the agent exactly (case-insensitive) take precedence over "*" groups;
    // among matching rules the longest pattern wins and Allow wins a tie.
    private static RobotsEntry EvaluateRules(string content, string userAgent, string path, DateTime expiresAt)
    {
        var specificRules = new List<(bool Allow, string Pattern)>();
        var wildcardRules = new List<(bool Allow, string Pattern)>();
        int? specificDelay = null;
        int? wildcardDelay = null;
        string? sitemapUrl = null;

        bool groupIsSpecific = false;
        bool groupIsWildcard = false;
        bool foundSpecificGroup = false;
        bool previousWasUserAgent = false;

        var lines = content.Split(new[] { '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries);

        foreach (var rawLine in lines)
        {
            // Everything after '#' is a comment, including trailing comments on a rule line.
            int commentStart = rawLine.IndexOf('#');
            var line = (commentStart >= 0 ? rawLine[..commentStart] : rawLine).Trim();
            if (line.Length == 0)
            {
                continue;
            }

            var parts = line.Split(':', 2);
            if (parts.Length < 2)
            {
                continue;
            }

            var key = parts[0].Trim();
            var value = parts[1].Trim();

            if (key.Equals("user-agent", StringComparison.OrdinalIgnoreCase))
            {
                // Consecutive User-agent lines share one group; a User-agent line that
                // follows a rule starts a new group.
                if (!previousWasUserAgent)
                {
                    groupIsSpecific = false;
                    groupIsWildcard = false;
                }

                previousWasUserAgent = true;

                if (value.Equals(userAgent, StringComparison.OrdinalIgnoreCase))
                {
                    groupIsSpecific = true;
                    foundSpecificGroup = true;
                }
                else if (value == "*")
                {
                    groupIsWildcard = true;
                }

                continue;
            }

            if (key.Equals("sitemap", StringComparison.OrdinalIgnoreCase))
            {
                // Sitemap lines are independent of groups; keep the first one.
                if (sitemapUrl is null && value.Length > 0)
                {
                    sitemapUrl = value;
                }

                continue;
            }

            bool isAllow = key.Equals("allow", StringComparison.OrdinalIgnoreCase);
            bool isDisallow = key.Equals("disallow", StringComparison.OrdinalIgnoreCase);
            bool isCrawlDelay = key.Equals("crawl-delay", StringComparison.OrdinalIgnoreCase);

            if (!isAllow && !isDisallow && !isCrawlDelay)
            {
                // Unknown directive: ignore it without closing the current User-agent run.
                continue;
            }

            previousWasUserAgent = false;

            if (!groupIsSpecific && !groupIsWildcard)
            {
                continue;
            }

            if (isCrawlDelay)
            {
                // Accept fractional delays and round up, since the entry stores whole seconds.
                if (double.TryParse(
                        value,
                        System.Globalization.NumberStyles.Float,
                        System.Globalization.CultureInfo.InvariantCulture,
                        out var seconds)
                    && seconds >= 0
                    && seconds <= int.MaxValue)
                {
                    int delay = (int)Math.Ceiling(seconds);
                    if (groupIsSpecific)
                    {
                        specificDelay = delay;
                    }

                    if (groupIsWildcard)
                    {
                        wildcardDelay = delay;
                    }
                }

                continue;
            }

            // An empty Allow/Disallow value restricts nothing, so it contributes no rule.
            if (value.Length == 0)
            {
                continue;
            }

            if (groupIsSpecific)
            {
                specificRules.Add((isAllow, value));
            }

            if (groupIsWildcard)
            {
                wildcardRules.Add((isAllow, value));
            }
        }

        var rules = foundSpecificGroup ? specificRules : wildcardRules;
        var crawlDelay = foundSpecificGroup ? specificDelay : wildcardDelay;

        bool isAllowed = true;
        int bestLength = -1;

        foreach (var (allow, pattern) in rules)
        {
            if (!PathMatches(pattern, path))
            {
                continue;
            }

            if (pattern.Length > bestLength || (pattern.Length == bestLength && allow))
            {
                bestLength = pattern.Length;
                isAllowed = allow;
            }
        }

        return new RobotsEntry(isAllowed, crawlDelay, sitemapUrl, expiresAt);
    }

    // Case-sensitive robots.txt pattern match: the pattern is a prefix of the path,
    // '*' matches any run of characters, and a trailing '$' anchors the pattern to the
    // end of the path.
    private static bool PathMatches(string pattern, string path)
    {
        bool anchored = pattern.EndsWith('$');
        if (anchored)
        {
            pattern = pattern[..^1];
        }

        int p = 0;
        int s = 0;
        int starPattern = -1;
        int starPath = 0;

        while (s < path.Length)
        {
            if (p < pattern.Length && pattern[p] == '*')
            {
                // Remember the wildcard so later mismatches can let it absorb more characters.
                starPattern = p++;
                starPath = s;
            }
            else if (p < pattern.Length && pattern[p] == path[s])
            {
                p++;
                s++;
            }
            else if (p == pattern.Length && !anchored)
            {
                // Whole pattern consumed and no end anchor: prefix match.
                return true;
            }
            else if (starPattern >= 0)
            {
                p = starPattern + 1;
                s = ++starPath;
            }
            else
            {
                return false;
            }
        }

        while (p < pattern.Length && pattern[p] == '*')
        {
            p++;
        }

        return p == pattern.Length;
    }

    // Cache payload for one origin. Plain settable properties keep it serializer-friendly.
    private sealed class CachedRobotsDocument
    {
        // Raw robots.txt text; empty when the origin publishes none.
        public string? Content { get; set; }

        // True when the download failed and the origin should be treated as disallowed.
        public bool FetchFailed { get; set; }

        // UTC instant after which the document must be fetched again.
        public DateTime ExpiresAt { get; set; }
    }
}
|
||||
|
|
@ -0,0 +1,22 @@
|
|||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
|
||||
namespace LexWells.Infrastructure.EntityFramework;
|
||||
|
||||
/// <summary>
/// Service-registration helpers for LexWells EF Core contexts.
/// </summary>
public static class DataDependencyInjection
{
    /// <summary>
    /// Registers <typeparamref name="TContext"/> as a SQLite-backed <see cref="DbContext"/>
    /// whose migrations live in the assembly that declares the context.
    /// </summary>
    /// <typeparam name="TContext">Context type to register.</typeparam>
    /// <param name="services">The service collection to add the context to.</param>
    /// <param name="connectionString">SQLite connection string passed to the provider.</param>
    /// <returns>The same <paramref name="services"/> instance so calls can be chained.</returns>
    public static IServiceCollection AddLexWellsDatabase<TContext>(
        this IServiceCollection services,
        string connectionString) where TContext : DbContext
    {
        // Migrations are looked up in the context's own assembly.
        var migrationsAssembly = typeof(TContext).Assembly.FullName;

        services.AddDbContext<TContext>(builder =>
            builder.UseSqlite(
                connectionString,
                sqlite => sqlite.MigrationsAssembly(migrationsAssembly)));

        return services;
    }
}
|
||||
|
|
@ -0,0 +1,9 @@
|
|||
namespace LexWells.Infrastructure.EntityFramework.Entities;
|
||||
|
||||
/// <summary>
/// Base type for persisted entities, carrying an identifier and audit timestamps.
/// </summary>
public abstract class BaseEntity
{
    /// <summary>Entity identifier; an empty string until one is assigned.</summary>
    public string Id { get; set; } = string.Empty;

    /// <summary>UTC instant captured when the instance is constructed.</summary>
    public DateTime CreatedAt { get; set; } = DateTime.UtcNow;

    /// <summary>UTC instant of the last recorded update, or <c>null</c> when none has been recorded.</summary>
    public DateTime? UpdatedAt { get; set; }
}
|
||||
|
|
@ -0,0 +1,24 @@
|
|||
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <TargetFramework>net9.0</TargetFramework>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>

    <PackageId>LexWells.Infrastructure.EntityFramework</PackageId>
    <Version>1.0.0</Version>
    <Authors>Kenneth Wells</Authors>
    <Company>LexWells</Company>
    <Description>Shared EF Core architecture and SQLite helpers for LexWells projects.</Description>

    <GeneratePackageOnBuild>true</GeneratePackageOnBuild>
    <!--
      The package feed location is machine-specific. Set the LocalNuGetFeed property
      (environment variable, Directory.Build.props or -p:LocalNuGetFeed=...) to redirect
      it; the original path remains the default so existing builds are unaffected.
    -->
    <LocalNuGetFeed Condition="'$(LocalNuGetFeed)' == ''">/home/drew/source/LocalNuGet</LocalNuGetFeed>
    <PackageOutputPath>$(LocalNuGetFeed)</PackageOutputPath>
  </PropertyGroup>

  <ItemGroup>
    <PackageReference Include="Microsoft.EntityFrameworkCore" Version="9.0.14" />
    <PackageReference Include="Microsoft.EntityFrameworkCore.Sqlite" Version="9.0.14" />
    <PackageReference Include="Microsoft.Extensions.DependencyInjection.Abstractions" Version="9.0.14" />
  </ItemGroup>

</Project>
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
using Microsoft.EntityFrameworkCore;
|
||||
|
||||
namespace LexWells.Infrastructure.EntityFramework;
|
||||
|
||||
/// <summary>
/// Base EF Core context for LexWells projects. Every derived context gets an
/// <see cref="UpdateAuditableEntitiesInterceptor"/> attached during configuration.
/// </summary>
public abstract class LexWellsDbContext(DbContextOptions options) : DbContext(options)
{
    /// <summary>
    /// Adds the auditing interceptor, then defers to the base configuration.
    /// </summary>
    /// <param name="optionsBuilder">Builder used to configure this context.</param>
    protected override void OnConfiguring(DbContextOptionsBuilder optionsBuilder)
    {
        var auditInterceptor = new UpdateAuditableEntitiesInterceptor();
        optionsBuilder.AddInterceptors(auditInterceptor);

        base.OnConfiguring(optionsBuilder);
    }
}
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
using LexWells.Infrastructure.EntityFramework.Entities;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.EntityFrameworkCore.Diagnostics;
|
||||
|
||||
namespace LexWells.Infrastructure.EntityFramework;
|
||||
|
||||
/// <summary>
/// Save-changes interceptor that stamps <see cref="BaseEntity.UpdatedAt"/> on every
/// tracked <see cref="BaseEntity"/> in the <see cref="EntityState.Modified"/> state.
/// Both the synchronous and asynchronous save paths are covered.
/// </summary>
public class UpdateAuditableEntitiesInterceptor : SaveChangesInterceptor
{
    /// <summary>Stamps modified entities before a synchronous save.</summary>
    /// <param name="eventData">Event data carrying the context being saved.</param>
    /// <param name="result">Current interception result, passed through unchanged.</param>
    /// <returns>The result produced by the base interceptor.</returns>
    public override InterceptionResult<int> SavingChanges(
        DbContextEventData eventData,
        InterceptionResult<int> result)
    {
        StampModifiedEntities(eventData.Context);

        return base.SavingChanges(eventData, result);
    }

    /// <summary>Stamps modified entities before an asynchronous save.</summary>
    /// <param name="eventData">Event data carrying the context being saved.</param>
    /// <param name="result">Current interception result, passed through unchanged.</param>
    /// <param name="cancellationToken">Token forwarded to the base interceptor.</param>
    /// <returns>The result produced by the base interceptor.</returns>
    public override ValueTask<InterceptionResult<int>> SavingChangesAsync(
        DbContextEventData eventData,
        InterceptionResult<int> result,
        CancellationToken cancellationToken = default)
    {
        StampModifiedEntities(eventData.Context);

        return base.SavingChangesAsync(eventData, result, cancellationToken);
    }

    // Sets UpdatedAt on every modified BaseEntity; does nothing when no context is available.
    private static void StampModifiedEntities(DbContext? context)
    {
        if (context is null)
        {
            return;
        }

        // One timestamp per save so all entities changed together carry the same value.
        var now = DateTime.UtcNow;

        foreach (var entry in context.ChangeTracker.Entries<BaseEntity>())
        {
            if (entry.State == EntityState.Modified)
            {
                entry.Entity.UpdatedAt = now;
            }
        }
    }
}
|
||||
|
|
@ -0,0 +1,48 @@
|
|||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio Version 17
|
||||
VisualStudioVersion = 17.0.31903.59
|
||||
MinimumVisualStudioVersion = 10.0.40219.1
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LexWells.Infrastructure.Common", "LexWells.Infrastructure.Common\LexWells.Infrastructure.Common.csproj", "{40C2D397-C853-4155-A056-3EF634F90B53}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LexWells.Infrastructure.EntityFramework", "LexWells.Infrastructure.EntityFramework\LexWells.Infrastructure.EntityFramework.csproj", "{1F65E771-C904-4291-AFDE-8A383FA0D970}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Any CPU = Debug|Any CPU
|
||||
Debug|x64 = Debug|x64
|
||||
Debug|x86 = Debug|x86
|
||||
Release|Any CPU = Release|Any CPU
|
||||
Release|x64 = Release|x64
|
||||
Release|x86 = Release|x86
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{40C2D397-C853-4155-A056-3EF634F90B53}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{40C2D397-C853-4155-A056-3EF634F90B53}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{40C2D397-C853-4155-A056-3EF634F90B53}.Debug|x64.ActiveCfg = Debug|Any CPU
|
||||
{40C2D397-C853-4155-A056-3EF634F90B53}.Debug|x64.Build.0 = Debug|Any CPU
|
||||
{40C2D397-C853-4155-A056-3EF634F90B53}.Debug|x86.ActiveCfg = Debug|Any CPU
|
||||
{40C2D397-C853-4155-A056-3EF634F90B53}.Debug|x86.Build.0 = Debug|Any CPU
|
||||
{40C2D397-C853-4155-A056-3EF634F90B53}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{40C2D397-C853-4155-A056-3EF634F90B53}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{40C2D397-C853-4155-A056-3EF634F90B53}.Release|x64.ActiveCfg = Release|Any CPU
|
||||
{40C2D397-C853-4155-A056-3EF634F90B53}.Release|x64.Build.0 = Release|Any CPU
|
||||
{40C2D397-C853-4155-A056-3EF634F90B53}.Release|x86.ActiveCfg = Release|Any CPU
|
||||
{40C2D397-C853-4155-A056-3EF634F90B53}.Release|x86.Build.0 = Release|Any CPU
|
||||
{1F65E771-C904-4291-AFDE-8A383FA0D970}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{1F65E771-C904-4291-AFDE-8A383FA0D970}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{1F65E771-C904-4291-AFDE-8A383FA0D970}.Debug|x64.ActiveCfg = Debug|Any CPU
|
||||
{1F65E771-C904-4291-AFDE-8A383FA0D970}.Debug|x64.Build.0 = Debug|Any CPU
|
||||
{1F65E771-C904-4291-AFDE-8A383FA0D970}.Debug|x86.ActiveCfg = Debug|Any CPU
|
||||
{1F65E771-C904-4291-AFDE-8A383FA0D970}.Debug|x86.Build.0 = Debug|Any CPU
|
||||
{1F65E771-C904-4291-AFDE-8A383FA0D970}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{1F65E771-C904-4291-AFDE-8A383FA0D970}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{1F65E771-C904-4291-AFDE-8A383FA0D970}.Release|x64.ActiveCfg = Release|Any CPU
|
||||
{1F65E771-C904-4291-AFDE-8A383FA0D970}.Release|x64.Build.0 = Release|Any CPU
|
||||
{1F65E771-C904-4291-AFDE-8A383FA0D970}.Release|x86.ActiveCfg = Release|Any CPU
|
||||
{1F65E771-C904-4291-AFDE-8A383FA0D970}.Release|x86.Build.0 = Release|Any CPU
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
EndGlobal
|
||||
Loading…
Reference in New Issue