Introduce Beam.Fluent and Beam.Models projects

Added new Beam.Fluent and Beam.Models projects with staged download builder and data context models. Refactored and moved model classes from Beam.Temporary.Cli to Beam.Models. Added new data providers and extended DataBindings in Beam.Dynamic. Renamed Beam.Puppeteer to Beam.Playwright and updated related classes. Updated project references and package versions. Removed obsolete and unused files from Beam.Temporary.Cli.
This commit is contained in:
qwsdcvghyu89
2025-09-18 18:32:25 +10:00
parent 849bdcd089
commit a7d148a96f
72 changed files with 2100 additions and 721 deletions
+31
View File
@@ -0,0 +1,31 @@
<?xml version='1.0' encoding='utf-8'?>
<!-- SDK-style class library project: targets net9.0 with implicit usings and nullable reference types enabled. -->
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net9.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<!-- NuGet package dependencies. -->
<ItemGroup>
<PackageReference Include="aeqw89.DataKeys" Version="2.0.1" />
<PackageReference Include="aeqw89.PersistentData" Version="1.1.0" />
<PackageReference Include="Microsoft.Extensions.Logging" Version="9.0.7" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="9.0.7" />
<PackageReference Include="Microsoft.Extensions.Logging.Console" Version="9.0.7" />
<PackageReference Include="System.Linq.Async" Version="6.0.1" />
</ItemGroup>
<!--
  Sibling project references. Every reference except Beam.Models is marked PrivateAssets=all.
  NOTE(review): PrivateAssets=all keeps a reference from flowing to consumers of this project;
  confirm that consumers are expected to reference those projects themselves when they need their types.
-->
<ItemGroup>
<ProjectReference Include="..\Beam.Dynamic\Beam.Dynamic.csproj">
<PrivateAssets>all</PrivateAssets>
</ProjectReference>
<ProjectReference Include="..\Beam.Models\Beam.Models.csproj" />
<ProjectReference Include="..\Beam.Playwright\Beam.Playwright.csproj">
<PrivateAssets>all</PrivateAssets>
</ProjectReference>
<ProjectReference Include="..\Beam.Stealth\Beam.Stealth.csproj">
<PrivateAssets>all</PrivateAssets>
</ProjectReference>
<ProjectReference Include="..\Beam\Beam.csproj">
<PrivateAssets>all</PrivateAssets>
</ProjectReference>
</ItemGroup>
</Project>
+186
View File
@@ -0,0 +1,186 @@
using HtmlAgilityPack;
using Beam.Playwright;
using Beam.Stealth;
namespace Beam.Fluent {
public static partial class DownloadBuilder<RawType, OutType> {
/// <summary>
/// Last stage of the builder. Collects the download strategy (plain, fragmented, Playwright
/// or stealth) next to the context settings and turns them into a download enumerable in
/// <see cref="Build"/>.
/// </summary>
private sealed class ContextStage : IContextStage {
    private readonly DownloadContextBuilder<RawType> _ctxBuilder;
    private readonly AsyncTransformer<RawType, OutType> _transformer;
    // Handed to the non-stealth fragment downloaders; WithParallelism never lets it drop below 1.
    private int _parallelism = 4;
    private bool _useFragments = false;
    private PlaywrightAsyncManipulator? _usePlaywrightManipulator = null;
    private StealthAsyncManipulator? _useStealthManipulator = null;
    private StealthConfig? _stealthConfig = null;

    /// <summary>Creates the stage from the context builder and the transformer chosen in the previous stages.</summary>
    /// <param name="ctxBuilder">Builder that <see cref="Build"/> turns into the download context.</param>
    /// <param name="transformer">Transformer applied to every downloaded raw document.</param>
    public ContextStage(DownloadContextBuilder<RawType> ctxBuilder, AsyncTransformer<RawType, OutType> transformer) {
        _ctxBuilder = ctxBuilder;
        _transformer = transformer;
    }

    /// <summary>Runs <paramref name="configure"/> against the underlying context builder.</summary>
    /// <returns>This stage, for chaining.</returns>
    public IContextStage Configure(Action<DownloadContextBuilder<RawType>> configure) {
        configure(_ctxBuilder);
        return this;
    }

    /// <summary>Sets the degree of parallelism; values below 1 are raised to 1.</summary>
    /// <returns>This stage, for chaining.</returns>
    public IContextStage WithParallelism(int degree) {
        _parallelism = Math.Max(1, degree);
        return this;
    }

    /// <summary>Forwards the timeout to the context builder.</summary>
    /// <returns>This stage, for chaining.</returns>
    public IContextStage WithTimeout(TimeSpan timeout) {
        _ctxBuilder.WithTimeOut(timeout);
        return this;
    }

    /// <summary>Forwards the retry reporter to the context builder.</summary>
    /// <returns>This stage, for chaining.</returns>
    public IContextStage WithRetryReporter(IProgress<RetryReport> reporter) {
        _ctxBuilder.WithRetryReporter(reporter);
        return this;
    }

    /// <summary>
    /// Uses fragments to download multiple links in parallel. This strategy is mutually exclusive with
    /// <see cref="UsePlaywright(PlaywrightAsyncManipulator)"/>, so any Playwright manipulator set earlier is dropped.
    /// </summary>
    /// <returns>This stage, for chaining.</returns>
    public IContextStage UseFragments() {
        _usePlaywrightManipulator = null;
        _useFragments = true;
        return this;
    }

    /// <summary>
    /// Uses the given Playwright manipulator to download the links. This strategy is mutually exclusive with
    /// <see cref="UseFragments"/> and with <see cref="UseStealth(StealthAsyncManipulator, StealthConfig)"/>;
    /// both selections are cleared.
    /// </summary>
    /// <param name="manipulator">The page manipulator.</param>
    /// <returns>This stage, for chaining.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="manipulator"/> is null.</exception>
    public IContextStage UsePlaywright(PlaywrightAsyncManipulator manipulator) {
        // A null manipulator would silently select the plain downloader instead of Playwright.
        ArgumentNullException.ThrowIfNull(manipulator);
        _useFragments = false;
        _useStealthManipulator = null;
        _usePlaywrightManipulator = manipulator;
        return this;
    }

    /// <summary>
    /// Uses the given stealth manipulator and configuration to download the links. Any Playwright
    /// manipulator set earlier is dropped; a fragment selection is kept.
    /// </summary>
    /// <param name="manipulator">The stealth page manipulator.</param>
    /// <param name="config">Configuration handed to the stealth downloaders.</param>
    /// <returns>This stage, for chaining.</returns>
    /// <exception cref="ArgumentNullException">An argument is null.</exception>
    public IContextStage UseStealth(StealthAsyncManipulator manipulator, StealthConfig config) {
        // Fail at the call site instead of much later while the unit downloader is constructed.
        ArgumentNullException.ThrowIfNull(manipulator);
        ArgumentNullException.ThrowIfNull(config);
        _usePlaywrightManipulator = null;
        _useStealthManipulator = manipulator;
        _stealthConfig = config;
        return this;
    }

    /// <summary>Returns the stealth configuration or throws when a stealth strategy is selected without one.</summary>
    /// <exception cref="InvalidOperationException">No stealth configuration has been set.</exception>
    private StealthConfig RequireStealthConfig()
        => _stealthConfig ?? throw new InvalidOperationException("Stealth config is null");

    /// <summary>
    /// Picks the unit downloader matching the selected strategy, the runtime type of the transformer
    /// and the runtime type of the failure predicates. Arms are tested top to bottom.
    /// </summary>
    /// <returns>The downloader as <see cref="object"/>; <see cref="ConstructDownloader"/> casts it to the interface it needs.</returns>
    /// <exception cref="NotSupportedException">No arm matches the current combination.</exception>
    /// <exception cref="InvalidOperationException">A stealth arm matched but no stealth configuration is set.</exception>
    private object ConstructUnitDownloader(DownloadContext<RawType> context) {
        return (_useFragments, _useStealthManipulator, _usePlaywrightManipulator, _transformer, context.AsyncFailurePredicates) switch {
            // ──────────────── fragmented HTML ────────────────
            (true, null, _, AsyncTransformer<HtmlDocument, OutType> asyncHtmlTransformer,
                AsyncDownloadFailurePredicate<HtmlDocument>[] documentFailurePredicates)
                => new UnitFragmentDownloader<OutType>(
                    context.Web,
                    asyncHtmlTransformer,
                    documentFailurePredicates,
                    _parallelism,
                    context.DownloadLogger),
            // ──────────────── fragmented binary ────────────────
            (true, null, _, AsyncTransformer<ByteDocument, OutType> asyncBinaryTransformer,
                AsyncDownloadFailurePredicate<ByteDocument>[] responseFailurePredicates)
                => new UnitFragmentDownloaderBinary<OutType>(
                    context.Client,
                    asyncBinaryTransformer,
                    responseFailurePredicates,
                    _parallelism,
                    context.DownloadLogger),
            // ──────────────── single HTML ────────────────
            (false, null, null, AsyncTransformer<HtmlDocument, OutType> asyncHtmlTransformer,
                AsyncDownloadFailurePredicate<HtmlDocument>[] documentFailurePredicates)
                => new UnitDownloader<OutType>(
                    context.Web,
                    asyncHtmlTransformer,
                    documentFailurePredicates),
            // ──────────────── single binary ────────────────
            (false, null, null, AsyncTransformer<ByteDocument, OutType> asyncBinaryTransformer,
                AsyncDownloadFailurePredicate<ByteDocument>[] responseFailurePredicates)
                => new UnitDownloaderBinary<OutType>(
                    context.Client,
                    asyncBinaryTransformer,
                    responseFailurePredicates),
            // ──────────────── single playwright binary ────────────────
            (false, _, PlaywrightAsyncManipulator manipulator, AsyncTransformer<ByteDocument, OutType> asyncBinaryTransformer,
                AsyncDownloadFailurePredicate<ByteDocument>[] responseFailurePredicates)
                => new PlaywrightUnitDownloader<OutType>(
                    context.Client,
                    manipulator,
                    asyncBinaryTransformer,
                    responseFailurePredicates),
            // ──────────────── single playwright HTML ────────────────
            (false, _, PlaywrightAsyncManipulator manipulator, AsyncTransformer<HtmlDocument, OutType> asyncHtmlTransformer,
                AsyncDownloadFailurePredicate<HtmlDocument>[] documentFailurePredicates)
                => new PlaywrightUnitPageDownloader<OutType>(
                    context.Web,
                    manipulator,
                    asyncHtmlTransformer,
                    documentFailurePredicates),
            // ──────────────── single stealth HTML ────────────────
            (false, StealthAsyncManipulator manipulator, _, AsyncTransformer<HtmlDocument, OutType> asyncHtmlTransformer,
                AsyncDownloadFailurePredicate<HtmlDocument>[] documentFailurePredicates)
                => new StealthUnitPageDownloader<OutType>(
                    context.Web,
                    RequireStealthConfig(),
                    manipulator,
                    asyncHtmlTransformer,
                    documentFailurePredicates),
            // ──────────────── single stealth binary ────────────────
            (false, StealthAsyncManipulator manipulator, _, AsyncTransformer<ByteDocument, OutType> asyncBinaryTransformer,
                AsyncDownloadFailurePredicate<ByteDocument>[] responseFailurePredicates)
                => new StealthUnitDownloader<OutType>(
                    context.Client,
                    RequireStealthConfig(),
                    manipulator,
                    asyncBinaryTransformer,
                    responseFailurePredicates),
            // ──────────────── fragment stealth HTML ────────────────
            // NOTE(review): neither the failure predicates nor _parallelism are forwarded to the
            // stealth fragment downloaders — confirm that this is intended.
            (true, StealthAsyncManipulator manipulator, _, AsyncTransformer<HtmlDocument, OutType> asyncHtmlTransformer,
                AsyncDownloadFailurePredicate<HtmlDocument>[] _)
                => new StealthFragmentPageDownloader<OutType>(
                    context.Web,
                    RequireStealthConfig(),
                    manipulator,
                    asyncHtmlTransformer),
            // ──────────────── fragment stealth binary ────────────────
            (true, StealthAsyncManipulator manipulator, _, AsyncTransformer<ByteDocument, OutType> asyncBinaryTransformer,
                AsyncDownloadFailurePredicate<ByteDocument>[] _)
                => new StealthFragmentDownloader<OutType>(
                    context.Client,
                    RequireStealthConfig(),
                    manipulator,
                    asyncBinaryTransformer),
            _ => throw new NotSupportedException($"Unsupported transformer / failure-predicate combination. Missing pattern: {_useFragments} , {_transformer.GetType().AsUniqueName()} , {context.AsyncFailurePredicates?.GetType().AsUniqueName()}"),
        };
    }

    /// <summary>
    /// Wraps the unit downloader in the sequential driver that matches the fragment setting and
    /// exposes it as an enumerator of ordered results.
    /// </summary>
    private IAsyncEnumerator<Ordered<OutType>> ConstructDownloader(DownloadContext<RawType> context) {
        // The sequential driver works on a rebuilt copy of the context, not on the instance passed in.
        var copyOfContext = context.CreateBuilder().Build();
        if (_useFragments) {
            return new SequentialFragmentDownloader<RawType, OutType>(
                copyOfContext,
                ctx => (IUnitDownloader<Fragment<Ordered<OutType>>>)ConstructUnitDownloader(ctx),
                context.DownloadLogger).UnwrapFragmented();
        }
        return new SequentialDownloader<RawType, OutType>(
            copyOfContext,
            ctx => (IUnitDownloader<OutType>)ConstructUnitDownloader(ctx),
            context.DownloadLogger).WrapOrdered();
    }

    /// <summary>Builds the download context and returns the enumerable that performs the download.</summary>
    public DownloadEnumerable<OutType> Build() {
        var context = _ctxBuilder.Build();
        return new DownloadEnumerable<OutType>(ConstructDownloader(context));
    }
}
}
}
@@ -0,0 +1,43 @@
using System.Collections.Concurrent;
using System.Text.Json;
namespace Beam.Fluent {
public static partial class DownloadBuilder<RawType, OutType> {
/// <summary>
/// Post-build stage that decorates a download sequence with persistence steps before it is
/// handed back to the caller.
/// </summary>
private sealed class DownloadStage(DownloadEnumerable<OutType> download) : IDownloadStage {
    // Current (possibly decorated) sequence; every SaveTo* call replaces it with a wrapper.
    private IAsyncEnumerable<Ordered<OutType>> _download = download;

    /// <summary>Returns the current sequence wrapped in a new download enumerable.</summary>
    public DownloadEnumerable<OutType> AsAsyncEnumerable() {
        return new DownloadEnumerable<OutType>(_download.GetAsyncEnumerator());
    }

    /// <summary>
    /// Streams <paramref name="source"/>, writing one JSON file per item into <paramref name="dir"/>
    /// before passing the item on.
    /// </summary>
    /// <remarks>
    /// The upstream sequence is a parameter on purpose: the iterator body runs lazily, after
    /// <see cref="SaveToDirectory"/> has already pointed <c>_download</c> at this iterator, so reading
    /// the field here would make the iterator enumerate itself.
    /// </remarks>
    private static async IAsyncEnumerable<Ordered<OutType>> SaveToDirectoryCore(
        IAsyncEnumerable<Ordered<OutType>> source, string dir) {
        Directory.CreateDirectory(dir);
        await foreach (var item in source) {
            var path = Path.Combine(dir, $"{Path.GetRandomFileName()}.{item.Order}.json");
            // Serialize the downloaded item itself, not the directory path.
            // NOTE(review): this writes the whole Ordered<OutType> wrapper — confirm whether only its payload should be stored.
            await System.IO.File.WriteAllTextAsync(path, JsonSerializer.Serialize(item));
            yield return item;
        }
    }

    /// <summary>
    /// Adds a step that saves every item as a JSON file in <paramref name="dir"/> while the sequence is enumerated.
    /// </summary>
    /// <returns>This stage, for chaining.</returns>
    /// <exception cref="ArgumentException"><paramref name="dir"/> is null or empty.</exception>
    public IDownloadStage SaveToDirectory(string dir) {
        // Validate eagerly; otherwise the error would only appear once enumeration starts.
        ArgumentException.ThrowIfNullOrEmpty(dir);
        _download = SaveToDirectoryCore(_download, dir);
        return this;
    }

    /// <summary>Not implemented yet; always throws <see cref="NotImplementedException"/>.</summary>
    public IDownloadStage SaveToFiles(IEnumerable<string> files) {
        throw new NotImplementedException();
    }

    /// <summary>Not implemented yet; always throws <see cref="NotImplementedException"/>.</summary>
    public IDownloadStage SaveToMemory(ConcurrentBag<OutType> bag) {
        throw new NotImplementedException();
    }

    /// <summary>Not implemented yet; always throws <see cref="NotImplementedException"/>.</summary>
    public void WaitForDownload() {
        throw new NotImplementedException();
    }

    /// <summary>Not implemented yet; always throws <see cref="NotImplementedException"/>.</summary>
    public Task WaitForDownloadAsync() {
        throw new NotImplementedException();
    }
}
}
}
@@ -0,0 +1,7 @@
namespace Beam.Fluent {
public static partial class DownloadBuilder<RawType, OutType> {
/// <summary>
/// Link stage used when the caller supplies the links directly instead of having them
/// built from a resource (this is the stage returned by <c>FromScratch</c>).
/// </summary>
public interface IAlternativeLinkStage {
/// <summary>Registers the links to download and moves on to the transformer stage.</summary>
/// <param name="links">The links to download.</param>
IAlternativeTransformStage WithLinks(IEnumerable<SourceLink> links);
}
}
}
@@ -0,0 +1,10 @@
namespace Beam.Fluent {
public static partial class DownloadBuilder<RawType, OutType> {
/// <summary>
/// Transformer stage for the path where links were supplied directly: the caller passes the
/// transformer itself, no data bindings are involved.
/// </summary>
public interface IAlternativeTransformStage {
    /// <summary>Selects the asynchronous transformer and moves on to the context stage.</summary>
    /// <param name="transformer">Transformer applied to every downloaded raw document.</param>
    IContextStage WithTransformer(AsyncTransformer<RawType, OutType> transformer);

    /// <summary>
    /// Convenience overload for synchronous transformers: the result of
    /// <paramref name="transformer"/> is wrapped in an already completed task.
    /// </summary>
    /// <param name="transformer">Synchronous transformer applied to every downloaded raw document.</param>
    /// <exception cref="ArgumentNullException"><paramref name="transformer"/> is null.</exception>
    IContextStage WithTransformer(Func<RawType, OutType> transformer) {
        // The delegate is only invoked inside the wrapping lambda, so without this check a null
        // would surface as a NullReferenceException in the middle of a download.
        ArgumentNullException.ThrowIfNull(transformer);
        return WithTransformer(rt => Task.FromResult(transformer(rt)));
    }
}
}
}
@@ -0,0 +1,17 @@
using Beam.Playwright;
using Beam.Stealth;
namespace Beam.Fluent {
public static partial class DownloadBuilder<RawType, OutType> {
/// <summary>
/// Final builder stage: tunes the download context, selects the download strategy and
/// builds the resulting download enumerable.
/// </summary>
public interface IContextStage {
/// <summary>Gives the caller direct access to the underlying context builder.</summary>
IContextStage Configure(Action<DownloadContextBuilder<RawType>> configure);
/// <summary>Sets the degree of parallelism (the ContextStage implementation raises values below 1 to 1).</summary>
IContextStage WithParallelism(int degree);
/// <summary>Sets the timeout on the download context.</summary>
IContextStage WithTimeout(TimeSpan timeout);
/// <summary>Registers a reporter that receives retry reports.</summary>
IContextStage WithRetryReporter(IProgress<RetryReport> reporter);
/// <summary>Builds the download context and returns the download enumerable.</summary>
DownloadEnumerable<OutType> Build();
/// <summary>Selects the fragment strategy; in ContextStage this drops a previously chosen Playwright manipulator.</summary>
IContextStage UseFragments();
/// <summary>Selects the Playwright strategy; in ContextStage this drops the fragment and stealth selections.</summary>
IContextStage UsePlaywright(PlaywrightAsyncManipulator manipulator);
/// <summary>Selects the stealth strategy; in ContextStage this drops a previously chosen Playwright manipulator.</summary>
IContextStage UseStealth(StealthAsyncManipulator manipulator, StealthConfig config);
}
}
}
@@ -0,0 +1,14 @@
using System.Collections.Concurrent;
namespace Beam.Fluent {
public static partial class DownloadBuilder<RawType, OutType> {
/// <summary>
/// Stage that operates on an already built download: attaches persistence steps and exposes
/// or awaits the result.
/// </summary>
public interface IDownloadStage {
/// <summary>Adds a step that stores the downloaded items as files in <paramref name="dir"/>.</summary>
IDownloadStage SaveToDirectory(string dir);
/// <summary>Adds a step that stores the downloaded items in the given files. NOTE(review): DownloadStage does not implement this yet.</summary>
IDownloadStage SaveToFiles(IEnumerable<string> files);
/// <summary>Adds a step that collects the downloaded items in <paramref name="bag"/>. NOTE(review): DownloadStage does not implement this yet.</summary>
IDownloadStage SaveToMemory(ConcurrentBag<OutType> bag);
/// <summary>Returns the (possibly decorated) download as a download enumerable.</summary>
DownloadEnumerable<OutType> AsAsyncEnumerable();
/// <summary>Blocking wait for the download. NOTE(review): DownloadStage does not implement this yet.</summary>
void WaitForDownload();
/// <summary>Asynchronous wait for the download. NOTE(review): DownloadStage does not implement this yet.</summary>
Task WaitForDownloadAsync();
}
}
}
+13
View File
@@ -0,0 +1,13 @@
using Beam.Models;
namespace Beam.Fluent {
public static partial class DownloadBuilder<RawType, OutType> {
/* ────────────────────────────── Stages ─────────────────────────────── */
/// <summary>
/// First stage when the builder starts from a resource: decides which links are downloaded.
/// </summary>
public interface ILinkStage {
/// <summary>Uses the single link built from the initial state and moves on to the transformer stage.</summary>
ITransformStage WithLink();
/// <summary>Uses a generated sequence of links and moves on to the transformer stage.</summary>
ITransformStage WithLinkGenerator();
/// <summary>
/// Restricts the links to <paramref name="range"/>. In LinkStage this must be called before
/// <see cref="WithLinkGenerator"/> and the range end must not be smaller than its start.
/// </summary>
ILinkStage WithRange(Range range);
}
}
}
@@ -0,0 +1,9 @@
using Beam.Dynamic;
namespace Beam.Fluent {
public static partial class DownloadBuilder<RawType, OutType> {
/// <summary>
/// Transformer stage for the resource-based path: the transformer is created from the data
/// bindings that belong to the selected resource.
/// </summary>
public interface ITransformStage {
/// <summary>Creates the transformer via <paramref name="factory"/> and moves on to the context stage.</summary>
/// <param name="factory">Receives the resource's data bindings and returns the transformer to use.</param>
IContextStage WithTransformer(Func<DataBindings, AsyncTransformer<RawType, OutType>> factory);
}
}
}
+53
View File
@@ -0,0 +1,53 @@
using Beam.Models;
namespace Beam.Fluent {
public static partial class DownloadBuilder<RawType, OutType> {
/* ──────────────────────────── Stage types ─────────────────────────── */
/// <summary>
/// Link stage of the builder. Depending on the entry point it either derives the links from the
/// resource's template (<see cref="WithLink"/>, <see cref="WithLinkGenerator"/>) or takes them
/// from the caller (<see cref="WithLinks"/>).
/// </summary>
/// <param name="Source">Resource the links are built for.</param>
/// <param name="Initial">Initial state of the resource; it comes from the shared data context and is never modified here.</param>
/// <param name="Data">Data context holding templates and bindings.</param>
/// <param name="CtxBuilder">Context builder that receives the links.</param>
private sealed record LinkStage(
    WebResource Source,
    State Initial,
    BeamDataContext Data,
    DownloadContextBuilder<RawType> CtxBuilder) : ILinkStage, IAlternativeLinkStage {
    // Start of the range chosen via WithRange; null means "start at Initial".
    private State? startState;
    // End of the range chosen via WithRange; null when no range was set.
    private State? endState;
    private bool linksFrozen = false;

    /// <summary>Builds the single link for the current start state and moves on to the transformer stage.</summary>
    public ITransformStage WithLink() {
        var link = Data.Templates[Source.Key].Builder.Build(startState ?? Initial);
        CtxBuilder.WithLinks([link]);
        return new TransformStage(Source, Data, CtxBuilder);
    }

    /// <summary>
    /// Registers a link generator running from the current start state to the end state (if any)
    /// and moves on to the transformer stage. Afterwards <see cref="WithRange"/> is rejected.
    /// </summary>
    public ITransformStage WithLinkGenerator() {
        var template = Data.Templates[Source.Key];
        var generator = SourceLinkEnumerable.FromGenerator(new OrderedSourceLinkGenerator(
            template.Builder,
            new NumberedStateChanger(template.Factory.Behavior),
            startState ?? Initial, endState));
        CtxBuilder.WithLinks(generator);
        linksFrozen = true;
        return new TransformStage(Source, Data, CtxBuilder);
    }

    /// <summary>Registers caller-supplied links and moves on to the transformer stage.</summary>
    public IAlternativeTransformStage WithLinks(IEnumerable<SourceLink> links) {
        CtxBuilder.WithLinks(links);
        return new TransformStage(Source, Data, CtxBuilder);
    }

    /// <summary>
    /// Restricts the links to <paramref name="range"/>. Both bounds are applied to copies of
    /// <c>Initial</c>, so the shared initial state stays untouched and a later call replaces
    /// the previous range instead of adding to it.
    /// </summary>
    /// <returns>This stage, for chaining.</returns>
    /// <exception cref="InvalidOperationException">Called after <see cref="WithLinkGenerator"/>.</exception>
    /// <exception cref="ArgumentOutOfRangeException">The end of the range is smaller than its start.</exception>
    public ILinkStage WithRange(Range range) {
        if (linksFrozen)
            throw new InvalidOperationException($"WithRange must be called before WithLinkGenerator");
        if (range.End.Value < range.Start.Value)
            throw new ArgumentOutOfRangeException(nameof(range), "start must be <= end");
        var template = Data.Templates[Source.Key];
        var stateChanger = new NumberedStateChanger(template.Factory.Behavior);
        // Apply works in place, so it must only ever see copies of the shared initial state.
        State start = Initial.Copy();
        State end = Initial.Copy();
        stateChanger.Apply(start, range.Start.Value - 1);
        stateChanger.Apply(end, range.End.Value - 1);
        startState = start;
        endState = end;
        return this;
    }
}
}
}
@@ -0,0 +1,6 @@
namespace Beam.Fluent {
public static partial class DownloadBuilder<RawType, OutType> {
/* ────────────────────────── Implementation ────────────────────────── */
// Kind of source. NOTE(review): not referenced anywhere in the visible part of the builder — confirm it is still needed.
private enum SourceKind { Meta, Text }
}
}
@@ -0,0 +1,20 @@
using Beam.Dynamic;
using Beam.Models;
namespace Beam.Fluent {
public static partial class DownloadBuilder<RawType, OutType> {
/// <summary>
/// Transformer stage shared by both builder paths: hands the context builder over to the
/// context stage together with the chosen transformer.
/// </summary>
private sealed record TransformStage(
    WebResource Source,
    BeamDataContext Data,
    DownloadContextBuilder<RawType> CtxBuilder) : ITransformStage, IAlternativeTransformStage {

    /// <summary>Looks up the bindings of the resource, lets the factory create the transformer and continues with it.</summary>
    public IContextStage WithTransformer(Func<DataBindings, AsyncTransformer<RawType, OutType>> factory) {
        var resourceBindings = Data.Bindings[Source.Bindings];
        return new ContextStage(CtxBuilder, factory(resourceBindings));
    }

    /// <summary>Continues with the transformer supplied by the caller.</summary>
    public IContextStage WithTransformer(AsyncTransformer<RawType, OutType> transformer)
        => new ContextStage(CtxBuilder, transformer);
}
}
}
}
+41
View File
@@ -0,0 +1,41 @@
using aeqw89.DataKeys;
using Beam;
using Microsoft.Extensions.Logging;
using System;
using System.Collections.Generic;
using Beam.Models;
namespace Beam.Fluent {
/// <summary>
/// Type-safe, staged builder. The stages force callers through the mandatory steps
/// (source → link selection → transformer) and expose operational settings as first-class
/// methods rather than as loosely typed parameters.
/// </summary>
public static partial class DownloadBuilder<RawType, OutType> {
    /* ──────────────────────────── Entry points ─────────────────────────── */

    /// <summary>Starts a builder for the resource of the given kind inside a resource dictionary.</summary>
    /// <exception cref="KeyNotFoundException">The dictionary, the kind, the resource or its initial state is missing.</exception>
    public static ILinkStage FromResource(DataKey<ResourceDictionary> dict, string kind, BeamDataContext beamDataDictionary) {
        return Create(dict, beamDataDictionary, kind);
    }

    /// <summary>Starts a builder without any resource; the caller provides links and transformer directly.</summary>
    public static IAlternativeLinkStage FromScratch() {
        var freshBuilder = new DownloadContextBuilder<RawType>();
        // No resource, state or data context exists on this path, hence the null placeholders.
        return new LinkStage(null!, null!, null!, freshBuilder);
    }

    /// <summary>Resolves the resource and wires up the link stage with an empty link list.</summary>
    private static ILinkStage Create(DataKey<ResourceDictionary> resourceDict, BeamDataContext data, string kind) {
        var resolved = Resolve(resourceDict, kind, data);
        // The empty link list is only a placeholder; the link stage supplies the real links.
        var builder = new DownloadContextBuilder<RawType>().WithLinks(Array.Empty<SourceLink>());
        return new LinkStage(resolved.Source, resolved.Initial, data, builder);
    }

    /// <summary>
    /// Walks from the resource dictionary over the kind to the resource and its initial state,
    /// failing with a <see cref="KeyNotFoundException"/> at the first missing entry.
    /// </summary>
    private static (WebResource Source, State Initial) Resolve(DataKey<ResourceDictionary> resourceDict, string kind, BeamDataContext data) {
        if (!data.ResourceDictionaries.TryGetValue(resourceDict, out var dictionary)) {
            throw new KeyNotFoundException($"Novel '{resourceDict}' not found in BeamDataDictionary.");
        }

        if (!dictionary.Resources.TryGetValue(kind, out var resourceKey)) {
            throw new KeyNotFoundException($"Novel kind '{kind}' not found in '{resourceDict}'");
        }

        if (!data.Resources.TryGetValue(resourceKey, out var resource)) {
            throw new KeyNotFoundException($"Novel source '{resourceKey}' was not found");
        }

        var stateKey = resourceKey.To<ImmutableState>();
        if (!data.InitialStates.TryGetValue(stateKey, out var initialState)) {
            throw new KeyNotFoundException($"Immutable state for kind '{kind}' not found");
        }

        return (resource, initialState);
    }
}
}