Refactor downloaders to use generic options and unify logic

Replaces specialized binary and HTML downloaders with a generic, options-driven UnitDownloader and UnitFragmentDownloader pattern. Introduces UnitDownloaderOptions and builder classes for flexible configuration, updates interfaces and method signatures to support progress reporting, and removes redundant binary-specific classes. Updates Playwright and Stealth downloaders to use the new generic base, and adds improved error handling and reporting. Also updates dependency versions and makes minor API consistency improvements across the Fluent and Models layers.
This commit is contained in:
qwsdcvghyu89
2025-09-29 21:27:56 +10:00
parent 8e60109f5e
commit 2958a26e4f
30 changed files with 621 additions and 422 deletions
+1 -2
View File
@@ -6,12 +6,11 @@
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="aeqw89.DataKeys" Version="2.0.1" />
<PackageReference Include="aeqw89.DataKeys" Version="2.1.1" />
<PackageReference Include="aeqw89.PersistentData" Version="1.4.5" />
<PackageReference Include="Microsoft.Extensions.Logging" Version="9.0.9" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="9.0.9" />
<PackageReference Include="Microsoft.Extensions.Logging.Console" Version="9.0.9" />
<PackageReference Include="System.Linq.Async" Version="6.0.1" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\Beam.Data\Beam.Data.csproj" />
+43 -76
View File
@@ -8,13 +8,14 @@ using Beam.Downloaders;
namespace Beam.Fluent;
internal sealed class ContextStage<RawType, OutType> : IContextStage<RawType, OutType> {
internal sealed class ContextStage<RawType, OutType> : IContextStage<RawType, OutType> where RawType : IDocument {
private readonly DownloadContextBuilder<RawType> _ctxBuilder;
private readonly AsyncTransformer<RawType, OutType> _transformer;
private FragmentMode _fragmentMode = FragmentMode.Single;
private Channel _channel = Channel.Plain;
private readonly ContentKind _contentKind;
private int _parallelism = 4;
private UnitDownloaderOptionsBuilder<RawType, OutType> _optionsBuilder = new();
// ──────────────── playwright ────────────────
private PlaywrightAsyncManipulator? _playwrightManipulator = null;
@@ -31,12 +32,15 @@ internal sealed class ContextStage<RawType, OutType> : IContextStage<RawType, Ou
_ctxBuilder = ctxBuilder;
_transformer = transformer;
_contentKind = transformer switch {
AsyncTransformer<HtmlDocument, OutType> => ContentKind.Html,
AsyncTransformer<StringDocument, OutType> => ContentKind.File,
AsyncTransformer<ByteDocument, OutType> => ContentKind.Binary,
_ => throw new ArgumentException(string.Format(Exceptions.Exceptions.fluent_unsupported_transformer,
transformer.GetType()
.AsUniqueName()))
};
_optionsBuilder
.WithAsyncTransformer(_transformer);
}
public IContextStage<RawType, OutType> Configure(Action<DownloadContextBuilder<RawType>> configure) {
@@ -44,6 +48,12 @@ internal sealed class ContextStage<RawType, OutType> : IContextStage<RawType, Ou
return this;
}
/// <summary>
/// Hands this stage's <see cref="UnitDownloaderOptionsBuilder{RawType, OutType}"/> to the
/// supplied callback so the caller can adjust the downloader options in place.
/// </summary>
/// <param name="configure">Callback invoked once, synchronously, with the stage's options builder.</param>
/// <returns>This stage instance, so further fluent calls can be chained.</returns>
public IContextStage<RawType, OutType> ConfigureUnitDownloaderOptions(
    Action<UnitDownloaderOptionsBuilder<RawType, OutType>> configure) {
    // The builder is mutated by the callback; nothing is built at this point.
    configure.Invoke(_optionsBuilder);

    return this;
}
public IContextStage<RawType, OutType> WithParallelism(int degree) {
_parallelism = Math.Max(1, degree);
return this;
@@ -108,14 +118,14 @@ internal sealed class ContextStage<RawType, OutType> : IContextStage<RawType, Ou
string.Format(Exceptions.Exceptions.fluent_type_conversion_failure,
o?.GetType().AsUniqueName() ?? "null", typeof(T).AsUniqueName()));
AsyncTransformer<HtmlDocument, OutType> HtmlTransformer()
=> To<AsyncTransformer<HtmlDocument, OutType>>(_transformer);
AsyncTransformer<StringDocument, OutType> FileTransformer()
=> To<AsyncTransformer<StringDocument, OutType>>(_transformer);
AsyncTransformer<ByteDocument, OutType> ByteTransformer()
=> To<AsyncTransformer<ByteDocument, OutType>>(_transformer);
AsyncDownloadFailurePredicate<HtmlDocument>[] HtmlFailurePredicates()
=> To<AsyncDownloadFailurePredicate<HtmlDocument>[]>(context.AsyncFailurePredicates);
AsyncDownloadFailurePredicate<StringDocument>[] FileFailurePredicates()
=> To<AsyncDownloadFailurePredicate<StringDocument>[]>(context.AsyncFailurePredicates);
AsyncDownloadFailurePredicate<ByteDocument>[] ByteFailurePredicates()
=> To<AsyncDownloadFailurePredicate<ByteDocument>[]>(context.AsyncFailurePredicates);
@@ -125,82 +135,39 @@ internal sealed class ContextStage<RawType, OutType> : IContextStage<RawType, Ou
#endregion
if (context.AsyncFailurePredicates is not null)
_optionsBuilder
.WithFailurePredicates(x => x.WithPredicates(context.AsyncFailurePredicates));
var options = _optionsBuilder
.WithClient(context.Client)
.Build();
return (_channel, _fragmentMode, _contentKind) switch {
// ──────────────── fragmented HTML ────────────────
(Channel.Plain, FragmentMode.Fragmented, ContentKind.Html)
=> new UnitFragmentDownloader<OutType>(
context.Web,
HtmlTransformer(),
HtmlFailurePredicates(),
_parallelism,
context.DownloadLogger),
// ──────────────── fragmented binary ────────────────
(Channel.Plain, FragmentMode.Fragmented, ContentKind.Binary)
=> new UnitFragmentDownloaderBinary<OutType>(
context.Client,
ByteTransformer(),
ByteFailurePredicates(),
_parallelism,
context.DownloadLogger),
// ──────────────── single HTML ────────────────
(Channel.Plain, FragmentMode.Single, ContentKind.Html)
=> new UnitDownloader<OutType>(
context.Web,
HtmlTransformer(),
HtmlFailurePredicates()),
// ──────────────── single binary ────────────────
(Channel.Plain, FragmentMode.Single, ContentKind.Binary)
=> new UnitDownloaderBinary<OutType>(
context.Client,
ByteTransformer(),
ByteFailurePredicates()),
// ──────────────── single playwright binary ────────────────
(Channel.Playwright, FragmentMode.Single, ContentKind.Binary)
=> new PlaywrightUnitDownloader<OutType>(
context.Client,
EnsureExists(_playwrightManipulator),
ByteTransformer(),
ByteFailurePredicates()
),
// ──────────────── single playwright HTML ────────────────
(Channel.Playwright, FragmentMode.Single, ContentKind.Html)
=> new PlaywrightUnitPageDownloader<OutType>(
context.Web,
EnsureExists(_playwrightManipulator),
HtmlTransformer(),
HtmlFailurePredicates()),
// ──────────────── single stealth HTML ────────────────
(Channel.Stealth, FragmentMode.Single, ContentKind.Html)
=> new StealthUnitPageDownloader<OutType>(
context.Web,
EnsureExists(_stealthConfig),
EnsureExists(_stealthManipulator),
HtmlTransformer(),
HtmlFailurePredicates()),
// ──────────────── single stealth binary ────────────────
// ──────────────── fragmented ────────────────
(Channel.Plain, FragmentMode.Fragmented, _)
=> new UnitFragmentDownloader<RawType, OutType>(options),
// ──────────────── single ────────────────
(Channel.Plain, FragmentMode.Single, _)
=> new UnitDownloader<RawType, OutType>(options),
// ──────────────── single playwright ────────────────
(Channel.Playwright, FragmentMode.Single, _)
=> new PlaywrightUnitDownloader<RawType, OutType>(options, EnsureExists(_playwrightManipulator)),
// ──────────────── single stealth file ────────────────
(Channel.Stealth, FragmentMode.Single, ContentKind.File)
=> new StealthUnitPageDownloader<RawType, OutType>(options, EnsureExists(_stealthConfig), EnsureExists(_stealthManipulator)),
// ──────────────── single stealth binary ────────────────
(Channel.Stealth, FragmentMode.Single, ContentKind.Binary)
=> new StealthUnitDownloader<OutType>(
context.Client,
=> new StealthUnitDownloader<RawType, OutType>(options, EnsureExists(_stealthConfig), EnsureExists(_stealthManipulator)),
// ──────────────── fragment stealth file ────────────────
(Channel.Stealth, FragmentMode.Fragmented, ContentKind.File)
=> new StealthFragmentPageDownloader<RawType, OutType>(options,
EnsureExists(_stealthConfig),
EnsureExists(_stealthManipulator),
ByteTransformer(),
ByteFailurePredicates()),
// ──────────────── fragment stealth HTML ────────────────
(Channel.Stealth, FragmentMode.Fragmented, ContentKind.Html)
=> new StealthFragmentPageDownloader<OutType>(
context.Web,
EnsureExists(_stealthConfig),
EnsureExists(_stealthManipulator),
HtmlTransformer(),
HtmlFailurePredicates()),
EnsureExists(_stealthManipulator)),
// ──────────────── fragment stealth binary ────────────────
(Channel.Stealth, FragmentMode.Fragmented, ContentKind.Binary)
=> new StealthFragmentDownloader<OutType>(
context.Client,
=> new StealthFragmentDownloader<RawType, OutType>(options,
EnsureExists(_stealthConfig),
EnsureExists(_stealthManipulator),
ByteTransformer(),
ByteFailurePredicates()),
EnsureExists(_stealthManipulator)),
_ => throw new Exception(string.Format(Exceptions.Exceptions.fluent_unsupported_pattern,
$"({_channel}, {_fragmentMode}, {_contentKind})")),
};
+1 -1
View File
@@ -12,6 +12,6 @@ public enum Channel {
}
public enum ContentKind {
Html,
File,
Binary
}
+2 -1
View File
@@ -1,10 +1,11 @@
using System.Collections.Concurrent;
using System.Text.Json;
using Beam.Abstractions;
using Beam.Models;
namespace Beam.Fluent;
internal sealed class DownloadStage<RawType, OutType>(DownloadEnumerable<OutType> download) : IDownloadStage<RawType, OutType> {
internal sealed class DownloadStage<RawType, OutType>(DownloadEnumerable<OutType> download) : IDownloadStage<RawType, OutType> where RawType : IDocument {
private IAsyncEnumerable<Ordered<OutType>> _download = download;
public DownloadEnumerable<OutType> AsAsyncEnumerable() {
+4 -3
View File
@@ -1,4 +1,5 @@
using aeqw89.DataKeys;
using Beam.Abstractions;
using Beam.Data;
using Beam.Downloaders;
using Beam.Dynamic;
@@ -7,13 +8,13 @@ using Beam.Models;
namespace Beam.Fluent;
public static class FluentDownload {
public static ITransformStage<RawType, OutType> Links<RawType, OutType>(params IEnumerable<string> links) {
public static ITransformStage<RawType, OutType> Links<RawType, OutType>(params IEnumerable<string> links) where RawType : IDocument {
return new TransformStage<RawType, OutType>(new DownloadContextBuilder<RawType>()
.WithLinks(links));
}
public static ITransformStage<RawType, OutType>
ResourceDefinition<RawType, OutType>(ResourceDefinition definition) {
ResourceDefinition<RawType, OutType>(ResourceDefinition definition) where RawType : IDocument {
if (definition.Location.States.Count == 0)
throw new ArgumentException(Exceptions.Exceptions.resource_definition_invalid_states_count, nameof(definition));
var linkGenerator = new OrderedLinkGenerator(definition.Location.Segments, (NumberedStateChanger)definition.Location.StateChanger.Behavior,
@@ -22,7 +23,7 @@ public static class FluentDownload {
.WithLinks(StringEnumerable.FromGenerator(linkGenerator!)));
}
public static ITransformStage<RawType, OutType> FromContext<RawType, OutType>(DownloadContext<RawType> existing) {
public static ITransformStage<RawType, OutType> FromContext<RawType, OutType>(DownloadContext<RawType> existing) where RawType : IDocument {
return new TransformStage<RawType, OutType>(DownloadContextBuilder<RawType>.FromContext(existing));
}
}
+3 -2
View File
@@ -1,11 +1,12 @@
using Beam.Data;
using Beam.Abstractions;
using Beam.Data;
using Beam.Downloaders;
using Beam.Dynamic;
using Beam.Models;
namespace Beam.Fluent;
internal sealed class TransformStage<RawType, OutType>(DownloadContextBuilder<RawType> CtxBuilder) : ITransformStage<RawType, OutType> {
internal sealed class TransformStage<RawType, OutType>(DownloadContextBuilder<RawType> CtxBuilder) : ITransformStage<RawType, OutType> where RawType : IDocument {
public IContextStage<RawType, OutType> WithTransformer(AsyncTransformer<RawType, OutType> transformer) {
return new ContextStage<RawType, OutType>(CtxBuilder, transformer);
}