Refactor downloaders to use generic options and unify logic

Replaces specialized binary and HTML downloaders with a generic, options-driven UnitDownloader and UnitFragmentDownloader pattern. Introduces UnitDownloaderOptions and builder classes for flexible configuration, updates interfaces and method signatures to support progress reporting, and removes redundant binary-specific classes. Updates Playwright and Stealth downloaders to use the new generic base, and adds improved error handling and reporting. Also updates dependency versions and makes minor API consistency improvements across the Fluent and Models layers.
This commit is contained in:
qwsdcvghyu89
2025-09-29 21:27:56 +10:00
parent 8e60109f5e
commit 2958a26e4f
30 changed files with 621 additions and 422 deletions
+43 -76
View File
@@ -8,13 +8,14 @@ using Beam.Downloaders;
namespace Beam.Fluent;
internal sealed class ContextStage<RawType, OutType> : IContextStage<RawType, OutType> {
internal sealed class ContextStage<RawType, OutType> : IContextStage<RawType, OutType> where RawType : IDocument {
private readonly DownloadContextBuilder<RawType> _ctxBuilder;
private readonly AsyncTransformer<RawType, OutType> _transformer;
private FragmentMode _fragmentMode = FragmentMode.Single;
private Channel _channel = Channel.Plain;
private readonly ContentKind _contentKind;
private int _parallelism = 4;
private UnitDownloaderOptionsBuilder<RawType, OutType> _optionsBuilder = new();
// ──────────────── playwright ────────────────
private PlaywrightAsyncManipulator? _playwrightManipulator = null;
@@ -31,12 +32,15 @@ internal sealed class ContextStage<RawType, OutType> : IContextStage<RawType, Ou
_ctxBuilder = ctxBuilder;
_transformer = transformer;
_contentKind = transformer switch {
AsyncTransformer<HtmlDocument, OutType> => ContentKind.Html,
AsyncTransformer<StringDocument, OutType> => ContentKind.File,
AsyncTransformer<ByteDocument, OutType> => ContentKind.Binary,
_ => throw new ArgumentException(string.Format(Exceptions.Exceptions.fluent_unsupported_transformer,
transformer.GetType()
.AsUniqueName()))
};
_optionsBuilder
.WithAsyncTransformer(_transformer);
}
public IContextStage<RawType, OutType> Configure(Action<DownloadContextBuilder<RawType>> configure) {
@@ -44,6 +48,12 @@ internal sealed class ContextStage<RawType, OutType> : IContextStage<RawType, Ou
return this;
}
/// <summary>
/// Lets the caller customise the unit-downloader options builder held by this stage.
/// </summary>
/// <param name="configure">
/// Callback invoked once, synchronously, with this stage's options builder.
/// </param>
/// <returns>This stage, so further fluent calls can be chained.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="configure"/> is <c>null</c>.
/// </exception>
public IContextStage<RawType, OutType> ConfigureUnitDownloaderOptions(
    Action<UnitDownloaderOptionsBuilder<RawType, OutType>> configure) {
    // Fail fast with an exception that names the offending parameter, rather than
    // letting the delegate invocation below surface a bare NullReferenceException.
    ArgumentNullException.ThrowIfNull(configure);

    configure(_optionsBuilder);
    return this;
}
public IContextStage<RawType, OutType> WithParallelism(int degree) {
_parallelism = Math.Max(1, degree);
return this;
@@ -108,14 +118,14 @@ internal sealed class ContextStage<RawType, OutType> : IContextStage<RawType, Ou
string.Format(Exceptions.Exceptions.fluent_type_conversion_failure,
o?.GetType().AsUniqueName() ?? "null", typeof(T).AsUniqueName()));
AsyncTransformer<HtmlDocument, OutType> HtmlTransformer()
=> To<AsyncTransformer<HtmlDocument, OutType>>(_transformer);
AsyncTransformer<StringDocument, OutType> FileTransformer()
=> To<AsyncTransformer<StringDocument, OutType>>(_transformer);
AsyncTransformer<ByteDocument, OutType> ByteTransformer()
=> To<AsyncTransformer<ByteDocument, OutType>>(_transformer);
AsyncDownloadFailurePredicate<HtmlDocument>[] HtmlFailurePredicates()
=> To<AsyncDownloadFailurePredicate<HtmlDocument>[]>(context.AsyncFailurePredicates);
AsyncDownloadFailurePredicate<StringDocument>[] FileFailurePredicates()
=> To<AsyncDownloadFailurePredicate<StringDocument>[]>(context.AsyncFailurePredicates);
AsyncDownloadFailurePredicate<ByteDocument>[] ByteFailurePredicates()
=> To<AsyncDownloadFailurePredicate<ByteDocument>[]>(context.AsyncFailurePredicates);
@@ -125,82 +135,39 @@ internal sealed class ContextStage<RawType, OutType> : IContextStage<RawType, Ou
#endregion
if (context.AsyncFailurePredicates is not null)
_optionsBuilder
.WithFailurePredicates(x => x.WithPredicates(context.AsyncFailurePredicates));
var options = _optionsBuilder
.WithClient(context.Client)
.Build();
return (_channel, _fragmentMode, _contentKind) switch {
// ──────────────── fragmented HTML ────────────────
(Channel.Plain, FragmentMode.Fragmented, ContentKind.Html)
=> new UnitFragmentDownloader<OutType>(
context.Web,
HtmlTransformer(),
HtmlFailurePredicates(),
_parallelism,
context.DownloadLogger),
// ──────────────── fragmented binary ────────────────
(Channel.Plain, FragmentMode.Fragmented, ContentKind.Binary)
=> new UnitFragmentDownloaderBinary<OutType>(
context.Client,
ByteTransformer(),
ByteFailurePredicates(),
_parallelism,
context.DownloadLogger),
// ──────────────── single HTML ────────────────
(Channel.Plain, FragmentMode.Single, ContentKind.Html)
=> new UnitDownloader<OutType>(
context.Web,
HtmlTransformer(),
HtmlFailurePredicates()),
// ──────────────── single binary ────────────────
(Channel.Plain, FragmentMode.Single, ContentKind.Binary)
=> new UnitDownloaderBinary<OutType>(
context.Client,
ByteTransformer(),
ByteFailurePredicates()),
// ──────────────── single playwright binary ────────────────
(Channel.Playwright, FragmentMode.Single, ContentKind.Binary)
=> new PlaywrightUnitDownloader<OutType>(
context.Client,
EnsureExists(_playwrightManipulator),
ByteTransformer(),
ByteFailurePredicates()
),
// ──────────────── single playwright HTML ────────────────
(Channel.Playwright, FragmentMode.Single, ContentKind.Html)
=> new PlaywrightUnitPageDownloader<OutType>(
context.Web,
EnsureExists(_playwrightManipulator),
HtmlTransformer(),
HtmlFailurePredicates()),
// ──────────────── single stealth HTML ────────────────
(Channel.Stealth, FragmentMode.Single, ContentKind.Html)
=> new StealthUnitPageDownloader<OutType>(
context.Web,
EnsureExists(_stealthConfig),
EnsureExists(_stealthManipulator),
HtmlTransformer(),
HtmlFailurePredicates()),
// ──────────────── single stealth binary ────────────────
// ──────────────── fragmented ────────────────
(Channel.Plain, FragmentMode.Fragmented, _)
=> new UnitFragmentDownloader<RawType, OutType>(options),
// ──────────────── single ────────────────
(Channel.Plain, FragmentMode.Single, _)
=> new UnitDownloader<RawType, OutType>(options),
// ──────────────── single playwright ────────────────
(Channel.Playwright, FragmentMode.Single, _)
=> new PlaywrightUnitDownloader<RawType, OutType>(options, EnsureExists(_playwrightManipulator)),
// ──────────────── single stealth file ────────────────
(Channel.Stealth, FragmentMode.Single, ContentKind.File)
=> new StealthUnitPageDownloader<RawType, OutType>(options, EnsureExists(_stealthConfig), EnsureExists(_stealthManipulator)),
// ──────────────── single stealth binary ────────────────
(Channel.Stealth, FragmentMode.Single, ContentKind.Binary)
=> new StealthUnitDownloader<OutType>(
context.Client,
=> new StealthUnitDownloader<RawType, OutType>(options, EnsureExists(_stealthConfig), EnsureExists(_stealthManipulator)),
// ──────────────── fragment stealth file ────────────────
(Channel.Stealth, FragmentMode.Fragmented, ContentKind.File)
=> new StealthFragmentPageDownloader<RawType, OutType>(options,
EnsureExists(_stealthConfig),
EnsureExists(_stealthManipulator),
ByteTransformer(),
ByteFailurePredicates()),
// ──────────────── fragment stealth HTML ────────────────
(Channel.Stealth, FragmentMode.Fragmented, ContentKind.Html)
=> new StealthFragmentPageDownloader<OutType>(
context.Web,
EnsureExists(_stealthConfig),
EnsureExists(_stealthManipulator),
HtmlTransformer(),
HtmlFailurePredicates()),
EnsureExists(_stealthManipulator)),
// ──────────────── fragment stealth binary ────────────────
(Channel.Stealth, FragmentMode.Fragmented, ContentKind.Binary)
=> new StealthFragmentDownloader<OutType>(
context.Client,
=> new StealthFragmentDownloader<RawType, OutType>(options,
EnsureExists(_stealthConfig),
EnsureExists(_stealthManipulator),
ByteTransformer(),
ByteFailurePredicates()),
EnsureExists(_stealthManipulator)),
_ => throw new Exception(string.Format(Exceptions.Exceptions.fluent_unsupported_pattern,
$"({_channel}, {_fragmentMode}, {_contentKind})")),
};