Refactor downloaders to use generic options and unify logic
Replaces specialized binary and HTML downloaders with a generic, options-driven UnitDownloader and UnitFragmentDownloader pattern. Introduces UnitDownloaderOptions and builder classes for flexible configuration, updates interfaces and method signatures to support progress reporting, and removes redundant binary-specific classes. Updates Playwright and Stealth downloaders to use the new generic base, and adds improved error handling and reporting. Also updates dependency versions and makes minor API consistency improvements across the Fluent and Models layers.
This commit is contained in:
+43
-76
@@ -8,13 +8,14 @@ using Beam.Downloaders;
|
||||
|
||||
namespace Beam.Fluent;
|
||||
|
||||
internal sealed class ContextStage<RawType, OutType> : IContextStage<RawType, OutType> {
|
||||
internal sealed class ContextStage<RawType, OutType> : IContextStage<RawType, OutType> where RawType : IDocument {
|
||||
private readonly DownloadContextBuilder<RawType> _ctxBuilder;
|
||||
private readonly AsyncTransformer<RawType, OutType> _transformer;
|
||||
private FragmentMode _fragmentMode = FragmentMode.Single;
|
||||
private Channel _channel = Channel.Plain;
|
||||
private readonly ContentKind _contentKind;
|
||||
private int _parallelism = 4;
|
||||
private UnitDownloaderOptionsBuilder<RawType, OutType> _optionsBuilder = new();
|
||||
|
||||
// ──────────────── playwright ────────────────
|
||||
private PlaywrightAsyncManipulator? _playwrightManipulator = null;
|
||||
@@ -31,12 +32,15 @@ internal sealed class ContextStage<RawType, OutType> : IContextStage<RawType, Ou
|
||||
_ctxBuilder = ctxBuilder;
|
||||
_transformer = transformer;
|
||||
_contentKind = transformer switch {
|
||||
AsyncTransformer<HtmlDocument, OutType> => ContentKind.Html,
|
||||
AsyncTransformer<StringDocument, OutType> => ContentKind.File,
|
||||
AsyncTransformer<ByteDocument, OutType> => ContentKind.Binary,
|
||||
_ => throw new ArgumentException(string.Format(Exceptions.Exceptions.fluent_unsupported_transformer,
|
||||
transformer.GetType()
|
||||
.AsUniqueName()))
|
||||
};
|
||||
|
||||
_optionsBuilder
|
||||
.WithAsyncTransformer(_transformer);
|
||||
}
|
||||
|
||||
public IContextStage<RawType, OutType> Configure(Action<DownloadContextBuilder<RawType>> configure) {
|
||||
@@ -44,6 +48,12 @@ internal sealed class ContextStage<RawType, OutType> : IContextStage<RawType, Ou
|
||||
return this;
|
||||
}
|
||||
|
||||
/// <summary>
/// Passes this stage's <c>_optionsBuilder</c> to the supplied callback so the caller
/// can adjust the unit-downloader options before the downloader is built.
/// </summary>
/// <param name="configure">Callback invoked once, synchronously, with the stage's options builder.</param>
/// <returns>This same stage instance, so calls can be chained.</returns>
public IContextStage<RawType, OutType> ConfigureUnitDownloaderOptions(
|
||||
Action<UnitDownloaderOptionsBuilder<RawType, OutType>> configure) {
|
||||
// The callback receives the builder instance held by this stage (not a copy made here).
configure(_optionsBuilder);
|
||||
return this;
|
||||
}
|
||||
|
||||
public IContextStage<RawType, OutType> WithParallelism(int degree) {
|
||||
_parallelism = Math.Max(1, degree);
|
||||
return this;
|
||||
@@ -108,14 +118,14 @@ internal sealed class ContextStage<RawType, OutType> : IContextStage<RawType, Ou
|
||||
string.Format(Exceptions.Exceptions.fluent_type_conversion_failure,
|
||||
o?.GetType().AsUniqueName() ?? "null", typeof(T).AsUniqueName()));
|
||||
|
||||
// Returns the stage's _transformer viewed as an HtmlDocument transformer by routing it through To<T>.
// NOTE(review): To<T> appears to report a type-conversion failure when the value is not of the
// requested type — confirm against its full definition.
AsyncTransformer<HtmlDocument, OutType> HtmlTransformer()
|
||||
=> To<AsyncTransformer<HtmlDocument, OutType>>(_transformer);
|
||||
// Returns the stage's _transformer viewed as a StringDocument transformer by routing it through To<T>.
// NOTE(review): To<T> appears to report a type-conversion failure when the value is not of the
// requested type — confirm against its full definition.
AsyncTransformer<StringDocument, OutType> FileTransformer()
|
||||
=> To<AsyncTransformer<StringDocument, OutType>>(_transformer);
|
||||
|
||||
// Returns the stage's _transformer viewed as a ByteDocument transformer by routing it through To<T>.
// NOTE(review): To<T> appears to report a type-conversion failure when the value is not of the
// requested type — confirm against its full definition.
AsyncTransformer<ByteDocument, OutType> ByteTransformer()
|
||||
=> To<AsyncTransformer<ByteDocument, OutType>>(_transformer);
|
||||
|
||||
// Returns context.AsyncFailurePredicates viewed as an array of HtmlDocument failure predicates via To<T>.
// NOTE(review): behavior when AsyncFailurePredicates is null depends on To<T>, whose full body is
// not visible here — confirm.
AsyncDownloadFailurePredicate<HtmlDocument>[] HtmlFailurePredicates()
|
||||
=> To<AsyncDownloadFailurePredicate<HtmlDocument>[]>(context.AsyncFailurePredicates);
|
||||
// Returns context.AsyncFailurePredicates viewed as an array of StringDocument failure predicates via To<T>.
// NOTE(review): behavior when AsyncFailurePredicates is null depends on To<T>, whose full body is
// not visible here — confirm.
AsyncDownloadFailurePredicate<StringDocument>[] FileFailurePredicates()
|
||||
=> To<AsyncDownloadFailurePredicate<StringDocument>[]>(context.AsyncFailurePredicates);
|
||||
|
||||
// Returns context.AsyncFailurePredicates viewed as an array of ByteDocument failure predicates via To<T>.
// NOTE(review): behavior when AsyncFailurePredicates is null depends on To<T>, whose full body is
// not visible here — confirm.
AsyncDownloadFailurePredicate<ByteDocument>[] ByteFailurePredicates()
|
||||
=> To<AsyncDownloadFailurePredicate<ByteDocument>[]>(context.AsyncFailurePredicates);
|
||||
@@ -125,82 +135,39 @@ internal sealed class ContextStage<RawType, OutType> : IContextStage<RawType, Ou
|
||||
|
||||
#endregion
|
||||
|
||||
if (context.AsyncFailurePredicates is not null)
|
||||
_optionsBuilder
|
||||
.WithFailurePredicates(x => x.WithPredicates(context.AsyncFailurePredicates));
|
||||
var options = _optionsBuilder
|
||||
.WithClient(context.Client)
|
||||
.Build();
|
||||
|
||||
return (_channel, _fragmentMode, _contentKind) switch {
|
||||
// ──────────────── fragmented HTML ────────────────
|
||||
(Channel.Plain, FragmentMode.Fragmented, ContentKind.Html)
|
||||
=> new UnitFragmentDownloader<OutType>(
|
||||
context.Web,
|
||||
HtmlTransformer(),
|
||||
HtmlFailurePredicates(),
|
||||
_parallelism,
|
||||
context.DownloadLogger),
|
||||
// ──────────────── fragmented binary ────────────────
|
||||
(Channel.Plain, FragmentMode.Fragmented, ContentKind.Binary)
|
||||
=> new UnitFragmentDownloaderBinary<OutType>(
|
||||
context.Client,
|
||||
ByteTransformer(),
|
||||
ByteFailurePredicates(),
|
||||
_parallelism,
|
||||
context.DownloadLogger),
|
||||
// ──────────────── single HTML ────────────────
|
||||
(Channel.Plain, FragmentMode.Single, ContentKind.Html)
|
||||
=> new UnitDownloader<OutType>(
|
||||
context.Web,
|
||||
HtmlTransformer(),
|
||||
HtmlFailurePredicates()),
|
||||
// ──────────────── single binary ────────────────
|
||||
(Channel.Plain, FragmentMode.Single, ContentKind.Binary)
|
||||
=> new UnitDownloaderBinary<OutType>(
|
||||
context.Client,
|
||||
ByteTransformer(),
|
||||
ByteFailurePredicates()),
|
||||
// ──────────────── single playwright binary ────────────────
|
||||
(Channel.Playwright, FragmentMode.Single, ContentKind.Binary)
|
||||
=> new PlaywrightUnitDownloader<OutType>(
|
||||
context.Client,
|
||||
EnsureExists(_playwrightManipulator),
|
||||
ByteTransformer(),
|
||||
ByteFailurePredicates()
|
||||
),
|
||||
// ──────────────── single playwright HTML ────────────────
|
||||
(Channel.Playwright, FragmentMode.Single, ContentKind.Html)
|
||||
=> new PlaywrightUnitPageDownloader<OutType>(
|
||||
context.Web,
|
||||
EnsureExists(_playwrightManipulator),
|
||||
HtmlTransformer(),
|
||||
HtmlFailurePredicates()),
|
||||
// ──────────────── single stealth HTML ────────────────
|
||||
(Channel.Stealth, FragmentMode.Single, ContentKind.Html)
|
||||
=> new StealthUnitPageDownloader<OutType>(
|
||||
context.Web,
|
||||
EnsureExists(_stealthConfig),
|
||||
EnsureExists(_stealthManipulator),
|
||||
HtmlTransformer(),
|
||||
HtmlFailurePredicates()),
|
||||
// ──────────────── single stealth binary ────────────────
|
||||
// ──────────────── fragmented ────────────────
|
||||
(Channel.Plain, FragmentMode.Fragmented, _)
|
||||
=> new UnitFragmentDownloader<RawType, OutType>(options),
|
||||
// ──────────────── single ────────────────
|
||||
(Channel.Plain, FragmentMode.Single, _)
|
||||
=> new UnitDownloader<RawType, OutType>(options),
|
||||
// ──────────────── single playwright ────────────────
|
||||
(Channel.Playwright, FragmentMode.Single, _)
|
||||
=> new PlaywrightUnitDownloader<RawType, OutType>(options, EnsureExists(_playwrightManipulator)),
|
||||
// ──────────────── single stealth file ────────────────
|
||||
(Channel.Stealth, FragmentMode.Single, ContentKind.File)
|
||||
=> new StealthUnitPageDownloader<RawType, OutType>(options, EnsureExists(_stealthConfig), EnsureExists(_stealthManipulator)),
|
||||
// ──────────────── single stealth binary ────────────────
|
||||
(Channel.Stealth, FragmentMode.Single, ContentKind.Binary)
|
||||
=> new StealthUnitDownloader<OutType>(
|
||||
context.Client,
|
||||
=> new StealthUnitDownloader<RawType, OutType>(options, EnsureExists(_stealthConfig), EnsureExists(_stealthManipulator)),
|
||||
// ──────────────── fragment stealth file ────────────────
|
||||
(Channel.Stealth, FragmentMode.Fragmented, ContentKind.File)
|
||||
=> new StealthFragmentPageDownloader<RawType, OutType>(options,
|
||||
EnsureExists(_stealthConfig),
|
||||
EnsureExists(_stealthManipulator),
|
||||
ByteTransformer(),
|
||||
ByteFailurePredicates()),
|
||||
// ──────────────── fragment stealth HTML ────────────────
|
||||
(Channel.Stealth, FragmentMode.Fragmented, ContentKind.Html)
|
||||
=> new StealthFragmentPageDownloader<OutType>(
|
||||
context.Web,
|
||||
EnsureExists(_stealthConfig),
|
||||
EnsureExists(_stealthManipulator),
|
||||
HtmlTransformer(),
|
||||
HtmlFailurePredicates()),
|
||||
EnsureExists(_stealthManipulator)),
|
||||
// ──────────────── fragment stealth binary ────────────────
|
||||
(Channel.Stealth, FragmentMode.Fragmented, ContentKind.Binary)
|
||||
=> new StealthFragmentDownloader<OutType>(
|
||||
context.Client,
|
||||
=> new StealthFragmentDownloader<RawType, OutType>(options,
|
||||
EnsureExists(_stealthConfig),
|
||||
EnsureExists(_stealthManipulator),
|
||||
ByteTransformer(),
|
||||
ByteFailurePredicates()),
|
||||
EnsureExists(_stealthManipulator)),
|
||||
_ => throw new Exception(string.Format(Exceptions.Exceptions.fluent_unsupported_pattern,
|
||||
$"({_channel}, {_fragmentMode}, {_contentKind})")),
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user