Refactor downloaders to use ByteDocument and add options builders

Replaces generic RawType with ByteDocument in downloaders and context classes, simplifying type usage. Adds builder classes for FailurePredicateOptions, FragmentOptions, SkipPredicateOptions, and UnitDownloaderOptions to improve configuration flexibility. Introduces DownloadTarget enum and SkipPredicate delegate for more granular download control. Refactors Fluent API interfaces and implementations to remove RawType generics and streamline usage. Adds Playwright and Stealth download strategies for extensibility.
This commit is contained in:
qwsdcvghyu89
2025-11-15 22:51:46 +11:00
parent 647b2b0f37
commit f52aa6123b
34 changed files with 648 additions and 439 deletions
+27 -35
View File
@@ -8,14 +8,14 @@ using Beam.Downloaders;
namespace Beam.Fluent;
internal sealed class ContextStage<RawType, OutType> : IContextStage<RawType, OutType> where RawType : IDocument {
private readonly DownloadContextBuilder<RawType> _ctxBuilder;
private readonly AsyncTransformer<RawType, OutType> _transformer;
internal sealed class ContextStage<OutType> : IContextStage<OutType> {
private readonly DownloadContextBuilder _ctxBuilder;
private readonly AsyncTransformer<ByteDocument, OutType> _transformer;
private FragmentMode _fragmentMode = FragmentMode.Single;
private Channel _channel = Channel.Plain;
private readonly ContentKind _contentKind;
private int _parallelism = 4;
private UnitDownloaderOptionsBuilder<RawType, OutType> _optionsBuilder = new();
private UnitDownloaderOptionsBuilder<OutType> _optionsBuilder = new();
// ──────────────── playwright ────────────────
private PlaywrightAsyncManipulator? _playwrightManipulator = null;
@@ -27,8 +27,8 @@ internal sealed class ContextStage<RawType, OutType> : IContextStage<RawType, Ou
// ────────────────────────────────────────────
public ContextStage(DownloadContextBuilder<RawType> ctxBuilder,
AsyncTransformer<RawType, OutType> transformer) {
public ContextStage(DownloadContextBuilder ctxBuilder,
AsyncTransformer<ByteDocument, OutType> transformer) {
_ctxBuilder = ctxBuilder;
_transformer = transformer;
_contentKind = transformer switch {
@@ -43,28 +43,28 @@ internal sealed class ContextStage<RawType, OutType> : IContextStage<RawType, Ou
.WithAsyncTransformer(_transformer);
}
public IContextStage<RawType, OutType> Configure(Action<DownloadContextBuilder<RawType>> configure) {
public IContextStage<OutType> Configure(Action<DownloadContextBuilder> configure) {
configure(_ctxBuilder);
return this;
}
public IContextStage<RawType, OutType> ConfigureUnitDownloaderOptions(
Action<UnitDownloaderOptionsBuilder<RawType, OutType>> configure) {
public IContextStage<OutType> ConfigureUnitDownloaderOptions(
Action<UnitDownloaderOptionsBuilder< OutType>> configure) {
configure(_optionsBuilder);
return this;
}
public IContextStage<RawType, OutType> WithParallelism(int degree) {
public IContextStage< OutType> WithParallelism(int degree) {
_parallelism = Math.Max(1, degree);
return this;
}
public IContextStage<RawType, OutType> WithTimeout(TimeSpan timeout) {
public IContextStage< OutType> WithTimeout(TimeSpan timeout) {
_ctxBuilder.WithTimeOut(timeout);
return this;
}
public IContextStage<RawType, OutType> WithRetryReporter(IProgress<IRetryReport> reporter) {
public IContextStage< OutType> WithRetryReporter(IProgress<IRetryReport> reporter) {
_ctxBuilder.WithRetryReporter(reporter);
return this;
}
@@ -73,7 +73,7 @@ internal sealed class ContextStage<RawType, OutType> : IContextStage<RawType, Ou
/// Uses fragments to download multiple links in parallel. This strategy is mutually exclusive with <see cref="UsePlaywright(PlaywrightAsyncManipulator)"/>.
/// </summary>
/// <returns></returns>
public IContextStage<RawType, OutType> UseFragments() {
public IContextStage< OutType> UseFragments() {
if (_playwrightManipulator is not null)
_playwrightManipulator = null;
if (_channel == Channel.Playwright)
@@ -88,7 +88,7 @@ internal sealed class ContextStage<RawType, OutType> : IContextStage<RawType, Ou
/// </summary>
/// <param name="manipulator">The page manipulator</param>
/// <returns>This stage, so that further configuration calls can be chained.</returns>
public IContextStage<RawType, OutType> UsePlaywright(PlaywrightAsyncManipulator manipulator) {
public IContextStage< OutType> UsePlaywright(PlaywrightAsyncManipulator manipulator) {
if (_fragmentMode == FragmentMode.Fragmented)
_fragmentMode = FragmentMode.Single;
if (_stealthManipulator is not null)
@@ -99,7 +99,7 @@ internal sealed class ContextStage<RawType, OutType> : IContextStage<RawType, Ou
return this;
}
public IContextStage<RawType, OutType> UseStealth(StealthAsyncManipulator manipulator, StealthConfig config) {
public IContextStage< OutType> UseStealth(StealthAsyncManipulator manipulator, StealthConfig config) {
if (_playwrightManipulator is not null)
_playwrightManipulator = null;
@@ -109,7 +109,7 @@ internal sealed class ContextStage<RawType, OutType> : IContextStage<RawType, Ou
return this;
}
private object ConstructUnitDownloader(DownloadContext<RawType> context) {
private object ConstructUnitDownloader(DownloadContext context) {
#region Utility functions
T To<T>(object? o) where T : class
@@ -145,27 +145,19 @@ internal sealed class ContextStage<RawType, OutType> : IContextStage<RawType, Ou
return (_channel, _fragmentMode, _contentKind) switch {
// ──────────────── fragmented ────────────────
(Channel.Plain, FragmentMode.Fragmented, _)
=> new UnitFragmentDownloader<RawType, OutType>(options),
=> new UnitFragmentDownloader< OutType>(options),
// ──────────────── single ────────────────
(Channel.Plain, FragmentMode.Single, _)
=> new UnitDownloader<RawType, OutType>(options),
=> new UnitDownloader< OutType>(options),
// ──────────────── single playwright ────────────────
(Channel.Playwright, FragmentMode.Single, _)
=> new PlaywrightUnitDownloader<RawType, OutType>(options, EnsureExists(_playwrightManipulator)),
// ──────────────── single stealth file ────────────────
(Channel.Stealth, FragmentMode.Single, ContentKind.File)
=> new StealthUnitPageDownloader<RawType, OutType>(options, EnsureExists(_stealthConfig), EnsureExists(_stealthManipulator)),
// ──────────────── single stealth binary ────────────────
=> new PlaywrightUnitDownloader< OutType>(options, EnsureExists(_playwrightManipulator)),
// ──────────────── single stealth ────────────────
(Channel.Stealth, FragmentMode.Single, ContentKind.Binary)
=> new StealthUnitDownloader<RawType, OutType>(options, EnsureExists(_stealthConfig), EnsureExists(_stealthManipulator)),
// ──────────────── fragment stealth file ────────────────
(Channel.Stealth, FragmentMode.Fragmented, ContentKind.File)
=> new StealthFragmentPageDownloader<RawType, OutType>(options,
EnsureExists(_stealthConfig),
EnsureExists(_stealthManipulator)),
// ──────────────── fragment stealth binary ────────────────
=> new StealthUnitDownloader< OutType>(options, EnsureExists(_stealthConfig), EnsureExists(_stealthManipulator)),
// ──────────────── fragment stealth ────────────────
(Channel.Stealth, FragmentMode.Fragmented, ContentKind.Binary)
=> new StealthFragmentDownloader<RawType, OutType>(options,
=> new StealthFragmentDownloader< OutType>(options,
EnsureExists(_stealthConfig),
EnsureExists(_stealthManipulator)),
_ => throw new Exception(string.Format(Exceptions.Exceptions.fluent_unsupported_pattern,
@@ -173,14 +165,14 @@ internal sealed class ContextStage<RawType, OutType> : IContextStage<RawType, Ou
};
}
private IAsyncEnumerator<Ordered<OutType>> ConstructDownloader(DownloadContext<RawType> context) {
var copyOfContext = DownloadContextBuilder<RawType>.FromContext(context).Build();
private IAsyncEnumerator<Ordered<OutType>> ConstructDownloader(DownloadContext context) {
var copyOfContext = DownloadContextBuilder.FromContext(context).Build();
return _fragmentMode switch {
FragmentMode.Fragmented => new SequentialFragmentDownloader<RawType, OutType>(
FragmentMode.Fragmented => new SequentialFragmentDownloader<OutType>(
copyOfContext,
ctx => (IUnitDownloader<Fragment<Ordered<OutType>>>)ConstructUnitDownloader(ctx),
context.DownloadLogger).UnwrapFragmented(),
FragmentMode.Single => new SequentialDownloader<RawType, OutType>(
FragmentMode.Single => new SequentialDownloader< OutType>(
copyOfContext,
ctx => (IUnitDownloader<OutType>)ConstructUnitDownloader(ctx),
context.DownloadLogger).WrapOrdered(),