using HtmlAgilityPack;
using Beam.Playwright;
using Beam.Stealth;
namespace Beam.Fluent {
public static partial class DownloadBuilder {
private sealed class ContextStage : IContextStage {
// Accumulates the download-context settings; Build() materialises it.
private readonly DownloadContextBuilder _ctxBuilder;
// Transformer matched against the unit-downloader patterns in ConstructUnitDownloader.
private readonly AsyncTransformer _transformer;
// Degree of parallelism handed to the fragment downloaders; WithParallelism keeps it at 1 or above.
private int _parallelism = 4;
// Strategy selectors. They rely on the language defaults (false / null) until a Use* method sets them.
private bool _useFragments;
private PlaywrightAsyncManipulator? _usePlaywrightManipulator;
private StealthAsyncManipulator? _useStealthManipulator;
// Stored by UseStealth; required (non-null) by every stealth arm in ConstructUnitDownloader.
private StealthConfig? _stealthConfig;

/// <summary>
/// Creates the stage from the context builder it configures and the transformer it will
/// pass to the constructed unit downloader.
/// </summary>
/// <param name="ctxBuilder">Context builder that the fluent calls on this stage forward to.</param>
/// <param name="transformer">Transformer given to the unit downloader.</param>
public ContextStage(DownloadContextBuilder ctxBuilder, AsyncTransformer transformer)
    => (_ctxBuilder, _transformer) = (ctxBuilder, transformer);
/// <summary>
/// Runs the supplied callback against the underlying context builder so the caller can
/// adjust it directly.
/// </summary>
/// <param name="configure">Callback that receives the context builder.</param>
/// <returns>This stage, so calls can be chained.</returns>
public IContextStage Configure(Action> configure) {
    configure.Invoke(_ctxBuilder);
    return this;
}
/// <summary>
/// Sets the degree of parallelism. Values below 1 are raised to 1.
/// </summary>
/// <param name="degree">Requested degree of parallelism.</param>
/// <returns>This stage, so calls can be chained.</returns>
public IContextStage WithParallelism(int degree) {
    // Same clamp as Math.Max(1, degree), written as a conditional.
    _parallelism = degree < 1 ? 1 : degree;
    return this;
}
/// <summary>
/// Forwards <paramref name="timeout"/> to the context builder's <c>WithTimeOut</c>.
/// </summary>
/// <param name="timeout">Timeout value handed to the context builder.</param>
/// <returns>This stage, so calls can be chained.</returns>
public IContextStage WithTimeout(TimeSpan timeout) {
_ctxBuilder.WithTimeOut(timeout);
return this;
}
/// <summary>
/// Forwards <paramref name="reporter"/> to the context builder's <c>WithRetryReporter</c>.
/// </summary>
/// <param name="reporter">Progress reporter handed to the context builder.</param>
/// <returns>This stage, so calls can be chained.</returns>
public IContextStage WithRetryReporter(IProgress reporter) {
_ctxBuilder.WithRetryReporter(reporter);
return this;
}
/// <summary>
/// Uses fragments to download multiple links in parallel. This strategy is mutually exclusive with
/// <see cref="UsePlaywright(PlaywrightAsyncManipulator)"/>: any Playwright manipulator set earlier is discarded.
/// A stealth manipulator, if set, is left in place.
/// </summary>
/// <returns>This stage, so calls can be chained.</returns>
public IContextStage UseFragments() {
    _useFragments = true;
    // Unconditional reset; assigning null to an already-null field changes nothing.
    _usePlaywrightManipulator = null;
    return this;
}
/// <summary>
/// Use a puppet browser to download the links. This strategy is mutually exclusive with
/// <see cref="UseFragments"/> and <see cref="UseStealth(StealthAsyncManipulator, StealthConfig)"/>:
/// fragment mode is switched off and any stealth manipulator set earlier is discarded.
/// </summary>
/// <param name="manipulator">The page manipulator</param>
/// <returns>This stage, so calls can be chained.</returns>
public IContextStage UsePlaywright(PlaywrightAsyncManipulator manipulator) {
    // Clear the competing strategies first. The stored stealth config is not touched;
    // it is only read when a stealth manipulator is present.
    _useFragments = false;
    _useStealthManipulator = null;
    _usePlaywrightManipulator = manipulator;
    return this;
}
/// <summary>
/// Selects the stealth strategy with the given manipulator and configuration. Any Playwright
/// manipulator set earlier is discarded; fragment mode is left as it is.
/// </summary>
/// <param name="manipulator">The stealth page manipulator.</param>
/// <param name="config">Stealth configuration stored for the constructed downloader.</param>
/// <returns>This stage, so calls can be chained.</returns>
public IContextStage UseStealth(StealthAsyncManipulator manipulator, StealthConfig config) {
    _usePlaywrightManipulator = null;
    _stealthConfig = config;
    _useStealthManipulator = manipulator;
    return this;
}
/// <summary>
/// Chooses and constructs the unit downloader that matches the configured strategy
/// (fragments, stealth manipulator, Playwright manipulator) together with the runtime types
/// of the transformer and of <c>context.AsyncFailurePredicates</c>.
/// Arms are evaluated top to bottom, so their order is significant.
/// </summary>
/// <param name="context">Download context supplying <c>Web</c>, <c>Client</c>,
/// <c>DownloadLogger</c> and <c>AsyncFailurePredicates</c>.</param>
/// <returns>The constructed downloader, typed as <see cref="object"/>; the caller casts it.</returns>
/// <exception cref="InvalidOperationException">A stealth arm matched but no stealth config is stored.</exception>
/// <exception cref="NotSupportedException">No arm matches the current combination.</exception>
private object ConstructUnitDownloader(DownloadContext context) {
    // One place for the stealth-config requirement. It is only invoked from a matched
    // stealth arm, in the same argument position as before, so evaluation order is unchanged.
    StealthConfig RequireStealthConfig() =>
        _stealthConfig ?? throw new InvalidOperationException("Stealth config is null");

    return (_useFragments, _useStealthManipulator, _usePlaywrightManipulator, _transformer, context.AsyncFailurePredicates) switch {
        // ──────────────── fragmented HTML ────────────────
        (true, null, _, AsyncTransformer asyncHtmlTransformer,
            AsyncDownloadFailurePredicate[] documentFailurePredicates)
            => new UnitFragmentDownloader(
                context.Web,
                asyncHtmlTransformer,
                documentFailurePredicates,
                _parallelism,
                context.DownloadLogger),
        // ──────────────── fragmented binary ────────────────
        (true, null, _, AsyncTransformer asyncBinaryTransformer,
            AsyncDownloadFailurePredicate[] responseFailurePredicates)
            => new UnitFragmentDownloaderBinary(
                context.Client,
                asyncBinaryTransformer,
                responseFailurePredicates,
                _parallelism,
                context.DownloadLogger),
        // ──────────────── single HTML ────────────────
        (false, null, null, AsyncTransformer asyncHtmlTransformer,
            AsyncDownloadFailurePredicate[] documentFailurePredicates)
            => new UnitDownloader(
                context.Web,
                asyncHtmlTransformer,
                documentFailurePredicates),
        // ──────────────── single binary ────────────────
        (false, null, null, AsyncTransformer asyncBinaryTransformer,
            AsyncDownloadFailurePredicate[] responseFailurePredicates)
            => new UnitDownloaderBinary(
                context.Client,
                asyncBinaryTransformer,
                responseFailurePredicates),
        // ──────────────── single playwright binary ────────────────
        (false, _, PlaywrightAsyncManipulator manipulator, AsyncTransformer asyncBinaryTransformer,
            AsyncDownloadFailurePredicate[] responseFailurePredicates)
            => new PlaywrightUnitDownloader(
                context.Client,
                manipulator,
                asyncBinaryTransformer,
                responseFailurePredicates),
        // ──────────────── single playwright HTML ────────────────
        (false, _, PlaywrightAsyncManipulator manipulator, AsyncTransformer asyncHtmlTransformer,
            AsyncDownloadFailurePredicate[] documentFailurePredicates)
            => new PlaywrightUnitPageDownloader(
                context.Web,
                manipulator,
                asyncHtmlTransformer,
                documentFailurePredicates),
        // ──────────────── single stealth HTML ────────────────
        (false, StealthAsyncManipulator manipulator, _, AsyncTransformer asyncHtmlTransformer,
            AsyncDownloadFailurePredicate[] documentFailurePredicates)
            => new StealthUnitPageDownloader(
                context.Web,
                RequireStealthConfig(),
                manipulator,
                asyncHtmlTransformer,
                documentFailurePredicates),
        // ──────────────── single stealth binary ────────────────
        (false, StealthAsyncManipulator manipulator, _, AsyncTransformer asyncBinaryTransformer,
            AsyncDownloadFailurePredicate[] responseFailurePredicates)
            => new StealthUnitDownloader(
                context.Client,
                RequireStealthConfig(),
                manipulator,
                asyncBinaryTransformer,
                responseFailurePredicates),
        // NOTE(review): the two fragment-stealth arms below bind the failure predicates but
        // pass neither them nor _parallelism to the downloader, unlike the other arms.
        // Confirm against the StealthFragment* constructors that this is intended.
        // ──────────────── fragment stealth HTML ────────────────
        (true, StealthAsyncManipulator manipulator, _, AsyncTransformer asyncHtmlTransformer,
            AsyncDownloadFailurePredicate[] downloadFailurePredicates)
            => new StealthFragmentPageDownloader(
                context.Web,
                RequireStealthConfig(),
                manipulator,
                asyncHtmlTransformer),
        // ──────────────── fragment stealth binary ────────────────
        (true, StealthAsyncManipulator manipulator, _, AsyncTransformer asyncBinaryTransformer,
            AsyncDownloadFailurePredicate[] responseFailurePredicates)
            => new StealthFragmentDownloader(
                context.Client,
                RequireStealthConfig(),
                manipulator,
                asyncBinaryTransformer),
        // Fallback: report every value the switch dispatches on. The transformer lookup is
        // null-safe so a missing transformer cannot mask this diagnostic with a NullReferenceException.
        _ => throw new NotSupportedException(
            "Unsupported transformer / failure-predicate combination. Missing pattern: " +
            $"fragments={_useFragments} , stealth={_useStealthManipulator is not null} , " +
            $"playwright={_usePlaywrightManipulator is not null} , " +
            $"{_transformer?.GetType().AsUniqueName()} , " +
            $"{context.AsyncFailurePredicates?.GetType().AsUniqueName()}"),
    };
}
/// <summary>
/// Builds the enumerator that drives the download: the fragment-based sequential downloader
/// when fragment mode is on, otherwise the plain sequential downloader. Each receives a
/// factory that defers to <c>ConstructUnitDownloader</c> and casts its result.
/// </summary>
/// <param name="context">Context whose logger is used directly and whose settings are
/// rebuilt through <c>CreateBuilder().Build()</c> before being handed to the downloader
/// (presumably to obtain an independent copy — confirm against <c>DownloadContext</c>).</param>
/// <returns>The enumerator produced by <c>UnwrapFragmented()</c> or <c>WrapOrdered()</c>.</returns>
private IAsyncEnumerator> ConstructDownloader(DownloadContext context) {
    var rebuiltContext = context.CreateBuilder().Build();

    if (_useFragments) {
        return new SequentialFragmentDownloader(
            rebuiltContext,
            unitContext => (IUnitDownloader>>)ConstructUnitDownloader(unitContext),
            context.DownloadLogger).UnwrapFragmented();
    }

    return new SequentialDownloader(
        rebuiltContext,
        unitContext => (IUnitDownloader)ConstructUnitDownloader(unitContext),
        context.DownloadLogger).WrapOrdered();
}
/// <summary>
/// Builds the download context from the accumulated settings and wraps the resulting
/// downloader in a <c>DownloadEnumerable</c>.
/// </summary>
/// <returns>The enumerable over the constructed downloader.</returns>
public DownloadEnumerable Build() =>
    new DownloadEnumerable(ConstructDownloader(_ctxBuilder.Build()));
}
}
}