using Beam.Models;
using HtmlAgilityPack;
using Beam.Playwright;
using Beam.Stealth;
using Beam;
using Beam.Abstractions;
using Beam.Downloaders;
namespace Beam.Fluent;
/// <summary>
/// Fluent configuration stage that collects download options (channel, fragment mode,
/// parallelism and context settings) and, on <see cref="Build"/>, assembles the matching
/// downloader and wraps it in a <c>DownloadEnumerable</c>.
/// </summary>
/// <remarks>
/// NOTE(review): in this copy of the file the generic type arguments appear to be missing
/// (e.g. <c>Action&gt;</c>, <c>To&gt;(...)</c>); the comments below describe only what the
/// visible tokens show — confirm details against the original source.
/// </remarks>
internal sealed class ContextStage : IContextStage {
// Builder that Configure/WithTimeout/WithRetryReporter forward to; finalized in Build().
private readonly DownloadContextBuilder _ctxBuilder;
// Transformer supplied at construction; handed to whichever unit downloader gets built.
private readonly AsyncTransformer _transformer;
// Single by default; switched to Fragmented by UseFragments() and back by UsePlaywright().
private FragmentMode _fragmentMode = FragmentMode.Single;
// Plain by default; UsePlaywright()/UseStealth() select the other channels.
private Channel _channel = Channel.Plain;
// Derived once from the transformer's runtime type in the constructor.
private readonly ContentKind _contentKind;
// Defaults to 4. Only the Plain-channel fragment downloaders receive this value below.
private int _parallelism = 4;
// ──────────────── playwright ────────────────
// Set by UsePlaywright(); cleared by UseFragments() and UseStealth().
private PlaywrightAsyncManipulator? _playwrightManipulator = null;
// ────────────────────────────────────────────
// ──────────────── stealth ───────────────────
// Set by UseStealth(); the manipulator (but not the config) is cleared by UsePlaywright().
private StealthAsyncManipulator? _stealthManipulator = null;
private StealthConfig? _stealthConfig = null;
// ────────────────────────────────────────────
/// <summary>
/// Creates the stage and classifies the transformer as HTML or binary content.
/// </summary>
/// <param name="ctxBuilder">Context builder that later configuration calls are applied to.</param>
/// <param name="transformer">Transformer passed on to the constructed downloaders.</param>
/// <exception cref="ArgumentException">The transformer matches none of the supported patterns.</exception>
public ContextStage(DownloadContextBuilder ctxBuilder,
AsyncTransformer transformer) {
_ctxBuilder = ctxBuilder;
_transformer = transformer;
// NOTE(review): both type-pattern arms read identically here; the type arguments that
// distinguish the HTML transformer from the binary one appear to have been lost — confirm.
_contentKind = transformer switch {
AsyncTransformer => ContentKind.Html,
AsyncTransformer => ContentKind.Binary,
_ => throw new ArgumentException(string.Format(Exceptions.Exceptions.fluent_unsupported_transformer,
transformer.GetType()
.AsUniqueName()))
};
}
/// <summary>
/// Runs <paramref name="configure"/> against the underlying context builder.
/// </summary>
/// <param name="configure">Callback that receives the context builder.</param>
/// <returns>This stage, for chaining.</returns>
public IContextStage Configure(Action> configure) {
configure(_ctxBuilder);
return this;
}
/// <summary>
/// Sets the degree of parallelism; values below 1 are raised to 1.
/// </summary>
/// <param name="degree">Requested degree of parallelism.</param>
/// <returns>This stage, for chaining.</returns>
public IContextStage WithParallelism(int degree) {
_parallelism = Math.Max(1, degree);
return this;
}
/// <summary>
/// Forwards <paramref name="timeout"/> to the context builder.
/// </summary>
/// <param name="timeout">Timeout passed to <c>WithTimeOut</c> on the builder.</param>
/// <returns>This stage, for chaining.</returns>
public IContextStage WithTimeout(TimeSpan timeout) {
_ctxBuilder.WithTimeOut(timeout);
return this;
}
/// <summary>
/// Forwards <paramref name="reporter"/> to the context builder.
/// </summary>
/// <param name="reporter">Progress reporter passed to <c>WithRetryReporter</c> on the builder.</param>
/// <returns>This stage, for chaining.</returns>
public IContextStage WithRetryReporter(IProgress reporter) {
_ctxBuilder.WithRetryReporter(reporter);
return this;
}
/// <summary>
/// Uses fragments to download multiple links in parallel. This strategy is mutually exclusive with
/// <see cref="UsePlaywright"/>: any Playwright manipulator is dropped and a Playwright channel
/// falls back to the plain channel. A stealth channel is left as it is.
/// </summary>
/// <returns>This stage, for chaining.</returns>
public IContextStage UseFragments() {
if (_playwrightManipulator is not null)
_playwrightManipulator = null;
if (_channel == Channel.Playwright)
_channel = Channel.Plain;
_fragmentMode = FragmentMode.Fragmented;
return this;
}
/// <summary>
/// Use a puppet browser to download the links. This strategy is mutually exclusive with
/// <see cref="UseFragments"/> and <see cref="UseStealth"/>: fragment mode is reset to single
/// and any stealth manipulator is dropped.
/// </summary>
/// <param name="manipulator">The page manipulator</param>
/// <returns>This stage, for chaining.</returns>
public IContextStage UsePlaywright(PlaywrightAsyncManipulator manipulator) {
if (_fragmentMode == FragmentMode.Fragmented)
_fragmentMode = FragmentMode.Single;
// _stealthConfig is left in place; it is only read by the Stealth-channel arms below.
if (_stealthManipulator is not null)
_stealthManipulator = null;
_channel = Channel.Playwright;
_playwrightManipulator = manipulator;
return this;
}
/// <summary>
/// Selects the stealth channel. Any Playwright manipulator is dropped; the fragment mode is
/// left unchanged, so stealth can be combined with <see cref="UseFragments"/>.
/// </summary>
/// <param name="manipulator">Manipulator handed to the stealth downloaders.</param>
/// <param name="config">Configuration handed to the stealth downloaders.</param>
/// <returns>This stage, for chaining.</returns>
public IContextStage UseStealth(StealthAsyncManipulator manipulator, StealthConfig config) {
if (_playwrightManipulator is not null)
_playwrightManipulator = null;
_channel = Channel.Stealth;
_stealthManipulator = manipulator;
_stealthConfig = config;
return this;
}
/// <summary>
/// Creates the unit downloader matching the current (channel, fragment mode, content kind)
/// combination. The result is returned as <c>object</c> and cast by the caller.
/// </summary>
/// <param name="context">Context supplying the web/client handles, failure predicates and logger.</param>
/// <returns>The constructed unit downloader.</returns>
/// <exception cref="Exception">
/// The combination has no matching arm, a stored value cannot be cast to the required type,
/// or a required manipulator/config is null.
/// </exception>
private object ConstructUnitDownloader(DownloadContext context) {
#region Utility functions
// Casts with `as`; a null or mismatched value throws, naming both the actual and target type.
T To(object? o) where T : class
=> (o as T) ??
throw new Exception(
string.Format(Exceptions.Exceptions.fluent_type_conversion_failure,
o?.GetType().AsUniqueName() ?? "null", typeof(T).AsUniqueName()));
// The next four helpers cast the stored transformer / the context's failure predicates.
// NOTE(review): the Html and Byte variants read identically here because their type
// arguments are not visible in this copy — confirm the intended types.
AsyncTransformer HtmlTransformer()
=> To>(_transformer);
AsyncTransformer ByteTransformer()
=> To>(_transformer);
AsyncDownloadFailurePredicate[] HtmlFailurePredicates()
=> To[]>(context.AsyncFailurePredicates);
AsyncDownloadFailurePredicate[] ByteFailurePredicates()
=> To[]>(context.AsyncFailurePredicates);
// Returns o unchanged, or throws the "invalid state" error when it is null.
T EnsureExists(T? o) where T : class
=> (o ?? throw new Exception(Exceptions.Exceptions.fluent_invalid_state));
#endregion
// Every HTML arm is given context.Web and every binary arm context.Client.
// There is no arm for (Playwright, Fragmented, *); UseFragments()/UsePlaywright() above
// never leave the stage in that combination, so it falls through to the final throw.
return (_channel, _fragmentMode, _contentKind) switch {
// ──────────────── fragmented HTML ────────────────
(Channel.Plain, FragmentMode.Fragmented, ContentKind.Html)
=> new UnitFragmentDownloader(
context.Web,
HtmlTransformer(),
HtmlFailurePredicates(),
_parallelism,
context.DownloadLogger),
// ──────────────── fragmented binary ────────────────
(Channel.Plain, FragmentMode.Fragmented, ContentKind.Binary)
=> new UnitFragmentDownloaderBinary(
context.Client,
ByteTransformer(),
ByteFailurePredicates(),
_parallelism,
context.DownloadLogger),
// ──────────────── single HTML ────────────────
(Channel.Plain, FragmentMode.Single, ContentKind.Html)
=> new UnitDownloader(
context.Web,
HtmlTransformer(),
HtmlFailurePredicates()),
// ──────────────── single binary ────────────────
(Channel.Plain, FragmentMode.Single, ContentKind.Binary)
=> new UnitDownloaderBinary(
context.Client,
ByteTransformer(),
ByteFailurePredicates()),
// ──────────────── single playwright binary ────────────────
(Channel.Playwright, FragmentMode.Single, ContentKind.Binary)
=> new PlaywrightUnitDownloader(
context.Client,
EnsureExists(_playwrightManipulator),
ByteTransformer(),
ByteFailurePredicates()
),
// ──────────────── single playwright HTML ────────────────
(Channel.Playwright, FragmentMode.Single, ContentKind.Html)
=> new PlaywrightUnitPageDownloader(
context.Web,
EnsureExists(_playwrightManipulator),
HtmlTransformer(),
HtmlFailurePredicates()),
// ──────────────── single stealth HTML ────────────────
(Channel.Stealth, FragmentMode.Single, ContentKind.Html)
=> new StealthUnitPageDownloader(
context.Web,
EnsureExists(_stealthConfig),
EnsureExists(_stealthManipulator),
HtmlTransformer(),
HtmlFailurePredicates()),
// ──────────────── single stealth binary ────────────────
(Channel.Stealth, FragmentMode.Single, ContentKind.Binary)
=> new StealthUnitDownloader(
context.Client,
EnsureExists(_stealthConfig),
EnsureExists(_stealthManipulator),
ByteTransformer(),
ByteFailurePredicates()),
// ──────────────── fragment stealth HTML ────────────────
// Note: unlike the Plain fragment downloaders, the stealth ones are not given _parallelism.
(Channel.Stealth, FragmentMode.Fragmented, ContentKind.Html)
=> new StealthFragmentPageDownloader(
context.Web,
EnsureExists(_stealthConfig),
EnsureExists(_stealthManipulator),
HtmlTransformer(),
HtmlFailurePredicates()),
// ──────────────── fragment stealth binary ────────────────
(Channel.Stealth, FragmentMode.Fragmented, ContentKind.Binary)
=> new StealthFragmentDownloader(
context.Client,
EnsureExists(_stealthConfig),
EnsureExists(_stealthManipulator),
ByteTransformer(),
ByteFailurePredicates()),
// ──────────────── anything else is unsupported ────────────────
_ => throw new Exception(string.Format(Exceptions.Exceptions.fluent_unsupported_pattern,
$"({_channel}, {_fragmentMode}, {_contentKind})")),
};
}
/// <summary>
/// Builds the sequential downloader for the current fragment mode and returns it as an
/// async enumerator.
/// </summary>
/// <param name="context">Context to copy for the downloader; its logger is passed through as-is.</param>
/// <returns>The enumerator produced by the selected sequential downloader.</returns>
/// <exception cref="Exception">The fragment mode is neither Fragmented nor Single.</exception>
private IAsyncEnumerator> ConstructDownloader(DownloadContext context) {
// The downloader is given a context rebuilt from the one passed in, not that instance itself.
var copyOfContext = DownloadContextBuilder.FromContext(context).Build();
// The factory lambdas cast the untyped result of ConstructUnitDownloader to the unit
// downloader interface that the chosen sequential downloader expects.
return _fragmentMode switch {
FragmentMode.Fragmented => new SequentialFragmentDownloader(
copyOfContext,
ctx => (IUnitDownloader>>)ConstructUnitDownloader(ctx),
context.DownloadLogger).UnwrapFragmented(),
FragmentMode.Single => new SequentialDownloader(
copyOfContext,
ctx => (IUnitDownloader)ConstructUnitDownloader(ctx),
context.DownloadLogger).WrapOrdered(),
_ => throw new Exception(string.Format(Exceptions.Exceptions.fluent_unsupported_pattern,
$"{_fragmentMode}")),
};
}
/// <summary>
/// Builds the download context from the configured builder and wraps the resulting
/// downloader in a <c>DownloadEnumerable</c>.
/// </summary>
/// <returns>The enumerable over the constructed downloader.</returns>
public DownloadEnumerable Build() {
var context = _ctxBuilder.Build();
var enumerable = new DownloadEnumerable(ConstructDownloader(context));
return enumerable;
}
}