using Beam.Models;
using HtmlAgilityPack;
using Beam.Playwright;
using Beam.Stealth;
using Beam;
using Beam.Abstractions;
using Beam.Downloaders;
namespace Beam.Fluent;
/// <summary>
/// Fluent configuration stage that accumulates download-context settings and unit-downloader
/// options, selects a channel (plain, Playwright or stealth) and a fragment mode, and finally
/// produces a <c>DownloadEnumerable</c> via <c>Build()</c>.
/// </summary>
// NOTE(review): generic type-argument lists appear to be missing throughout this file
// (for example "Action> configure", "To>(_transformer)", "IAsyncEnumerator>"). They look like
// they were lost during extraction; restore them from source control before compiling.
internal sealed class ContextStage : IContextStage {
// Builder for the download context; mutated by Configure/WithTimeout/WithRetryReporter and
// materialised in Build().
private readonly DownloadContextBuilder _ctxBuilder;
// Transformer supplied to the constructor; it is also registered on _optionsBuilder there.
private readonly AsyncTransformer _transformer;
// Single by default; UseFragments switches it to Fragmented, UsePlaywright switches it back.
private FragmentMode _fragmentMode = FragmentMode.Single;
// Plain by default; UsePlaywright / UseStealth select the other channels.
private Channel _channel = Channel.Plain;
// Derived once from the transformer's runtime type in the constructor.
private readonly ContentKind _contentKind;
// Set by WithParallelism (clamped to >= 1).
// NOTE(review): this field is not read anywhere in this class as shown - confirm it is consumed.
private int _parallelism = 4;
// Accumulates unit-downloader options; built inside ConstructUnitDownloader.
private UnitDownloaderOptionsBuilder _optionsBuilder = new();
// ──────────────── playwright ────────────────
private PlaywrightAsyncManipulator? _playwrightManipulator = null;
// ────────────────────────────────────────────
// ──────────────── stealth ───────────────────
private StealthAsyncManipulator? _stealthManipulator = null;
private StealthConfig? _stealthConfig = null;
// ────────────────────────────────────────────
/// <summary>
/// Creates the stage, stores the context builder and transformer, derives the content kind
/// from the transformer's runtime type and registers the transformer on the options builder.
/// </summary>
/// <param name="ctxBuilder">Builder that later produces the download context.</param>
/// <param name="transformer">Transformer applied by the unit downloader.</param>
/// <exception cref="ArgumentException">
/// Thrown when the transformer's type matches none of the switch arms.
/// </exception>
public ContextStage(DownloadContextBuilder ctxBuilder,
AsyncTransformer transformer) {
_ctxBuilder = ctxBuilder;
_transformer = transformer;
// NOTE(review): the two arms below read identically because their generic arguments are
// missing; presumably each matched a different closed transformer type. As written, the
// second arm could never be selected.
_contentKind = transformer switch {
AsyncTransformer => ContentKind.File,
AsyncTransformer => ContentKind.Binary,
_ => throw new ArgumentException(string.Format(Exceptions.Exceptions.fluent_unsupported_transformer,
transformer.GetType()
.AsUniqueName()))
};
_optionsBuilder
.WithAsyncTransformer(_transformer);
}
/// <summary>
/// Invokes <paramref name="configure"/> with the underlying download-context builder.
/// </summary>
/// <param name="configure">Callback that customises the context builder.</param>
/// <returns>This stage, to allow call chaining.</returns>
public IContextStage Configure(Action configure) {
configure(_ctxBuilder);
return this;
}
/// <summary>
/// Invokes <paramref name="configure"/> with the unit-downloader options builder.
/// </summary>
/// <param name="configure">Callback that customises the unit-downloader options.</param>
/// <returns>This stage, to allow call chaining.</returns>
public IContextStage ConfigureUnitDownloaderOptions(
Action> configure) {
configure(_optionsBuilder);
return this;
}
/// <summary>
/// Stores the requested degree of parallelism; values below 1 are raised to 1.
/// </summary>
/// <param name="degree">Requested degree of parallelism.</param>
/// <returns>This stage, to allow call chaining.</returns>
public IContextStage< OutType> WithParallelism(int degree) {
_parallelism = Math.Max(1, degree);
return this;
}
/// <summary>
/// Forwards the timeout to the download-context builder.
/// </summary>
/// <param name="timeout">Timeout passed to the context builder's <c>WithTimeOut</c>.</param>
/// <returns>This stage, to allow call chaining.</returns>
public IContextStage< OutType> WithTimeout(TimeSpan timeout) {
_ctxBuilder.WithTimeOut(timeout);
return this;
}
/// <summary>
/// Forwards the retry reporter to the download-context builder.
/// </summary>
/// <param name="reporter">Progress sink passed to the context builder's <c>WithRetryReporter</c>.</param>
/// <returns>This stage, to allow call chaining.</returns>
public IContextStage< OutType> WithRetryReporter(IProgress reporter) {
_ctxBuilder.WithRetryReporter(reporter);
return this;
}
/// <summary>
/// Uses fragments to download multiple links in parallel. This strategy is mutually exclusive with
/// <see cref="UsePlaywright"/>: any Playwright manipulator is discarded and a Playwright channel
/// falls back to the plain channel. A stealth channel, if selected, is left untouched.
/// </summary>
/// <returns>This stage, to allow call chaining.</returns>
public IContextStage< OutType> UseFragments() {
if (_playwrightManipulator is not null)
_playwrightManipulator = null;
if (_channel == Channel.Playwright)
_channel = Channel.Plain;
_fragmentMode = FragmentMode.Fragmented;
return this;
}
/// <summary>
/// Use a puppet browser to download the links. This strategy is mutually exclusive with
/// <see cref="UseFragments"/> and <see cref="UseStealth"/>: fragment mode is reset to single
/// and any stealth manipulator is discarded.
/// </summary>
/// <param name="manipulator">The page manipulator</param>
/// <returns>This stage, to allow call chaining.</returns>
public IContextStage< OutType> UsePlaywright(PlaywrightAsyncManipulator manipulator) {
if (_fragmentMode == FragmentMode.Fragmented)
_fragmentMode = FragmentMode.Single;
// NOTE(review): only the stealth manipulator is cleared here; _stealthConfig keeps its value.
if (_stealthManipulator is not null)
_stealthManipulator = null;
_channel = Channel.Playwright;
_playwrightManipulator = manipulator;
return this;
}
/// <summary>
/// Selects the stealth channel and stores its manipulator and configuration. Any Playwright
/// manipulator is discarded; the current fragment mode is not changed.
/// </summary>
/// <param name="manipulator">Manipulator handed to the stealth downloader.</param>
/// <param name="config">Configuration handed to the stealth downloader.</param>
/// <returns>This stage, to allow call chaining.</returns>
public IContextStage< OutType> UseStealth(StealthAsyncManipulator manipulator, StealthConfig config) {
if (_playwrightManipulator is not null)
_playwrightManipulator = null;
_channel = Channel.Stealth;
_stealthManipulator = manipulator;
_stealthConfig = config;
return this;
}
/// <summary>
/// Builds the unit downloader matching the current (channel, fragment mode, content kind)
/// combination, using options built from <c>_optionsBuilder</c> and the context's client.
/// </summary>
/// <param name="context">Context supplying the client and optional failure predicates.</param>
/// <returns>
/// The downloader instance typed as <c>object</c>; callers cast it to the interface they need.
/// </returns>
/// <exception cref="Exception">
/// Thrown when the combination matches no switch arm, or when a required manipulator/config
/// is null.
/// </exception>
private object ConstructUnitDownloader(DownloadContext context) {
#region Utility functions
// Casts o to T with "as"; throws with a descriptive message (using "null" for a null input)
// when the cast fails.
T To(object? o) where T : class
=> (o as T) ??
throw new Exception(
string.Format(Exceptions.Exceptions.fluent_type_conversion_failure,
o?.GetType().AsUniqueName() ?? "null", typeof(T).AsUniqueName()));
// NOTE(review): the four typed accessors below are not called anywhere in this method as
// shown; confirm whether they are still needed.
AsyncTransformer FileTransformer()
=> To>(_transformer);
AsyncTransformer ByteTransformer()
=> To>(_transformer);
AsyncDownloadFailurePredicate[] FileFailurePredicates()
=> To[]>(context.AsyncFailurePredicates);
AsyncDownloadFailurePredicate[] ByteFailurePredicates()
=> To[]>(context.AsyncFailurePredicates);
// Returns o unchanged, or throws the "invalid state" exception when it is null.
T EnsureExists(T? o) where T : class
=> (o ?? throw new Exception(Exceptions.Exceptions.fluent_invalid_state));
#endregion
// The predicates are registered on the shared _optionsBuilder field, so this runs on every
// call of this method.
// NOTE(review): whether WithFailurePredicates replaces or appends is not visible here.
if (context.AsyncFailurePredicates is not null)
_optionsBuilder
.WithFailurePredicates(x => x.WithPredicates(context.AsyncFailurePredicates));
var options = _optionsBuilder
.WithClient(context.Client)
.Build();
// Plain and Playwright arms ignore the content kind; both stealth arms match only
// ContentKind.Binary. Every other combination falls through to the final throw.
return (_channel, _fragmentMode, _contentKind) switch {
// ──────────────── fragmented ────────────────
(Channel.Plain, FragmentMode.Fragmented, _)
=> new UnitFragmentDownloader< OutType>(options),
// ──────────────── single ────────────────
(Channel.Plain, FragmentMode.Single, _)
=> new UnitDownloader< OutType>(options),
// ──────────────── single playwright ────────────────
(Channel.Playwright, FragmentMode.Single, _)
=> new PlaywrightUnitDownloader< OutType>(options, EnsureExists(_playwrightManipulator)),
// ──────────────── single stealth ────────────────
(Channel.Stealth, FragmentMode.Single, ContentKind.Binary)
=> new StealthUnitDownloader< OutType>(options, EnsureExists(_stealthConfig), EnsureExists(_stealthManipulator)),
// ──────────────── fragment stealth ────────────────
(Channel.Stealth, FragmentMode.Fragmented, ContentKind.Binary)
=> new StealthFragmentDownloader< OutType>(options,
EnsureExists(_stealthConfig),
EnsureExists(_stealthManipulator)),
_ => throw new Exception(string.Format(Exceptions.Exceptions.fluent_unsupported_pattern,
$"({_channel}, {_fragmentMode}, {_contentKind})")),
};
}
/// <summary>
/// Creates the sequential downloader for the current fragment mode and returns it as an
/// async enumerator. The downloader receives a copy of <paramref name="context"/>, a factory
/// that delegates to <c>ConstructUnitDownloader</c>, and the context's download logger.
/// </summary>
/// <param name="context">The built download context.</param>
/// <returns>The enumerator produced by <c>UnwrapFragmented()</c> or <c>WrapOrdered()</c>.</returns>
/// <exception cref="Exception">Thrown when the fragment mode matches no switch arm.</exception>
private IAsyncEnumerator> ConstructDownloader(DownloadContext context) {
// The downloader is handed a context rebuilt from the original rather than the original itself.
var copyOfContext = DownloadContextBuilder.FromContext(context).Build();
return _fragmentMode switch {
FragmentMode.Fragmented => new SequentialFragmentDownloader(
copyOfContext,
ctx => (IUnitDownloader>>)ConstructUnitDownloader(ctx),
context.DownloadLogger).UnwrapFragmented(),
FragmentMode.Single => new SequentialDownloader< OutType>(
copyOfContext,
ctx => (IUnitDownloader)ConstructUnitDownloader(ctx),
context.DownloadLogger).WrapOrdered(),
_ => throw new Exception(string.Format(Exceptions.Exceptions.fluent_unsupported_pattern,
$"{_fragmentMode}")),
};
}
/// <summary>
/// Builds the download context from the accumulated builder state and wraps the resulting
/// downloader enumerator in a <c>DownloadEnumerable</c>.
/// </summary>
/// <returns>The enumerable wrapping the constructed downloader.</returns>
public DownloadEnumerable Build() {
var context = _ctxBuilder.Build();
var enumerable = new DownloadEnumerable(ConstructDownloader(context));
return enumerable;
}
}