using Beam.Models;
using HtmlAgilityPack;
using Beam.Playwright;
using Beam.Stealth;
using Beam;
using Beam.Abstractions;
using Beam.Downloaders;

namespace Beam.Fluent;

// NOTE(review): generic type arguments appear to have been stripped from this file
// (e.g. `Action> configure`, `To>(_transformer)`, `IAsyncEnumerator> ConstructDownloader`).
// The code tokens below are kept exactly as found; restore the type arguments from the
// original source before compiling.

/// <summary>
/// Fluent stage that collects download options (channel, fragment mode, parallelism and
/// context-builder settings) and finally builds a <c>DownloadEnumerable</c> via <see cref="Build"/>.
/// </summary>
internal sealed class ContextStage : IContextStage
{
    // Builder for the download context; configuration calls are forwarded to it.
    private readonly DownloadContextBuilder _ctxBuilder;

    // Transformer handed to whichever unit downloader gets constructed.
    private readonly AsyncTransformer _transformer;

    // Single unless UseFragments() is called.
    private FragmentMode _fragmentMode = FragmentMode.Single;

    // Plain unless UsePlaywright(...) or UseStealth(...) is called.
    private Channel _channel = Channel.Plain;

    // Derived once, in the constructor, from the runtime type of the transformer.
    private readonly ContentKind _contentKind;

    // Only passed to the plain-channel fragment downloaders in ConstructUnitDownloader.
    private int _parallelism = 4;

    // ──────────────── playwright ────────────────
    private PlaywrightAsyncManipulator? _playwrightManipulator = null;
    // ────────────────────────────────────────────

    // ──────────────── stealth ───────────────────
    private StealthAsyncManipulator? _stealthManipulator = null;
    private StealthConfig? _stealthConfig = null;
    // ────────────────────────────────────────────

    /// <summary>
    /// Stores the builder and transformer and classifies the transformer as HTML or binary.
    /// </summary>
    /// <param name="ctxBuilder">Context builder that later configuration calls are forwarded to.</param>
    /// <param name="transformer">Transformer whose runtime type decides the content kind.</param>
    /// <exception cref="ArgumentException">The transformer matches neither switch arm.</exception>
    public ContextStage(DownloadContextBuilder ctxBuilder, AsyncTransformer transformer)
    {
        _ctxBuilder = ctxBuilder;
        _transformer = transformer;
        // NOTE(review): both arms read `AsyncTransformer` because their type arguments were
        // stripped; presumably the first is the HTML specialization and the second the
        // byte/binary one — confirm against the original source.
        _contentKind = transformer switch
        {
            AsyncTransformer => ContentKind.Html,
            AsyncTransformer => ContentKind.Binary,
            _ => throw new ArgumentException(string.Format(Exceptions.Exceptions.fluent_unsupported_transformer,
                transformer.GetType()
                    .AsUniqueName()))
        };
    }

    /// <summary>
    /// Invokes <paramref name="configure"/> with the underlying context builder.
    /// </summary>
    /// <param name="configure">Callback that receives the context builder.</param>
    /// <returns>This stage, for chaining.</returns>
    public IContextStage Configure(Action> configure)
    {
        configure(_ctxBuilder);
        return this;
    }

    /// <summary>
    /// Sets the degree of parallelism; values below 1 are raised to 1.
    /// </summary>
    /// <param name="degree">Requested degree of parallelism.</param>
    /// <returns>This stage, for chaining.</returns>
    public IContextStage WithParallelism(int degree)
    {
        _parallelism = Math.Max(1, degree);
        return this;
    }

    /// <summary>
    /// Forwards the timeout to the context builder.
    /// </summary>
    /// <param name="timeout">Timeout passed to the builder's <c>WithTimeOut</c>.</param>
    /// <returns>This stage, for chaining.</returns>
    public IContextStage WithTimeout(TimeSpan timeout)
    {
        _ctxBuilder.WithTimeOut(timeout);
        return this;
    }

    /// <summary>
    /// Forwards the retry reporter to the context builder.
    /// </summary>
    /// <param name="reporter">Progress reporter passed to the builder's <c>WithRetryReporter</c>.</param>
    /// <returns>This stage, for chaining.</returns>
    public IContextStage WithRetryReporter(IProgress reporter)
    {
        _ctxBuilder.WithRetryReporter(reporter);
        return this;
    }

    /// <summary>
    /// Uses fragments to download multiple links in parallel.
    /// This strategy is mutually exclusive with <see cref="UsePlaywright"/>: any Playwright
    /// manipulator is dropped and a Playwright channel falls back to the plain channel.
    /// A stealth channel is left unchanged.
    /// </summary>
    /// <returns>This stage, for chaining.</returns>
    public IContextStage UseFragments()
    {
        if (_playwrightManipulator is not null) _playwrightManipulator = null;
        if (_channel == Channel.Playwright) _channel = Channel.Plain;
        _fragmentMode = FragmentMode.Fragmented;
        return this;
    }

    /// <summary>
    /// Use a puppet browser to download the links. This strategy is mutually exclusive with
    /// <see cref="UseFragments"/>: a fragmented mode is reset to single. Any stealth
    /// manipulator is dropped as well (the stored stealth config is not cleared).
    /// </summary>
    /// <param name="manipulator">The page manipulator</param>
    /// <returns>This stage, for chaining.</returns>
    public IContextStage UsePlaywright(PlaywrightAsyncManipulator manipulator)
    {
        if (_fragmentMode == FragmentMode.Fragmented) _fragmentMode = FragmentMode.Single;
        if (_stealthManipulator is not null) _stealthManipulator = null;
        _channel = Channel.Playwright;
        _playwrightManipulator = manipulator;
        return this;
    }

    /// <summary>
    /// Switches to the stealth channel and stores its manipulator and configuration.
    /// Any Playwright manipulator is dropped; the fragment mode is left unchanged.
    /// </summary>
    /// <param name="manipulator">Stealth manipulator handed to the stealth downloaders.</param>
    /// <param name="config">Stealth configuration handed to the stealth downloaders.</param>
    /// <returns>This stage, for chaining.</returns>
    public IContextStage UseStealth(StealthAsyncManipulator manipulator, StealthConfig config)
    {
        if (_playwrightManipulator is not null) _playwrightManipulator = null;
        _channel = Channel.Stealth;
        _stealthManipulator = manipulator;
        _stealthConfig = config;
        return this;
    }

    /// <summary>
    /// Creates the unit downloader matching the current (channel, fragment mode, content kind)
    /// combination. HTML variants receive <c>context.Web</c>, binary variants <c>context.Client</c>.
    /// </summary>
    /// <param name="context">Download context supplying the web/client objects, failure predicates and logger.</param>
    /// <returns>The constructed downloader, typed as <see cref="object"/>; callers cast it.</returns>
    /// <exception cref="Exception">
    /// A helper cast fails, a required manipulator/config is null, or the combination has no
    /// arm in the switch (there is none for the Playwright channel with fragmented mode).
    /// </exception>
    private object ConstructUnitDownloader(DownloadContext context)
    {
        #region Utility functions

        // Casts with `as` and throws a formatted conversion error instead of yielding null.
        T To(object? o) where T : class =>
            (o as T) ?? throw new Exception(
                string.Format(Exceptions.Exceptions.fluent_type_conversion_failure,
                    o?.GetType().AsUniqueName() ?? "null", typeof(T).AsUniqueName()));

        // NOTE(review): the Html*/Byte* helper pairs look identical here only because their
        // type arguments were stripped — confirm the intended specializations.
        AsyncTransformer HtmlTransformer() => To>(_transformer);
        AsyncTransformer ByteTransformer() => To>(_transformer);

        AsyncDownloadFailurePredicate[] HtmlFailurePredicates() => To[]>(context.AsyncFailurePredicates);
        AsyncDownloadFailurePredicate[] ByteFailurePredicates() => To[]>(context.AsyncFailurePredicates);

        // Returns the value unchanged, or throws the invalid-state error when it is null.
        T EnsureExists(T? o) where T : class => (o ??
            throw new Exception(Exceptions.Exceptions.fluent_invalid_state));

        #endregion

        return (_channel, _fragmentMode, _contentKind) switch
        {
            // ──────────────── fragmented HTML ────────────────
            (Channel.Plain, FragmentMode.Fragmented, ContentKind.Html) => new UnitFragmentDownloader(
                context.Web,
                HtmlTransformer(),
                HtmlFailurePredicates(),
                _parallelism,
                context.DownloadLogger),
            // ──────────────── fragmented binary ────────────────
            (Channel.Plain, FragmentMode.Fragmented, ContentKind.Binary) => new UnitFragmentDownloaderBinary(
                context.Client,
                ByteTransformer(),
                ByteFailurePredicates(),
                _parallelism,
                context.DownloadLogger),
            // ──────────────── single HTML ────────────────
            (Channel.Plain, FragmentMode.Single, ContentKind.Html) => new UnitDownloader(
                context.Web,
                HtmlTransformer(),
                HtmlFailurePredicates()),
            // ──────────────── single binary ────────────────
            (Channel.Plain, FragmentMode.Single, ContentKind.Binary) => new UnitDownloaderBinary(
                context.Client,
                ByteTransformer(),
                ByteFailurePredicates()),
            // ──────────────── single playwright binary ────────────────
            (Channel.Playwright, FragmentMode.Single, ContentKind.Binary) => new PlaywrightUnitDownloader(
                context.Client,
                EnsureExists(_playwrightManipulator),
                ByteTransformer(),
                ByteFailurePredicates()
            ),
            // ──────────────── single playwright HTML ────────────────
            (Channel.Playwright, FragmentMode.Single, ContentKind.Html) => new PlaywrightUnitPageDownloader(
                context.Web,
                EnsureExists(_playwrightManipulator),
                HtmlTransformer(),
                HtmlFailurePredicates()),
            // ──────────────── single stealth HTML ────────────────
            (Channel.Stealth, FragmentMode.Single, ContentKind.Html) => new StealthUnitPageDownloader(
                context.Web,
                EnsureExists(_stealthConfig),
                EnsureExists(_stealthManipulator),
                HtmlTransformer(),
                HtmlFailurePredicates()),
            // ──────────────── single stealth binary ────────────────
            (Channel.Stealth, FragmentMode.Single, ContentKind.Binary) => new StealthUnitDownloader(
                context.Client,
                EnsureExists(_stealthConfig),
                EnsureExists(_stealthManipulator),
                ByteTransformer(),
                ByteFailurePredicates()),
            // ──────────────── fragment stealth HTML ────────────────
            // Note: the stealth fragment downloaders are not given _parallelism or the logger.
            (Channel.Stealth, FragmentMode.Fragmented, ContentKind.Html) => new StealthFragmentPageDownloader(
                context.Web,
                EnsureExists(_stealthConfig),
                EnsureExists(_stealthManipulator),
                HtmlTransformer(),
                HtmlFailurePredicates()),
            // ──────────────── fragment stealth binary ────────────────
            (Channel.Stealth, FragmentMode.Fragmented, ContentKind.Binary) => new StealthFragmentDownloader(
                context.Client,
                EnsureExists(_stealthConfig),
                EnsureExists(_stealthManipulator),
                ByteTransformer(),
                ByteFailurePredicates()),
            // Any combination without an arm above ends up here.
            _ => throw new Exception(string.Format(Exceptions.Exceptions.fluent_unsupported_pattern,
                $"({_channel}, {_fragmentMode}, {_contentKind})")),
        };
    }

    /// <summary>
    /// Builds the sequential downloader for the current fragment mode. The downloader is given a
    /// copy of <paramref name="context"/> and a factory that calls
    /// <see cref="ConstructUnitDownloader"/> and casts its result.
    /// </summary>
    /// <param name="context">Context to copy; its <c>DownloadLogger</c> is also passed to the downloader.</param>
    /// <returns>The enumerator produced by <c>UnwrapFragmented()</c> or <c>WrapOrdered()</c>.</returns>
    /// <exception cref="Exception">The fragment mode is neither fragmented nor single.</exception>
    private IAsyncEnumerator> ConstructDownloader(DownloadContext context)
    {
        // Rebuild the context through the builder so the downloader works on its own copy.
        var copyOfContext = DownloadContextBuilder.FromContext(context).Build();

        return _fragmentMode switch
        {
            FragmentMode.Fragmented => new SequentialFragmentDownloader(
                copyOfContext,
                // Runtime cast: ConstructUnitDownloader returns object.
                ctx => (IUnitDownloader>>)ConstructUnitDownloader(ctx),
                context.DownloadLogger).UnwrapFragmented(),
            FragmentMode.Single => new SequentialDownloader(
                copyOfContext,
                // Runtime cast: ConstructUnitDownloader returns object.
                ctx => (IUnitDownloader)ConstructUnitDownloader(ctx),
                context.DownloadLogger).WrapOrdered(),
            _ => throw new Exception(string.Format(Exceptions.Exceptions.fluent_unsupported_pattern,
                $"{_fragmentMode}")),
        };
    }

    /// <summary>
    /// Builds the download context from the builder and wraps the resulting downloader in a
    /// <c>DownloadEnumerable</c>.
    /// </summary>
    /// <returns>The enumerable over the configured downloader.</returns>
    public DownloadEnumerable Build()
    {
        var context = _ctxBuilder.Build();
        var enumerable = new DownloadEnumerable(ConstructDownloader(context));
        return enumerable;
    }
}