using HtmlAgilityPack;
using Beam.Playwright;
using Beam.Stealth;

namespace Beam.Fluent
{
    public static partial class DownloadBuilder
    {
        // NOTE(review): several generic type arguments look stripped in this copy of the file
        // (e.g. "Action> configure", "IUnitDownloader>>", identical-looking switch patterns).
        // They are reproduced as found; restore them from version control before compiling.

        /// <summary>
        /// Fluent stage that collects context settings and the download strategy
        /// (plain, fragmented, Playwright or stealth) and finally builds the
        /// <c>DownloadEnumerable</c>.
        /// </summary>
        private sealed class ContextStage : IContextStage
        {
            private readonly DownloadContextBuilder _ctxBuilder;
            private readonly AsyncTransformer _transformer;
            private int _parallelism = 4;
            private bool _useFragments;
            private PlaywrightAsyncManipulator? _usePlaywrightManipulator;
            private StealthAsyncManipulator? _useStealthManipulator;
            private StealthConfig? _stealthConfig;

            /// <summary>Creates the stage around an existing context builder and transformer.</summary>
            /// <param name="ctxBuilder">Builder that receives every context-level setting.</param>
            /// <param name="transformer">Transformer handed to the unit downloader that gets selected.</param>
            /// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
            public ContextStage(DownloadContextBuilder ctxBuilder, AsyncTransformer transformer)
            {
                _ctxBuilder = ctxBuilder ?? throw new ArgumentNullException(nameof(ctxBuilder));
                _transformer = transformer ?? throw new ArgumentNullException(nameof(transformer));
            }

            /// <summary>Invokes <paramref name="configure"/> with the underlying context builder.</summary>
            /// <param name="configure">Callback that receives the context builder.</param>
            /// <returns>This stage, for chaining.</returns>
            public IContextStage Configure(Action> configure)
            {
                configure(_ctxBuilder);
                return this;
            }

            /// <summary>Sets the degree of parallelism; values below 1 are clamped to 1.</summary>
            /// <param name="degree">Requested degree of parallelism.</param>
            /// <returns>This stage, for chaining.</returns>
            public IContextStage WithParallelism(int degree)
            {
                _parallelism = Math.Max(1, degree);
                return this;
            }

            /// <summary>Forwards the timeout to the context builder.</summary>
            /// <param name="timeout">Timeout passed to <c>WithTimeOut</c> on the context builder.</param>
            /// <returns>This stage, for chaining.</returns>
            public IContextStage WithTimeout(TimeSpan timeout)
            {
                _ctxBuilder.WithTimeOut(timeout);
                return this;
            }

            /// <summary>Forwards the retry reporter to the context builder.</summary>
            /// <param name="reporter">Reporter passed to <c>WithRetryReporter</c> on the context builder.</param>
            /// <returns>This stage, for chaining.</returns>
            public IContextStage WithRetryReporter(IProgress reporter)
            {
                _ctxBuilder.WithRetryReporter(reporter);
                return this;
            }

            /// <summary>
            /// Uses fragments to download multiple links in parallel. This strategy is mutually
            /// exclusive with <see cref="UsePlaywright"/>: selecting it discards any Playwright
            /// manipulator. A stealth manipulator set through <see cref="UseStealth"/> is kept.
            /// </summary>
            /// <returns>This stage, for chaining.</returns>
            public IContextStage UseFragments()
            {
                _usePlaywrightManipulator = null;
                _useFragments = true;
                return this;
            }

            /// <summary>
            /// Use a puppet browser to download the links. This strategy is mutually exclusive
            /// with <see cref="UseFragments"/> and <see cref="UseStealth"/>: selecting it turns
            /// fragments off and discards any stealth manipulator.
            /// </summary>
            /// <param name="manipulator">The page manipulator.</param>
            /// <returns>This stage, for chaining.</returns>
            /// <exception cref="ArgumentNullException"><paramref name="manipulator"/> is <c>null</c>.</exception>
            public IContextStage UsePlaywright(PlaywrightAsyncManipulator manipulator)
            {
                // Validate before touching any state so a bad call leaves the stage unchanged.
                _ = manipulator ?? throw new ArgumentNullException(nameof(manipulator));

                _useFragments = false;
                _useStealthManipulator = null;
                _usePlaywrightManipulator = manipulator;
                return this;
            }

            /// <summary>
            /// Use the stealth downloaders. This strategy is mutually exclusive with
            /// <see cref="UsePlaywright"/>: selecting it discards any Playwright manipulator.
            /// The fragment setting is left untouched.
            /// </summary>
            /// <param name="manipulator">The stealth manipulator.</param>
            /// <param name="config">Configuration handed to the stealth downloader.</param>
            /// <returns>This stage, for chaining.</returns>
            /// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
            public IContextStage UseStealth(StealthAsyncManipulator manipulator, StealthConfig config)
            {
                // Fail at configuration time instead of deep inside the lazily invoked factory.
                _ = manipulator ?? throw new ArgumentNullException(nameof(manipulator));
                _ = config ?? throw new ArgumentNullException(nameof(config));

                _usePlaywrightManipulator = null;
                _useStealthManipulator = manipulator;
                _stealthConfig = config;
                return this;
            }

            /// <summary>Returns the stealth configuration, or throws when none has been set.</summary>
            /// <exception cref="InvalidOperationException">No stealth configuration is stored.</exception>
            private StealthConfig RequireStealthConfig()
                => _stealthConfig ?? throw new InvalidOperationException("Stealth config is null");

            /// <summary>
            /// Picks the unit downloader that matches the selected strategy, the transformer type
            /// and the type of the context's failure predicates.
            /// </summary>
            /// <param name="context">Context supplying the web/client objects, predicates and logger.</param>
            /// <returns>The downloader, typed as <see cref="object"/>; the caller casts it.</returns>
            /// <exception cref="NotSupportedException">No arm matches the current combination.</exception>
            /// <exception cref="InvalidOperationException">A stealth arm matched but no stealth config is stored.</exception>
            private object ConstructUnitDownloader(DownloadContext context)
            {
                // Arm order matters: the first matching pattern wins.
                return (_useFragments, _useStealthManipulator, _usePlaywrightManipulator, _transformer, context.AsyncFailurePredicates) switch
                {
                    // ──────────────── fragmented HTML ────────────────
                    (true, null, _, AsyncTransformer asyncHtmlTransformer, AsyncDownloadFailurePredicate[] documentFailurePredicates)
                        => new UnitFragmentDownloader(
                            context.Web,
                            asyncHtmlTransformer,
                            documentFailurePredicates,
                            _parallelism,
                            context.DownloadLogger),

                    // ──────────────── fragmented binary ────────────────
                    (true, null, _, AsyncTransformer asyncBinaryTransformer, AsyncDownloadFailurePredicate[] responseFailurePredicates)
                        => new UnitFragmentDownloaderBinary(
                            context.Client,
                            asyncBinaryTransformer,
                            responseFailurePredicates,
                            _parallelism,
                            context.DownloadLogger),

                    // ──────────────── single HTML ────────────────
                    (false, null, null, AsyncTransformer asyncHtmlTransformer, AsyncDownloadFailurePredicate[] documentFailurePredicates)
                        => new UnitDownloader(
                            context.Web,
                            asyncHtmlTransformer,
                            documentFailurePredicates),

                    // ──────────────── single binary ────────────────
                    (false, null, null, AsyncTransformer asyncBinaryTransformer, AsyncDownloadFailurePredicate[] responseFailurePredicates)
                        => new UnitDownloaderBinary(
                            context.Client,
                            asyncBinaryTransformer,
                            responseFailurePredicates),

                    // ──────────────── single playwright binary ────────────────
                    (false, _, PlaywrightAsyncManipulator manipulator, AsyncTransformer asyncBinaryTransformer, AsyncDownloadFailurePredicate[] responseFailurePredicates)
                        => new PlaywrightUnitDownloader(
                            context.Client,
                            manipulator,
                            asyncBinaryTransformer,
                            responseFailurePredicates),

                    // ──────────────── single playwright HTML ────────────────
                    (false, _, PlaywrightAsyncManipulator manipulator, AsyncTransformer asyncHtmlTransformer, AsyncDownloadFailurePredicate[] documentFailurePredicates)
                        => new PlaywrightUnitPageDownloader(
                            context.Web,
                            manipulator,
                            asyncHtmlTransformer,
                            documentFailurePredicates),

                    // ──────────────── single stealth HTML ────────────────
                    (false, StealthAsyncManipulator manipulator, _, AsyncTransformer asyncHtmlTransformer, AsyncDownloadFailurePredicate[] documentFailurePredicates)
                        => new StealthUnitPageDownloader(
                            context.Web,
                            RequireStealthConfig(),
                            manipulator,
                            asyncHtmlTransformer,
                            documentFailurePredicates),

                    // ──────────────── single stealth binary ────────────────
                    (false, StealthAsyncManipulator manipulator, _, AsyncTransformer asyncBinaryTransformer, AsyncDownloadFailurePredicate[] responseFailurePredicates)
                        => new StealthUnitDownloader(
                            context.Client,
                            RequireStealthConfig(),
                            manipulator,
                            asyncBinaryTransformer,
                            responseFailurePredicates),

                    // ──────────────── fragment stealth HTML ────────────────
                    // NOTE(review): unlike the plain fragment downloaders, neither the failure
                    // predicates nor _parallelism are forwarded here — confirm this is intended.
                    (true, StealthAsyncManipulator manipulator, _, AsyncTransformer asyncHtmlTransformer, AsyncDownloadFailurePredicate[] downloadFailurePredicates)
                        => new StealthFragmentPageDownloader(
                            context.Web,
                            RequireStealthConfig(),
                            manipulator,
                            asyncHtmlTransformer),

                    // ──────────────── fragment stealth binary ────────────────
                    // NOTE(review): same as above — predicates and _parallelism are not forwarded.
                    (true, StealthAsyncManipulator manipulator, _, AsyncTransformer asyncBinaryTransformer, AsyncDownloadFailurePredicate[] responseFailurePredicates)
                        => new StealthFragmentDownloader(
                            context.Client,
                            RequireStealthConfig(),
                            manipulator,
                            asyncBinaryTransformer),

                    _ => throw new NotSupportedException($"Unsupported transformer / failure-predicate combination. Missing pattern: {_useFragments} , {_transformer.GetType().AsUniqueName()} , {context.AsyncFailurePredicates?.GetType().AsUniqueName()}"),
                };
            }

            /// <summary>
            /// Creates the sequential downloader for the selected mode. The downloader receives a
            /// context rebuilt from <paramref name="context"/> via <c>CreateBuilder().Build()</c>,
            /// while the logger is taken from <paramref name="context"/> itself.
            /// </summary>
            /// <param name="context">The context built by this stage.</param>
            /// <returns>The enumerator produced by <c>UnwrapFragmented</c> or <c>WrapOrdered</c>.</returns>
            private IAsyncEnumerator> ConstructDownloader(DownloadContext context)
            {
                var copyOfContext = context.CreateBuilder().Build();

                // The unit downloader is created lazily through the factory lambda, so an
                // unsupported combination only surfaces when the factory is invoked.
                return _useFragments switch
                {
                    true => new SequentialFragmentDownloader(
                        copyOfContext,
                        ctx => (IUnitDownloader>>)ConstructUnitDownloader(ctx),
                        context.DownloadLogger).UnwrapFragmented(),

                    false => new SequentialDownloader(
                        copyOfContext,
                        ctx => (IUnitDownloader)ConstructUnitDownloader(ctx),
                        context.DownloadLogger).WrapOrdered()
                };
            }

            /// <summary>Builds the context and wraps the resulting downloader in a <c>DownloadEnumerable</c>.</summary>
            /// <returns>The enumerable that drives the configured download.</returns>
            public DownloadEnumerable Build()
            {
                var context = _ctxBuilder.Build();
                return new DownloadEnumerable(ConstructDownloader(context));
            }
        }
    }
}