// ContextStage (namespace Beam.Fluent)
//
// Fluent configuration stage that collects download settings and, on Build(), produces a
// DownloadEnumerable. Every public configuration method returns `this` so calls can be chained.
//
// NOTE(review): this text appears to have lost its line breaks, its generic type arguments and
// its XML doc tags (e.g. `Action> configure`, `To>(_transformer)`, empty `/// ///` comments,
// two identical `AsyncTransformer =>` switch arms). As written, each `//` comment also swallows
// the code that follows it on the same physical line. Restore the file from version control
// before changing any logic; the code below is intentionally left byte-identical.
//
// State
//   _ctxBuilder      - context builder passed to the constructor; Build() calls _ctxBuilder.Build().
//   _transformer     - transformer passed to the constructor; also registered on _optionsBuilder
//                      through WithAsyncTransformer in the constructor.
//   _contentKind     - ContentKind.File or ContentKind.Binary, chosen in the constructor by a type
//                      switch on the transformer; any other transformer type throws
//                      ArgumentException (message: fluent_unsupported_transformer).
//   _fragmentMode    - starts as FragmentMode.Single.
//   _channel         - starts as Channel.Plain.
//   _parallelism     - starts as 4; WithParallelism stores Math.Max(1, degree). No code in this
//                      text reads the field - TODO confirm whether it is consumed elsewhere.
//   _optionsBuilder  - unit-downloader options builder, exposed via ConfigureUnitDownloaderOptions.
//
// Configuration methods
//   Configure / ConfigureUnitDownloaderOptions - invoke the callback with _ctxBuilder /
//       _optionsBuilder respectively.
//   WithTimeout / WithRetryReporter - forward to _ctxBuilder.WithTimeOut / WithRetryReporter.
//   UseFragments  - clears the Playwright manipulator, moves the channel from Playwright back to
//       Plain, and sets FragmentMode.Fragmented. A Stealth channel is left unchanged.
//   UsePlaywright - moves FragmentMode.Fragmented back to Single, clears the stealth manipulator,
//       sets Channel.Playwright and stores the manipulator.
//       NOTE(review): _stealthConfig is not cleared here - confirm that this is intended.
//   UseStealth    - clears the Playwright manipulator, sets Channel.Stealth and stores the
//       manipulator and config. The fragment mode is not touched.
//
// Construction
//   ConstructUnitDownloader(context)
//       If context.AsyncFailurePredicates is not null, they are added to _optionsBuilder; then
//       options are built with context.Client. The concrete unit downloader is selected by the
//       tuple (_channel, _fragmentMode, _contentKind); a combination without a matching arm
//       (for example Playwright + Fragmented) throws Exception (fluent_unsupported_pattern).
//       EnsureExists throws Exception (fluent_invalid_state) when a required manipulator or
//       config is null.
//       NOTE(review): this method mutates the shared _optionsBuilder on every call, and it is
//       invoked through the factory lambdas in ConstructDownloader - verify that repeated calls
//       do not accumulate failure predicates.
//       NOTE(review): the local helpers To, FileTransformer, ByteTransformer,
//       FileFailurePredicates and ByteFailurePredicates are not invoked anywhere in the visible
//       text - possibly dead code, confirm against the intact file.
//   ConstructDownloader(context)
//       Rebuilds the context via DownloadContextBuilder.FromContext(context).Build() and passes
//       that copy, a unit-downloader factory lambda and context.DownloadLogger to
//       SequentialFragmentDownloader (result passed through UnwrapFragmented()) for
//       FragmentMode.Fragmented, or to SequentialDownloader (result passed through WrapOrdered())
//       for FragmentMode.Single. Any other mode throws Exception (fluent_unsupported_pattern).
//   Build()
//       Builds the context from _ctxBuilder and returns a new DownloadEnumerable wrapping
//       ConstructDownloader(context).
using Beam.Models; using HtmlAgilityPack; using Beam.Playwright; using Beam.Stealth; using Beam; using Beam.Abstractions; using Beam.Downloaders; namespace Beam.Fluent; internal sealed class ContextStage : IContextStage where RawType : IDocument { private readonly DownloadContextBuilder _ctxBuilder; private readonly AsyncTransformer _transformer; private FragmentMode _fragmentMode = FragmentMode.Single; private Channel _channel = Channel.Plain; private readonly ContentKind _contentKind; private int _parallelism = 4; private UnitDownloaderOptionsBuilder _optionsBuilder = new(); // ──────────────── playwright ──────────────── private PlaywrightAsyncManipulator? _playwrightManipulator = null; // ──────────────────────────────────────────── // ──────────────── stealth ─────────────────── private StealthAsyncManipulator? _stealthManipulator = null; private StealthConfig? _stealthConfig = null; // ──────────────────────────────────────────── public ContextStage(DownloadContextBuilder ctxBuilder, AsyncTransformer transformer) { _ctxBuilder = ctxBuilder; _transformer = transformer; _contentKind = transformer switch { AsyncTransformer => ContentKind.File, AsyncTransformer => ContentKind.Binary, _ => throw new ArgumentException(string.Format(Exceptions.Exceptions.fluent_unsupported_transformer, transformer.GetType() .AsUniqueName())) }; _optionsBuilder .WithAsyncTransformer(_transformer); } public IContextStage Configure(Action> configure) { configure(_ctxBuilder); return this; } public IContextStage ConfigureUnitDownloaderOptions( Action> configure) { configure(_optionsBuilder); return this; } public IContextStage WithParallelism(int degree) { _parallelism = Math.Max(1, degree); return this; } public IContextStage WithTimeout(TimeSpan timeout) { _ctxBuilder.WithTimeOut(timeout); return this; } public IContextStage WithRetryReporter(IProgress reporter) { _ctxBuilder.WithRetryReporter(reporter); return this; } /// /// Uses fragments to download multiple links in parallel. 
This strategy is mutually exclusive with /// /// public IContextStage UseFragments() { if (_playwrightManipulator is not null) _playwrightManipulator = null; if (_channel == Channel.Playwright) _channel = Channel.Plain; _fragmentMode = FragmentMode.Fragmented; return this; } /// /// Use a puppet browser to download the links. This strategy is mutually exclusive with /// /// The page manipulator /// public IContextStage UsePlaywright(PlaywrightAsyncManipulator manipulator) { if (_fragmentMode == FragmentMode.Fragmented) _fragmentMode = FragmentMode.Single; if (_stealthManipulator is not null) _stealthManipulator = null; _channel = Channel.Playwright; _playwrightManipulator = manipulator; return this; } public IContextStage UseStealth(StealthAsyncManipulator manipulator, StealthConfig config) { if (_playwrightManipulator is not null) _playwrightManipulator = null; _channel = Channel.Stealth; _stealthManipulator = manipulator; _stealthConfig = config; return this; } private object ConstructUnitDownloader(DownloadContext context) { #region Utility functions T To(object? o) where T : class => (o as T) ?? throw new Exception( string.Format(Exceptions.Exceptions.fluent_type_conversion_failure, o?.GetType().AsUniqueName() ?? "null", typeof(T).AsUniqueName())); AsyncTransformer FileTransformer() => To>(_transformer); AsyncTransformer ByteTransformer() => To>(_transformer); AsyncDownloadFailurePredicate[] FileFailurePredicates() => To[]>(context.AsyncFailurePredicates); AsyncDownloadFailurePredicate[] ByteFailurePredicates() => To[]>(context.AsyncFailurePredicates); T EnsureExists(T? o) where T : class => (o ?? 
throw new Exception(Exceptions.Exceptions.fluent_invalid_state)); #endregion if (context.AsyncFailurePredicates is not null) _optionsBuilder .WithFailurePredicates(x => x.WithPredicates(context.AsyncFailurePredicates)); var options = _optionsBuilder .WithClient(context.Client) .Build(); return (_channel, _fragmentMode, _contentKind) switch { // ──────────────── fragmented ──────────────── (Channel.Plain, FragmentMode.Fragmented, _) => new UnitFragmentDownloader(options), // ──────────────── single ──────────────── (Channel.Plain, FragmentMode.Single, _) => new UnitDownloader(options), // ──────────────── single playwright ──────────────── (Channel.Playwright, FragmentMode.Single, _) => new PlaywrightUnitDownloader(options, EnsureExists(_playwrightManipulator)), // ──────────────── single stealth file ──────────────── (Channel.Stealth, FragmentMode.Single, ContentKind.File) => new StealthUnitPageDownloader(options, EnsureExists(_stealthConfig), EnsureExists(_stealthManipulator)), // ──────────────── single stealth binary ──────────────── (Channel.Stealth, FragmentMode.Single, ContentKind.Binary) => new StealthUnitDownloader(options, EnsureExists(_stealthConfig), EnsureExists(_stealthManipulator)), // ──────────────── fragment stealth file ──────────────── (Channel.Stealth, FragmentMode.Fragmented, ContentKind.File) => new StealthFragmentPageDownloader(options, EnsureExists(_stealthConfig), EnsureExists(_stealthManipulator)), // ──────────────── fragment stealth binary ──────────────── (Channel.Stealth, FragmentMode.Fragmented, ContentKind.Binary) => new StealthFragmentDownloader(options, EnsureExists(_stealthConfig), EnsureExists(_stealthManipulator)), _ => throw new Exception(string.Format(Exceptions.Exceptions.fluent_unsupported_pattern, $"({_channel}, {_fragmentMode}, {_contentKind})")), }; } private IAsyncEnumerator> ConstructDownloader(DownloadContext context) { var copyOfContext = DownloadContextBuilder.FromContext(context).Build(); return _fragmentMode switch 
{ FragmentMode.Fragmented => new SequentialFragmentDownloader( copyOfContext, ctx => (IUnitDownloader>>)ConstructUnitDownloader(ctx), context.DownloadLogger).UnwrapFragmented(), FragmentMode.Single => new SequentialDownloader( copyOfContext, ctx => (IUnitDownloader)ConstructUnitDownloader(ctx), context.DownloadLogger).WrapOrdered(), _ => throw new Exception(string.Format(Exceptions.Exceptions.fluent_unsupported_pattern, $"{_fragmentMode}")), }; } public DownloadEnumerable Build() { var context = _ctxBuilder.Build(); var enumerable = new DownloadEnumerable(ConstructDownloader(context)); return enumerable; } }