Files
Beam/Beam.Fluent/DownloadBuilder.ContextStage.cs
T
qwsdcvghyu89 7ed05abdb8 refactor: modularize Beam into new projects and interfaces
- Introduced modularity by splitting Beam into new projects: Beam.Abstractions, Beam.Models, and Beam.Downloaders.
- Refactored existing classes into appropriate namespaces and projects.
- Replaced specific implementations with abstractions (e.g., SourceLinkBuilder to LinkBuilder, State to IState, etc.).
- Updated interfaces: added ITemplate, IArticleData, IDownloadReport, and others for improved extensibility.
- Removed deprecated classes like SourceLinkBuilder and StateChangerFactory.
- Enhanced link handling in downloaders by refactoring to use `string` over `SourceLink`.
- Consolidated shared logic under Beam.Abstractions.
2025-09-22 01:51:46 +10:00

191 lines
11 KiB
C#

using Beam.Abstractions;
using Beam.Models;
using HtmlAgilityPack;
using Beam.Playwright;
using Beam.Stealth;
using Beam;
using Beam.Downloaders;
namespace Beam.Fluent {
public static partial class DownloadBuilder<RawType, OutType> {
private sealed class ContextStage : IContextStage {
// Context builder mutated by Configure/WithTimeout/WithRetryReporter and built in Build().
private readonly DownloadContextBuilder<RawType> _ctxBuilder;
// Transformer whose input document type is inspected when a unit downloader is chosen.
private readonly AsyncTransformer<RawType, OutType> _transformer;
// Parallelism handed to the plain fragment downloaders; starts at 4, never below 1.
private int _parallelism = 4;
// Set by UseFragments(), cleared by UsePlaywright(...).
private bool _useFragments;
// Set by UsePlaywright(...); cleared by UseFragments() and UseStealth(...).
private PlaywrightAsyncManipulator? _usePlaywrightManipulator;
// Set by UseStealth(...); cleared by UsePlaywright(...).
private StealthAsyncManipulator? _useStealthManipulator;
// Config supplied together with the stealth manipulator; only read by the stealth downloaders.
private StealthConfig? _stealthConfig;
/// <summary>
/// Creates the stage from the context builder to configure and the transformer to run on downloads.
/// </summary>
/// <param name="ctxBuilder">Builder that later produces the download context.</param>
/// <param name="transformer">Transformer applied by the constructed downloaders.</param>
public ContextStage(DownloadContextBuilder<RawType> ctxBuilder, AsyncTransformer<RawType, OutType> transformer) =>
    (_ctxBuilder, _transformer) = (ctxBuilder, transformer);
/// <summary>
/// Runs the supplied callback against the underlying context builder.
/// </summary>
/// <param name="configure">Callback that receives the context builder.</param>
/// <returns>This stage, for chaining.</returns>
public IContextStage Configure(Action<DownloadContextBuilder<RawType>> configure) {
    configure.Invoke(_ctxBuilder);
    return this;
}
/// <summary>
/// Sets the degree of parallelism. Values below 1 are raised to 1.
/// </summary>
/// <param name="degree">Requested degree of parallelism.</param>
/// <returns>This stage, for chaining.</returns>
public IContextStage WithParallelism(int degree) {
    // Clamp so the stored value is always at least 1.
    _parallelism = degree < 1 ? 1 : degree;
    return this;
}
/// <summary>
/// Forwards the timeout to the underlying context builder.
/// </summary>
/// <param name="timeout">Timeout passed on to the context builder.</param>
/// <returns>This stage, for chaining.</returns>
public IContextStage WithTimeout(TimeSpan timeout) {
    _ctxBuilder.WithTimeOut(timeout);

    return this;
}
/// <summary>
/// Forwards the retry reporter to the underlying context builder.
/// </summary>
/// <param name="reporter">Progress sink for retry reports.</param>
/// <returns>This stage, for chaining.</returns>
public IContextStage WithRetryReporter(IProgress<IRetryReport> reporter) {
    _ctxBuilder.WithRetryReporter(reporter);

    return this;
}
/// <summary>
/// Uses fragments to download multiple links in parallel. This strategy is mutually exclusive with
/// <see cref="UsePlaywright(PlaywrightAsyncManipulator)"/>: any Playwright manipulator set earlier is dropped.
/// A stealth manipulator, if one was set, is left in place.
/// </summary>
/// <returns>This stage, for chaining.</returns>
public IContextStage UseFragments() {
    // Resetting unconditionally has the same effect as checking for a value first.
    _usePlaywrightManipulator = null;
    _useFragments = true;
    return this;
}
/// <summary>
/// Use a puppet browser to download the links. This strategy is mutually exclusive with
/// <see cref="UseFragments"/> and with the stealth strategy: both are switched off when this is selected.
/// </summary>
/// <param name="manipulator">The page manipulator</param>
/// <returns>This stage, for chaining.</returns>
public IContextStage UsePlaywright(PlaywrightAsyncManipulator manipulator) {
    // Turn the competing strategies off, then record the manipulator.
    _useFragments = false;
    _useStealthManipulator = null;
    _usePlaywrightManipulator = manipulator;
    return this;
}
/// <summary>
/// Use the stealth strategy to download the links. Any Playwright manipulator set earlier is dropped;
/// the fragment setting is left as it is.
/// </summary>
/// <param name="manipulator">The stealth page manipulator</param>
/// <param name="config">The stealth configuration handed to the stealth downloaders</param>
/// <returns>This stage, for chaining.</returns>
public IContextStage UseStealth(StealthAsyncManipulator manipulator, StealthConfig config) {
    _usePlaywrightManipulator = null;
    (_useStealthManipulator, _stealthConfig) = (manipulator, config);
    return this;
}
/// <summary>
/// Picks the unit downloader matching the configured strategy (fragments, stealth, Playwright),
/// the transformer's input document type and the context's failure predicates.
/// </summary>
/// <param name="context">Context supplying the Web/Client handles, failure predicates and logger.</param>
/// <returns>
/// The downloader instance, typed as <see cref="object"/> because the concrete downloader types differ;
/// the caller casts it to the interface it expects.
/// </returns>
/// <exception cref="InvalidOperationException">
/// No arm matches the current configuration, or a stealth strategy was selected without a stealth config.
/// </exception>
private object ConstructUnitDownloader(DownloadContext<RawType> context) {
    // Evaluated lazily so only the stealth arms can fail on a missing config.
    StealthConfig RequireStealthConfig() =>
        _stealthConfig ?? throw new InvalidOperationException("Stealth config is null");

    // Arm order matters: the first matching pattern wins.
    return (_useFragments, _useStealthManipulator, _usePlaywrightManipulator, _transformer, context.AsyncFailurePredicates) switch {
        // ──────────────── fragmented HTML ────────────────
        (true, null, _, AsyncTransformer<HtmlDocument, OutType> asyncHtmlTransformer,
            AsyncDownloadFailurePredicate<HtmlDocument>[] documentFailurePredicates)
            => new UnitFragmentDownloader<OutType>(
                context.Web,
                asyncHtmlTransformer,
                documentFailurePredicates,
                _parallelism,
                context.DownloadLogger),
        // ──────────────── fragmented binary ────────────────
        (true, null, _, AsyncTransformer<ByteDocument, OutType> asyncBinaryTransformer,
            AsyncDownloadFailurePredicate<ByteDocument>[] responseFailurePredicates)
            => new UnitFragmentDownloaderBinary<OutType>(
                context.Client,
                asyncBinaryTransformer,
                responseFailurePredicates,
                _parallelism,
                context.DownloadLogger),
        // ──────────────── single HTML ────────────────
        (false, null, null, AsyncTransformer<HtmlDocument, OutType> asyncHtmlTransformer,
            AsyncDownloadFailurePredicate<HtmlDocument>[] documentFailurePredicates)
            => new UnitDownloader<OutType>(
                context.Web,
                asyncHtmlTransformer,
                documentFailurePredicates),
        // ──────────────── single binary ────────────────
        (false, null, null, AsyncTransformer<ByteDocument, OutType> asyncBinaryTransformer,
            AsyncDownloadFailurePredicate<ByteDocument>[] responseFailurePredicates)
            => new UnitDownloaderBinary<OutType>(
                context.Client,
                asyncBinaryTransformer,
                responseFailurePredicates),
        // ──────────────── single playwright binary ────────────────
        (false, _, PlaywrightAsyncManipulator manipulator, AsyncTransformer<ByteDocument, OutType> asyncBinaryTransformer,
            AsyncDownloadFailurePredicate<ByteDocument>[] responseFailurePredicates)
            => new PlaywrightUnitDownloader<OutType>(
                context.Client,
                manipulator,
                asyncBinaryTransformer,
                responseFailurePredicates),
        // ──────────────── single playwright HTML ────────────────
        (false, _, PlaywrightAsyncManipulator manipulator, AsyncTransformer<HtmlDocument, OutType> asyncHtmlTransformer,
            AsyncDownloadFailurePredicate<HtmlDocument>[] documentFailurePredicates)
            => new PlaywrightUnitPageDownloader<OutType>(
                context.Web,
                manipulator,
                asyncHtmlTransformer,
                documentFailurePredicates),
        // ──────────────── single stealth HTML ────────────────
        (false, StealthAsyncManipulator manipulator, _, AsyncTransformer<HtmlDocument, OutType> asyncHtmlTransformer,
            AsyncDownloadFailurePredicate<HtmlDocument>[] documentFailurePredicates)
            => new StealthUnitPageDownloader<OutType>(
                context.Web,
                RequireStealthConfig(),
                manipulator,
                asyncHtmlTransformer,
                documentFailurePredicates),
        // ──────────────── single stealth binary ────────────────
        (false, StealthAsyncManipulator manipulator, _, AsyncTransformer<ByteDocument, OutType> asyncBinaryTransformer,
            AsyncDownloadFailurePredicate<ByteDocument>[] responseFailurePredicates)
            => new StealthUnitDownloader<OutType>(
                context.Client,
                RequireStealthConfig(),
                manipulator,
                asyncBinaryTransformer,
                responseFailurePredicates),
        // ──────────────── fragment stealth HTML ────────────────
        // NOTE(review): the predicates are only type-checked here; neither they nor _parallelism are
        // forwarded to the stealth fragment downloaders. Confirm that is intended.
        (true, StealthAsyncManipulator manipulator, _, AsyncTransformer<HtmlDocument, OutType> asyncHtmlTransformer,
            AsyncDownloadFailurePredicate<HtmlDocument>[] _)
            => new StealthFragmentPageDownloader<OutType>(
                context.Web,
                RequireStealthConfig(),
                manipulator,
                asyncHtmlTransformer),
        // ──────────────── fragment stealth binary ────────────────
        (true, StealthAsyncManipulator manipulator, _, AsyncTransformer<ByteDocument, OutType> asyncBinaryTransformer,
            AsyncDownloadFailurePredicate<ByteDocument>[] _)
            => new StealthFragmentDownloader<OutType>(
                context.Client,
                RequireStealthConfig(),
                manipulator,
                asyncBinaryTransformer),
        // Report every component of the matched tuple so the missing combination can be identified.
        _ => throw new InvalidOperationException(
            $"Unsupported transformer / failure-predicate combination. Missing pattern: fragments={_useFragments} , stealth={_useStealthManipulator is not null} , playwright={_usePlaywrightManipulator is not null} , transformer={_transformer.GetType().AsUniqueName()} , predicates={context.AsyncFailurePredicates?.GetType().AsUniqueName()}"),
    };
}
/// <summary>
/// Creates the sequential downloader for the selected strategy and exposes it as an ordered async enumerator.
/// </summary>
/// <param name="context">The built download context.</param>
/// <returns>An enumerator over the ordered download results.</returns>
private IAsyncEnumerator<Ordered<OutType>> ConstructDownloader(DownloadContext<RawType> context) {
    // The downloader receives a copy rebuilt from the context; the logger is taken from the context passed in.
    var contextCopy = DownloadContextBuilder<RawType>.FromContext(context).Build();

    if (_useFragments) {
        var fragmented = new SequentialFragmentDownloader<RawType, OutType>(
            contextCopy,
            unitContext => (IUnitDownloader<Fragment<Ordered<OutType>>>)ConstructUnitDownloader(unitContext),
            context.DownloadLogger);
        return fragmented.UnwrapFragmented();
    }

    var sequential = new SequentialDownloader<RawType, OutType>(
        contextCopy,
        unitContext => (IUnitDownloader<OutType>)ConstructUnitDownloader(unitContext),
        context.DownloadLogger);
    return sequential.WrapOrdered();
}
/// <summary>
/// Builds the download context and wraps the resulting downloader in a <see cref="DownloadEnumerable{OutType}"/>.
/// </summary>
/// <returns>The enumerable over the configured downloads.</returns>
public DownloadEnumerable<OutType> Build() =>
    new DownloadEnumerable<OutType>(ConstructDownloader(_ctxBuilder.Build()));
}
}
}