Refactor downloaders to use ByteDocument and add options builders
Replaces generic RawType with ByteDocument in downloaders and context classes, simplifying type usage. Adds builder classes for FailurePredicateOptions, FragmentOptions, SkipPredicateOptions, and UnitDownloaderOptions to improve configuration flexibility. Introduces DownloadTarget enum and SkipPredicate delegate for more granular download control. Refactors Fluent API interfaces and implementations to remove RawType generics and streamline usage. Adds Playwright and Stealth download strategies for extensibility.
This commit is contained in:
+27
-35
@@ -8,14 +8,14 @@ using Beam.Downloaders;
|
||||
|
||||
namespace Beam.Fluent;
|
||||
|
||||
internal sealed class ContextStage<RawType, OutType> : IContextStage<RawType, OutType> where RawType : IDocument {
|
||||
private readonly DownloadContextBuilder<RawType> _ctxBuilder;
|
||||
private readonly AsyncTransformer<RawType, OutType> _transformer;
|
||||
internal sealed class ContextStage<OutType> : IContextStage<OutType> {
|
||||
private readonly DownloadContextBuilder _ctxBuilder;
|
||||
private readonly AsyncTransformer<ByteDocument, OutType> _transformer;
|
||||
private FragmentMode _fragmentMode = FragmentMode.Single;
|
||||
private Channel _channel = Channel.Plain;
|
||||
private readonly ContentKind _contentKind;
|
||||
private int _parallelism = 4;
|
||||
private UnitDownloaderOptionsBuilder<RawType, OutType> _optionsBuilder = new();
|
||||
private UnitDownloaderOptionsBuilder<OutType> _optionsBuilder = new();
|
||||
|
||||
// ──────────────── playwright ────────────────
|
||||
private PlaywrightAsyncManipulator? _playwrightManipulator = null;
|
||||
@@ -27,8 +27,8 @@ internal sealed class ContextStage<RawType, OutType> : IContextStage<RawType, Ou
|
||||
// ────────────────────────────────────────────
|
||||
|
||||
|
||||
public ContextStage(DownloadContextBuilder<RawType> ctxBuilder,
|
||||
AsyncTransformer<RawType, OutType> transformer) {
|
||||
public ContextStage(DownloadContextBuilder ctxBuilder,
|
||||
AsyncTransformer<ByteDocument, OutType> transformer) {
|
||||
_ctxBuilder = ctxBuilder;
|
||||
_transformer = transformer;
|
||||
_contentKind = transformer switch {
|
||||
@@ -43,28 +43,28 @@ internal sealed class ContextStage<RawType, OutType> : IContextStage<RawType, Ou
|
||||
.WithAsyncTransformer(_transformer);
|
||||
}
|
||||
|
||||
/// <summary>
/// Hands this stage's download context builder to <paramref name="configure"/> so the caller can adjust it.
/// </summary>
/// <param name="configure">Callback invoked once with the context builder held by this stage.</param>
/// <returns>This stage, so further calls can be chained.</returns>
public IContextStage<RawType, OutType> Configure(Action<DownloadContextBuilder<RawType>> configure) {
|
||||
public IContextStage<OutType> Configure(Action<DownloadContextBuilder> configure) {
|
||||
configure(_ctxBuilder);
|
||||
return this;
|
||||
}
|
||||
|
||||
/// <summary>
/// Hands this stage's unit-downloader options builder to <paramref name="configure"/> so the caller can adjust it.
/// </summary>
/// <param name="configure">Callback invoked once with the options builder held by this stage.</param>
/// <returns>This stage, so further calls can be chained.</returns>
public IContextStage<RawType, OutType> ConfigureUnitDownloaderOptions(
|
||||
Action<UnitDownloaderOptionsBuilder<RawType, OutType>> configure) {
|
||||
public IContextStage<OutType> ConfigureUnitDownloaderOptions(
|
||||
Action<UnitDownloaderOptionsBuilder< OutType>> configure) {
|
||||
configure(_optionsBuilder);
|
||||
return this;
|
||||
}
|
||||
|
||||
/// <summary>
/// Stores the requested degree of parallelism for this stage.
/// </summary>
/// <param name="degree">Requested degree; any value below 1 is stored as 1.</param>
/// <returns>This stage, so further calls can be chained.</returns>
public IContextStage<RawType, OutType> WithParallelism(int degree) {
|
||||
public IContextStage< OutType> WithParallelism(int degree) {
|
||||
// Clamp to a minimum of 1 so zero or negative requests never reach the stored setting.
_parallelism = Math.Max(1, degree);
|
||||
return this;
|
||||
}
|
||||
|
||||
/// <summary>
/// Forwards <paramref name="timeout"/> to the context builder's <c>WithTimeOut</c> method.
/// </summary>
/// <param name="timeout">Timeout value passed through unchanged.</param>
/// <returns>This stage, so further calls can be chained.</returns>
public IContextStage<RawType, OutType> WithTimeout(TimeSpan timeout) {
|
||||
public IContextStage< OutType> WithTimeout(TimeSpan timeout) {
|
||||
_ctxBuilder.WithTimeOut(timeout);
|
||||
return this;
|
||||
}
|
||||
|
||||
/// <summary>
/// Forwards <paramref name="reporter"/> to the context builder's <c>WithRetryReporter</c> method.
/// </summary>
/// <param name="reporter">Progress sink for <see cref="IRetryReport"/> values, passed through unchanged.</param>
/// <returns>This stage, so further calls can be chained.</returns>
public IContextStage<RawType, OutType> WithRetryReporter(IProgress<IRetryReport> reporter) {
|
||||
public IContextStage< OutType> WithRetryReporter(IProgress<IRetryReport> reporter) {
|
||||
_ctxBuilder.WithRetryReporter(reporter);
|
||||
return this;
|
||||
}
|
||||
@@ -73,7 +73,7 @@ internal sealed class ContextStage<RawType, OutType> : IContextStage<RawType, Ou
|
||||
/// Uses fragments to download multiple links in parallel. This strategy is mutually exclusive with <see cref="UsePlaywright(PlaywrightAsyncManipulator)"/>
|
||||
/// </summary>
|
||||
/// <returns></returns>
|
||||
public IContextStage<RawType, OutType> UseFragments() {
|
||||
public IContextStage< OutType> UseFragments() {
|
||||
if (_playwrightManipulator is not null)
|
||||
_playwrightManipulator = null;
|
||||
if (_channel == Channel.Playwright)
|
||||
@@ -88,7 +88,7 @@ internal sealed class ContextStage<RawType, OutType> : IContextStage<RawType, Ou
|
||||
/// </summary>
|
||||
/// <param name="manipulator">The page manipulator</param>
|
||||
/// <returns></returns>
|
||||
public IContextStage<RawType, OutType> UsePlaywright(PlaywrightAsyncManipulator manipulator) {
|
||||
public IContextStage< OutType> UsePlaywright(PlaywrightAsyncManipulator manipulator) {
|
||||
if (_fragmentMode == FragmentMode.Fragmented)
|
||||
_fragmentMode = FragmentMode.Single;
|
||||
if (_stealthManipulator is not null)
|
||||
@@ -99,7 +99,7 @@ internal sealed class ContextStage<RawType, OutType> : IContextStage<RawType, Ou
|
||||
return this;
|
||||
}
|
||||
|
||||
public IContextStage<RawType, OutType> UseStealth(StealthAsyncManipulator manipulator, StealthConfig config) {
|
||||
public IContextStage< OutType> UseStealth(StealthAsyncManipulator manipulator, StealthConfig config) {
|
||||
if (_playwrightManipulator is not null)
|
||||
_playwrightManipulator = null;
|
||||
|
||||
@@ -109,7 +109,7 @@ internal sealed class ContextStage<RawType, OutType> : IContextStage<RawType, Ou
|
||||
return this;
|
||||
}
|
||||
|
||||
private object ConstructUnitDownloader(DownloadContext<RawType> context) {
|
||||
private object ConstructUnitDownloader(DownloadContext context) {
|
||||
#region Utility functions
|
||||
|
||||
T To<T>(object? o) where T : class
|
||||
@@ -145,27 +145,19 @@ internal sealed class ContextStage<RawType, OutType> : IContextStage<RawType, Ou
|
||||
return (_channel, _fragmentMode, _contentKind) switch {
|
||||
// ──────────────── fragmented ────────────────
|
||||
(Channel.Plain, FragmentMode.Fragmented, _)
|
||||
=> new UnitFragmentDownloader<RawType, OutType>(options),
|
||||
=> new UnitFragmentDownloader< OutType>(options),
|
||||
// ──────────────── single ────────────────
|
||||
(Channel.Plain, FragmentMode.Single, _)
|
||||
=> new UnitDownloader<RawType, OutType>(options),
|
||||
=> new UnitDownloader< OutType>(options),
|
||||
// ──────────────── single playwright ────────────────
|
||||
(Channel.Playwright, FragmentMode.Single, _)
|
||||
=> new PlaywrightUnitDownloader<RawType, OutType>(options, EnsureExists(_playwrightManipulator)),
|
||||
// ──────────────── single stealth file ────────────────
|
||||
(Channel.Stealth, FragmentMode.Single, ContentKind.File)
|
||||
=> new StealthUnitPageDownloader<RawType, OutType>(options, EnsureExists(_stealthConfig), EnsureExists(_stealthManipulator)),
|
||||
// ──────────────── single stealth binary ────────────────
|
||||
=> new PlaywrightUnitDownloader< OutType>(options, EnsureExists(_playwrightManipulator)),
|
||||
// ──────────────── single stealth ────────────────
|
||||
(Channel.Stealth, FragmentMode.Single, ContentKind.Binary)
|
||||
=> new StealthUnitDownloader<RawType, OutType>(options, EnsureExists(_stealthConfig), EnsureExists(_stealthManipulator)),
|
||||
// ──────────────── fragment stealth file ────────────────
|
||||
(Channel.Stealth, FragmentMode.Fragmented, ContentKind.File)
|
||||
=> new StealthFragmentPageDownloader<RawType, OutType>(options,
|
||||
EnsureExists(_stealthConfig),
|
||||
EnsureExists(_stealthManipulator)),
|
||||
// ──────────────── fragment stealth binary ────────────────
|
||||
=> new StealthUnitDownloader< OutType>(options, EnsureExists(_stealthConfig), EnsureExists(_stealthManipulator)),
|
||||
// ──────────────── fragment stealth ────────────────
|
||||
(Channel.Stealth, FragmentMode.Fragmented, ContentKind.Binary)
|
||||
=> new StealthFragmentDownloader<RawType, OutType>(options,
|
||||
=> new StealthFragmentDownloader< OutType>(options,
|
||||
EnsureExists(_stealthConfig),
|
||||
EnsureExists(_stealthManipulator)),
|
||||
_ => throw new Exception(string.Format(Exceptions.Exceptions.fluent_unsupported_pattern,
|
||||
@@ -173,14 +165,14 @@ internal sealed class ContextStage<RawType, OutType> : IContextStage<RawType, Ou
|
||||
};
|
||||
}
|
||||
|
||||
private IAsyncEnumerator<Ordered<OutType>> ConstructDownloader(DownloadContext<RawType> context) {
|
||||
var copyOfContext = DownloadContextBuilder<RawType>.FromContext(context).Build();
|
||||
private IAsyncEnumerator<Ordered<OutType>> ConstructDownloader(DownloadContext context) {
|
||||
var copyOfContext = DownloadContextBuilder.FromContext(context).Build();
|
||||
return _fragmentMode switch {
|
||||
FragmentMode.Fragmented => new SequentialFragmentDownloader<RawType, OutType>(
|
||||
FragmentMode.Fragmented => new SequentialFragmentDownloader<OutType>(
|
||||
copyOfContext,
|
||||
ctx => (IUnitDownloader<Fragment<Ordered<OutType>>>)ConstructUnitDownloader(ctx),
|
||||
context.DownloadLogger).UnwrapFragmented(),
|
||||
FragmentMode.Single => new SequentialDownloader<RawType, OutType>(
|
||||
FragmentMode.Single => new SequentialDownloader< OutType>(
|
||||
copyOfContext,
|
||||
ctx => (IUnitDownloader<OutType>)ConstructUnitDownloader(ctx),
|
||||
context.DownloadLogger).WrapOrdered(),
|
||||
|
||||
@@ -6,15 +6,15 @@ using Beam.Stealth;
|
||||
|
||||
namespace Beam.Fluent;
|
||||
|
||||
/// <summary>
/// Fluent configuration stage. Every member except <c>Build</c> returns an <c>IContextStage</c>
/// so calls can be chained; <c>Build</c> returns the resulting <c>DownloadEnumerable</c>.
/// </summary>
// NOTE(review): this span shows the declaration twice — first with the <RawType, OutType>
// type parameters, then with <OutType> only; both share the closing brace at the end.
public interface IContextStage<RawType, OutType> {
|
||||
IContextStage<RawType, OutType> Configure(Action<DownloadContextBuilder<RawType>> configure);
|
||||
IContextStage<RawType, OutType> WithParallelism(int degree);
|
||||
IContextStage<RawType, OutType> WithTimeout(TimeSpan timeout);
|
||||
IContextStage<RawType, OutType> WithRetryReporter(IProgress<IRetryReport> reporter);
|
||||
IContextStage<RawType, OutType> UseFragments();
|
||||
IContextStage<RawType, OutType> UsePlaywright(PlaywrightAsyncManipulator manipulator);
|
||||
IContextStage<RawType, OutType> UseStealth(StealthAsyncManipulator manipulator, StealthConfig config);
|
||||
IContextStage<RawType, OutType> ConfigureUnitDownloaderOptions(
|
||||
Action<UnitDownloaderOptionsBuilder<RawType, OutType>> configure);
|
||||
public interface IContextStage<OutType> {
|
||||
IContextStage<OutType> Configure(Action<DownloadContextBuilder> configure);
|
||||
IContextStage<OutType> WithParallelism(int degree);
|
||||
IContextStage<OutType> WithTimeout(TimeSpan timeout);
|
||||
IContextStage<OutType> WithRetryReporter(IProgress<IRetryReport> reporter);
|
||||
IContextStage<OutType> UseFragments();
|
||||
IContextStage<OutType> UsePlaywright(PlaywrightAsyncManipulator manipulator);
|
||||
IContextStage<OutType> UseStealth(StealthAsyncManipulator manipulator, StealthConfig config);
|
||||
IContextStage<OutType> ConfigureUnitDownloaderOptions(
|
||||
Action<UnitDownloaderOptionsBuilder<OutType>> configure);
|
||||
// Terminal member: the only one that does not return the stage type.
DownloadEnumerable<OutType> Build();
|
||||
}
|
||||
@@ -2,10 +2,10 @@
|
||||
|
||||
namespace Beam.Fluent;
|
||||
|
||||
public interface IDownloadStage<RawType, OutType> {
|
||||
IDownloadStage<RawType, OutType> SaveToDirectory(string dir);
|
||||
IDownloadStage<RawType, OutType> SaveToFiles(IEnumerable<string> files);
|
||||
IDownloadStage<RawType, OutType> SaveToMemory(ConcurrentBag<OutType> bag);
|
||||
public interface IDownloadStage<OutType> {
|
||||
IDownloadStage<OutType> SaveToDirectory(string dir);
|
||||
IDownloadStage<OutType> SaveToFiles(IEnumerable<string> files);
|
||||
IDownloadStage<OutType> SaveToMemory(ConcurrentBag<OutType> bag);
|
||||
void WaitForDownload();
|
||||
Task WaitForDownloadAsync();
|
||||
DownloadEnumerable<OutType> AsAsyncEnumerable();
|
||||
|
||||
@@ -3,6 +3,6 @@ using Beam.Models;
|
||||
|
||||
namespace Beam.Fluent;
|
||||
|
||||
/// <summary>
/// Fluent stage at which the transformer is supplied. <c>WithTransformer</c> accepts an
/// <c>AsyncTransformer</c> producing <c>OutType</c> and returns the following <c>IContextStage</c>.
/// </summary>
// NOTE(review): this span shows the declaration twice — first generic over <RawType, OutType>,
// then over <OutType> with the transformer input fixed to ByteDocument.
public interface ITransformStage<RawType, OutType> {
|
||||
IContextStage<RawType, OutType> WithTransformer(AsyncTransformer<RawType, OutType> factory);
|
||||
public interface ITransformStage<OutType> {
|
||||
IContextStage<OutType> WithTransformer(AsyncTransformer<ByteDocument, OutType> factory);
|
||||
}
|
||||
@@ -5,7 +5,7 @@ using Beam.Models;
|
||||
|
||||
namespace Beam.Fluent;
|
||||
|
||||
internal sealed class DownloadStage<RawType, OutType>(DownloadEnumerable<OutType> download) : IDownloadStage<RawType, OutType> where RawType : IDocument {
|
||||
internal sealed class DownloadStage<OutType>(DownloadEnumerable<OutType> download) : IDownloadStage<OutType> {
|
||||
private IAsyncEnumerable<Ordered<OutType>> _download = download;
|
||||
|
||||
public DownloadEnumerable<OutType> AsAsyncEnumerable() {
|
||||
@@ -22,16 +22,16 @@ internal sealed class DownloadStage<RawType, OutType>(DownloadEnumerable<OutType
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Replaces the pending download sequence with the result of <c>_SaveToDirectory(dir)</c>.
/// </summary>
/// <param name="dir">Directory argument forwarded to <c>_SaveToDirectory</c>.</param>
/// <returns>This stage, so further calls can be chained.</returns>
// NOTE(review): _SaveToDirectory is not visible here; presumably it wraps the current
// sequence so items are written under dir as they are enumerated — confirm.
public IDownloadStage<RawType, OutType> SaveToDirectory(string dir) {
|
||||
public IDownloadStage<OutType> SaveToDirectory(string dir) {
|
||||
_download = _SaveToDirectory(dir);
|
||||
return this;
|
||||
}
|
||||
|
||||
/// <summary>
/// Not implemented: the body unconditionally throws.
/// </summary>
/// <param name="files">Unused by the current body.</param>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public IDownloadStage<RawType, OutType> SaveToFiles(IEnumerable<string> files) {
|
||||
public IDownloadStage<OutType> SaveToFiles(IEnumerable<string> files) {
|
||||
throw new NotImplementedException();
|
||||
}
|
||||
|
||||
/// <summary>
/// Not implemented: the body unconditionally throws.
/// </summary>
/// <param name="bag">Unused by the current body.</param>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public IDownloadStage<RawType, OutType> SaveToMemory(ConcurrentBag<OutType> bag) {
|
||||
public IDownloadStage<OutType> SaveToMemory(ConcurrentBag<OutType> bag) {
|
||||
throw new NotImplementedException();
|
||||
}
|
||||
|
||||
|
||||
@@ -8,22 +8,22 @@ using Beam.Models;
|
||||
namespace Beam.Fluent;
|
||||
|
||||
/// <summary>
/// Static entry points of the fluent API. Each method wraps a download context builder
/// in a <c>TransformStage</c> and returns it as an <c>ITransformStage</c>.
/// </summary>
// NOTE(review): each member in this span appears twice — the <RawType, OutType> form
// followed by the <OutType>-only form — with shared trailing lines.
public static class FluentDownload {
|
||||
/// <summary>
/// Starts a pipeline from an explicit sequence of links by passing them to
/// <c>WithLinks</c> on a new context builder.
/// </summary>
/// <param name="links">Links handed to the context builder.</param>
/// <returns>A transform stage wrapping the new context builder.</returns>
public static ITransformStage<RawType, OutType> Links<RawType, OutType>(params IEnumerable<string> links) where RawType : IDocument {
|
||||
return new TransformStage<RawType, OutType>(new DownloadContextBuilder<RawType>()
|
||||
public static ITransformStage<OutType> Links<OutType>(params IEnumerable<string> links) {
|
||||
return new TransformStage<OutType>(new DownloadContextBuilder()
|
||||
.WithLinks(links));
|
||||
}
|
||||
|
||||
/// <summary>
/// Starts a pipeline whose links come from an <c>OrderedLinkGenerator</c> built from
/// <paramref name="definition"/>: its location segments, its state changer behavior
/// (cast to <c>NumberedStateChanger</c>), and a copy of its first state.
/// </summary>
/// <param name="definition">Resource definition; its location must contain at least one state.</param>
/// <returns>A transform stage wrapping a new context builder fed by the generator.</returns>
/// <exception cref="ArgumentException">Thrown when <c>definition.Location.States</c> is empty.</exception>
public static ITransformStage<RawType, OutType>
|
||||
ResourceDefinition<RawType, OutType>(ResourceDefinition definition) where RawType : IDocument {
|
||||
public static ITransformStage< OutType>
|
||||
ResourceDefinition< OutType>(ResourceDefinition definition) {
|
||||
if (definition.Location.States.Count == 0)
|
||||
throw new ArgumentException(Exceptions.Exceptions.resource_definition_invalid_states_count, nameof(definition));
|
||||
var linkGenerator = new OrderedLinkGenerator(definition.Location.Segments, (NumberedStateChanger)definition.Location.StateChanger.Behavior,
|
||||
definition.Location.States.First().Copy());
|
||||
return new TransformStage<RawType, OutType>(new DownloadContextBuilder<RawType>()
|
||||
return new TransformStage< OutType>(new DownloadContextBuilder()
|
||||
.WithLinks(StringEnumerable.FromGenerator(linkGenerator!)));
|
||||
}
|
||||
|
||||
/// <summary>
/// Starts a pipeline from a context builder obtained via <c>FromContext</c> on
/// <paramref name="existing"/>.
/// </summary>
/// <param name="existing">Existing download context passed to <c>FromContext</c>.</param>
/// <returns>A transform stage wrapping the resulting context builder.</returns>
public static ITransformStage<RawType, OutType> FromContext<RawType, OutType>(DownloadContext<RawType> existing) where RawType : IDocument {
|
||||
return new TransformStage<RawType, OutType>(DownloadContextBuilder<RawType>.FromContext(existing));
|
||||
public static ITransformStage< OutType> FromContext< OutType>(DownloadContext existing) {
|
||||
return new TransformStage< OutType>(DownloadContextBuilder.FromContext(existing));
|
||||
}
|
||||
}
|
||||
@@ -6,8 +6,8 @@ using Beam.Models;
|
||||
|
||||
namespace Beam.Fluent;
|
||||
|
||||
/// <summary>
/// Transform stage backed by the context builder captured by the primary constructor.
/// <c>WithTransformer</c> passes that builder and the given transformer to a new <c>ContextStage</c>.
/// </summary>
// NOTE(review): this span shows the class twice — the <RawType, OutType> form followed by
// the <OutType>-only form — sharing the two closing braces at the end.
internal sealed class TransformStage<RawType, OutType>(DownloadContextBuilder<RawType> CtxBuilder) : ITransformStage<RawType, OutType> where RawType : IDocument {
|
||||
public IContextStage<RawType, OutType> WithTransformer(AsyncTransformer<RawType, OutType> transformer) {
|
||||
return new ContextStage<RawType, OutType>(CtxBuilder, transformer);
|
||||
internal sealed class TransformStage<OutType>(DownloadContextBuilder CtxBuilder) : ITransformStage<OutType> {
|
||||
public IContextStage<OutType> WithTransformer(AsyncTransformer<ByteDocument, OutType> transformer) {
|
||||
return new ContextStage<OutType>(CtxBuilder, transformer);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user