Refactor downloaders to use ByteDocument and add options builders
Replaces generic RawType with ByteDocument in downloaders and context classes, simplifying type usage. Adds builder classes for FailurePredicateOptions, FragmentOptions, SkipPredicateOptions, and UnitDownloaderOptions to improve configuration flexibility. Introduces DownloadTarget enum and SkipPredicate delegate for more granular download control. Refactors Fluent API interfaces and implementations to remove RawType generics and streamline usage. Adds Playwright and Stealth download strategies for extensibility.
This commit is contained in:
@@ -1,4 +1,6 @@
|
||||
using Beam.Abstractions;
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
using System.Text;
|
||||
using Beam.Abstractions;
|
||||
using Beam.Models;
|
||||
using HtmlAgilityPack;
|
||||
using File = System.IO.File;
|
||||
@@ -11,12 +13,12 @@ namespace Beam.Downloaders {
|
||||
/// <param name="web"></param>
|
||||
/// <param name="transformer"></param>
|
||||
/// <param name="failurePredicate"></param>
|
||||
public class UnitDownloader<RawType, OutType>(UnitDownloaderOptions<RawType, OutType> options) : IUnitDownloader<OutType> where RawType : IDocument {
|
||||
public UnitDownloaderOptions<RawType, OutType> Options { get; } = options;
|
||||
public class UnitDownloader<OutType>(UnitDownloaderOptions<OutType> options) : IUnitDownloader<OutType> {
|
||||
public UnitDownloaderOptions<OutType> Options { get; } = options;
|
||||
public HttpClient Client => Options.Client;
|
||||
public virtual AsyncTransformer<RawType, OutType> Transformer => Options.AsyncTransformer;
|
||||
|
||||
public virtual AsyncDownloadFailurePredicate<RawType>?[]? FailurePredicates =>
|
||||
public virtual AsyncTransformer<ByteDocument, OutType> Transformer => Options.AsyncTransformer;
|
||||
|
||||
public virtual AsyncDownloadFailurePredicate<ByteDocument>?[]? FailurePredicates =>
|
||||
Options?.FailurePredicateOptions?.AsyncDownloadFailurePredicates;
|
||||
|
||||
public int LinksPerDownload { get; } = 1;
|
||||
@@ -70,7 +72,7 @@ namespace Beam.Downloaders {
|
||||
return new ByteDocument(url, bytes);
|
||||
}
|
||||
|
||||
protected virtual async Task<bool> IsFailure(RawType doc, CancellationToken ct) {
|
||||
protected virtual async Task<bool> IsFailure(ByteDocument doc, CancellationToken ct) {
|
||||
if (FailurePredicates is null)
|
||||
return false;
|
||||
if (!(Options?.FailurePredicateOptions?.ProcessInParallel ?? false))
|
||||
@@ -103,19 +105,18 @@ namespace Beam.Downloaders {
|
||||
return false;
|
||||
}
|
||||
|
||||
protected virtual async Task<RawType> _Download(string link, IProgress<IDownloadReport> progress, CancellationToken ct) {
|
||||
if (Options.DownloadFolder is not null && this is UnitDownloader<StringDocument, OutType>) {
|
||||
var path = Path.Combine(Options.DownloadFolder, Path.GetRandomFileName());
|
||||
protected virtual async Task<ByteDocument> _Download(string link, IProgress<IDownloadReport> progress, CancellationToken ct) {
|
||||
if (Options.DownloadFolder is not null) {
|
||||
var path = Path.Combine(Options.DownloadFolder, options.GetFileNameForDownload(link, []));
|
||||
await DownloadToFile(link, Options.BufferSize, path, progress, ct);
|
||||
return (RawType)(object)new StringDocument(link, path);
|
||||
return new ByteDocument(link, Encoding.UTF8.GetBytes(path));
|
||||
}
|
||||
if (this is UnitDownloader<ByteDocument, OutType>) {
|
||||
return (RawType)(object)(await DownloadToMemory(link, Options.BufferSize, progress, ct));
|
||||
else {
|
||||
return await DownloadToMemory(link, Options.BufferSize, progress, ct);
|
||||
}
|
||||
throw new NotSupportedException(Exceptions.Exceptions.unit_downloader_limited_support);
|
||||
}
|
||||
|
||||
protected virtual async Task<(bool, OutType?)> Transform(RawType download, CancellationToken ct) {
|
||||
protected virtual async Task<(bool, OutType?)> Transform(ByteDocument download, CancellationToken ct) {
|
||||
try {
|
||||
if (FailurePredicates is null || !(await IsFailure(download, ct)))
|
||||
return (true, await Transformer(download));
|
||||
@@ -131,6 +132,9 @@ namespace Beam.Downloaders {
|
||||
return (false, default);
|
||||
|
||||
downProgress ??= new Progress<IDownloadReport>();
|
||||
|
||||
if (ShouldSkip(link[0].Data, out var defaultType))
|
||||
return (true, defaultType);
|
||||
|
||||
OutType? ot = default;
|
||||
int tryCount = 0;
|
||||
@@ -147,5 +151,41 @@ namespace Beam.Downloaders {
|
||||
|
||||
return (false, ot);
|
||||
}
|
||||
|
||||
/// <summary>
/// Runs the configured skip predicates against <paramref name="link"/> and reports
/// whether any of them asked for the download to be skipped.
/// </summary>
/// <param name="link">The link that is about to be downloaded.</param>
/// <param name="outType">
/// The value handed back by the predicate that matched; <c>default</c> when no predicate matched.
/// </param>
/// <returns><c>true</c> when a predicate matched and the download should be skipped.</returns>
private bool ShouldSkip(string link, [NotNullWhen(true)] out OutType? outType) {
    outType = default;

    var skipOptions = Options.SkipPredicateOptions;
    if (skipOptions?.SkipPredicates is null)
        return false;

    if (!skipOptions.ProcessInParallel) {
        foreach (var pred in skipOptions.SkipPredicates) {
            if (pred is null)
                continue;
            if (pred(link, out outType))
                return true;
        }

        // A predicate that returned false may still have written to its out argument;
        // do not leak that value to the caller on the "no skip" path.
        outType = default;
        return false;
    }

    // The first matching predicate wins. A lock is used instead of Interlocked.CompareExchange
    // because OutType is an unconstrained generic: the generic Interlocked overload only supports
    // reference, primitive and enum types, and two separate atomic operations cannot publish
    // the flag and the value together.
    var gate = new object();
    var shouldSkip = false;
    OutType? firstMatch = default;

    Parallel.ForEach(
        skipOptions.SkipPredicates,
        new ParallelOptions {
            // NOTE(review): this reads the thread count from FailurePredicateOptions, not
            // SkipPredicateOptions - looks like a copy/paste slip; confirm the intended source.
            MaxDegreeOfParallelism = Options.FailurePredicateOptions?.ParallelThreads ?? 4
        },
        (predicate, loopState) => {
            if (loopState.ShouldExitCurrentIteration || predicate is null)
                return;

            if (!predicate(link, out var candidate))
                return;

            lock (gate) {
                if (!shouldSkip) {
                    shouldSkip = true;
                    firstMatch = candidate;
                }
            }

            // Ask the loop to stop scheduling further iterations once a match was found.
            loopState.Break();
        }
    );

    outType = firstMatch;
    return shouldSkip;
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user