Refactor downloaders to use ByteDocument and add options builders
Replaces generic RawType with ByteDocument in downloaders and context classes, simplifying type usage. Adds builder classes for FailurePredicateOptions, FragmentOptions, SkipPredicateOptions, and UnitDownloaderOptions to improve configuration flexibility. Introduces DownloadTarget enum and SkipPredicate delegate for more granular download control. Refactors Fluent API interfaces and implementations to remove RawType generics and streamline usage. Adds Playwright and Stealth download strategies for extensibility.
This commit is contained in:
@@ -14,6 +14,7 @@
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="9.0.9" />
|
||||
<PackageReference Include="System.IO.Hashing" Version="10.0.0" />
|
||||
<PackageReference Include="System.Linq.Async" Version="6.0.3" />
|
||||
</ItemGroup>
|
||||
|
||||
|
||||
@@ -8,14 +8,14 @@ namespace Beam.Downloaders {
|
||||
//public delegate Task<T> AsyncHtmlTransformer<T>(HtmlDocument doc);
|
||||
//public delegate Task<T> AsyncBinaryTransformer<T>(byte[] bin);
|
||||
|
||||
public class DownloadContext<RawType> {
|
||||
public class DownloadContext {
|
||||
private bool disposedValue;
|
||||
|
||||
public HttpClient Client { get; }
|
||||
public HtmlWeb Web { get; }
|
||||
public IProgress<IDownloadReport>? DownloadReporter { get; set; }
|
||||
public IProgress<IRetryReport>? RetryReporter { get; set; }
|
||||
public AsyncDownloadFailurePredicate<RawType>?[]? AsyncFailurePredicates { get; }
|
||||
public AsyncDownloadFailurePredicate<ByteDocument>?[]? AsyncFailurePredicates { get; }
|
||||
public TimeSpan TimeOut { get; set; }
|
||||
public IEnumerable<string> Links { get; }
|
||||
public CancellationToken CancellationToken { get; }
|
||||
@@ -28,7 +28,7 @@ namespace Beam.Downloaders {
|
||||
CancellationToken cancellationToken = default,
|
||||
IProgress<IDownloadReport>? downloadReporter = null,
|
||||
IProgress<IRetryReport>? retryReporter = null,
|
||||
AsyncDownloadFailurePredicate<RawType>?[]? asyncFailurePredicates = null,
|
||||
AsyncDownloadFailurePredicate<ByteDocument>?[]? asyncFailurePredicates = null,
|
||||
TimeSpan? timeOut = null,
|
||||
ILogger? downloadLogger = null) {
|
||||
ArgumentNullException.ThrowIfNull(web, nameof(web));
|
||||
|
||||
@@ -5,12 +5,12 @@ using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace Beam.Downloaders {
|
||||
|
||||
public class DownloadContextBuilder<RawType> {
|
||||
public class DownloadContextBuilder {
|
||||
private HtmlWeb _web;
|
||||
private HttpClient _client;
|
||||
private IProgress<IDownloadReport>? _downloadReporter;
|
||||
private IProgress<IRetryReport>? _retryReporter;
|
||||
private AsyncDownloadFailurePredicate<RawType>?[] _asyncFailurePredicates = [];
|
||||
private AsyncDownloadFailurePredicate<ByteDocument>?[] _asyncFailurePredicates = [];
|
||||
private TimeSpan _timeOut;
|
||||
private IEnumerable<string> _links;
|
||||
private CancellationToken _cancellationToken;
|
||||
@@ -26,60 +26,60 @@ namespace Beam.Downloaders {
|
||||
_links = [];
|
||||
}
|
||||
|
||||
public DownloadContextBuilder<RawType> WithWeb(HtmlWeb web) {
|
||||
public DownloadContextBuilder WithWeb(HtmlWeb web) {
|
||||
_web = web;
|
||||
return this;
|
||||
}
|
||||
|
||||
public DownloadContextBuilder<RawType> WithClient(HttpClient client) {
|
||||
public DownloadContextBuilder WithClient(HttpClient client) {
|
||||
_client = client;
|
||||
return this;
|
||||
}
|
||||
|
||||
public DownloadContextBuilder<RawType> WithDownloadReporter(IProgress<IDownloadReport> downloadReporter) {
|
||||
public DownloadContextBuilder WithDownloadReporter(IProgress<IDownloadReport> downloadReporter) {
|
||||
_downloadReporter = downloadReporter;
|
||||
return this;
|
||||
}
|
||||
|
||||
public DownloadContextBuilder<RawType> WithRetryReporter(IProgress<IRetryReport> retryReporter) {
|
||||
public DownloadContextBuilder WithRetryReporter(IProgress<IRetryReport> retryReporter) {
|
||||
_retryReporter = retryReporter;
|
||||
return this;
|
||||
}
|
||||
|
||||
public DownloadContextBuilder<RawType> WithAsyncFailurePredicates(params AsyncDownloadFailurePredicate<RawType>[] predicates) {
|
||||
public DownloadContextBuilder WithAsyncFailurePredicates(params AsyncDownloadFailurePredicate<ByteDocument>[] predicates) {
|
||||
_asyncFailurePredicates = predicates;
|
||||
return this;
|
||||
}
|
||||
|
||||
public DownloadContextBuilder<RawType> WithTimeOut(TimeSpan timeOut) {
|
||||
public DownloadContextBuilder WithTimeOut(TimeSpan timeOut) {
|
||||
_timeOut = timeOut;
|
||||
return this;
|
||||
}
|
||||
|
||||
public DownloadContextBuilder<RawType> WithLinks(IEnumerable<string> links) {
|
||||
public DownloadContextBuilder WithLinks(IEnumerable<string> links) {
|
||||
_links = links;
|
||||
return this;
|
||||
}
|
||||
|
||||
public DownloadContextBuilder<RawType> WithCancellationToken(CancellationToken cancellationToken) {
|
||||
public DownloadContextBuilder WithCancellationToken(CancellationToken cancellationToken) {
|
||||
_cancellationToken = cancellationToken;
|
||||
return this;
|
||||
}
|
||||
|
||||
public DownloadContextBuilder<RawType> WithCache(DocumentCache cache) {
|
||||
public DownloadContextBuilder WithCache(DocumentCache cache) {
|
||||
_cache = cache;
|
||||
return this;
|
||||
}
|
||||
|
||||
public DownloadContextBuilder<RawType> WithDownloadLogger(ILogger downloadLogger) {
|
||||
public DownloadContextBuilder WithDownloadLogger(ILogger downloadLogger) {
|
||||
_downloadLogger = downloadLogger;
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
public DownloadContext<RawType> Build() {
|
||||
public DownloadContext Build() {
|
||||
// Construct the DownloadContext using the collected values.
|
||||
var context = new DownloadContext<RawType>(
|
||||
var context = new DownloadContext(
|
||||
web: _web,
|
||||
client: _client,
|
||||
links: _links,
|
||||
@@ -100,15 +100,15 @@ namespace Beam.Downloaders {
|
||||
return context;
|
||||
}
|
||||
|
||||
public static DownloadContextBuilder<RawType> FromContext(DownloadContext<RawType> existing) {
|
||||
public static DownloadContextBuilder FromContext(DownloadContext existing) {
|
||||
if (existing == null) throw new ArgumentNullException(nameof(existing));
|
||||
|
||||
return new DownloadContextBuilder<RawType>(existing.Client, existing.Web)
|
||||
return new DownloadContextBuilder(existing.Client, existing.Web)
|
||||
.WithLinks(existing.Links)
|
||||
.WithCancellationToken(existing.CancellationToken)
|
||||
.WithDownloadReporter(existing.DownloadReporter!)
|
||||
.WithRetryReporter(existing.RetryReporter!)
|
||||
.WithAsyncFailurePredicates(existing.AsyncFailurePredicates ?? Array.Empty<AsyncDownloadFailurePredicate<RawType>>())
|
||||
.WithAsyncFailurePredicates(existing.AsyncFailurePredicates ?? Array.Empty<AsyncDownloadFailurePredicate<ByteDocument>>())
|
||||
.WithTimeOut(existing.TimeOut)
|
||||
.WithDownloadLogger(existing.DownloadLogger!)
|
||||
.WithCache(existing.Cache);
|
||||
|
||||
@@ -0,0 +1,9 @@
|
||||
using Beam.Models;
|
||||
|
||||
namespace Beam.Downloaders;
|
||||
|
||||
/// <summary>
/// Immutable settings describing which download-failure predicates are configured
/// and whether they should be evaluated in parallel.
/// </summary>
/// <typeparam name="RawType">Type argument passed to <c>AsyncDownloadFailurePredicate</c>.</typeparam>
public record class FailurePredicateOptions<RawType> {
    /// <summary>
    /// The configured failure predicates. The array itself may be <c>null</c>
    /// and individual entries may be <c>null</c>.
    /// </summary>
    public required AsyncDownloadFailurePredicate<RawType>?[]? AsyncDownloadFailurePredicates { get; init; }

    /// <summary>Whether the predicates should be evaluated in parallel. Defaults to <c>false</c>.</summary>
    public bool ProcessInParallel { get; init; }

    /// <summary>Optional thread count for parallel evaluation; <c>null</c> when not specified.</summary>
    public int? ParallelThreads { get; init; }
}
|
||||
@@ -0,0 +1,56 @@
|
||||
using Beam.Models;
|
||||
|
||||
namespace Beam.Downloaders;
|
||||
|
||||
/// <summary>
/// Fluent builder that accumulates failure predicates and parallelism settings and
/// produces a <c>FailurePredicateOptions&lt;TRaw&gt;</c>.
/// </summary>
/// <typeparam name="TRaw">Type argument of the predicates being collected.</typeparam>
public sealed class FailurePredicateOptionsBuilder<TRaw>
{
    private readonly System.Collections.Generic.List<AsyncDownloadFailurePredicate<TRaw>?> _predicates = new();
    private bool _processInParallel;
    private int? _parallelThreads;

    /// <summary>Appends a single predicate (a <c>null</c> entry is stored as-is).</summary>
    public FailurePredicateOptionsBuilder<TRaw> WithPredicate(AsyncDownloadFailurePredicate<TRaw>? predicate)
    {
        _predicates.Add(predicate);
        return this;
    }

    /// <summary>Appends every predicate of the sequence to the ones already collected.</summary>
    /// <exception cref="System.ArgumentNullException"><paramref name="predicates"/> is <c>null</c>.</exception>
    public FailurePredicateOptionsBuilder<TRaw> WithPredicates(System.Collections.Generic.IEnumerable<AsyncDownloadFailurePredicate<TRaw>?> predicates)
    {
        if (predicates is null)
        {
            throw new System.ArgumentNullException(nameof(predicates));
        }

        _predicates.AddRange(predicates);
        return this;
    }

    /// <summary>
    /// Replaces the collected predicates with the given ones. Unlike the
    /// <c>IEnumerable</c> overload, this overload discards what was collected before;
    /// a <c>null</c> array simply leaves the collection empty.
    /// </summary>
    public FailurePredicateOptionsBuilder<TRaw> WithPredicates(params AsyncDownloadFailurePredicate<TRaw>?[] predicates)
    {
        _predicates.Clear();
        if (predicates is not null)
        {
            _predicates.AddRange(predicates);
        }

        return this;
    }

    /// <summary>Sets whether predicates should be processed in parallel.</summary>
    public FailurePredicateOptionsBuilder<TRaw> WithProcessInParallel(bool value = true)
    {
        _processInParallel = value;
        return this;
    }

    /// <summary>Sets the optional thread count; <c>null</c> clears it.</summary>
    /// <exception cref="System.ArgumentOutOfRangeException"><paramref name="threads"/> has a value that is zero or negative.</exception>
    public FailurePredicateOptionsBuilder<TRaw> WithParallelThreads(int? threads)
    {
        if (threads is <= 0)
        {
            throw new System.ArgumentOutOfRangeException(nameof(threads));
        }

        _parallelThreads = threads;
        return this;
    }

    /// <summary>Creates the options from a snapshot of the current builder state.</summary>
    public FailurePredicateOptions<TRaw> Build()
    {
        AsyncDownloadFailurePredicate<TRaw>?[] snapshot = _predicates.Count > 0 ? _predicates.ToArray() : [];

        return new FailurePredicateOptions<TRaw>
        {
            AsyncDownloadFailurePredicates = snapshot,
            ProcessInParallel = _processInParallel,
            ParallelThreads = _parallelThreads
        };
    }
}
|
||||
@@ -0,0 +1,7 @@
|
||||
namespace Beam.Downloaders;
|
||||
|
||||
/// <summary>
/// Immutable settings for fragmented downloads.
/// </summary>
public record class FragmentOptions {
    /// <summary>
    /// Size of one fragment. Must be supplied at construction.
    /// NOTE(review): the unit (bytes vs. number of links) is not established in this file; confirm against the consumer.
    /// </summary>
    public required int FragmentSize { get; init; }

    /// <summary>Whether fragments should be downloaded in parallel. Defaults to <c>false</c>.</summary>
    public bool DownloadInParallel { get; init; }

    /// <summary>Optional thread count for parallel downloads; <c>null</c> when not specified.</summary>
    public int? ParallelThreads { get; init; }
}
|
||||
@@ -0,0 +1,36 @@
|
||||
namespace Beam.Downloaders;
|
||||
|
||||
/// <summary>
/// Fluent builder for <c>FragmentOptions</c>. A fragment size must be set before
/// <see cref="Build"/> is called.
/// </summary>
public sealed class FragmentOptionsBuilder {
    private int? _fragmentSize;
    private bool _downloadInParallel;
    private int? _parallelThreads;

    /// <summary>Sets the fragment size.</summary>
    /// <exception cref="System.ArgumentOutOfRangeException"><paramref name="bytes"/> is zero or negative.</exception>
    public FragmentOptionsBuilder WithFragmentSize(int bytes) {
        if (bytes <= 0) {
            throw new System.ArgumentOutOfRangeException(nameof(bytes));
        }

        _fragmentSize = bytes;
        return this;
    }

    /// <summary>Sets whether fragments should be downloaded in parallel.</summary>
    public FragmentOptionsBuilder WithDownloadInParallel(bool value = true) {
        _downloadInParallel = value;
        return this;
    }

    /// <summary>Sets the optional thread count; <c>null</c> clears it.</summary>
    /// <exception cref="System.ArgumentOutOfRangeException"><paramref name="threads"/> has a value that is zero or negative.</exception>
    public FragmentOptionsBuilder WithParallelThreads(int? threads) {
        if (threads is <= 0) {
            throw new System.ArgumentOutOfRangeException(nameof(threads));
        }

        _parallelThreads = threads;
        return this;
    }

    /// <summary>Creates the options from the current builder state.</summary>
    /// <exception cref="System.InvalidOperationException">No fragment size has been set.</exception>
    public FragmentOptions Build() {
        if (_fragmentSize is not int size) {
            throw new System.InvalidOperationException("FragmentSize must be provided.");
        }

        return new FragmentOptions {
            FragmentSize = size,
            DownloadInParallel = _downloadInParallel,
            ParallelThreads = _parallelThreads
        };
    }
}
|
||||
@@ -3,9 +3,9 @@ using Beam.Models;
|
||||
using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace Beam.Downloaders {
|
||||
public class SequentialDownloader<RawType, OutType> : IAsyncEnumerator<OutType> {
|
||||
public class SequentialDownloader<OutType> : IAsyncEnumerator<OutType> {
|
||||
public OutType Current { get; protected set; }
|
||||
public DownloadContext<RawType> Context { get; }
|
||||
public DownloadContext Context { get; }
|
||||
public ILogger? Logger { get; set; }
|
||||
public int LastOrder { get; set; } = 0;
|
||||
|
||||
@@ -13,7 +13,7 @@ namespace Beam.Downloaders {
|
||||
|
||||
public Func<IUnitDownloader<OutType>> GetUnitDownloader { get; set; }
|
||||
|
||||
public SequentialDownloader(DownloadContext<RawType> context, Func<DownloadContext<RawType>, IUnitDownloader<OutType>> getUnitDownloader, ILogger? logger = null) {
|
||||
public SequentialDownloader(DownloadContext context, Func<DownloadContext, IUnitDownloader<OutType>> getUnitDownloader, ILogger? logger = null) {
|
||||
Context = context;
|
||||
Logger = logger;
|
||||
LinksEnumerator = Context.Links.GetEnumerator();
|
||||
|
||||
@@ -3,10 +3,10 @@ using Beam.Models;
|
||||
using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace Beam.Downloaders {
|
||||
public class SequentialFragmentDownloader<RawType, OutType> : SequentialDownloader<RawType, Fragment<Ordered<OutType>>> {
|
||||
public class SequentialFragmentDownloader<OutType> : SequentialDownloader<Fragment<Ordered<OutType>>> {
|
||||
public SequentialFragmentDownloader(
|
||||
DownloadContext<RawType> context,
|
||||
Func<DownloadContext<RawType>, IUnitDownloader<Fragment<Ordered<OutType>>>> getUnitDownloader,
|
||||
DownloadContext context,
|
||||
Func<DownloadContext, IUnitDownloader<Fragment<Ordered<OutType>>>> getUnitDownloader,
|
||||
ILogger? logger = null)
|
||||
: base(context, getUnitDownloader, logger) {}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,48 @@
|
||||
using Beam.Models;
|
||||
|
||||
namespace Beam.Downloaders;
|
||||
|
||||
/// <summary>
/// Settings describing which skip predicates are configured and whether they
/// should be evaluated in parallel.
/// </summary>
/// <typeparam name="OutType">Type argument passed to <c>SkipPredicate</c>.</typeparam>
public class SkipPredicateOptions<OutType> {
    /// <summary>
    /// The configured skip predicates. The array itself may be <c>null</c>
    /// and individual entries may be <c>null</c>.
    /// </summary>
    public required SkipPredicate<OutType>?[]? SkipPredicates { get; init; }

    /// <summary>Whether the predicates should be evaluated in parallel. Defaults to <c>false</c>.</summary>
    public bool ProcessInParallel { get; init; }

    /// <summary>Optional thread count for parallel evaluation; <c>null</c> when not specified.</summary>
    public int? ParallelThreads { get; init; }
}
|
||||
|
||||
/// <summary>
/// Fluent builder that accumulates skip predicates and parallelism settings and
/// produces a <c>SkipPredicateOptions&lt;OutType&gt;</c>.
/// </summary>
/// <typeparam name="OutType">Type argument of the predicates being collected.</typeparam>
public class SkipPredicateOptionsBuilder<OutType> {
    // Plain fields: this is private builder state, not part of any property contract.
    private readonly List<SkipPredicate<OutType>?> _skipPredicates = [];
    private bool _processInParallel;
    private int? _parallelThreads;

    /// <summary>
    /// Adds a single predicate. When <paramref name="replace"/> is <c>true</c> the
    /// previously collected predicates are discarded first (default: append).
    /// </summary>
    public SkipPredicateOptionsBuilder<OutType> WithSkipPredicate(SkipPredicate<OutType> predicate, bool replace=false) {
        if (replace)
            _skipPredicates.Clear();
        _skipPredicates.Add(predicate);
        return this;
    }

    /// <summary>
    /// Adds several predicates. When <paramref name="replace"/> is <c>true</c> (the default)
    /// the previously collected predicates are discarded first.
    /// </summary>
    /// <exception cref="System.ArgumentNullException"><paramref name="predicates"/> is <c>null</c>.</exception>
    public SkipPredicateOptionsBuilder<OutType> WithSkipPredicates(SkipPredicate<OutType>[] predicates,
        bool replace = true) {
        // Validate before touching state: previously a null array cleared the list and
        // then threw from AddRange, leaving the builder silently emptied.
        if (predicates is null)
            throw new System.ArgumentNullException(nameof(predicates));

        if (replace)
            _skipPredicates.Clear();
        _skipPredicates.AddRange(predicates);
        return this;
    }

    /// <summary>Sets whether predicates should be processed in parallel.</summary>
    public SkipPredicateOptionsBuilder<OutType> ProcessInParallel(bool processInParallel = true) {
        _processInParallel = processInParallel;
        return this;
    }

    /// <summary>Sets the thread count used for parallel processing.</summary>
    /// <exception cref="System.ArgumentOutOfRangeException"><paramref name="parallelThreads"/> is zero or negative.</exception>
    public SkipPredicateOptionsBuilder<OutType> WithParallelThreads(int parallelThreads) {
        // Same rule as FragmentOptionsBuilder / FailurePredicateOptionsBuilder: fail at
        // configuration time instead of when the value is eventually consumed.
        if (parallelThreads <= 0)
            throw new System.ArgumentOutOfRangeException(nameof(parallelThreads));

        _parallelThreads = parallelThreads;
        return this;
    }

    /// <summary>Creates the options from a snapshot of the current builder state.</summary>
    public SkipPredicateOptions<OutType> Build() {
        return new SkipPredicateOptions<OutType>() {
            SkipPredicates = _skipPredicates.ToArray(),
            ParallelThreads = _parallelThreads,
            ProcessInParallel = _processInParallel
        };
    }
}
|
||||
@@ -1,4 +1,6 @@
|
||||
using Beam.Abstractions;
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
using System.Text;
|
||||
using Beam.Abstractions;
|
||||
using Beam.Models;
|
||||
using HtmlAgilityPack;
|
||||
using File = System.IO.File;
|
||||
@@ -11,12 +13,12 @@ namespace Beam.Downloaders {
|
||||
/// <param name="web"></param>
|
||||
/// <param name="transformer"></param>
|
||||
/// <param name="failurePredicate"></param>
|
||||
public class UnitDownloader<RawType, OutType>(UnitDownloaderOptions<RawType, OutType> options) : IUnitDownloader<OutType> where RawType : IDocument {
|
||||
public UnitDownloaderOptions<RawType, OutType> Options { get; } = options;
|
||||
public class UnitDownloader<OutType>(UnitDownloaderOptions<OutType> options) : IUnitDownloader<OutType> {
|
||||
public UnitDownloaderOptions<OutType> Options { get; } = options;
|
||||
public HttpClient Client => Options.Client;
|
||||
public virtual AsyncTransformer<RawType, OutType> Transformer => Options.AsyncTransformer;
|
||||
|
||||
public virtual AsyncDownloadFailurePredicate<RawType>?[]? FailurePredicates =>
|
||||
public virtual AsyncTransformer<ByteDocument, OutType> Transformer => Options.AsyncTransformer;
|
||||
|
||||
public virtual AsyncDownloadFailurePredicate<ByteDocument>?[]? FailurePredicates =>
|
||||
Options?.FailurePredicateOptions?.AsyncDownloadFailurePredicates;
|
||||
|
||||
public int LinksPerDownload { get; } = 1;
|
||||
@@ -70,7 +72,7 @@ namespace Beam.Downloaders {
|
||||
return new ByteDocument(url, bytes);
|
||||
}
|
||||
|
||||
protected virtual async Task<bool> IsFailure(RawType doc, CancellationToken ct) {
|
||||
protected virtual async Task<bool> IsFailure(ByteDocument doc, CancellationToken ct) {
|
||||
if (FailurePredicates is null)
|
||||
return false;
|
||||
if (!(Options?.FailurePredicateOptions?.ProcessInParallel ?? false))
|
||||
@@ -103,19 +105,18 @@ namespace Beam.Downloaders {
|
||||
return false;
|
||||
}
|
||||
|
||||
protected virtual async Task<RawType> _Download(string link, IProgress<IDownloadReport> progress, CancellationToken ct) {
|
||||
if (Options.DownloadFolder is not null && this is UnitDownloader<StringDocument, OutType>) {
|
||||
var path = Path.Combine(Options.DownloadFolder, Path.GetRandomFileName());
|
||||
protected virtual async Task<ByteDocument> _Download(string link, IProgress<IDownloadReport> progress, CancellationToken ct) {
|
||||
if (Options.DownloadFolder is not null) {
|
||||
var path = Path.Combine(Options.DownloadFolder, options.GetFileNameForDownload(link, []));
|
||||
await DownloadToFile(link, Options.BufferSize, path, progress, ct);
|
||||
return (RawType)(object)new StringDocument(link, path);
|
||||
return new ByteDocument(link, Encoding.UTF8.GetBytes(path));
|
||||
}
|
||||
if (this is UnitDownloader<ByteDocument, OutType>) {
|
||||
return (RawType)(object)(await DownloadToMemory(link, Options.BufferSize, progress, ct));
|
||||
else {
|
||||
return await DownloadToMemory(link, Options.BufferSize, progress, ct);
|
||||
}
|
||||
throw new NotSupportedException(Exceptions.Exceptions.unit_downloader_limited_support);
|
||||
}
|
||||
|
||||
protected virtual async Task<(bool, OutType?)> Transform(RawType download, CancellationToken ct) {
|
||||
protected virtual async Task<(bool, OutType?)> Transform(ByteDocument download, CancellationToken ct) {
|
||||
try {
|
||||
if (FailurePredicates is null || !(await IsFailure(download, ct)))
|
||||
return (true, await Transformer(download));
|
||||
@@ -131,6 +132,9 @@ namespace Beam.Downloaders {
|
||||
return (false, default);
|
||||
|
||||
downProgress ??= new Progress<IDownloadReport>();
|
||||
|
||||
if (ShouldSkip(link[0].Data, out var defaultType))
|
||||
return (true, defaultType);
|
||||
|
||||
OutType? ot = default;
|
||||
int tryCount = 0;
|
||||
@@ -147,5 +151,41 @@ namespace Beam.Downloaders {
|
||||
|
||||
return (false, ot);
|
||||
}
|
||||
|
||||
/// <summary>
/// Evaluates the configured skip predicates for <paramref name="link"/>.
/// </summary>
/// <param name="link">The link that is about to be downloaded.</param>
/// <param name="outType">
/// The value produced by the predicate that requested the skip; <c>default</c> when
/// no predicate matched.
/// </param>
/// <returns><c>true</c> if any non-null predicate returned <c>true</c>; otherwise <c>false</c>.</returns>
private bool ShouldSkip(string link, [NotNullWhen(true)] out OutType? outType) {
    outType = default;

    var skipOptions = Options.SkipPredicateOptions;
    var predicates = skipOptions?.SkipPredicates;
    if (skipOptions is null || predicates is null)
        return false;

    if (!skipOptions.ProcessInParallel) {
        foreach (var predicate in predicates) {
            if (predicate is not null && predicate(link, out outType))
                return true;
        }

        // Nothing matched: do not leak a value written by a predicate that returned false.
        outType = default;
        return false;
    }

    // An out parameter cannot be captured by a lambda, so the parallel branch records
    // its result in locals. Flag and value are published together under one lock so
    // they always originate from the same predicate (first match wins).
    var gate = new object();
    var matched = false;
    OutType? matchedValue = default;

    Parallel.ForEach(
        predicates,
        new ParallelOptions {
            // Thread count comes from the skip-predicate settings (it was previously read
            // from the failure-predicate settings); 4 remains the fallback.
            MaxDegreeOfParallelism = skipOptions.ParallelThreads ?? 4
        },
        (predicate, loopState) => {
            if (loopState.ShouldExitCurrentIteration || predicate is null)
                return;
            if (!predicate(link, out var candidate))
                return;

            lock (gate) {
                if (!matched) {
                    matched = true;
                    matchedValue = candidate;
                }
            }

            loopState.Break();
        });

    outType = matchedValue;
    return matched;
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,198 +1,38 @@
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
using Beam.Models;
|
||||
|
||||
namespace Beam.Downloaders;
|
||||
|
||||
public record class UnitDownloaderOptions<RawType, OutType> {
|
||||
public record class UnitDownloaderOptions<OutType> {
|
||||
public HttpClient Client { get; init; } = new();
|
||||
|
||||
public DownloadTarget Target { get; init; } = DownloadTarget.URL;
|
||||
|
||||
public FailurePredicateOptions<RawType>? FailurePredicateOptions { get; init; }
|
||||
public SkipPredicateOptions<OutType>? SkipPredicateOptions { get; init; }
|
||||
public FailurePredicateOptions<ByteDocument>? FailurePredicateOptions { get; init; }
|
||||
public FragmentOptions? FragmentOptions { get; init; }
|
||||
public required AsyncTransformer<RawType, OutType> AsyncTransformer { get; init; }
|
||||
public required AsyncTransformer<ByteDocument, OutType> AsyncTransformer { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// The location where the download is stored.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// If not defined, <c>UnitDownloader.TryDownload()</c> downloads to memory.
|
||||
/// </remarks>
|
||||
public string? DownloadFolder { get; init; } = null;
|
||||
public int BufferSize { get; init; } = 80 * 1024; // 80kb
|
||||
|
||||
/// <summary>
/// Derives a file name from the XxHash64 of the URL's UTF-8 bytes followed by
/// <paramref name="additionalData"/>. The hash is Base64-encoded with '+' and '/'
/// replaced by '-' and '_' and the '=' padding removed.
/// </summary>
/// <param name="url">The download URL.</param>
/// <param name="additionalData">Extra bytes mixed into the hash; may be empty.</param>
/// <returns>The encoded hash, usable as a file name.</returns>
public string GetFileNameForDownload(string url, byte[] additionalData) {
    var urlBytes = Encoding.UTF8.GetBytes(url);

    // Concatenate url bytes and additional data into one hash input.
    var payload = new byte[urlBytes.Length + additionalData.Length];
    urlBytes.CopyTo(payload, 0);
    additionalData.CopyTo(payload, urlBytes.Length);

    var encoded = Convert.ToBase64String(System.IO.Hashing.XxHash64.Hash(payload));

    // Base64 only ever carries '=' as trailing padding, so trimming it at the end is sufficient.
    return encoded.TrimEnd('=').Replace('+', '-').Replace('/', '_');
}
|
||||
}
|
||||
|
||||
public record class FailurePredicateOptions<RawType> {
|
||||
public required AsyncDownloadFailurePredicate<RawType>?[]? AsyncDownloadFailurePredicates { get; init; }
|
||||
public bool ProcessInParallel { get; init; } = false;
|
||||
public int? ParallelThreads { get; init; }
|
||||
}
|
||||
// ---------- UnitDownloaderOptions Builder ----------
|
||||
|
||||
public record class FragmentOptions {
|
||||
public required int FragmentSize { get; init; }
|
||||
public bool DownloadInParallel { get; init; } = false;
|
||||
public int? ParallelThreads { get; init; }
|
||||
}
|
||||
// ---------- FailurePredicateOptions Builder ----------
|
||||
|
||||
|
||||
// ---------- UnitDownloaderOptions Builder ----------
|
||||
public sealed class UnitDownloaderOptionsBuilder<TRaw, TOut>
|
||||
{
|
||||
private HttpClient _client = new HttpClient();
|
||||
private FailurePredicateOptions<TRaw>? _failureOptions;
|
||||
private FragmentOptions? _fragmentOptions;
|
||||
private AsyncTransformer<TRaw, TOut>? _asyncTransformer;
|
||||
private string? _downloadFolder = null;
|
||||
private int _bufferSize = 80 * 1024;
|
||||
|
||||
public UnitDownloaderOptionsBuilder<TRaw, TOut> WithClient(HttpClient client)
|
||||
{
|
||||
_client = client ?? throw new System.ArgumentNullException(nameof(client));
|
||||
return this;
|
||||
}
|
||||
|
||||
public UnitDownloaderOptionsBuilder<TRaw, TOut> WithFailurePredicateOptions(FailurePredicateOptions<TRaw>? options)
|
||||
{
|
||||
_failureOptions = options;
|
||||
return this;
|
||||
}
|
||||
|
||||
public UnitDownloaderOptionsBuilder<TRaw, TOut> WithFailurePredicates(System.Action<FailurePredicateOptionsBuilder<TRaw>> configure)
|
||||
{
|
||||
if (configure == null) throw new System.ArgumentNullException(nameof(configure));
|
||||
var b = new FailurePredicateOptionsBuilder<TRaw>();
|
||||
configure(b);
|
||||
_failureOptions = b.Build();
|
||||
return this;
|
||||
}
|
||||
|
||||
public UnitDownloaderOptionsBuilder<TRaw, TOut> WithFragmentOptions(FragmentOptions? options)
|
||||
{
|
||||
_fragmentOptions = options;
|
||||
return this;
|
||||
}
|
||||
|
||||
public UnitDownloaderOptionsBuilder<TRaw, TOut> WithFragments(System.Action<FragmentOptionsBuilder> configure)
|
||||
{
|
||||
if (configure == null) throw new System.ArgumentNullException(nameof(configure));
|
||||
var b = new FragmentOptionsBuilder();
|
||||
configure(b);
|
||||
_fragmentOptions = b.Build();
|
||||
return this;
|
||||
}
|
||||
|
||||
public UnitDownloaderOptionsBuilder<TRaw, TOut> WithAsyncTransformer(AsyncTransformer<TRaw, TOut> transformer)
|
||||
{
|
||||
_asyncTransformer = transformer ?? throw new System.ArgumentNullException(nameof(transformer));
|
||||
return this;
|
||||
}
|
||||
|
||||
public UnitDownloaderOptionsBuilder<TRaw, TOut> WithDownloadFolder(string? downloadFolder)
|
||||
{
|
||||
_downloadFolder = downloadFolder;
|
||||
return this;
|
||||
}
|
||||
|
||||
public UnitDownloaderOptionsBuilder<TRaw, TOut> WithBufferSize(int bytes)
|
||||
{
|
||||
if (bytes <= 0) throw new System.ArgumentOutOfRangeException(nameof(bytes));
|
||||
_bufferSize = bytes;
|
||||
return this;
|
||||
}
|
||||
|
||||
public UnitDownloaderOptions<TRaw, TOut> Build()
|
||||
{
|
||||
if (_asyncTransformer == null)
|
||||
throw new System.InvalidOperationException("AsyncTransformer must be provided.");
|
||||
|
||||
return new UnitDownloaderOptions<TRaw, TOut>
|
||||
{
|
||||
Client = _client,
|
||||
FailurePredicateOptions = _failureOptions,
|
||||
FragmentOptions = _fragmentOptions,
|
||||
AsyncTransformer = _asyncTransformer,
|
||||
DownloadFolder = _downloadFolder,
|
||||
BufferSize = _bufferSize
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// ---------- FailurePredicateOptions Builder ----------
|
||||
public sealed class FailurePredicateOptionsBuilder<TRaw>
|
||||
{
|
||||
private readonly System.Collections.Generic.List<AsyncDownloadFailurePredicate<TRaw>?> _predicates =
|
||||
new System.Collections.Generic.List<AsyncDownloadFailurePredicate<TRaw>?>();
|
||||
private bool _processInParallel = false;
|
||||
private int? _parallelThreads = null;
|
||||
|
||||
public FailurePredicateOptionsBuilder<TRaw> WithPredicate(AsyncDownloadFailurePredicate<TRaw>? predicate)
|
||||
{
|
||||
_predicates.Add(predicate);
|
||||
return this;
|
||||
}
|
||||
|
||||
public FailurePredicateOptionsBuilder<TRaw> WithPredicates(System.Collections.Generic.IEnumerable<AsyncDownloadFailurePredicate<TRaw>?> predicates)
|
||||
{
|
||||
if (predicates == null) throw new System.ArgumentNullException(nameof(predicates));
|
||||
_predicates.AddRange(predicates);
|
||||
return this;
|
||||
}
|
||||
|
||||
public FailurePredicateOptionsBuilder<TRaw> WithPredicates(params AsyncDownloadFailurePredicate<TRaw>?[] predicates)
|
||||
{
|
||||
_predicates.Clear();
|
||||
if (predicates != null) _predicates.AddRange(predicates);
|
||||
return this;
|
||||
}
|
||||
|
||||
public FailurePredicateOptionsBuilder<TRaw> WithProcessInParallel(bool value = true)
|
||||
{
|
||||
_processInParallel = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
public FailurePredicateOptionsBuilder<TRaw> WithParallelThreads(int? threads)
|
||||
{
|
||||
if (threads.HasValue && threads.Value <= 0)
|
||||
throw new System.ArgumentOutOfRangeException(nameof(threads));
|
||||
_parallelThreads = threads;
|
||||
return this;
|
||||
}
|
||||
|
||||
public FailurePredicateOptions<TRaw> Build()
|
||||
{
|
||||
var arr = _predicates.Count == 0 ? [] : _predicates.ToArray();
|
||||
return new FailurePredicateOptions<TRaw>
|
||||
{
|
||||
AsyncDownloadFailurePredicates = arr,
|
||||
ProcessInParallel = _processInParallel,
|
||||
ParallelThreads = _parallelThreads
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// ---------- FragmentOptions Builder ----------
|
||||
public sealed class FragmentOptionsBuilder {
|
||||
private int? _fragmentSize;
|
||||
private bool _downloadInParallel = false;
|
||||
private int? _parallelThreads = null;
|
||||
|
||||
public FragmentOptionsBuilder WithFragmentSize(int bytes) {
|
||||
if (bytes <= 0) throw new System.ArgumentOutOfRangeException(nameof(bytes));
|
||||
_fragmentSize = bytes;
|
||||
return this;
|
||||
}
|
||||
|
||||
public FragmentOptionsBuilder WithDownloadInParallel(bool value = true) {
|
||||
_downloadInParallel = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
public FragmentOptionsBuilder WithParallelThreads(int? threads) {
|
||||
if (threads.HasValue && threads.Value <= 0)
|
||||
throw new System.ArgumentOutOfRangeException(nameof(threads));
|
||||
_parallelThreads = threads;
|
||||
return this;
|
||||
}
|
||||
|
||||
public FragmentOptions Build() {
|
||||
if (!_fragmentSize.HasValue)
|
||||
throw new System.InvalidOperationException("FragmentSize must be provided.");
|
||||
|
||||
return new FragmentOptions {
|
||||
FragmentSize = _fragmentSize.Value,
|
||||
DownloadInParallel = _downloadInParallel,
|
||||
ParallelThreads = _parallelThreads
|
||||
};
|
||||
}
|
||||
}
|
||||
// ---------- FragmentOptions Builder ----------
|
||||
@@ -0,0 +1,107 @@
|
||||
using Beam.Models;
|
||||
|
||||
namespace Beam.Downloaders;
|
||||
|
||||
/// <summary>
/// Fluent builder for <c>UnitDownloaderOptions&lt;OutType&gt;</c>.
/// </summary>
/// <remarks>
/// Failure and skip predicates can be configured incrementally through the nested
/// builders, or supplied wholesale through the <c>With…Options</c> methods; a wholesale
/// value, when set, takes precedence over the nested builder.
/// </remarks>
/// <typeparam name="OutType">Result type produced by the configured transformer.</typeparam>
public sealed class UnitDownloaderOptionsBuilder<OutType> {
    private readonly FailurePredicateOptionsBuilder<ByteDocument> _failureOptionsBuilder = new();
    private readonly SkipPredicateOptionsBuilder<OutType> _skipPredicateOptionsBuilder = new();

    private DownloadTarget _target = DownloadTarget.URL;
    private HttpClient _client = new HttpClient();
    private FailurePredicateOptions<ByteDocument>? _failurePredicateOptionsOverride;
    private SkipPredicateOptions<OutType>? _skipPredicateOptionsOverride;
    private FragmentOptions? _fragmentOptions;
    private AsyncTransformer<ByteDocument, OutType>? _asyncTransformer;
    private string? _downloadFolder;
    private int _bufferSize = 80 * 1024;

    /// <summary>Sets the download target.</summary>
    public UnitDownloaderOptionsBuilder<OutType> WithTarget(DownloadTarget target) {
        _target = target;
        return this;
    }

    /// <summary>Sets the HTTP client to use.</summary>
    /// <exception cref="System.ArgumentNullException"><paramref name="client"/> is <c>null</c>.</exception>
    public UnitDownloaderOptionsBuilder<OutType> WithClient(HttpClient client) {
        _client = client ?? throw new System.ArgumentNullException(nameof(client));
        return this;
    }

    /// <summary>
    /// Supplies complete failure-predicate options that take precedence over anything
    /// configured via <see cref="WithFailurePredicates"/>; <c>null</c> removes the override.
    /// </summary>
    public UnitDownloaderOptionsBuilder<OutType> WithFailurePredicateOptions(FailurePredicateOptions<ByteDocument>? options) {
        _failurePredicateOptionsOverride = options;
        return this;
    }

    /// <summary>Configures failure predicates through the nested builder.</summary>
    /// <exception cref="System.ArgumentNullException"><paramref name="configure"/> is <c>null</c>.</exception>
    public UnitDownloaderOptionsBuilder<OutType> WithFailurePredicates(System.Action<FailurePredicateOptionsBuilder<ByteDocument>> configure) {
        if (configure == null) throw new System.ArgumentNullException(nameof(configure));
        configure(_failureOptionsBuilder);
        return this;
    }

    /// <summary>Sets (or clears, with <c>null</c>) the fragment options.</summary>
    public UnitDownloaderOptionsBuilder<OutType> WithFragmentOptions(FragmentOptions? options) {
        _fragmentOptions = options;
        return this;
    }

    /// <summary>Configures skip predicates through the nested builder.</summary>
    /// <exception cref="System.ArgumentNullException"><paramref name="configure"/> is <c>null</c>.</exception>
    public UnitDownloaderOptionsBuilder<OutType> WithSkipPredicates(System.Action<SkipPredicateOptionsBuilder<OutType>> configure) {
        if (configure == null) throw new System.ArgumentNullException(nameof(configure));
        configure(_skipPredicateOptionsBuilder);
        return this;
    }

    /// <summary>
    /// Supplies complete skip-predicate options that take precedence over anything
    /// configured via <see cref="WithSkipPredicates"/>; <c>null</c> removes the override.
    /// </summary>
    public UnitDownloaderOptionsBuilder<OutType> WithSkipPredicateOptions(
        SkipPredicateOptions<OutType>? skipPredicateOptions) {
        _skipPredicateOptionsOverride = skipPredicateOptions;
        return this;
    }

    /// <summary>Builds fragment options through a fresh <c>FragmentOptionsBuilder</c>.</summary>
    /// <exception cref="System.ArgumentNullException"><paramref name="configure"/> is <c>null</c>.</exception>
    public UnitDownloaderOptionsBuilder<OutType> WithFragments(System.Action<FragmentOptionsBuilder> configure) {
        if (configure == null) throw new System.ArgumentNullException(nameof(configure));
        var b = new FragmentOptionsBuilder();
        configure(b);
        _fragmentOptions = b.Build();
        return this;
    }

    /// <summary>Sets the transformer applied to a downloaded <c>ByteDocument</c>. Required.</summary>
    /// <exception cref="System.ArgumentNullException"><paramref name="transformer"/> is <c>null</c>.</exception>
    public UnitDownloaderOptionsBuilder<OutType> WithAsyncTransformer(AsyncTransformer<ByteDocument, OutType> transformer) {
        _asyncTransformer = transformer ?? throw new System.ArgumentNullException(nameof(transformer));
        return this;
    }

    /// <summary>Sets (or clears, with <c>null</c>) the download folder.</summary>
    public UnitDownloaderOptionsBuilder<OutType> WithDownloadFolder(string? downloadFolder) {
        _downloadFolder = downloadFolder;
        return this;
    }

    /// <summary>Sets the buffer size in bytes.</summary>
    /// <exception cref="System.ArgumentOutOfRangeException"><paramref name="bytes"/> is zero or negative.</exception>
    public UnitDownloaderOptionsBuilder<OutType> WithBufferSize(int bytes) {
        if (bytes <= 0) throw new System.ArgumentOutOfRangeException(nameof(bytes));
        _bufferSize = bytes;
        return this;
    }

    /// <summary>Creates the options from the current builder state.</summary>
    /// <exception cref="System.InvalidOperationException">No transformer has been set.</exception>
    public UnitDownloaderOptions<OutType> Build() {
        if (_asyncTransformer == null)
            throw new System.InvalidOperationException("AsyncTransformer must be provided.");

        // Resolve into locals rather than caching into the override fields: caching made
        // every later Build() ignore predicates added through the nested builders.
        var failureOptions = _failurePredicateOptionsOverride ?? _failureOptionsBuilder.Build();
        var skipOptions = _skipPredicateOptionsOverride ?? _skipPredicateOptionsBuilder.Build();

        return new UnitDownloaderOptions<OutType> {
            Target = _target,
            Client = _client,
            FailurePredicateOptions = failureOptions,
            SkipPredicateOptions = skipOptions,
            FragmentOptions = _fragmentOptions,
            AsyncTransformer = _asyncTransformer,
            DownloadFolder = _downloadFolder,
            BufferSize = _bufferSize
        };
    }
}
|
||||
@@ -5,12 +5,12 @@ using HtmlAgilityPack;
|
||||
using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace Beam.Downloaders {
|
||||
public class UnitFragmentDownloader<RawType, OutType>(UnitDownloaderOptions<RawType, OutType> options,
|
||||
IUnitDownloader<OutType>? internalDownloader = null) : IUnitDownloader<Fragment<Ordered<OutType>>> where RawType : IDocument {
|
||||
public class UnitFragmentDownloader<OutType>(UnitDownloaderOptions<OutType> options,
|
||||
IUnitDownloader<OutType>? internalDownloader = null) : IUnitDownloader<Fragment<Ordered<OutType>>> {
|
||||
|
||||
public UnitDownloaderOptions<RawType, OutType> Options { get; } = options;
|
||||
public UnitDownloaderOptions<OutType> Options { get; } = options;
|
||||
public int LinksPerDownload { get; set; }
|
||||
private IUnitDownloader<OutType> UnitDownloader { get; } = internalDownloader ?? new UnitDownloader<RawType, OutType>(options);
|
||||
private IUnitDownloader<OutType> UnitDownloader { get; } = internalDownloader ?? new UnitDownloader<OutType>(options);
|
||||
|
||||
async Task<(bool, Fragment<Ordered<OutType>>?)> IUnitDownloader<Fragment<Ordered<OutType>>>.TryDownload(IOrdered<string>[] link, CancellationToken ct, int maximumRetryCount, IProgress<IDownloadReport>? downProgress, IProgress<IRetryReport>? tryProgress) {
|
||||
Fragment<Ordered<OutType>> fragment = new Fragment<Ordered<OutType>>(link.Length);
|
||||
|
||||
Reference in New Issue
Block a user