diff --git a/Beam.Downloaders/Beam.Downloaders.csproj b/Beam.Downloaders/Beam.Downloaders.csproj
index ab69f30..1526f93 100644
--- a/Beam.Downloaders/Beam.Downloaders.csproj
+++ b/Beam.Downloaders/Beam.Downloaders.csproj
@@ -14,6 +14,7 @@
+
diff --git a/Beam.Downloaders/DownloadContext.cs b/Beam.Downloaders/DownloadContext.cs
index 96009af..59e0ab1 100644
--- a/Beam.Downloaders/DownloadContext.cs
+++ b/Beam.Downloaders/DownloadContext.cs
@@ -8,14 +8,14 @@ namespace Beam.Downloaders {
//public delegate Task AsyncHtmlTransformer(HtmlDocument doc);
//public delegate Task AsyncBinaryTransformer(byte[] bin);
- public class DownloadContext {
+ public class DownloadContext {
private bool disposedValue;
public HttpClient Client { get; }
public HtmlWeb Web { get; }
public IProgress? DownloadReporter { get; set; }
public IProgress? RetryReporter { get; set; }
- public AsyncDownloadFailurePredicate?[]? AsyncFailurePredicates { get; }
+ public AsyncDownloadFailurePredicate?[]? AsyncFailurePredicates { get; }
public TimeSpan TimeOut { get; set; }
public IEnumerable Links { get; }
public CancellationToken CancellationToken { get; }
@@ -28,7 +28,7 @@ namespace Beam.Downloaders {
CancellationToken cancellationToken = default,
IProgress? downloadReporter = null,
IProgress? retryReporter = null,
- AsyncDownloadFailurePredicate?[]? asyncFailurePredicates = null,
+ AsyncDownloadFailurePredicate?[]? asyncFailurePredicates = null,
TimeSpan? timeOut = null,
ILogger? downloadLogger = null) {
ArgumentNullException.ThrowIfNull(web, nameof(web));
diff --git a/Beam.Downloaders/DownloadContextBuilder.cs b/Beam.Downloaders/DownloadContextBuilder.cs
index d92277a..41f5e13 100644
--- a/Beam.Downloaders/DownloadContextBuilder.cs
+++ b/Beam.Downloaders/DownloadContextBuilder.cs
@@ -5,12 +5,12 @@ using Microsoft.Extensions.Logging;
namespace Beam.Downloaders {
- public class DownloadContextBuilder {
+ public class DownloadContextBuilder {
private HtmlWeb _web;
private HttpClient _client;
private IProgress? _downloadReporter;
private IProgress? _retryReporter;
- private AsyncDownloadFailurePredicate?[] _asyncFailurePredicates = [];
+ private AsyncDownloadFailurePredicate?[] _asyncFailurePredicates = [];
private TimeSpan _timeOut;
private IEnumerable _links;
private CancellationToken _cancellationToken;
@@ -26,60 +26,60 @@ namespace Beam.Downloaders {
_links = [];
}
- public DownloadContextBuilder WithWeb(HtmlWeb web) {
+ public DownloadContextBuilder WithWeb(HtmlWeb web) {
_web = web;
return this;
}
- public DownloadContextBuilder WithClient(HttpClient client) {
+ public DownloadContextBuilder WithClient(HttpClient client) {
_client = client;
return this;
}
- public DownloadContextBuilder WithDownloadReporter(IProgress downloadReporter) {
+ public DownloadContextBuilder WithDownloadReporter(IProgress downloadReporter) {
_downloadReporter = downloadReporter;
return this;
}
- public DownloadContextBuilder WithRetryReporter(IProgress retryReporter) {
+ public DownloadContextBuilder WithRetryReporter(IProgress retryReporter) {
_retryReporter = retryReporter;
return this;
}
- public DownloadContextBuilder WithAsyncFailurePredicates(params AsyncDownloadFailurePredicate[] predicates) {
+ public DownloadContextBuilder WithAsyncFailurePredicates(params AsyncDownloadFailurePredicate[] predicates) {
_asyncFailurePredicates = predicates;
return this;
}
- public DownloadContextBuilder WithTimeOut(TimeSpan timeOut) {
+ public DownloadContextBuilder WithTimeOut(TimeSpan timeOut) {
_timeOut = timeOut;
return this;
}
- public DownloadContextBuilder WithLinks(IEnumerable links) {
+ public DownloadContextBuilder WithLinks(IEnumerable links) {
_links = links;
return this;
}
- public DownloadContextBuilder WithCancellationToken(CancellationToken cancellationToken) {
+ public DownloadContextBuilder WithCancellationToken(CancellationToken cancellationToken) {
_cancellationToken = cancellationToken;
return this;
}
- public DownloadContextBuilder WithCache(DocumentCache cache) {
+ public DownloadContextBuilder WithCache(DocumentCache cache) {
_cache = cache;
return this;
}
- public DownloadContextBuilder WithDownloadLogger(ILogger downloadLogger) {
+ public DownloadContextBuilder WithDownloadLogger(ILogger downloadLogger) {
_downloadLogger = downloadLogger;
return this;
}
- public DownloadContext Build() {
+ public DownloadContext Build() {
// Construct the DownloadContext using the collected values.
- var context = new DownloadContext(
+ var context = new DownloadContext(
web: _web,
client: _client,
links: _links,
@@ -100,15 +100,15 @@ namespace Beam.Downloaders {
return context;
}
- public static DownloadContextBuilder FromContext(DownloadContext existing) {
+ public static DownloadContextBuilder FromContext(DownloadContext existing) {
if (existing == null) throw new ArgumentNullException(nameof(existing));
- return new DownloadContextBuilder(existing.Client, existing.Web)
+ return new DownloadContextBuilder(existing.Client, existing.Web)
.WithLinks(existing.Links)
.WithCancellationToken(existing.CancellationToken)
.WithDownloadReporter(existing.DownloadReporter!)
.WithRetryReporter(existing.RetryReporter!)
- .WithAsyncFailurePredicates(existing.AsyncFailurePredicates ?? Array.Empty>())
+ .WithAsyncFailurePredicates(existing.AsyncFailurePredicates ?? Array.Empty>())
.WithTimeOut(existing.TimeOut)
.WithDownloadLogger(existing.DownloadLogger!)
.WithCache(existing.Cache);
diff --git a/Beam.Downloaders/FailurePredicateOptions.cs b/Beam.Downloaders/FailurePredicateOptions.cs
new file mode 100644
index 0000000..e804577
--- /dev/null
+++ b/Beam.Downloaders/FailurePredicateOptions.cs
@@ -0,0 +1,9 @@
+using Beam.Models;
+
+namespace Beam.Downloaders;
+
+public record class FailurePredicateOptions {
+ public required AsyncDownloadFailurePredicate?[]? AsyncDownloadFailurePredicates { get; init; }
+ public bool ProcessInParallel { get; init; } = false;
+ public int? ParallelThreads { get; init; }
+}
\ No newline at end of file
diff --git a/Beam.Downloaders/FailurePredicateOptionsBuilder.cs b/Beam.Downloaders/FailurePredicateOptionsBuilder.cs
new file mode 100644
index 0000000..d5ec367
--- /dev/null
+++ b/Beam.Downloaders/FailurePredicateOptionsBuilder.cs
@@ -0,0 +1,56 @@
+using Beam.Models;
+
+namespace Beam.Downloaders;
+
+public sealed class FailurePredicateOptionsBuilder
+{
+ private readonly System.Collections.Generic.List?> _predicates =
+ new System.Collections.Generic.List?>();
+ private bool _processInParallel = false;
+ private int? _parallelThreads = null;
+
+ public FailurePredicateOptionsBuilder WithPredicate(AsyncDownloadFailurePredicate? predicate)
+ {
+ _predicates.Add(predicate);
+ return this;
+ }
+
+ public FailurePredicateOptionsBuilder WithPredicates(System.Collections.Generic.IEnumerable?> predicates)
+ {
+ if (predicates == null) throw new System.ArgumentNullException(nameof(predicates));
+ _predicates.AddRange(predicates);
+ return this;
+ }
+
+ public FailurePredicateOptionsBuilder WithPredicates(params AsyncDownloadFailurePredicate?[] predicates)
+ {
+ _predicates.Clear();
+ if (predicates != null) _predicates.AddRange(predicates);
+ return this;
+ }
+
+ public FailurePredicateOptionsBuilder WithProcessInParallel(bool value = true)
+ {
+ _processInParallel = value;
+ return this;
+ }
+
+ public FailurePredicateOptionsBuilder WithParallelThreads(int? threads)
+ {
+ if (threads.HasValue && threads.Value <= 0)
+ throw new System.ArgumentOutOfRangeException(nameof(threads));
+ _parallelThreads = threads;
+ return this;
+ }
+
+ public FailurePredicateOptions Build()
+ {
+ var arr = _predicates.Count == 0 ? [] : _predicates.ToArray();
+ return new FailurePredicateOptions
+ {
+ AsyncDownloadFailurePredicates = arr,
+ ProcessInParallel = _processInParallel,
+ ParallelThreads = _parallelThreads
+ };
+ }
+}
\ No newline at end of file
diff --git a/Beam.Downloaders/FragmentOptions.cs b/Beam.Downloaders/FragmentOptions.cs
new file mode 100644
index 0000000..a00c698
--- /dev/null
+++ b/Beam.Downloaders/FragmentOptions.cs
@@ -0,0 +1,7 @@
+namespace Beam.Downloaders;
+
+public record class FragmentOptions {
+ public required int FragmentSize { get; init; }
+ public bool DownloadInParallel { get; init; } = false;
+ public int? ParallelThreads { get; init; }
+}
\ No newline at end of file
diff --git a/Beam.Downloaders/FragmentOptionsBuilder.cs b/Beam.Downloaders/FragmentOptionsBuilder.cs
new file mode 100644
index 0000000..49ea521
--- /dev/null
+++ b/Beam.Downloaders/FragmentOptionsBuilder.cs
@@ -0,0 +1,36 @@
+namespace Beam.Downloaders;
+
+/// <summary>Fluent builder for <see cref="FragmentOptions"/>.</summary>
+public sealed class FragmentOptionsBuilder {
+    private int? _fragmentSize;
+    private bool _downloadInParallel;
+    private int? _parallelThreads;
+
+    /// <summary>Sets the required fragment size; throws when <paramref name="bytes"/> is not positive.</summary>
+    public FragmentOptionsBuilder WithFragmentSize(int bytes) {
+        _fragmentSize = bytes > 0 ? bytes : throw new System.ArgumentOutOfRangeException(nameof(bytes));
+        return this;
+    }
+
+    /// <summary>Turns parallel fragment download on (the default argument) or off.</summary>
+    public FragmentOptionsBuilder WithDownloadInParallel(bool value = true) {
+        _downloadInParallel = value;
+        return this;
+    }
+
+    /// <summary>Sets the thread count; <c>null</c> is stored as-is, any other value must be positive.</summary>
+    public FragmentOptionsBuilder WithParallelThreads(int? threads) {
+        if (threads is <= 0)
+            throw new System.ArgumentOutOfRangeException(nameof(threads));
+        _parallelThreads = threads;
+        return this;
+    }
+
+    /// <summary>Creates the options; throws <see cref="System.InvalidOperationException"/> when no fragment size was set.</summary>
+    public FragmentOptions Build() {
+        if (_fragmentSize is not int size)
+            throw new System.InvalidOperationException("FragmentSize must be provided.");
+
+        return new FragmentOptions { FragmentSize = size, DownloadInParallel = _downloadInParallel, ParallelThreads = _parallelThreads };
+    }
+}
\ No newline at end of file
diff --git a/Beam.Downloaders/SequentialDownloader.cs b/Beam.Downloaders/SequentialDownloader.cs
index 378000b..2f7db22 100644
--- a/Beam.Downloaders/SequentialDownloader.cs
+++ b/Beam.Downloaders/SequentialDownloader.cs
@@ -3,9 +3,9 @@ using Beam.Models;
using Microsoft.Extensions.Logging;
namespace Beam.Downloaders {
- public class SequentialDownloader : IAsyncEnumerator {
+ public class SequentialDownloader : IAsyncEnumerator {
public OutType Current { get; protected set; }
- public DownloadContext Context { get; }
+ public DownloadContext Context { get; }
public ILogger? Logger { get; set; }
public int LastOrder { get; set; } = 0;
@@ -13,7 +13,7 @@ namespace Beam.Downloaders {
public Func> GetUnitDownloader { get; set; }
- public SequentialDownloader(DownloadContext context, Func, IUnitDownloader> getUnitDownloader, ILogger? logger = null) {
+ public SequentialDownloader(DownloadContext context, Func> getUnitDownloader, ILogger? logger = null) {
Context = context;
Logger = logger;
LinksEnumerator = Context.Links.GetEnumerator();
diff --git a/Beam.Downloaders/SequentialFragmentDownloader.cs b/Beam.Downloaders/SequentialFragmentDownloader.cs
index b4c40d5..a7a1274 100644
--- a/Beam.Downloaders/SequentialFragmentDownloader.cs
+++ b/Beam.Downloaders/SequentialFragmentDownloader.cs
@@ -3,10 +3,10 @@ using Beam.Models;
using Microsoft.Extensions.Logging;
namespace Beam.Downloaders {
- public class SequentialFragmentDownloader : SequentialDownloader>> {
+ public class SequentialFragmentDownloader : SequentialDownloader>> {
public SequentialFragmentDownloader(
- DownloadContext context,
- Func, IUnitDownloader>>> getUnitDownloader,
+ DownloadContext context,
+ Func>>> getUnitDownloader,
ILogger? logger = null)
: base(context, getUnitDownloader, logger) {}
}
diff --git a/Beam.Downloaders/SkipPredicateOptions.cs b/Beam.Downloaders/SkipPredicateOptions.cs
new file mode 100644
index 0000000..2955cf7
--- /dev/null
+++ b/Beam.Downloaders/SkipPredicateOptions.cs
@@ -0,0 +1,48 @@
+using Beam.Models;
+
+namespace Beam.Downloaders;
+
+public class SkipPredicateOptions {
+ public required SkipPredicate?[]? SkipPredicates { get; init; }
+ public bool ProcessInParallel { get; init; } = false;
+ public int? ParallelThreads { get; init; }
+}
+
+public class SkipPredicateOptionsBuilder {
+ private List?> _skipPredicates { get; set; } = [];
+ private bool _processInParallel { get; set; } = false;
+ private int? _parallelThreads { get; set; }
+
+ public SkipPredicateOptionsBuilder WithSkipPredicate(SkipPredicate predicate, bool replace=false) {
+ if (replace)
+ _skipPredicates.Clear();
+ _skipPredicates.Add(predicate);
+ return this;
+ }
+
+ public SkipPredicateOptionsBuilder WithSkipPredicates(SkipPredicate[] predicates,
+ bool replace = true) {
+ if (replace)
+ _skipPredicates.Clear();
+ _skipPredicates.AddRange(predicates);
+ return this;
+ }
+
+ public SkipPredicateOptionsBuilder ProcessInParallel(bool processInParallel = true) {
+ _processInParallel = processInParallel;
+ return this;
+ }
+
+ public SkipPredicateOptionsBuilder WithParallelThreads(int parallelThreads) {
+ _parallelThreads = parallelThreads;
+ return this;
+ }
+
+ public SkipPredicateOptions Build() {
+ return new SkipPredicateOptions() {
+ SkipPredicates = _skipPredicates.ToArray(),
+ ParallelThreads = _parallelThreads,
+ ProcessInParallel = _processInParallel
+ };
+ }
+}
\ No newline at end of file
diff --git a/Beam.Downloaders/UnitDownloader.cs b/Beam.Downloaders/UnitDownloader.cs
index 400f47a..cad1ccc 100644
--- a/Beam.Downloaders/UnitDownloader.cs
+++ b/Beam.Downloaders/UnitDownloader.cs
@@ -1,4 +1,6 @@
-using Beam.Abstractions;
+using System.Diagnostics.CodeAnalysis;
+using System.Text;
+using Beam.Abstractions;
using Beam.Models;
using HtmlAgilityPack;
using File = System.IO.File;
@@ -11,12 +13,12 @@ namespace Beam.Downloaders {
///
///
///
- public class UnitDownloader(UnitDownloaderOptions options) : IUnitDownloader where RawType : IDocument {
- public UnitDownloaderOptions Options { get; } = options;
+ public class UnitDownloader(UnitDownloaderOptions options) : IUnitDownloader {
+ public UnitDownloaderOptions Options { get; } = options;
public HttpClient Client => Options.Client;
- public virtual AsyncTransformer Transformer => Options.AsyncTransformer;
-
- public virtual AsyncDownloadFailurePredicate?[]? FailurePredicates =>
+ public virtual AsyncTransformer Transformer => Options.AsyncTransformer;
+
+ public virtual AsyncDownloadFailurePredicate?[]? FailurePredicates =>
Options?.FailurePredicateOptions?.AsyncDownloadFailurePredicates;
public int LinksPerDownload { get; } = 1;
@@ -70,7 +72,7 @@ namespace Beam.Downloaders {
return new ByteDocument(url, bytes);
}
- protected virtual async Task IsFailure(RawType doc, CancellationToken ct) {
+ protected virtual async Task IsFailure(ByteDocument doc, CancellationToken ct) {
if (FailurePredicates is null)
return false;
if (!(Options?.FailurePredicateOptions?.ProcessInParallel ?? false))
@@ -103,19 +105,18 @@ namespace Beam.Downloaders {
return false;
}
- protected virtual async Task _Download(string link, IProgress progress, CancellationToken ct) {
- if (Options.DownloadFolder is not null && this is UnitDownloader) {
- var path = Path.Combine(Options.DownloadFolder, Path.GetRandomFileName());
+ protected virtual async Task _Download(string link, IProgress progress, CancellationToken ct) {
+ if (Options.DownloadFolder is not null) {
+ var path = Path.Combine(Options.DownloadFolder, options.GetFileNameForDownload(link, []));
await DownloadToFile(link, Options.BufferSize, path, progress, ct);
- return (RawType)(object)new StringDocument(link, path);
+ return new ByteDocument(link, Encoding.UTF8.GetBytes(path));
}
- if (this is UnitDownloader) {
- return (RawType)(object)(await DownloadToMemory(link, Options.BufferSize, progress, ct));
+ else {
+ return await DownloadToMemory(link, Options.BufferSize, progress, ct);
}
- throw new NotSupportedException(Exceptions.Exceptions.unit_downloader_limited_support);
}
- protected virtual async Task<(bool, OutType?)> Transform(RawType download, CancellationToken ct) {
+ protected virtual async Task<(bool, OutType?)> Transform(ByteDocument download, CancellationToken ct) {
try {
if (FailurePredicates is null || !(await IsFailure(download, ct)))
return (true, await Transformer(download));
@@ -131,6 +132,9 @@ namespace Beam.Downloaders {
return (false, default);
downProgress ??= new Progress();
+
+ if (ShouldSkip(link[0].Data, out var defaultType))
+ return (true, defaultType);
OutType? ot = default;
int tryCount = 0;
@@ -147,5 +151,41 @@ namespace Beam.Downloaders {
return (false, ot);
}
+
+        /// <summary>Asks the configured skip predicates whether <paramref name="link"/> can be skipped.</summary>
+        /// <param name="link">The link that is about to be downloaded.</param>
+        /// <param name="outType">On a skip, the value supplied by the predicate that requested it.</param>
+        /// <returns><c>true</c> when any non-null predicate returns <c>true</c>; otherwise <c>false</c>.</returns>
+        private bool ShouldSkip(string link, [NotNullWhen(true)] out OutType? outType) {
+            outType = default;
+            var skipOptions = Options.SkipPredicateOptions;
+            if (skipOptions?.SkipPredicates is not { } predicates)
+                return false;
+            if (!skipOptions.ProcessInParallel) {
+                foreach (var pred in predicates) {
+                    if (pred is not null && pred(link, out outType))
+                        return true;
+                }
+                return false;
+            }
+
+            var shouldSkip = false;
+            OutType? skipValue = default;
+            var gate = new object();
+            // The thread count belongs to the skip options (not the failure-predicate options); 4 is the fallback.
+            var parallelOptions = new ParallelOptions { MaxDegreeOfParallelism = skipOptions.ParallelThreads ?? 4 };
+            Parallel.ForEach(predicates, parallelOptions, (predicate, loopState) => {
+                if (loopState.ShouldExitCurrentIteration || predicate is null || !predicate(link, out var candidate))
+                    return;
+                // First match wins; the flag and its value are published together so they never come from different predicates.
+                lock (gate) {
+                    if (!shouldSkip)
+                        (shouldSkip, skipValue) = (true, candidate);
+                }
+                loopState.Break();
+            });
+            outType = skipValue;
+            return shouldSkip;
+        }
}
}
diff --git a/Beam.Downloaders/UnitDownloaderOptions.cs b/Beam.Downloaders/UnitDownloaderOptions.cs
index dc99555..421c3b6 100644
--- a/Beam.Downloaders/UnitDownloaderOptions.cs
+++ b/Beam.Downloaders/UnitDownloaderOptions.cs
@@ -1,198 +1,38 @@
+using System.Diagnostics.CodeAnalysis;
+using System.Security.Cryptography;
+using System.Text;
using Beam.Models;
namespace Beam.Downloaders;
-public record class UnitDownloaderOptions {
+public record class UnitDownloaderOptions {
public HttpClient Client { get; init; } = new();
+
+ public DownloadTarget Target { get; init; } = DownloadTarget.URL;
- public FailurePredicateOptions? FailurePredicateOptions { get; init; }
+ public SkipPredicateOptions? SkipPredicateOptions { get; init; }
+ public FailurePredicateOptions? FailurePredicateOptions { get; init; }
public FragmentOptions? FragmentOptions { get; init; }
- public required AsyncTransformer AsyncTransformer { get; init; }
+ public required AsyncTransformer AsyncTransformer { get; init; }
+
+ ///
+ /// The location where the download is stored.
+ ///
+ ///
+ /// If not defined, UnitDownloader.TryDownload() downloads to memory.
+ ///
public string? DownloadFolder { get; init; } = null;
public int BufferSize { get; init; } = 80 * 1024; // 80kb
+
+    /// <summary>Builds a file name from the XxHash64 of the UTF-8 URL bytes followed by <paramref name="additionalData"/>, as Base64 with '+' and '/' swapped for '-' and '_' and the padding removed.</summary>
+    public string GetFileNameForDownload(string url, byte[] additionalData) {
+        byte[] payload = [..Encoding.UTF8.GetBytes(url), ..additionalData];
+        return Convert.ToBase64String(System.IO.Hashing.XxHash64.Hash(payload)).TrimEnd('=').Replace('+', '-').Replace('/', '_');
+    }
}
-public record class FailurePredicateOptions {
- public required AsyncDownloadFailurePredicate?[]? AsyncDownloadFailurePredicates { get; init; }
- public bool ProcessInParallel { get; init; } = false;
- public int? ParallelThreads { get; init; }
-}
+// UnitDownloaderOptionsBuilder now lives in UnitDownloaderOptionsBuilder.cs.
-public record class FragmentOptions {
- public required int FragmentSize { get; init; }
- public bool DownloadInParallel { get; init; } = false;
- public int? ParallelThreads { get; init; }
-}
+// FailurePredicateOptions and its builder now live in FailurePredicateOptions.cs and FailurePredicateOptionsBuilder.cs.
-
- // ---------- UnitDownloaderOptions Builder ----------
- public sealed class UnitDownloaderOptionsBuilder
- {
- private HttpClient _client = new HttpClient();
- private FailurePredicateOptions? _failureOptions;
- private FragmentOptions? _fragmentOptions;
- private AsyncTransformer? _asyncTransformer;
- private string? _downloadFolder = null;
- private int _bufferSize = 80 * 1024;
-
- public UnitDownloaderOptionsBuilder WithClient(HttpClient client)
- {
- _client = client ?? throw new System.ArgumentNullException(nameof(client));
- return this;
- }
-
- public UnitDownloaderOptionsBuilder WithFailurePredicateOptions(FailurePredicateOptions? options)
- {
- _failureOptions = options;
- return this;
- }
-
- public UnitDownloaderOptionsBuilder WithFailurePredicates(System.Action> configure)
- {
- if (configure == null) throw new System.ArgumentNullException(nameof(configure));
- var b = new FailurePredicateOptionsBuilder();
- configure(b);
- _failureOptions = b.Build();
- return this;
- }
-
- public UnitDownloaderOptionsBuilder WithFragmentOptions(FragmentOptions? options)
- {
- _fragmentOptions = options;
- return this;
- }
-
- public UnitDownloaderOptionsBuilder WithFragments(System.Action configure)
- {
- if (configure == null) throw new System.ArgumentNullException(nameof(configure));
- var b = new FragmentOptionsBuilder();
- configure(b);
- _fragmentOptions = b.Build();
- return this;
- }
-
- public UnitDownloaderOptionsBuilder WithAsyncTransformer(AsyncTransformer transformer)
- {
- _asyncTransformer = transformer ?? throw new System.ArgumentNullException(nameof(transformer));
- return this;
- }
-
- public UnitDownloaderOptionsBuilder WithDownloadFolder(string? downloadFolder)
- {
- _downloadFolder = downloadFolder;
- return this;
- }
-
- public UnitDownloaderOptionsBuilder WithBufferSize(int bytes)
- {
- if (bytes <= 0) throw new System.ArgumentOutOfRangeException(nameof(bytes));
- _bufferSize = bytes;
- return this;
- }
-
- public UnitDownloaderOptions Build()
- {
- if (_asyncTransformer == null)
- throw new System.InvalidOperationException("AsyncTransformer must be provided.");
-
- return new UnitDownloaderOptions
- {
- Client = _client,
- FailurePredicateOptions = _failureOptions,
- FragmentOptions = _fragmentOptions,
- AsyncTransformer = _asyncTransformer,
- DownloadFolder = _downloadFolder,
- BufferSize = _bufferSize
- };
- }
- }
-
- // ---------- FailurePredicateOptions Builder ----------
- public sealed class FailurePredicateOptionsBuilder
- {
- private readonly System.Collections.Generic.List?> _predicates =
- new System.Collections.Generic.List?>();
- private bool _processInParallel = false;
- private int? _parallelThreads = null;
-
- public FailurePredicateOptionsBuilder WithPredicate(AsyncDownloadFailurePredicate? predicate)
- {
- _predicates.Add(predicate);
- return this;
- }
-
- public FailurePredicateOptionsBuilder WithPredicates(System.Collections.Generic.IEnumerable?> predicates)
- {
- if (predicates == null) throw new System.ArgumentNullException(nameof(predicates));
- _predicates.AddRange(predicates);
- return this;
- }
-
- public FailurePredicateOptionsBuilder WithPredicates(params AsyncDownloadFailurePredicate?[] predicates)
- {
- _predicates.Clear();
- if (predicates != null) _predicates.AddRange(predicates);
- return this;
- }
-
- public FailurePredicateOptionsBuilder WithProcessInParallel(bool value = true)
- {
- _processInParallel = value;
- return this;
- }
-
- public FailurePredicateOptionsBuilder WithParallelThreads(int? threads)
- {
- if (threads.HasValue && threads.Value <= 0)
- throw new System.ArgumentOutOfRangeException(nameof(threads));
- _parallelThreads = threads;
- return this;
- }
-
- public FailurePredicateOptions Build()
- {
- var arr = _predicates.Count == 0 ? [] : _predicates.ToArray();
- return new FailurePredicateOptions
- {
- AsyncDownloadFailurePredicates = arr,
- ProcessInParallel = _processInParallel,
- ParallelThreads = _parallelThreads
- };
- }
- }
-
- // ---------- FragmentOptions Builder ----------
- public sealed class FragmentOptionsBuilder {
- private int? _fragmentSize;
- private bool _downloadInParallel = false;
- private int? _parallelThreads = null;
-
- public FragmentOptionsBuilder WithFragmentSize(int bytes) {
- if (bytes <= 0) throw new System.ArgumentOutOfRangeException(nameof(bytes));
- _fragmentSize = bytes;
- return this;
- }
-
- public FragmentOptionsBuilder WithDownloadInParallel(bool value = true) {
- _downloadInParallel = value;
- return this;
- }
-
- public FragmentOptionsBuilder WithParallelThreads(int? threads) {
- if (threads.HasValue && threads.Value <= 0)
- throw new System.ArgumentOutOfRangeException(nameof(threads));
- _parallelThreads = threads;
- return this;
- }
-
- public FragmentOptions Build() {
- if (!_fragmentSize.HasValue)
- throw new System.InvalidOperationException("FragmentSize must be provided.");
-
- return new FragmentOptions {
- FragmentSize = _fragmentSize.Value,
- DownloadInParallel = _downloadInParallel,
- ParallelThreads = _parallelThreads
- };
- }
- }
+// FragmentOptions and its builder now live in FragmentOptions.cs and FragmentOptionsBuilder.cs.
\ No newline at end of file
diff --git a/Beam.Downloaders/UnitDownloaderOptionsBuilder.cs b/Beam.Downloaders/UnitDownloaderOptionsBuilder.cs
new file mode 100644
index 0000000..4675092
--- /dev/null
+++ b/Beam.Downloaders/UnitDownloaderOptionsBuilder.cs
@@ -0,0 +1,107 @@
+using Beam.Models;
+
+namespace Beam.Downloaders;
+
+public sealed class UnitDownloaderOptionsBuilder {
+ private DownloadTarget _target = DownloadTarget.URL;
+ private HttpClient _client = new HttpClient();
+ private FailurePredicateOptionsBuilder _failureOptionsBuilder = new();
+ private FailurePredicateOptions? _failurePredicateOptionsOverride = null;
+ private SkipPredicateOptionsBuilder _skipPredicateOptionsBuilder = new();
+ private SkipPredicateOptions? _skipPredicateOptionsOverride = null;
+ private FragmentOptions? _fragmentOptions;
+ private AsyncTransformer? _asyncTransformer;
+ private string? _downloadFolder = null;
+ private int _bufferSize = 80 * 1024;
+
+ public UnitDownloaderOptionsBuilder WithTarget(DownloadTarget target) {
+ _target = target;
+ return this;
+ }
+
+ public UnitDownloaderOptionsBuilder WithClient(HttpClient client)
+ {
+ _client = client ?? throw new System.ArgumentNullException(nameof(client));
+ return this;
+ }
+
+ public UnitDownloaderOptionsBuilder WithFailurePredicateOptions(FailurePredicateOptions? options)
+ {
+ _failurePredicateOptionsOverride = options;
+ return this;
+ }
+
+ public UnitDownloaderOptionsBuilder WithFailurePredicates(System.Action> configure)
+ {
+ if (configure == null) throw new System.ArgumentNullException(nameof(configure));
+ configure(_failureOptionsBuilder);
+ return this;
+ }
+
+ public UnitDownloaderOptionsBuilder WithFragmentOptions(FragmentOptions? options)
+ {
+ _fragmentOptions = options;
+ return this;
+ }
+
+ public UnitDownloaderOptionsBuilder WithSkipPredicates(Action> configure) {
+ if (configure == null) throw new ArgumentNullException(nameof(configure));
+ configure(_skipPredicateOptionsBuilder);
+ return this;
+ }
+
+ public UnitDownloaderOptionsBuilder WithSkipPredicateOptions(
+ SkipPredicateOptions skipPredicateOptions) {
+ _skipPredicateOptionsOverride = skipPredicateOptions;
+ return this;
+ }
+
+ public UnitDownloaderOptionsBuilder WithFragments(System.Action configure)
+ {
+ if (configure == null) throw new System.ArgumentNullException(nameof(configure));
+ var b = new FragmentOptionsBuilder();
+ configure(b);
+ _fragmentOptions = b.Build();
+ return this;
+ }
+
+ public UnitDownloaderOptionsBuilder WithAsyncTransformer(AsyncTransformer transformer)
+ {
+ _asyncTransformer = transformer ?? throw new System.ArgumentNullException(nameof(transformer));
+ return this;
+ }
+
+ public UnitDownloaderOptionsBuilder WithDownloadFolder(string? downloadFolder)
+ {
+ _downloadFolder = downloadFolder;
+ return this;
+ }
+
+ public UnitDownloaderOptionsBuilder WithBufferSize(int bytes)
+ {
+ if (bytes <= 0) throw new System.ArgumentOutOfRangeException(nameof(bytes));
+ _bufferSize = bytes;
+ return this;
+ }
+
+    /// <summary>Creates the options; an explicitly supplied options object wins over the nested predicate builder.</summary>
+    /// <exception cref="System.InvalidOperationException">No async transformer was supplied.</exception>
+    public UnitDownloaderOptions Build()
+    {
+        if (_asyncTransformer == null)
+            throw new System.InvalidOperationException("AsyncTransformer must be provided.");
+        // Resolve the predicate options inline: caching the built value in the override fields (??=)
+        // made a reused builder ignore predicates configured after the first Build().
+        return new UnitDownloaderOptions
+        {
+            Target = _target,
+            Client = _client,
+            FailurePredicateOptions = _failurePredicateOptionsOverride ?? _failureOptionsBuilder.Build(),
+            SkipPredicateOptions = _skipPredicateOptionsOverride ?? _skipPredicateOptionsBuilder.Build(),
+            FragmentOptions = _fragmentOptions,
+            AsyncTransformer = _asyncTransformer,
+            DownloadFolder = _downloadFolder,
+            BufferSize = _bufferSize
+        };
+    }
+}
\ No newline at end of file
diff --git a/Beam.Downloaders/UnitFragmentDownloader.cs b/Beam.Downloaders/UnitFragmentDownloader.cs
index a399128..017fce4 100644
--- a/Beam.Downloaders/UnitFragmentDownloader.cs
+++ b/Beam.Downloaders/UnitFragmentDownloader.cs
@@ -5,12 +5,12 @@ using HtmlAgilityPack;
using Microsoft.Extensions.Logging;
namespace Beam.Downloaders {
- public class UnitFragmentDownloader(UnitDownloaderOptions options,
- IUnitDownloader? internalDownloader = null) : IUnitDownloader>> where RawType : IDocument {
+ public class UnitFragmentDownloader(UnitDownloaderOptions options,
+ IUnitDownloader? internalDownloader = null) : IUnitDownloader>> {
- public UnitDownloaderOptions Options { get; } = options;
+ public UnitDownloaderOptions Options { get; } = options;
public int LinksPerDownload { get; set; }
- private IUnitDownloader UnitDownloader { get; } = internalDownloader ?? new UnitDownloader(options);
+ private IUnitDownloader UnitDownloader { get; } = internalDownloader ?? new UnitDownloader(options);
async Task<(bool, Fragment>?)> IUnitDownloader>>.TryDownload(IOrdered[] link, CancellationToken ct, int maximumRetryCount, IProgress? downProgress, IProgress? tryProgress) {
Fragment> fragment = new Fragment>(link.Length);
diff --git a/Beam.Fluent/ContextStage.cs b/Beam.Fluent/ContextStage.cs
index c387c04..1d86a7f 100644
--- a/Beam.Fluent/ContextStage.cs
+++ b/Beam.Fluent/ContextStage.cs
@@ -8,14 +8,14 @@ using Beam.Downloaders;
namespace Beam.Fluent;
-internal sealed class ContextStage : IContextStage where RawType : IDocument {
- private readonly DownloadContextBuilder _ctxBuilder;
- private readonly AsyncTransformer _transformer;
+internal sealed class ContextStage : IContextStage {
+ private readonly DownloadContextBuilder _ctxBuilder;
+ private readonly AsyncTransformer _transformer;
private FragmentMode _fragmentMode = FragmentMode.Single;
private Channel _channel = Channel.Plain;
private readonly ContentKind _contentKind;
private int _parallelism = 4;
- private UnitDownloaderOptionsBuilder _optionsBuilder = new();
+ private UnitDownloaderOptionsBuilder _optionsBuilder = new();
// ──────────────── playwright ────────────────
private PlaywrightAsyncManipulator? _playwrightManipulator = null;
@@ -27,8 +27,8 @@ internal sealed class ContextStage : IContextStage ctxBuilder,
- AsyncTransformer transformer) {
+ public ContextStage(DownloadContextBuilder ctxBuilder,
+ AsyncTransformer transformer) {
_ctxBuilder = ctxBuilder;
_transformer = transformer;
_contentKind = transformer switch {
@@ -43,28 +43,28 @@ internal sealed class ContextStage : IContextStage Configure(Action> configure) {
+ public IContextStage Configure(Action configure) {
configure(_ctxBuilder);
return this;
}
- public IContextStage ConfigureUnitDownloaderOptions(
- Action> configure) {
+ public IContextStage ConfigureUnitDownloaderOptions(
+ Action> configure) {
configure(_optionsBuilder);
return this;
}
- public IContextStage WithParallelism(int degree) {
+ public IContextStage< OutType> WithParallelism(int degree) {
_parallelism = Math.Max(1, degree);
return this;
}
- public IContextStage WithTimeout(TimeSpan timeout) {
+ public IContextStage< OutType> WithTimeout(TimeSpan timeout) {
_ctxBuilder.WithTimeOut(timeout);
return this;
}
- public IContextStage WithRetryReporter(IProgress reporter) {
+ public IContextStage< OutType> WithRetryReporter(IProgress reporter) {
_ctxBuilder.WithRetryReporter(reporter);
return this;
}
@@ -73,7 +73,7 @@ internal sealed class ContextStage : IContextStage
///
///
- public IContextStage UseFragments() {
+ public IContextStage< OutType> UseFragments() {
if (_playwrightManipulator is not null)
_playwrightManipulator = null;
if (_channel == Channel.Playwright)
@@ -88,7 +88,7 @@ internal sealed class ContextStage : IContextStage
/// The page manipulator
///
- public IContextStage UsePlaywright(PlaywrightAsyncManipulator manipulator) {
+ public IContextStage< OutType> UsePlaywright(PlaywrightAsyncManipulator manipulator) {
if (_fragmentMode == FragmentMode.Fragmented)
_fragmentMode = FragmentMode.Single;
if (_stealthManipulator is not null)
@@ -99,7 +99,7 @@ internal sealed class ContextStage : IContextStage UseStealth(StealthAsyncManipulator manipulator, StealthConfig config) {
+ public IContextStage< OutType> UseStealth(StealthAsyncManipulator manipulator, StealthConfig config) {
if (_playwrightManipulator is not null)
_playwrightManipulator = null;
@@ -109,7 +109,7 @@ internal sealed class ContextStage : IContextStage context) {
+ private object ConstructUnitDownloader(DownloadContext context) {
#region Utility functions
T To(object? o) where T : class
@@ -145,27 +145,19 @@ internal sealed class ContextStage : IContextStage new UnitFragmentDownloader(options),
+ => new UnitFragmentDownloader< OutType>(options),
// ──────────────── single ────────────────
(Channel.Plain, FragmentMode.Single, _)
- => new UnitDownloader(options),
+ => new UnitDownloader< OutType>(options),
// ──────────────── single playwright ────────────────
(Channel.Playwright, FragmentMode.Single, _)
- => new PlaywrightUnitDownloader(options, EnsureExists(_playwrightManipulator)),
- // ──────────────── single stealth file ────────────────
- (Channel.Stealth, FragmentMode.Single, ContentKind.File)
- => new StealthUnitPageDownloader(options, EnsureExists(_stealthConfig), EnsureExists(_stealthManipulator)),
- // ──────────────── single stealth binary ────────────────
+ => new PlaywrightUnitDownloader< OutType>(options, EnsureExists(_playwrightManipulator)),
+ // ──────────────── single stealth ────────────────
(Channel.Stealth, FragmentMode.Single, ContentKind.Binary)
- => new StealthUnitDownloader(options, EnsureExists(_stealthConfig), EnsureExists(_stealthManipulator)),
- // ──────────────── fragment stealth file ────────────────
- (Channel.Stealth, FragmentMode.Fragmented, ContentKind.File)
- => new StealthFragmentPageDownloader(options,
- EnsureExists(_stealthConfig),
- EnsureExists(_stealthManipulator)),
- // ──────────────── fragment stealth binary ────────────────
+ => new StealthUnitDownloader< OutType>(options, EnsureExists(_stealthConfig), EnsureExists(_stealthManipulator)),
+ // ──────────────── fragment stealth ────────────────
(Channel.Stealth, FragmentMode.Fragmented, ContentKind.Binary)
- => new StealthFragmentDownloader(options,
+ => new StealthFragmentDownloader< OutType>(options,
EnsureExists(_stealthConfig),
EnsureExists(_stealthManipulator)),
_ => throw new Exception(string.Format(Exceptions.Exceptions.fluent_unsupported_pattern,
@@ -173,14 +165,14 @@ internal sealed class ContextStage : IContextStage> ConstructDownloader(DownloadContext context) {
- var copyOfContext = DownloadContextBuilder.FromContext(context).Build();
+ private IAsyncEnumerator> ConstructDownloader(DownloadContext context) {
+ var copyOfContext = DownloadContextBuilder.FromContext(context).Build();
return _fragmentMode switch {
- FragmentMode.Fragmented => new SequentialFragmentDownloader(
+ FragmentMode.Fragmented => new SequentialFragmentDownloader(
copyOfContext,
ctx => (IUnitDownloader>>)ConstructUnitDownloader(ctx),
context.DownloadLogger).UnwrapFragmented(),
- FragmentMode.Single => new SequentialDownloader(
+ FragmentMode.Single => new SequentialDownloader< OutType>(
copyOfContext,
ctx => (IUnitDownloader)ConstructUnitDownloader(ctx),
context.DownloadLogger).WrapOrdered(),
diff --git a/Beam.Fluent/Core/IContextStage.cs b/Beam.Fluent/Core/IContextStage.cs
index 5ad4486..2be7bf0 100644
--- a/Beam.Fluent/Core/IContextStage.cs
+++ b/Beam.Fluent/Core/IContextStage.cs
@@ -6,15 +6,15 @@ using Beam.Stealth;
namespace Beam.Fluent;
-public interface IContextStage