Refactor downloaders to use generic options and unify logic

Replaces specialized binary and HTML downloaders with a generic, options-driven UnitDownloader and UnitFragmentDownloader pattern. Introduces UnitDownloaderOptions and builder classes for flexible configuration, updates interfaces and method signatures to support progress reporting, and removes redundant binary-specific classes. Updates Playwright and Stealth downloaders to use the new generic base, and adds improved error handling and reporting. Also updates dependency versions and makes minor API consistency improvements across the Fluent and Models layers.
This commit is contained in:
qwsdcvghyu89
2025-09-29 21:27:56 +10:00
parent 8e60109f5e
commit 2958a26e4f
30 changed files with 621 additions and 422 deletions
+13 -29
View File
@@ -5,55 +5,39 @@ using HtmlAgilityPack;
using Microsoft.Extensions.Logging;
namespace Beam.Downloaders {
public class UnitFragmentDownloader<T> : IUnitDownloader<Fragment<Ordered<T>>> {
public UnitFragmentDownloader(HtmlWeb web,
AsyncTransformer<HtmlDocument, T> transformer,
AsyncDownloadFailurePredicate<HtmlDocument>?[]? failurePredicate = null,
int fragmentSize = 4,
ILogger? logger = null,
IUnitDownloader<T>? internalDownloader = null) {
Web = web;
Transformer = transformer;
FailurePredicate = failurePredicate;
UnitDownloader = internalDownloader ?? new UnitDownloader<T>(Web, Transformer, FailurePredicate);
LinksPerDownload = fragmentSize;
Logger = logger;
}
public class UnitFragmentDownloader<RawType, OutType>(UnitDownloaderOptions<RawType, OutType> options,
IUnitDownloader<OutType>? internalDownloader = null) : IUnitDownloader<Fragment<Ordered<OutType>>> where RawType : IDocument {
public HtmlWeb Web { get; }
public AsyncTransformer<HtmlDocument, T> Transformer { get; }
public AsyncDownloadFailurePredicate<HtmlDocument>?[]? FailurePredicate { get; }
public UnitDownloaderOptions<RawType, OutType> Options { get; } = options;
public int LinksPerDownload { get; set; }
public ILogger? Logger { get; set; }
private IUnitDownloader<OutType> UnitDownloader { get; } = internalDownloader ?? new UnitDownloader<RawType, OutType>(options);
private readonly IUnitDownloader<T> UnitDownloader;
async Task<(bool, Fragment<Ordered<T>>?)> IUnitDownloader<Fragment<Ordered<T>>>.TryDownload(IOrdered<string>[] link, CancellationToken ct, int maximumRetryCount, IProgress<IRetryReport>? tryProgress) {
Fragment<Ordered<T>> fragment = new Fragment<Ordered<T>>(link.Length);
if (!Fragment<Ordered<T>>.TryAcquireUpdater(fragment, out var updater))
async Task<(bool, Fragment<Ordered<OutType>>?)> IUnitDownloader<Fragment<Ordered<OutType>>>.TryDownload(IOrdered<string>[] link, CancellationToken ct, int maximumRetryCount, IProgress<IDownloadReport>? downProgress, IProgress<IRetryReport>? tryProgress) {
Fragment<Ordered<OutType>> fragment = new Fragment<Ordered<OutType>>(link.Length);
if (!Fragment<Ordered<OutType>>.TryAcquireUpdater(fragment, out var updater))
throw new AssertionException(Exceptions.Exceptions.fragment_locked);
bool isFailure = false;
await Parallel.ForEachAsync(link, async (x, pct) => {
pct.ThrowIfCancellationRequested();
ct.ThrowIfCancellationRequested();
var (result, downloadedT) = await UnitDownloader.TryDownload([x], ct, maximumRetryCount, tryProgress);
if (isFailure)
return;
var (result, downloadedT) = await UnitDownloader.TryDownload([x], ct, maximumRetryCount, downProgress, tryProgress);
if (!result) {
Interlocked.Exchange(ref isFailure, true);
Logger?.LogError("Failed to retrieve {0} order={1}", x.Data, x.Order);
return;
}
if (downloadedT == null) {
Interlocked.Exchange(ref isFailure, true);
Logger?.LogCritical("Failed to retrieve {0} order={1}", x.Data, x.Order);
return;
}
updater(new Ordered<T>(downloadedT, x.Order));
updater(new Ordered<OutType>(downloadedT, x.Order));
});
if (!isFailure)
Fragment<Ordered<T>>.SetComplete(fragment, true);
Fragment<Ordered<OutType>>.SetComplete(fragment, true);
Fragment<Ordered<T>>.TryReleaseUpdater(fragment, updater);
Fragment<Ordered<OutType>>.TryReleaseUpdater(fragment, updater);
return (!isFailure, fragment);