Refactor downloaders to use generic options and unify logic

Replaces specialized binary and HTML downloaders with a generic, options-driven UnitDownloader and UnitFragmentDownloader pattern. Introduces UnitDownloaderOptions and builder classes for flexible configuration, updates interfaces and method signatures to support progress reporting, and removes redundant binary-specific classes. Updates Playwright and Stealth downloaders to use the new generic base, and adds improved error handling and reporting. Also updates dependency versions and makes minor API consistency improvements across the Fluent and Models layers.
This commit is contained in:
qwsdcvghyu89
2025-09-29 21:27:56 +10:00
parent 8e60109f5e
commit 2958a26e4f
30 changed files with 621 additions and 422 deletions
+23 -23
View File
@@ -1,36 +1,36 @@
using Beam.Downloaders;
using Beam.Abstractions;
using Beam.Downloaders;
using Beam.Models;
using Microsoft.Playwright;
namespace Beam.Playwright {
public class PlaywrightUnitDownloader<T> : UnitDownloaderBinary<T> {
public PlaywrightAsyncManipulator PuppetManipulator { get; }
public class PlaywrightUnitDownloader<RawType, OutType>(
UnitDownloaderOptions<RawType, OutType> options,
PlaywrightAsyncManipulator puppetManipulator)
: UnitDownloader<RawType, OutType>(options)
where RawType : IDocument {
public PlaywrightAsyncManipulator PuppetManipulator { get; } = puppetManipulator;
public PlaywrightUnitDownloader(HttpClient client, PlaywrightAsyncManipulator puppetManipulator, AsyncTransformer<ByteDocument, T> asyncHtmlTransformer, AsyncDownloadFailurePredicate<ByteDocument>[] asyncDownloadFailurePredicates)
: base(client, asyncHtmlTransformer, asyncDownloadFailurePredicates) {
PuppetManipulator = puppetManipulator;
}
protected override async Task<(bool, T?)> TryDownloadWithNoRetries(string link, CancellationToken ct) {
protected override async Task DownloadToStream(string url, int bufferSize, Stream destinationStream, IProgress<IDownloadReport> progress, CancellationToken ct) {
var page = await PlaywrightContext.Browser.Value.NewPageAsync();
try {
await page.GotoAsync(link);
await page.GotoAsync(url);
await PuppetManipulator(page);
var download = await page.WaitForDownloadAsync();
using var stream = await download.CreateReadStreamAsync();
byte[] content = new byte[stream.Length];
await stream.ReadExactlyAsync(content, ct);
ByteDocument doc = new ByteDocument(download.SuggestedFilename, content);
if (FailurePredicates is not null && await IsFailure(doc))
return (false, default);
var transformed = await Transformer(doc);
return (true, transformed);
} catch (Exception) {
return (false, default);
await using var stream = await download.CreateReadStreamAsync();
var buffer = new byte[bufferSize];
var inBuffer = 0;
var downloaded = 0;
while ((inBuffer = stream.Read(buffer)) > 0) {
downloaded += inBuffer;
progress?.Report(new DownloadReport() {
BytesDownloaded = downloaded,
BytesRemaining = stream.Length - downloaded
});
await destinationStream.WriteAsync(buffer.AsMemory(0, inBuffer), ct);
}
} finally {
if (!page.IsClosed)
await page.CloseAsync();