Refactor downloaders to use ByteDocument and add options builders

Replaces generic RawType with ByteDocument in downloaders and context classes, simplifying type usage. Adds builder classes for FailurePredicateOptions, FragmentOptions, SkipPredicateOptions, and UnitDownloaderOptions to improve configuration flexibility. Introduces DownloadTarget enum and SkipPredicate delegate for more granular download control. Refactors Fluent API interfaces and implementations to remove RawType generics and streamline usage. Adds Playwright and Stealth download strategies for extensibility.
This commit is contained in:
qwsdcvghyu89
2025-11-15 22:51:46 +11:00
parent 647b2b0f37
commit f52aa6123b
34 changed files with 648 additions and 439 deletions
+16 -21
View File
@@ -1,36 +1,31 @@
using Beam.Abstractions;
using Beam.Downloaders;
using Beam.Models;
using Beam.Playwright.Strategies;
using Microsoft.Playwright;
namespace Beam.Playwright {
public class PlaywrightUnitDownloader<RawType, OutType>(
UnitDownloaderOptions<RawType, OutType> options,
PlaywrightAsyncManipulator puppetManipulator)
: UnitDownloader<RawType, OutType>(options)
where RawType : IDocument {
public PlaywrightAsyncManipulator PuppetManipulator { get; } = puppetManipulator;
public class PlaywrightUnitDownloader<OutType> : UnitDownloader<OutType> {
public PlaywrightUnitDownloader(UnitDownloaderOptions<OutType> options,
PlaywrightAsyncManipulator puppetManipulator) : base(options) {
PuppetManipulator = puppetManipulator;
_downloadStrategy = options.Target switch {
DownloadTarget.URL or DownloadTarget.InURL => new PageDownloadStrategy(),
DownloadTarget.Complex => new WaitingDownloadStrategy(),
_ => throw new NotSupportedException() // TODO add an exception message
};
}
public PlaywrightAsyncManipulator PuppetManipulator { get; }
private IDownloadStrategy _downloadStrategy { get; }
protected override async Task DownloadToStream(string url, int bufferSize, Stream destinationStream, IProgress<IDownloadReport> progress, CancellationToken ct) {
var page = await PlaywrightContext.Browser.Value.NewPageAsync();
try {
await page.GotoAsync(url);
await PuppetManipulator(page);
var download = await page.WaitForDownloadAsync();
await using var stream = await download.CreateReadStreamAsync();
var buffer = new byte[bufferSize];
var inBuffer = 0;
var downloaded = 0;
while ((inBuffer = stream.Read(buffer)) > 0) {
downloaded += inBuffer;
progress?.Report(new DownloadReport() {
BytesDownloaded = downloaded,
BytesRemaining = stream.Length - downloaded
});
await destinationStream.WriteAsync(buffer.AsMemory(0, inBuffer), ct);
}
await _downloadStrategy.DownloadToStream(url, bufferSize, destinationStream, progress, page, ct);
} finally {
if (!page.IsClosed)
await page.CloseAsync();
@@ -0,0 +1,9 @@
using Beam.Abstractions;
using Microsoft.Playwright;
namespace Beam.Playwright.Strategies;
/// <summary>
/// Contract for a pluggable way of getting content from a Playwright page into a stream.
/// </summary>
internal interface IDownloadStrategy {
    /// <summary>
    /// Writes the content obtained through <paramref name="page"/> into <paramref name="destinationStream"/>.
    /// </summary>
    /// <param name="url">URL the download was requested for.</param>
    /// <param name="bufferSize">Requested size, in bytes, of the copy buffer.</param>
    /// <param name="destinationStream">Stream that receives the downloaded bytes.</param>
    /// <param name="progress">Sink for progress reports.</param>
    /// <param name="page">Playwright page the content is taken from.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>A task that completes when the implementation has finished writing.</returns>
    Task DownloadToStream(
        string url,
        int bufferSize,
        Stream destinationStream,
        IProgress<IDownloadReport> progress,
        IPage page,
        CancellationToken ct);
}
@@ -0,0 +1,15 @@
using System.Text;
using Beam.Abstractions;
using Microsoft.Playwright;
namespace Beam.Playwright.Strategies;
/// <summary>
/// Download strategy that captures the markup of the page itself rather than a file download.
/// </summary>
internal class PageDownloadStrategy : IDownloadStrategy {
    /// <summary>
    /// Reads the inner HTML of the page's <c>html</c> element, encodes it as UTF-8 and writes
    /// the bytes to <paramref name="destinationStream"/>.
    /// </summary>
    /// <remarks>
    /// <paramref name="url"/>, <paramref name="bufferSize"/> and <paramref name="progress"/> are
    /// not used by this strategy; the whole document is written in a single call.
    /// </remarks>
    public async Task DownloadToStream(string url, int bufferSize, Stream destinationStream, IProgress<IDownloadReport> progress, IPage page,
        CancellationToken ct) {
        // Strict mode is switched off for the "html" selector lookup.
        var lookupOptions = new PageInnerHTMLOptions { Strict = false };
        string markup = await page.InnerHTMLAsync("html", lookupOptions);

        byte[] payload = Encoding.UTF8.GetBytes(markup);
        await destinationStream.WriteAsync(payload, ct);
    }
}
@@ -0,0 +1,25 @@
using Beam.Abstractions;
using Beam.Models;
using Microsoft.Playwright;
namespace Beam.Playwright.Strategies;
/// <summary>
/// Download strategy that waits for the page to trigger a file download and copies the
/// downloaded bytes into the destination stream chunk by chunk.
/// </summary>
internal class WaitingDownloadStrategy : IDownloadStrategy {
    /// <summary>
    /// Waits for a download on <paramref name="page"/>, then streams its content into
    /// <paramref name="destinationStream"/>, reporting progress after each chunk is read.
    /// </summary>
    /// <param name="url">Not used by this strategy.</param>
    /// <param name="bufferSize">Size, in bytes, of the buffer used for each read/write cycle.</param>
    /// <param name="destinationStream">Stream that receives the downloaded bytes.</param>
    /// <param name="progress">Optional progress sink; skipped when <c>null</c>.</param>
    /// <param name="page">Playwright page expected to start the download.</param>
    /// <param name="ct">Token observed while reading from the download and writing to the destination.</param>
    public async Task DownloadToStream(string url, int bufferSize, Stream destinationStream, IProgress<IDownloadReport> progress, IPage page,
        CancellationToken ct) {
        var download = await page.WaitForDownloadAsync();
        await using var stream = await download.CreateReadStreamAsync();

        // Stream.Length is only supported on seekable streams and throws otherwise,
        // so query it once up front and only when the stream says it can seek.
        long? totalLength = stream.CanSeek ? stream.Length : (long?)null;

        var buffer = new byte[bufferSize];
        // NOTE(review): an int counter overflows past 2 GiB; widen to long if
        // DownloadReport.BytesDownloaded accepts it - confirm against the model.
        var downloaded = 0;
        int inBuffer;
        // Read asynchronously so the calling thread is not blocked and cancellation is honoured mid-read.
        while ((inBuffer = await stream.ReadAsync(buffer.AsMemory(), ct)) > 0) {
            downloaded += inBuffer;
            progress?.Report(new DownloadReport() {
                BytesDownloaded = downloaded,
                // When the total length is unknown, 0 is reported instead of throwing.
                BytesRemaining = totalLength is long total ? total - downloaded : 0
            });
            await destinationStream.WriteAsync(buffer.AsMemory(0, inBuffer), ct);
        }
    }
}