Files
Beam/Beam.Puppeteer/PuppetUnitDownloader.cs
T
qwsdcvghyu89 3baa31a7cc feat: add Puppeteer integration for web downloads
This introduces a new Puppeteer-based mechanism for downloading
web content. It provides a flexible way to manipulate pages
during downloads, enhancing the ability to handle dynamic
content and improve the overall download process.
2025-06-25 13:42:24 +03:00

40 lines
1.6 KiB
C#

using Microsoft.Playwright;
namespace Beam.Puppeteer {
    /// <summary>
    /// Unit downloader that obtains a file through a browser page: it opens a new page,
    /// navigates to the link, runs <see cref="PuppetManipulator"/> against the page and
    /// captures the download that the page starts.
    /// </summary>
    /// <typeparam name="T">Type produced by the transformer from the downloaded document.</typeparam>
    public class PuppetUnitDownloader<T> : UnitDownloaderBinary<T> {
        /// <summary>
        /// Callback invoked with the page after navigation. The download is awaited around this
        /// callback, so it is expected to trigger the download (e.g. by clicking a link).
        /// </summary>
        public AsyncManipulator PuppetManipulator { get; }

        /// <summary>
        /// Creates a downloader that drives a browser page with <paramref name="puppetManipulator"/>.
        /// </summary>
        /// <param name="client">HTTP client forwarded to the base class.</param>
        /// <param name="puppetManipulator">Callback run against the page once it has navigated to the link.</param>
        /// <param name="asyncHtmlTransformer">Transformer forwarded to the base class.</param>
        /// <param name="asyncDownloadFailurePredicates">Failure predicates forwarded to the base class.</param>
        public PuppetUnitDownloader(HttpClient client, AsyncManipulator puppetManipulator, AsyncTransformer<ByteDocument, T> asyncHtmlTransformer, AsyncDownloadFailurePredicate<ByteDocument>[] asyncDownloadFailurePredicates)
            : base(client, asyncHtmlTransformer, asyncDownloadFailurePredicates) {
            PuppetManipulator = puppetManipulator;
        }

        /// <summary>
        /// Makes a single attempt to download <paramref name="link"/> through a fresh browser page.
        /// </summary>
        /// <param name="link">Address the page navigates to before the manipulator runs.</param>
        /// <param name="ct">Token observed while the downloaded content is read into memory.</param>
        /// <returns>
        /// <c>(true, value)</c> with the transformed document on success; <c>(false, default)</c> when
        /// the failure check rejects the document or when any exception is thrown after the page
        /// has been created (exceptions are swallowed, cancellation included).
        /// </returns>
        /// <remarks>
        /// Exceptions thrown while creating the page or while closing it are not caught here.
        /// </remarks>
        protected override async Task<(bool, T?)> TryDownloadWithNoRetries(string link, CancellationToken ct) {
            var page = await PuppetContext.Browser.Value.NewPageAsync();
            try {
                await page.GotoAsync(link);

                // Register the download waiter BEFORE the manipulator runs. Waiting only after the
                // manipulator has finished can miss a download event that already fired, leaving
                // the wait to run into its timeout.
                var download = await page.RunAndWaitForDownloadAsync(async () => await PuppetManipulator(page));

                await using var stream = await download.CreateReadStreamAsync();

                // Copy through a growable buffer instead of sizing an array from stream.Length:
                // Length is only available on seekable streams and throws otherwise.
                using var buffer = new System.IO.MemoryStream();
                await stream.CopyToAsync(buffer, ct);

                var doc = new ByteDocument(download.SuggestedFilename, buffer.ToArray());
                if (FailurePredicates is not null && await IsFailure(doc)) {
                    return (false, default);
                }

                var transformed = await Transformer(doc);
                return (true, transformed);
            } catch (Exception) {
                // Any failure counts as an unsuccessful attempt; the caller only sees the flag.
                return (false, default);
            } finally {
                // Always release the page, whatever the outcome of the attempt.
                if (!page.IsClosed) {
                    await page.CloseAsync();
                }
            }
        }
    }
}