3baa31a7cc
This introduces a new Puppeteer-based mechanism for downloading web content. It provides a flexible way to manipulate pages during downloads, enhancing the ability to handle dynamic content and improve the overall download process.
40 lines
1.6 KiB
C#
40 lines
1.6 KiB
C#
using Microsoft.Playwright;
|
|
|
|
namespace Beam.Puppeteer {
    /// <summary>
    /// Unit downloader that obtains a binary document through a browser page: it navigates
    /// to the link, runs a caller-supplied manipulator against the page, and captures the
    /// browser download that results.
    /// </summary>
    /// <typeparam name="T">Type produced by the transformer from the downloaded document.</typeparam>
    public class PuppetUnitDownloader<T> : UnitDownloaderBinary<T> {
        /// <summary>
        /// Callback invoked with the page after navigation; it is expected to trigger the
        /// download (for example by clicking a button).
        /// </summary>
        public AsyncManipulator PuppetManipulator { get; }

        /// <summary>
        /// Creates a downloader that uses <paramref name="puppetManipulator"/> to drive the page.
        /// </summary>
        /// <param name="client">HTTP client forwarded to the base downloader.</param>
        /// <param name="puppetManipulator">Page callback run after navigation to start the download.</param>
        /// <param name="asyncHtmlTransformer">Transformer forwarded to the base downloader; converts the downloaded document to <typeparamref name="T"/>.</param>
        /// <param name="asyncDownloadFailurePredicates">Failure predicates forwarded to the base downloader.</param>
        public PuppetUnitDownloader(HttpClient client, AsyncManipulator puppetManipulator, AsyncTransformer<ByteDocument, T> asyncHtmlTransformer, AsyncDownloadFailurePredicate<ByteDocument>[] asyncDownloadFailurePredicates)
            : base(client, asyncHtmlTransformer, asyncDownloadFailurePredicates) {
            PuppetManipulator = puppetManipulator;
        }

        /// <summary>
        /// Performs a single download attempt for <paramref name="link"/> in a fresh browser page.
        /// </summary>
        /// <param name="link">Address the page navigates to before the manipulator runs.</param>
        /// <param name="ct">Token observed while the downloaded bytes are copied into memory.</param>
        /// <returns>
        /// <c>(true, value)</c> when the download succeeded and was transformed;
        /// <c>(false, default)</c> when a failure predicate rejected the document or any
        /// exception (including a timeout or cancellation) occurred during the attempt.
        /// </returns>
        protected override async Task<(bool, T?)> TryDownloadWithNoRetries(string link, CancellationToken ct) {
            var page = await PuppetContext.Browser.Value.NewPageAsync();
            try {
                await page.GotoAsync(link);

                // Start listening for the download BEFORE the manipulator runs. Waiting only
                // after it has finished misses downloads that the manipulator itself starts.
                // NOTE(review): the wait timeout now also covers the manipulator's run time.
                var download = await page.RunAndWaitForDownloadAsync(async () => {
                    await PuppetManipulator(page);
                });

                // Copy through a growable buffer: the download stream is only typed as Stream,
                // so it is not guaranteed to be seekable or to report a Length.
                await using var stream = await download.CreateReadStreamAsync();
                using var buffer = new System.IO.MemoryStream();
                await stream.CopyToAsync(buffer, ct);
                byte[] content = buffer.ToArray();

                ByteDocument doc = new ByteDocument(download.SuggestedFilename, content);
                if (FailurePredicates is not null && await IsFailure(doc)) {
                    return (false, default);
                }

                var transformed = await Transformer(doc);
                return (true, transformed);
            } catch (Exception) {
                // Deliberate best-effort: every failure of this attempt is reported as
                // "not downloaded" rather than thrown to the caller.
                return (false, default);
            } finally {
                // Always release the page, whether the attempt succeeded or not.
                if (!page.IsClosed) {
                    await page.CloseAsync();
                }
            }
        }
    }
}
|