using HtmlAgilityPack;
using Microsoft.Playwright;
namespace Beam.Puppeteer {
/// <summary>
/// A <c>UnitDownloader</c> that obtains page HTML through a Playwright browser page
/// (taken from <c>PuppetContext.Browser</c>) and lets a caller-supplied manipulator
/// act on the page before its content is captured.
/// </summary>
public class PuppetUnitPageDownloader : UnitDownloader {
    /// <summary>
    /// Callback awaited with the Playwright page after navigation and before the page content is read.
    /// </summary>
    public AsyncManipulator PuppetManipulator { get; }

    /// <summary>
    /// Creates a downloader that drives a Playwright page with <paramref name="puppetManipulator"/>.
    /// </summary>
    /// <param name="web">Forwarded unchanged to the base constructor.</param>
    /// <param name="puppetManipulator">Callback awaited with the page once navigation has completed.</param>
    /// <param name="asyncHtmlTransformer">Forwarded unchanged to the base constructor.</param>
    /// <param name="asyncDownloadFailurePredicates">Forwarded unchanged to the base constructor.</param>
    /// <exception cref="ArgumentNullException"><paramref name="puppetManipulator"/> is <c>null</c>.</exception>
    public PuppetUnitPageDownloader(HtmlWeb web, AsyncManipulator puppetManipulator, AsyncTransformer asyncHtmlTransformer, AsyncDownloadFailurePredicate[] asyncDownloadFailurePredicates)
        : base(web, asyncHtmlTransformer, asyncDownloadFailurePredicates) {
        // Fail fast: a null manipulator would otherwise throw inside the download's
        // catch-all on every attempt and surface only as a silent (false, default).
        ArgumentNullException.ThrowIfNull(puppetManipulator);
        PuppetManipulator = puppetManipulator;
    }

    /// <summary>
    /// Makes a single attempt to load <paramref name="link"/> in a new browser page, run
    /// <see cref="PuppetManipulator"/> on it, and transform the resulting HTML.
    /// </summary>
    /// <param name="link">Address passed to the page's <c>GotoAsync</c>.</param>
    /// <param name="ct">
    /// Cancellation token. NOTE(review): this override does not currently observe it;
    /// confirm with the caller's retry logic whether cancellation should be honored here.
    /// </param>
    /// <returns>
    /// <c>(true, transformed)</c> when <c>FailurePredicates</c> is <c>null</c> or <c>IsFailure</c>
    /// reports no failure for the loaded document; <c>(false, default)</c> when a failure is
    /// reported or when any exception is thrown after the page has been created.
    /// </returns>
    protected override async Task<(bool, T?)> TryDownloadWithNoRetries(string link, CancellationToken ct) {
        // Page creation stays outside the try block, so a failure to open a page
        // propagates to the caller instead of being reported as a failed download.
        var page = await PuppetContext.Browser.Value.NewPageAsync();
        try {
            await page.GotoAsync(link);
            await PuppetManipulator(page);
            var content = await page.ContentAsync();

            // Release the page as soon as its HTML is captured; the remaining work
            // operates on the in-memory document only.
            await page.CloseAsync();

            HtmlDocument doc = new();
            doc.LoadHtml(content);

            // Order matters: the transformer runs before the failure check, exactly as before.
            // NOTE(review): whether Transformer mutates doc is not visible here — confirm
            // before reordering these two steps.
            var transformed = await Transformer(doc);
            if (FailurePredicates is null || !(await IsFailure(doc))) {
                return (true, transformed);
            }
            return (false, default);
        } catch (Exception) {
            // Any failure during navigation, manipulation or transformation counts as a failed attempt.
            return (false, default);
        } finally {
            if (!page.IsClosed) {
                try {
                    await page.CloseAsync();
                } catch (Exception) {
                    // Best-effort cleanup: a close failure must not replace the result
                    // computed above with an exception.
                }
            }
        }
    }
}
}