using HtmlAgilityPack;
using Microsoft.Playwright;
namespace Beam.Puppeteer {
/// <summary>
/// Bundles a Playwright instance with a browser obtained from it so both can be
/// handed around as a single object.
/// </summary>
public class PuppetContext {
    /// <summary>
    /// Creates a context holding the given Playwright instance and browser.
    /// </summary>
    /// <param name="playwright">Stored in <see cref="Playwright"/>.</param>
    /// <param name="browser">Stored in <see cref="Browser"/>.</param>
    public PuppetContext(IPlaywright playwright, IBrowser browser) {
        Playwright = playwright;
        Browser = browser;
    }

    /// <summary>The Playwright instance supplied at construction; may be replaced later.</summary>
    public IPlaywright Playwright { get; set; }

    /// <summary>The browser supplied at construction; may be replaced later.</summary>
    public IBrowser Browser { get; set; }
}
/// <summary>
/// A <c>UnitDownloader</c> that obtains a page's markup by loading it in a
/// Playwright browser page and then parsing the resulting HTML.
/// </summary>
public class PuppetUnitDownloader : UnitDownloader {
    /// <summary>Playwright/browser pair used to open a page for each download attempt.</summary>
    public PuppetContext Context { get; }

    /// <summary>
    /// Creates a downloader that loads pages through the browser held by <paramref name="pc"/>.
    /// </summary>
    /// <param name="pc">Browser context stored in <see cref="Context"/>.</param>
    /// <param name="context">Download context; only its <c>Web</c> member is forwarded to the base class.</param>
    /// <param name="asyncHtmlTransformer">Transformer forwarded to the base class.</param>
    /// <param name="asyncDownloadFailurePredicates">Failure predicates forwarded to the base class.</param>
    public PuppetUnitDownloader(PuppetContext pc, DownloadContext context, AsyncTransformer asyncHtmlTransformer, AsyncDownloadFailurePredicate[] asyncDownloadFailurePredicates)
        : base(context.Web, asyncHtmlTransformer, asyncDownloadFailurePredicates) {
        Context = pc;
    }

    /// <summary>
    /// Makes a single attempt to load <paramref name="link"/> in a new browser page,
    /// parse its HTML and run the transformer over the parsed document.
    /// </summary>
    /// <param name="link">Address the browser page navigates to.</param>
    /// <param name="ct">Cancellation token. Not observed by this implementation.</param>
    /// <returns>
    /// <c>(true, transformed)</c> when there are no failure predicates or the document is
    /// not judged a failure; <c>(false, default)</c> when it is judged a failure or when
    /// any exception is thrown after the page has been opened.
    /// </returns>
    /// <remarks>
    /// An exception thrown while opening the page itself is not caught here and
    /// propagates to the caller.
    /// </remarks>
    protected override async Task<(bool, T?)> TryDownloadWithNoRetries(string link, CancellationToken ct) {
        var page = await Context.Browser.NewPageAsync();
        try {
            // A freshly opened page is blank; navigate to the requested address
            // before reading its markup, otherwise the link is never fetched.
            await page.GotoAsync(link);
            var content = await page.ContentAsync();

            // Release the browser page as soon as the markup is in hand, before
            // the transformer and failure predicates run.
            await page.CloseAsync();

            HtmlDocument doc = new();
            doc.LoadHtml(content);

            // The transformer runs before the failure check, so the check sees the
            // document in whatever state the transformer leaves it.
            var transformed = await Transformer(doc);
            if (FailurePredicates is null || !(await IsFailure(doc)))
                return (true, transformed);
            return (false, default);
        } catch (Exception) {
            // Best-effort attempt: any error is reported as a failed download
            // rather than thrown.
            return (false, default);
        } finally {
            // Covers the paths where an exception occurred before the early close.
            if (!page.IsClosed)
                await page.CloseAsync();
        }
    }
}
}