7ed05abdb8
- Introduced modularity by splitting Beam into new projects: Beam.Abstractions, Beam.Models, and Beam.Downloaders. - Refactored existing classes into appropriate namespaces and projects. - Replaced specific implementations with abstractions (e.g., SourceLinkBuilder to LinkBuilder, State to IState, etc.). - Updated interfaces: added ITemplate, IArticleData, IDownloadReport, and others for improved extensibility. - Removed deprecated classes like SourceLinkBuilder and StateChangerFactory. - Enhanced link handling in downloaders by refactoring to use `string` over `SourceLink`. - Consolidated shared logic under Beam.Abstractions.
40 lines
1.5 KiB
C#
40 lines
1.5 KiB
C#
|
|
using Beam.Downloaders;
|
|
using Beam.Models;
|
|
using HtmlAgilityPack;
|
|
using Microsoft.Playwright;
|
|
|
|
namespace Beam.Playwright {
|
|
/// <summary>
/// Unit downloader that opens the link in a Playwright browser page, lets a caller-supplied
/// manipulator act on that page, and then hands the resulting HTML to the transformer
/// to produce a <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">Type produced by the HTML transformer.</typeparam>
public class PlaywrightUnitPageDownloader<T> : UnitDownloader<T> {
    /// <summary>
    /// Delegate awaited with the page after navigation and before the HTML is captured.
    /// </summary>
    public PlaywrightAsyncManipulator PuppetManipulator { get; }

    /// <summary>
    /// Creates the downloader.
    /// </summary>
    /// <param name="web">Forwarded unchanged to the base downloader.</param>
    /// <param name="puppetManipulator">Delegate awaited with each page once it has been navigated to the link.</param>
    /// <param name="asyncHtmlTransformer">Forwarded to the base downloader; converts the captured document into <typeparamref name="T"/>.</param>
    /// <param name="asyncDownloadFailurePredicates">Forwarded to the base downloader; used to decide whether a captured document counts as a failed download.</param>
    public PlaywrightUnitPageDownloader(HtmlWeb web, PlaywrightAsyncManipulator puppetManipulator, AsyncTransformer<HtmlDocument, T> asyncHtmlTransformer, AsyncDownloadFailurePredicate<HtmlDocument>[] asyncDownloadFailurePredicates)
        : base(web, asyncHtmlTransformer, asyncDownloadFailurePredicates) {
        PuppetManipulator = puppetManipulator;
    }

    /// <summary>
    /// Makes a single attempt to load <paramref name="link"/> in a new browser page,
    /// run <see cref="PuppetManipulator"/> on it, and transform the captured HTML.
    /// </summary>
    /// <param name="link">Address passed to the page's navigation call.</param>
    /// <param name="ct">
    /// Cancellation token. When cancellation has been requested the attempt stops at the next
    /// checkpoint and is reported as a failed attempt rather than by throwing.
    /// </param>
    /// <returns>
    /// <c>(true, transformed)</c> when the document was transformed and either no failure
    /// predicates are configured or the failure check reports no failure;
    /// <c>(false, default)</c> when the failure check trips, any step after the page was
    /// opened throws, or cancellation was requested.
    /// </returns>
    protected override async Task<(bool, T?)> TryDownloadWithNoRetries(string link, CancellationToken ct) {
        // Do not open a browser page for work that has already been cancelled.
        if (ct.IsCancellationRequested) {
            return (false, default);
        }

        var page = await PlaywrightContext.Browser.Value.NewPageAsync();
        try {
            await page.GotoAsync(link);
            if (ct.IsCancellationRequested) {
                return (false, default);
            }

            await PuppetManipulator(page);
            var content = await page.ContentAsync();

            // Close the page as soon as the HTML is captured so it is not held open
            // while the transformer and the failure predicates run.
            await page.CloseAsync();
            if (ct.IsCancellationRequested) {
                return (false, default);
            }

            HtmlDocument doc = new();
            doc.LoadHtml(content);

            var transformed = await Transformer(doc);
            if (FailurePredicates is null || !(await IsFailure(doc))) {
                return (true, transformed);
            }

            return (false, default);
        } catch (Exception) {
            // Any failure while navigating, manipulating, or transforming counts as a failed attempt.
            return (false, default);
        } finally {
            if (!page.IsClosed) {
                try {
                    await page.CloseAsync();
                } catch (Exception) {
                    // Best-effort cleanup: the page is only still open here when an earlier step
                    // failed or was cancelled, and a failing close must not replace the
                    // (false, default) result with an exception.
                }
            }
        }
    }
}
|
|
|
|
}
|