using Beam.Abstractions;
using Beam.Models;
using HtmlAgilityPack;

namespace Beam.Downloaders
{
    // NOTE(review): generic type arguments appear to be missing in this file as received
    // (e.g. `T` is referenced but not declared here, and IsFailure is declared as `Task`
    // while returning bool values). The type names below are reproduced exactly as found;
    // confirm the intended arity of UnitDownloader, AsyncTransformer, IOrdered, IProgress
    // and IUnitDownloader against the rest of the project.

    /// <summary>
    /// A download managing class that manages a singular download with failure-detection
    /// and exponential-backoff retries. This class is safe to instantiate per request.
    /// </summary>
    /// <param name="web">The <see cref="HtmlWeb"/> instance used to load the page.</param>
    /// <param name="transformer">Delegate applied to the loaded document to produce the download result.</param>
    /// <param name="failurePredicate">
    /// Optional predicates run against the loaded document; if any of them returns <c>true</c>
    /// the attempt is treated as failed. <c>null</c> entries are skipped.
    /// </param>
    public class UnitDownloader(HtmlWeb web, AsyncTransformer transformer, AsyncDownloadFailurePredicate?[]? failurePredicate = null) : IUnitDownloader
    {
        /// <summary>The <see cref="HtmlWeb"/> instance used to load pages.</summary>
        public HtmlWeb Web { get; } = web;

        /// <summary>The delegate that converts a loaded document into the download result.</summary>
        public virtual AsyncTransformer Transformer { get; } = transformer;

        /// <summary>The optional failure predicates evaluated against every loaded document.</summary>
        public virtual AsyncDownloadFailurePredicate?[]? FailurePredicates { get; } = failurePredicate;

        /// <summary>Number of links consumed per download; only the first link is used by <see cref="TryDownload"/>.</summary>
        public int LinksPerDownload { get; } = 1;

        /// <summary>
        /// Runs every non-null failure predicate against <paramref name="doc"/>.
        /// </summary>
        /// <param name="doc">The document that was just loaded.</param>
        /// <returns>
        /// <c>true</c> if at least one predicate reported a failure; <c>false</c> if none did
        /// or if <see cref="FailurePredicates"/> is <c>null</c>.
        /// </returns>
        protected virtual async Task IsFailure(HtmlDocument doc)
        {
            if (FailurePredicates is null)
            {
                return false;
            }

            var failed = false;
            await Parallel.ForEachAsync(FailurePredicates, async (predicate, _) =>
            {
                // Best-effort short-circuit: once any predicate has flagged a failure,
                // predicates that have not started yet are skipped.
                if (failed || predicate is null)
                {
                    return;
                }

                if (await predicate(doc))
                {
                    failed = true;
                }
            });

            return failed;
        }

        /// <summary>
        /// Performs exactly one download attempt for <paramref name="link"/>.
        /// </summary>
        /// <param name="link">The address to load.</param>
        /// <param name="ct">Token forwarded to the page load.</param>
        /// <returns>
        /// <c>(true, result)</c> when the page loaded, no failure predicate fired and the
        /// transformer completed; <c>(false, default)</c> when a failure predicate fired or
        /// any exception was thrown.
        /// </returns>
        protected virtual async Task<(bool, T?)> TryDownloadWithNoRetries(string link, CancellationToken ct)
        {
            try
            {
                var html = await Web.LoadFromWebAsync(link, ct);

                if (FailurePredicates is not null && await IsFailure(html))
                {
                    return (false, default);
                }

                return (true, await Transformer(html));
            }
            catch (Exception)
            {
                // Deliberate "Try" semantics: every error during a single attempt is reported
                // as a failed attempt so that the caller can decide whether to retry.
                return (false, default);
            }
        }

        /// <summary>
        /// Downloads the first entry of <paramref name="link"/>, retrying with exponential
        /// backoff (2, 4, 8, ... seconds) between failed attempts.
        /// </summary>
        /// <param name="link">The links to download; only <c>link[0]</c> is used.</param>
        /// <param name="ct">
        /// Cancels the operation. It is observed before every attempt and while waiting
        /// between attempts.
        /// </param>
        /// <param name="maximumRetryCount">Maximum number of attempts to make.</param>
        /// <param name="tryProgress">
        /// Optional sink that receives a <c>RetryReport</c> (attempt number and link) after every failed attempt.
        /// </param>
        /// <returns>
        /// <c>(true, result)</c> as soon as an attempt succeeds with a non-null result;
        /// <c>(false, default)</c> if <paramref name="link"/> is empty; otherwise
        /// <c>(false, lastResult)</c> once all attempts are used up.
        /// </returns>
        /// <exception cref="OperationCanceledException">
        /// <paramref name="ct"/> was cancelled before an attempt or during a backoff wait.
        /// </exception>
        public async Task<(bool, T?)> TryDownload(IOrdered[] link, CancellationToken ct, int maximumRetryCount = 7, IProgress? tryProgress = null)
        {
            if (link.Length == 0)
            {
                return (false, default);
            }

            T? doc = default;
            int tryCount = 0;

            while (tryCount < maximumRetryCount)
            {
                ct.ThrowIfCancellationRequested();

                (var success, doc) = await TryDownloadWithNoRetries(link[0].Data, ct);
                if (success && doc is not null)
                {
                    return (true, doc);
                }

                ++tryCount;
                tryProgress?.Report(new RetryReport(tryCount, link[0].Data));

                // Only back off when another attempt will actually follow; waiting after the
                // final attempt would just postpone reporting the failure.
                if (tryCount < maximumRetryCount)
                {
                    // Passing the token lets cancellation interrupt the wait immediately.
                    await Task.Delay(GetBackoffDelayMilliseconds(tryCount), ct);
                }
            }

            return (false, doc);
        }

        /// <summary>
        /// Computes the backoff delay of 2^<paramref name="attempt"/> seconds in milliseconds,
        /// clamped to <see cref="int.MaxValue"/> so that large attempt numbers cannot overflow
        /// into an invalid (negative) delay.
        /// </summary>
        private static int GetBackoffDelayMilliseconds(int attempt)
        {
            return (int)Math.Min(Math.Pow(2, attempt) * 1000d, int.MaxValue);
        }
    }
}