using Beam.Abstractions;
using Beam.Models;
using HtmlAgilityPack;
namespace Beam.Downloaders {
/// <summary>
/// Manages a single download with failure detection and exponential back-off retries. This class is safe to instantiate per request.
/// </summary>
/// <typeparam name="T">The type produced by <paramref name="transformer"/> from a downloaded document.</typeparam>
/// <param name="web">The <see cref="HtmlWeb"/> client used to load the document behind a link.</param>
/// <param name="transformer">Converts a loaded <see cref="HtmlDocument"/> into a <typeparamref name="T"/>.</param>
/// <param name="failurePredicate">Optional predicates; a loaded document is rejected when any of them returns <see langword="true"/>. <see langword="null"/> entries are skipped.</param>
public class UnitDownloader<T>(HtmlWeb web, AsyncTransformer<T> transformer, AsyncDownloadFailurePredicate?[]? failurePredicate = null) : IUnitDownloader<T> {
    // 2^21 seconds is the largest power-of-two delay whose millisecond count still fits in an Int32,
    // i.e. the largest delay the previous int-based computation could express without overflowing.
    // Larger attempt numbers are clamped to this exponent instead of wrapping to a negative delay.
    private const int MaxBackoffExponent = 21;

    /// <summary>The client used to load the document behind a link.</summary>
    public HtmlWeb Web { get; } = web;

    /// <summary>The delegate that converts a loaded document into the download result.</summary>
    public virtual AsyncTransformer<T> Transformer { get; } = transformer;

    /// <summary>The predicates used to reject a loaded document, or <see langword="null"/> when no checks are configured.</summary>
    public virtual AsyncDownloadFailurePredicate?[]? FailurePredicates { get; } = failurePredicate;

    /// <summary>Always 1: <see cref="TryDownload"/> only reads the first element of the links it is given.</summary>
    public int LinksPerDownload { get; } = 1;

    /// <summary>
    /// Evaluates <see cref="FailurePredicates"/> against <paramref name="doc"/>. The predicates are run
    /// through <see cref="Parallel.ForEachAsync{TSource}(System.Collections.Generic.IEnumerable{TSource}, Func{TSource, CancellationToken, ValueTask})"/>,
    /// so they may execute concurrently.
    /// </summary>
    /// <param name="doc">The loaded document to inspect.</param>
    /// <returns>
    /// <see langword="true"/> when at least one predicate reports a failure; <see langword="false"/> when none do
    /// or when <see cref="FailurePredicates"/> is <see langword="null"/>.
    /// </returns>
    protected virtual async Task<bool> IsFailure(HtmlDocument doc) {
        if (FailurePredicates is null) {
            return false;
        }

        // Shared between the workers. It only ever flips from false to true, and the value that is
        // returned is read after ForEachAsync has completed. The check inside the lambda is a
        // best-effort shortcut that skips predicates once a failure has already been seen.
        var failed = false;
        await Parallel.ForEachAsync(FailurePredicates, async (predicate, _) => {
            if (failed || predicate is null) {
                return;
            }
            if (await predicate(doc)) {
                failed = true;
            }
        });
        return failed;
    }

    /// <summary>
    /// Performs one download attempt: loads <paramref name="link"/>, applies the failure predicates and,
    /// if the document is accepted, runs <see cref="Transformer"/> on it.
    /// </summary>
    /// <param name="link">The address to load.</param>
    /// <param name="ct">Token passed to the load call.</param>
    /// <returns>
    /// <c>(true, result)</c> when the document was loaded, accepted and transformed;
    /// <c>(false, default)</c> when a predicate rejected it or any step threw.
    /// </returns>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled during the attempt.</exception>
    protected virtual async Task<(bool, T?)> TryDownloadWithNoRetries(string link, CancellationToken ct) {
        try {
            var html = await Web.LoadFromWebAsync(link, ct);
            if (FailurePredicates is not null && await IsFailure(html)) {
                return (false, default);
            }
            return (true, await Transformer(html));
        } catch (OperationCanceledException) when (ct.IsCancellationRequested) {
            // A cancelled request is not a failed attempt: let it surface instead of triggering a retry.
            throw;
        } catch (Exception) {
            // Best effort: any other error (network, predicate, transformer) counts as a failed attempt.
            return (false, default);
        }
    }

    /// <summary>
    /// Downloads the first element of <paramref name="link"/>, retrying with exponential back-off
    /// (2^n seconds after the n-th failed attempt) until an attempt succeeds or the attempt budget is used up.
    /// No delay is inserted after the final failed attempt.
    /// </summary>
    /// <param name="link">The links to download; only the first element is used. An empty array yields <c>(false, default)</c>.</param>
    /// <param name="ct">Token observed before every attempt, during the load and during the back-off wait.</param>
    /// <param name="maximumRetryCount">The maximum number of attempts. Values of zero or less perform no attempt.</param>
    /// <param name="tryProgress">Optional sink that receives a <see cref="RetryReport"/> after every failed attempt.</param>
    /// <returns>
    /// <c>(true, result)</c> as soon as an attempt succeeds with a non-null result; otherwise
    /// <c>(false, lastResult)</c>, where <c>lastResult</c> is whatever the last attempt returned.
    /// </returns>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
    public async Task<(bool, T?)> TryDownload(IOrdered<string>[] link, CancellationToken ct, int maximumRetryCount = 7, IProgress<RetryReport>? tryProgress = null) {
        if (link.Length == 0) {
            return (false, default);
        }

        var target = link[0].Data;
        T? doc = default;
        for (var attempt = 1; attempt <= maximumRetryCount; ++attempt) {
            ct.ThrowIfCancellationRequested();
            (var success, doc) = await TryDownloadWithNoRetries(target, ct);
            if (success && doc is not null) {
                return (true, doc);
            }

            tryProgress?.Report(new RetryReport(attempt, target));
            if (attempt == maximumRetryCount) {
                // Nothing left to retry, so waiting here would only postpone the failure result.
                break;
            }

            var delay = TimeSpan.FromSeconds(1 << Math.Min(attempt, MaxBackoffExponent));
            await Task.Delay(delay, ct);
        }
        return (false, doc);
    }
}
}