7ed05abdb8
- Introduced modularity by splitting Beam into new projects: Beam.Abstractions, Beam.Models, and Beam.Downloaders. - Refactored existing classes into appropriate namespaces and projects. - Replaced specific implementations with abstractions (e.g., SourceLinkBuilder to LinkBuilder, State to IState, etc.). - Updated interfaces: added ITemplate, IArticleData, IDownloadReport, and others for improved extensibility. - Removed deprecated classes like SourceLinkBuilder and StateChangerFactory. - Enhanced link handling in downloaders by refactoring to use `string` over `SourceLink`. - Consolidated shared logic under Beam.Abstractions.
68 lines
2.7 KiB
C#
68 lines
2.7 KiB
C#
using Beam.Abstractions;
|
|
using Beam.Models;
|
|
using HtmlAgilityPack;
|
|
|
|
namespace Beam.Downloaders {
|
|
/// <summary>
/// Manages a single download with failure detection and exponential-backoff retries.
/// This class is safe to instantiate per request.
/// </summary>
/// <typeparam name="T">Type produced by the transformer from a downloaded document.</typeparam>
/// <param name="web">The <see cref="HtmlWeb"/> instance used to load documents.</param>
/// <param name="transformer">Asynchronous function that converts a loaded <see cref="HtmlDocument"/> into a <typeparamref name="T"/>.</param>
/// <param name="failurePredicate">
/// Optional predicates evaluated against every loaded document; if any of them returns <c>true</c>
/// the attempt is treated as failed. <c>null</c> entries are skipped, and a <c>null</c> array disables the check.
/// </param>
public class UnitDownloader<T>(HtmlWeb web, AsyncTransformer<HtmlDocument, T> transformer, AsyncDownloadFailurePredicate<HtmlDocument>?[]? failurePredicate = null) : IUnitDownloader<T> {
    /// <summary>The <see cref="HtmlWeb"/> used to load documents.</summary>
    public HtmlWeb Web { get; } = web;

    /// <summary>Converts a successfully loaded document into the download result.</summary>
    public virtual AsyncTransformer<HtmlDocument, T> Transformer { get; } = transformer;

    /// <summary>Predicates that flag a loaded document as a failed download; may be <c>null</c>.</summary>
    public virtual AsyncDownloadFailurePredicate<HtmlDocument>?[]? FailurePredicates { get; } = failurePredicate;

    /// <summary>Always 1: <see cref="TryDownload"/> only reads the first link it is given.</summary>
    public int LinksPerDownload { get; } = 1;

    /// <summary>
    /// Runs the configured failure predicates against <paramref name="doc"/> in parallel.
    /// </summary>
    /// <param name="doc">The document that was just loaded.</param>
    /// <returns>
    /// <c>true</c> if at least one predicate reported a failure; <c>false</c> if none did
    /// or if <see cref="FailurePredicates"/> is <c>null</c>.
    /// </returns>
    protected virtual async Task<bool> IsFailure(HtmlDocument doc) {
        var predicates = FailurePredicates;
        if (predicates is null) {
            return false;
        }

        // The flag only ever flips from false to true, so a stale read in a worker
        // merely lets one extra predicate run; it can never hide a detected failure.
        var failed = false;
        await Parallel.ForEachAsync(predicates, async (predicate, _) => {
            if (failed || predicate is null) {
                return;
            }

            if (await predicate(doc)) {
                failed = true;
            }
        });

        return failed;
    }

    /// <summary>
    /// Performs exactly one download attempt for <paramref name="link"/>.
    /// </summary>
    /// <param name="link">The address to load.</param>
    /// <param name="ct">Token used to cancel the load.</param>
    /// <returns>
    /// <c>(true, result)</c> when the document loaded, passed the failure predicates and was transformed;
    /// <c>(false, default)</c> when the document was flagged as a failure or any non-cancellation exception occurred.
    /// </returns>
    /// <exception cref="OperationCanceledException">
    /// <paramref name="ct"/> was cancelled while the attempt was running.
    /// </exception>
    protected virtual async Task<(bool, T?)> TryDownloadWithNoRetries(string link, CancellationToken ct) {
        try {
            var html = await Web.LoadFromWebAsync(link, ct);
            if (FailurePredicates is null || !(await IsFailure(html))) {
                return (true, await Transformer(html));
            }

            return (false, default);
        } catch (OperationCanceledException) when (ct.IsCancellationRequested) {
            // A cancellation requested by the caller is not a download failure:
            // let it propagate instead of being retried.
            throw;
        } catch (Exception) {
            // Best effort: any other error (network, parsing, predicate, transformer)
            // counts as a failed attempt so the caller can retry.
            return (false, default);
        }
    }

    /// <summary>
    /// Downloads the first entry of <paramref name="link"/>, retrying with exponential backoff on failure.
    /// </summary>
    /// <param name="link">Links to download; only the first element is used. An empty array yields <c>(false, default)</c>.</param>
    /// <param name="ct">Token that cancels both the download attempts and the waits between them.</param>
    /// <param name="maximumRetryCount">Maximum number of attempts. Values of zero or less result in no attempt.</param>
    /// <param name="tryProgress">Optional sink that receives a report after every failed attempt.</param>
    /// <returns>
    /// <c>(true, result)</c> as soon as an attempt succeeds with a non-null result;
    /// otherwise <c>(false, lastResult)</c> once all attempts are used up.
    /// </returns>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
    public async Task<(bool, T?)> TryDownload(IOrdered<string>[] link, CancellationToken ct, int maximumRetryCount = 7, IProgress<IRetryReport>? tryProgress = null) {
        if (link.Length == 0) {
            return (false, default);
        }

        var url = link[0].Data;
        T? doc = default;
        for (var tryCount = 1; tryCount <= maximumRetryCount; ++tryCount) {
            ct.ThrowIfCancellationRequested();

            (var success, doc) = await TryDownloadWithNoRetries(url, ct);
            if (success && doc is not null) {
                return (true, doc);
            }

            tryProgress?.Report(new RetryReport(tryCount, url));

            // Back off only when another attempt will follow; waiting after the
            // final failure would just postpone the negative result.
            if (tryCount < maximumRetryCount) {
                await Task.Delay(BackoffDelay(tryCount), ct);
            }
        }

        return (false, doc);
    }

    /// <summary>
    /// Computes the wait after the given failed attempt: 2^<paramref name="failedAttempts"/> seconds,
    /// clamped to <see cref="int.MaxValue"/> milliseconds so the value stays valid for <see cref="Task.Delay(TimeSpan, CancellationToken)"/>.
    /// </summary>
    /// <param name="failedAttempts">Number of attempts that have failed so far (1-based).</param>
    private static TimeSpan BackoffDelay(int failedAttempts) {
        // Computed in double to avoid the int overflow that 2^n * 1000 hits for n >= 22.
        var milliseconds = Math.Pow(2, failedAttempts) * 1000d;
        return TimeSpan.FromMilliseconds(Math.Min(milliseconds, int.MaxValue));
    }
}
|
|
}
|