refactor: modularize Beam into new projects and interfaces
- Introduced modularity by splitting Beam into new projects: Beam.Abstractions, Beam.Models, and Beam.Downloaders. - Refactored existing classes into appropriate namespaces and projects. - Replaced specific implementations with abstractions (e.g., SourceLinkBuilder to LinkBuilder, State to IState, etc.). - Updated interfaces: added ITemplate, IArticleData, IDownloadReport, and others for improved extensibility. - Removed deprecated classes like SourceLinkBuilder and StateChangerFactory. - Enhanced link handling in downloaders by refactoring to use `string` over `SourceLink`. - Consolidated shared logic under Beam.Abstractions.
This commit is contained in:
@@ -0,0 +1,67 @@
|
||||
using Beam.Abstractions;
|
||||
using Beam.Models;
|
||||
using HtmlAgilityPack;
|
||||
|
||||
namespace Beam.Downloaders {
|
||||
/// <summary>
|
||||
/// A download managing class that manages a singular download with failure-detection and exponential-backoff retries. This class is safe to instantiate per request.
|
||||
/// </summary>
|
||||
/// <typeparam name="T"></typeparam>
|
||||
/// <param name="web"></param>
|
||||
/// <param name="transformer"></param>
|
||||
/// <param name="failurePredicate"></param>
|
||||
public class UnitDownloader<T>(HtmlWeb web, AsyncTransformer<HtmlDocument, T> transformer, AsyncDownloadFailurePredicate<HtmlDocument>?[]? failurePredicate = null) : IUnitDownloader<T> {
|
||||
public HtmlWeb Web { get; } = web;
|
||||
public virtual AsyncTransformer<HtmlDocument, T> Transformer { get; } = transformer;
|
||||
public virtual AsyncDownloadFailurePredicate<HtmlDocument>?[]? FailurePredicates { get; } = failurePredicate;
|
||||
|
||||
public int LinksPerDownload { get; } = 1;
|
||||
|
||||
protected virtual async Task<bool> IsFailure(HtmlDocument doc) {
|
||||
if (FailurePredicates is null)
|
||||
return false;
|
||||
var failed = false;
|
||||
await Parallel.ForEachAsync(FailurePredicates, async (x, ct) => {
|
||||
if (failed == true)
|
||||
return;
|
||||
if (x is null)
|
||||
return;
|
||||
if (await x(doc))
|
||||
failed = true;
|
||||
});
|
||||
|
||||
return failed;
|
||||
}
|
||||
|
||||
protected virtual async Task<(bool, T?)> TryDownloadWithNoRetries(string link, CancellationToken ct) {
|
||||
try {
|
||||
var html = await Web.LoadFromWebAsync(link, ct);
|
||||
if (FailurePredicates is null || !(await IsFailure(html)))
|
||||
return (true, await Transformer(html));
|
||||
else
|
||||
return (false, default);
|
||||
} catch(Exception) {
|
||||
return (false, default);
|
||||
}
|
||||
}
|
||||
|
||||
public async Task<(bool, T?)> TryDownload(IOrdered<string>[] link, CancellationToken ct, int maximumRetryCount = 7, IProgress<IRetryReport>? tryProgress = null) {
|
||||
if (link.Length == 0)
|
||||
return (false, default);
|
||||
|
||||
T? doc = default;
|
||||
int tryCount = 0;
|
||||
while (tryCount < maximumRetryCount) {
|
||||
ct.ThrowIfCancellationRequested();
|
||||
(var success, doc) = await TryDownloadWithNoRetries(link[0].Data, ct);
|
||||
if (success && doc != null)
|
||||
return (true, doc);
|
||||
++tryCount;
|
||||
tryProgress?.Report(new RetryReport(tryCount, link[0].Data));
|
||||
await Task.Delay((int)Math.Pow(2, tryCount) * 1000);
|
||||
}
|
||||
|
||||
return (false, doc);
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user