3baa31a7cc
This introduces a new Puppeteer-based mechanism for downloading web content. It provides a flexible way to manipulate pages during downloads, enhancing the ability to handle dynamic content and improve the overall download process.
76 lines
3.0 KiB
C#
76 lines
3.0 KiB
C#
using System;
using System.Net.Http;
using System.Threading;
using System.Threading.Tasks;

namespace Beam {
    /// <summary>
    /// Downloads a single link as raw bytes through an injected <see cref="HttpClient"/>,
    /// runs the optional failure predicates on the payload, and converts it with an
    /// <see cref="AsyncTransformer{TInput, TOutput}"/>. <see cref="TryDownload"/> wraps this
    /// in a retry loop with exponential back-off.
    /// The client is injected rather than created here, so instances are cheap to create per request.
    /// </summary>
    /// <typeparam name="T">Type produced by the transformer.</typeparam>
    /// <param name="client">HTTP client used for every request.</param>
    /// <param name="transformer">Converts the downloaded <see cref="ByteDocument"/> into <typeparamref name="T"/>.</param>
    /// <param name="failurePredicates">
    /// Optional checks run on the downloaded document; if any returns <c>true</c> the attempt
    /// counts as failed. Both the array and its individual entries may be <c>null</c>.
    /// </param>
    public class UnitDownloaderBinary<T>(
        HttpClient client,
        AsyncTransformer<ByteDocument, T> transformer,
        AsyncDownloadFailurePredicate<ByteDocument>?[]? failurePredicates = null)
        : IUnitDownloader<T> {
        /// <summary>HTTP client used for every request.</summary>
        public HttpClient Client { get; } = client;

        /// <summary>Converts a downloaded document into the final result.</summary>
        public virtual AsyncTransformer<ByteDocument, T> Transformer { get; } = transformer;

        /// <summary>Optional failure checks applied to each downloaded document.</summary>
        public virtual AsyncDownloadFailurePredicate<ByteDocument>?[]? FailurePredicates { get; } = failurePredicates;

        /// <summary>Number of links consumed per download; this downloader only uses the first one.</summary>
        public int LinksPerDownload { get; } = 1;

        /// <summary>Runs the configured failure predicates in parallel on the downloaded document.</summary>
        /// <param name="response">Document built from the response body.</param>
        /// <returns><c>true</c> if at least one predicate flagged the document as a failure.</returns>
        protected virtual async Task<bool> IsFailure(ByteDocument response) {
            var predicates = FailurePredicates;
            if (predicates is null) return false;

            // Only ever flipped from false to true; reading it first is a best-effort
            // short-circuit so remaining predicates can be skipped once one has failed.
            var failed = false;
            await Parallel.ForEachAsync(predicates, async (pred, _) => {
                if (failed || pred is null) return;
                if (await pred(response)) failed = true;
            });
            return failed;
        }

        /// <summary>One attempt without retries or back-off.</summary>
        /// <param name="link">Address to request.</param>
        /// <param name="ct">Token passed to the HTTP request and the body read.</param>
        /// <returns>
        /// <c>(true, value)</c> when the request succeeded, no failure predicate fired and the
        /// transformer ran; <c>(false, default)</c> otherwise. Never throws: every exception,
        /// including cancellation, is reported as a failed attempt.
        /// </returns>
        protected virtual async Task<(bool Success, T? Result)> TryDownloadWithNoRetries(string link, CancellationToken ct) {
            try {
                using var response = await Client.GetAsync(link, HttpCompletionOption.ResponseHeadersRead, ct);
                if (!response.IsSuccessStatusCode) return (false, default);

                var payload = await response.Content.ReadAsByteArrayAsync(ct);
                var document = new ByteDocument(link, payload);
                if (await IsFailure(document)) return (false, default);

                return (true, await Transformer(document));
            } catch {
                // Deliberate best-effort: the caller decides whether to retry.
                return (false, default);
            }
        }

        /// <summary>
        /// Downloads the first link, retrying failed attempts with exponential back-off
        /// (2^attempt seconds between attempts).
        /// </summary>
        /// <param name="link">Links to download; only the first element is used.</param>
        /// <param name="ct">Cancels the attempts and the back-off waits.</param>
        /// <param name="maximumRetryCount">Maximum number of attempts to make.</param>
        /// <param name="tryProgress">Receives a report after every failed attempt.</param>
        /// <returns>
        /// <c>(true, value)</c> on the first successful attempt with a non-null value;
        /// <c>(false, …)</c> when <paramref name="link"/> is empty or all attempts failed.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="link"/> is <c>null</c>.</exception>
        /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
        public async Task<(bool, T?)> TryDownload(
            Ordered<string>[] link,
            CancellationToken ct,
            int maximumRetryCount = 7,
            IProgress<RetryReport>? tryProgress = null) {
            ArgumentNullException.ThrowIfNull(link);
            if (link.Length == 0) return (false, default);

            var url = link[0].Data;
            T? result = default;

            for (var attempt = 1; attempt <= maximumRetryCount; ++attempt) {
                ct.ThrowIfCancellationRequested();

                (var success, result) = await TryDownloadWithNoRetries(url, ct);
                if (success && result is not null) return (true, result);

                // A cancelled request surfaces above as a plain failed attempt; propagate the
                // cancellation here instead of reporting it as a retry.
                ct.ThrowIfCancellationRequested();

                tryProgress?.Report(new RetryReport(attempt, url));

                // No attempt follows the last one, so waiting would only delay the failure result.
                if (attempt == maximumRetryCount) break;

                await Task.Delay(BackOffDelayMilliseconds(attempt), ct);
            }

            return (false, result);
        }

        /// <summary>
        /// Back-off before the next attempt: 2^attempt seconds, clamped to
        /// <see cref="int.MaxValue"/> milliseconds so large attempt numbers cannot overflow
        /// into a negative delay.
        /// </summary>
        private static int BackOffDelayMilliseconds(int attempt) =>
            (int)Math.Min(Math.Pow(2, attempt) * 1000d, int.MaxValue);
    }
}
|