Files
Beam/Beam/UnitDownloaderBinary.cs
T
qwsdcvghyu89 3baa31a7cc feat: add Puppeteer integration for web downloads
This introduces a new Puppeteer-based mechanism for downloading
web content. It provides a flexible way to manipulate pages
during downloads, enhancing the ability to handle dynamic
content and improve the overall download process.
2025-06-25 13:42:24 +03:00

76 lines
3.0 KiB
C#

using System.Net.Http;
using System.Threading;
using System.Threading.Tasks;
namespace Beam {
/// <summary>
/// Downloads binary content through an <see cref="HttpClient"/>, rejects documents flagged by
/// the optional failure predicates, converts accepted documents with the supplied
/// <c>AsyncTransformer&lt;ByteDocument, T&gt;</c>, and retries failed attempts with
/// exponential back-off. The class keeps no state beyond its constructor arguments.
/// </summary>
/// <typeparam name="T">Type produced by the transformer for a successful download.</typeparam>
/// <param name="client">HTTP client used to issue the GET requests.</param>
/// <param name="transformer">Converts a downloaded document into the result value.</param>
/// <param name="failurePredicates">
/// Optional checks run against every downloaded document; if any returns <c>true</c> the attempt
/// counts as failed. A <c>null</c> array disables the checks and <c>null</c> entries are skipped.
/// </param>
public class UnitDownloaderBinary<T>(
    HttpClient client,
    AsyncTransformer<ByteDocument, T> transformer,
    AsyncDownloadFailurePredicate<ByteDocument>?[]? failurePredicates = null)
    : IUnitDownloader<T> {
    /// <summary>Largest shift for which <c>1L &lt;&lt; n</c> seconds still fits comfortably in 64-bit milliseconds.</summary>
    private const int MaxBackOffExponent = 30;

    /// <summary>HTTP client used for every request.</summary>
    public HttpClient Client { get; } = client;

    /// <summary>Transformer applied to a document that passed all failure predicates.</summary>
    public virtual AsyncTransformer<ByteDocument, T> Transformer { get; } = transformer;

    /// <summary>Failure predicates evaluated for each downloaded document; may be <c>null</c>.</summary>
    public virtual AsyncDownloadFailurePredicate<ByteDocument>?[]? FailurePredicates { get; } = failurePredicates;

    /// <summary>Number of links consumed per download; <see cref="TryDownload"/> only reads the first link.</summary>
    public int LinksPerDownload { get; } = 1;

    /// <summary>
    /// Runs the configured failure predicates in parallel against the downloaded document.
    /// </summary>
    /// <param name="response">Document built from the response body.</param>
    /// <returns><c>true</c> if at least one predicate reported a failure; otherwise <c>false</c>.</returns>
    protected virtual async Task<bool> IsFailure(ByteDocument response) {
        if (FailurePredicates is null) {
            return false;
        }
        var failed = false;
        await Parallel.ForEachAsync(FailurePredicates, async (pred, _) => {
            // Best-effort short-circuit: predicates that start after a failure was
            // recorded are skipped; ones already running simply finish.
            if (failed || pred is null) {
                return;
            }
            if (await pred(response)) {
                failed = true;
            }
        });
        return failed;
    }

    /// <summary>
    /// Performs a single download attempt without retries or back-off.
    /// </summary>
    /// <param name="link">Address to request.</param>
    /// <param name="ct">Token forwarded to the HTTP request and to the body read.</param>
    /// <returns>
    /// <c>(true, value)</c> when the request succeeded, no failure predicate fired and the
    /// transformer completed; <c>(false, default)</c> for a non-success status code, a flagged
    /// document, or any exception raised along the way.
    /// </returns>
    protected virtual async Task<(bool Success, T? Result)> TryDownloadWithNoRetries(string link, CancellationToken ct) {
        try {
            using var response = await Client.GetAsync(link, HttpCompletionOption.ResponseHeadersRead, ct);
            if (!response.IsSuccessStatusCode) {
                return (false, default);
            }
            var bytes = await response.Content.ReadAsByteArrayAsync(ct);
            var doc = new ByteDocument(link, bytes);
            if (await IsFailure(doc)) {
                return (false, default);
            }
            return (true, await Transformer(doc));
        } catch {
            // Deliberate best-effort: every exception (network error, client timeout,
            // cancellation, predicate/transformer fault) is reported as a failed attempt.
            // The retry loop re-checks the token so cancellation is not lost.
            return (false, default);
        }
    }

    /// <summary>
    /// Downloads the first link of <paramref name="link"/>, retrying failed attempts with
    /// exponential back-off (2, 4, 8, ... seconds between attempts).
    /// </summary>
    /// <param name="link">Links to download; only the first element is used. An empty array fails immediately.</param>
    /// <param name="ct">Token that aborts the attempts and the back-off waits.</param>
    /// <param name="maximumRetryCount">Maximum number of attempts; values below 1 result in no attempt.</param>
    /// <param name="tryProgress">Optional sink notified after every failed attempt.</param>
    /// <returns>
    /// <c>(true, value)</c> on the first attempt that succeeds with a non-null value;
    /// otherwise <c>(false, lastValue)</c> once all attempts are used up.
    /// </returns>
    /// <exception cref="OperationCanceledException">Thrown when <paramref name="ct"/> is cancelled.</exception>
    public async Task<(bool, T?)> TryDownload(
        Ordered<string>[] link,
        CancellationToken ct,
        int maximumRetryCount = 7,
        IProgress<RetryReport>? tryProgress = null) {
        if (link.Length == 0) {
            return (false, default);
        }
        T? result = default;
        for (var attempt = 1; attempt <= maximumRetryCount; ++attempt) {
            ct.ThrowIfCancellationRequested();
            (var success, result) = await TryDownloadWithNoRetries(link[0].Data, ct);
            if (success && result is not null) {
                return (true, result);
            }
            // The single attempt swallows cancellation, so surface it here instead of
            // reporting a retry that will never happen.
            ct.ThrowIfCancellationRequested();
            tryProgress?.Report(new RetryReport(attempt, link[0].Data));
            // Waiting is only useful when another attempt follows; sleeping after the
            // final failure would just postpone the negative result.
            if (attempt < maximumRetryCount) {
                await Task.Delay(BackOffDelayMilliseconds(attempt), ct);
            }
        }
        return (false, result);
    }

    /// <summary>
    /// Computes the back-off wait after the given failed attempt: 2^attempt seconds,
    /// clamped to <see cref="int.MaxValue"/> milliseconds.
    /// </summary>
    /// <param name="attempt">1-based number of the attempt that just failed.</param>
    /// <returns>Delay in milliseconds, never negative.</returns>
    private static int BackOffDelayMilliseconds(int attempt) {
        // 64-bit arithmetic plus clamping: the 32-bit product overflows to a negative
        // value from attempt 22 onward, which Task.Delay rejects with an exception.
        var exponent = Math.Clamp(attempt, 0, MaxBackOffExponent);
        var milliseconds = (1L << exponent) * 1000L;
        return (int)Math.Min(milliseconds, int.MaxValue);
    }
}
}