Files
Beam/Beam/UnitFragmentDownloader.cs
T
qwsdcvghyu89 482a46b568 Enhance project metadata and refactor core classes
Updated project files for `Beam.Dynamic`, `Beam.Exports`, `Beam.Temporary.Cli`, and `Beam` to include additional metadata and specific package versions. Refactored `DataBindings` and `ResolvedBindings` to records, added a new `Text` property in `Binding.cs`, and introduced `ParseNumbers` in `OnlineCleaner`. New classes `PuppetContext` and `PuppetUnitDownloader` added for Playwright integration. Introduced `ImmutableState` struct and `UnitDownloaderBinary` class for improved download management. Updated tests in `UnitTest1.cs` for number localization. Added `Beam.Puppeteer` project to the solution.
2025-06-23 02:11:19 +03:00

66 lines
2.9 KiB
C#

using HtmlAgilityPack;
using Microsoft.Extensions.Logging;
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Beam {
/// <summary>
/// Downloads a batch of ordered links concurrently through an inner
/// <see cref="IUnitDownloader{T}"/> and gathers the per-link results into a single
/// <see cref="Fragment{T}"/> of <see cref="Ordered{T}"/> items.
/// </summary>
/// <typeparam name="T">Type produced by the inner downloader for each link.</typeparam>
public class UnitFragmentDownloader<T> : IUnitDownloader<Fragment<Ordered<T>>> {
    /// <summary>
    /// Creates a fragment downloader.
    /// </summary>
    /// <param name="web">Stored in <see cref="Web"/> and handed to the default inner downloader.</param>
    /// <param name="transformer">Stored in <see cref="Transformer"/> and handed to the default inner downloader.</param>
    /// <param name="failurePredicate">Optional predicates stored in <see cref="FailurePredicate"/> and handed to the default inner downloader.</param>
    /// <param name="fragmentSize">Initial value of <see cref="LinksPerDownload"/>.</param>
    /// <param name="logger">Optional logger used to report links that could not be retrieved.</param>
    /// <param name="internalDownloader">
    /// Downloader invoked once per link. When <c>null</c>, a <c>UnitDownloader&lt;T&gt;</c> is
    /// built from <paramref name="web"/>, <paramref name="transformer"/> and
    /// <paramref name="failurePredicate"/>.
    /// </param>
    public UnitFragmentDownloader(HtmlWeb web,
        AsyncHtmlTransformer<T> transformer,
        AsyncDownloadFailurePredicate<HtmlDocument>?[]? failurePredicate = null,
        int fragmentSize = 4,
        ILogger? logger = null,
        IUnitDownloader<T>? internalDownloader = null) {
        Web = web;
        Transformer = transformer;
        FailurePredicate = failurePredicate;
        _unitDownloader = internalDownloader ?? new UnitDownloader<T>(Web, Transformer, FailurePredicate);
        LinksPerDownload = fragmentSize;
        Logger = logger;
    }

    /// <summary>The <see cref="HtmlWeb"/> instance supplied at construction.</summary>
    public HtmlWeb Web { get; }

    /// <summary>The transformer supplied at construction.</summary>
    public AsyncHtmlTransformer<T> Transformer { get; }

    /// <summary>The failure predicates supplied at construction, if any.</summary>
    public AsyncDownloadFailurePredicate<HtmlDocument>?[]? FailurePredicate { get; }

    /// <summary>
    /// Fragment size supplied at construction. It is not read by the download method of this
    /// class; presumably consumed by callers that slice links into fragments (confirm).
    /// </summary>
    public int LinksPerDownload { get; set; }

    /// <summary>Optional logger that receives a message for every link that fails.</summary>
    public ILogger? Logger { get; set; }

    // Downloader that performs the actual single-link download.
    private readonly IUnitDownloader<T> _unitDownloader;

    /// <summary>
    /// Downloads every link in <paramref name="link"/> in parallel and pushes each successful
    /// result, tagged with the link's order, into a new fragment.
    /// </summary>
    /// <param name="link">Ordered links to download; one inner download is issued per element.</param>
    /// <param name="ct">Token that cancels the whole batch.</param>
    /// <param name="maximumRetryCount">Forwarded unchanged to the inner downloader.</param>
    /// <param name="tryProgress">Forwarded unchanged to the inner downloader.</param>
    /// <returns>
    /// <c>true</c> with a fragment marked complete when every link succeeded; <c>false</c> with
    /// the (incomplete) fragment when at least one link failed or yielded no data.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="link"/> is <c>null</c>.</exception>
    /// <exception cref="OperationCanceledException">The batch was cancelled through <paramref name="ct"/>.</exception>
    async Task<(bool, Fragment<Ordered<T>>?)> IUnitDownloader<Fragment<Ordered<T>>>.TryDownload(
        Ordered<string>[] link,
        CancellationToken ct,
        int maximumRetryCount,
        IProgress<RetryReport>? tryProgress) {
        ArgumentNullException.ThrowIfNull(link);

        Fragment<Ordered<T>> fragment = new Fragment<Ordered<T>>(link.Length);
        if (!Fragment<Ordered<T>>.TryAcquireUpdater(fragment, out var updater)) {
            throw new S.AssertionException(S.M.NewFragmentShouldBeFree);
        }

        bool isFailure = false;
        try {
            // Passing ct lets the loop stop scheduling new links once the caller cancels; the
            // per-iteration token (pct) is linked to ct and is also signalled when a sibling
            // iteration throws, so in-flight downloads can stop early.
            await Parallel.ForEachAsync(link, ct, async (x, pct) => {
                pct.ThrowIfCancellationRequested();
                var (result, downloadedT) = await _unitDownloader.TryDownload([x], pct, maximumRetryCount, tryProgress);
                if (!result) {
                    Interlocked.Exchange(ref isFailure, true);
                    Logger?.LogError("Failed to retrieve {Link} order={Order}", x.Data, x.Order);
                    return;
                }
                if (downloadedT is null) {
                    // The inner downloader reported success but produced nothing.
                    Interlocked.Exchange(ref isFailure, true);
                    Logger?.LogCritical("Failed to retrieve {Link} order={Order}", x.Data, x.Order);
                    return;
                }
                // NOTE(review): invoked from concurrent iterations; assumes the updater is safe
                // for concurrent use - confirm against Fragment.
                updater(new Ordered<T>(downloadedT, x.Order));
            });

            if (!isFailure) {
                Fragment<Ordered<T>>.SetComplete(fragment, true);
            }
        }
        finally {
            // Release the updater even when the loop throws (cancellation or a faulting download).
            Fragment<Ordered<T>>.TryReleaseUpdater(fragment, updater);
        }

        return (!isFailure, fragment);
    }
}
}