482a46b568
Updated project files for `Beam.Dynamic`, `Beam.Exports`, `Beam.Temporary.Cli`, and `Beam` to include additional metadata and specific package versions. Refactored `DataBindings` and `ResolvedBindings` to records, added a new `Text` property in `Binding.cs`, and introduced `ParseNumbers` in `OnlineCleaner`. New classes `PuppetContext` and `PuppetUnitDownloader` added for Playwright integration. Introduced `ImmutableState` struct and `UnitDownloaderBinary` class for improved download management. Updated tests in `UnitTest1.cs` for number localization. Added `Beam.Puppeteer` project to the solution.
66 lines
2.9 KiB
C#
66 lines
2.9 KiB
C#
using HtmlAgilityPack;
|
|
using Microsoft.Extensions.Logging;
|
|
using System;
|
|
using System.Collections.Concurrent;
|
|
using System.Collections.Generic;
|
|
using System.Linq;
|
|
using System.Text;
|
|
using System.Threading.Tasks;
|
|
|
|
namespace Beam {
|
|
/// <summary>
/// Downloads a batch of ordered links as one <c>Fragment&lt;Ordered&lt;T&gt;&gt;</c>,
/// delegating every individual link to an inner <c>IUnitDownloader&lt;T&gt;</c>.
/// </summary>
/// <typeparam name="T">Type produced by the inner downloader for a single link.</typeparam>
public class UnitFragmentDownloader<T> : IUnitDownloader<Fragment<Ordered<T>>> {
    /// <summary>
    /// Creates a fragment downloader.
    /// </summary>
    /// <param name="web">HTML client; stored in <see cref="Web"/> and handed to the default inner downloader.</param>
    /// <param name="transformer">Transformer; stored in <see cref="Transformer"/> and handed to the default inner downloader.</param>
    /// <param name="failurePredicate">Optional failure predicates; stored in <see cref="FailurePredicate"/> and handed to the default inner downloader.</param>
    /// <param name="fragmentSize">Initial value of <see cref="LinksPerDownload"/>.</param>
    /// <param name="logger">Optional logger used to report links that could not be retrieved.</param>
    /// <param name="internalDownloader">
    /// Downloader used for each single link. When <c>null</c>, a <c>UnitDownloader&lt;T&gt;</c> is
    /// built from <paramref name="web"/>, <paramref name="transformer"/> and <paramref name="failurePredicate"/>.
    /// </param>
    public UnitFragmentDownloader(HtmlWeb web,
                                  AsyncHtmlTransformer<T> transformer,
                                  AsyncDownloadFailurePredicate<HtmlDocument>?[]? failurePredicate = null,
                                  int fragmentSize = 4,
                                  ILogger? logger = null,
                                  IUnitDownloader<T>? internalDownloader = null) {
        Web = web;
        Transformer = transformer;
        FailurePredicate = failurePredicate;
        _unitDownloader = internalDownloader ?? new UnitDownloader<T>(Web, Transformer, FailurePredicate);
        LinksPerDownload = fragmentSize;
        Logger = logger;
    }

    /// <summary>HTML client supplied at construction.</summary>
    public HtmlWeb Web { get; }

    /// <summary>Transformer supplied at construction.</summary>
    public AsyncHtmlTransformer<T> Transformer { get; }

    /// <summary>Optional failure predicates supplied at construction.</summary>
    public AsyncDownloadFailurePredicate<HtmlDocument>?[]? FailurePredicate { get; }

    /// <summary>
    /// Initialised from the <c>fragmentSize</c> constructor argument.
    /// This class only stores the value; it is not read by the download logic here.
    /// </summary>
    public int LinksPerDownload { get; set; }

    /// <summary>Optional logger; failures are silently skipped from logging when it is <c>null</c>.</summary>
    public ILogger? Logger { get; set; }

    // Downloader that performs the actual work for one link at a time.
    private readonly IUnitDownloader<T> _unitDownloader;

    /// <summary>
    /// Downloads every entry of <paramref name="link"/> in parallel through the inner downloader and
    /// pushes each successful result, tagged with the order of its link, into a new fragment.
    /// </summary>
    /// <param name="link">Ordered links that make up the fragment.</param>
    /// <param name="ct">Token that cancels the parallel loop and the downloads it started.</param>
    /// <param name="maximumRetryCount">Forwarded unchanged to the inner downloader.</param>
    /// <param name="tryProgress">Forwarded unchanged to the inner downloader.</param>
    /// <returns>
    /// A tuple whose first item is <c>true</c> only when every link produced a non-null result, and whose
    /// second item is the fragment. The fragment is marked complete only in the all-succeeded case, but it
    /// is returned either way.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="link"/> is <c>null</c>.</exception>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
    async Task<(bool, Fragment<Ordered<T>>?)> IUnitDownloader<Fragment<Ordered<T>>>.TryDownload(Ordered<string>[] link, CancellationToken ct, int maximumRetryCount, IProgress<RetryReport>? tryProgress) {
        ArgumentNullException.ThrowIfNull(link);

        var fragment = new Fragment<Ordered<T>>(link.Length);
        if (!Fragment<Ordered<T>>.TryAcquireUpdater(fragment, out var updater))
            throw new S.AssertionException(S.M.NewFragmentShouldBeFree);

        bool isFailure = false;
        try {
            // Passing ct links the per-iteration token (pct) to the caller's token, so the loop stops
            // scheduling new links as soon as the caller cancels.
            await Parallel.ForEachAsync(link, ct, async (x, pct) => {
                pct.ThrowIfCancellationRequested();

                var (result, downloadedT) = await _unitDownloader.TryDownload([x], pct, maximumRetryCount, tryProgress);
                if (!result) {
                    Interlocked.Exchange(ref isFailure, true);
                    Logger?.LogError("Failed to retrieve {} order={}", x.Data, x.Order);
                    return;
                }
                if (downloadedT == null) {
                    // The inner downloader reported success yet produced nothing.
                    Interlocked.Exchange(ref isFailure, true);
                    Logger?.LogCritical("Failed to retrieve {} order={}", x.Data, x.Order);
                    return;
                }

                updater(new Ordered<T>(downloadedT, x.Order));
            });

            if (!isFailure)
                Fragment<Ordered<T>>.SetComplete(fragment, true);
        }
        finally {
            // Release the updater even when the loop throws (cancellation or an inner exception).
            Fragment<Ordered<T>>.TryReleaseUpdater(fragment, updater);
        }

        return (!isFailure, fragment);
    }
}
|
|
}
|