a7d148a96f
Added new Beam.Fluent and Beam.Models projects with staged download builder and data context models. Refactored and moved model classes from Beam.Temporary.Cli to Beam.Models. Added new data providers and extended DataBindings in Beam.Dynamic. Renamed Beam.Puppeteer to Beam.Playwright and updated related classes. Updated project references and package versions. Removed obsolete and unused files from Beam.Temporary.Cli.
117 lines
4.6 KiB
C#
117 lines
4.6 KiB
C#
using Microsoft.Extensions.Logging;
|
||
using OpenQA.Selenium.Chrome;
|
||
using System;
|
||
using System.Collections.Generic;
|
||
using System.Diagnostics;
|
||
using System.Linq;
|
||
using System.Text;
|
||
using System.Threading.Tasks;
|
||
|
||
namespace Beam.Stealth {
    /// <summary>
    /// Unit downloader that navigates the driver exposed by <c>Config.Driver</c> to a link,
    /// runs the supplied manipulator against it, and then collects the file that appears in
    /// <c>Config.DownloadsDirectory</c>.
    /// </summary>
    /// <typeparam name="T">Type produced by the transformer from the downloaded document.</typeparam>
    public class StealthUnitDownloader<T> : UnitDownloaderBinary<T> {
        /// <summary>How often the downloads directory is polled, in milliseconds.</summary>
        private const int PollDelayMs = 250;

        /// <summary>How long the file size must stay unchanged before the download counts as finished, in milliseconds.</summary>
        private const int StableDelayMs = 1000;

        /// <summary>Pause between attempts to open the finished file exclusively, in milliseconds.</summary>
        private const int LockRetryDelayMs = 200;

        /// <summary>Configuration supplying the driver, downloads directory, timeout and optional logger.</summary>
        public StealthConfig Config { get; }

        /// <summary>Callback invoked with the driver after navigation, before waiting for the download.</summary>
        public StealthAsyncManipulator Manipulator { get; }

        private ILogger? Logger => Config.Logger;

        /// <summary>
        /// Creates a downloader bound to the given configuration and page manipulator.
        /// </summary>
        /// <param name="client">HTTP client forwarded to the base downloader.</param>
        /// <param name="config">Configuration read for the driver, downloads directory, timeout and logger.</param>
        /// <param name="manipulator">Callback run against the driver after navigation.</param>
        /// <param name="transformer">Converts the downloaded document into <typeparamref name="T"/>; forwarded to the base downloader.</param>
        /// <param name="failurePredicates">Optional failure predicates forwarded to the base downloader.</param>
        /// <exception cref="ArgumentNullException"><paramref name="config"/> or <paramref name="manipulator"/> is null.</exception>
        public StealthUnitDownloader(HttpClient client, StealthConfig config, StealthAsyncManipulator manipulator, AsyncTransformer<ByteDocument, T> transformer, AsyncDownloadFailurePredicate<ByteDocument>?[]? failurePredicates = null)
            : base(client, transformer, failurePredicates) {
            // A null config would make every Logger access throw (including the one inside the
            // catch block below), so reject it up front instead of failing later and obscurely.
            ArgumentNullException.ThrowIfNull(config);
            ArgumentNullException.ThrowIfNull(manipulator);

            Config = config;
            Manipulator = manipulator;
        }

        /// <summary>
        /// Performs a single download attempt: navigate, manipulate, wait for the file, validate, transform.
        /// </summary>
        /// <param name="link">Address to navigate the driver to.</param>
        /// <param name="ct">Token observed while waiting for the download.</param>
        /// <returns>
        /// <c>(true, result)</c> when a document was downloaded, passed the failure check and was transformed;
        /// <c>(false, default)</c> on timeout, failure-check hit, or any exception (which is logged, not rethrown).
        /// </returns>
        protected override async Task<(bool Success, T? Result)> TryDownloadWithNoRetries(
            string link, CancellationToken ct) {
            try {
                Logger?.LogInformation("Navigating to {Link}", link);

                var driver = Config.Driver;
                await driver.Navigate().GoToUrlAsync(link);
                await Manipulator(driver);

                // The timeout clock starts only after navigation and manipulation are done.
                var sw = Stopwatch.StartNew();
                ByteDocument? doc = await WaitForDownloadAsync(link, sw, ct);

                if (doc is null || await IsFailure(doc)) {
                    return (false, default);
                }

                Logger?.LogInformation("Download finished in {Elapsed}", sw.Elapsed);
                return (true, await Transformer(doc));
            } catch (Exception ex) {
                Logger?.LogError(ex, "Error occurred downloading {Link}", link);
                return (false, default);
            }
        }

        /* --------------------------------------------------------------------- */

        /// <summary>
        /// Polls the downloads directory until a non-temporary file has a stable, non-zero size,
        /// no temporary files remain, and the file can be opened exclusively; then reads it.
        /// </summary>
        /// <param name="link">Link being downloaded (not used by the polling logic itself).</param>
        /// <param name="sw">Running stopwatch; its elapsed time is compared against <c>Config.TimeOut</c>.</param>
        /// <param name="ct">Token that aborts the polling delays and the final read.</param>
        /// <returns>The downloaded document, or <c>null</c> when the timeout elapsed first.</returns>
        /// <remarks>
        /// NOTE(review): the first non-temporary file found in the directory is taken as the download,
        /// so this presumably expects the directory to hold no unrelated or earlier files — confirm with the caller.
        /// </remarks>
        private async Task<ByteDocument?> WaitForDownloadAsync(
            string link, Stopwatch sw, CancellationToken ct) {
            string dir = Config.DownloadsDirectory;
            string? finalPath = null;
            long lastSize = -1;
            // Measured on the stopwatch rather than the wall clock so system clock
            // adjustments cannot stretch or shrink the stability window.
            TimeSpan lastChange = sw.Elapsed;

            bool IsTemp(string p) =>
                p.EndsWith(".crdownload", StringComparison.OrdinalIgnoreCase) ||
                p.EndsWith(".part", StringComparison.OrdinalIgnoreCase);

            Logger?.LogDebug("Polling {Dir} for download files", dir);

            while (sw.Elapsed < Config.TimeOut && !ct.IsCancellationRequested) {
                // current files in the directory
                string[] files = Directory.GetFiles(dir);

                // ignore temp names; pick (or re-pick) the first real candidate
                finalPath ??= files.FirstOrDefault(f => !IsTemp(f));

                // still nothing but temps – keep waiting
                if (finalPath is null) {
                    await Task.Delay(PollDelayMs, ct);
                    continue;
                }

                // The candidate may have been renamed or removed since it was chosen;
                // forget it and select again on the next poll instead of failing the attempt.
                long? measured = TryGetLength(finalPath);
                if (measured is null) {
                    finalPath = null;
                    lastSize = -1;
                    await Task.Delay(PollDelayMs, ct);
                    continue;
                }

                // track growth
                long size = measured.Value;
                if (size == 0 || size != lastSize) {
                    lastSize = size;
                    lastChange = sw.Elapsed;
                    await Task.Delay(PollDelayMs, ct);
                    continue;
                }

                // size stable long enough *and* no temp files left?
                bool tempsRemain = files.Any(IsTemp);
                bool stable = (sw.Elapsed - lastChange).TotalMilliseconds >= StableDelayMs;
                if (!stable || tempsRemain) {
                    await Task.Delay(PollDelayMs, ct);
                    continue;
                }

                // wait until writer releases lock, but never beyond the configured timeout
                if (!await WaitForExclusiveAccessAsync(finalPath, sw, ct)) {
                    break;
                }

                byte[] bytes = await File.ReadAllBytesAsync(finalPath, ct);
                Logger?.LogInformation("Download completed {Path} ({Size} bytes)",
                    finalPath, bytes.Length);

                return new ByteDocument(Path.GetFileName(finalPath), bytes);
            }

            Logger?.LogWarning("Download timed out after {Elapsed}", sw.Elapsed);
            return null;
        }

        /// <summary>Returns the file's length, or <c>null</c> when the file no longer exists.</summary>
        private static long? TryGetLength(string path) {
            try {
                return new FileInfo(path).Length;
            } catch (FileNotFoundException) {
                return null;
            }
        }

        /// <summary>
        /// Repeatedly tries to open <paramref name="path"/> with no sharing until it succeeds
        /// or <c>Config.TimeOut</c> has elapsed on <paramref name="sw"/>.
        /// </summary>
        /// <returns><c>true</c> once the file could be opened exclusively; <c>false</c> on timeout.</returns>
        private async Task<bool> WaitForExclusiveAccessAsync(string path, Stopwatch sw, CancellationToken ct) {
            while (true) {
                try {
                    using FileStream probe =
                        File.Open(path, FileMode.Open, FileAccess.Read, FileShare.None);
                    return true;
                } catch (IOException) {
                    // Always attempt at least once; give up only after a failed attempt past the deadline.
                    if (sw.Elapsed >= Config.TimeOut) {
                        return false;
                    }
                }

                await Task.Delay(LockRetryDelayMs, ct);
            }
        }
    }
}
|