Files
Beam/Beam.Stealth/StealthUnitDownloader.cs
T
qwsdcvghyu89 2958a26e4f Refactor downloaders to use generic options and unify logic
Replaces specialized binary and HTML downloaders with a generic, options-driven UnitDownloader and UnitFragmentDownloader pattern. Introduces UnitDownloaderOptions and builder classes for flexible configuration, updates interfaces and method signatures to support progress reporting, and removes redundant binary-specific classes. Updates Playwright and Stealth downloaders to use the new generic base, and adds improved error handling and reporting. Also updates dependency versions and makes minor API consistency improvements across the Fluent and Models layers.
2025-09-29 21:27:56 +10:00

107 lines
4.1 KiB
C#
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
using Microsoft.Extensions.Logging;
using OpenQA.Selenium.Chrome;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Beam.Abstractions;
using Beam.Downloaders;
using Beam.Models;
namespace Beam.Stealth {
using File = System.IO.File;
/// <summary>
/// Unit downloader that fetches a resource by navigating a browser driver to the URL,
/// running the configured manipulator against the driver, and then picking up the file
/// that appears in <see cref="StealthConfig.DownloadsDirectory"/>.
/// </summary>
/// <typeparam name="RawType">Raw document type handled by the base downloader.</typeparam>
/// <typeparam name="OutType">Output type produced by the base downloader.</typeparam>
public class StealthUnitDownloader<RawType, OutType> : UnitDownloader<RawType, OutType> where RawType : IDocument {
    /// <summary>Configuration supplying the driver, downloads directory, timeout and logger.</summary>
    public StealthConfig Config { get; }

    /// <summary>Callback awaited with the driver right after navigation, before polling for the file.</summary>
    public StealthAsyncManipulator Manipulator { get; }

    private ILogger? Logger => Config.Logger;

    /// <summary>Creates a downloader bound to the given options, configuration and manipulator.</summary>
    /// <param name="options">Options forwarded to the base <c>UnitDownloader</c>.</param>
    /// <param name="config">Stealth configuration used for the driver, directory, timeout and logging.</param>
    /// <param name="manipulator">Callback invoked with the driver after each navigation.</param>
    public StealthUnitDownloader(UnitDownloaderOptions<RawType, OutType> options, StealthConfig config, StealthAsyncManipulator manipulator) : base(options) {
        Config = config;
        Manipulator = manipulator;
    }

    /// <summary>
    /// Navigates the driver to <paramref name="url"/>, runs the manipulator, waits for a file to
    /// settle in the downloads directory and copies it into <paramref name="destinationStream"/>.
    /// Nothing is written when the wait times out.
    /// </summary>
    /// <param name="url">Address the driver navigates to.</param>
    /// <param name="bufferSize">Copy buffer size in bytes; non-positive values fall back to the framework default.</param>
    /// <param name="destinationStream">Stream that receives the downloaded bytes.</param>
    /// <param name="progress">Optional sink for incremental download reports.</param>
    /// <param name="ct">Token used to cancel waiting and copying.</param>
    /// <exception cref="OperationCanceledException">Thrown when <paramref name="ct"/> is cancelled.</exception>
    protected override async Task DownloadToStream(string url, int bufferSize, Stream destinationStream,
        IProgress<IDownloadReport> progress, CancellationToken ct) {
        var driver = Config.Driver;
        await driver.Navigate().GoToUrlAsync(url);
        await Manipulator(driver);

        // The timeout clock starts only after navigation and manipulation have finished.
        await using var stream = await WaitForDownloadAsync(url, progress, Stopwatch.StartNew(), ct);
        if (stream is null) {
            return; // timed out: leave the destination untouched
        }

        // Honour the caller's buffer size; CopyToAsync rejects non-positive sizes, so guard it.
        if (bufferSize > 0) {
            await stream.CopyToAsync(destinationStream, bufferSize, ct);
        } else {
            await stream.CopyToAsync(destinationStream, ct);
        }
    }

    /* --------------------------------------------------------------------- */

    /// <summary>
    /// Polls the downloads directory until a non-temporary file has kept the same non-zero size
    /// for the stability window, no temporary files remain, and the file can be opened exclusively.
    /// </summary>
    /// <param name="link">URL being downloaded; currently not used by the polling logic.</param>
    /// <param name="progress">Optional sink that receives the number of newly observed bytes.</param>
    /// <param name="sw">Running stopwatch compared against <c>Config.TimeOut</c>.</param>
    /// <param name="ct">Token used to cancel the wait.</param>
    /// <returns>A read stream over the downloaded file, or <c>null</c> when the timeout elapses.</returns>
    /// <exception cref="OperationCanceledException">Thrown when <paramref name="ct"/> is cancelled.</exception>
    private async Task<Stream?> WaitForDownloadAsync(
        string link, IProgress<IDownloadReport> progress, Stopwatch sw, CancellationToken ct) {
        const int PollDelayMs = 250;       // how often we look
        const int StableDelayMs = 1000;    // size-unchanged window
        const int LockRetryDelayMs = 200;  // pause before re-probing a locked file

        string dir = Config.DownloadsDirectory;
        string? finalPath = null;
        long lastSize = -1; // sentinel: no size observed yet
        DateTime lastChange = DateTime.UtcNow;

        bool IsTemp(string p) =>
            p.EndsWith(".crdownload", StringComparison.OrdinalIgnoreCase) ||
            p.EndsWith(".part", StringComparison.OrdinalIgnoreCase);

        Logger?.LogDebug("Polling {Dir} for download files", dir);

        while (sw.Elapsed < Config.TimeOut) {
            // Surface cancellation as an exception instead of misreporting it as a timeout.
            ct.ThrowIfCancellationRequested();

            // current files in the directory
            var files = Directory.EnumerateFiles(dir, "*", SearchOption.TopDirectoryOnly).ToArray();

            // ignore temp names; pick the first real candidate and stick with it
            // NOTE(review): a leftover file from an earlier download would be picked here -
            // presumably the directory is emptied elsewhere; confirm.
            finalPath ??= files.FirstOrDefault(f => !IsTemp(f));

            // nothing but temp files so far: keep waiting
            if (finalPath is null) {
                await Task.Delay(PollDelayMs, ct);
                continue;
            }

            // track growth
            long size = new FileInfo(finalPath).Length;
            if (size == 0 || size != lastSize) {
                // Measure the delta from 0 on the first observation, not from the -1 sentinel.
                progress?.Report(new DownloadReport() {
                    BytesDownloaded = size - Math.Max(lastSize, 0),
                });
                lastSize = size;
                lastChange = DateTime.UtcNow;
                await Task.Delay(PollDelayMs, ct);
                continue;
            }

            // size stable long enough *and* no temp files left?
            bool tempsRemain = files.Any(IsTemp);
            if ((DateTime.UtcNow - lastChange).TotalMilliseconds < StableDelayMs || tempsRemain) {
                await Task.Delay(PollDelayMs, ct);
                continue;
            }

            // Probe for exclusive access to check that the writer has released the file.
            // On failure go back through the outer loop so the retry is bounded by the timeout.
            try {
                using FileStream _ =
                    File.Open(finalPath, FileMode.Open, FileAccess.Read, FileShare.None);
            } catch (IOException) {
                await Task.Delay(LockRetryDelayMs, ct);
                continue;
            }

            return File.OpenRead(finalPath);
        }

        Logger?.LogWarning("Download timed out after {Elapsed}", sw.Elapsed);
        return null;
    }
}
}