Refactor downloaders to use ByteDocument and add options builders

Replaces generic RawType with ByteDocument in downloaders and context classes, simplifying type usage. Adds builder classes for FailurePredicateOptions, FragmentOptions, SkipPredicateOptions, and UnitDownloaderOptions to improve configuration flexibility. Introduces DownloadTarget enum and SkipPredicate delegate for more granular download control. Refactors Fluent API interfaces and implementations to remove RawType generics and streamline usage. Adds Playwright and Stealth download strategies for extensibility.
This commit is contained in:
qwsdcvghyu89
2025-11-15 22:51:46 +11:00
parent 647b2b0f37
commit f52aa6123b
34 changed files with 648 additions and 439 deletions
@@ -0,0 +1,9 @@
using Beam.Abstractions;
using Microsoft.Extensions.Logging;
namespace Beam.Stealth.Strategies;
/// <summary>
/// Contract for a download strategy: an asynchronous operation that is handed a URL,
/// a destination stream and the supporting download settings.
/// </summary>
internal interface IDownloadStrategy {
    /// <summary>
    /// Runs the strategy for <paramref name="url"/>, targeting <paramref name="destinationStream"/>.
    /// </summary>
    /// <param name="url">The URL associated with the download.</param>
    /// <param name="bufferSize">Buffer size hint; presumably in bytes (confirm with implementations).</param>
    /// <param name="destinationStream">Stream handed to the strategy as the output target.</param>
    /// <param name="progress">Progress sink accepting <see cref="IDownloadReport"/> instances.</param>
    /// <param name="config">Stealth configuration supplied to the strategy.</param>
    /// <param name="logger">Optional logger; may be <see langword="null"/>.</param>
    /// <param name="ct">Cancellation token for the operation.</param>
    /// <returns>A task representing the asynchronous operation.</returns>
    Task DownloadToStream(
        string url,
        int bufferSize,
        Stream destinationStream,
        IProgress<IDownloadReport> progress,
        StealthConfig config,
        ILogger? logger,
        CancellationToken ct);
}
@@ -0,0 +1,13 @@
using System.Text;
using Beam.Abstractions;
using Microsoft.Extensions.Logging;
namespace Beam.Stealth.Strategies;
/// <summary>
/// Download strategy that emits the driver's current page source, encoded as UTF-8,
/// into the destination stream.
/// </summary>
internal class PageDownloadStrategy : IDownloadStrategy {
    /// <summary>
    /// Encodes <c>config.Driver.PageSource</c> as UTF-8 and writes the resulting bytes
    /// to <paramref name="destinationStream"/>.
    /// </summary>
    /// <remarks>
    /// <paramref name="url"/>, <paramref name="bufferSize"/>, <paramref name="progress"/> and
    /// <paramref name="logger"/> are accepted to satisfy <see cref="IDownloadStrategy"/> but are
    /// not read by this implementation.
    /// </remarks>
    /// <param name="url">Unused.</param>
    /// <param name="bufferSize">Unused.</param>
    /// <param name="destinationStream">Stream that receives the encoded page source.</param>
    /// <param name="progress">Unused; no progress is reported.</param>
    /// <param name="config">Configuration whose <c>Driver.PageSource</c> is read.</param>
    /// <param name="logger">Unused.</param>
    /// <param name="ct">Token passed to the stream write.</param>
    public async Task DownloadToStream(
        string url,
        int bufferSize,
        Stream destinationStream,
        IProgress<IDownloadReport> progress,
        StealthConfig config,
        ILogger? logger,
        CancellationToken ct) {
        // Snapshot the page source first, then encode it in a separate step.
        var pageSource = config.Driver.PageSource;
        byte[] encodedPage = Encoding.UTF8.GetBytes(pageSource);

        await destinationStream.WriteAsync(encodedPage, ct);
    }
}
@@ -0,0 +1,83 @@
using System.Diagnostics;
using Beam.Abstractions;
using Beam.Models;
using Microsoft.Extensions.Logging;
using File = System.IO.File;
namespace Beam.Stealth.Strategies;
/// <summary>
/// Download strategy that polls <c>config.DownloadsDirectory</c> until a finished
/// (non-temporary) file appears, then copies that file into the destination stream.
/// </summary>
/// <remarks>
/// NOTE(review): the first non-temporary file found in the directory is taken as the download,
/// including files that were already there before the wait started. Callers presumably start
/// with an empty directory — confirm.
/// </remarks>
public class WaitingDownloadStrategy : IDownloadStrategy {
    /// <summary>How often the downloads directory is polled, in milliseconds.</summary>
    private const int PollDelayMs = 250;

    /// <summary>How long the file size must stay unchanged before the file counts as complete, in milliseconds.</summary>
    private const int StableDelayMs = 1000;

    /// <summary>Delay between attempts to open the file while another handle still holds it, in milliseconds.</summary>
    private const int LockRetryDelayMs = 200;

    /// <summary>
    /// Waits for a completed file in the downloads directory and copies it to
    /// <paramref name="destinationStream"/>. If no file completes within <c>config.TimeOut</c>,
    /// nothing is written and the method returns normally.
    /// </summary>
    /// <param name="url">Forwarded to the wait routine; not otherwise used.</param>
    /// <param name="bufferSize">Copy buffer size; used when positive, otherwise the stream's default is used.</param>
    /// <param name="destinationStream">Stream that receives the file contents.</param>
    /// <param name="progress">Receives a <c>DownloadReport</c> whenever the observed file size changes.</param>
    /// <param name="config">Supplies <c>DownloadsDirectory</c> and <c>TimeOut</c>.</param>
    /// <param name="logger">Optional logger.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <exception cref="OperationCanceledException">Thrown when <paramref name="ct"/> is cancelled while waiting or copying.</exception>
    public async Task DownloadToStream(string url, int bufferSize, Stream destinationStream, IProgress<IDownloadReport> progress, StealthConfig config,
        ILogger? logger, CancellationToken ct) {
        await using var stream = await WaitForDownloadAsync(url, progress, Stopwatch.StartNew(), config, logger, ct);
        if (stream is null) {
            // Timed out: the warning has already been logged by the wait routine.
            return;
        }

        // Honour the caller's buffer size; a non-positive value would make CopyToAsync throw,
        // so fall back to the default-sized overload in that case.
        if (bufferSize > 0) {
            await stream.CopyToAsync(destinationStream, bufferSize, ct);
        } else {
            await stream.CopyToAsync(destinationStream, ct);
        }
    }

    /// <summary>Returns true when the path carries a browser partial-download extension.</summary>
    private static bool IsTemp(string path) =>
        path.EndsWith(".crdownload", StringComparison.OrdinalIgnoreCase) ||
        path.EndsWith(".part", StringComparison.OrdinalIgnoreCase);

    /// <summary>
    /// Polls the downloads directory until a non-temporary file has a stable, non-zero size,
    /// no temporary files remain, and the file can be opened exclusively.
    /// </summary>
    /// <param name="link">The requested link (currently not used by the polling logic).</param>
    /// <param name="progress">Receives the size delta each time the candidate's size changes.</param>
    /// <param name="sw">Running stopwatch that bounds the whole wait against <c>config.TimeOut</c>.</param>
    /// <param name="config">Supplies <c>DownloadsDirectory</c> and <c>TimeOut</c>.</param>
    /// <param name="logger">Optional logger.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>A readable stream over the finished file, or <see langword="null"/> on timeout.</returns>
    /// <exception cref="OperationCanceledException">Thrown when <paramref name="ct"/> is cancelled.</exception>
    private static async Task<Stream?> WaitForDownloadAsync(
        string link, IProgress<IDownloadReport> progress, Stopwatch sw, StealthConfig config, ILogger? logger, CancellationToken ct) {
        string dir = config.DownloadsDirectory;
        string? finalPath = null;
        // Start at 0 (not a -1 sentinel) so the first reported delta equals the real byte count.
        long lastSize = 0;
        // Measured on the stopwatch so the stability window is immune to wall-clock adjustments.
        TimeSpan lastChange = sw.Elapsed;

        logger?.LogDebug("Polling {Dir} for download files", dir);

        while (sw.Elapsed < config.TimeOut) {
            // Surface cancellation as cancellation instead of reporting it as a timeout.
            ct.ThrowIfCancellationRequested();

            // current files in the directory
            var files = Directory.EnumerateFiles(dir, "*", SearchOption.TopDirectoryOnly).ToArray();

            // ignore temp names; pick the first real candidate
            finalPath ??= files.FirstOrDefault(f => !IsTemp(f));

            // still nothing but temps: keep waiting
            if (finalPath is null) {
                await Task.Delay(PollDelayMs, ct);
                continue;
            }

            // The candidate may have been renamed or deleted since it was picked; forget it and
            // re-pick on the next poll rather than letting FileInfo.Length throw.
            var info = new FileInfo(finalPath);
            if (!info.Exists) {
                finalPath = null;
                lastSize = 0;
                lastChange = sw.Elapsed;
                await Task.Delay(PollDelayMs, ct);
                continue;
            }

            // track growth; an empty file is treated as "not started yet"
            long size = info.Length;
            if (size == 0 || size != lastSize) {
                long delta = size - lastSize;
                if (delta != 0) {
                    progress?.Report(new DownloadReport() {
                        BytesDownloaded = delta,
                    });
                }
                lastSize = size;
                lastChange = sw.Elapsed;
                await Task.Delay(PollDelayMs, ct);
                continue;
            }

            // size stable long enough *and* no temp files left?
            bool tempsRemain = files.Any(IsTemp);
            if ((sw.Elapsed - lastChange).TotalMilliseconds < StableDelayMs || tempsRemain) {
                await Task.Delay(PollDelayMs, ct);
                continue;
            }

            // wait until the writer releases its lock, but never beyond the overall timeout
            while (sw.Elapsed < config.TimeOut) {
                try {
                    // Exclusive open succeeds only when no other handle is held on the file.
                    using (File.Open(finalPath, FileMode.Open, FileAccess.Read, FileShare.None)) {
                    }
                    return File.OpenRead(finalPath);
                } catch (IOException) {
                    await Task.Delay(LockRetryDelayMs, ct);
                }
            }

            // Lock wait exhausted the timeout; fall through to the timeout path.
            break;
        }

        logger?.LogWarning("Download timed out after {Elapsed}", sw.Elapsed);
        return null;
    }
}