Refactor downloaders to use generic options and unify logic
Replaces specialized binary and HTML downloaders with a generic, options-driven UnitDownloader and UnitFragmentDownloader pattern. Introduces UnitDownloaderOptions and builder classes for flexible configuration, updates interfaces and method signatures to support progress reporting, and removes redundant binary-specific classes. Updates Playwright and Stealth downloaders to use the new generic base, and adds improved error handling and reporting. Also updates dependency versions and makes minor API consistency improvements across the Fluent and Models layers.
This commit is contained in:
@@ -4,11 +4,12 @@ using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
using Beam.Abstractions;
|
||||
using Beam.Downloaders;
|
||||
using Beam.Models;
|
||||
|
||||
namespace Beam.Stealth {
|
||||
public class StealthFragmentDownloader<T> : UnitFragmentDownloaderBinary<T> {
|
||||
public StealthFragmentDownloader(HttpClient client, StealthConfig config, StealthAsyncManipulator manipulator, AsyncTransformer<ByteDocument, T> transformer, AsyncDownloadFailurePredicate<ByteDocument>?[]? failurePredicate = null, int fragmentSize = 4, ILogger? logger = null) : base(client, transformer, failurePredicate, fragmentSize, logger, new StealthUnitDownloader<T>(client, config, manipulator, transformer, failurePredicate)) {}
|
||||
/// <summary>
/// Fragment downloader for the stealth driver: a <see cref="UnitFragmentDownloader{RawType, OutType}"/>
/// that is handed a <see cref="StealthUnitDownloader{RawType, OutType}"/> built from the same options.
/// </summary>
/// <typeparam name="RawType">Document type; must implement <see cref="IDocument"/>.</typeparam>
/// <typeparam name="OutType">Second type argument passed through to the base class and to the options
/// (presumably the transformed result type; confirm against UnitDownloaderOptions).</typeparam>
public class StealthFragmentDownloader<RawType, OutType> : UnitFragmentDownloader<RawType, OutType> where RawType : IDocument {
|
||||
/// <summary>
/// Forwards <paramref name="options"/> to the base constructor together with a new
/// <see cref="StealthUnitDownloader{RawType, OutType}"/> created from the same
/// <paramref name="options"/>, <paramref name="config"/> and <paramref name="manipulator"/>.
/// </summary>
/// <param name="options">Downloader options, shared by the base class and the inner unit downloader.</param>
/// <param name="config">Stealth configuration handed to the inner unit downloader.</param>
/// <param name="manipulator">Manipulator delegate handed to the inner unit downloader.</param>
// NOTE(review): how the base class uses the supplied unit downloader is not visible here; presumably it performs the per-unit downloads. Confirm in UnitFragmentDownloader.
public StealthFragmentDownloader(UnitDownloaderOptions<RawType, OutType> options, StealthConfig config, StealthAsyncManipulator manipulator) : base(options, new StealthUnitDownloader<RawType, OutType>(options, config, manipulator)) {}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,11 +5,12 @@ using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
using Beam.Abstractions;
|
||||
using Beam.Downloaders;
|
||||
using Beam.Models;
|
||||
|
||||
namespace Beam.Stealth {
|
||||
public class StealthFragmentPageDownloader<T> : UnitFragmentDownloader<T> {
|
||||
public StealthFragmentPageDownloader(HtmlWeb web, StealthConfig config, StealthAsyncManipulator manipulator, AsyncTransformer<HtmlDocument, T> transformer, AsyncDownloadFailurePredicate<HtmlDocument>?[]? failurePredicate = null, int fragmentSize = 4, ILogger? logger = null) : base(web, transformer, failurePredicate, fragmentSize, logger, new StealthUnitPageDownloader<T>(web, config, manipulator, transformer, failurePredicate)) {}
|
||||
/// <summary>
/// Fragment downloader for page content fetched through the stealth driver: a
/// <see cref="UnitFragmentDownloader{RawType, OutType}"/> that is handed a
/// <see cref="StealthUnitPageDownloader{RawType, OutType}"/> built from the same options.
/// </summary>
/// <typeparam name="RawType">Document type; must implement <see cref="IDocument"/>.</typeparam>
/// <typeparam name="OutType">Second type argument passed through to the base class and to the options
/// (presumably the transformed result type; confirm against UnitDownloaderOptions).</typeparam>
public class StealthFragmentPageDownloader<RawType, OutType> : UnitFragmentDownloader<RawType, OutType> where RawType : IDocument {
|
||||
/// <summary>
/// Forwards <paramref name="options"/> to the base constructor together with a new
/// <see cref="StealthUnitPageDownloader{RawType, OutType}"/> created from the same
/// <paramref name="options"/>, <paramref name="config"/> and <paramref name="manipulator"/>.
/// </summary>
/// <param name="options">Downloader options, shared by the base class and the inner unit downloader.</param>
/// <param name="config">Stealth configuration handed to the inner unit downloader.</param>
/// <param name="manipulator">Manipulator delegate handed to the inner unit downloader.</param>
// NOTE(review): how the base class uses the supplied unit downloader is not visible here; confirm in UnitFragmentDownloader.
public StealthFragmentPageDownloader(UnitDownloaderOptions<RawType, OutType> options, StealthConfig config, StealthAsyncManipulator manipulator) : base(options, new StealthUnitPageDownloader<RawType, OutType>(options, config, manipulator)) {}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,50 +6,37 @@ using System.Diagnostics;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
using Beam.Abstractions;
|
||||
using Beam.Downloaders;
|
||||
using Beam.Models;
|
||||
|
||||
namespace Beam.Stealth {
|
||||
using File = System.IO.File;
|
||||
|
||||
public class StealthUnitDownloader<T> : UnitDownloaderBinary<T> {
|
||||
public class StealthUnitDownloader<RawType, OutType> : UnitDownloader<RawType, OutType> where RawType : IDocument {
|
||||
public StealthConfig Config { get; }
|
||||
public StealthAsyncManipulator Manipulator { get; }
|
||||
|
||||
private ILogger? Logger => Config.Logger;
|
||||
|
||||
public StealthUnitDownloader(HttpClient client, StealthConfig config, StealthAsyncManipulator manipulator, AsyncTransformer<ByteDocument, T> transformer, AsyncDownloadFailurePredicate<ByteDocument>?[]? failurePredicates = null) : base(client, transformer, failurePredicates) {
|
||||
/// <summary>
/// Creates a stealth unit downloader: passes <paramref name="options"/> to the base
/// constructor and stores the stealth-specific collaborators on this instance.
/// </summary>
/// <param name="options">Downloader options forwarded unchanged to the base class.</param>
/// <param name="config">Stored in <see cref="Config"/>; the download path reads its Driver and the Logger property reads its Logger.</param>
/// <param name="manipulator">Stored in <see cref="Manipulator"/>; it is awaited with the driver after navigation.</param>
// NOTE(review): neither argument is null-checked here; a null config would only surface later when Config.Driver or Config.Logger is read.
public StealthUnitDownloader(UnitDownloaderOptions<RawType, OutType> options, StealthConfig config, StealthAsyncManipulator manipulator) : base(options) {
|
||||
Config = config;
|
||||
Manipulator = manipulator;
|
||||
}
|
||||
|
||||
protected override async Task<(bool Success, T? Result)> TryDownloadWithNoRetries(
|
||||
string link, CancellationToken ct) {
|
||||
try {
|
||||
Logger?.LogInformation("Navigating to {Link}", link);
|
||||
protected override async Task DownloadToStream(string url, int bufferSize, Stream destinationStream,
|
||||
IProgress<IDownloadReport> progress, CancellationToken ct) {
|
||||
var driver = Config.Driver;
|
||||
await driver.Navigate().GoToUrlAsync(url);
|
||||
await Manipulator(driver);
|
||||
|
||||
var driver = Config.Driver;
|
||||
await driver.Navigate().GoToUrlAsync(link);
|
||||
await Manipulator(driver);
|
||||
|
||||
var sw = Stopwatch.StartNew();
|
||||
ByteDocument? doc = await WaitForDownloadAsync(link, sw, ct);
|
||||
|
||||
if (doc is null || await IsFailure(doc))
|
||||
return (false, default);
|
||||
|
||||
Logger?.LogInformation("Download finished in {Elapsed}", sw.Elapsed);
|
||||
return (true, await Transformer(doc));
|
||||
} catch (Exception ex) {
|
||||
Logger?.LogError(ex, "Error occurred downloading {Link}", link);
|
||||
return (false, default);
|
||||
}
|
||||
await using var stream = await WaitForDownloadAsync(url, progress, Stopwatch.StartNew(), ct);
|
||||
await (stream?.CopyToAsync(destinationStream, ct) ?? Task.CompletedTask);
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
private async Task<ByteDocument?> WaitForDownloadAsync(
|
||||
string link, Stopwatch sw, CancellationToken ct) {
|
||||
private async Task<Stream?> WaitForDownloadAsync(
|
||||
string link, IProgress<IDownloadReport> progress, Stopwatch sw, CancellationToken ct) {
|
||||
const int PollDelayMs = 250; // how often we look
|
||||
const int StableDelayMs = 1000; // size-unchanged window
|
||||
|
||||
@@ -80,6 +67,9 @@ namespace Beam.Stealth {
|
||||
// track growth
|
||||
long size = new FileInfo(finalPath).Length;
|
||||
if (size == 0 || size != lastSize) {
|
||||
progress?.Report(new DownloadReport() {
|
||||
BytesDownloaded = size - lastSize,
|
||||
});
|
||||
lastSize = size;
|
||||
lastChange = DateTime.UtcNow;
|
||||
await Task.Delay(PollDelayMs, ct);
|
||||
@@ -104,11 +94,7 @@ namespace Beam.Stealth {
|
||||
}
|
||||
}
|
||||
|
||||
byte[] bytes = await File.ReadAllBytesAsync(finalPath, ct);
|
||||
Logger?.LogInformation("Download completed {Path} ({Size} bytes)",
|
||||
finalPath, bytes.Length);
|
||||
|
||||
return new ByteDocument(Path.GetFileName(finalPath), bytes);
|
||||
return File.OpenRead(finalPath);
|
||||
}
|
||||
|
||||
Logger?.LogWarning("Download timed out after {Elapsed}", sw.Elapsed);
|
||||
|
||||
@@ -5,39 +5,29 @@ using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
using Beam.Abstractions;
|
||||
using Beam.Downloaders;
|
||||
using Beam.Models;
|
||||
|
||||
namespace Beam.Stealth {
|
||||
public class StealthUnitPageDownloader<T> : UnitDownloader<T> {
|
||||
public class StealthUnitPageDownloader<RawType, OutType> : UnitDownloader<RawType, OutType> where RawType : IDocument {
|
||||
public StealthConfig Config { get; }
|
||||
public StealthAsyncManipulator Manipulator { get; }
|
||||
|
||||
private ILogger? Logger => Config.Logger;
|
||||
|
||||
public StealthUnitPageDownloader(HtmlWeb web, StealthConfig config, StealthAsyncManipulator manipulator, AsyncTransformer<HtmlDocument, T> transformer, AsyncDownloadFailurePredicate<HtmlDocument>?[]? failurePredicate = null) : base(web, transformer, failurePredicate) {
|
||||
/// <summary>
/// Creates a stealth page downloader: passes <paramref name="options"/> to the base
/// constructor and stores the stealth-specific collaborators on this instance.
/// </summary>
/// <param name="options">Downloader options forwarded unchanged to the base class.</param>
/// <param name="config">Stored in <see cref="Config"/>; the download path reads its Driver and the Logger property reads its Logger.</param>
/// <param name="manipulator">Stored in <see cref="Manipulator"/>; it is awaited with the driver after navigation, before the page source is read.</param>
// NOTE(review): neither argument is null-checked here; a null config would only surface later when Config.Driver or Config.Logger is read.
public StealthUnitPageDownloader(UnitDownloaderOptions<RawType, OutType> options, StealthConfig config, StealthAsyncManipulator manipulator) : base(options) {
|
||||
Config = config;
|
||||
Manipulator = manipulator;
|
||||
}
|
||||
|
||||
protected async override Task<(bool, T?)> TryDownloadWithNoRetries(string link, CancellationToken ct) {
|
||||
try {
|
||||
var driver = Config.Driver;
|
||||
protected override async Task DownloadToStream(string url, int bufferSize, Stream destinationStream, IProgress<IDownloadReport> progress, CancellationToken ct) {
|
||||
var driver = Config.Driver;
|
||||
|
||||
await driver.Navigate().GoToUrlAsync(link);
|
||||
await Manipulator(driver);
|
||||
await driver.Navigate().GoToUrlAsync(url);
|
||||
await Manipulator(driver);
|
||||
|
||||
HtmlDocument doc = new();
|
||||
doc.LoadHtml(driver.PageSource);
|
||||
|
||||
if (await IsFailure(doc))
|
||||
return (false, default);
|
||||
|
||||
return (true, await Transformer(doc));
|
||||
} catch (Exception e) {
|
||||
Logger?.LogError(e, "Error occurred downloading {}", link);
|
||||
return (false, default);
|
||||
}
|
||||
byte[] bytes = Encoding.UTF8.GetBytes(driver.PageSource);
|
||||
await destinationStream.WriteAsync(bytes, ct);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user