7ed05abdb8
- Introduced modularity by splitting Beam into new projects: Beam.Abstractions, Beam.Models, and Beam.Downloaders. - Refactored existing classes into appropriate namespaces and projects. - Replaced specific implementations with abstractions (e.g., SourceLinkBuilder to LinkBuilder, State to IState, etc.). - Updated interfaces: added ITemplate, IArticleData, IDownloadReport, and others for improved extensibility. - Removed deprecated classes like SourceLinkBuilder and StateChangerFactory. - Enhanced link handling in downloaders by refactoring to use `string` over `SourceLink`. - Consolidated shared logic under Beam.Abstractions.
121 lines
4.6 KiB
C#
121 lines
4.6 KiB
C#
using Microsoft.Extensions.Logging;
|
||
using OpenQA.Selenium.Chrome;
|
||
using System;
|
||
using System.Collections.Generic;
|
||
using System.Diagnostics;
|
||
using System.Linq;
|
||
using System.Text;
|
||
using System.Threading.Tasks;
|
||
using Beam.Downloaders;
|
||
using Beam.Models;
|
||
|
||
namespace Beam.Stealth {
|
||
using File = System.IO.File;
|
||
|
||
/// <summary>
/// Unit downloader that obtains a file through the driver exposed by <see cref="Config"/>:
/// it navigates to the link, awaits <see cref="Manipulator"/> with that driver, and then polls
/// the configured downloads directory until a finished file can be read.
/// </summary>
/// <typeparam name="T">Type produced by the transformer from the downloaded <c>ByteDocument</c>.</typeparam>
public class StealthUnitDownloader<T> : UnitDownloaderBinary<T> {
    /// <summary>Supplies the driver, downloads directory, timeout and optional logger used here.</summary>
    public StealthConfig Config { get; }

    /// <summary>Callback awaited with the driver immediately after navigation.</summary>
    public StealthAsyncManipulator Manipulator { get; }

    private ILogger? Logger => Config.Logger;

    /// <summary>Creates the downloader.</summary>
    /// <param name="client">Forwarded unchanged to the base class.</param>
    /// <param name="config">Stored in <see cref="Config"/>.</param>
    /// <param name="manipulator">Stored in <see cref="Manipulator"/>.</param>
    /// <param name="transformer">Forwarded unchanged to the base class.</param>
    /// <param name="failurePredicates">Forwarded unchanged to the base class; may be null.</param>
    public StealthUnitDownloader(HttpClient client, StealthConfig config, StealthAsyncManipulator manipulator, AsyncTransformer<ByteDocument, T> transformer, AsyncDownloadFailurePredicate<ByteDocument>?[]? failurePredicates = null) : base(client, transformer, failurePredicates) {
        Config = config;
        Manipulator = manipulator;
    }

    /// <summary>
    /// Performs a single download attempt: navigate, run the manipulator, wait for the file,
    /// check it with <c>IsFailure</c> and transform it.
    /// </summary>
    /// <param name="link">Address the driver navigates to.</param>
    /// <param name="ct">Token passed to the directory polling and file read.</param>
    /// <returns>
    /// <c>(true, result)</c> on success; <c>(false, default)</c> when no file arrived, the document
    /// was flagged as a failure, or any exception was thrown (it is logged, not rethrown).
    /// </returns>
    protected override async Task<(bool Success, T? Result)> TryDownloadWithNoRetries(
        string link, CancellationToken ct) {
        try {
            Logger?.LogInformation("Navigating to {Link}", link);

            var driver = Config.Driver;
            await driver.Navigate().GoToUrlAsync(link);
            await Manipulator(driver);

            // The timeout clock starts only after navigation and manipulation have finished.
            var sw = Stopwatch.StartNew();
            ByteDocument? doc = await WaitForDownloadAsync(link, sw, ct);

            if (doc is null || await IsFailure(doc)) {
                return (false, default);
            }

            Logger?.LogInformation("Download finished in {Elapsed}", sw.Elapsed);
            return (true, await Transformer(doc));
        } catch (Exception ex) {
            // Deliberately broad: every failure (including cancellation thrown by the waits
            // below) is reported as an unsuccessful attempt instead of propagating.
            Logger?.LogError(ex, "Error occurred downloading {Link}", link);
            return (false, default);
        }
    }

    /* --------------------------------------------------------------------- */

    /// <summary>
    /// Polls <c>Config.DownloadsDirectory</c> until a non-temporary file has a stable, non-zero
    /// size, no temporary files remain, and the file can be opened exclusively; then reads it.
    /// </summary>
    /// <param name="link">Not used by the current implementation.</param>
    /// <param name="sw">Running stopwatch; polling stops once it reaches <c>Config.TimeOut</c>.</param>
    /// <param name="ct">Cancels the delays and the final read.</param>
    /// <returns>The file name and bytes, or <c>null</c> on timeout or cancellation.</returns>
    private async Task<ByteDocument?> WaitForDownloadAsync(
        string link, Stopwatch sw, CancellationToken ct) {
        const int PollDelayMs = 250;      // how often we look
        const int StableDelayMs = 1000;   // size-unchanged window
        const int LockRetryDelayMs = 200; // pause between exclusive-open attempts

        string dir = Config.DownloadsDirectory;
        string? finalPath = null;
        long lastSize = -1;
        // Measured on the stopwatch rather than the wall clock so system clock
        // adjustments cannot stretch or shrink the stability window.
        TimeSpan lastChange = sw.Elapsed;

        static bool IsTemp(string p) =>
            p.EndsWith(".crdownload", StringComparison.OrdinalIgnoreCase) ||
            p.EndsWith(".part", StringComparison.OrdinalIgnoreCase);

        Logger?.LogDebug("Polling {Dir} for download files", dir);

        while (sw.Elapsed < Config.TimeOut && !ct.IsCancellationRequested) {
            // current files in the directory
            string[] files = Directory.GetFiles(dir, "*", SearchOption.TopDirectoryOnly);

            // Ignore temp names; once a candidate is chosen it is kept for the rest of the wait.
            // NOTE(review): any non-temp file already present is picked up here — presumably the
            // directory is emptied between downloads; confirm against the caller.
            finalPath ??= files.FirstOrDefault(f => !IsTemp(f));

            // still nothing but temps – keep waiting
            if (finalPath is null) {
                await Task.Delay(PollDelayMs, ct);
                continue;
            }

            // track growth
            long size = new FileInfo(finalPath).Length;
            if (size == 0 || size != lastSize) {
                lastSize = size;
                lastChange = sw.Elapsed;
                await Task.Delay(PollDelayMs, ct);
                continue;
            }

            // size stable long enough *and* no temp files left?
            bool tempsRemain = files.Any(IsTemp);
            if ((sw.Elapsed - lastChange).TotalMilliseconds < StableDelayMs || tempsRemain) {
                await Task.Delay(PollDelayMs, ct);
                continue;
            }

            // Wait until the writer releases its lock, but never beyond the overall timeout.
            bool released = false;
            while (sw.Elapsed < Config.TimeOut) {
                try {
                    using FileStream _ =
                        File.Open(finalPath, FileMode.Open, FileAccess.Read, FileShare.None);
                    released = true;
                    break;
                } catch (IOException ex) when (ex is not FileNotFoundException and not DirectoryNotFoundException) {
                    // Sharing violation or similar transient error: retry after a short pause.
                    // A missing file or directory is not retried; it propagates to the caller,
                    // because waiting can never make a vanished path openable.
                    await Task.Delay(LockRetryDelayMs, ct);
                }
            }

            if (!released) {
                break; // timed out while the file was still locked
            }

            byte[] bytes = await File.ReadAllBytesAsync(finalPath, ct);
            Logger?.LogInformation("Download completed {Path} ({Size} bytes)",
                finalPath, bytes.Length);

            return new ByteDocument(Path.GetFileName(finalPath), bytes);
        }

        if (ct.IsCancellationRequested) {
            // Distinguish a cancelled wait from a genuine timeout in the logs.
            Logger?.LogWarning("Download cancelled after {Elapsed}", sw.Elapsed);
            return null;
        }

        Logger?.LogWarning("Download timed out after {Elapsed}", sw.Elapsed);
        return null;
    }
}
|
||
}
|