Introduce Beam.Fluent and Beam.Models projects
Added new Beam.Fluent and Beam.Models projects with staged download builder and data context models. Refactored and moved model classes from Beam.Temporary.Cli to Beam.Models. Added new data providers and extended DataBindings in Beam.Dynamic. Renamed Beam.Puppeteer to Beam.Playwright and updated related classes. Updated project references and package versions. Removed obsolete and unused files from Beam.Temporary.Cli.
This commit is contained in:
@@ -0,0 +1,17 @@
|
||||
<?xml version='1.0' encoding='utf-8'?>
|
||||
<!-- SDK-style project file for the Selenium-based downloader sources that follow (namespace Beam.Stealth). -->
<Project Sdk="Microsoft.NET.Sdk">
|
||||
<PropertyGroup>
|
||||
<!-- Targets .NET 9. -->
<TargetFramework>net9.0</TargetFramework>
|
||||
<!-- Implicit global usings are enabled. NOTE(review): the C# sources appear to rely on this for names such as Timeout, HttpClient and CancellationToken - confirm before disabling. -->
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<!-- Nullable reference type annotations and warnings are enabled. -->
<Nullable>enable</Nullable>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<!-- Logging abstractions package (the C# sources import Microsoft.Extensions.Logging). -->
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="9.0.7" />
|
||||
<!-- Selenium WebDriver package (the C# sources import OpenQA.Selenium and its Firefox/Chrome/Edge namespaces). -->
<PackageReference Include="Selenium.WebDriver" Version="4.34.0" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<!-- Reference to the sibling Beam project. -->
<ProjectReference Include="..\Beam\Beam.csproj">
|
||||
<!-- NOTE(review): PrivateAssets=all normally stops this reference from flowing to consumers of this project; confirm that is intended, since the public types here derive from types that are not declared in these sources (presumably Beam's). -->
<PrivateAssets>all</PrivateAssets>
|
||||
</ProjectReference>
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
@@ -0,0 +1,10 @@
|
||||
using OpenQA.Selenium;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam.Stealth {
|
||||
/// <summary>
/// Asynchronous callback that receives the Selenium <see cref="IWebDriver"/> so the caller can
/// interact with the loaded page. The stealth downloaders in this namespace await it right after
/// navigating to the target link and before they read the page source or start polling for a
/// downloaded file.
/// </summary>
/// <param name="driver">The browser driver that has just navigated to the link.</param>
/// <returns>A task that completes when the manipulation has finished.</returns>
public delegate Task StealthAsyncManipulator(IWebDriver driver);
|
||||
}
|
||||
@@ -0,0 +1,105 @@
|
||||
using System;
|
||||
using System.IO;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using OpenQA.Selenium;
|
||||
using OpenQA.Selenium.Firefox;
|
||||
using OpenQA.Selenium.Chrome;
|
||||
using OpenQA.Selenium.Edge;
|
||||
|
||||
namespace Beam.Stealth {
|
||||
/// <summary>
/// Browsers that <c>StealthConfig.Create</c> can start a WebDriver session for.
/// The numeric values are spelled out; they are the same values the compiler would assign.
/// </summary>
public enum Browser {
    /// <summary>Mozilla Firefox; the default of <c>StealthConfig.Create</c>.</summary>
    Firefox = 0,

    /// <summary>Google Chrome.</summary>
    Chrome = 1,

    /// <summary>Chromium; <c>StealthConfig.Create</c> handles it exactly like <see cref="Chrome"/>.</summary>
    Chromium = 2,

    /// <summary>Microsoft Edge.</summary>
    Edge = 3
}
|
||||
|
||||
/// <summary>
/// Bundles a running Selenium <see cref="IWebDriver"/> with the settings the stealth downloaders
/// need: the directory the browser saves downloads to, the download time-out and an optional logger.
/// Instances are created through <see cref="Create"/>; disposing the configuration disposes the driver.
/// </summary>
public sealed class StealthConfig : IDisposable {
    // True when Create() generated the download directory itself (no directory was supplied),
    // in which case this instance is responsible for removing it again.
    private readonly bool _ownsDownloadsDirectory;

    /// <summary>Whether the browser window is shown (<c>true</c>) or the browser runs headless (<c>false</c>).</summary>
    public bool ShowBrowser { get; init; }

    /// <summary>Maximum time to wait for a download; defaults to <see cref="Timeout.InfiniteTimeSpan"/>.</summary>
    public TimeSpan TimeOut { get; init; } = Timeout.InfiniteTimeSpan;

    /// <summary>Absolute path of the directory the browser is configured to download into.</summary>
    public string DownloadsDirectory { get; }

    /// <summary>Optional logger; <c>null</c> disables logging.</summary>
    public ILogger? Logger { get; init; }

    /// <summary>The browser driver owned by this configuration.</summary>
    public required IWebDriver Driver { get; init; }

    private StealthConfig(string downloadDir, bool ownsDownloadsDirectory) {
        DownloadsDirectory = downloadDir;
        _ownsDownloadsDirectory = ownsDownloadsDirectory;
    }

    /* ---------- browser-specific option builders ---------- */

    /// <summary>Builds Firefox options that save downloads to <paramref name="downloadDir"/> without prompting.</summary>
    private static FirefoxOptions GetFirefoxOptions(string downloadDir, bool headless) {
        var o = new FirefoxOptions();
        if (headless) {
            o.AddArgument("--headless");
        }

        o.SetPreference("browser.download.folderList", 2); // use custom dir
        o.SetPreference("browser.download.dir", downloadDir);
        o.SetPreference("browser.download.useDownloadDir", true);
        o.SetPreference("browser.helperApps.neverAsk.saveToDisk",
            "application/octet-stream,application/pdf,application/zip");
        o.SetPreference("pdfjs.disabled", true); // open PDFs externally
        o.SetPreference("browser.download.manager.showWhenStarting", false);

        return o;
    }

    /// <summary>Builds Chrome options that save downloads to <paramref name="downloadDir"/> without prompting.</summary>
    private static ChromeOptions GetChromeOptions(string downloadDir, bool headless) {
        var o = new ChromeOptions();
        if (headless) {
            o.AddArgument("--headless=new");
        }

        // download prefs
        o.AddUserProfilePreference("download.default_directory", downloadDir);
        o.AddUserProfilePreference("download.prompt_for_download", false);
        o.AddUserProfilePreference("safebrowsing.enabled", false);

        // common stability flags
        o.AddArgument("--no-sandbox");
        o.AddArgument("--disable-dev-shm-usage");

        return o;
    }

    /// <summary>Builds Edge options that save downloads to <paramref name="downloadDir"/> without prompting.</summary>
    private static EdgeOptions GetEdgeOptions(string downloadDir, bool headless) {
        var o = new EdgeOptions();
        if (headless) {
            o.AddArgument("--headless=new");
        }

        o.AddUserProfilePreference("download.default_directory", downloadDir);
        o.AddUserProfilePreference("download.prompt_for_download", false);
        o.AddUserProfilePreference("safebrowsing.enabled", false);

        return o;
    }

    /* ---------- factory ---------- */

    /// <summary>
    /// Creates the download directory if needed, starts the requested browser and returns a
    /// configuration wrapping the new driver.
    /// </summary>
    /// <param name="showBrowser"><c>false</c> (default) starts the browser headless.</param>
    /// <param name="downloadDir">
    /// Directory for downloads. A relative path is resolved against the current working directory.
    /// When <c>null</c>, a randomly named directory under the system temp path is created and is
    /// deleted again by <see cref="Dispose"/>.
    /// </param>
    /// <param name="timeOut">Download time-out; <c>null</c> means <see cref="Timeout.InfiniteTimeSpan"/>.</param>
    /// <param name="browser">Browser to start; unknown values fall back to Firefox.</param>
    /// <param name="logger">Optional logger stored on the configuration.</param>
    /// <returns>A configuration that owns the started driver.</returns>
    public static StealthConfig Create(
        bool showBrowser = false,
        string? downloadDir = null,
        TimeSpan? timeOut = null,
        Browser browser = Browser.Firefox,
        ILogger? logger = null) {
        // pick or create a dedicated download folder
        bool ownsDirectory = downloadDir is null;
        downloadDir ??= Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());

        // The value is handed to the browser as a download preference; normalise it to an
        // absolute path so it does not depend on how the browser process resolves relative paths.
        downloadDir = Path.GetFullPath(downloadDir);
        Directory.CreateDirectory(downloadDir);

        bool headless = !showBrowser;

        IWebDriver driver;
        try {
            driver = browser switch {
                Browser.Chrome or Browser.Chromium
                    => new ChromeDriver(GetChromeOptions(downloadDir, headless)),
                Browser.Edge
                    => new EdgeDriver(GetEdgeOptions(downloadDir, headless)),
                Browser.Firefox or _
                    => new FirefoxDriver(GetFirefoxOptions(downloadDir, headless)),
            };
        } catch {
            // The browser failed to start: do not leave the temp folder we just created behind.
            if (ownsDirectory) {
                TryDeleteDirectory(downloadDir, logger);
            }
            throw;
        }

        return new StealthConfig(downloadDir, ownsDirectory) {
            ShowBrowser = showBrowser,
            TimeOut = timeOut ?? Timeout.InfiniteTimeSpan,
            Logger = logger,
            Driver = driver
        };
    }

    /// <summary>
    /// Disposes the driver and, when the download directory was generated by <see cref="Create"/>
    /// (no directory supplied by the caller), removes that directory. A caller-supplied directory
    /// is never deleted.
    /// </summary>
    public void Dispose() {
        try {
            Driver.Dispose();
        } finally {
            if (_ownsDownloadsDirectory) {
                TryDeleteDirectory(DownloadsDirectory, Logger);
            }
        }
    }

    /// <summary>Best-effort recursive delete; failures are logged and never thrown.</summary>
    private static void TryDeleteDirectory(string path, ILogger? logger) {
        try {
            if (Directory.Exists(path)) {
                Directory.Delete(path, recursive: true);
            }
        } catch (Exception ex) when (ex is IOException or UnauthorizedAccessException) {
            // Cleanup must not turn Dispose() or a failed Create() into a second error.
            logger?.LogWarning(ex, "Could not remove download directory {Dir}", path);
        }
    }
}
|
||||
}
|
||||
@@ -0,0 +1,12 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam.Stealth {
|
||||
/// <summary>
/// Fragment downloader for binary documents whose unit downloader is a
/// <see cref="StealthUnitDownloader{T}"/>. The class adds no members of its own: it forwards its
/// arguments to <c>UnitFragmentDownloaderBinary&lt;T&gt;</c> and supplies a
/// <see cref="StealthUnitDownloader{T}"/> built from the same client, configuration, manipulator,
/// transformer and failure predicates as the last base-constructor argument.
/// </summary>
/// <typeparam name="T">Result type produced by <paramref name="transformer"/>.</typeparam>
/// <param name="client">HTTP client forwarded to the base class and to the unit downloader.</param>
/// <param name="config">Browser configuration used by the unit downloader.</param>
/// <param name="manipulator">Callback the unit downloader awaits after navigating to a link.</param>
/// <param name="transformer">Converts a downloaded <c>ByteDocument</c> into <typeparamref name="T"/>.</param>
/// <param name="failurePredicate">Optional failure predicates, forwarded unchanged.</param>
/// <param name="fragmentSize">Forwarded to the base class; defaults to 4 (presumably the number of links per fragment — confirm against the base class).</param>
/// <param name="logger">Optional logger forwarded to the base class.</param>
public class StealthFragmentDownloader<T>(
    HttpClient client,
    StealthConfig config,
    StealthAsyncManipulator manipulator,
    AsyncTransformer<ByteDocument, T> transformer,
    AsyncDownloadFailurePredicate<ByteDocument>?[]? failurePredicate = null,
    int fragmentSize = 4,
    ILogger? logger = null)
    : UnitFragmentDownloaderBinary<T>(
        client,
        transformer,
        failurePredicate,
        fragmentSize,
        logger,
        new StealthUnitDownloader<T>(client, config, manipulator, transformer, failurePredicate)) {
}
|
||||
}
|
||||
@@ -0,0 +1,13 @@
|
||||
using HtmlAgilityPack;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam.Stealth {
|
||||
/// <summary>
/// Fragment downloader for HTML pages whose unit downloader is a
/// <see cref="StealthUnitPageDownloader{T}"/>. The class adds no members of its own: it forwards
/// its arguments to <c>UnitFragmentDownloader&lt;T&gt;</c> and supplies a
/// <see cref="StealthUnitPageDownloader{T}"/> built from the same web client, configuration,
/// manipulator, transformer and failure predicates as the last base-constructor argument.
/// </summary>
/// <typeparam name="T">Result type produced by <paramref name="transformer"/>.</typeparam>
/// <param name="web">HtmlAgilityPack web client forwarded to the base class and to the unit downloader.</param>
/// <param name="config">Browser configuration used by the unit downloader.</param>
/// <param name="manipulator">Callback the unit downloader awaits after navigating to a link.</param>
/// <param name="transformer">Converts the loaded <c>HtmlDocument</c> into <typeparamref name="T"/>.</param>
/// <param name="failurePredicate">Optional failure predicates, forwarded unchanged.</param>
/// <param name="fragmentSize">Forwarded to the base class; defaults to 4 (presumably the number of links per fragment — confirm against the base class).</param>
/// <param name="logger">Optional logger forwarded to the base class.</param>
public class StealthFragmentPageDownloader<T>(
    HtmlWeb web,
    StealthConfig config,
    StealthAsyncManipulator manipulator,
    AsyncTransformer<HtmlDocument, T> transformer,
    AsyncDownloadFailurePredicate<HtmlDocument>?[]? failurePredicate = null,
    int fragmentSize = 4,
    ILogger? logger = null)
    : UnitFragmentDownloader<T>(
        web,
        transformer,
        failurePredicate,
        fragmentSize,
        logger,
        new StealthUnitPageDownloader<T>(web, config, manipulator, transformer, failurePredicate)) {
}
|
||||
}
|
||||
@@ -0,0 +1,116 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using OpenQA.Selenium.Chrome;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam.Stealth {
|
||||
/// <summary>
/// Binary unit downloader that drives a real browser: it navigates the configured driver to the
/// link, lets the manipulator act on the page, then watches the configured downloads directory
/// until a new, fully written file appears and hands its bytes to the transformer.
/// </summary>
/// <typeparam name="T">Result type produced by the transformer.</typeparam>
public class StealthUnitDownloader<T> : UnitDownloaderBinary<T> {
    private const int PollDelayMs = 250;       // how often we look
    private const int StableDelayMs = 1000;    // size-unchanged window
    private const int LockRetryDelayMs = 200;  // pause between attempts to open the file exclusively

    /// <summary>Browser configuration (driver, downloads directory, time-out, logger).</summary>
    public StealthConfig Config { get; }

    /// <summary>Callback awaited after navigation, before polling for the download starts.</summary>
    public StealthAsyncManipulator Manipulator { get; }

    private ILogger? Logger => Config.Logger;

    /// <summary>Creates the downloader; <paramref name="client"/>, <paramref name="transformer"/> and
    /// <paramref name="failurePredicates"/> are forwarded to the base class.</summary>
    public StealthUnitDownloader(HttpClient client, StealthConfig config, StealthAsyncManipulator manipulator, AsyncTransformer<ByteDocument, T> transformer, AsyncDownloadFailurePredicate<ByteDocument>?[]? failurePredicates = null) : base(client, transformer, failurePredicates) {
        Config = config;
        Manipulator = manipulator;
    }

    /// <summary>
    /// Performs one download attempt for <paramref name="link"/>.
    /// </summary>
    /// <param name="link">URL to navigate the browser to.</param>
    /// <param name="ct">Token that aborts the wait for the download.</param>
    /// <returns>
    /// <c>(true, result)</c> when a file was downloaded, passed the failure check and was transformed;
    /// <c>(false, default)</c> on time-out, cancellation, failure-predicate match or any exception
    /// (exceptions are logged, not rethrown).
    /// </returns>
    protected override async Task<(bool Success, T? Result)> TryDownloadWithNoRetries(
        string link, CancellationToken ct) {
        try {
            Logger?.LogInformation("Navigating to {Link}", link);

            // The downloads directory is shared by every attempt made with this configuration.
            // Remember what is already there so a file left by an earlier download is not
            // mistaken for the result of this one.
            HashSet<string> preExisting = new(
                Directory.EnumerateFiles(Config.DownloadsDirectory, "*", SearchOption.TopDirectoryOnly),
                StringComparer.Ordinal);

            var driver = Config.Driver;
            await driver.Navigate().GoToUrlAsync(link);
            await Manipulator(driver);

            var sw = Stopwatch.StartNew();
            ByteDocument? doc = await WaitForDownloadAsync(preExisting, sw, ct);

            if (doc is null || await IsFailure(doc)) {
                return (false, default);
            }

            Logger?.LogInformation("Download finished in {Elapsed}", sw.Elapsed);
            return (true, await Transformer(doc));
        } catch (Exception ex) {
            Logger?.LogError(ex, "Error occurred downloading {Link}", link);
            return (false, default);
        }
    }

    /* --------------------------------------------------------------------- */

    /// <summary>True for the partial-download names used by Chromium-based browsers and Firefox.</summary>
    private static bool IsTemp(string p) =>
        p.EndsWith(".crdownload", StringComparison.OrdinalIgnoreCase) ||
        p.EndsWith(".part", StringComparison.OrdinalIgnoreCase);

    /// <summary>
    /// Polls the downloads directory until a file that was not in <paramref name="preExisting"/>
    /// has a non-zero size that stayed unchanged for <see cref="StableDelayMs"/>, no new temp files
    /// remain and the file can be opened exclusively; then reads it.
    /// </summary>
    /// <returns>The downloaded document, or <c>null</c> on time-out or cancellation.</returns>
    private async Task<ByteDocument?> WaitForDownloadAsync(
        HashSet<string> preExisting, Stopwatch sw, CancellationToken ct) {
        string dir = Config.DownloadsDirectory;
        TimeSpan timeout = Config.TimeOut;

        // Timeout.InfiniteTimeSpan is -1 ms, so a plain "elapsed < timeout" comparison would be
        // false from the start and the loop would never run with the default configuration.
        bool unbounded = timeout == Timeout.InfiniteTimeSpan;
        bool Expired() => !unbounded && sw.Elapsed >= timeout;
        bool IsNew(string p) => !preExisting.Contains(p);

        string? finalPath = null;
        long lastSize = -1;
        TimeSpan lastChange = sw.Elapsed; // stopwatch time: immune to wall-clock adjustments

        Logger?.LogDebug("Polling {Dir} for download files", dir);

        while (!ct.IsCancellationRequested && !Expired()) {
            // files that appeared since navigation started
            string[] files = Directory.EnumerateFiles(dir, "*", SearchOption.TopDirectoryOnly)
                .Where(IsNew)
                .ToArray();

            // ignore temp names; pick the first real candidate and stick with it
            finalPath ??= files.FirstOrDefault(f => !IsTemp(f));

            // still nothing but temps – keep waiting
            if (finalPath is null) {
                await Task.Delay(PollDelayMs, ct);
                continue;
            }

            // track growth (an empty file is treated as "not started yet")
            long size = new FileInfo(finalPath).Length;
            if (size == 0 || size != lastSize) {
                lastSize = size;
                lastChange = sw.Elapsed;
                await Task.Delay(PollDelayMs, ct);
                continue;
            }

            // size stable long enough *and* no temp files left?
            bool tempsRemain = files.Any(IsTemp);
            if ((sw.Elapsed - lastChange).TotalMilliseconds < StableDelayMs || tempsRemain) {
                await Task.Delay(PollDelayMs, ct);
                continue;
            }

            // wait until writer releases lock (bounded by the same time-out)
            if (!await WaitForWriterReleaseAsync(finalPath, Expired, ct)) {
                break;
            }

            byte[] bytes = await File.ReadAllBytesAsync(finalPath, ct);
            Logger?.LogInformation("Download completed {Path} ({Size} bytes)",
                finalPath, bytes.Length);

            return new ByteDocument(Path.GetFileName(finalPath), bytes);
        }

        if (ct.IsCancellationRequested) {
            Logger?.LogWarning("Download cancelled after {Elapsed}", sw.Elapsed);
        } else {
            Logger?.LogWarning("Download timed out after {Elapsed}", sw.Elapsed);
        }
        return null;
    }

    /// <summary>
    /// Repeatedly tries to open <paramref name="path"/> with exclusive access until it succeeds.
    /// </summary>
    /// <returns><c>true</c> once the file could be opened; <c>false</c> when <paramref name="expired"/>
    /// reports that the time-out elapsed while the file was still locked.</returns>
    private static async Task<bool> WaitForWriterReleaseAsync(
        string path, Func<bool> expired, CancellationToken ct) {
        while (true) {
            try {
                using FileStream probe =
                    File.Open(path, FileMode.Open, FileAccess.Read, FileShare.None);
                return true;
            } catch (IOException) {
                // At least one attempt is always made; give up only after the deadline passed.
                if (expired()) {
                    return false;
                }
            }

            await Task.Delay(LockRetryDelayMs, ct);
        }
    }
}
|
||||
}
|
||||
@@ -0,0 +1,41 @@
|
||||
using HtmlAgilityPack;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Beam.Stealth {
|
||||
/// <summary>
/// HTML unit downloader that drives a real browser: it navigates the configured driver to the
/// link, lets the manipulator act on the page, then parses the driver's current page source with
/// HtmlAgilityPack and hands the document to the transformer.
/// </summary>
/// <typeparam name="T">Result type produced by the transformer.</typeparam>
public class StealthUnitPageDownloader<T> : UnitDownloader<T> {
    /// <summary>Browser configuration (driver and logger are used here).</summary>
    public StealthConfig Config { get; }

    /// <summary>Callback awaited after navigation, before the page source is read.</summary>
    public StealthAsyncManipulator Manipulator { get; }

    private ILogger? Logger => Config.Logger;

    /// <summary>Creates the downloader; <paramref name="web"/>, <paramref name="transformer"/> and
    /// <paramref name="failurePredicate"/> are forwarded to the base class.</summary>
    public StealthUnitPageDownloader(HtmlWeb web, StealthConfig config, StealthAsyncManipulator manipulator, AsyncTransformer<HtmlDocument, T> transformer, AsyncDownloadFailurePredicate<HtmlDocument>?[]? failurePredicate = null) : base(web, transformer, failurePredicate) {
        Config = config;
        Manipulator = manipulator;
    }

    /// <summary>
    /// Performs one page-load attempt for <paramref name="link"/>.
    /// </summary>
    /// <param name="link">URL to navigate the browser to.</param>
    /// <param name="ct">Cancellation token. NOTE(review): it is not consulted in this method — confirm whether navigation should observe it.</param>
    /// <returns>
    /// <c>(true, result)</c> when the page passed the failure check and was transformed;
    /// <c>(false, default)</c> on a failure-predicate match or any exception (logged, not rethrown).
    /// </returns>
    protected async override Task<(bool, T?)> TryDownloadWithNoRetries(string link, CancellationToken ct) {
        try {
            var driver = Config.Driver;

            await driver.Navigate().GoToUrlAsync(link);
            await Manipulator(driver);

            // Parse whatever the browser currently renders, i.e. the DOM after the manipulator ran.
            HtmlDocument doc = new();
            doc.LoadHtml(driver.PageSource);

            if (await IsFailure(doc)) {
                return (false, default);
            }

            return (true, await Transformer(doc));
        } catch (Exception e) {
            // Named placeholder so structured-logging sinks record the URL under "Link",
            // matching the template used by StealthUnitDownloader.
            Logger?.LogError(e, "Error occurred downloading {Link}", link);
            return (false, default);
        }
    }
}
|
||||
}
|
||||
Reference in New Issue
Block a user