Introduce Beam.Fluent and Beam.Models projects

Added new Beam.Fluent and Beam.Models projects with a staged download builder and data context models. Refactored the model classes and moved them from Beam.Temporary.Cli to Beam.Models. Added new data providers and extended DataBindings in Beam.Dynamic. Renamed Beam.Puppeteer to Beam.Playwright and updated the related classes. Updated project references and package versions. Removed obsolete and unused files from Beam.Temporary.Cli.
This commit is contained in:
qwsdcvghyu89
2025-09-18 18:32:25 +10:00
parent 849bdcd089
commit a7d148a96f
72 changed files with 2100 additions and 721 deletions
+137 -66
View File
@@ -9,13 +9,16 @@ using System.Text.Json.Serialization;
using System.Text.Json.Serialization.Metadata;
using Beam.Temporary.Cli.Templates.Classic;
using Beam.Exports;
using System.Diagnostics;
using Beam.Models;
using Beam.Stealth;
namespace Beam.Temporary.Cli {
internal class Program {
public static JsonSerializerOptions ConversionOptions { get; internal set; } = new();
public static BeamDataDictionary BeamData { get; set; } = [];
public static BeamDataContext BeamData { get; set; } = [];
public static IArchitecture Architecture = IArchitecture.Default;
@@ -27,14 +30,15 @@ namespace Beam.Temporary.Cli {
var web = new HtmlWeb();
var lf = LoggerFactory.Create((x) => {
x.AddConsole();
});
var lf = LoggerFactory.Create((x) => x
.AddConsole()
.SetMinimumLevel(LogLevel.Trace)
);
ILogger logger = lf
.CreateLogger("Program");
await using var sharedContext = await DataDictionaryContext<BeamDataDictionary>.Create(
await using var sharedContext = await DataDictionaryContext<BeamDataContext>.Create(
BeamDataPath,
false,
DataKind.Shared,
@@ -45,82 +49,149 @@ namespace Beam.Temporary.Cli {
BeamData = sharedContext.Data;
BeamData.Clear();
NovelStatics.Define_WoDuShu(BeamData);
NovelStatics.Define_WoDuShu_HouseOfHorrors(BeamData);
NovelStatics.Define_YeBiQuge(BeamData);
NovelStatics.Define_YeBiQuge_LordOfMysteries(BeamData);
NovelStatics.Define_KuaiShu5(BeamData);
NovelStatics.Define_KuaiShu5_LordOfMysteries(BeamData);
ClassicTemplates.Register(BeamData);
await sharedContext.ForceSave();
BeamData = sharedContext.Data; // need to refresh instance after forced save!
CancellationTokenSource cts = new();
var novel = new DataKey<TextResource>("novels:house_of_horrors");
using var config = StealthConfig.Create(true, null, TimeSpan.FromMinutes(2), Browser.Chrome, lf.CreateLogger<StealthConfig>());
var unit = new StealthUnitPageDownloader<HtmlDocument>(new(), config, (x) => {
return Task.CompletedTask;
}, x => Task.FromResult(x));
var metadata2 = await DownloadBuilder<HtmlDocument, IDocumentMetaData>.FromMeta(novel, BeamData)
.WithLink()
.WithTransformer(CommonTransformers.ArticleDataTransformer)
.Configure((x) => x
.WithDownloadLogger(logger)
.WithRetryReporter(new Progress<RetryReport>())
.WithTimeOut(TimeSpan.FromSeconds(15)))
.Build()
.FirstAsync();
var (success, result) = await unit.TryDownload([new("https://duckduckgo.com/?t=ffab&q=C%23+stealth+headless+browser&ia=web", 0)], default);
if (success)
logger?.LogInformation("Success! Downloaded '{}'", result?.DocumentNode.Name);
else
logger?.LogError("Failed to download!");
var downloader2 = DownloadBuilder<HtmlDocument, IDocument>.FromText(novel, BeamData)
.WithRange(1..5)
.WithLinkGenerator()
.WithTransformer((x) => CommonTransformers.DocumentTransformer(x, metadata2.Data))
.Configure((x) => x
.WithDownloadLogger(logger)
.WithDownloadReporter(new Progress<DownloadReport>((x) => logger.LogInformation(x.ToString())))
.WithTimeOut(TimeSpan.FromSeconds(15))
)
.Build();
Console.WriteLine(result?.DocumentNode.OuterHtml);
//var novelResDict = new DataKey<ResourceDictionary>("kuaishu5:novels:lord_of_the_mysteries");
List<Task<Ordered<IDocument>>> translationTasks = [];
List<Ordered<IDocument>> documents = [];
//var metadata2 = await DownloadBuilder<HtmlDocument, TableOfContentsData>.FromResource(novelResDict, nameof(TableOfContentsData), BeamData)
// .WithLink()
// .WithTransformer(CommonTransformers.TableOfContentsTransformer)
// .Configure((x) => x
// .WithDownloadLogger(logger)
// .WithRetryReporter(new Progress<RetryReport>())
// .WithTimeOut(TimeSpan.FromSeconds(15)))
// .Build()
// .FirstAsync();
await foreach (var download in downloader2.Take(10)) {
if (!download.Data.MetaData.TryGetValue(Architecture.ChapterKey, out var meta))
continue;
if (meta is not ArticleData articleMetaData)
continue;
if (!download.Data.MetaData.TryGetValue(Architecture.BookKey, out var bookmeta))
continue;
if (meta is not ArticleData bookMetaData)
continue;
//Console.WriteLine($"Title: {data.Name}");
//Console.WriteLine($"Description: {data.Description}");
//Console.WriteLine($"Categories: {data.Categories.Aggregate((x, y) => $"{x}; {y}")}");
//Console.WriteLine($"Authors: {data.Authors.Aggregate((x,y) => $"{x}; {y}")}");
Console.WriteLine($"Chapter title: {articleMetaData.Name}");
Console.WriteLine($"Book title: {bookMetaData.Name}");
//Console.WriteLine($"Content: {download}");
//if (metadata2.Data.PagesLinks is null || metadata2.Data.PagesLinks.Length == 0)
// Debugger.Break();
//translationTasks.Add(Task.Run(async () => {
// logger.LogInformation("Beginning translation {} task for {}", download.Order, articleMetaData.Name);
// var ret = new Ordered<IDocument>(await QuickAndDirtyJanitor.TranslateAsync(download.Data), download.Order);
// logger.LogInformation("Finished translation {} task for {}", download.Order, articleMetaData.Name);
// return ret;
//}));
}
//var pageLinks = DownloadBuilder<HtmlDocument, TableOfContentsData>.FromScratch()
// .WithLinks(metadata2.Data.PagesLinks)
// .WithTransformer(CommonTransformers.TableOfContentsTransformer(BeamData.Bindings[BeamData.Resources[BeamData.ResourceDictionaries[novelResDict].Resources[nameof(TableOfContentsData)]].Bindings]))
// .Configure(x => x
// .WithDownloadLogger(logger)
// .WithRetryReporter(new Progress<RetryReport>())
// .WithTimeOut(TimeSpan.FromSeconds(15)))
// .Build();
documents = (await Task.WhenAll(translationTasks)).ToList();
//var links = (await pageLinks
// .ToListAsync())
// .Where(x => x?.Data?.ContentLinks is not null)
// .SelectMany(x => x.Data.ContentLinks!)
// .DistinctBy(x => x.Link.AbsoluteUri);
string testDir = Path.Combine("txt", Path.GetRandomFileName());
Directory.CreateDirectory(testDir);
//var downloader = DownloadBuilder<HtmlDocument, StringDocument>.FromScratch()
// .WithLinks(links)
// .WithTransformer(CommonTransformers.DocumentTransformer(BeamData.Bindings[BeamData.Resources[BeamData.ResourceDictionaries[novelResDict].Resources[nameof(IDocument)]].Bindings]))
// .Configure(x => x
// .WithDownloadLogger(logger)
// .WithRetryReporter(new Progress<RetryReport>(x => logger?.LogWarning("Retrying download {} for the {} time", x.Link, x.TryNumber)))
// .WithTimeOut(TimeSpan.FromSeconds(15)))
// .WithParallelism(4)
// .UseFragments()
// .Build();
int len = documents.MaxBy((x) => x.Order)?.Order ?? -1;
foreach (var document in documents.OrderBy((x) => x.Order)) {
document.Data.MetaData.TryGetValue(Architecture.ChapterKey, out var chapterMetaData);
Dictionary<string, string> linkButtons = new();
if (document.Order != 0)
linkButtons.Add("Previous", $"{document.Order - 1}.html");
if (document.Order != len)
linkButtons.Add("Next", $"{document.Order + 1}.html");
new HtmlExporter(document.Data, chapterMetaData as ArticleData, linkButtons).Write(Path.Combine(testDir, $"{document.Order}.html"));
}
//HashSet<Ordered<StringDocument>> downloaded = [];
//try {
// await foreach (var download in downloader) {
// logger?.LogInformation("Downloaded chapter with order={}", download.Order);
// try {
// downloaded.Add(download);
// } catch (Exception e) {
// logger?.LogError(e, "Unknown error occurred");
// }
// }
//} catch (Exception e) {
// logger?.LogError(e, "Uncaught error detected!");
//} finally {
// logger?.LogInformation("Done with loop, downloaded {}", downloaded.Count);
// try {
// string serialized = JsonSerializer.Serialize(downloaded.Select(x => new { x.Order, x.Data.Content, x.Data.MetaData }).ToArray(), ConversionOptions);
// System.IO.File.WriteAllText("lordOfTheMysteries.json", serialized);
// } catch (Exception e) {
// logger?.LogInformation(e, "Failed to serialize chapters");
// }
//}
//var downloader2 = DownloadBuilder<HtmlDocument, IDocument>.FromText(novel, BeamData)
// .WithRange(1..5)
// .WithLinkGenerator()
// .WithTransformer((x) => CommonTransformers.DocumentTransformer(x, metadata2.Data))
// .Configure((x) => x
// .WithDownloadLogger(logger)
// .WithDownloadReporter(new Progress<DownloadReport>((x) => logger.LogInformation(x.ToString())))
// .WithTimeOut(TimeSpan.FromSeconds(15))
// )
// .Build();
//List<Task<Ordered<IDocument>>> translationTasks = [];
//List<Ordered<IDocument>> documents = [];
//await foreach (var download in downloader2.Take(10)) {
// if (!download.Data.MetaData.TryGetValue(Architecture.ChapterKey, out var meta))
// continue;
// if (meta is not ArticleData articleMetaData)
// continue;
// if (!download.Data.MetaData.TryGetValue(Architecture.BookKey, out var bookmeta))
// continue;
// if (meta is not ArticleData bookMetaData)
// continue;
// //Console.WriteLine($"Title: {data.Name}");
// //Console.WriteLine($"Description: {data.Description}");
// //Console.WriteLine($"Categories: {data.Categories.Aggregate((x, y) => $"{x}; {y}")}");
// //Console.WriteLine($"Authors: {data.Authors.Aggregate((x,y) => $"{x}; {y}")}");
// Console.WriteLine($"Chapter title: {articleMetaData.Name}");
// Console.WriteLine($"Book title: {bookMetaData.Name}");
// //Console.WriteLine($"Content: {download}");
// //translationTasks.Add(Task.Run(async () => {
// // logger.LogInformation("Beginning translation {} task for {}", download.Order, articleMetaData.Name);
// // var ret = new Ordered<IDocument>(await QuickAndDirtyJanitor.TranslateAsync(download.Data), download.Order);
// // logger.LogInformation("Finished translation {} task for {}", download.Order, articleMetaData.Name);
// // return ret;
// //}));
//}
//documents = (await Task.WhenAll(translationTasks)).ToList();
//string testDir = Path.Combine("txt", Path.GetRandomFileName());
//Directory.CreateDirectory(testDir);
//int len = documents.MaxBy((x) => x.Order)?.Order ?? -1;
//foreach (var document in documents.OrderBy((x) => x.Order)) {
// document.Data.MetaData.TryGetValue(Architecture.ChapterKey, out var chapterMetaData);
// Dictionary<string, string> linkButtons = new();
// if (document.Order != 0)
// linkButtons.Add("Previous", $"{document.Order - 1}.html");
// if (document.Order != len)
// linkButtons.Add("Next", $"{document.Order + 1}.html");
// new HtmlExporter(document.Data, chapterMetaData as ArticleData, linkButtons).Write(Path.Combine(testDir, $"{document.Order}.html"));
//}
Console.ReadKey();