refactor: modularize Beam into new projects and interfaces

- Introduced modularity by splitting Beam into new projects: Beam.Abstractions, Beam.Models, and Beam.Downloaders.
- Refactored existing classes into appropriate namespaces and projects.
- Replaced specific implementations with abstractions (e.g., SourceLinkBuilder with LinkBuilder, State with IState).
- Updated interfaces: added ITemplate, IArticleData, IDownloadReport, and others for improved extensibility.
- Removed deprecated classes like SourceLinkBuilder and StateChangerFactory.
- Refactored downloaders to pass links as `string` instead of `SourceLink`.
- Consolidated shared logic under Beam.Abstractions.
This commit is contained in:
qwsdcvghyu89
2025-09-22 01:51:46 +10:00
parent a7d148a96f
commit 7ed05abdb8
128 changed files with 2058 additions and 1804 deletions
+223 -222
View File
@@ -1,229 +1,230 @@
using aeqw89.PersistentData;
using aeqw89.DataKeys;
using Beam.Dynamic;
using HtmlAgilityPack;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Text.Json.Serialization.Metadata;
using Beam.Temporary.Cli.Templates.Classic;
using Beam.Exports;
using System.Diagnostics;
using Beam.Models;
using Beam.Stealth;
// using aeqw89.PersistentData;
// using aeqw89.DataKeys;
// using Beam.Dynamic;
// using HtmlAgilityPack;
// using Microsoft.Extensions.DependencyInjection;
// using Microsoft.Extensions.Logging;
// using System.Text.Json;
// using System.Text.Json.Serialization;
// using System.Text.Json.Serialization.Metadata;
// using Beam.Temporary.Cli.Templates.Classic;
// using Beam.Exports;
// using System.Diagnostics;
// using Beam.Models;
// using Beam.Models.Public_Concrete;
// using Beam.Stealth;
namespace Beam.Temporary.Cli {
internal class Program {
//
// public static JsonSerializerOptions ConversionOptions { get; internal set; } = new();
//
// public static BeamDataContext BeamData { get; set; } = [];
//
// public static IArchitecture Architecture = IArchitecture.Default;
//
// const string BeamDataPath = "data/.dat";
public static JsonSerializerOptions ConversionOptions { get; internal set; } = new();
public static BeamDataContext BeamData { get; set; } = [];
public static IArchitecture Architecture = IArchitecture.Default;
const string BeamDataPath = "data/.dat";
static async Task Main(string[] args) {
ConversionOptions.Converters.AddPersistentDataRequiredConverters();
ConversionOptions.WriteIndented = true;
var web = new HtmlWeb();
var lf = LoggerFactory.Create((x) => x
.AddConsole()
.SetMinimumLevel(LogLevel.Trace)
);
ILogger logger = lf
.CreateLogger("Program");
await using var sharedContext = await DataDictionaryContext<BeamDataContext>.Create(
BeamDataPath,
false,
DataKind.Shared,
logger,
ConversionOptions
);
BeamData = sharedContext.Data;
BeamData.Clear();
NovelStatics.Define_YeBiQuge(BeamData);
NovelStatics.Define_YeBiQuge_LordOfMysteries(BeamData);
NovelStatics.Define_KuaiShu5(BeamData);
NovelStatics.Define_KuaiShu5_LordOfMysteries(BeamData);
ClassicTemplates.Register(BeamData);
await sharedContext.ForceSave();
BeamData = sharedContext.Data; // need to refresh instance after forced save!
CancellationTokenSource cts = new();
using var config = StealthConfig.Create(true, null, TimeSpan.FromMinutes(2), Browser.Chrome, lf.CreateLogger<StealthConfig>());
var unit = new StealthUnitPageDownloader<HtmlDocument>(new(), config, (x) => {
return Task.CompletedTask;
}, x => Task.FromResult(x));
var (success, result) = await unit.TryDownload([new("https://duckduckgo.com/?t=ffab&q=C%23+stealth+headless+browser&ia=web", 0)], default);
if (success)
logger?.LogInformation("Success! Downloaded '{}'", result?.DocumentNode.Name);
else
logger?.LogError("Failed to download!");
Console.WriteLine(result?.DocumentNode.OuterHtml);
//var novelResDict = new DataKey<ResourceDictionary>("kuaishu5:novels:lord_of_the_mysteries");
//var metadata2 = await DownloadBuilder<HtmlDocument, TableOfContentsData>.FromResource(novelResDict, nameof(TableOfContentsData), BeamData)
// .WithLink()
// .WithTransformer(CommonTransformers.TableOfContentsTransformer)
// .Configure((x) => x
// .WithDownloadLogger(logger)
// .WithRetryReporter(new Progress<RetryReport>())
// .WithTimeOut(TimeSpan.FromSeconds(15)))
// .Build()
// .FirstAsync();
//if (metadata2.Data.PagesLinks is null || metadata2.Data.PagesLinks.Length == 0)
// Debugger.Break();
//var pageLinks = DownloadBuilder<HtmlDocument, TableOfContentsData>.FromScratch()
// .WithLinks(metadata2.Data.PagesLinks)
// .WithTransformer(CommonTransformers.TableOfContentsTransformer(BeamData.Bindings[BeamData.Resources[BeamData.ResourceDictionaries[novelResDict].Resources[nameof(TableOfContentsData)]].Bindings]))
// .Configure(x => x
// .WithDownloadLogger(logger)
// .WithRetryReporter(new Progress<RetryReport>())
// .WithTimeOut(TimeSpan.FromSeconds(15)))
// .Build();
//var links = (await pageLinks
// .ToListAsync())
// .Where(x => x?.Data?.ContentLinks is not null)
// .SelectMany(x => x.Data.ContentLinks!)
// .DistinctBy(x => x.Link.AbsoluteUri);
//var downloader = DownloadBuilder<HtmlDocument, StringDocument>.FromScratch()
// .WithLinks(links)
// .WithTransformer(CommonTransformers.DocumentTransformer(BeamData.Bindings[BeamData.Resources[BeamData.ResourceDictionaries[novelResDict].Resources[nameof(IDocument)]].Bindings]))
// .Configure(x => x
// .WithDownloadLogger(logger)
// .WithRetryReporter(new Progress<RetryReport>(x => logger?.LogWarning("Retrying download {} for the {} time", x.Link, x.TryNumber)))
// .WithTimeOut(TimeSpan.FromSeconds(15)))
// .WithParallelism(4)
// .UseFragments()
// .Build();
//HashSet<Ordered<StringDocument>> downloaded = [];
//try {
// await foreach (var download in downloader) {
// logger?.LogInformation("Downloaded chapter with order={}", download.Order);
// try {
// downloaded.Add(download);
// } catch (Exception e) {
// logger?.LogError(e, "Unknown error occurred");
// }
// }
//} catch (Exception e) {
// logger?.LogError(e, "Uncaught error detected!");
//} finally {
// logger?.LogInformation("Done with loop, downloaded {}", downloaded.Count);
// try {
// string serialized = JsonSerializer.Serialize(downloaded.Select(x => new { x.Order, x.Data.Content, x.Data.MetaData }).ToArray(), ConversionOptions);
// System.IO.File.WriteAllText("lordOfTheMysteries.json", serialized);
// } catch (Exception e) {
// logger?.LogInformation(e, "Failed to serialize chapters");
// }
//}
//var downloader2 = DownloadBuilder<HtmlDocument, IDocument>.FromText(novel, BeamData)
// .WithRange(1..5)
// .WithLinkGenerator()
// .WithTransformer((x) => CommonTransformers.DocumentTransformer(x, metadata2.Data))
// .Configure((x) => x
// .WithDownloadLogger(logger)
// .WithDownloadReporter(new Progress<DownloadReport>((x) => logger.LogInformation(x.ToString())))
// .WithTimeOut(TimeSpan.FromSeconds(15))
// )
// .Build();
//List<Task<Ordered<IDocument>>> translationTasks = [];
//List<Ordered<IDocument>> documents = [];
//await foreach (var download in downloader2.Take(10)) {
// if (!download.Data.MetaData.TryGetValue(Architecture.ChapterKey, out var meta))
// continue;
// if (meta is not ArticleData articleMetaData)
// continue;
// if (!download.Data.MetaData.TryGetValue(Architecture.BookKey, out var bookmeta))
// continue;
// if (meta is not ArticleData bookMetaData)
// continue;
// //Console.WriteLine($"Title: {data.Name}");
// //Console.WriteLine($"Description: {data.Description}");
// //Console.WriteLine($"Categories: {data.Categories.Aggregate((x, y) => $"{x}; {y}")}");
// //Console.WriteLine($"Authors: {data.Authors.Aggregate((x,y) => $"{x}; {y}")}");
// Console.WriteLine($"Chapter title: {articleMetaData.Name}");
// Console.WriteLine($"Book title: {bookMetaData.Name}");
// //Console.WriteLine($"Content: {download}");
// //translationTasks.Add(Task.Run(async () => {
// // logger.LogInformation("Beginning translation {} task for {}", download.Order, articleMetaData.Name);
// // var ret = new Ordered<IDocument>(await QuickAndDirtyJanitor.TranslateAsync(download.Data), download.Order);
// // logger.LogInformation("Finished translation {} task for {}", download.Order, articleMetaData.Name);
// // return ret;
// //}));
//}
//documents = (await Task.WhenAll(translationTasks)).ToList();
//string testDir = Path.Combine("txt", Path.GetRandomFileName());
//Directory.CreateDirectory(testDir);
//int len = documents.MaxBy((x) => x.Order)?.Order ?? -1;
//foreach (var document in documents.OrderBy((x) => x.Order)) {
// document.Data.MetaData.TryGetValue(Architecture.ChapterKey, out var chapterMetaData);
// Dictionary<string, string> linkButtons = new();
// if (document.Order != 0)
// linkButtons.Add("Previous", $"{document.Order - 1}.html");
// if (document.Order != len)
// linkButtons.Add("Next", $"{document.Order + 1}.html");
// new HtmlExporter(document.Data, chapterMetaData as ArticleData, linkButtons).Write(Path.Combine(testDir, $"{document.Order}.html"));
//}
Console.ReadKey();
//foreach (var download in documents.OrderBy((x) => x.Order)) {
// if (download.Data.TryGetTaggedMetaData<ArticleData>(Architecture.ChapterKey, out var meta))
// Console.WriteLine($"{download.Order}:{meta.Name}");
//}
//string[] templates = new DataKey<File>[] {
// HtmlBook.Keys.ContentPage,
// HtmlBook.Keys.NoContentPage,
// HtmlBook.Keys.TitlePage,
// HtmlBook.Keys.StylesPage,
//}.Select(
// (x) => BeamData.Files.ReadToString(x.WithNamespace("aeqw89:files:templates:classic"))
//).ToArray();
//HtmlBook book = new(
// bookname: Path.Combine(Path.GetRandomFileName(), "I Alone Level Up"),
// new CssData(),
// new ArticleData(),
// new HtmlBookTemplates() {
// ContentPageTemplate = templates[0],
// NoContentTemplate = templates[1],
// TitlePageTemplate = templates[2],
// CssTemplate = templates[3],
// },
// documents: documents.Select((x) => x.Data).ToList()
//);
//book.Update();
//Console.WriteLine("One variable!");
/// <summary>
/// Entry point of the temporary Beam CLI.
/// </summary>
/// <remarks>
/// Every statement in the body is currently commented out, so the method returns
/// without doing any work. Because no <c>await</c> remains in the body, the compiler
/// reports warning CS1998 for this <c>async</c> method.
/// NOTE(review): presumably disabled while the types it relied on are being moved
/// into the new Beam.* projects — confirm before re-enabling.
/// </remarks>
/// <param name="args">Command-line arguments; currently unused.</param>
static async Task Main(string[] args) {
// ConversionOptions.Converters.AddPersistentDataRequiredConverters();
// ConversionOptions.WriteIndented = true;
//
// var web = new HtmlWeb();
//
// var lf = LoggerFactory.Create((x) => x
// .AddConsole()
// .SetMinimumLevel(LogLevel.Trace)
// );
//
// ILogger logger = lf
// .CreateLogger("Program");
//
// await using var sharedContext = await DataDictionaryContext<BeamDataContext>.Create(
// BeamDataPath,
// false,
// DataKind.Shared,
// logger,
// ConversionOptions
// );
//
// BeamData = sharedContext.Data;
//
// BeamData.Clear();
// NovelStatics.Define_YeBiQuge(BeamData);
// NovelStatics.Define_YeBiQuge_LordOfMysteries(BeamData);
// NovelStatics.Define_KuaiShu5(BeamData);
// NovelStatics.Define_KuaiShu5_LordOfMysteries(BeamData);
// ClassicTemplates.Register(BeamData);
//
// await sharedContext.ForceSave();
// BeamData = sharedContext.Data; // need to refresh instance after forced save!
//
// CancellationTokenSource cts = new();
//
// using var config = StealthConfig.Create(true, null, TimeSpan.FromMinutes(2), Browser.Chrome, lf.CreateLogger<StealthConfig>());
// var unit = new StealthUnitPageDownloader<HtmlDocument>(new(), config, (x) => {
// return Task.CompletedTask;
// }, x => Task.FromResult(x));
//
// var (success, result) = await unit.TryDownload([new("https://duckduckgo.com/?t=ffab&q=C%23+stealth+headless+browser&ia=web", 0)], default);
// if (success)
// logger?.LogInformation("Success! Downloaded '{}'", result?.DocumentNode.Name);
// else
// logger?.LogError("Failed to download!");
//
// Console.WriteLine(result?.DocumentNode.OuterHtml);
//
// //var novelResDict = new DataKey<ResourceDictionary>("kuaishu5:novels:lord_of_the_mysteries");
//
// //var metadata2 = await DownloadBuilder<HtmlDocument, TableOfContentsData>.FromResource(novelResDict, nameof(TableOfContentsData), BeamData)
// // .WithLink()
// // .WithTransformer(CommonTransformers.TableOfContentsTransformer)
// // .Configure((x) => x
// // .WithDownloadLogger(logger)
// // .WithRetryReporter(new Progress<RetryReport>())
// // .WithTimeOut(TimeSpan.FromSeconds(15)))
// // .Build()
// // .FirstAsync();
//
// //if (metadata2.Data.PagesLinks is null || metadata2.Data.PagesLinks.Length == 0)
// // Debugger.Break();
//
// //var pageLinks = DownloadBuilder<HtmlDocument, TableOfContentsData>.FromScratch()
// // .WithLinks(metadata2.Data.PagesLinks)
// // .WithTransformer(CommonTransformers.TableOfContentsTransformer(BeamData.Bindings[BeamData.Resources[BeamData.ResourceDictionaries[novelResDict].Resources[nameof(TableOfContentsData)]].Bindings]))
// // .Configure(x => x
// // .WithDownloadLogger(logger)
// // .WithRetryReporter(new Progress<RetryReport>())
// // .WithTimeOut(TimeSpan.FromSeconds(15)))
// // .Build();
//
// //var links = (await pageLinks
// // .ToListAsync())
// // .Where(x => x?.Data?.ContentLinks is not null)
// // .SelectMany(x => x.Data.ContentLinks!)
// // .DistinctBy(x => x.Link.AbsoluteUri);
//
// //var downloader = DownloadBuilder<HtmlDocument, StringDocument>.FromScratch()
// // .WithLinks(links)
// // .WithTransformer(CommonTransformers.DocumentTransformer(BeamData.Bindings[BeamData.Resources[BeamData.ResourceDictionaries[novelResDict].Resources[nameof(IDocument)]].Bindings]))
// // .Configure(x => x
// // .WithDownloadLogger(logger)
// // .WithRetryReporter(new Progress<RetryReport>(x => logger?.LogWarning("Retrying download {} for the {} time", x.Link, x.TryNumber)))
// // .WithTimeOut(TimeSpan.FromSeconds(15)))
// // .WithParallelism(4)
// // .UseFragments()
// // .Build();
//
// //HashSet<Ordered<StringDocument>> downloaded = [];
// //try {
// // await foreach (var download in downloader) {
// // logger?.LogInformation("Downloaded chapter with order={}", download.Order);
// // try {
// // downloaded.Add(download);
// // } catch (Exception e) {
// // logger?.LogError(e, "Unknown error occurred");
// // }
// // }
// //} catch (Exception e) {
// // logger?.LogError(e, "Uncaught error detected!");
// //} finally {
// // logger?.LogInformation("Done with loop, downloaded {}", downloaded.Count);
// // try {
// // string serialized = JsonSerializer.Serialize(downloaded.Select(x => new { x.Order, x.Data.Content, x.Data.MetaData }).ToArray(), ConversionOptions);
// // System.IO.File.WriteAllText("lordOfTheMysteries.json", serialized);
// // } catch (Exception e) {
// // logger?.LogInformation(e, "Failed to serialize chapters");
// // }
// //}
//
// //var downloader2 = DownloadBuilder<HtmlDocument, IDocument>.FromText(novel, BeamData)
// // .WithRange(1..5)
// // .WithLinkGenerator()
// // .WithTransformer((x) => CommonTransformers.DocumentTransformer(x, metadata2.Data))
// // .Configure((x) => x
// // .WithDownloadLogger(logger)
// // .WithDownloadReporter(new Progress<DownloadReport>((x) => logger.LogInformation(x.ToString())))
// // .WithTimeOut(TimeSpan.FromSeconds(15))
// // )
// // .Build();
//
//
//
// //List<Task<Ordered<IDocument>>> translationTasks = [];
// //List<Ordered<IDocument>> documents = [];
//
// //await foreach (var download in downloader2.Take(10)) {
// // if (!download.Data.MetaData.TryGetValue(Architecture.ChapterKey, out var meta))
// // continue;
// // if (meta is not ArticleData articleMetaData)
// // continue;
// // if (!download.Data.MetaData.TryGetValue(Architecture.BookKey, out var bookmeta))
// // continue;
// // if (meta is not ArticleData bookMetaData)
// // continue;
// // //Console.WriteLine($"Title: {data.Name}");
// // //Console.WriteLine($"Description: {data.Description}");
// // //Console.WriteLine($"Categories: {data.Categories.Aggregate((x, y) => $"{x}; {y}")}");
// // //Console.WriteLine($"Authors: {data.Authors.Aggregate((x,y) => $"{x}; {y}")}");
// // Console.WriteLine($"Chapter title: {articleMetaData.Name}");
// // Console.WriteLine($"Book title: {bookMetaData.Name}");
// // //Console.WriteLine($"Content: {download}");
//
// // //translationTasks.Add(Task.Run(async () => {
// // // logger.LogInformation("Beginning translation {} task for {}", download.Order, articleMetaData.Name);
// // // var ret = new Ordered<IDocument>(await QuickAndDirtyJanitor.TranslateAsync(download.Data), download.Order);
// // // logger.LogInformation("Finished translation {} task for {}", download.Order, articleMetaData.Name);
// // // return ret;
// // //}));
// //}
//
// //documents = (await Task.WhenAll(translationTasks)).ToList();
//
// //string testDir = Path.Combine("txt", Path.GetRandomFileName());
// //Directory.CreateDirectory(testDir);
//
// //int len = documents.MaxBy((x) => x.Order)?.Order ?? -1;
// //foreach (var document in documents.OrderBy((x) => x.Order)) {
// // document.Data.MetaData.TryGetValue(Architecture.ChapterKey, out var chapterMetaData);
// // Dictionary<string, string> linkButtons = new();
// // if (document.Order != 0)
// // linkButtons.Add("Previous", $"{document.Order - 1}.html");
// // if (document.Order != len)
// // linkButtons.Add("Next", $"{document.Order + 1}.html");
// // new HtmlExporter(document.Data, chapterMetaData as ArticleData, linkButtons).Write(Path.Combine(testDir, $"{document.Order}.html"));
// //}
//
// Console.ReadKey();
//
// //foreach (var download in documents.OrderBy((x) => x.Order)) {
// // if (download.Data.TryGetTaggedMetaData<ArticleData>(Architecture.ChapterKey, out var meta))
// // Console.WriteLine($"{download.Order}:{meta.Name}");
// //}
//
// //string[] templates = new DataKey<File>[] {
// // HtmlBook.Keys.ContentPage,
// // HtmlBook.Keys.NoContentPage,
// // HtmlBook.Keys.TitlePage,
// // HtmlBook.Keys.StylesPage,
// //}.Select(
// // (x) => BeamData.Files.ReadToString(x.WithNamespace("aeqw89:files:templates:classic"))
// //).ToArray();
//
// //HtmlBook book = new(
// // bookname: Path.Combine(Path.GetRandomFileName(), "I Alone Level Up"),
// // new CssData(),
// // new ArticleData(),
// // new HtmlBookTemplates() {
// // ContentPageTemplate = templates[0],
// // NoContentTemplate = templates[1],
// // TitlePageTemplate = templates[2],
// // CssTemplate = templates[3],
// // },
// // documents: documents.Select((x) => x.Data).ToList()
// //);
//
// //book.Update();
// //Console.WriteLine("One variable!");
}
}
}