refactor: modularize Beam into new projects and interfaces

- Introduced modularity by splitting Beam into new projects: Beam.Abstractions, Beam.Models, and Beam.Downloaders.
- Refactored existing classes into appropriate namespaces and projects.
- Replaced specific implementations with abstractions (e.g., SourceLinkBuilder with LinkBuilder, State with IState).
- Updated interfaces: added ITemplate, IArticleData, IDownloadReport, and others for improved extensibility.
- Removed deprecated classes like SourceLinkBuilder and StateChangerFactory.
- Enhanced link handling in downloaders by refactoring to use `string` over `SourceLink`.
- Consolidated shared logic under Beam.Abstractions.
This commit is contained in:
qwsdcvghyu89
2025-09-22 01:51:46 +10:00
parent a7d148a96f
commit 7ed05abdb8
128 changed files with 2058 additions and 1804 deletions
-52
View File
@@ -1,52 +0,0 @@
using aeqw89.DataKeys;
using HtmlAgilityPack;
using Microsoft.Extensions.Logging;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Beam.Temporary.Cli {
/// <summary>
/// <para>
/// A collection of specific useful methods and constants that facilitate the use of the application; allows other parts of the application to depend on architecture-specific arbitrary choices without compromising the Single-Responsibility principle or increasing redundant code.
/// </para>
/// </summary>
public partial interface IArchitecture {
// NOTE: GetMeta and GetTextRecord below are commented out, so they are NOT members of this interface.
// Their documentation is kept next to the disabled signatures for reference only.
///// <summary>
///// Gets the metadata associated with a <see cref="ResourceDictionary"/>
///// </summary>
///// <param name="web">The web client to use when downloading <see cref="WebResource"/>s</param>
///// <param name="pieceKey">The key of the <see cref="ResourceDictionary"/> stored in the <paramref name="sdd"/></param>
///// <param name="sdd">The <see cref="BeamDataDictionary"/> to be used to retrieve information</param>
///// <param name="logger">Optional logger for logging debug information</param>
///// <returns>A <see cref="DownloadContext{T}"/> object with the required information to perform the download</returns>
//public DownloadContext<IDocumentMetaData>? GetMeta(HtmlWeb web, DataKey<ResourceDictionary> pieceKey, BeamDataDictionary sdd, CancellationToken ct = default, ILogger? logger = null);
///// <summary>
///// Gets the <see cref="DownloadContext{T}"/> of the text record associated with <see cref="ResourceDictionary"/>
///// </summary>
///// <param name="web">The web client to use when downloading <see cref="WebResource"/>s</param>
///// <param name="pieceKey">The key of the <see cref="ResourceDictionary"/> stored in the <paramref name="sdd"/></param>
///// <param name="sdd">The <see cref="BeamDataDictionary"/> to be used to retrieve information</param>
///// <param name="metadata">Optional book metadata to include with the final text record</param>
///// <param name="logger">Optional logger for logging debug information</param>
///// <returns>A <see cref="DownloadContext{T}"/> object with the required information to perform the download</returns>
//public DownloadContext<IDocument>? GetTextRecord(HtmlWeb web, DataKey<ResourceDictionary> pieceKey, BeamDataDictionary sdd, IDocumentMetaData? metadata = null, CancellationToken ct = default, ILogger? logger = null);
/// <summary>
/// The <see cref="DataKey{IDocumentMetaData}"/> to use when looking for the chapter metadata
/// </summary>
public DataKey<IDocumentMetaData> ChapterKey { get; set; }
/// <summary>
/// The <see cref="DataKey{IDocumentMetaData}"/> to use when looking for the book metadata
/// </summary>
public DataKey<IDocumentMetaData> BookKey { get; set; }
/// <summary>
/// The default architecture
/// </summary>
/// <remarks>
/// This is an expression-bodied property, so every read constructs a new <c>MainArchitecture</c>.
/// A value assigned to <see cref="ChapterKey"/> or <see cref="BookKey"/> on one returned instance is
/// therefore not reflected by the instance returned from a later read; hold on to the returned
/// reference if the keys are going to be customised.
/// </remarks>
public static IArchitecture Default => new MainArchitecture();
}
}
+1 -1
View File
@@ -12,7 +12,7 @@
</ItemGroup>
<ItemGroup>
<PackageReference Include="aeqw89.DataKeys" Version="2.0.1" />
<PackageReference Include="aeqw89.PersistentData" Version="1.1.0" />
<PackageReference Include="aeqw89.PersistentData" Version="1.3.3" />
<PackageReference Include="Microsoft.Extensions.Logging" Version="9.0.7" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="9.0.7" />
<PackageReference Include="Microsoft.Extensions.Logging.Console" Version="9.0.7" />
-46
View File
@@ -1,46 +0,0 @@
using aeqw89.DataKeys;
using Beam.Dynamic;
using HtmlAgilityPack;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Beam.Temporary.Cli {
/// <summary>
/// Factory methods that turn a set of <see cref="DataBindings"/> into <c>AsyncTransformer</c> delegates
/// mapping a downloaded <see cref="HtmlDocument"/> onto the application's data objects.
/// </summary>
/// <remarks>
/// Every delegate completes synchronously and hands its result back through <see cref="Task.FromResult{TResult}(TResult)"/>.
/// </remarks>
public static class CommonTransformers {
    /// <summary>
    /// Builds a transformer that fills an <see cref="ArticleData"/> from the authors, title, tags and
    /// description bindings.
    /// </summary>
    /// <param name="binding">Bindings used to read the document; when <c>null</c> (or when an individual
    /// binding yields <c>null</c>) the list fields fall back to empty and the text fields to <c>""</c>
    /// before cleaning.</param>
    /// <returns>A transformer producing the cleaned <see cref="ArticleData"/>.</returns>
    public static AsyncTransformer<HtmlDocument, ArticleData> ArticleDataTransformer(DataBindings? binding) =>
        document => Task.FromResult(new ArticleData {
            Authors = binding?.Authors?.Get(document)?.Select(OnlineCleaner.Clean)?.ToArray() ?? [],
            Name = OnlineCleaner.Clean(binding?.Title?.Get(document) ?? ""),
            Categories = binding?.Tags?.Get(document)?.Select(OnlineCleaner.Clean)?.ToArray() ?? [],
            Description = OnlineCleaner.Clean(binding?.Description?.Get(document) ?? "")
        });

    /// <summary>
    /// Builds a transformer that fills a <see cref="TableOfContentsData"/>: the same four fields as
    /// <see cref="ArticleDataTransformer"/> plus the content links and the page links.
    /// </summary>
    /// <param name="binding">Bindings used to read the document; missing values fall back to empty
    /// collections or <c>""</c> as described on <see cref="ArticleDataTransformer"/>.</param>
    /// <returns>A transformer producing the <see cref="TableOfContentsData"/>.</returns>
    public static AsyncTransformer<HtmlDocument, TableOfContentsData> TableOfContentsTransformer(DataBindings? binding) =>
        document => Task.FromResult(new TableOfContentsData {
            Authors = binding?.Authors?.Get(document)?.Select(OnlineCleaner.Clean)?.ToArray() ?? [],
            Name = OnlineCleaner.Clean(binding?.Title?.Get(document) ?? ""),
            Categories = binding?.Tags?.Get(document)?.Select(OnlineCleaner.Clean)?.ToArray() ?? [],
            Description = OnlineCleaner.Clean(binding?.Description?.Get(document) ?? ""),
            ContentLinks = binding?.TableOfContents?.Get(document) ?? [],
            PagesLinks = binding?.PagesDropDown?.Get(document) ?? []
        });

    /// <summary>
    /// Builds a transformer that resolves the bindings against a document and wraps the cleaned content
    /// in a <see cref="StringDocument"/> named with a random file name.
    /// </summary>
    /// <param name="binding">Bindings resolved against the document; may be <c>null</c>.</param>
    /// <param name="metaData">Optional book-level metadata; when supplied it is stored under
    /// <c>IArchitecture.Default.BookKey</c> next to the chapter entry.</param>
    /// <returns>A transformer producing the <see cref="StringDocument"/> with its metadata dictionary.</returns>
    public static AsyncTransformer<HtmlDocument, StringDocument> DocumentTransformer(DataBindings? binding, IDocumentMetaData? metaData = null) =>
        document => {
            var resolved = binding?.Resolve(document);

            // Chapter-level metadata only carries the cleaned title.
            var chapterData = new ArticleData { Name = OnlineCleaner.Clean(resolved?.Title) };
            var meta = new Dictionary<DataKey<IDocumentMetaData>, IDocumentMetaData> {
                { IArchitecture.Default.ChapterKey, chapterData }
            };

            if (metaData is { } bookData) {
                meta.Add(IArchitecture.Default.BookKey, bookData);
            }

            var fileName = Path.GetRandomFileName();
            var content = OnlineCleaner.Clean(resolved?.Content);
            return Task.FromResult(new StringDocument(fileName, content) { MetaData = meta });
        };
}
}
-60
View File
@@ -1,60 +0,0 @@
using aeqw89.DataKeys;
using Beam.Dynamic;
using HtmlAgilityPack;
using Microsoft.Extensions.Logging;
namespace Beam.Temporary.Cli {
public partial interface IArchitecture {
    /// <summary>
    /// Built-in <see cref="IArchitecture"/> implementation; its keys start out as
    /// <c>ma:chapter</c> and <c>ma:book</c>.
    /// </summary>
    private class MainArchitecture : IArchitecture {
        /// <inheritdoc/>
        public DataKey<IDocumentMetaData> ChapterKey { get; set; } = new DataKey<IDocumentMetaData>("ma:chapter");

        /// <inheritdoc/>
        public DataKey<IDocumentMetaData> BookKey { get; set; } = new DataKey<IDocumentMetaData>("ma:book");

        // The two methods below are disabled and kept for reference only.
        //public DownloadContext<IDocumentMetaData>? GetMeta(HtmlWeb web, DataKey<ResourceDictionary> pieceKey, BeamDataDictionary sdd, CancellationToken ct = default, ILogger? logger = null) {
        //    var piece = sdd.ResourceDictionaries[pieceKey].ToRecord(sdd); // retrieves novel data from the sdd
        //    var auxiliary = piece.AssociatedMetaSource?.ToRecord(sdd); // retrieves novel aux data from the sdd
        //    // null checks
        //    if (auxiliary is null) // aux is required to get metadata
        //        return null;
        //    if (piece?.Resource?.MetaTemplateInitialData is null) // sanity check to avoid null warnings
        //        return null;
        //    // gets the link for the novel's metadata using the auxiliary data retrieved from the sdd
        //    var link = sdd.Templates[auxiliary.Resource.Key].Builder.Build(piece?.Resource?.MetaTemplateInitialData);
        //    var binding = auxiliary.Bindings;
        //    return new DownloadContext<IDocumentMetaData>(web, new(), [link], downloadLogger: logger);
        //}
        //public DownloadContext<IDocument>? GetTextRecord(HtmlWeb web, DataKey<ResourceDictionary> resKey, BeamDataDictionary sdd, IDocumentMetaData? metaData = null, CancellationToken ct = default, ILogger? logger = null) {
        //    var res = sdd.ResourceDictionaries[resKey].ToRecord(sdd); // retrieves the novel data from the sdd
        //    var aggregator = res.AssociatedSource?.ToRecord(sdd); // retrieves the aggregator (novel web source) from the sdd
        //    if (aggregator is null) // ensure aggregator data was retrieved successfully
        //        return null;
        //    if (res is null) // ensure novel data was retrieved successfully
        //        return null;
        //    var template = sdd.Templates[aggregator.Resource.Key]; // gets the link generator for the specified aggregator
        //    // creates a generative enumerable of type link from 'template'
        //    var sle = SourceLinkEnumerable.FromGenerator(new OrderedSourceLinkGenerator(
        //        template.Builder, new NumberedStateChanger(template.Factory.Behavior),
        //        res.Resource.TemplateInitialData));
        //    return new DownloadContext<IDocument>(web, new(), sle,
        //        retryReporter: new Progress<RetryReport>((x) => Console.WriteLine($"Retrying download of '{x.Link}' ({x.TryNumber}x)")),
        //        //downloadReporter: new Progress<DownloadReport>((x) => Console.WriteLine($"Downloaded ({x})")),
        //        asyncFailurePredicates: [
        //            //(x) => Task.FromResult(!x.DocumentNode.InnerHtml.Contains("<div id=\"chapter-container\" class=\"chapter-content\" itemprop=\"description\">"))
        //        ],
        //        timeOut: TimeSpan.FromSeconds(15),
        //        downloadLogger: logger
        //    );
        //}
    }
}
}
+109 -103
View File
@@ -1,105 +1,111 @@
using aeqw89.DataKeys;
using Beam.Dynamic;
using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Collections.ObjectModel;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Beam.Models;
// using aeqw89.DataKeys;
// using Beam.Dynamic;
// using System;
// using System.Collections.Generic;
// using System.Collections.Immutable;
// using System.Collections.ObjectModel;
// using System.Linq;
// using System.Text;
// using System.Threading.Tasks;
// using Beam.Data;
// using Beam.Fluent;
// using Beam.Models;
/*
* MAJOR TODO FIX THIS MESS
*/
//
// namespace Beam.Temporary.Cli {
//
// public record class ResourceDictionaryBuilder(string SiteKey) {
// private List<Func<WebResourceBuilder>> _builders;
//
//
// private record class WebResourceBuilder(string ResourceKey) {
// private Func<Template> _template;
// private Func<IReadOnlyDictionary<DataKey<DataBindings>, DataBindings>> _bindings;
// private string _name;
// private string _description;
// private Uri _domain;
//
// }
//
// private record class ResourceDictionaryRegistrar(
// string SiteKey,
// string FriendlyName,
// IEnumerable<WebResource> Resources,
// IReadOnlyDictionary<string, Template> Templates,
// IReadOnlyDictionary<string, DataBindings> Bindings) : IResourceDictionaryRegistrar {
//
// private Dictionary<string, ImmutableState> _states;
//
// public IResourceDictionaryRegistrar AddInitialState(string key, ImmutableState state) {
// _states[key] = state;
// return this;
// }
//
// public void Register(BeamDataContext sdd) {
// foreach (var resource in Resources)
// sdd.Resources.TryAdd(resource.Key, resource);
// // foreach (var template in Templates)
// // sdd.Templates.TryAdd(new DataKey<WebResource>(template.Key), template.Value);
// foreach (var binding in Bindings)
// sdd.Bindings.TryAdd(new DataKey<DataBindings>(binding.Key), binding.Value);
// foreach (var state in _states)
// sdd.InitialStates.TryAdd(new DataKey<ImmutableState>(state.Key), state.Value);
//
// sdd.ResourceDictionaries.TryAdd(new DataKey<ResourceDictionary>(SiteKey), new ResourceDictionary() {
// Key = new DataKey<ResourceDictionary>(SiteKey),
// FriendlyName = FriendlyName,
// InitialStates =
// });
// }
// }
// }
//
// public interface IResourceDictionaryRegistrar {
// public IResourceDictionaryRegistrar AddInitialState(string key, ImmutableState state);
// public void Register(BeamDataContext sdd);
// }
//
// public interface IBindingsBuilder {
// public IBindingsBuilder AddBinding(DataBindings bindings);
// public IBindingsBuilder AddBinding(Action<DataBindings> configure);
// public IReadOnlyDictionary<DataKey<DataBindings>, DataBindings> Build();
// }
//
// public interface IResourceDictionaryBuilder {
// public IResourceDictionaryBuilder AddResource(Func<ITemplateBuilderStage, IWebResourceBuilderStage> configure);
// public IResourceDictionaryBuilder WithResources(Func<ITemplateBuilderStage, IWebResourceBuilderStage>[] configure);
// public IResourceDictionaryBuilder WithFriendlyName(string friendlyName);
// public IResourceDictionaryRegistrar Then();
// }
//
// public interface IWebResourceBuilderStage {
// public IWebResourceBuilderStage WithName(string name); // Stage 3
// public IWebResourceBuilderStage WithDescription(string description); // Stage 3
// public IWebResourceBuilderStage WithDomain(Uri domain); // Stage 3
// public WebResource Build();
// }
//
// public interface IBindingBuilderStage {
// public IWebResourceBuilderStage WithBindings(Action<IBindingsBuilder> configure);
// public IWebResourceBuilderStage WithBindings(IReadOnlyDictionary<DataKey<DataBindings>, DataBindings> bindings);
// }
//
// public interface ITemplateBuilderStage {
// public IBindingBuilderStage WithTemplate(Action<ITemplateBuilder> configure);
// public IBindingBuilderStage WithTemplate(Template template);
// }
//
// public interface ITemplateBuilder {
// public ITemplateBuilder WithFactory(StateChangerFactory factory);
// public ITemplateBuilder WithUrlBuilder(LinkBuilder builder);
// public ITemplateBuilder WithUrlBuilder(Action<LinkBuilder> configure);
// public Template Build();
// }
//
namespace Beam.Temporary.Cli {
/// <summary>
/// Work-in-progress builder for a resource dictionary identified by <paramref name="SiteKey"/>.
/// </summary>
/// <remarks>
/// Only the nested registrar has behaviour so far; the builder itself and <c>WebResourceBuilder</c>
/// merely declare the state they are expected to carry.
/// </remarks>
/// <param name="SiteKey">Key under which the resulting resource dictionary is registered.</param>
public record class ResourceDictionaryBuilder(string SiteKey) {
    // Not assigned anywhere yet. Deliberately left uninitialised: this is a field of a public record,
    // so giving it a fresh list would change value equality between builder instances.
    private List<Func<WebResourceBuilder>> _builders;

    /// <summary>
    /// Pending state for a single web resource identified by <paramref name="ResourceKey"/>.
    /// None of the fields are assigned or read yet.
    /// </summary>
    private record class WebResourceBuilder(string ResourceKey) {
        private Func<Template> _template;
        private Func<IReadOnlyDictionary<DataKey<DataBindings>, DataBindings>> _bindings;
        private string _name;
        private string _description;
        private Uri _domain;
    }

    /// <summary>
    /// Collects initial states and writes resources, templates, bindings, states and the resource
    /// dictionary itself into a <see cref="BeamDataContext"/>.
    /// </summary>
    private record class ResourceDictionaryRegistrar(
        string SiteKey,
        string FriendlyName,
        IEnumerable<WebResource> Resources,
        IReadOnlyDictionary<string, Template> Templates,
        IReadOnlyDictionary<string, DataBindings> Bindings) : IResourceDictionaryRegistrar {
        // Initialised eagerly: the field used to stay null, so AddInitialState and Register both
        // failed with a NullReferenceException on first use.
        private readonly Dictionary<string, ImmutableState> _states = new();

        /// <summary>Stores <paramref name="state"/> under <paramref name="key"/>, replacing any previous entry.</summary>
        /// <param name="key">Name of the initial state.</param>
        /// <param name="state">State to register under <paramref name="key"/>.</param>
        /// <returns>This registrar, so calls can be chained.</returns>
        public IResourceDictionaryRegistrar AddInitialState(string key, ImmutableState state) {
            _states[key] = state;
            return this;
        }

        /// <summary>
        /// Adds everything held by this registrar to <paramref name="sdd"/> using <c>TryAdd</c> for every entry.
        /// </summary>
        /// <param name="sdd">Data context that receives the entries.</param>
        /// <exception cref="ArgumentNullException"><paramref name="sdd"/> is <c>null</c>.</exception>
        public void Register(BeamDataContext sdd) {
            ArgumentNullException.ThrowIfNull(sdd);

            foreach (var resource in Resources) {
                sdd.Resources.TryAdd(resource.Key, resource);
            }
            foreach (var template in Templates) {
                sdd.Templates.TryAdd(new DataKey<WebResource>(template.Key), template.Value);
            }
            foreach (var binding in Bindings) {
                sdd.Bindings.TryAdd(new DataKey<DataBindings>(binding.Key), binding.Value);
            }
            foreach (var state in _states) {
                sdd.InitialStates.TryAdd(new DataKey<ImmutableState>(state.Key), state.Value);
            }

            // TODO: the original initializer ended with an unfinished `InitialStates =` assignment that
            // could not compile. It is omitted here, so InitialStates keeps whatever ResourceDictionary
            // gives it by default; populate it from _states once the expected shape is confirmed.
            var dictionaryKey = new DataKey<ResourceDictionary>(SiteKey);
            sdd.ResourceDictionaries.TryAdd(dictionaryKey, new ResourceDictionary() {
                Key = new DataKey<ResourceDictionary>(SiteKey),
                FriendlyName = FriendlyName
            });
        }
    }
}
/// <summary>
/// Contract for an object that accepts named <see cref="ImmutableState"/> values and can then
/// register its contents with a <see cref="BeamDataContext"/>.
/// </summary>
public interface IResourceDictionaryRegistrar {
    /// <summary>Associates <paramref name="state"/> with <paramref name="key"/>.</summary>
    /// <param name="key">Name under which the state is stored.</param>
    /// <param name="state">The state to store.</param>
    /// <returns>An <see cref="IResourceDictionaryRegistrar"/>, so further calls can be chained.</returns>
    IResourceDictionaryRegistrar AddInitialState(string key, ImmutableState state);

    /// <summary>Registers the collected data with <paramref name="sdd"/>.</summary>
    /// <param name="sdd">Data context that receives the data.</param>
    void Register(BeamDataContext sdd);
}
/// <summary>
/// Builder contract that takes <see cref="DataBindings"/> and produces a read-only dictionary of them
/// keyed by <c>DataKey&lt;DataBindings&gt;</c>.
/// </summary>
/// <remarks>No implementation is visible in this file, so how keys are chosen is not specified here.</remarks>
public interface IBindingsBuilder {
    /// <summary>Adds an existing <see cref="DataBindings"/> instance.</summary>
    /// <param name="bindings">The bindings to add.</param>
    /// <returns>An <see cref="IBindingsBuilder"/>, so further calls can be chained.</returns>
    IBindingsBuilder AddBinding(DataBindings bindings);

    /// <summary>Adds bindings described by a configuration callback.</summary>
    /// <param name="configure">Callback that receives a <see cref="DataBindings"/> to configure.</param>
    /// <returns>An <see cref="IBindingsBuilder"/>, so further calls can be chained.</returns>
    IBindingsBuilder AddBinding(Action<DataBindings> configure);

    /// <summary>Produces the dictionary of bindings.</summary>
    /// <returns>A read-only dictionary from binding key to <see cref="DataBindings"/>.</returns>
    IReadOnlyDictionary<DataKey<DataBindings>, DataBindings> Build();
}
/// <summary>
/// Fluent contract for describing a resource dictionary: its resources and friendly name, followed by
/// a hand-off to an <see cref="IResourceDictionaryRegistrar"/>.
/// </summary>
public interface IResourceDictionaryBuilder {
    /// <summary>Adds one resource described by a staged-builder callback.</summary>
    /// <param name="configure">Callback that starts at the template stage and returns the web-resource stage.</param>
    /// <returns>An <see cref="IResourceDictionaryBuilder"/>, so further calls can be chained.</returns>
    IResourceDictionaryBuilder AddResource(Func<ITemplateBuilderStage, IWebResourceBuilderStage> configure);

    /// <summary>Supplies several resource callbacks at once.</summary>
    /// <param name="configure">The callbacks, each shaped like the one taken by <see cref="AddResource"/>.</param>
    /// <returns>An <see cref="IResourceDictionaryBuilder"/>, so further calls can be chained.</returns>
    IResourceDictionaryBuilder WithResources(Func<ITemplateBuilderStage, IWebResourceBuilderStage>[] configure);

    /// <summary>Sets the friendly name.</summary>
    /// <param name="friendlyName">The friendly name to use.</param>
    /// <returns>An <see cref="IResourceDictionaryBuilder"/>, so further calls can be chained.</returns>
    IResourceDictionaryBuilder WithFriendlyName(string friendlyName);

    /// <summary>Moves on from building to registration.</summary>
    /// <returns>The <see cref="IResourceDictionaryRegistrar"/> for the described dictionary.</returns>
    IResourceDictionaryRegistrar Then();
}
/// <summary>
/// Builder stage for the descriptive parts of a <see cref="WebResource"/> (name, description, domain)
/// ending in <see cref="Build"/>.
/// </summary>
public interface IWebResourceBuilderStage {
    /// <summary>Sets the resource name.</summary>
    /// <returns>An <see cref="IWebResourceBuilderStage"/>, so further calls can be chained.</returns>
    IWebResourceBuilderStage WithName(string name); // Stage 3

    /// <summary>Sets the resource description.</summary>
    /// <returns>An <see cref="IWebResourceBuilderStage"/>, so further calls can be chained.</returns>
    IWebResourceBuilderStage WithDescription(string description); // Stage 3

    /// <summary>Sets the resource domain.</summary>
    /// <returns>An <see cref="IWebResourceBuilderStage"/>, so further calls can be chained.</returns>
    IWebResourceBuilderStage WithDomain(Uri domain); // Stage 3

    /// <summary>Produces the <see cref="WebResource"/>.</summary>
    WebResource Build();
}
/// <summary>
/// Builder stage at which bindings are supplied; both overloads hand over to
/// <see cref="IWebResourceBuilderStage"/>.
/// </summary>
public interface IBindingBuilderStage {
    /// <summary>Supplies bindings through an <see cref="IBindingsBuilder"/> callback.</summary>
    /// <param name="configure">Callback that receives the bindings builder.</param>
    /// <returns>The next stage, <see cref="IWebResourceBuilderStage"/>.</returns>
    IWebResourceBuilderStage WithBindings(Action<IBindingsBuilder> configure);

    /// <summary>Supplies an already-built dictionary of bindings.</summary>
    /// <param name="bindings">Read-only dictionary from binding key to <see cref="DataBindings"/>.</param>
    /// <returns>The next stage, <see cref="IWebResourceBuilderStage"/>.</returns>
    IWebResourceBuilderStage WithBindings(IReadOnlyDictionary<DataKey<DataBindings>, DataBindings> bindings);
}
/// <summary>
/// Builder stage at which the <see cref="Template"/> is supplied; both overloads hand over to
/// <see cref="IBindingBuilderStage"/>.
/// </summary>
public interface ITemplateBuilderStage {
    /// <summary>Supplies the template through an <see cref="ITemplateBuilder"/> callback.</summary>
    /// <param name="configure">Callback that receives the template builder.</param>
    /// <returns>The next stage, <see cref="IBindingBuilderStage"/>.</returns>
    IBindingBuilderStage WithTemplate(Action<ITemplateBuilder> configure);

    /// <summary>Supplies an already-built <see cref="Template"/>.</summary>
    /// <param name="template">The template to use.</param>
    /// <returns>The next stage, <see cref="IBindingBuilderStage"/>.</returns>
    IBindingBuilderStage WithTemplate(Template template);
}
/// <summary>
/// Builder contract for a <see cref="Template"/>, taking a <see cref="StateChangerFactory"/> and a
/// <see cref="SourceLinkBuilder"/>.
/// </summary>
public interface ITemplateBuilder {
    /// <summary>Sets the state-changer factory.</summary>
    /// <param name="factory">The factory to use.</param>
    /// <returns>An <see cref="ITemplateBuilder"/>, so further calls can be chained.</returns>
    ITemplateBuilder WithFactory(StateChangerFactory factory);

    /// <summary>Sets the URL builder from an existing <see cref="SourceLinkBuilder"/>.</summary>
    /// <param name="builder">The link builder to use.</param>
    /// <returns>An <see cref="ITemplateBuilder"/>, so further calls can be chained.</returns>
    ITemplateBuilder WithUrlBuilder(SourceLinkBuilder builder);

    /// <summary>Sets the URL builder through a configuration callback.</summary>
    /// <param name="configure">Callback that receives a <see cref="SourceLinkBuilder"/> to configure.</param>
    /// <returns>An <see cref="ITemplateBuilder"/>, so further calls can be chained.</returns>
    ITemplateBuilder WithUrlBuilder(Action<SourceLinkBuilder> configure);

    /// <summary>Produces the <see cref="Template"/>.</summary>
    Template Build();
}
}
// }
File diff suppressed because it is too large Load Diff
+223 -222
View File
@@ -1,229 +1,230 @@
using aeqw89.PersistentData;
using aeqw89.DataKeys;
using Beam.Dynamic;
using HtmlAgilityPack;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Text.Json.Serialization.Metadata;
using Beam.Temporary.Cli.Templates.Classic;
using Beam.Exports;
using System.Diagnostics;
using Beam.Models;
using Beam.Stealth;
// using aeqw89.PersistentData;
// using aeqw89.DataKeys;
// using Beam.Dynamic;
// using HtmlAgilityPack;
// using Microsoft.Extensions.DependencyInjection;
// using Microsoft.Extensions.Logging;
// using System.Text.Json;
// using System.Text.Json.Serialization;
// using System.Text.Json.Serialization.Metadata;
// using Beam.Temporary.Cli.Templates.Classic;
// using Beam.Exports;
// using System.Diagnostics;
// using Beam.Models;
// using Beam.Models.Public_Concrete;
// using Beam.Stealth;
namespace Beam.Temporary.Cli {
internal class Program {
//
// public static JsonSerializerOptions ConversionOptions { get; internal set; } = new();
//
// public static BeamDataContext BeamData { get; set; } = [];
//
// public static IArchitecture Architecture = IArchitecture.Default;
//
// const string BeamDataPath = "data/.dat";
public static JsonSerializerOptions ConversionOptions { get; internal set; } = new();
public static BeamDataContext BeamData { get; set; } = [];
public static IArchitecture Architecture = IArchitecture.Default;
const string BeamDataPath = "data/.dat";
static async Task Main(string[] args) {
ConversionOptions.Converters.AddPersistentDataRequiredConverters();
ConversionOptions.WriteIndented = true;
var web = new HtmlWeb();
var lf = LoggerFactory.Create((x) => x
.AddConsole()
.SetMinimumLevel(LogLevel.Trace)
);
ILogger logger = lf
.CreateLogger("Program");
await using var sharedContext = await DataDictionaryContext<BeamDataContext>.Create(
BeamDataPath,
false,
DataKind.Shared,
logger,
ConversionOptions
);
BeamData = sharedContext.Data;
BeamData.Clear();
NovelStatics.Define_YeBiQuge(BeamData);
NovelStatics.Define_YeBiQuge_LordOfMysteries(BeamData);
NovelStatics.Define_KuaiShu5(BeamData);
NovelStatics.Define_KuaiShu5_LordOfMysteries(BeamData);
ClassicTemplates.Register(BeamData);
await sharedContext.ForceSave();
BeamData = sharedContext.Data; // need to refresh instance after forced save!
CancellationTokenSource cts = new();
using var config = StealthConfig.Create(true, null, TimeSpan.FromMinutes(2), Browser.Chrome, lf.CreateLogger<StealthConfig>());
var unit = new StealthUnitPageDownloader<HtmlDocument>(new(), config, (x) => {
return Task.CompletedTask;
}, x => Task.FromResult(x));
var (success, result) = await unit.TryDownload([new("https://duckduckgo.com/?t=ffab&q=C%23+stealth+headless+browser&ia=web", 0)], default);
if (success)
logger?.LogInformation("Success! Downloaded '{}'", result?.DocumentNode.Name);
else
logger?.LogError("Failed to download!");
Console.WriteLine(result?.DocumentNode.OuterHtml);
//var novelResDict = new DataKey<ResourceDictionary>("kuaishu5:novels:lord_of_the_mysteries");
//var metadata2 = await DownloadBuilder<HtmlDocument, TableOfContentsData>.FromResource(novelResDict, nameof(TableOfContentsData), BeamData)
// .WithLink()
// .WithTransformer(CommonTransformers.TableOfContentsTransformer)
// .Configure((x) => x
// .WithDownloadLogger(logger)
// .WithRetryReporter(new Progress<RetryReport>())
// .WithTimeOut(TimeSpan.FromSeconds(15)))
// .Build()
// .FirstAsync();
//if (metadata2.Data.PagesLinks is null || metadata2.Data.PagesLinks.Length == 0)
// Debugger.Break();
//var pageLinks = DownloadBuilder<HtmlDocument, TableOfContentsData>.FromScratch()
// .WithLinks(metadata2.Data.PagesLinks)
// .WithTransformer(CommonTransformers.TableOfContentsTransformer(BeamData.Bindings[BeamData.Resources[BeamData.ResourceDictionaries[novelResDict].Resources[nameof(TableOfContentsData)]].Bindings]))
// .Configure(x => x
// .WithDownloadLogger(logger)
// .WithRetryReporter(new Progress<RetryReport>())
// .WithTimeOut(TimeSpan.FromSeconds(15)))
// .Build();
//var links = (await pageLinks
// .ToListAsync())
// .Where(x => x?.Data?.ContentLinks is not null)
// .SelectMany(x => x.Data.ContentLinks!)
// .DistinctBy(x => x.Link.AbsoluteUri);
//var downloader = DownloadBuilder<HtmlDocument, StringDocument>.FromScratch()
// .WithLinks(links)
// .WithTransformer(CommonTransformers.DocumentTransformer(BeamData.Bindings[BeamData.Resources[BeamData.ResourceDictionaries[novelResDict].Resources[nameof(IDocument)]].Bindings]))
// .Configure(x => x
// .WithDownloadLogger(logger)
// .WithRetryReporter(new Progress<RetryReport>(x => logger?.LogWarning("Retrying download {} for the {} time", x.Link, x.TryNumber)))
// .WithTimeOut(TimeSpan.FromSeconds(15)))
// .WithParallelism(4)
// .UseFragments()
// .Build();
//HashSet<Ordered<StringDocument>> downloaded = [];
//try {
// await foreach (var download in downloader) {
// logger?.LogInformation("Downloaded chapter with order={}", download.Order);
// try {
// downloaded.Add(download);
// } catch (Exception e) {
// logger?.LogError(e, "Unknown error occurred");
// }
// }
//} catch (Exception e) {
// logger?.LogError(e, "Uncaught error detected!");
//} finally {
// logger?.LogInformation("Done with loop, downloaded {}", downloaded.Count);
// try {
// string serialized = JsonSerializer.Serialize(downloaded.Select(x => new { x.Order, x.Data.Content, x.Data.MetaData }).ToArray(), ConversionOptions);
// System.IO.File.WriteAllText("lordOfTheMysteries.json", serialized);
// } catch (Exception e) {
// logger?.LogInformation(e, "Failed to serialize chapters");
// }
//}
//var downloader2 = DownloadBuilder<HtmlDocument, IDocument>.FromText(novel, BeamData)
// .WithRange(1..5)
// .WithLinkGenerator()
// .WithTransformer((x) => CommonTransformers.DocumentTransformer(x, metadata2.Data))
// .Configure((x) => x
// .WithDownloadLogger(logger)
// .WithDownloadReporter(new Progress<DownloadReport>((x) => logger.LogInformation(x.ToString())))
// .WithTimeOut(TimeSpan.FromSeconds(15))
// )
// .Build();
//List<Task<Ordered<IDocument>>> translationTasks = [];
//List<Ordered<IDocument>> documents = [];
//await foreach (var download in downloader2.Take(10)) {
// if (!download.Data.MetaData.TryGetValue(Architecture.ChapterKey, out var meta))
// continue;
// if (meta is not ArticleData articleMetaData)
// continue;
// if (!download.Data.MetaData.TryGetValue(Architecture.BookKey, out var bookmeta))
// continue;
// if (meta is not ArticleData bookMetaData)
// continue;
// //Console.WriteLine($"Title: {data.Name}");
// //Console.WriteLine($"Description: {data.Description}");
// //Console.WriteLine($"Categories: {data.Categories.Aggregate((x, y) => $"{x}; {y}")}");
// //Console.WriteLine($"Authors: {data.Authors.Aggregate((x,y) => $"{x}; {y}")}");
// Console.WriteLine($"Chapter title: {articleMetaData.Name}");
// Console.WriteLine($"Book title: {bookMetaData.Name}");
// //Console.WriteLine($"Content: {download}");
// //translationTasks.Add(Task.Run(async () => {
// // logger.LogInformation("Beginning translation {} task for {}", download.Order, articleMetaData.Name);
// // var ret = new Ordered<IDocument>(await QuickAndDirtyJanitor.TranslateAsync(download.Data), download.Order);
// // logger.LogInformation("Finished translation {} task for {}", download.Order, articleMetaData.Name);
// // return ret;
// //}));
//}
//documents = (await Task.WhenAll(translationTasks)).ToList();
//string testDir = Path.Combine("txt", Path.GetRandomFileName());
//Directory.CreateDirectory(testDir);
//int len = documents.MaxBy((x) => x.Order)?.Order ?? -1;
//foreach (var document in documents.OrderBy((x) => x.Order)) {
// document.Data.MetaData.TryGetValue(Architecture.ChapterKey, out var chapterMetaData);
// Dictionary<string, string> linkButtons = new();
// if (document.Order != 0)
// linkButtons.Add("Previous", $"{document.Order - 1}.html");
// if (document.Order != len)
// linkButtons.Add("Next", $"{document.Order + 1}.html");
// new HtmlExporter(document.Data, chapterMetaData as ArticleData, linkButtons).Write(Path.Combine(testDir, $"{document.Order}.html"));
//}
Console.ReadKey();
//foreach (var download in documents.OrderBy((x) => x.Order)) {
// if (download.Data.TryGetTaggedMetaData<ArticleData>(Architecture.ChapterKey, out var meta))
// Console.WriteLine($"{download.Order}:{meta.Name}");
//}
//string[] templates = new DataKey<File>[] {
// HtmlBook.Keys.ContentPage,
// HtmlBook.Keys.NoContentPage,
// HtmlBook.Keys.TitlePage,
// HtmlBook.Keys.StylesPage,
//}.Select(
// (x) => BeamData.Files.ReadToString(x.WithNamespace("aeqw89:files:templates:classic"))
//).ToArray();
//HtmlBook book = new(
// bookname: Path.Combine(Path.GetRandomFileName(), "I Alone Level Up"),
// new CssData(),
// new ArticleData(),
// new HtmlBookTemplates() {
// ContentPageTemplate = templates[0],
// NoContentTemplate = templates[1],
// TitlePageTemplate = templates[2],
// CssTemplate = templates[3],
// },
// documents: documents.Select((x) => x.Data).ToList()
//);
//book.Update();
//Console.WriteLine("One variable!");
static async Task Main(string[] args) {
// ConversionOptions.Converters.AddPersistentDataRequiredConverters();
// ConversionOptions.WriteIndented = true;
//
// var web = new HtmlWeb();
//
// var lf = LoggerFactory.Create((x) => x
// .AddConsole()
// .SetMinimumLevel(LogLevel.Trace)
// );
//
// ILogger logger = lf
// .CreateLogger("Program");
//
// await using var sharedContext = await DataDictionaryContext<BeamDataContext>.Create(
// BeamDataPath,
// false,
// DataKind.Shared,
// logger,
// ConversionOptions
// );
//
// BeamData = sharedContext.Data;
//
// BeamData.Clear();
// NovelStatics.Define_YeBiQuge(BeamData);
// NovelStatics.Define_YeBiQuge_LordOfMysteries(BeamData);
// NovelStatics.Define_KuaiShu5(BeamData);
// NovelStatics.Define_KuaiShu5_LordOfMysteries(BeamData);
// ClassicTemplates.Register(BeamData);
//
// await sharedContext.ForceSave();
// BeamData = sharedContext.Data; // need to refresh instance after forced save!
//
// CancellationTokenSource cts = new();
//
// using var config = StealthConfig.Create(true, null, TimeSpan.FromMinutes(2), Browser.Chrome, lf.CreateLogger<StealthConfig>());
// var unit = new StealthUnitPageDownloader<HtmlDocument>(new(), config, (x) => {
// return Task.CompletedTask;
// }, x => Task.FromResult(x));
//
// var (success, result) = await unit.TryDownload([new("https://duckduckgo.com/?t=ffab&q=C%23+stealth+headless+browser&ia=web", 0)], default);
// if (success)
// logger?.LogInformation("Success! Downloaded '{}'", result?.DocumentNode.Name);
// else
// logger?.LogError("Failed to download!");
//
// Console.WriteLine(result?.DocumentNode.OuterHtml);
//
// //var novelResDict = new DataKey<ResourceDictionary>("kuaishu5:novels:lord_of_the_mysteries");
//
// //var metadata2 = await DownloadBuilder<HtmlDocument, TableOfContentsData>.FromResource(novelResDict, nameof(TableOfContentsData), BeamData)
// // .WithLink()
// // .WithTransformer(CommonTransformers.TableOfContentsTransformer)
// // .Configure((x) => x
// // .WithDownloadLogger(logger)
// // .WithRetryReporter(new Progress<RetryReport>())
// // .WithTimeOut(TimeSpan.FromSeconds(15)))
// // .Build()
// // .FirstAsync();
//
// //if (metadata2.Data.PagesLinks is null || metadata2.Data.PagesLinks.Length == 0)
// // Debugger.Break();
//
// //var pageLinks = DownloadBuilder<HtmlDocument, TableOfContentsData>.FromScratch()
// // .WithLinks(metadata2.Data.PagesLinks)
// // .WithTransformer(CommonTransformers.TableOfContentsTransformer(BeamData.Bindings[BeamData.Resources[BeamData.ResourceDictionaries[novelResDict].Resources[nameof(TableOfContentsData)]].Bindings]))
// // .Configure(x => x
// // .WithDownloadLogger(logger)
// // .WithRetryReporter(new Progress<RetryReport>())
// // .WithTimeOut(TimeSpan.FromSeconds(15)))
// // .Build();
//
// //var links = (await pageLinks
// // .ToListAsync())
// // .Where(x => x?.Data?.ContentLinks is not null)
// // .SelectMany(x => x.Data.ContentLinks!)
// // .DistinctBy(x => x.Link.AbsoluteUri);
//
// //var downloader = DownloadBuilder<HtmlDocument, StringDocument>.FromScratch()
// // .WithLinks(links)
// // .WithTransformer(CommonTransformers.DocumentTransformer(BeamData.Bindings[BeamData.Resources[BeamData.ResourceDictionaries[novelResDict].Resources[nameof(IDocument)]].Bindings]))
// // .Configure(x => x
// // .WithDownloadLogger(logger)
// // .WithRetryReporter(new Progress<RetryReport>(x => logger?.LogWarning("Retrying download {} for the {} time", x.Link, x.TryNumber)))
// // .WithTimeOut(TimeSpan.FromSeconds(15)))
// // .WithParallelism(4)
// // .UseFragments()
// // .Build();
//
// //HashSet<Ordered<StringDocument>> downloaded = [];
// //try {
// // await foreach (var download in downloader) {
// // logger?.LogInformation("Downloaded chapter with order={}", download.Order);
// // try {
// // downloaded.Add(download);
// // } catch (Exception e) {
// // logger?.LogError(e, "Unknown error occurred");
// // }
// // }
// //} catch (Exception e) {
// // logger?.LogError(e, "Uncaught error detected!");
// //} finally {
// // logger?.LogInformation("Done with loop, downloaded {}", downloaded.Count);
// // try {
// // string serialized = JsonSerializer.Serialize(downloaded.Select(x => new { x.Order, x.Data.Content, x.Data.MetaData }).ToArray(), ConversionOptions);
// // System.IO.File.WriteAllText("lordOfTheMysteries.json", serialized);
// // } catch (Exception e) {
// // logger?.LogInformation(e, "Failed to serialize chapters");
// // }
// //}
//
// //var downloader2 = DownloadBuilder<HtmlDocument, IDocument>.FromText(novel, BeamData)
// // .WithRange(1..5)
// // .WithLinkGenerator()
// // .WithTransformer((x) => CommonTransformers.DocumentTransformer(x, metadata2.Data))
// // .Configure((x) => x
// // .WithDownloadLogger(logger)
// // .WithDownloadReporter(new Progress<DownloadReport>((x) => logger.LogInformation(x.ToString())))
// // .WithTimeOut(TimeSpan.FromSeconds(15))
// // )
// // .Build();
//
//
//
// //List<Task<Ordered<IDocument>>> translationTasks = [];
// //List<Ordered<IDocument>> documents = [];
//
// //await foreach (var download in downloader2.Take(10)) {
// // if (!download.Data.MetaData.TryGetValue(Architecture.ChapterKey, out var meta))
// // continue;
// // if (meta is not ArticleData articleMetaData)
// // continue;
// // if (!download.Data.MetaData.TryGetValue(Architecture.BookKey, out var bookmeta))
// // continue;
// // if (meta is not ArticleData bookMetaData)
// // continue;
// // //Console.WriteLine($"Title: {data.Name}");
// // //Console.WriteLine($"Description: {data.Description}");
// // //Console.WriteLine($"Categories: {data.Categories.Aggregate((x, y) => $"{x}; {y}")}");
// // //Console.WriteLine($"Authors: {data.Authors.Aggregate((x,y) => $"{x}; {y}")}");
// // Console.WriteLine($"Chapter title: {articleMetaData.Name}");
// // Console.WriteLine($"Book title: {bookMetaData.Name}");
// // //Console.WriteLine($"Content: {download}");
//
// // //translationTasks.Add(Task.Run(async () => {
// // // logger.LogInformation("Beginning translation {} task for {}", download.Order, articleMetaData.Name);
// // // var ret = new Ordered<IDocument>(await QuickAndDirtyJanitor.TranslateAsync(download.Data), download.Order);
// // // logger.LogInformation("Finished translation {} task for {}", download.Order, articleMetaData.Name);
// // // return ret;
// // //}));
// //}
//
// //documents = (await Task.WhenAll(translationTasks)).ToList();
//
// //string testDir = Path.Combine("txt", Path.GetRandomFileName());
// //Directory.CreateDirectory(testDir);
//
// //int len = documents.MaxBy((x) => x.Order)?.Order ?? -1;
// //foreach (var document in documents.OrderBy((x) => x.Order)) {
// // document.Data.MetaData.TryGetValue(Architecture.ChapterKey, out var chapterMetaData);
// // Dictionary<string, string> linkButtons = new();
// // if (document.Order != 0)
// // linkButtons.Add("Previous", $"{document.Order - 1}.html");
// // if (document.Order != len)
// // linkButtons.Add("Next", $"{document.Order + 1}.html");
// // new HtmlExporter(document.Data, chapterMetaData as ArticleData, linkButtons).Write(Path.Combine(testDir, $"{document.Order}.html"));
// //}
//
// Console.ReadKey();
//
// //foreach (var download in documents.OrderBy((x) => x.Order)) {
// // if (download.Data.TryGetTaggedMetaData<ArticleData>(Architecture.ChapterKey, out var meta))
// // Console.WriteLine($"{download.Order}:{meta.Name}");
// //}
//
// //string[] templates = new DataKey<File>[] {
// // HtmlBook.Keys.ContentPage,
// // HtmlBook.Keys.NoContentPage,
// // HtmlBook.Keys.TitlePage,
// // HtmlBook.Keys.StylesPage,
// //}.Select(
// // (x) => BeamData.Files.ReadToString(x.WithNamespace("aeqw89:files:templates:classic"))
// //).ToArray();
//
// //HtmlBook book = new(
// // bookname: Path.Combine(Path.GetRandomFileName(), "I Alone Level Up"),
// // new CssData(),
// // new ArticleData(),
// // new HtmlBookTemplates() {
// // ContentPageTemplate = templates[0],
// // NoContentTemplate = templates[1],
// // TitlePageTemplate = templates[2],
// // CssTemplate = templates[3],
// // },
// // documents: documents.Select((x) => x.Data).ToList()
// //);
//
// //book.Update();
// //Console.WriteLine("One variable!");
}
}
}
+26 -25
View File
@@ -1,25 +1,26 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using OpenAI;
using OpenAI.Chat;
namespace Beam.Temporary.Cli {
/// <summary>
/// Quick-and-dirty helper that sends a document's text to an OpenAI chat model and
/// wraps the reply in a new <see cref="StringDocument"/>.
/// </summary>
public class QuickAndDirtyJanitor {
    /// <summary>
    /// Shared client, created on first use rather than in a static constructor so that a
    /// missing API key produces a descriptive exception instead of a
    /// <see cref="TypeInitializationException"/> for the whole type.
    /// </summary>
    static readonly Lazy<OpenAIClient> client = new Lazy<OpenAIClient>(CreateClient);

    /// <summary>
    /// Builds the client from the <c>OPEN_AI_KEY</c> environment variable.
    /// </summary>
    /// <exception cref="InvalidOperationException">The environment variable is unset or blank.</exception>
    static OpenAIClient CreateClient() {
        var key = Environment.GetEnvironmentVariable("OPEN_AI_KEY");
        if (string.IsNullOrWhiteSpace(key))
            throw new InvalidOperationException("The OPEN_AI_KEY environment variable must be set to an OpenAI API key.");
        return new OpenAIClient(key);
    }

    /// <summary>
    /// Asks the chat model to translate <paramref name="document"/> into English.
    /// </summary>
    /// <param name="document">The document whose <c>ToString()</c> text is sent for translation.</param>
    /// <returns>
    /// A <see cref="StringDocument"/> that reuses the input's filename and carries the
    /// concatenated text of every content part of the reply (empty when the reply has no text).
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="document"/> is <see langword="null"/>.</exception>
    /// <exception cref="InvalidOperationException">The API key is not configured.</exception>
    public static async Task<IDocument> TranslateAsync(IDocument document) {
        if (document is null)
            throw new ArgumentNullException(nameof(document));

        var chatCompletion = await client.Value.GetChatClient("gpt-4.1").CompleteChatAsync(
            ChatMessage.CreateSystemMessage("Translate the following text into english. If any part of the text has no direct English translation, you may choose to leave it as is. In either case, make sure to leave footnotes for any difficult to translate words. You must translate the whole text and output only your translation and footnotes. No other comments are necessary."),
            ChatMessage.CreateUserMessage("From UNKNOWN to ENGLISH.\n" + document.ToString()));

        // string.Concat treats null parts as empty and runs in linear time, unlike the
        // pairwise Aggregate it replaces (which also yielded null for an empty reply).
        var translated = string.Concat(chatCompletion.Value.Content.Select((part) => part?.Text));
        return new StringDocument(document.Filename, translated);
    }
}
}
// using System;
// using System.Collections.Generic;
// using System.Linq;
// using System.Text;
// using System.Threading.Tasks;
// using Beam.Models;
// using OpenAI;
// using OpenAI.Chat;
//
// namespace Beam.Temporary.Cli {
// public class QuickAndDirtyJanitor {
// static OpenAIClient client;
//
// static QuickAndDirtyJanitor() {
// var key = Environment.GetEnvironmentVariable("OPEN_AI_KEY");
// client = new OpenAIClient(key);
// }
//
// public static async Task<IDocument> TranslateAsync(IDocument document) {
// var chatCompletion = await client.GetChatClient("gpt-4.1").CompleteChatAsync(
// ChatMessage.CreateSystemMessage("Translate the following text into english. If any part of the text has no direct English translation, you may choose to leave it as is. In either case, make sure to leave footnotes for any difficult to translate words. You must translate the whole text and output only your translation and footnotes. No other comments are necessary."),
// ChatMessage.CreateUserMessage("From UNKNOWN to ENGLISH.\n" + document.ToString()));
// return new StringDocument(document.Filename, chatCompletion.Value.Content.DefaultIfEmpty().Select((x) => x?.Text).Aggregate((x,y) => $"{x}{y}"));
// }
// }
// }
-34
View File
@@ -1,34 +0,0 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
namespace Beam.Temporary.Cli {
/// <summary>
/// Serializable handle that identifies a state-change behaviour by a string key and
/// resolves it through <see cref="FactoryTable"/> each time <see cref="Behavior"/> is read.
/// Only <see cref="StateChangerKey"/> takes part in JSON serialization.
/// </summary>
public class StateChangerFactory {
    /// <summary>
    /// The behaviour produced by the factory registered under <see cref="StateChangerKey"/>.
    /// The factory delegate is invoked on every access.
    /// </summary>
    /// <exception cref="KeyNotFoundException">
    /// <see cref="StateChangerKey"/> was changed to a key that has no entry in <see cref="FactoryTable"/>.
    /// </exception>
    [JsonIgnore]
    public IStateChangeBehaviour Behavior =>
        FactoryTable.TryGetValue(StateChangerKey, out var factory)
            ? factory()
            : throw new KeyNotFoundException($"No state changer is registered under the key '{StateChangerKey}'.");

    /// <summary>
    /// Key used to look up the behaviour factory. The setter does not validate, so
    /// <see cref="Behavior"/> re-checks the key on access.
    /// </summary>
    [JsonInclude]
    public string StateChangerKey { get; set; }

    /// <summary>
    /// Creates a handle for <paramref name="stateChangerKey"/>.
    /// </summary>
    /// <param name="stateChangerKey">
    /// One of the built-in keys in <see cref="Keys"/>, or any key currently present in <see cref="FactoryTable"/>.
    /// </param>
    /// <exception cref="ArgumentException">The key is <see langword="null"/> or not known.</exception>
    [JsonConstructor]
    public StateChangerFactory(string stateChangerKey) {
        // Accept runtime-registered factories too: checking only the fixed Keys set made
        // additions to the public FactoryTable impossible to select.
        bool known = stateChangerKey is not null
            && (Keys.Contains(stateChangerKey) || FactoryTable.ContainsKey(stateChangerKey));
        if (!known)
            throw new ArgumentException($"{stateChangerKey} not in keys list", nameof(stateChangerKey));
        StateChangerKey = stateChangerKey;
    }

    /// <summary>
    /// Maps each key to a delegate that produces the corresponding behaviour.
    /// </summary>
    public static Dictionary<string, Func<IStateChangeBehaviour>> FactoryTable = new() {
        { LastAsNumber, () => CommonStateChangers.LastAsNumber },
        { LastAsNumberPrefixed, () => CommonStateChangers.NthAsNumber(^1, true) },
        { Constant, () => CommonStateChangers.Constant },
    };

    /// <summary>
    /// The built-in key names; a separate set is created for each instance.
    /// </summary>
    public HashSet<string> Keys = [LastAsNumber, LastAsNumberPrefixed, Constant];

    /// <summary>Key for <c>CommonStateChangers.LastAsNumber</c>.</summary>
    public const string LastAsNumber = "LastAsNumber";

    /// <summary>Key for <c>CommonStateChangers.NthAsNumber(^1, true)</c>.</summary>
    public const string LastAsNumberPrefixed = "LastAsNumberPrefixed";

    /// <summary>Key for <c>CommonStateChangers.Constant</c>.</summary>
    public const string Constant = "Constant";
}
}
-18
View File
@@ -1,18 +0,0 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Beam.Temporary.Cli {
/// <summary>
/// Article metadata for a table of contents: an <see cref="ArticleData"/> extended with
/// two optional link collections.
/// </summary>
public record TableOfContentsData : ArticleData {
    /// <summary>
    /// Links to the actual content; <see langword="null"/> when not set.
    /// </summary>
    public SourceLink[]? ContentLinks { get; set; }

    /// <summary>
    /// Links to every table-of-contents page belonging to this specific resource;
    /// <see langword="null"/> when not set.
    /// </summary>
    public SourceLink[]? PagesLinks { get; set; }
}
}
@@ -1,30 +1,30 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Beam.Temporary.Cli.Templates.Classic {
/// <summary>
/// Registers the file entries for the "classic" template set
/// (content, title, styles and no-content pages).
/// </summary>
internal class ClassicTemplates {
    /// <summary>
    /// Directory used when the caller supplies none. It is the original hard-coded,
    /// machine-specific location, kept so existing callers behave as before.
    /// NOTE(review): replace with a path resolved relative to the application once the
    /// templates are deployed alongside it.
    /// </summary>
    private const string DefaultTemplateDirectory = "C:\\Users\\qwsdc\\source\\repos\\Beam\\Beam.Temporary.Cli\\Templates\\Classic";

    /// <summary>
    /// Adds the four classic template files to <c>sdd.Files</c> under the
    /// <c>aeqw89:files:templates:classic:*</c> keys. Entries are added with
    /// <c>TryAdd</c> and its result is ignored.
    /// </summary>
    /// <param name="sdd">The data dictionary whose <c>Files</c> collection receives the entries.</param>
    /// <param name="templateDirectory">
    /// Directory containing the template files; when <see langword="null"/> the original
    /// hard-coded directory is used.
    /// </param>
    public static void Register(BeamDataDictionary sdd, string? templateDirectory = null) {
        var directory = templateDirectory ?? DefaultTemplateDirectory;

        sdd.Files.TryAdd(
            new("aeqw89:files:templates:classic:content_page"),
            new(System.IO.Path.Combine(directory, "Content.template.html"), "htmlpage", "templates"));
        sdd.Files.TryAdd(
            new("aeqw89:files:templates:classic:title_page"),
            new(System.IO.Path.Combine(directory, "Title.template.html"), "htmlpage", "templates"));
        sdd.Files.TryAdd(
            new("aeqw89:files:templates:classic:styles_page"),
            new(System.IO.Path.Combine(directory, "Styles.template.css"), "styles", "templates"));
        sdd.Files.TryAdd(
            new("aeqw89:files:templates:classic:no_content_page"),
            new(System.IO.Path.Combine(directory, "NoContent.template.html"), "htmlpage", "templates"));
    }
}
/// <summary>
/// Extension helpers for dictionaries whose values are <see cref="File"/> entries.
/// </summary>
internal static class DictionaryOfFileExtensions {
    /// <summary>
    /// Looks up the entry stored under <paramref name="key"/> and reads the text of the file at its path.
    /// </summary>
    /// <typeparam name="T">The dictionary's key type.</typeparam>
    /// <param name="dict">The dictionary of file entries.</param>
    /// <param name="key">The key of the entry to read.</param>
    /// <returns>The full text content of the file.</returns>
    public static string ReadToString<T>(this Dictionary<T, File> dict, T key) where T : notnull {
        var entry = dict[key];
        return System.IO.File.ReadAllText(entry.Path);
    }
}
}
// using System;
// using System.Collections.Generic;
// using System.Linq;
// using System.Text;
// using System.Threading.Tasks;
//
// namespace Beam.Temporary.Cli.Templates.Classic {
// internal class ClassicTemplates {
// public static void Register(BeamDataDictionary sdd) {
// sdd.Files.TryAdd(
// new("aeqw89:files:templates:classic:content_page"),
// new("C:\\Users\\qwsdc\\source\\repos\\Beam\\Beam.Temporary.Cli\\Templates\\Classic\\Content.template.html", "htmlpage", "templates"));
// sdd.Files.TryAdd(
// new("aeqw89:files:templates:classic:title_page"),
// new("C:\\Users\\qwsdc\\source\\repos\\Beam\\Beam.Temporary.Cli\\Templates\\Classic\\Title.template.html", "htmlpage", "templates"));
// sdd.Files.TryAdd(
// new("aeqw89:files:templates:classic:styles_page"),
// new("C:\\Users\\qwsdc\\source\\repos\\Beam\\Beam.Temporary.Cli\\Templates\\Classic\\Styles.template.css", "styles", "templates"));
// sdd.Files.TryAdd(
// new("aeqw89:files:templates:classic:no_content_page"),
// new("C:\\Users\\qwsdc\\source\\repos\\Beam\\Beam.Temporary.Cli\\Templates\\Classic\\NoContent.template.html", "htmlpage", "templates"));
// }
// }
//
// internal static class DictionaryOfFileExtensions {
// public static string ReadToString<T>(this Dictionary<T, File> dict, T key) where T: notnull {
// return System.IO.File.ReadAllText(dict[key].Path);
// }
// }
// }