Added constant state changers to represent singular/repeating states. Added a DownloadContextBuilder to support fluent building patterns. Changed RetryReporter and DownloadReporter to use the RetryReport and DownloadReport structs to simplify type declarations. Made MainArchitecture obsolete by supporting fluent downloads with DownloadBuilder. Created a 'budget' OpenAI bridge for proof-of-concept translation.

This commit is contained in:
qwsdcvghyu89
2025-06-07 00:56:26 +03:00
parent a086cfa02b
commit a9a22ea23d
28 changed files with 809 additions and 145 deletions
+72 -19
View File
@@ -15,11 +15,11 @@ namespace Beam.Temporary.Cli {
public static JsonSerializerOptions ConversionOptions { get; internal set; } = new();
public static SharedDataDictionary Shared { get; set; } = [];
public static BeamDataDictionary BeamData { get; set; } = [];
public static IArchitecture Architecture = IArchitecture.Default;
const string SharedDataPath = "data/.dat";
const string BeamDataPath = "data/.dat";
static async Task Main(string[] args) {
ConversionOptions.Converters.AddPersistentDataRequiredConverters();
@@ -34,24 +34,49 @@ namespace Beam.Temporary.Cli {
ILogger logger = lf
.CreateLogger("Program");
await using var sharedContext = await DataDictionaryContext<SharedDataDictionary>.Create(
SharedDataPath,
await using var sharedContext = await DataDictionaryContext<BeamDataDictionary>.Create(
BeamDataPath,
false,
DataKind.Shared,
logger,
ConversionOptions
);
Shared = sharedContext.Data;
BeamData = sharedContext.Data;
Shared.Clear();
NovelStatics.Define_LightNovelWorld(Shared);
NovelStatics.Define_LightNovelWorld_Novel_TheLegendaryMechanic(Shared);
NovelStatics.Define_LightNovelWorl_Novel_IAloneLevelUp(Shared);
ClassicTemplates.Register(Shared);
BeamData.Clear();
NovelStatics.Define_WoDuShu(BeamData);
NovelStatics.Define_WoDuShu_HouseOfHorrors(BeamData);
ClassicTemplates.Register(BeamData);
var novel = new DataKey<TextResource>("novels:i_alone_level_up");
var context_aux = Architecture.GetMeta(web, novel, Shared);
CancellationTokenSource cts = new();
// Builds a transformer that turns its input `x` into an ArticleData, using the
// selectors carried by `binding`. A null binding, a missing selector, or a selector
// that resolves to null all fall back to the empty string, so every field is always
// populated with a cleaned value.
HtmlTransformer<IDocumentMetaData> ArticleDataTransformer(DataBindings? binding) => (x) => {
    return new ArticleData() {
        // NOTE(review): Authors goes through Resolve while the other fields use
        // ResolveString — confirm this difference is intended.
        Authors = [OnlineCleaner.Clean(binding?.Authors?.Resolve(x) ?? "")],
        Name = OnlineCleaner.Clean(binding?.Title?.ResolveString(x) ?? ""),
        // Tags are a ';'-separated list. string.Split never returns null, and splitting
        // an empty string yields [""], so empty entries are removed to make a missing or
        // empty tag list come out as an empty category array.
        Categories = OnlineCleaner.Clean(binding?.Tags?.ResolveString(x) ?? "")
            .Split(';', StringSplitOptions.RemoveEmptyEntries),
        Description = OnlineCleaner.Clean(binding?.Description?.ResolveString(x) ?? "")
    };
};
// Builds a transformer that resolves `binding` against its input `x` and wraps the
// cleaned content in a StringDocument with a random file name. The document's metadata
// always carries a chapter-level ArticleData (holding the cleaned title) under
// ChapterKey; when `metaData` is supplied it is additionally stored under BookKey.
HtmlTransformer<IDocument> DocumentTransformer(DataBindings? binding, IDocumentMetaData? metaData = null) => (x) => {
    var fragment = binding?.Resolve(x);
    var chapterInfo = new ArticleData() { Name = OnlineCleaner.Clean(fragment?.Title) };

    // The collection initializer calls Add, matching the explicit Add in the original.
    Dictionary<DataKey<IDocumentMetaData>, IDocumentMetaData> entries = new() {
        { IArchitecture.Default.ChapterKey, chapterInfo }
    };
    if (metaData is not null) {
        entries.Add(IArchitecture.Default.BookKey, metaData);
    }

    return new StringDocument(Path.GetRandomFileName(), OnlineCleaner.Clean(fragment?.Content)) {
        MetaData = entries
    };
};
var novel = new DataKey<TextResource>("novels:house_of_horrors");
var context_aux = Architecture.GetMeta(web, novel, BeamData, cts.Token);
context_aux.RetryReporter = new Progress<RetryReport>((x) => Console.WriteLine($"Failed. Trying again. {x.TryNumber}"));
var metaDownloader = new DownloadEnumerable<IDocumentMetaData>(
new SequentialFragmentDownloader<IDocumentMetaData>(
context_aux,
@@ -60,8 +85,17 @@ namespace Beam.Temporary.Cli {
.UnwrapFragmented());
var metadata = (await metaDownloader.FirstAsync());
var context = Architecture.GetTextRecord(web, novel, Shared, metadata.Data);
context.DownloadReporter = new Progress<IDocument>((x) => Console.WriteLine(x.Filename));
var metadata2 = await DownloadBuilder<IDocumentMetaData>.FromMeta(novel, BeamData)
.WithLink()
.WithTransformer(ArticleDataTransformer)
.Configure((x) => x
.WithRetryReporter(new Progress<RetryReport>())
.WithTimeOut(TimeSpan.FromSeconds(15)))
.Build()
.FirstAsync();
var context = Architecture.GetTextRecord(web, novel, BeamData, metadata.Data, cts.Token);
context.DownloadReporter = new Progress<DownloadReport>((x) => Console.WriteLine(x));
var downloader = new DownloadEnumerable<IDocument>(
new SequentialFragmentDownloader<IDocument>(
context,
@@ -69,9 +103,21 @@ namespace Beam.Temporary.Cli {
logger)
.UnwrapFragmented());
List<Ordered<IDocument>> documents = [];
var downloader2 = DownloadBuilder<IDocument>.FromText(novel, BeamData)
.WithLinkGenerator()
.WithTransformer((x) => DocumentTransformer(x, metadata2.Data))
.Configure((x) => x
.WithDownloadReporter(new Progress<DownloadReport>((x) => logger.LogInformation(x.ToString())))
.WithTimeOut(TimeSpan.FromSeconds(15))
)
.Build();
await foreach (var download in downloader.Take(20)) {
List<Task<Ordered<IDocument>>> translationTasks = [];
List<Ordered<IDocument>> documents = [];
await foreach (var download in downloader2.Take(10)) {
if (!download.Data.MetaData.TryGetValue(Architecture.ChapterKey, out var meta))
continue;
if (meta is not ArticleData articleMetaData)
@@ -83,8 +129,15 @@ namespace Beam.Temporary.Cli {
Console.WriteLine($"Chapter title: {articleMetaData.Name}");
//Console.WriteLine($"Content: {download}");
documents.Add(download);
}
//translationTasks.Add(Task.Run(async () => {
// logger.LogInformation("Beginning translation {} task for {}", download.Order, articleMetaData.Name);
// var ret = new Ordered<IDocument>(await QuickAndDirtyJanitor.TranslateAsync(download.Data), download.Order);
// logger.LogInformation("Finished translation {} task for {}", download.Order, articleMetaData.Name);
// return ret;
//}));
}
documents = (await Task.WhenAll(translationTasks)).ToList();
string testDir = Path.Combine("txt", Path.GetRandomFileName());
Directory.CreateDirectory(testDir);
@@ -113,7 +166,7 @@ namespace Beam.Temporary.Cli {
// HtmlBook.Keys.TitlePage,
// HtmlBook.Keys.StylesPage,
//}.Select(
// (x) => Shared.Files.ReadToString(x.WithNamespace("aeqw89:files:templates:classic"))
// (x) => BeamData.Files.ReadToString(x.WithNamespace("aeqw89:files:templates:classic"))
//).ToArray();
//HtmlBook book = new(