Added constant state changers to represent singular/repeating states. Added a DownloadContextBuilder to support fluent building patterns. Changed RetryReporter and DownloadReporter to use the RetryReport and DownloadReport structs, which simplifies their type declarations. Made MainArchitecture obsolete by supporting fluent downloads through DownloadBuilder. Created a 'budge' OpenAI bridge for proof-of-concept translation.
This commit is contained in:
@@ -15,11 +15,11 @@ namespace Beam.Temporary.Cli {
|
||||
|
||||
public static JsonSerializerOptions ConversionOptions { get; internal set; } = new();
|
||||
|
||||
public static SharedDataDictionary Shared { get; set; } = [];
|
||||
public static BeamDataDictionary BeamData { get; set; } = [];
|
||||
|
||||
public static IArchitecture Architecture = IArchitecture.Default;
|
||||
|
||||
const string SharedDataPath = "data/.dat";
|
||||
const string BeamDataPath = "data/.dat";
|
||||
|
||||
static async Task Main(string[] args) {
|
||||
ConversionOptions.Converters.AddPersistentDataRequiredConverters();
|
||||
@@ -34,24 +34,49 @@ namespace Beam.Temporary.Cli {
|
||||
ILogger logger = lf
|
||||
.CreateLogger("Program");
|
||||
|
||||
await using var sharedContext = await DataDictionaryContext<SharedDataDictionary>.Create(
|
||||
SharedDataPath,
|
||||
await using var sharedContext = await DataDictionaryContext<BeamDataDictionary>.Create(
|
||||
BeamDataPath,
|
||||
false,
|
||||
DataKind.Shared,
|
||||
logger,
|
||||
ConversionOptions
|
||||
);
|
||||
|
||||
Shared = sharedContext.Data;
|
||||
BeamData = sharedContext.Data;
|
||||
|
||||
Shared.Clear();
|
||||
NovelStatics.Define_LightNovelWorld(Shared);
|
||||
NovelStatics.Define_LightNovelWorld_Novel_TheLegendaryMechanic(Shared);
|
||||
NovelStatics.Define_LightNovelWorl_Novel_IAloneLevelUp(Shared);
|
||||
ClassicTemplates.Register(Shared);
|
||||
BeamData.Clear();
|
||||
NovelStatics.Define_WoDuShu(BeamData);
|
||||
NovelStatics.Define_WoDuShu_HouseOfHorrors(BeamData);
|
||||
ClassicTemplates.Register(BeamData);
|
||||
|
||||
var novel = new DataKey<TextResource>("novels:i_alone_level_up");
|
||||
var context_aux = Architecture.GetMeta(web, novel, Shared);
|
||||
CancellationTokenSource cts = new();
|
||||
|
||||
// Creates the transformer that fills an ArticleData (authors, name, categories, description)
// from the input handed to the transformer, using the supplied bindings.
//
// binding: optional bindings; when it, or any individual binding, is null the
//          corresponding field is built from an empty string instead.
// Returns: an HtmlTransformer producing the populated ArticleData as IDocumentMetaData.
HtmlTransformer<IDocumentMetaData> ArticleDataTransformer(DataBindings? binding) => (x) => {
    return new ArticleData() {
        Authors = [OnlineCleaner.Clean(binding?.Authors?.Resolve(x) ?? "")],
        Name = OnlineCleaner.Clean(binding?.Title?.ResolveString(x) ?? ""),
        // Tags arrive as one ';'-separated string. string.Split never returns null, so the
        // former `?? []` fallback was dead code, and an empty tag string used to produce a
        // single empty category. Dropping empty (and whitespace-only) entries yields an
        // empty category list in that case instead.
        Categories = OnlineCleaner.Clean(binding?.Tags?.ResolveString(x) ?? "")
            .Split(';', System.StringSplitOptions.RemoveEmptyEntries | System.StringSplitOptions.TrimEntries),
        Description = OnlineCleaner.Clean(binding?.Description?.ResolveString(x) ?? "")
    };
};
|
||||
|
||||
// Creates the transformer that turns the input handed to it into a StringDocument.
//
// binding:  optional bindings used to resolve the title and content; when null, both
//           resolve to null before being passed to OnlineCleaner.Clean.
// metaData: optional metadata; when supplied it is stored under
//           IArchitecture.Default.BookKey alongside the chapter entry.
// Returns:  an HtmlTransformer producing a randomly named StringDocument carrying the
//           cleaned content and the assembled metadata dictionary.
HtmlTransformer<IDocument> DocumentTransformer(DataBindings? binding, IDocumentMetaData? metaData = null) => (x) => {
    var page = binding?.Resolve(x);
    var chapterInfo = new ArticleData() { Name = OnlineCleaner.Clean(page?.Title) };

    // The chapter entry is always present; the collection initializer goes through Add.
    Dictionary<DataKey<IDocumentMetaData>, IDocumentMetaData> entries = new() {
        { IArchitecture.Default.ChapterKey, chapterInfo },
    };

    // The second entry is added only when the caller provided metadata.
    if (metaData is not null) {
        entries.Add(IArchitecture.Default.BookKey, metaData);
    }

    var fileName = Path.GetRandomFileName();
    var cleanedContent = OnlineCleaner.Clean(page?.Content);
    return new StringDocument(fileName, cleanedContent) { MetaData = entries };
};
|
||||
|
||||
var novel = new DataKey<TextResource>("novels:house_of_horrors");
|
||||
var context_aux = Architecture.GetMeta(web, novel, BeamData, cts.Token);
|
||||
context_aux.RetryReporter = new Progress<RetryReport>((x) => Console.WriteLine($"Failed. Trying again. {x.TryNumber}"));
|
||||
var metaDownloader = new DownloadEnumerable<IDocumentMetaData>(
|
||||
new SequentialFragmentDownloader<IDocumentMetaData>(
|
||||
context_aux,
|
||||
@@ -60,8 +85,17 @@ namespace Beam.Temporary.Cli {
|
||||
.UnwrapFragmented());
|
||||
var metadata = (await metaDownloader.FirstAsync());
|
||||
|
||||
var context = Architecture.GetTextRecord(web, novel, Shared, metadata.Data);
|
||||
context.DownloadReporter = new Progress<IDocument>((x) => Console.WriteLine(x.Filename));
|
||||
var metadata2 = await DownloadBuilder<IDocumentMetaData>.FromMeta(novel, BeamData)
|
||||
.WithLink()
|
||||
.WithTransformer(ArticleDataTransformer)
|
||||
.Configure((x) => x
|
||||
.WithRetryReporter(new Progress<RetryReport>())
|
||||
.WithTimeOut(TimeSpan.FromSeconds(15)))
|
||||
.Build()
|
||||
.FirstAsync();
|
||||
|
||||
var context = Architecture.GetTextRecord(web, novel, BeamData, metadata.Data, cts.Token);
|
||||
context.DownloadReporter = new Progress<DownloadReport>((x) => Console.WriteLine(x));
|
||||
var downloader = new DownloadEnumerable<IDocument>(
|
||||
new SequentialFragmentDownloader<IDocument>(
|
||||
context,
|
||||
@@ -69,9 +103,21 @@ namespace Beam.Temporary.Cli {
|
||||
logger)
|
||||
.UnwrapFragmented());
|
||||
|
||||
List<Ordered<IDocument>> documents = [];
|
||||
var downloader2 = DownloadBuilder<IDocument>.FromText(novel, BeamData)
|
||||
.WithLinkGenerator()
|
||||
.WithTransformer((x) => DocumentTransformer(x, metadata2.Data))
|
||||
.Configure((x) => x
|
||||
.WithDownloadReporter(new Progress<DownloadReport>((x) => logger.LogInformation(x.ToString())))
|
||||
.WithTimeOut(TimeSpan.FromSeconds(15))
|
||||
)
|
||||
.Build();
|
||||
|
||||
|
||||
await foreach (var download in downloader.Take(20)) {
|
||||
|
||||
List<Task<Ordered<IDocument>>> translationTasks = [];
|
||||
List<Ordered<IDocument>> documents = [];
|
||||
|
||||
await foreach (var download in downloader2.Take(10)) {
|
||||
if (!download.Data.MetaData.TryGetValue(Architecture.ChapterKey, out var meta))
|
||||
continue;
|
||||
if (meta is not ArticleData articleMetaData)
|
||||
@@ -83,8 +129,15 @@ namespace Beam.Temporary.Cli {
|
||||
Console.WriteLine($"Chapter title: {articleMetaData.Name}");
|
||||
//Console.WriteLine($"Content: {download}");
|
||||
|
||||
documents.Add(download);
|
||||
}
|
||||
//translationTasks.Add(Task.Run(async () => {
|
||||
// logger.LogInformation("Beginning translation {} task for {}", download.Order, articleMetaData.Name);
|
||||
// var ret = new Ordered<IDocument>(await QuickAndDirtyJanitor.TranslateAsync(download.Data), download.Order);
|
||||
// logger.LogInformation("Finished translation {} task for {}", download.Order, articleMetaData.Name);
|
||||
// return ret;
|
||||
//}));
|
||||
}
|
||||
|
||||
documents = (await Task.WhenAll(translationTasks)).ToList();
|
||||
|
||||
string testDir = Path.Combine("txt", Path.GetRandomFileName());
|
||||
Directory.CreateDirectory(testDir);
|
||||
@@ -113,7 +166,7 @@ namespace Beam.Temporary.Cli {
|
||||
// HtmlBook.Keys.TitlePage,
|
||||
// HtmlBook.Keys.StylesPage,
|
||||
//}.Select(
|
||||
// (x) => Shared.Files.ReadToString(x.WithNamespace("aeqw89:files:templates:classic"))
|
||||
// (x) => BeamData.Files.ReadToString(x.WithNamespace("aeqw89:files:templates:classic"))
|
||||
//).ToArray();
|
||||
|
||||
//HtmlBook book = new(
|
||||
|
||||
Reference in New Issue
Block a user