190 lines
8.6 KiB
C#
190 lines
8.6 KiB
C#
using aeqw89.PersistentData;
|
|
using aeqw89.DataKeys;
|
|
using Beam.Dynamic;
|
|
using HtmlAgilityPack;
|
|
using Microsoft.Extensions.DependencyInjection;
|
|
using Microsoft.Extensions.Logging;
|
|
using System.Text.Json;
|
|
using System.Text.Json.Serialization;
|
|
using System.Text.Json.Serialization.Metadata;
|
|
using Beam.Temporary.Cli.Templates.Classic;
|
|
using Beam.Exports;
|
|
|
|
namespace Beam.Temporary.Cli {
    /// <summary>
    /// Command-line scratch harness: loads the shared Beam data dictionary, downloads
    /// metadata and chapters for a single novel, and exports documents as HTML files.
    /// </summary>
    internal class Program {

        /// <summary>
        /// JSON options handed to the data-dictionary context. <see cref="Main"/> registers the
        /// persistent-data converters on it and turns on indented output.
        /// </summary>
        public static JsonSerializerOptions ConversionOptions { get; internal set; } = new();

        /// <summary>
        /// Data dictionary used by the downloaders. <see cref="Main"/> replaces the initial empty
        /// instance with the one owned by the shared data context.
        /// </summary>
        public static BeamDataDictionary BeamData { get; set; } = [];

        /// <summary>
        /// Architecture used to build download contexts and to supply the metadata keys.
        /// Initialised to <see cref="IArchitecture.Default"/>.
        /// </summary>
        public static IArchitecture Architecture = IArchitecture.Default;

        // Path passed to DataDictionaryContext.Create for the shared data dictionary.
        private const string BeamDataPath = "data/.dat";

        /// <summary>
        /// Entry point. Rebuilds the shared data dictionary, fetches the novel's metadata (once via
        /// manually wired downloaders, once via <c>DownloadBuilder</c>), streams the first ten
        /// chapters while printing their titles, then writes one HTML file per collected document
        /// into a randomly named directory under <c>txt</c> and waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        private static async Task Main(string[] args) {
            ConversionOptions.Converters.AddPersistentDataRequiredConverters();
            ConversionOptions.WriteIndented = true;

            var web = new HtmlWeb();

            // The factory is IDisposable; disposing it on exit lets the console provider flush.
            // Declared before the data context so it is disposed after it (reverse order).
            using var lf = LoggerFactory.Create((builder) => {
                builder.AddConsole();
            });
            ILogger logger = lf.CreateLogger("Program");

            await using var sharedContext = await DataDictionaryContext<BeamDataDictionary>.Create(
                BeamDataPath,
                false,
                DataKind.Shared,
                logger,
                ConversionOptions
            );

            BeamData = sharedContext.Data;

            // Start from a clean dictionary and re-register every definition on each run.
            BeamData.Clear();
            NovelStatics.Define_WoDuShu(BeamData);
            NovelStatics.Define_WoDuShu_HouseOfHorrors(BeamData);
            ClassicTemplates.Register(BeamData);

            // CancellationTokenSource is IDisposable; release it when Main returns.
            using CancellationTokenSource cts = new();

            // Builds a transformer that maps a page to book-level ArticleData using the given
            // bindings. Every missing binding falls back to an empty string before cleaning.
            HtmlTransformer<IDocumentMetaData> ArticleDataTransformer(DataBindings? binding) => (x) => {
                return new ArticleData() {
                    Authors = [OnlineCleaner.Clean(binding?.Authors?.Resolve(x) ?? "")],
                    Name = OnlineCleaner.Clean(binding?.Title?.ResolveString(x) ?? ""),
                    // string.Split never returns null, so no fallback is needed here.
                    Categories = OnlineCleaner.Clean(binding?.Tags?.ResolveString(x) ?? "").Split(';'),
                    Description = OnlineCleaner.Clean(binding?.Description?.ResolveString(x) ?? "")
                };
            };

            // Builds a transformer that maps a page to a StringDocument with a random file name.
            // Chapter metadata is always attached; book metadata only when metaData is supplied.
            HtmlTransformer<IDocument> DocumentTransformer(DataBindings? binding, IDocumentMetaData? metaData = null) => (x) => {
                var resolved = binding?.Resolve(x);
                var articleData = new ArticleData() {
                    Name = OnlineCleaner.Clean(resolved?.Title),
                };

                // Keys come from the same Architecture instance the consumer loops below use for
                // lookup, so producer and consumer cannot drift apart if Architecture is replaced.
                Dictionary<DataKey<IDocumentMetaData>, IDocumentMetaData> meta = new() {
                    { Architecture.ChapterKey, articleData }
                };
                if (metaData is not null) {
                    meta.Add(Architecture.BookKey, metaData);
                }

                return new StringDocument(Path.GetRandomFileName(), OnlineCleaner.Clean(resolved?.Content)) {
                    MetaData = meta
                };
            };

            var novel = new DataKey<TextResource>("novels:house_of_horrors");

            // --- Metadata, variant 1: downloaders wired by hand from an architecture context. ---
            var context_aux = Architecture.GetMeta(web, novel, BeamData, cts.Token);
            context_aux.RetryReporter = new Progress<RetryReport>(
                (report) => Console.WriteLine($"Failed. Trying again. {report.TryNumber}"));
            var metaDownloader = new DownloadEnumerable<IDocumentMetaData>(
                new SequentialFragmentDownloader<IDocumentMetaData>(
                    context_aux,
                    (c) => new UnitFragmentDownloader<IDocumentMetaData>(c.Web, c.AsyncTranformer, c.AsyncFailurePredicates, 4, logger),
                    logger)
                .UnwrapFragmented());
            var metadata = await metaDownloader.FirstAsync();

            // --- Metadata, variant 2: the same request expressed through DownloadBuilder. ---
            var metadata2 = await DownloadBuilder<IDocumentMetaData>.FromMeta(novel, BeamData)
                .WithLink()
                .WithTransformer(ArticleDataTransformer)
                .Configure((options) => options
                    .WithRetryReporter(new Progress<RetryReport>())
                    .WithTimeOut(TimeSpan.FromSeconds(15)))
                .Build()
                .FirstAsync();

            // --- Chapters, variant 1: hand-wired downloader. ---
            // NOTE(review): `downloader` is constructed but never enumerated in this method; only
            // `downloader2` below is consumed. Kept as-is in case construction is relied upon.
            var context = Architecture.GetTextRecord(web, novel, BeamData, metadata.Data, cts.Token);
            context.DownloadReporter = new Progress<DownloadReport>((report) => Console.WriteLine(report));
            var downloader = new DownloadEnumerable<IDocument>(
                new SequentialFragmentDownloader<IDocument>(
                    context,
                    (c) => new UnitFragmentDownloader<IDocument>(c.Web, c.AsyncTranformer, c.AsyncFailurePredicates, 4, logger),
                    logger)
                .UnwrapFragmented());

            // --- Chapters, variant 2: DownloadBuilder; this is the stream actually consumed. ---
            var downloader2 = DownloadBuilder<IDocument>.FromText(novel, BeamData)
                .WithLinkGenerator()
                .WithTransformer((bindings) => DocumentTransformer(bindings, metadata2.Data))
                .Configure((options) => options
                    // Constant template + argument: report text containing braces can no longer be
                    // misread as format placeholders (analyzer rule CA2254).
                    .WithDownloadReporter(new Progress<DownloadReport>(
                        (report) => logger.LogInformation("{Report}", report.ToString())))
                    .WithTimeOut(TimeSpan.FromSeconds(15))
                )
                .Build();

            List<Task<Ordered<IDocument>>> translationTasks = [];

            await foreach (var download in downloader2.Take(10)) {
                if (!download.Data.MetaData.TryGetValue(Architecture.ChapterKey, out var meta)) {
                    continue;
                }
                if (meta is not ArticleData articleMetaData) {
                    continue;
                }

                //Console.WriteLine($"Title: {data.Name}");
                //Console.WriteLine($"Description: {data.Description}");
                //Console.WriteLine($"Categories: {data.Categories.Aggregate((x, y) => $"{x}; {y}")}");
                //Console.WriteLine($"Authors: {data.Authors.Aggregate((x,y) => $"{x}; {y}")}");
                Console.WriteLine($"Chapter title: {articleMetaData.Name}");
                //Console.WriteLine($"Content: {download}");

                //translationTasks.Add(Task.Run(async () => {
                //    logger.LogInformation("Beginning translation {} task for {}", download.Order, articleMetaData.Name);
                //    var ret = new Ordered<IDocument>(await QuickAndDirtyJanitor.TranslateAsync(download.Data), download.Order);
                //    logger.LogInformation("Finished translation {} task for {}", download.Order, articleMetaData.Name);
                //    return ret;
                //}));
            }

            // NOTE(review): while the translation block above stays commented out,
            // translationTasks is empty, so `documents` is empty and the export loop below
            // writes no files. Confirm whether untranslated downloads should be exported instead.
            List<Ordered<IDocument>> documents = (await Task.WhenAll(translationTasks)).ToList();

            string testDir = Path.Combine("txt", Path.GetRandomFileName());
            Directory.CreateDirectory(testDir);

            // Highest order present; -1 when there are no documents.
            int lastOrder = documents.MaxBy((x) => x.Order)?.Order ?? -1;
            foreach (var document in documents.OrderBy((x) => x.Order)) {
                // A missing chapter entry leaves chapterMetaData null; the exporter then receives null.
                document.Data.MetaData.TryGetValue(Architecture.ChapterKey, out var chapterMetaData);

                // Navigation links: no "Previous" on order 0, no "Next" on the highest order.
                Dictionary<string, string> linkButtons = new();
                if (document.Order != 0) {
                    linkButtons.Add("Previous", $"{document.Order - 1}.html");
                }
                if (document.Order != lastOrder) {
                    linkButtons.Add("Next", $"{document.Order + 1}.html");
                }

                new HtmlExporter(document.Data, chapterMetaData as ArticleData, linkButtons)
                    .Write(Path.Combine(testDir, $"{document.Order}.html"));
            }

            Console.ReadKey();

            //foreach (var download in documents.OrderBy((x) => x.Order)) {
            //    if (download.Data.TryGetTaggedMetaData<ArticleData>(Architecture.ChapterKey, out var meta))
            //        Console.WriteLine($"{download.Order}:{meta.Name}");
            //}

            //string[] templates = new DataKey<File>[] {
            //    HtmlBook.Keys.ContentPage,
            //    HtmlBook.Keys.NoContentPage,
            //    HtmlBook.Keys.TitlePage,
            //    HtmlBook.Keys.StylesPage,
            //}.Select(
            //    (x) => BeamData.Files.ReadToString(x.WithNamespace("aeqw89:files:templates:classic"))
            //).ToArray();

            //HtmlBook book = new(
            //    bookname: Path.Combine(Path.GetRandomFileName(), "I Alone Level Up"),
            //    new CssData(),
            //    new ArticleData(),
            //    new HtmlBookTemplates() {
            //        ContentPageTemplate = templates[0],
            //        NoContentTemplate = templates[1],
            //        TitlePageTemplate = templates[2],
            //        CssTemplate = templates[3],
            //    },
            //    documents: documents.Select((x) => x.Data).ToList()
            //);

            //book.Update();
            //Console.WriteLine("One variable!");
        }
    }
}
|