Added constant state changers to represent singular/repeating states. Added a DownloadContextBuilder to support fluent building patterns. Changed RetryReporter and DownloadReporter to use RetryReport and DownloadReport structs to simplify type declarations. Made MainArchitecture obsolete by supporting fluent downloads with DownloadBuilder. Created a 'budge' OpenAI bridge for proof-of-concept translation.
This commit is contained in:
@@ -4,14 +4,23 @@ using HtmlAgilityPack;
|
||||
using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace Beam.Temporary.Cli {
|
||||
partial interface IArchitecture {
|
||||
public partial interface IArchitecture {
|
||||
private class MainArchitecture : IArchitecture {
|
||||
public MainArchitecture() { }
|
||||
|
||||
public DataKey<IDocumentMetaData> ChapterKey { get; set; } = new("ma:chapter");
|
||||
public DataKey<IDocumentMetaData> BookKey { get; set; } = new("ma:book");
|
||||
|
||||
public DownloadContext<IDocumentMetaData>? GetMeta(HtmlWeb web, DataKey<TextResource> pieceKey, SharedDataDictionary sdd, ILogger? logger = null) {
|
||||
/// <summary>
/// Builds a transformer that produces an <c>ArticleData</c> from its input by
/// resolving the authors, title, tags and description through the given bindings.
/// </summary>
/// <param name="binding">
/// Bindings used to resolve each field. When the bindings, or an individual field
/// binding, are null (or resolve to null), that field falls back to an empty string
/// before being cleaned.
/// </param>
/// <returns>
/// A transformer lambda; every resolved value is passed through
/// <c>OnlineCleaner.Clean</c> before it is stored.
/// </returns>
// NOTE(review): the lambda argument 'x' is presumably the downloaded HTML document/node - confirm against HtmlTransformer.
public HtmlTransformer<IDocumentMetaData> ArticleDataTransformer(DataBindings? binding) => (x) => {
|
||||
return new ArticleData() {
|
||||
// Authors is always a single-element collection holding the cleaned author text.
Authors = [OnlineCleaner.Clean(binding?.Authors?.Resolve(x) ?? "")],
|
||||
Name = OnlineCleaner.Clean(binding?.Title?.ResolveString(x) ?? ""),
|
||||
// Tags are expected as one ';'-separated string.
// NOTE(review): if Clean returns a string, Split never returns null, so the '?? []' fallback is unreachable,
// and an empty tag string yields a single empty category rather than an empty list - confirm this is intended.
Categories = OnlineCleaner.Clean(binding?.Tags?.ResolveString(x) ?? "").Split(';') ?? [],
|
||||
Description = OnlineCleaner.Clean(binding?.Description?.ResolveString(x) ?? "")
|
||||
};
|
||||
};
|
||||
|
||||
public DownloadContext<IDocumentMetaData>? GetMeta(HtmlWeb web, DataKey<TextResource> pieceKey, BeamDataDictionary sdd, CancellationToken ct = default, ILogger? logger = null) {
|
||||
var piece = sdd.Novels[pieceKey].ToRecord(sdd); // retrieves novel data from the sdd
|
||||
var auxiliary = piece.AssociatedMetaSource?.ToRecord(sdd); // retrieves novel aux data from the sdd
|
||||
|
||||
@@ -22,7 +31,7 @@ namespace Beam.Temporary.Cli {
|
||||
return null;
|
||||
|
||||
// gets the link for the novel's metadata using the auxiliary data retrieved from the sdd
|
||||
var link = sdd.Templates[auxiliary.Resource.Key].GenerateLink(piece?.Resource?.MetaTemplateInitialData!);
|
||||
var link = sdd.Templates[auxiliary.Resource.Key].Builder.Build(piece?.Resource?.MetaTemplateInitialData);
|
||||
var binding = auxiliary.Bindings;
|
||||
|
||||
return new DownloadContext<IDocumentMetaData>(web, [link], downloadLogger: logger, transformer: (x) => {
|
||||
@@ -35,7 +44,7 @@ namespace Beam.Temporary.Cli {
|
||||
});
|
||||
}
|
||||
|
||||
public DownloadContext<IDocument>? GetTextRecord(HtmlWeb web, DataKey<TextResource> resKey, SharedDataDictionary sdd, IDocumentMetaData? metaData = null, ILogger? logger = null) {
|
||||
public DownloadContext<IDocument>? GetTextRecord(HtmlWeb web, DataKey<TextResource> resKey, BeamDataDictionary sdd, IDocumentMetaData? metaData = null, CancellationToken ct = default, ILogger? logger = null) {
|
||||
var res = sdd.Novels[resKey].ToRecord(sdd); // retrieves the novel data from the sdd
|
||||
var aggregator = res.AssociatedSource?.ToRecord(sdd); // retrieves the aggregator (novel web source) from the sdd
|
||||
|
||||
@@ -47,8 +56,9 @@ namespace Beam.Temporary.Cli {
|
||||
var template = sdd.Templates[aggregator.Resource.Key]; // gets the link generator for the specified aggregator
|
||||
|
||||
// creates a generative enumerable of type link from 'template'
|
||||
var sle = SourceLinkEnumerable.FromGenerator(new DataBackedSourceLinkGenerator(
|
||||
template, res.Resource.TemplateInitialData));
|
||||
var sle = SourceLinkEnumerable.FromGenerator(new OrderedSourceLinkGenerator(
|
||||
template.Builder, new NumberedStateChanger(template.Factory.Behavior),
|
||||
res.Resource.TemplateInitialData));
|
||||
|
||||
return new DownloadContext<IDocument>(web, sle,
|
||||
transformer: (x) => {
|
||||
@@ -64,10 +74,10 @@ namespace Beam.Temporary.Cli {
|
||||
MetaData = meta
|
||||
};
|
||||
},
|
||||
retryReporter: new Progress<int>((x) => Console.WriteLine($"Retrying download ({x})")),
|
||||
downloadReporter: new Progress<IDocument>((x) => Console.WriteLine($"Downloaded ({x.Filename})")),
|
||||
retryReporter: new Progress<RetryReport>((x) => Console.WriteLine($"Retrying download of '{x.Link}' ({x.TryNumber}x)")),
|
||||
//downloadReporter: new Progress<DownloadReport>((x) => Console.WriteLine($"Downloaded ({x})")),
|
||||
asyncFailurePredicates: [
|
||||
(x) => Task.FromResult(!x.DocumentNode.InnerHtml.Contains("<div id=\"chapter-container\" class=\"chapter-content\" itemprop=\"description\">"))
|
||||
//(x) => Task.FromResult(!x.DocumentNode.InnerHtml.Contains("<div id=\"chapter-container\" class=\"chapter-content\" itemprop=\"description\">"))
|
||||
],
|
||||
timeOut: TimeSpan.FromSeconds(15),
|
||||
downloadLogger: logger
|
||||
|
||||
Reference in New Issue
Block a user