// File: Beam/Beam.Temporary.Cli/MainArchitecture.cs (C#, 90 lines, 5.1 KiB)

using aeqw89.DataKeys;
using Beam.Dynamic;
using HtmlAgilityPack;
using Microsoft.Extensions.Logging;
namespace Beam.Temporary.Cli {
public partial interface IArchitecture {
/// <summary>
/// <see cref="IArchitecture"/> implementation that builds the download contexts for a novel's
/// metadata and for its text from the entries of a <see cref="BeamDataDictionary"/>.
/// </summary>
private class MainArchitecture : IArchitecture {
    public MainArchitecture() { }

    /// <summary>Key under which the per-document <see cref="ArticleData"/> is stored by <see cref="GetTextRecord"/>.</summary>
    public DataKey<IDocumentMetaData> ChapterKey { get; set; } = new("ma:chapter");

    /// <summary>Key under which the optional caller-supplied metadata is stored by <see cref="GetTextRecord"/>.</summary>
    public DataKey<IDocumentMetaData> BookKey { get; set; } = new("ma:book");

    /// <summary>
    /// Creates a transformer that builds an <see cref="ArticleData"/> from its input using the given bindings.
    /// </summary>
    /// <param name="binding">
    /// Bindings for authors, title, tags and description. When it is <c>null</c>, or an individual
    /// binding is <c>null</c>, an empty string is cleaned in place of the resolved value.
    /// </param>
    /// <returns>The transformer delegate; every field is passed through <c>OnlineCleaner.Clean</c>.</returns>
    public HtmlTransformer<IDocumentMetaData> ArticleDataTransformer(DataBindings? binding) => (x) => {
        return new ArticleData() {
            Authors = [OnlineCleaner.Clean(binding?.Authors?.Resolve(x) ?? "")],
            Name = OnlineCleaner.Clean(binding?.Title?.ResolveString(x) ?? ""),
            // Tags arrive as a single ';'-separated string. string.Split never returns null,
            // so no fallback is needed after it.
            Categories = OnlineCleaner.Clean(binding?.Tags?.ResolveString(x) ?? "").Split(';'),
            Description = OnlineCleaner.Clean(binding?.Description?.ResolveString(x) ?? "")
        };
    };

    /// <summary>
    /// Builds the download context that fetches and parses the metadata of a novel.
    /// </summary>
    /// <param name="web">Passed to the created <see cref="DownloadContext{T}"/>.</param>
    /// <param name="pieceKey">Key of the novel in <c>sdd.Novels</c>.</param>
    /// <param name="sdd">Data dictionary holding the novels and link templates.</param>
    /// <param name="ct">Currently not used by this method.</param>
    /// <param name="logger">Optional logger, passed to the context as its download logger.</param>
    /// <returns>
    /// The download context, or <c>null</c> when the novel record, its meta-template initial data,
    /// or its associated metadata source is unavailable.
    /// </returns>
    public DownloadContext<IDocumentMetaData>? GetMeta(HtmlWeb web, DataKey<TextResource> pieceKey, BeamDataDictionary sdd, CancellationToken ct = default, ILogger? logger = null) {
        // Retrieve the novel data from the sdd and validate it BEFORE touching any of its members.
        var piece = sdd.Novels[pieceKey].ToRecord(sdd);
        if (piece is null) {
            return null;
        }

        // The initial data is needed to build the metadata link; read it once and reuse it below.
        var initialData = piece.Resource?.MetaTemplateInitialData;
        if (initialData is null) {
            return null;
        }

        // The auxiliary (metadata source) record is required to get metadata.
        var auxiliary = piece.AssociatedMetaSource?.ToRecord(sdd);
        if (auxiliary is null) {
            return null;
        }

        // Build the link for the novel's metadata using the auxiliary data retrieved from the sdd.
        var link = sdd.Templates[auxiliary.Resource.Key].Builder.Build(initialData);
        var binding = auxiliary.Bindings;

        // NOTE(review): this transformer mirrors ArticleDataTransformer; if auxiliary.Bindings is a
        // DataBindings and the delegate types line up, it could be replaced by a call to it - confirm.
        return new DownloadContext<IDocumentMetaData>(web, [link], downloadLogger: logger, transformer: (x) => {
            return new ArticleData() {
                Authors = [OnlineCleaner.Clean(binding?.Authors?.Resolve(x) ?? "")],
                Name = OnlineCleaner.Clean(binding?.Title?.ResolveString(x) ?? ""),
                // string.Split never returns null, so no fallback is needed after it.
                Categories = OnlineCleaner.Clean(binding?.Tags?.ResolveString(x) ?? "").Split(';'),
                Description = OnlineCleaner.Clean(binding?.Description?.ResolveString(x) ?? "")
            };
        });
    }

    /// <summary>
    /// Builds the download context that fetches the text documents of a novel from its associated source.
    /// </summary>
    /// <param name="web">Passed to the created <see cref="DownloadContext{T}"/>.</param>
    /// <param name="resKey">Key of the novel in <c>sdd.Novels</c>.</param>
    /// <param name="sdd">Data dictionary holding the novels and link templates.</param>
    /// <param name="metaData">Optional metadata attached to every produced document under <see cref="BookKey"/>.</param>
    /// <param name="ct">Currently not used by this method.</param>
    /// <param name="logger">Optional logger, passed to the context as its download logger.</param>
    /// <returns>
    /// The download context, or <c>null</c> when the novel record or its associated source is unavailable.
    /// </returns>
    public DownloadContext<IDocument>? GetTextRecord(HtmlWeb web, DataKey<TextResource> resKey, BeamDataDictionary sdd, IDocumentMetaData? metaData = null, CancellationToken ct = default, ILogger? logger = null) {
        // Retrieve the novel data from the sdd; it must be checked before its members are read.
        var res = sdd.Novels[resKey].ToRecord(sdd);
        if (res is null) {
            return null;
        }

        // Retrieve the aggregator (novel web source) from the sdd.
        var aggregator = res.AssociatedSource?.ToRecord(sdd);
        if (aggregator is null) {
            return null;
        }

        // Link template registered for the specified aggregator.
        var template = sdd.Templates[aggregator.Resource.Key];

        // Create a generative enumerable of links from 'template', seeded with the novel's initial data.
        var sle = SourceLinkEnumerable.FromGenerator(new OrderedSourceLinkGenerator(
            template.Builder, new NumberedStateChanger(template.Factory.Behavior),
            res.Resource.TemplateInitialData));

        return new DownloadContext<IDocument>(web, sle,
            transformer: (x) => {
                var resolved = aggregator.Bindings.Resolve(x);
                var articleData = new ArticleData() {
                    Name = OnlineCleaner.Clean(resolved.Title),
                };
                // Per-document metadata always goes under ChapterKey; the caller-supplied
                // metadata is added under BookKey only when it was provided.
                Dictionary<DataKey<IDocumentMetaData>, IDocumentMetaData> meta = [];
                meta.Add(ChapterKey, articleData);
                if (metaData is not null) {
                    meta.Add(BookKey, metaData);
                }
                // Each document gets a random file name.
                return new StringDocument(Path.GetRandomFileName(), OnlineCleaner.Clean(resolved.Content)) {
                    MetaData = meta
                };
            },
            retryReporter: new Progress<RetryReport>((x) => Console.WriteLine($"Retrying download of '{x.Link}' ({x.TryNumber}x)")),
            //downloadReporter: new Progress<DownloadReport>((x) => Console.WriteLine($"Downloaded ({x})")),
            asyncFailurePredicates: [
                //(x) => Task.FromResult(!x.DocumentNode.InnerHtml.Contains("<div id=\"chapter-container\" class=\"chapter-content\" itemprop=\"description\">"))
            ],
            timeOut: TimeSpan.FromSeconds(15),
            downloadLogger: logger
        );
    }
}
}
}