diff --git a/Beam.Temporary.Cli/Architecture.cs b/Beam.Temporary.Cli/Architecture.cs index 3c8d175..ed53096 100644 --- a/Beam.Temporary.Cli/Architecture.cs +++ b/Beam.Temporary.Cli/Architecture.cs @@ -13,26 +13,26 @@ namespace Beam.Temporary.Cli { /// A collection of specific useful methods and constants that facilitate the use of the application; allows other parts of the application to depend on architecture-specific arbitrary choices without compromising the Single-Responsibility principle or increasing redundant code. /// /// - partial interface IArchitecture { + public partial interface IArchitecture { /// /// Gets the metadata associated with a /// /// The web client to use when downloading s /// The key of the stored in the - /// The to be used to retrieve information + /// The to be used to retrieve information /// Optional logger for logging debug information /// A object with the required information to perform the download - public DownloadContext? GetMeta(HtmlWeb web, DataKey pieceKey, SharedDataDictionary sdd, ILogger? logger = null); + public DownloadContext? GetMeta(HtmlWeb web, DataKey pieceKey, BeamDataDictionary sdd, CancellationToken ct = default, ILogger? logger = null); /// /// Gets the of the text record associated with /// /// The web client to use when downloading s /// The key of the stored in the - /// The to be used to retrieve information + /// The to be used to retrieve information /// Optional book metadata to include with the final text record /// Optional logger for logging debug information /// A object with the required information to perform the download - public DownloadContext? GetTextRecord(HtmlWeb web, DataKey pieceKey, SharedDataDictionary sdd, IDocumentMetaData? metadata = null, ILogger? logger = null); + public DownloadContext? GetTextRecord(HtmlWeb web, DataKey pieceKey, BeamDataDictionary sdd, IDocumentMetaData? metadata = null, CancellationToken ct = default, ILogger? 
logger = null); /// /// The to use when looking for the chapter metadata diff --git a/Beam.Temporary.Cli/Beam.Temporary.Cli.csproj b/Beam.Temporary.Cli/Beam.Temporary.Cli.csproj index fc4622c..1482db0 100644 --- a/Beam.Temporary.Cli/Beam.Temporary.Cli.csproj +++ b/Beam.Temporary.Cli/Beam.Temporary.Cli.csproj @@ -11,6 +11,7 @@ + diff --git a/Beam.Temporary.Cli/SharedDataDictionary.cs b/Beam.Temporary.Cli/BeamDataDictionary.cs similarity index 86% rename from Beam.Temporary.Cli/SharedDataDictionary.cs rename to Beam.Temporary.Cli/BeamDataDictionary.cs index c39edda..8094e95 100644 --- a/Beam.Temporary.Cli/SharedDataDictionary.cs +++ b/Beam.Temporary.Cli/BeamDataDictionary.cs @@ -9,9 +9,9 @@ using System.Text.Json; using System.Threading.Tasks; namespace Beam.Temporary.Cli { - public class SharedDataDictionary : BaseDataDictionary { - public Dictionary, PackagedSourceLinkGenerationData> Templates { - get => GetOrCreateDictionary, PackagedSourceLinkGenerationData>(nameof(Templates)); + public class BeamDataDictionary : BaseDataDictionary { + public Dictionary, Template> Templates { + get => GetOrCreateDictionary, Template>(nameof(Templates)); set => Data[nameof(Templates)] = value; } diff --git a/Beam.Temporary.Cli/CommonTransformers.cs b/Beam.Temporary.Cli/CommonTransformers.cs new file mode 100644 index 0000000..08c6c91 --- /dev/null +++ b/Beam.Temporary.Cli/CommonTransformers.cs @@ -0,0 +1,34 @@ +using aeqw89.DataKeys; +using Beam.Dynamic; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Beam.Temporary.Cli { + public static class CommonTransformers { + public static HtmlTransformer ArticleDataTransformer(DataBindings? binding) => (x) => { + return new ArticleData() { + Authors = [OnlineCleaner.Clean(binding?.Authors?.Resolve(x) ?? "")], + Name = OnlineCleaner.Clean(binding?.Title?.ResolveString(x) ?? ""), + Categories = OnlineCleaner.Clean(binding?.Tags?.ResolveString(x) ?? 
"").Split(';') ?? [], + Description = OnlineCleaner.Clean(binding?.Description?.ResolveString(x) ?? "") + }; + }; + + public static HtmlTransformer DocumentTransformer(DataBindings? binding, IDocumentMetaData? metaData = null) => (x) => { + var resolved = binding?.Resolve(x); + var articleData = new ArticleData() { + Name = OnlineCleaner.Clean(resolved?.Title), + }; + Dictionary, IDocumentMetaData> meta = []; + meta.Add(IArchitecture.Default.ChapterKey, articleData); + if (metaData is not null) + meta.Add(IArchitecture.Default.BookKey, metaData); + return new StringDocument(Path.GetRandomFileName(), OnlineCleaner.Clean(resolved?.Content)) { + MetaData = meta + }; + }; + } +} diff --git a/Beam.Temporary.Cli/DownloadBuilder.cs b/Beam.Temporary.Cli/DownloadBuilder.cs new file mode 100644 index 0000000..fdf314b --- /dev/null +++ b/Beam.Temporary.Cli/DownloadBuilder.cs @@ -0,0 +1,150 @@ +using aeqw89.DataKeys; +using Beam.Dynamic; +using Microsoft.Extensions.Logging; +using System; +using System.Collections.Generic; + +namespace Beam.Temporary.Cli { + /// + /// Type‑safe, staged builder that prevents callers from forgetting the mandatory steps + /// (source → link selection → transformer) and surfaces operational knobs as first‑class + /// methods instead of magic parameters. 
+    ///
+    public static class DownloadBuilder {
+        /* ──────────────────────────── Entry points ─────────────────────────── */
+
+        /// <summary>Starts a build that downloads from the novel's associated meta source.</summary>
+        /// <param name="novelKey">Key of the novel in <c>data.Novels</c>.</param>
+        /// <param name="data">Dictionary the novel, its sources, templates and bindings are read from.</param>
+        /// <exception cref="KeyNotFoundException">The novel is not present in <paramref name="data"/>.</exception>
+        /// <exception cref="InvalidOperationException">The novel has no meta source or no meta template data.</exception>
+        public static ILinkStage FromMeta(DataKey novelKey, BeamDataDictionary data) =>
+            Create(novelKey, data, SourceKind.Meta);
+
+        /// <summary>Starts a build that downloads from the novel's associated text source.</summary>
+        /// <param name="novelKey">Key of the novel in <c>data.Novels</c>.</param>
+        /// <param name="data">Dictionary the novel, its sources, templates and bindings are read from.</param>
+        /// <exception cref="KeyNotFoundException">The novel is not present in <paramref name="data"/>.</exception>
+        /// <exception cref="InvalidOperationException">The novel has no text source or no template initial data.</exception>
+        public static ILinkStage FromText(DataKey novelKey, BeamDataDictionary data) =>
+            Create(novelKey, data, SourceKind.Text);
+
+        /* ────────────────────────────── Stages ─────────────────────────────── */
+
+        /// <summary>First stage: decide how the download links are produced.</summary>
+        public interface ILinkStage {
+            /// <summary>Uses the single link built from the source's template and the initial state.</summary>
+            ITransformStage WithLink();
+            /// <summary>Uses a link generator seeded with the initial state.</summary>
+            ITransformStage WithLinkGenerator();
+        }
+
+        /// <summary>Second stage: supply the transformer applied to each downloaded page.</summary>
+        public interface ITransformStage {
+            /// <summary>Calls <paramref name="factory"/> with the bindings registered for the source and installs the result.</summary>
+            IContextStage WithTransformer(Func> factory);
+        }
+
+        /// <summary>Final stage: optional tuning, then <see cref="Build"/>.</summary>
+        public interface IContextStage {
+            /// <summary>Gives direct access to the underlying context builder.</summary>
+            IContextStage Configure(Action> configure);
+            /// <summary>Sets the degree passed to each unit downloader; values below 1 are raised to 1.</summary>
+            IContextStage WithParallelism(int degree);
+            /// <summary>Forwards the timeout to the context builder.</summary>
+            IContextStage WithTimeout(TimeSpan timeout);
+            /// <summary>Forwards the retry reporter to the context builder.</summary>
+            IContextStage WithRetryReporter(IProgress reporter);
+            /// <summary>Builds the context and returns the enumerable that performs the downloads.</summary>
+            DownloadEnumerable Build();
+        }
+
+        /* ────────────────────────── Implementation ────────────────────────── */
+
+        /// <summary>Which of the novel's two associated sources a build reads from.</summary>
+        private enum SourceKind { Meta, Text }
+
+        /// <summary>Resolves the source for <paramref name="kind"/> and wraps it in the first stage.</summary>
+        private static ILinkStage Create(DataKey novelKey, BeamDataDictionary data, SourceKind kind) {
+            var (source, initial) = Resolve(novelKey, data, kind);
+            var ctxBuilder = new DownloadContextBuilder().WithLinks(Array.Empty()); // placeholder, filled later.
+            return new LinkStage(source, initial, data, ctxBuilder);
+        }
+
+        /// <summary>Looks up the novel and returns the source and initial template state for <paramref name="kind"/>.</summary>
+        /// <exception cref="KeyNotFoundException">The novel is not present in <paramref name="data"/>.</exception>
+        /// <exception cref="InvalidOperationException">The requested source or its initial data is missing.</exception>
+        private static (WebResource Source, State Initial) Resolve(DataKey novelKey, BeamDataDictionary data, SourceKind kind) {
+            if (!data.Novels.TryGetValue(novelKey, out var tr))
+                throw new KeyNotFoundException($"Novel '{novelKey}' not found in BeamDataDictionary.");
+
+            var textRecord = tr.ToRecord(data);
+            WebResource? source;
+            State? initial;
+
+            if (kind == SourceKind.Meta) {
+                source = textRecord.AssociatedMetaSource ?? throw new InvalidOperationException($"Meta source missing for '{novelKey}'.");
+                initial = textRecord.Resource.MetaTemplateInitialData ?? throw new InvalidOperationException("Meta template data missing.");
+            } else {
+                source = textRecord.AssociatedSource ?? throw new InvalidOperationException($"Text source missing for '{novelKey}'.");
+                initial = textRecord.Resource.TemplateInitialData ?? throw new InvalidOperationException("Template initial data missing.");
+            }
+
+            return (source, initial);
+        }
+
+        /* ──────────────────────────── Stage types ─────────────────────────── */
+
+        /// <summary>Link stage bound to one resolved source and its initial template state.</summary>
+        private sealed record LinkStage(
+            WebResource Source,
+            State Initial,
+            BeamDataDictionary Data,
+            DownloadContextBuilder CtxBuilder) : ILinkStage {
+            public ITransformStage WithLink() {
+                // One link, built by the template registered under the source's key.
+                var link = Data.Templates[Source.Key].Builder.Build(Initial);
+                CtxBuilder.WithLinks(new[] { link });
+                return new TransformStage(Source, Data, CtxBuilder);
+            }
+
+            public ITransformStage WithLinkGenerator() {
+                // The state changer comes from the template's factory; the generator starts at Initial.
+                var template = Data.Templates[Source.Key];
+                var generator = SourceLinkEnumerable.FromGenerator(new OrderedSourceLinkGenerator(
+                    template.Builder,
+                    new NumberedStateChanger(template.Factory.Behavior),
+                    Initial));
+                CtxBuilder.WithLinks(generator);
+                return new TransformStage(Source, Data, CtxBuilder);
+            }
+        }
+
+        /// <summary>Transform stage; looks the source's bindings up in <c>Data.Bindings</c>.</summary>
+        private sealed record TransformStage(
+            WebResource Source,
+            BeamDataDictionary Data,
+            DownloadContextBuilder CtxBuilder) : ITransformStage {
+            public IContextStage WithTransformer(Func> factory) {
+                var transformer = factory(Data.Bindings[Source.Bindings]);
+                CtxBuilder.WithTransformer(transformer);
+                return new ContextStage(CtxBuilder);
+            }
+        }
+
+        /// <summary>Final stage; carries the context builder and the chosen parallelism (4 unless overridden).</summary>
+        private sealed class ContextStage : IContextStage {
+            private readonly DownloadContextBuilder _ctxBuilder;
+            private int _parallelism = 4;
+
+            public ContextStage(DownloadContextBuilder ctxBuilder) => _ctxBuilder = ctxBuilder;
+
+            public IContextStage Configure(Action> configure) {
+                configure(_ctxBuilder);
+                return this;
+            }
+
+            public IContextStage WithParallelism(int degree) {
+                // Clamp rather than throw: anything below 1 becomes 1.
+                _parallelism = Math.Max(1, degree);
+                return this;
+            }
+
+            public IContextStage WithTimeout(TimeSpan timeout) {
+                _ctxBuilder.WithTimeOut(timeout);
+                return this;
+            }
+
+            public IContextStage WithRetryReporter(IProgress reporter) {
+                _ctxBuilder.WithRetryReporter(reporter);
+                return this;
+            }
+
+            public DownloadEnumerable Build() {
+                var context = _ctxBuilder.Build();
+                SequentialFragmentDownloader sequentialDownloader = new(
+                    context,
+                    ctx => new UnitFragmentDownloader(
+                        context.Web,
+                        context.AsyncTranformer,
+                        context.AsyncFailurePredicates,
+                        _parallelism,
+                        context.DownloadLogger),
+                    context.DownloadLogger);
+                // The downloader must outlive this method: the enumerable returned here wraps it and is
+                // only enumerated by the caller after Build() returns. The previous
+                // DisposeAsync().AsTask().Wait() disposed it before any enumeration and blocked on async work.
+                // NOTE(review): if the downloader holds resources that need deterministic cleanup,
+                // move that responsibility to DownloadEnumerable or the caller.
+                return new DownloadEnumerable(sequentialDownloader.UnwrapFragmented());
+            }
+        }
+    }
+}
diff --git a/Beam.Temporary.Cli/MainArchitecture.cs b/Beam.Temporary.Cli/MainArchitecture.cs
index 2a13610..b5d4f30 100644
--- a/Beam.Temporary.Cli/MainArchitecture.cs
+++ b/Beam.Temporary.Cli/MainArchitecture.cs
@@ -4,14 +4,23 @@ using HtmlAgilityPack;
 using Microsoft.Extensions.Logging;
 
 namespace Beam.Temporary.Cli {
-    partial interface IArchitecture {
+    public partial interface IArchitecture {
         private class MainArchitecture : IArchitecture {
             public MainArchitecture() { }
 
             public DataKey ChapterKey { get; set; } = new("ma:chapter");
             public DataKey BookKey { get; set; } = new("ma:book");
 
-            public DownloadContext? GetMeta(HtmlWeb web, DataKey pieceKey, SharedDataDictionary sdd, ILogger? logger = null) {
+            public HtmlTransformer ArticleDataTransformer(DataBindings? binding) => (x) => {
+                return new ArticleData() {
+                    Authors = [OnlineCleaner.Clean(binding?.Authors?.Resolve(x) ?? "")],
+                    Name = OnlineCleaner.Clean(binding?.Title?.ResolveString(x) ?? ""),
+                    Categories = OnlineCleaner.Clean(binding?.Tags?.ResolveString(x) ?? "").Split(';') ?? [],
+                    Description = OnlineCleaner.Clean(binding?.Description?.ResolveString(x) ?? "")
+                };
+            };
+
+            public DownloadContext?
GetMeta(HtmlWeb web, DataKey pieceKey, BeamDataDictionary sdd, CancellationToken ct = default, ILogger? logger = null) { var piece = sdd.Novels[pieceKey].ToRecord(sdd); // retrieves novel data from the sdd var auxiliary = piece.AssociatedMetaSource?.ToRecord(sdd); // retrieves novel aux data from the sdd @@ -22,7 +31,7 @@ namespace Beam.Temporary.Cli { return null; // gets the link for the novel's metadata using the auxillary data retrieved from the sdd - var link = sdd.Templates[auxiliary.Resource.Key].GenerateLink(piece?.Resource?.MetaTemplateInitialData!); + var link = sdd.Templates[auxiliary.Resource.Key].Builder.Build(piece?.Resource?.MetaTemplateInitialData); var binding = auxiliary.Bindings; return new DownloadContext(web, [link], downloadLogger: logger, transformer: (x) => { @@ -35,7 +44,7 @@ namespace Beam.Temporary.Cli { }); } - public DownloadContext? GetTextRecord(HtmlWeb web, DataKey resKey, SharedDataDictionary sdd, IDocumentMetaData? metaData = null, ILogger? logger = null) { + public DownloadContext? GetTextRecord(HtmlWeb web, DataKey resKey, BeamDataDictionary sdd, IDocumentMetaData? metaData = null, CancellationToken ct = default, ILogger? 
logger = null) { var res = sdd.Novels[resKey].ToRecord(sdd); // retrieves the novel data from the sdd var aggregator = res.AssociatedSource?.ToRecord(sdd); // retrieves the aggregator (novel web source) from the sdd @@ -47,8 +56,9 @@ namespace Beam.Temporary.Cli { var template = sdd.Templates[aggregator.Resource.Key]; // gets the link generator for the specified aggregator // creates a generative enumerable of type link from 'template' - var sle = SourceLinkEnumerable.FromGenerator(new DataBackedSourceLinkGenerator( - template, res.Resource.TemplateInitialData)); + var sle = SourceLinkEnumerable.FromGenerator(new OrderedSourceLinkGenerator( + template.Builder, new NumberedStateChanger(template.Factory.Behavior), + res.Resource.TemplateInitialData)); return new DownloadContext(web, sle, transformer: (x) => { @@ -64,10 +74,10 @@ namespace Beam.Temporary.Cli { MetaData = meta }; }, - retryReporter: new Progress((x) => Console.WriteLine($"Retrying download ({x})")), - downloadReporter: new Progress((x) => Console.WriteLine($"Downloaded ({x.Filename})")), + retryReporter: new Progress((x) => Console.WriteLine($"Retrying download of '{x.Link}' ({x.TryNumber}x)")), + //downloadReporter: new Progress((x) => Console.WriteLine($"Downloaded ({x})")), asyncFailurePredicates: [ - (x) => Task.FromResult(!x.DocumentNode.InnerHtml.Contains("
")) + //(x) => Task.FromResult(!x.DocumentNode.InnerHtml.Contains("
")) ], timeOut: TimeSpan.FromSeconds(15), downloadLogger: logger diff --git a/Beam.Temporary.Cli/NovelStatics.cs b/Beam.Temporary.Cli/NovelStatics.cs index 7183ee7..9dd621d 100644 --- a/Beam.Temporary.Cli/NovelStatics.cs +++ b/Beam.Temporary.Cli/NovelStatics.cs @@ -5,133 +5,131 @@ using Beam.Dynamic; using System; using System.Collections.Generic; using System.Linq; +using System.Runtime.CompilerServices; using System.Text; using System.Threading.Tasks; namespace Beam.Temporary.Cli { - internal static class NovelStatics { - public static void Define_LightNovelWorld_Novel_TheLegendaryMechanic(SharedDataDictionary sdd) { - var lnwAggregator = new DataKey("aeqw89:document:aggregators:light_novel_world"); - var lnwAuxiliary = new DataKey("aeqw89:document:auxillaries:light_novel_world"); - var novel = new TextResource() { - Key = new DataKey("novels:the_legendary_mechanic"), - AssociatedSource = lnwAggregator, - AssociatedMetaSource = lnwAuxiliary, - TemplateInitialData = ["the-legendary-mechanic-245", "1"], - MetaTemplateInitialData = ["the-legendary-mechanic"] - }; - sdd.Novels.TryAdd(novel.Key, novel); + public static class NovelStatics { + //public static void Define_LightNovelWorld_Novel_TheLegendaryMechanic(SharedDataDictionary sdd) { + // var lnwAggregator = new DataKey("aeqw89:document:aggregators:light_novel_world"); + // var lnwAuxiliary = new DataKey("aeqw89:document:auxillaries:light_novel_world"); + // var novel = new TextResource() { + // Key = new DataKey("novels:the_legendary_mechanic"), + // AssociatedSource = lnwAggregator, + // AssociatedMetaSource = lnwAuxiliary, + // TemplateInitialData = ["the-legendary-mechanic-245", "1"], + // MetaTemplateInitialData = ["the-legendary-mechanic"] + // }; + // sdd.Novels.TryAdd(novel.Key, novel); - sdd.AggregatorNovels.TryAdd(lnwAggregator, [novel.Key]); - } + // sdd.AggregatorNovels.TryAdd(lnwAggregator, [novel.Key]); + //} - public static void Define_LightNovelWorl_Novel_IAloneLevelUp(SharedDataDictionary sdd) { 
- var lnwAggregator = new DataKey("light_novel_world").ToAggregator().As(); - var lnwAuxiliary = new DataKey("light_novel_world").ToAuxiliary().As(); + //public static void Define_LightNovelWorl_Novel_IAloneLevelUp(SharedDataDictionary sdd) { + // var lnwAggregator = new DataKey("light_novel_world").ToAggregator().As(); + // var lnwAuxiliary = new DataKey("light_novel_world").ToAuxiliary().As(); + // var novel = new TextResource() { + // Key = new DataKey("novels:i_alone_level_up"), + // AssociatedSource = lnwAggregator, + // AssociatedMetaSource = lnwAuxiliary, + // TemplateInitialData = ["i-alone-level-up-236", "1"], + // MetaTemplateInitialData = ["i-alone-level-up-solo-leveling-05122225"] + // }; + + // sdd.Novels.TryAdd(novel.Key, novel); + + // sdd.AggregatorNovels.TryAdd(lnwAggregator, [novel.Key]); + //} + + public static void Define_WoDuShu_HouseOfHorrors(BeamDataDictionary sdd) { + var (wdsAgg, wdsAux) = CreateKeyPair("aggregators", "auxillaries", "wodushu", "aeqw89:document"); var novel = new TextResource() { - Key = new DataKey("novels:i_alone_level_up"), - AssociatedSource = lnwAggregator, - AssociatedMetaSource = lnwAuxiliary, - TemplateInitialData = ["i-alone-level-up-236", "1"], - MetaTemplateInitialData = ["i-alone-level-up-solo-leveling-05122225"] + Key = new DataKey("novels:house_of_horrors"), + FriendlyName = "My House Of Horrors", + AssociatedSource = wdsAgg, + AssociatedMetaSource = wdsAux, + TemplateInitialData = new State(["24349", "2896325"]), + MetaTemplateInitialData = new State(["24349"]) }; sdd.Novels.TryAdd(novel.Key, novel); - sdd.AggregatorNovels.TryAdd(lnwAggregator, [novel.Key]); + sdd.AggregatorNovels.TryAdd(wdsAgg, [novel.Key]); } - public static void Define_NovelFull(SharedDataDictionary sdd) { - var docNamespace = "aeqw89:document"; - var nfAgg = new DataKey("aggregators:novel_full").WithNamespace(docNamespace); - var nfAux = new DataKey("auxillaries:novel_full").WithNamespace(docNamespace); - var nfBindings = new 
DataKey("aeqw89:bindings:light_novel_world"); - var aggregator = new WebResource(nfAgg) { - Name = "Novel Full", - Description = "A novel aggregator site", - Domain = "https://novelfull.net", - Bindings = nfBindings - }; - var auxiliary = new WebResource(nfAux) { - Name = "Novel Full", - Description = "A novel aggregator site", - Domain = "https://novelfull.net", - Bindings = nfBindings.WithSuffix("_aux") - }; - - sdd.Templates.TryAdd(nfAgg, new() { - Template = "" - }); + private static (DataKey, DataKey) CreateKeyPair(string pref1, string pref2, string common, string @namespace) { + return ( + new DataKey(pref1 + ":" + common).WithNamespace(@namespace), + new DataKey(pref2 + ":" + common).WithNamespace(@namespace) + ); } - public static void Define_LightNovelWorld(SharedDataDictionary sdd) { - var lnwAggregator = new DataKey("aeqw89:document:aggregators:light_novel_world"); - var lnwAuxiliary = new DataKey("aeqw89:document:auxillaries:light_novel_world"); - const string lnwBindingsA = "aeqw89:bindings:light_novel_world"; - var aggregator = new WebResource(lnwAggregator) { - Name = "Light Novel World", - Description = "A novel aggregator site maintained by NetherClaw", - Domain = "https://www.lightnovelworld.co", - Bindings = new DataKey(lnwBindingsA) + public static void Define_WoDuShu(BeamDataDictionary sdd) { + var (wdsAgg, wdsAux) = CreateKeyPair("aggregators", "auxillaries", "wodushu", "aeqw89:document"); + var bindings = new DataKey("aeqw89:bindings:wodushu"); + var aggregator = new WebResource(wdsAgg) { + Name = "WoDuShu.com", + Description = "A Chinese novel aggregator site", + Domain = "https://wodushu.com", + Bindings = bindings }; - const string lnwBindingsB = "aeqw89:bindings:light_novel_world_aux"; - var auxiliary = new WebResource(lnwAuxiliary) { - Name = "Light Novel World", - Description = "A novel aggregator site maintained by NetherClaw", - Domain = "https://www.lightnovelworld.co", - Bindings = new DataKey(lnwBindingsB) + var auxiliary = new 
WebResource(wdsAux) { + Name = "WoDuShu.com", + Description = "A Chinese novel aggregator site", + Domain = "https://wodushu.com", + Bindings = bindings.WithSuffix("_aux") }; - sdd.Templates.TryAdd(lnwAuxiliary, new() { - Template = "https://www.lightnovelworld.co/novel/{0}", - IndexOfChapterIndex = -1 + sdd.Templates.TryAdd(wdsAgg, new() { + Factory = new(StateChangerFactory.LastAsNumber), + Builder = new SourceLinkBuilder("www.wodushu.com") + .WithSegments("read", "", "") + .WithParameters(1, "") + .WithParameters(2, (".html", Position.After)) }); - sdd.Templates.TryAdd(lnwAggregator, new() { - Template = "https://www.lightnovelworld.co/novel/{0}/chapter-{1}", - IndexOfChapterIndex = 1 + sdd.Templates.TryAdd(wdsAux, new() { + Factory = new(StateChangerFactory.Constant), + Builder = new SourceLinkBuilder("www.wodushu.com") + .WithSegments("book", "") + .WithParameters(1, "") }); - sdd.Aggregators.TryAdd(aggregator.Key, aggregator); - sdd.Auxillaries.TryAdd(auxiliary.Key, auxiliary); + sdd.Aggregators.TryAdd(wdsAgg, aggregator); + sdd.Auxillaries.TryAdd(wdsAux, auxiliary); - var lnwBindings = new DataKey(lnwBindingsA); - var lnwBindingsAux = new DataKey(lnwBindingsB); - sdd.Bindings.TryAdd(lnwBindings, new DataBindings() { - Title = new Binding("aeqw89:binding:light_novel_world:title") { - XPath = "/html/body/main/article/section/div[1]/h1/span[2]", + var binding_agg = new DataKey("aeqw89:bindings:wodushu"); + var binding_aux = new DataKey("aeqw89:bindings:wodushu_aux"); + + sdd.Bindings.Add(binding_agg, new() { + Title = new Binding() { + XPath = "/html/body/div[4]/div/div/div[2]/h1", Type = BindingType.Single }, - Content = new("aeqw89:binding:light_novel_world:content") { + Content = new Binding() { + Type = BindingType.UseProvider, Provider = new ParagraphedContentDataProvider() { Content = new Binding() { - XPath = "//*[@id=\"chapter-container\"]" + XPath = "//*[@id=\"content\"]" } - }, - Type = BindingType.UseProvider + } }, }); - 
sdd.Bindings.TryAdd(lnwBindingsAux, new DataBindings() { - Title = new("aeqw89:binding:light_novel_world_aux:title") { - XPath = "/html/body/main/article/header/div[2]/div[2]/div[1]/h1", + + sdd.Bindings.Add(binding_aux, new() { + Title = new Binding() { + XPath = "/html/body/div[3]/div[1]/div/div/div[2]/div[1]/h1", Type = BindingType.Single }, - Authors = new("aeqw89:binding:light_novel_world_aux:authors") { - XPath = "/html/body/main/article/header/div[2]/div[2]/div[1]/div[1]/a", + Authors = new Binding() { + XPath = "/html/body/div[3]/div[1]/div/div/div[2]/div[1]/div/p[1]/a", Type = BindingType.Single }, - Description = new("aeqw89:binding:light_novel_world_aux:description") { + Description = new Binding() { Provider = new ParagraphedContentDataProvider() { - Content = new() { - XPath = "/html/body/main/article/div/section/div[1]/div" - } - }, - Type = BindingType.UseProvider - }, - Tags = new("aeqw89:binding:light_novel_world_aux:tags") { - Provider = new ListContentDataProvider() { - Content = new() { - XPath = "/html/body/main/article/header/div[2]/div[2]/div[3]/ul" + Content = new Binding() { + XPath = "/html/body/div[3]/div[1]/div/div/div[2]/div[2]" } }, Type = BindingType.UseProvider @@ -139,6 +137,107 @@ namespace Beam.Temporary.Cli { }); } + //public static void Define_NovelFull(SharedDataDictionary sdd) { + // var docNamespace = "aeqw89:document"; + // var nfAgg = new DataKey("aggregators:novel_full").WithNamespace(docNamespace); + // var nfAux = new DataKey("auxillaries:novel_full").WithNamespace(docNamespace); + // var nfBindings = new DataKey("aeqw89:bindings:light_novel_world"); + // var aggregator = new WebResource(nfAgg) { + // Name = "Novel Full", + // Description = "A novel aggregator site", + // Domain = "https://novelfull.net", + // Bindings = nfBindings + // }; + // var auxiliary = new WebResource(nfAux) { + // Name = "Novel Full", + // Description = "A novel aggregator site", + // Domain = "https://novelfull.net", + // Bindings = 
nfBindings.WithSuffix("_aux") + // }; + + // sdd.Templates.TryAdd(nfAux, new(StateChangerFactory.LastAsNumberPrefixed)); + + // sdd.Aggregators.TryAdd(nfAgg, aggregator); + // sdd.Auxillaries.TryAdd(nfAux, auxiliary); + + // var binding_agg = new DataKey("aeqw89:bindings:be") + + //} + + //public static void Define_LightNovelWorld(SharedDataDictionary sdd) { + // var lnwAggregator = new DataKey("aeqw89:document:aggregators:light_novel_world"); + // var lnwAuxiliary = new DataKey("aeqw89:document:auxillaries:light_novel_world"); + // const string lnwBindingsA = "aeqw89:bindings:light_novel_world"; + // var aggregator = new WebResource(lnwAggregator) { + // Name = "Light Novel World", + // Description = "A novel aggregator site maintained by NetherClaw", + // Domain = "https://www.lightnovelworld.co", + // Bindings = new DataKey(lnwBindingsA) + // }; + // const string lnwBindingsB = "aeqw89:bindings:light_novel_world_aux"; + // var auxiliary = new WebResource(lnwAuxiliary) { + // Name = "Light Novel World", + // Description = "A novel aggregator site maintained by NetherClaw", + // Domain = "https://www.lightnovelworld.co", + // Bindings = new DataKey(lnwBindingsB) + // }; + + // sdd.Templates.TryAdd(lnwAuxiliary, new() { + // Template = "https://www.lightnovelworld.co/novel/{0}", + // IndexOfChapterIndex = -1 + // }); + // sdd.Templates.TryAdd(lnwAggregator, new() { + // Template = "https://www.lightnovelworld.co/novel/{0}/chapter-{1}", + // IndexOfChapterIndex = 1 + // }); + + // sdd.Aggregators.TryAdd(aggregator.Key, aggregator); + // sdd.Auxillaries.TryAdd(auxiliary.Key, auxiliary); + + // var lnwBindings = new DataKey(lnwBindingsA); + // var lnwBindingsAux = new DataKey(lnwBindingsB); + // sdd.Bindings.TryAdd(lnwBindings, new DataBindings() { + // Title = new Binding("aeqw89:binding:light_novel_world:title") { + // XPath = "/html/body/main/article/section/div[1]/h1/span[2]", + // Type = BindingType.Single + // }, + // Content = 
new("aeqw89:binding:light_novel_world:content") { + // Provider = new ParagraphedContentDataProvider() { + // Content = new Binding() { + // XPath = "//*[@id=\"chapter-container\"]" + // } + // }, + // Type = BindingType.UseProvider + // }, + // }); + // sdd.Bindings.TryAdd(lnwBindingsAux, new DataBindings() { + // Title = new("aeqw89:binding:light_novel_world_aux:title") { + // XPath = "/html/body/main/article/header/div[2]/div[2]/div[1]/h1", + // Type = BindingType.Single + // }, + // Authors = new("aeqw89:binding:light_novel_world_aux:authors") { + // XPath = "/html/body/main/article/header/div[2]/div[2]/div[1]/div[1]/a", + // Type = BindingType.Single + // }, + // Description = new("aeqw89:binding:light_novel_world_aux:description") { + // Provider = new ParagraphedContentDataProvider() { + // Content = new() { + // XPath = "/html/body/main/article/div/section/div[1]/div" + // } + // }, + // Type = BindingType.UseProvider + // }, + // Tags = new("aeqw89:binding:light_novel_world_aux:tags") { + // Provider = new ListContentDataProvider() { + // Content = new() { + // XPath = "/html/body/main/article/header/div[2]/div[2]/div[3]/ul" + // } + // }, + // Type = BindingType.UseProvider + // } + // }); + //} + } } diff --git a/Beam.Temporary.Cli/Program.cs b/Beam.Temporary.Cli/Program.cs index 21ce3e1..1a01804 100644 --- a/Beam.Temporary.Cli/Program.cs +++ b/Beam.Temporary.Cli/Program.cs @@ -15,11 +15,11 @@ namespace Beam.Temporary.Cli { public static JsonSerializerOptions ConversionOptions { get; internal set; } = new(); - public static SharedDataDictionary Shared { get; set; } = []; + public static BeamDataDictionary BeamData { get; set; } = []; public static IArchitecture Architecture = IArchitecture.Default; - const string SharedDataPath = "data/.dat"; + const string BeamDataPath = "data/.dat"; static async Task Main(string[] args) { ConversionOptions.Converters.AddPersistentDataRequiredConverters(); @@ -34,24 +34,49 @@ namespace Beam.Temporary.Cli { ILogger 
logger = lf .CreateLogger("Program"); - await using var sharedContext = await DataDictionaryContext.Create( - SharedDataPath, + await using var sharedContext = await DataDictionaryContext.Create( + BeamDataPath, false, DataKind.Shared, logger, ConversionOptions ); - Shared = sharedContext.Data; + BeamData = sharedContext.Data; - Shared.Clear(); - NovelStatics.Define_LightNovelWorld(Shared); - NovelStatics.Define_LightNovelWorld_Novel_TheLegendaryMechanic(Shared); - NovelStatics.Define_LightNovelWorl_Novel_IAloneLevelUp(Shared); - ClassicTemplates.Register(Shared); + BeamData.Clear(); + NovelStatics.Define_WoDuShu(BeamData); + NovelStatics.Define_WoDuShu_HouseOfHorrors(BeamData); + ClassicTemplates.Register(BeamData); - var novel = new DataKey("novels:i_alone_level_up"); - var context_aux = Architecture.GetMeta(web, novel, Shared); + CancellationTokenSource cts = new(); + + HtmlTransformer ArticleDataTransformer(DataBindings? binding) => (x) => { + return new ArticleData() { + Authors = [OnlineCleaner.Clean(binding?.Authors?.Resolve(x) ?? "")], + Name = OnlineCleaner.Clean(binding?.Title?.ResolveString(x) ?? ""), + Categories = OnlineCleaner.Clean(binding?.Tags?.ResolveString(x) ?? "").Split(';') ?? [], + Description = OnlineCleaner.Clean(binding?.Description?.ResolveString(x) ?? "") + }; + }; + + HtmlTransformer DocumentTransformer(DataBindings? binding, IDocumentMetaData? 
metaData = null) => (x) => { + var resolved = binding?.Resolve(x); + var articleData = new ArticleData() { + Name = OnlineCleaner.Clean(resolved?.Title), + }; + Dictionary, IDocumentMetaData> meta = []; + meta.Add(IArchitecture.Default.ChapterKey, articleData); + if (metaData is not null) + meta.Add(IArchitecture.Default.BookKey, metaData); + return new StringDocument(Path.GetRandomFileName(), OnlineCleaner.Clean(resolved?.Content)) { + MetaData = meta + }; + }; + + var novel = new DataKey("novels:house_of_horrors"); + var context_aux = Architecture.GetMeta(web, novel, BeamData, cts.Token); + context_aux.RetryReporter = new Progress((x) => Console.WriteLine($"Failed. Trying again. {x.TryNumber}")); var metaDownloader = new DownloadEnumerable( new SequentialFragmentDownloader( context_aux, @@ -60,8 +85,17 @@ namespace Beam.Temporary.Cli { .UnwrapFragmented()); var metadata = (await metaDownloader.FirstAsync()); - var context = Architecture.GetTextRecord(web, novel, Shared, metadata.Data); - context.DownloadReporter = new Progress((x) => Console.WriteLine(x.Filename)); + var metadata2 = await DownloadBuilder.FromMeta(novel, BeamData) + .WithLink() + .WithTransformer(ArticleDataTransformer) + .Configure((x) => x + .WithRetryReporter(new Progress()) + .WithTimeOut(TimeSpan.FromSeconds(15))) + .Build() + .FirstAsync(); + + var context = Architecture.GetTextRecord(web, novel, BeamData, metadata.Data, cts.Token); + context.DownloadReporter = new Progress((x) => Console.WriteLine(x)); var downloader = new DownloadEnumerable( new SequentialFragmentDownloader( context, @@ -69,9 +103,21 @@ namespace Beam.Temporary.Cli { logger) .UnwrapFragmented()); - List> documents = []; + var downloader2 = DownloadBuilder.FromText(novel, BeamData) + .WithLinkGenerator() + .WithTransformer((x) => DocumentTransformer(x, metadata2.Data)) + .Configure((x) => x + .WithDownloadReporter(new Progress((x) => logger.LogInformation(x.ToString()))) + .WithTimeOut(TimeSpan.FromSeconds(15)) + ) + 
.Build(); + - await foreach (var download in downloader.Take(20)) { + + List>> translationTasks = []; + List> documents = []; + + await foreach (var download in downloader2.Take(10)) { if (!download.Data.MetaData.TryGetValue(Architecture.ChapterKey, out var meta)) continue; if (meta is not ArticleData articleMetaData) @@ -83,8 +129,15 @@ namespace Beam.Temporary.Cli { Console.WriteLine($"Chapter title: {articleMetaData.Name}"); //Console.WriteLine($"Content: {download}"); - documents.Add(download); - } + //translationTasks.Add(Task.Run(async () => { + // logger.LogInformation("Beginning translation {} task for {}", download.Order, articleMetaData.Name); + // var ret = new Ordered(await QuickAndDirtyJanitor.TranslateAsync(download.Data), download.Order); + // logger.LogInformation("Finished translation {} task for {}", download.Order, articleMetaData.Name); + // return ret; + //})); + } + + documents = (await Task.WhenAll(translationTasks)).ToList(); string testDir = Path.Combine("txt", Path.GetRandomFileName()); Directory.CreateDirectory(testDir); @@ -113,7 +166,7 @@ namespace Beam.Temporary.Cli { // HtmlBook.Keys.TitlePage, // HtmlBook.Keys.StylesPage, //}.Select( - // (x) => Shared.Files.ReadToString(x.WithNamespace("aeqw89:files:templates:classic")) + // (x) => BeamData.Files.ReadToString(x.WithNamespace("aeqw89:files:templates:classic")) //).ToArray(); //HtmlBook book = new( diff --git a/Beam.Temporary.Cli/Properties/launchSettings.json b/Beam.Temporary.Cli/Properties/launchSettings.json new file mode 100644 index 0000000..2cccade --- /dev/null +++ b/Beam.Temporary.Cli/Properties/launchSettings.json @@ -0,0 +1,10 @@ +{ + "profiles": { + "Beam.Temporary.Cli": { + "commandName": "Project", + "environmentVariables": { + "OPEN_AI_KEY": "REPLACE_WITH_LOCAL_SECRET_DO_NOT_COMMIT" + } + } + } +} \ No newline at end of file diff --git 
a/Beam.Temporary.Cli/QuickAndDirtyJanitor.cs b/Beam.Temporary.Cli/QuickAndDirtyJanitor.cs new file mode 100644 index 0000000..7b03e06 --- /dev/null +++ b/Beam.Temporary.Cli/QuickAndDirtyJanitor.cs @@ -0,0 +1,25 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using OpenAI; +using OpenAI.Chat; + +namespace Beam.Temporary.Cli { + public class QuickAndDirtyJanitor { + static OpenAIClient client; + + static QuickAndDirtyJanitor() { + var key = Environment.GetEnvironmentVariable("OPEN_AI_KEY"); + client = new OpenAIClient(key); + } + + public static async Task TranslateAsync(IDocument document) { + var chatCompletion = await client.GetChatClient("gpt-4.1").CompleteChatAsync( + ChatMessage.CreateSystemMessage("Translate the following text into english. If any part of the text has no direct English translation, you may choose to leave it as is. In either case, make sure to leave footnotes for any difficult to translate words. You must translate the whole text and output only your translation and footnotes. 
No other comments are necessary."), + ChatMessage.CreateUserMessage("From UNKNOWN to ENGLISH.\n" + document.ToString())); + return new StringDocument(document.Filename, chatCompletion.Value.Content.DefaultIfEmpty().Select((x) => x?.Text).Aggregate((x,y) => $"{x}{y}")); + } + } +} diff --git a/Beam.Temporary.Cli/StateChangerFactory.cs b/Beam.Temporary.Cli/StateChangerFactory.cs new file mode 100644 index 0000000..caaf663 --- /dev/null +++ b/Beam.Temporary.Cli/StateChangerFactory.cs @@ -0,0 +1,33 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Text.Json.Serialization; +using System.Threading.Tasks; + +namespace Beam.Temporary.Cli { + public class StateChangerFactory { + public IStateChangeBehaviour Behavior => FactoryTable[StateChangerKey](); + + [JsonInclude] + public string StateChangerKey { get; set; } + + [JsonConstructor] + public StateChangerFactory(string key) { + if (!Keys.Contains(key)) + throw new ArgumentException($"{key} not in keys list", nameof(key)); + StateChangerKey = key; + } + + public static Dictionary> FactoryTable = new() { + { LastAsNumber, () => CommonStateChangers.LastAsNumber }, + { LastAsNumberPrefixed, () => CommonStateChangers.NthAsNumber(^1, true) }, + { Constant, () => CommonStateChangers.Constant }, + }; + + public HashSet Keys = [LastAsNumber, LastAsNumberPrefixed, Constant]; + public const string LastAsNumber = "LastAsNumber"; + public const string LastAsNumberPrefixed = "LastAsNumberPrefixed"; + public const string Constant = "Constant"; + } +} diff --git a/Beam.Temporary.Cli/Template.cs b/Beam.Temporary.Cli/Template.cs new file mode 100644 index 0000000..071e7a8 --- /dev/null +++ b/Beam.Temporary.Cli/Template.cs @@ -0,0 +1,12 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Beam.Temporary.Cli { + public class Template { + public StateChangerFactory Factory { get; set; } + public 
SourceLinkBuilder Builder { get; set; } + } +} diff --git a/Beam.Temporary.Cli/Templates/Classic/ClassicTemplates.cs b/Beam.Temporary.Cli/Templates/Classic/ClassicTemplates.cs index 6b2ac8a..d8f8914 100644 --- a/Beam.Temporary.Cli/Templates/Classic/ClassicTemplates.cs +++ b/Beam.Temporary.Cli/Templates/Classic/ClassicTemplates.cs @@ -6,7 +6,7 @@ using System.Threading.Tasks; namespace Beam.Temporary.Cli.Templates.Classic { internal class ClassicTemplates { - public static void Register(SharedDataDictionary sdd) { + public static void Register(BeamDataDictionary sdd) { sdd.Files.TryAdd( new("aeqw89:files:templates:classic:content_page"), new("C:\\Users\\qwsdc\\source\\repos\\Beam\\Beam.Temporary.Cli\\Templates\\Classic\\Content.template.html", "htmlpage", "templates")); diff --git a/Beam.Temporary.Cli/TextResource.cs b/Beam.Temporary.Cli/TextResource.cs index ff27c57..0d39516 100644 --- a/Beam.Temporary.Cli/TextResource.cs +++ b/Beam.Temporary.Cli/TextResource.cs @@ -10,12 +10,13 @@ using System.Threading.Tasks; namespace Beam.Temporary.Cli { public class TextResource : IKeyed { public required DataKey Key { get; set; } + public string? FriendlyName { get; set; } public DataKey? AssociatedSource { get; set; } public DataKey? AssociatedMetaSource { get; set; } - public required string[] TemplateInitialData { get; set; } - public string?[]? MetaTemplateInitialData { get; set; } + public required State TemplateInitialData { get; set; } + public State? MetaTemplateInitialData { get; set; } - public TextResourceRecord ToRecord(SharedDataDictionary sdd) { + public TextResourceRecord ToRecord(BeamDataDictionary sdd) { return new(this, AssociatedSource is null ? null : sdd.Aggregators[AssociatedSource], AssociatedMetaSource is null ? 
null : sdd.Auxillaries[AssociatedMetaSource]); diff --git a/Beam.Temporary.Cli/WebResource.cs b/Beam.Temporary.Cli/WebResource.cs index f7fcc71..17fbf87 100644 --- a/Beam.Temporary.Cli/WebResource.cs +++ b/Beam.Temporary.Cli/WebResource.cs @@ -19,7 +19,7 @@ namespace Beam.Temporary.Cli { public WebResource() : this(new(string.Empty)) { } - public WebResourceRecord ToRecord(SharedDataDictionary sdd) { + public WebResourceRecord ToRecord(BeamDataDictionary sdd) { return new WebResourceRecord(this, sdd.Bindings[Bindings]); } } diff --git a/Beam/CommonStateChangers.cs b/Beam/CommonStateChangers.cs index fca5416..2c58329 100644 --- a/Beam/CommonStateChangers.cs +++ b/Beam/CommonStateChangers.cs @@ -12,5 +12,26 @@ namespace Beam { throw new InvalidOperationException(S.M.StateChangeError); x[^1] = number + i; }); + + public static IStateChangeBehaviour Constant => new ConstantStateChanger(); + + public static IStateChangeBehaviour NthAsNumber(Index n, bool keepSuffix = true) + => new NumberedStateChanger((x, i) => { + string? 
nth = x[n]?.ToString(); + if (nth is null) + throw new InvalidOperationException(S.M.StateChangeError); + if (!int.TryParse(nth, out var number)) + if (keepSuffix) { + string[] split = nth.Split('.'); + if (!int.TryParse(split[0], out number)) + throw new InvalidOperationException(S.M.StateChangeError); + x[n] = (number + i) + split[1..].Aggregate((x, y) => $"{x}.{y}"); + return; + } else + throw new InvalidOperationException(S.M.StateChangeError); + x[n] = number + i; + }); + + } } diff --git a/Beam/ConstantStateChanger.cs b/Beam/ConstantStateChanger.cs new file mode 100644 index 0000000..72a1480 --- /dev/null +++ b/Beam/ConstantStateChanger.cs @@ -0,0 +1,13 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Beam { + public class ConstantStateChanger : IStateChangeBehaviour { + public void Apply(State state, object stimulus) { + return; + } + } +} diff --git a/Beam/DownloadContext.cs b/Beam/DownloadContext.cs index 6e1691e..ebd34e1 100644 --- a/Beam/DownloadContext.cs +++ b/Beam/DownloadContext.cs @@ -15,11 +15,14 @@ namespace Beam { public class DownloadContext : IDisposable { private bool disposedValue; + public DownloadContextBuilder CreateBuilder() + => DownloadContextBuilder.FromContext(this); + public HtmlWeb Web { get; } public HtmlTransformer Transformer { get; } public AsyncHtmlTransformer AsyncTranformer { get; } - public IProgress? DownloadReporter { get; set; } - public IProgress? RetryReporter { get; set; } + public IProgress? DownloadReporter { get; set; } + public IProgress? RetryReporter { get; set; } public AsyncDownloadFailurePredicate?[]? AsyncFailurePredicates { get; } public TimeSpan TimeOut { get; set; } public IEnumerable Links { get; } @@ -32,8 +35,8 @@ namespace Beam { CancellationToken cancellationToken = default, HtmlTransformer? transformer = null, AsyncHtmlTransformer? asyncTransformer = null, - IProgress? downloadReporter = null, - IProgress? 
retryReporter = null, + IProgress? downloadReporter = null, + IProgress? retryReporter = null, AsyncDownloadFailurePredicate?[]? asyncFailurePredicates = null, TimeSpan? timeOut = null, ILogger? downloadLogger = null) { diff --git a/Beam/DownloadContextBuilder.cs b/Beam/DownloadContextBuilder.cs new file mode 100644 index 0000000..9fd67ca --- /dev/null +++ b/Beam/DownloadContextBuilder.cs @@ -0,0 +1,129 @@ +using HtmlAgilityPack; +using Microsoft.Extensions.Logging; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Beam { + + public class DownloadContextBuilder { + private HtmlWeb _web; + private HtmlTransformer _transformer; + private AsyncHtmlTransformer _asyncTransformer; + private IProgress? _downloadReporter; + private IProgress? _retryReporter; + private AsyncDownloadFailurePredicate?[]? _asyncFailurePredicates; + private TimeSpan _timeOut; + private IEnumerable _links; + private CancellationToken _cancellationToken; + private DocumentCache _cache; + private ILogger? _downloadLogger; + + public DownloadContextBuilder(HtmlWeb? web = null) { + // You can initialize defaults here if needed, e.g.: + // _timeOut = TimeSpan.FromSeconds(30); + // _cancellationToken = CancellationToken.None; + _web = web ?? 
new(); + _links = []; + } + + public DownloadContextBuilder WithWeb(HtmlWeb web) { + _web = web; + return this; + } + + public DownloadContextBuilder WithTransformer(HtmlTransformer transformer) { + _transformer = transformer; + return this; + } + + public DownloadContextBuilder WithAsyncTransformer(AsyncHtmlTransformer asyncTransformer) { + _asyncTransformer = asyncTransformer; + return this; + } + + public DownloadContextBuilder WithDownloadReporter(IProgress downloadReporter) { + _downloadReporter = downloadReporter; + return this; + } + + public DownloadContextBuilder WithRetryReporter(IProgress retryReporter) { + _retryReporter = retryReporter; + return this; + } + + public DownloadContextBuilder WithAsyncFailurePredicates(params AsyncDownloadFailurePredicate[] predicates) { + _asyncFailurePredicates = predicates; + return this; + } + + public DownloadContextBuilder WithTimeOut(TimeSpan timeOut) { + _timeOut = timeOut; + return this; + } + + public DownloadContextBuilder WithLinks(IEnumerable links) { + _links = links; + return this; + } + + public DownloadContextBuilder WithCancellationToken(CancellationToken cancellationToken) { + _cancellationToken = cancellationToken; + return this; + } + + public DownloadContextBuilder WithCache(DocumentCache cache) { + _cache = cache; + return this; + } + + public DownloadContextBuilder WithDownloadLogger(ILogger downloadLogger) { + _downloadLogger = downloadLogger; + return this; + } + + + public DownloadContext Build() { + // Construct the DownloadContext using the collected values. + var context = new DownloadContext( + web: _web, + links: _links, + cancellationToken: _cancellationToken, + transformer: _transformer, + asyncTransformer: _asyncTransformer, + downloadReporter: _downloadReporter, + retryReporter: _retryReporter, + asyncFailurePredicates: _asyncFailurePredicates, + timeOut: _timeOut, + downloadLogger: _downloadLogger + ); + + //// Assign the DocumentCache if it's been set in the builder. 
+ //// (Even though Cache has a private setter, this code assumes builder + //// is in the same assembly or that the setter will be made internal. + //// Otherwise, remove or adjust this line.) + //context.Cache = _cache; + + return context; + } + + public static DownloadContextBuilder FromContext(DownloadContext existing) { + if (existing == null) throw new ArgumentNullException(nameof(existing)); + + return new DownloadContextBuilder(existing.Web) + .WithLinks(existing.Links) + .WithCancellationToken(existing.CancellationToken) + .WithTransformer(existing.Transformer) + .WithAsyncTransformer(existing.AsyncTranformer) + .WithDownloadReporter(existing.DownloadReporter!) + .WithRetryReporter(existing.RetryReporter!) + .WithAsyncFailurePredicates(existing.AsyncFailurePredicates ?? Array.Empty>()) + .WithTimeOut(existing.TimeOut) + .WithDownloadLogger(existing.DownloadLogger!) + .WithCache(existing.Cache); + } + } + +} diff --git a/Beam/DownloadReport.cs b/Beam/DownloadReport.cs new file mode 100644 index 0000000..1dfdf51 --- /dev/null +++ b/Beam/DownloadReport.cs @@ -0,0 +1,6 @@ +namespace Beam { + public struct DownloadReport { + + } + +} diff --git a/Beam/IUnitDownloader.cs b/Beam/IUnitDownloader.cs index a66c202..443b5ea 100644 --- a/Beam/IUnitDownloader.cs +++ b/Beam/IUnitDownloader.cs @@ -1,6 +1,6 @@ namespace Beam { public interface IUnitDownloader { public int LinksPerDownload { get; } - public Task<(bool, T?)> TryDownload(Ordered[] link, CancellationToken ct, int maximumRetryCount = 7, IProgress? tryProgress = null); + public Task<(bool, T?)> TryDownload(Ordered[] link, CancellationToken ct, int maximumRetryCount = 7, IProgress? 
tryProgress = null); } } \ No newline at end of file diff --git a/Beam/NumberedStateChanger.cs b/Beam/NumberedStateChanger.cs index c97a523..de597a6 100644 --- a/Beam/NumberedStateChanger.cs +++ b/Beam/NumberedStateChanger.cs @@ -12,5 +12,9 @@ public virtual void Apply(State state, int amount) { MoveStateDlgte(state, amount); } + + public NumberedStateChanger(IStateChangeBehaviour behavior) : this((x, i) => { + behavior.Apply(x, i); + }) {} } } diff --git a/Beam/OrderedSourceLinkGenerator.cs b/Beam/OrderedSourceLinkGenerator.cs index 2edee29..5d32eb3 100644 --- a/Beam/OrderedSourceLinkGenerator.cs +++ b/Beam/OrderedSourceLinkGenerator.cs @@ -15,10 +15,13 @@ namespace Beam { public NumberedStateChanger Behaviour { get; } private State InitialState; - public OrderedSourceLinkGenerator(SourceLinkBuilder builder, NumberedStateChanger behaviour, params object[] initialState) { + + public OrderedSourceLinkGenerator(SourceLinkBuilder builder, NumberedStateChanger behaviour, params object[] initialState) + : this(builder, behaviour, new State(initialState)) { } + public OrderedSourceLinkGenerator(SourceLinkBuilder builder, NumberedStateChanger behaviour, State initialState) { Builder = builder; Behaviour = behaviour; - InitialState = new State(initialState); + InitialState = initialState; State = InitialState.Copy(); Reset(); diff --git a/Beam/RetryReport.cs b/Beam/RetryReport.cs new file mode 100644 index 0000000..e099673 --- /dev/null +++ b/Beam/RetryReport.cs @@ -0,0 +1,12 @@ +namespace Beam { + public readonly struct RetryReport { + public RetryReport(int tryNumber, string link) { + TryNumber = tryNumber; + Link = link; + } + + public int TryNumber { get; } + public string Link { get; } + } + +} diff --git a/Beam/SequentialChunkDownloader.cs b/Beam/SequentialFragmentDownloader.cs similarity index 100% rename from Beam/SequentialChunkDownloader.cs rename to Beam/SequentialFragmentDownloader.cs diff --git a/Beam/SourceLinkBuilder.cs b/Beam/SourceLinkBuilder.cs index 
1236b76..8a8c21e 100644 --- a/Beam/SourceLinkBuilder.cs +++ b/Beam/SourceLinkBuilder.cs @@ -5,8 +5,16 @@ using System.Text; using System.Threading.Tasks; namespace Beam { - public class Parameter(string name) { + [Flags] + public enum Position { + Before = 0b01, + After = 0b10, + BeforeAndAfter = 0b11 + } + + public class Parameter(string name, Position position = Position.Before) { public string Name { get; set; } = name; + public Position Position { get; set; } = position; } public class LinkSegment(string name, string separator = "", string suffix = "") { @@ -14,6 +22,16 @@ namespace Beam { public List Parameters { get; set; } = []; public string Separator { get; set; } = separator; public string Suffix { get; set; } = suffix; + + public LinkSegment WithParameters(params string[] parameters) { + Parameters = parameters.Select((x) => new Parameter(x)).ToList(); + return this; + } + + public LinkSegment WithParameters(params (string, Position)[] parameters) { + Parameters = parameters.Select((x) => new Parameter(x.Item1, x.Item2)).ToList(); + return this; + } } public class SourceLinkBuilder(string host, string protocol = "https") { @@ -64,6 +82,26 @@ namespace Beam { Segments.Add(new LinkSegment(name, separator)); } + public SourceLinkBuilder WithSegments(params IEnumerable segments) { + Segments = segments.Select((x) => new LinkSegment(x)).ToList(); + return this; + } + + public SourceLinkBuilder WithSegments(int count) + => WithSegments(Enumerable.Repeat("", count)); + + public SourceLinkBuilder WithParameters(int i, params string[] parameters) { + Segments[i] + .WithParameters(parameters); + return this; + } + + public SourceLinkBuilder WithParameters(int i, params (string, Position)[] parameters) { + Segments[i] + .WithParameters(parameters); + return this; + } + public void AddParameters(int segmentIndex, params string[] parameters) { ArgumentOutOfRangeException.ThrowIfGreaterThanOrEqual(segmentIndex, Segments.Count); 
ArgumentOutOfRangeException.ThrowIfNegative(segmentIndex); @@ -97,6 +135,9 @@ namespace Beam { return count; } + public SourceLink Build(State parameterValues) + => Build(parameterValues.GetState()); + public SourceLink Build(params object[] parameterValues) { ArgumentOutOfRangeException.ThrowIfNotEqual(parameterValues.Length, GetParameterCount()); @@ -109,8 +150,11 @@ namespace Beam { link.Append('/'); link.Append(segment.Name); for (int i = 0; i < segment.Parameters.Count; i++) { - link.Append(segment.Parameters[i].Name); + if (segment.Parameters[i].Position.HasFlag(Position.Before)) + link.Append(segment.Parameters[i].Name); link.Append(parameterValues[pvC++]); + if (segment.Parameters[i].Position.HasFlag(Position.After)) + link.Append(segment.Parameters[i].Name); if (i + 1 < segment.Parameters.Count && segment.Separator is not null) link.Append(segment.Separator); } diff --git a/Beam/UnitDownloader.cs b/Beam/UnitDownloader.cs index 7370ae3..e1035d2 100644 --- a/Beam/UnitDownloader.cs +++ b/Beam/UnitDownloader.cs @@ -45,7 +45,7 @@ namespace Beam { } } - public async Task<(bool, T?)> TryDownload(Ordered[] link, CancellationToken ct, int maximumRetryCount = 7, IProgress? tryProgress = null) { + public async Task<(bool, T?)> TryDownload(Ordered[] link, CancellationToken ct, int maximumRetryCount = 7, IProgress? 
tryProgress = null) { if (link.Length == 0) return (false, default); @@ -56,7 +56,8 @@ namespace Beam { (var success, doc) = await TryDownloadWithNoRetries(link[0].Data, ct); if (success && doc != null) return (true, doc); - tryProgress?.Report(++tryCount); + ++tryCount; + tryProgress?.Report(new RetryReport(tryCount, link[0].Data)); await Task.Delay((int)Math.Pow(2, tryCount) * 1000); } diff --git a/Beam/UnitFragmentDownloader.cs b/Beam/UnitFragmentDownloader.cs index 380a910..a297630 100644 --- a/Beam/UnitFragmentDownloader.cs +++ b/Beam/UnitFragmentDownloader.cs @@ -30,7 +30,7 @@ namespace Beam { private readonly UnitDownloader UnitDownloader; - async Task<(bool, Fragment>?)> IUnitDownloader>>.TryDownload(Ordered[] link, CancellationToken ct, int maximumRetryCount, IProgress? tryProgress) { + async Task<(bool, Fragment>?)> IUnitDownloader>>.TryDownload(Ordered[] link, CancellationToken ct, int maximumRetryCount, IProgress? tryProgress) { Fragment> fragment = new Fragment>(link.Length); if (!Fragment>.TryAcquireUpdater(fragment, out var updater)) throw new S.AssertionException(S.M.NewFragmentShouldBeFree);