From a7d148a96f158b62f6f0691ecd2b0ee872af2225 Mon Sep 17 00:00:00 2001 From: qwsdcvghyu89 <61093706+qwsdcvghyu89@users.noreply.github.com> Date: Thu, 18 Sep 2025 18:32:25 +1000 Subject: [PATCH] Introduce Beam.Fluent and Beam.Models projects Added new Beam.Fluent and Beam.Models projects with staged download builder and data context models. Refactored and moved model classes from Beam.Temporary.Cli to Beam.Models. Added new data providers and extended DataBindings in Beam.Dynamic. Renamed Beam.Puppeteer to Beam.Playwright and updated related classes. Updated project references and package versions. Removed obsolete and unused files from Beam.Temporary.Cli. --- Beam.Dynamic/AnchorCollectionDataProvider.cs | 53 +++ Beam.Dynamic/AnchorDataProvider.cs | 31 ++ Beam.Dynamic/Beam.Dynamic.csproj | 4 +- Beam.Dynamic/DataBindings.cs | 179 +++++++- Beam.Dynamic/DropDownDataProvider.cs | 18 +- Beam.Dynamic/IDataProvider.cs | 9 +- Beam.Dynamic/StateChangerFactory.cs | 34 ++ Beam.Fluent/Beam.Fluent.csproj | 31 ++ Beam.Fluent/DownloadBuilder.ContextStage.cs | 186 ++++++++ Beam.Fluent/DownloadBuilder.DownloadStage.cs | 43 ++ .../DownloadBuilder.IAlternativeLinkStage.cs | 7 + ...nloadBuilder.IAlternativeTransformStage.cs | 10 + Beam.Fluent/DownloadBuilder.IContextStage.cs | 17 + Beam.Fluent/DownloadBuilder.IDownloadStage.cs | 14 + Beam.Fluent/DownloadBuilder.ILinkStage.cs | 13 + .../DownloadBuilder.ITransformStage.cs | 9 + Beam.Fluent/DownloadBuilder.LinkStage.cs | 53 +++ Beam.Fluent/DownloadBuilder.SourceKind.cs | 6 + Beam.Fluent/DownloadBuilder.TransformStage.cs | 20 + Beam.Fluent/DownloadBuilder.cs | 41 ++ Beam.Models/Beam.Models.csproj | 26 ++ Beam.Models/BeamDataContext.cs | 50 +++ {Beam.Temporary.Cli => Beam.Models}/File.cs | 2 +- .../LinkCollection.cs | 2 +- Beam.Models/ResourceDictionary.cs | 25 ++ Beam.Models/Template.cs | 15 + .../Tracked.cs | 2 +- .../WebResource.cs | 19 +- .../Beam.Playwright.csproj | 0 .../PlaywrightConfig.cs | 4 +- .../PlaywrightContext.cs | 8 +- .../PlaywrightUnitDownloader.cs | 10 +- .../PlaywrightUnitPageDownloader.cs | 10 +- Beam.Stealth/Beam.Stealth.csproj | 17 + Beam.Stealth/StealthAsyncManipulator.cs | 10 + Beam.Stealth/StealthConfig.cs | 105 +++++ Beam.Stealth/StealthFragmentDownloader.cs | 12 + Beam.Stealth/StealthFragmentPageDownloader.cs | 13 + Beam.Stealth/StealthUnitDownloader.cs | 116 +++++ Beam.Stealth/StealthUnitPageDownloader.cs | 41 ++ Beam.Temporary.Cli/Architecture.cs | 38 +- Beam.Temporary.Cli/Beam.Temporary.Cli.csproj | 21 +- Beam.Temporary.Cli/BeamDataDictionary.cs | 48 -- Beam.Temporary.Cli/CommonTransformers.cs | 19 +- Beam.Temporary.Cli/CssData.cs | 30 -- Beam.Temporary.Cli/CssData.cs.obsolete | 30 ++ Beam.Temporary.Cli/DataKeyExtensions.cs | 34 -- Beam.Temporary.Cli/DownloadBuilder.cs | 284 ------------ .../{HtmlBook.cs => HtmlBook.cs.obsolete} | 0 ...lates.cs => HtmlBookTemplates.cs.obsolete} | 0 Beam.Temporary.Cli/MainArchitecture.cs | 70 +-- Beam.Temporary.Cli/NovelDefinitionBuilder.cs | 105 +++++ Beam.Temporary.Cli/NovelStatics.cs | 412 +++++++++++++++--- Beam.Temporary.Cli/Program.cs | 203 ++++++--- Beam.Temporary.Cli/StateChangerFactory.cs | 8 +- Beam.Temporary.Cli/TableOfContentsData.cs | 18 + Beam.Temporary.Cli/Template.cs | 12 - Beam.Temporary.Cli/TextResource.cs | 27 -- Beam.sln | 20 +- Beam/ApiCall.cs | 33 +- Beam/ApiCallBuilder.cs | 11 + Beam/ArticleData.cs | 3 +- Beam/Beam.csproj | 4 +- Beam/CommonStateChangers.cs | 4 +- Beam/ImmutableState.cs | 18 +- Beam/OrderedSourceLinkGenerator.cs | 2 +- Beam/S.cs | 3 + Beam/SequentialDownloader.cs | 5 +- Beam/SourceLinkBuilder.cs | 48 +- Beam/State.cs | 12 +- aeqw89.Beam/Class1.cs | 5 - aeqw89.Beam/aeqw89.Beam.csproj | 29 +- 72 files changed, 2100 insertions(+), 721 deletions(-) create mode 100644 Beam.Dynamic/AnchorCollectionDataProvider.cs create mode 100644 Beam.Dynamic/AnchorDataProvider.cs create mode 100644 Beam.Dynamic/StateChangerFactory.cs create mode 100644 Beam.Fluent/Beam.Fluent.csproj create mode 100644 Beam.Fluent/DownloadBuilder.ContextStage.cs create mode 100644 Beam.Fluent/DownloadBuilder.DownloadStage.cs create mode 100644 Beam.Fluent/DownloadBuilder.IAlternativeLinkStage.cs create mode 100644 Beam.Fluent/DownloadBuilder.IAlternativeTransformStage.cs create mode 100644 Beam.Fluent/DownloadBuilder.IContextStage.cs create mode 100644 Beam.Fluent/DownloadBuilder.IDownloadStage.cs create mode 100644 Beam.Fluent/DownloadBuilder.ILinkStage.cs create mode 100644 Beam.Fluent/DownloadBuilder.ITransformStage.cs create mode 100644 Beam.Fluent/DownloadBuilder.LinkStage.cs create mode 100644 Beam.Fluent/DownloadBuilder.SourceKind.cs create mode 100644 Beam.Fluent/DownloadBuilder.TransformStage.cs create mode 100644 Beam.Fluent/DownloadBuilder.cs create mode 100644 Beam.Models/Beam.Models.csproj create mode 100644 Beam.Models/BeamDataContext.cs rename {Beam.Temporary.Cli => Beam.Models}/File.cs (83%) rename {Beam.Temporary.Cli => Beam.Models}/LinkCollection.cs (91%) create mode 100644 Beam.Models/ResourceDictionary.cs create mode 100644 Beam.Models/Template.cs rename {Beam.Temporary.Cli => Beam.Models}/Tracked.cs (88%) rename {Beam.Temporary.Cli => Beam.Models}/WebResource.cs (55%) rename Beam.Puppeteer/Beam.Puppeteer.csproj => Beam.Playwright/Beam.Playwright.csproj (100%) rename Beam.Puppeteer/PuppetConfig.cs => Beam.Playwright/PlaywrightConfig.cs (65%) rename Beam.Puppeteer/PuppetContext.cs => Beam.Playwright/PlaywrightContext.cs (72%) rename Beam.Puppeteer/PuppetUnitDownloader.cs => Beam.Playwright/PlaywrightUnitDownloader.cs (70%) rename Beam.Puppeteer/PuppetUnitPageDownloader.cs => Beam.Playwright/PlaywrightUnitPageDownloader.cs (68%) create mode 100644 Beam.Stealth/Beam.Stealth.csproj create mode 100644 Beam.Stealth/StealthAsyncManipulator.cs create mode 100644 Beam.Stealth/StealthConfig.cs create mode 100644 Beam.Stealth/StealthFragmentDownloader.cs create mode 100644 Beam.Stealth/StealthFragmentPageDownloader.cs create mode 100644 Beam.Stealth/StealthUnitDownloader.cs create mode 100644 Beam.Stealth/StealthUnitPageDownloader.cs delete mode 100644 Beam.Temporary.Cli/BeamDataDictionary.cs delete mode 100644 Beam.Temporary.Cli/CssData.cs create mode 100644 Beam.Temporary.Cli/CssData.cs.obsolete delete mode 100644 Beam.Temporary.Cli/DataKeyExtensions.cs delete mode 100644 Beam.Temporary.Cli/DownloadBuilder.cs rename Beam.Temporary.Cli/{HtmlBook.cs => HtmlBook.cs.obsolete} (100%) rename Beam.Temporary.Cli/{HtmlBookTemplates.cs => HtmlBookTemplates.cs.obsolete} (100%) create mode 100644 Beam.Temporary.Cli/NovelDefinitionBuilder.cs create mode 100644 Beam.Temporary.Cli/TableOfContentsData.cs delete mode 100644 Beam.Temporary.Cli/Template.cs delete mode 100644 Beam.Temporary.Cli/TextResource.cs delete mode 100644 aeqw89.Beam/Class1.cs diff --git a/Beam.Dynamic/AnchorCollectionDataProvider.cs b/Beam.Dynamic/AnchorCollectionDataProvider.cs new file mode 100644 index 0000000..e22e977 --- /dev/null +++ b/Beam.Dynamic/AnchorCollectionDataProvider.cs @@ -0,0 +1,53 @@ +using HtmlAgilityPack; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Beam.Dynamic { + public class AnchorCollectionDataProvider : IDataProvider, IDataProvider { + public IBinding? Content { get; set; } + public string? RelativeTo { get; set; } + + private string GetAbsolute(string? @base, string relative) { + if (@base is null) + return relative; + + if (@base.EndsWith('/')) + @base = @base[..^1]; + if (relative.StartsWith('/')) + relative = relative[1..]; + return @base + '/' + relative; + } + + public string[] Get(HtmlDocument document) { + if (Content is null) + return []; + + var node = Content.Select(document); + if (node is null) + return []; + + List links = []; + foreach (var child in node.Descendants()) + links.Add(child.GetAttributeValue("href", "")); + + return links.Where(x => !string.IsNullOrWhiteSpace(x)).ToArray(); + } + + SourceLink[] IDataProvider.Get(HtmlDocument document) { + var links = Get(document); + + if (links.Length == 0) + return []; + + List slinks = []; + foreach (var link in links) + if (Uri.TryCreate(GetAbsolute(RelativeTo, link), UriKind.RelativeOrAbsolute, out _)) + slinks.Add(new SourceLink(GetAbsolute(RelativeTo, link))); + + return slinks.ToArray(); + } + } +} diff --git a/Beam.Dynamic/AnchorDataProvider.cs b/Beam.Dynamic/AnchorDataProvider.cs new file mode 100644 index 0000000..6bf1909 --- /dev/null +++ b/Beam.Dynamic/AnchorDataProvider.cs @@ -0,0 +1,31 @@ +using HtmlAgilityPack; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Beam.Dynamic { + public class AnchorDataProvider : IDataProvider, IDataProvider { + public IBinding? Content { get; set; } + + public string Get(HtmlDocument document) { + if (Content is null) + return ""; + + return Content.Select(document)?.GetAttributeValue("href", "") ?? ""; + + } + + SourceLink IDataProvider.Get(HtmlDocument document) { + var content = Get(document); + if (content is null) + return SourceLink.InvalidLink; + + if (!Uri.TryCreate(content, UriKind.RelativeOrAbsolute, out _)) + return SourceLink.InvalidLink; + + return new SourceLink(content); + } + } +} diff --git a/Beam.Dynamic/Beam.Dynamic.csproj b/Beam.Dynamic/Beam.Dynamic.csproj index 0fe0a46..df7f2f2 100644 --- a/Beam.Dynamic/Beam.Dynamic.csproj +++ b/Beam.Dynamic/Beam.Dynamic.csproj @@ -6,8 +6,8 @@ enable - - + + diff --git a/Beam.Dynamic/DataBindings.cs b/Beam.Dynamic/DataBindings.cs index 193ffc3..77b4371 100644 --- a/Beam.Dynamic/DataBindings.cs +++ b/Beam.Dynamic/DataBindings.cs @@ -1,22 +1,161 @@ using HtmlAgilityPack; +using System.Text.Json.Serialization; namespace Beam.Dynamic { public record class DataBindings { - public IDataProvider? Title { get; set; } - public IDataProvider? Authors { get; set; } - public IDataProvider? Description { get; set; } - public IDataProvider? Content { get; set; } - public IDataProvider? Language { get; set; } - public IDataProvider? Tags { get; set; } + #region ---------------------- Common Bindings ---------------------- + [JsonIgnore] + public IDataProvider? Title { + get => Get(nameof(Title)); + set => Providers[nameof(Title)] = value; + } + [JsonIgnore] + public IDataProvider? Authors { + get => Get(nameof(Authors)); + set => Providers[nameof(Authors)] = value; + } + [JsonIgnore] + public IDataProvider? Description { + get => Get(nameof(Description)); + set => Providers[nameof(Description)] = value; + } + [JsonIgnore] + public IDataProvider? Content { + get => Get(nameof(Content)); + set => Providers[nameof(Content)] = value; + } + [JsonIgnore] + public IDataProvider? Language { + get => Get(nameof(Language)); + set => Providers[nameof(Language)] = value; + } + [JsonIgnore] + public IDataProvider? Tags { + get => Get(nameof(Tags)); + set => Providers[nameof(Tags)] = value; + } + [JsonIgnore] + public IDataProvider? Publisher { + get => Get(nameof(Publisher)); + set => Providers[nameof(Publisher)] = value; + } + [JsonIgnore] + public IDataProvider? PublicationDate { + get => Get(nameof(PublicationDate)); + set => Providers[nameof(PublicationDate)] = value; + } + [JsonIgnore] + public IDataProvider? ISBN { + get => Get(nameof(ISBN)); + set => Providers[nameof(ISBN)] = value; + } + [JsonIgnore] + public IDataProvider? PageCount { + get => Get(nameof(PageCount)); + set => Providers[nameof(PageCount)] = value; + } + [JsonIgnore] + public IDataProvider? CoverImage { + get => Get(nameof(CoverImage)); + set => Providers[nameof(CoverImage)] = value; + } + [JsonIgnore] + public IDataProvider? Series { + get => Get(nameof(Series)); + set => Providers[nameof(Series)] = value; + } + [JsonIgnore] + public IDataProvider? Edition { + get => Get(nameof(Edition)); + set => Providers[nameof(Edition)] = value; + } + [JsonIgnore] + public IDataProvider? Contributors { + get => Get(nameof(Contributors)); + set => Providers[nameof(Contributors)] = value; + } + [JsonIgnore] + public IDataProvider? Subjects { + get => Get(nameof(Subjects)); + set => Providers[nameof(Subjects)] = value; + } + [JsonIgnore] + public IDataProvider? Rights { + get => Get(nameof(Rights)); + set => Providers[nameof(Rights)] = value; + } + [JsonIgnore] + public IDataProvider? TableOfContents { + get => Get(nameof(TableOfContents)); + set => Providers[nameof(TableOfContents)] = value; + } + [JsonIgnore] + public IDataProvider? PagesDropDown { + get => Get(nameof(PagesDropDown)); + set => Providers[nameof(PagesDropDown)] = value; + } + [JsonIgnore] + public IDataProvider? NextPageButton { + get => Get(nameof(NextPageButton)); + set => Providers[nameof(NextPageButton)] = value; + } + [JsonIgnore] + public IDataProvider? PreviousPageButton { + get => Get(nameof(PreviousPageButton)); + set => Providers[nameof(PreviousPageButton)] = value; + } + #endregion + + public Dictionary Providers { get; set; } = []; + + private IDataProvider? Get(string key) { + if (Providers.TryGetValue(key, out var k) && k is IDataProvider ks) + return ks; + return default; + } public virtual ResolvedBindings Resolve(HtmlDocument doc) { - return new ResolvedBindings() { + // explicit fields already handled below + var mappedKeys = new HashSet { + nameof(Title), nameof(Authors), nameof(Description), nameof(Content), + nameof(Language), nameof(Tags), nameof(Publisher), nameof(PublicationDate), + nameof(ISBN), nameof(PageCount), nameof(CoverImage), nameof(Series), + nameof(Edition), nameof(Contributors), nameof(Subjects), nameof(Rights), + nameof(TableOfContents), nameof(PagesDropDown), nameof(NextPageButton), + nameof(PreviousPageButton) + }; + + var additional = new Dictionary(); + + foreach (var (key, provider) in Providers) { + if (!mappedKeys.Contains(key) && provider is not null) { + // dynamic call so any IDataProvider works + additional[key] = ((dynamic)provider).Get(doc); + } + } + + return new ResolvedBindings { Title = Title?.Get(doc), Authors = Authors?.Get(doc) ?? [], - Language = Language?.Get(doc), - Content = Content?.Get(doc), Description = Description?.Get(doc), - Tags = Tags?.Get(doc) ?? [] + Content = Content?.Get(doc), + Language = Language?.Get(doc), + Tags = Tags?.Get(doc) ?? [], + Publisher = Publisher?.Get(doc), + PublicationDate = PublicationDate?.Get(doc), + ISBN = ISBN?.Get(doc), + PageCount = PageCount?.Get(doc), + CoverImage = CoverImage?.Get(doc), + Series = Series?.Get(doc) ?? [], + Edition = Edition?.Get(doc), + Contributors = Contributors?.Get(doc) ?? [], + Subjects = Subjects?.Get(doc) ?? [], + Rights = Rights?.Get(doc), + TableOfContents = TableOfContents?.Get(doc) ?? [], + PagesDropDown = PagesDropDown?.Get(doc), + NextPageButton = NextPageButton?.Get(doc), + PreviousPageButton = PreviousPageButton?.Get(doc), + Additional = additional }; } } @@ -28,5 +167,25 @@ namespace Beam.Dynamic { public string? Content { get; set; } public string[]? Language { get; set; } public string[]? Tags { get; set; } + public string? Publisher { get; set; } + public DateTimeOffset? PublicationDate { get; set; } + public string? ISBN { get; set; } + public int? PageCount { get; set; } + public SourceLink? CoverImage { get; set; } + public string[]? Series { get; set; } + public int? Edition { get; set; } + public string[]? Contributors { get; set; } + public string[]? Subjects { get; set; } + public string? Rights { get; set; } + public SourceLink[]? TableOfContents { get; set; } + public SourceLink[]? PagesDropDown { get; set; } + public SourceLink? NextPageButton { get; set; } + public SourceLink? PreviousPageButton { get; set; } + + /// + /// Values resolved from any providers whose keys aren’t represented + /// by the named properties above. + /// + public Dictionary Additional { get; set; } = []; } } diff --git a/Beam.Dynamic/DropDownDataProvider.cs b/Beam.Dynamic/DropDownDataProvider.cs index 6750758..ad2ef23 100644 --- a/Beam.Dynamic/DropDownDataProvider.cs +++ b/Beam.Dynamic/DropDownDataProvider.cs @@ -2,6 +2,8 @@ using System; using System.Collections.Generic; using System.Linq; +using System.Runtime.InteropServices; +using System.Runtime.InteropServices.Marshalling; using System.Text; using System.Text.Json; using System.Threading.Tasks; @@ -12,6 +14,18 @@ namespace Beam.Dynamic { IDataProvider, IDataProvider { public IBinding? Content { get; set; } + public string? RelativeTo { get; set; } + + private string GetAbsolute(string? @base, string relative) { + if (@base is null) + return relative; + + if (@base.EndsWith('/')) + @base = @base[..^1]; + if (relative.StartsWith('/')) + relative = relative[1..]; + return @base + '/' + relative; + } public SourceLink[] Get(HtmlDocument document) { if (Content is null) @@ -22,9 +36,9 @@ namespace Beam.Dynamic { List links = []; foreach (var child in node.ChildNodes.Where(x => x.Name == "option")) { var childValue = child.GetAttributeValue("value", null); - if (!Uri.TryCreate(childValue, UriKind.Absolute, out _)) + if (!Uri.TryCreate(GetAbsolute(RelativeTo, childValue), UriKind.Absolute, out _)) continue; - links.Add(new SourceLink(childValue)); + links.Add(new SourceLink(GetAbsolute(RelativeTo, childValue))); } return links.ToArray(); diff --git a/Beam.Dynamic/IDataProvider.cs b/Beam.Dynamic/IDataProvider.cs index d46edad..2988b22 100644 --- a/Beam.Dynamic/IDataProvider.cs +++ b/Beam.Dynamic/IDataProvider.cs @@ -7,7 +7,14 @@ namespace Beam.Dynamic { [JsonDerivedType(typeof(ContentsArrayDataProvider), "array")] [JsonDerivedType(typeof(ContentsDataProvider), "single")] [JsonDerivedType(typeof(DropDownDataProvider), "dropdown")] - public interface IDataProvider { + [JsonDerivedType(typeof(AnchorCollectionDataProvider), "anchor-list")] + [JsonDerivedType(typeof(AnchorDataProvider), "anchor")] + public interface IDataProvider { + public string GetString(HtmlDocument document) + => (this as IDataProvider)?.Get(document)?.ToString() ?? ""; + } + + public interface IDataProvider : IDataProvider { public T Get(HtmlDocument document); //public HtmlNode? GetNode(HtmlDocument document); } diff --git a/Beam.Dynamic/StateChangerFactory.cs b/Beam.Dynamic/StateChangerFactory.cs new file mode 100644 index 0000000..a788464 --- /dev/null +++ b/Beam.Dynamic/StateChangerFactory.cs @@ -0,0 +1,34 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Text.Json.Serialization; +using System.Threading.Tasks; + +namespace Beam.Dynamic { + public class StateChangerFactory { + [JsonIgnore] + public IStateChangeBehaviour Behavior => FactoryTable[StateChangerKey](); + + [JsonInclude] + public string StateChangerKey { get; set; } + + [JsonConstructor] + public StateChangerFactory(string stateChangerKey) { + if (!Keys.Contains(stateChangerKey)) + throw new ArgumentException($"{stateChangerKey} not in keys list", nameof(stateChangerKey)); + StateChangerKey = stateChangerKey; + } + + public static Dictionary> FactoryTable = new() { + { LastAsNumber, () => CommonStateChangers.LastAsNumber }, + { LastAsNumberPrefixed, () => CommonStateChangers.NthAsNumber(^1, true) }, + { Constant, () => CommonStateChangers.Constant }, + }; + + public HashSet Keys = [LastAsNumber, LastAsNumberPrefixed, Constant]; + public const string LastAsNumber = "LastAsNumber"; + public const string LastAsNumberPrefixed = "LastAsNumberPrefixed"; + public const string Constant = "Constant"; + } +} diff --git a/Beam.Fluent/Beam.Fluent.csproj b/Beam.Fluent/Beam.Fluent.csproj new file mode 100644 index 0000000..62ea023 --- /dev/null +++ b/Beam.Fluent/Beam.Fluent.csproj @@ -0,0 +1,31 @@ + + + + net9.0 + enable + enable + + + + + + + + + + + + all + + + + all + + + all + + + all + + + \ No newline at end of file diff --git a/Beam.Fluent/DownloadBuilder.ContextStage.cs b/Beam.Fluent/DownloadBuilder.ContextStage.cs new file mode 100644 index 0000000..aedf227 --- /dev/null +++ b/Beam.Fluent/DownloadBuilder.ContextStage.cs @@ -0,0 +1,186 @@ +using HtmlAgilityPack; +using Beam.Playwright; +using Beam.Stealth; + +namespace Beam.Fluent { +public static partial class DownloadBuilder { + private sealed class ContextStage : IContextStage { + private readonly DownloadContextBuilder _ctxBuilder; + private readonly AsyncTransformer _transformer; + private int _parallelism = 4; + private bool _useFragments = false; + private PlaywrightAsyncManipulator? _usePlaywrightManipulator = null; + private StealthAsyncManipulator? _useStealthManipulator = null; + private StealthConfig? _stealthConfig = null; + + public ContextStage(DownloadContextBuilder ctxBuilder, AsyncTransformer transformer) { + _ctxBuilder = ctxBuilder; + _transformer = transformer; + } + + public IContextStage Configure(Action> configure) { + configure(_ctxBuilder); + return this; + } + + public IContextStage WithParallelism(int degree) { + _parallelism = Math.Max(1, degree); + return this; + } + + public IContextStage WithTimeout(TimeSpan timeout) { + _ctxBuilder.WithTimeOut(timeout); + return this; + } + + public IContextStage WithRetryReporter(IProgress reporter) { + _ctxBuilder.WithRetryReporter(reporter); + return this; + } + + /// + /// Uses fragments to download multiple links in parallel. This strategy is mutually exclusive with + /// + /// + public IContextStage UseFragments() { + if (_usePlaywrightManipulator is not null) + _usePlaywrightManipulator = null; + + _useFragments = true; + return this; + } + + /// + /// Use a puppet browser to download the links. This strategy is mutually exclusive with + /// + /// The page manipulator + /// + public IContextStage UsePlaywright(PlaywrightAsyncManipulator manipulator) { + if (_useFragments) + _useFragments = false; + if (_useStealthManipulator is not null) + _useStealthManipulator = null; + + _usePlaywrightManipulator = manipulator; + return this; + } + + public IContextStage UseStealth(StealthAsyncManipulator manipulator, StealthConfig config) { + if (_usePlaywrightManipulator is not null) + _usePlaywrightManipulator = null; + + _useStealthManipulator = manipulator; + _stealthConfig = config; + return this; + } + + private object ConstructUnitDownloader(DownloadContext context) { + return (_useFragments, _useStealthManipulator, _usePlaywrightManipulator, _transformer, context.AsyncFailurePredicates) switch { + // ──────────────── fragmented HTML ──────────────── + (true, null, _, AsyncTransformer asyncHtmlTransformer, + AsyncDownloadFailurePredicate[] documentFailurePredicates) + => new UnitFragmentDownloader( + context.Web, + asyncHtmlTransformer, + documentFailurePredicates, + _parallelism, + context.DownloadLogger), + // ──────────────── fragmented binary ──────────────── + (true, null, _, AsyncTransformer asyncBinaryTransformer, + AsyncDownloadFailurePredicate[] responseFailurePredicates) + => new UnitFragmentDownloaderBinary( + context.Client, + asyncBinaryTransformer, + responseFailurePredicates, + _parallelism, + context.DownloadLogger), + // ──────────────── single HTML ──────────────── + (false, null, null, AsyncTransformer asyncHtmlTransformer, + AsyncDownloadFailurePredicate[] documentFailurePredicates) + => new UnitDownloader( + context.Web, + asyncHtmlTransformer, + documentFailurePredicates), + // ──────────────── single binary ──────────────── + (false, null, null, AsyncTransformer asyncBinaryTransformer, + AsyncDownloadFailurePredicate[] responseFailurePredicates) + => new UnitDownloaderBinary( + context.Client, + asyncBinaryTransformer, + responseFailurePredicates), + // ──────────────── single playwright binary ──────────────── + (false, _, PlaywrightAsyncManipulator manipulator, AsyncTransformer asyncBinaryTransformer, + AsyncDownloadFailurePredicate[] responseFailurePredicates) + => new PlaywrightUnitDownloader( + context.Client, + manipulator, + asyncBinaryTransformer, + responseFailurePredicates), + // ──────────────── single playwrigt HTML ──────────────── + (false, _, PlaywrightAsyncManipulator manipulator, AsyncTransformer asyncHtmlTransformer, + AsyncDownloadFailurePredicate[] documentFailurePredicates) + => new PlaywrightUnitPageDownloader( + context.Web, + manipulator, + asyncHtmlTransformer, + documentFailurePredicates), + // ──────────────── single stealth HTML ──────────────── + (false, StealthAsyncManipulator manipulator, _, AsyncTransformer asyncHtmlTransformer, + AsyncDownloadFailurePredicate[] documentFailurePredicates) + => new StealthUnitPageDownloader( + context.Web, + _stealthConfig ?? throw new Exception($"Stealth config is null"), + manipulator, + asyncHtmlTransformer, + documentFailurePredicates), + // ──────────────── single stealth binary ──────────────── + (false, StealthAsyncManipulator manipulator, _, AsyncTransformer asyncBinaryTransformer, + AsyncDownloadFailurePredicate[] responseFailurePredicates) + => new StealthUnitDownloader( + context.Client, + _stealthConfig ?? throw new Exception($"Stealth config is null"), + manipulator, + asyncBinaryTransformer, + responseFailurePredicates), + // ──────────────── fragment stealth HTML ──────────────── + (true, StealthAsyncManipulator manipulator, _, AsyncTransformer asyncHtmlTransformer, + AsyncDownloadFailurePredicate[] downloadFailurePredicates) + => new StealthFragmentPageDownloader( + context.Web, + _stealthConfig ?? throw new Exception($"Stealth config is null"), + manipulator, + asyncHtmlTransformer), + // ──────────────── fragment stealth binary ──────────────── + (true, StealthAsyncManipulator manipulator, _, AsyncTransformer asyncBinaryTransformer, + AsyncDownloadFailurePredicate[] responseFailurePredicates) + => new StealthFragmentDownloader( + context.Client, + _stealthConfig ?? throw new Exception($"Stealth config is null"), + manipulator, + asyncBinaryTransformer), + _ => throw new Exception($"Unsupported transformer / failure-predicate combinatAsyion. Missing pattern: {_useFragments} , {_transformer.GetType().AsUniqueName()} , {context.AsyncFailurePredicates?.GetType().AsUniqueName()}"), + }; + } + + private IAsyncEnumerator> ConstructDownloader(DownloadContext context) { + var copyOfContext = context.CreateBuilder().Build(); + return _useFragments switch { + true => new SequentialFragmentDownloader( + copyOfContext, + ctx => (IUnitDownloader>>)ConstructUnitDownloader(ctx), + context.DownloadLogger).UnwrapFragmented(), + false => new SequentialDownloader( + copyOfContext, + ctx => (IUnitDownloader)ConstructUnitDownloader(ctx), + context.DownloadLogger).WrapOrdered() + }; + } + + public DownloadEnumerable Build() { + var context = _ctxBuilder.Build(); + var enumerable = new DownloadEnumerable(ConstructDownloader(context)); + return enumerable; + } + } + } +} diff --git a/Beam.Fluent/DownloadBuilder.DownloadStage.cs b/Beam.Fluent/DownloadBuilder.DownloadStage.cs new file mode 100644 index 0000000..4909c6f --- /dev/null +++ b/Beam.Fluent/DownloadBuilder.DownloadStage.cs @@ -0,0 +1,43 @@ +using System.Collections.Concurrent; +using System.Text.Json; + +namespace Beam.Fluent { +public static partial class DownloadBuilder { + private sealed class DownloadStage(DownloadEnumerable download) : IDownloadStage { + private IAsyncEnumerable> _download = download; + + public DownloadEnumerable AsAsyncEnumerable() { + return new DownloadEnumerable(_download.GetAsyncEnumerator()); + } + + private async IAsyncEnumerable> _SaveToDirectory(string dir) { + Directory.CreateDirectory(dir); + await foreach(var download in _download) { + await System.IO.File.WriteAllTextAsync(Path.Combine(dir, $"{Path.GetRandomFileName()}.{download.Order}.json"), JsonSerializer.Serialize(dir)); + yield return download; + } + } + + public IDownloadStage SaveToDirectory(string dir) { + _download = _SaveToDirectory(dir); + return this; + } + + public IDownloadStage SaveToFiles(IEnumerable files) { + throw new NotImplementedException(); + } + + public IDownloadStage SaveToMemory(ConcurrentBag bag) { + throw new NotImplementedException(); + } + + public void WaitForDownload() { + throw new NotImplementedException(); + } + + public Task WaitForDownloadAsync() { + throw new NotImplementedException(); + } + } + } +} diff --git a/Beam.Fluent/DownloadBuilder.IAlternativeLinkStage.cs b/Beam.Fluent/DownloadBuilder.IAlternativeLinkStage.cs new file mode 100644 index 0000000..25b64d9 --- /dev/null +++ b/Beam.Fluent/DownloadBuilder.IAlternativeLinkStage.cs @@ -0,0 +1,7 @@ +namespace Beam.Fluent { +public static partial class DownloadBuilder { + public interface IAlternativeLinkStage { + IAlternativeTransformStage WithLinks(IEnumerable links); + } + } +} diff --git a/Beam.Fluent/DownloadBuilder.IAlternativeTransformStage.cs b/Beam.Fluent/DownloadBuilder.IAlternativeTransformStage.cs new file mode 100644 index 0000000..b28f376 --- /dev/null +++ b/Beam.Fluent/DownloadBuilder.IAlternativeTransformStage.cs @@ -0,0 +1,10 @@ +namespace Beam.Fluent { +public static partial class DownloadBuilder { + public interface IAlternativeTransformStage { + IContextStage WithTransformer(AsyncTransformer transformer); + IContextStage WithTransformer(Func transformer) { + return WithTransformer(rt => Task.FromResult(transformer(rt))); + } + } + } +} diff --git a/Beam.Fluent/DownloadBuilder.IContextStage.cs b/Beam.Fluent/DownloadBuilder.IContextStage.cs new file mode 100644 index 0000000..074d7dc --- /dev/null +++ b/Beam.Fluent/DownloadBuilder.IContextStage.cs @@ -0,0 +1,17 @@ +using Beam.Playwright; +using Beam.Stealth; + +namespace Beam.Fluent { +public static partial class DownloadBuilder { + public interface IContextStage { + IContextStage Configure(Action> configure); + IContextStage WithParallelism(int degree); + IContextStage WithTimeout(TimeSpan timeout); + IContextStage WithRetryReporter(IProgress reporter); + DownloadEnumerable Build(); + IContextStage UseFragments(); + IContextStage UsePlaywright(PlaywrightAsyncManipulator manipulator); + IContextStage UseStealth(StealthAsyncManipulator manipulator, StealthConfig config); + } + } +} diff --git a/Beam.Fluent/DownloadBuilder.IDownloadStage.cs b/Beam.Fluent/DownloadBuilder.IDownloadStage.cs new file mode 100644 index 0000000..99b7f85 --- /dev/null +++ b/Beam.Fluent/DownloadBuilder.IDownloadStage.cs @@ -0,0 +1,14 @@ +using System.Collections.Concurrent; + +namespace Beam.Fluent { +public static partial class DownloadBuilder { + public interface IDownloadStage { + IDownloadStage SaveToDirectory(string dir); + IDownloadStage SaveToFiles(IEnumerable files); + IDownloadStage SaveToMemory(ConcurrentBag bag); + DownloadEnumerable AsAsyncEnumerable(); + void WaitForDownload(); + Task WaitForDownloadAsync(); + } + } +} diff --git a/Beam.Fluent/DownloadBuilder.ILinkStage.cs b/Beam.Fluent/DownloadBuilder.ILinkStage.cs new file mode 100644 index 0000000..17712bf --- /dev/null +++ b/Beam.Fluent/DownloadBuilder.ILinkStage.cs @@ -0,0 +1,13 @@ +using Beam.Models; + +namespace Beam.Fluent { + public static partial class DownloadBuilder { + /* ────────────────────────────── Stages ─────────────────────────────── */ + + public interface ILinkStage { + ITransformStage WithLink(); + ITransformStage WithLinkGenerator(); + ILinkStage WithRange(Range range); + } + } +} diff --git a/Beam.Fluent/DownloadBuilder.ITransformStage.cs b/Beam.Fluent/DownloadBuilder.ITransformStage.cs new file mode 100644 index 0000000..07af773 --- /dev/null +++ b/Beam.Fluent/DownloadBuilder.ITransformStage.cs @@ -0,0 +1,9 @@ +using Beam.Dynamic; + +namespace Beam.Fluent { +public static partial class DownloadBuilder { + public interface ITransformStage { + IContextStage WithTransformer(Func> factory); + } + } +} diff --git a/Beam.Fluent/DownloadBuilder.LinkStage.cs b/Beam.Fluent/DownloadBuilder.LinkStage.cs new file mode 100644 index 0000000..a2796fb --- /dev/null +++ b/Beam.Fluent/DownloadBuilder.LinkStage.cs @@ -0,0 +1,53 @@ +using Beam.Models; + +namespace Beam.Fluent { + public static partial class DownloadBuilder { + /* ──────────────────────────── Stage types ─────────────────────────── */ + + + private sealed record LinkStage( + WebResource Source, + State Initial, + BeamDataContext Data, + DownloadContextBuilder CtxBuilder) : ILinkStage, IAlternativeLinkStage { + + private State? endState; + private bool linksFrozen = false; + + public ITransformStage WithLink() { + var link = Data.Templates[Source.Key].Builder.Build(Initial); + CtxBuilder.WithLinks([link]); + return new TransformStage(Source, Data, CtxBuilder); + } + + public ITransformStage WithLinkGenerator() { + var template = Data.Templates[Source.Key]; + var generator = SourceLinkEnumerable.FromGenerator(new OrderedSourceLinkGenerator( + template.Builder, + new NumberedStateChanger(template.Factory.Behavior), + Initial, endState)); + CtxBuilder.WithLinks(generator); + linksFrozen = true; + return new TransformStage(Source, Data, CtxBuilder); + } + + public IAlternativeTransformStage WithLinks(IEnumerable links) { + CtxBuilder.WithLinks(links); + return new TransformStage(Source, Data, CtxBuilder); + } + + public ILinkStage WithRange(Range range) { + if (linksFrozen) + throw new InvalidOperationException($"WithRange must be called before WithLinkGenerator"); + if (range.End.Value < range.Start.Value) + throw new ArgumentOutOfRangeException(nameof(range), $" start must be < end"); + var template = Data.Templates[Source.Key]; + var stateChanger = new NumberedStateChanger(template.Factory.Behavior); + endState = Initial.Copy(); + stateChanger.Apply(Initial, range.Start.Value - 1); + stateChanger.Apply(endState, range.End.Value - 1); + return this; + } + } + } +} diff --git a/Beam.Fluent/DownloadBuilder.SourceKind.cs b/Beam.Fluent/DownloadBuilder.SourceKind.cs new file mode 100644 index 0000000..46de3e9 --- /dev/null +++ b/Beam.Fluent/DownloadBuilder.SourceKind.cs @@ -0,0 +1,6 @@ +namespace Beam.Fluent { + public static partial class DownloadBuilder { + /* ────────────────────────── Implementation ────────────────────────── */ + private enum SourceKind { Meta, Text } + } +} diff --git a/Beam.Fluent/DownloadBuilder.TransformStage.cs b/Beam.Fluent/DownloadBuilder.TransformStage.cs new file mode 100644 index 0000000..bd77cf3 --- /dev/null +++ b/Beam.Fluent/DownloadBuilder.TransformStage.cs @@ -0,0 +1,20 @@ +using Beam.Dynamic; +using Beam.Models; + +namespace Beam.Fluent { +public static partial class DownloadBuilder { + private sealed record TransformStage( + WebResource Source, + BeamDataContext Data, + DownloadContextBuilder CtxBuilder) : ITransformStage, IAlternativeTransformStage { + public IContextStage WithTransformer(Func> factory) { + var transformer = factory(Data.Bindings[Source.Bindings]); + return new ContextStage(CtxBuilder, transformer); + } + + public IContextStage WithTransformer(AsyncTransformer transformer) { + return new ContextStage(CtxBuilder, transformer); + } + } + } +} diff --git a/Beam.Fluent/DownloadBuilder.cs b/Beam.Fluent/DownloadBuilder.cs new file mode 100644 index 0000000..fae600f --- /dev/null +++ b/Beam.Fluent/DownloadBuilder.cs @@ -0,0 +1,41 @@ +using aeqw89.DataKeys; +using Beam; +using Microsoft.Extensions.Logging; +using System; +using System.Collections.Generic; +using Beam.Models; + +namespace Beam.Fluent { + /// + /// Type‑safe, staged builder that prevents callers from forgetting the mandatory steps + /// (source → link selection → transformer) and surfaces operational knobs as first‑class + /// methods instead of magic parameters. + /// + public static partial class DownloadBuilder { + /* ──────────────────────────── Entry points ─────────────────────────── */ + + public static ILinkStage FromResource(DataKey dict, string kind, BeamDataContext beamDataDictionary) + => Create(dict, beamDataDictionary, kind); + + public static IAlternativeLinkStage FromScratch() + => new LinkStage(null!, null!, null!, new()); + + private static ILinkStage Create(DataKey resourceDict, BeamDataContext data, string kind) { + var (source, initial) = Resolve(resourceDict, kind, data); + var ctxBuilder = new DownloadContextBuilder().WithLinks(Array.Empty()); // placeholder, filled later. + return new LinkStage(source, initial, data, ctxBuilder); + } + + private static (WebResource Source, State Initial) Resolve(DataKey resourceDict, string kind, BeamDataContext data) { + if (!data.ResourceDictionaries.TryGetValue(resourceDict, out var dict)) + throw new KeyNotFoundException($"Novel '{resourceDict}' not found in BeamDataDictionary."); + if (!dict.Resources.TryGetValue(kind, out var sourceKey)) + throw new KeyNotFoundException($"Novel kind '{kind}' not found in '{resourceDict}'"); + if (!data.Resources.TryGetValue(sourceKey, out var source)) + throw new KeyNotFoundException($"Novel source '{sourceKey}' was not found"); + if (!data.InitialStates.TryGetValue(sourceKey.To(), out var istate)) + throw new KeyNotFoundException($"Immutable state for kind '{kind}' not found"); + return (source, istate); + } + } +} diff --git a/Beam.Models/Beam.Models.csproj b/Beam.Models/Beam.Models.csproj new file mode 100644 index 0000000..a30c29b --- /dev/null +++ b/Beam.Models/Beam.Models.csproj @@ -0,0 +1,26 @@ + + + + net9.0 + enable + enable + + + + + + + + + + + runtime; build; native; contentfiles; analyzers; buildtransitive + all + + + runtime; build; native; contentfiles; analyzers; buildtransitive + all + + + + diff --git a/Beam.Models/BeamDataContext.cs b/Beam.Models/BeamDataContext.cs new file mode 100644 index 0000000..c9d7c59 --- /dev/null +++ b/Beam.Models/BeamDataContext.cs @@ -0,0 +1,50 @@ +using aeqw89.PersistentData; +using aeqw89.DataKeys; +using Beam.Dynamic; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Text.Json; +using System.Threading.Tasks; +using System.Data.Entity; + + +namespace Beam.Models { + public class BeamDataContext : BaseDataDictionary { + public Dictionary, Template> Templates { + get => GetOrCreateDictionary, Template>(nameof(Templates)); + set => Set(nameof(Templates), value); + } + + public Dictionary, DataBindings> Bindings { + get => GetOrCreateDictionary, DataBindings>(nameof(Bindings)); + set => Set(nameof(Bindings), value); + } + + public Dictionary, HashSet>> AggregatorNovels { + get => GetOrCreateDictionary, HashSet>>(nameof(AggregatorNovels)); + set => Set(nameof(AggregatorNovels), value); + } + + public Dictionary, WebResource> Resources { + get => GetOrCreateDictionary, WebResource>(nameof(Resources)); + set => Set(nameof(Resources), value); + } + + public Dictionary, ResourceDictionary> ResourceDictionaries { + get => GetOrCreateDictionary, ResourceDictionary>(nameof(ResourceDictionaries)); + set => Set(nameof(ResourceDictionaries), value); + } + + public Dictionary, ImmutableState> InitialStates { + get => GetOrCreateDictionary, ImmutableState>(nameof(InitialStates)); + set => Set(nameof(InitialStates), value); + } + + internal Dictionary, File> Files { + get => GetOrCreateDictionary, File>(nameof(Files)); + set => Set(nameof(Files), value); + } + } +} diff --git a/Beam.Temporary.Cli/File.cs b/Beam.Models/File.cs similarity index 83% rename from Beam.Temporary.Cli/File.cs rename to Beam.Models/File.cs index 6b3b95f..32c3313 100644 --- a/Beam.Temporary.Cli/File.cs +++ b/Beam.Models/File.cs @@ -1,4 +1,4 @@ -namespace Beam.Temporary.Cli { +namespace Beam.Models { internal class File(string path, params string[] tags) { public string Path { get; set; } = path; public string[] Tags { get; set; } = tags; diff --git a/Beam.Temporary.Cli/LinkCollection.cs b/Beam.Models/LinkCollection.cs similarity index 91% rename from Beam.Temporary.Cli/LinkCollection.cs rename to Beam.Models/LinkCollection.cs index 93cc059..ee5172d 100644 --- a/Beam.Temporary.Cli/LinkCollection.cs +++ b/Beam.Models/LinkCollection.cs @@ -5,7 +5,7 @@ using System.Linq; using System.Text; using System.Threading.Tasks; -namespace Beam.Temporary.Cli { +namespace Beam.Models { internal class LinkCollection(DataKey key, List links) { public DataKey Key { get; set; } = key; public List Links { get; set; } = links; diff --git a/Beam.Models/ResourceDictionary.cs b/Beam.Models/ResourceDictionary.cs new file mode 100644 index 0000000..88370dd --- /dev/null +++ b/Beam.Models/ResourceDictionary.cs @@ -0,0 +1,25 @@ + + +using aeqw89.DataKeys; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; +using System.Text; +using System.Text.Json.Serialization; +using System.Threading.Tasks; + +namespace Beam.Models { + public class ResourceDictionary : IKeyed { + public required DataKey Key { get; set; } + + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + public string? FriendlyName { get; set; } + + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] + public Dictionary> Resources { get; set; } = []; + + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] + public Dictionary, ImmutableState> InitialStates { get; set; } = []; + } +} diff --git a/Beam.Models/Template.cs b/Beam.Models/Template.cs new file mode 100644 index 0000000..2b6a701 --- /dev/null +++ b/Beam.Models/Template.cs @@ -0,0 +1,15 @@ +using aeqw89.DataKeys; +using Beam.Dynamic; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Beam.Models { + public record class Template : IKeyed