diff --git a/Beam.Dynamic/AnchorCollectionDataProvider.cs b/Beam.Dynamic/AnchorCollectionDataProvider.cs new file mode 100644 index 0000000..e22e977 --- /dev/null +++ b/Beam.Dynamic/AnchorCollectionDataProvider.cs @@ -0,0 +1,53 @@ +using HtmlAgilityPack; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Beam.Dynamic { + public class AnchorCollectionDataProvider : IDataProvider, IDataProvider { + public IBinding? Content { get; set; } + public string? RelativeTo { get; set; } + + private string GetAbsolute(string? @base, string relative) { + if (@base is null) + return relative; + + if (@base.EndsWith('/')) + @base = @base[..^1]; + if (relative.StartsWith('/')) + relative = relative[1..]; + return @base + '/' + relative; + } + + public string[] Get(HtmlDocument document) { + if (Content is null) + return []; + + var node = Content.Select(document); + if (node is null) + return []; + + List links = []; + foreach (var child in node.Descendants()) + links.Add(child.GetAttributeValue("href", "")); + + return links.Where(x => !string.IsNullOrWhiteSpace(x)).ToArray(); + } + + SourceLink[] IDataProvider.Get(HtmlDocument document) { + var links = Get(document); + + if (links.Length == 0) + return []; + + List slinks = []; + foreach (var link in links) + if (Uri.TryCreate(GetAbsolute(RelativeTo, link), UriKind.RelativeOrAbsolute, out _)) + slinks.Add(new SourceLink(GetAbsolute(RelativeTo, link))); + + return slinks.ToArray(); + } + } +} diff --git a/Beam.Dynamic/AnchorDataProvider.cs b/Beam.Dynamic/AnchorDataProvider.cs new file mode 100644 index 0000000..6bf1909 --- /dev/null +++ b/Beam.Dynamic/AnchorDataProvider.cs @@ -0,0 +1,31 @@ +using HtmlAgilityPack; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Beam.Dynamic { + public class AnchorDataProvider : IDataProvider, IDataProvider { + public IBinding? Content { get; set; } + + public string Get(HtmlDocument document) { + if (Content is null) + return ""; + + return Content.Select(document)?.GetAttributeValue("href", "") ?? ""; + + } + + SourceLink IDataProvider.Get(HtmlDocument document) { + var content = Get(document); + if (content is null) + return SourceLink.InvalidLink; + + if (!Uri.TryCreate(content, UriKind.RelativeOrAbsolute, out _)) + return SourceLink.InvalidLink; + + return new SourceLink(content); + } + } +} diff --git a/Beam.Dynamic/Beam.Dynamic.csproj b/Beam.Dynamic/Beam.Dynamic.csproj index 0fe0a46..df7f2f2 100644 --- a/Beam.Dynamic/Beam.Dynamic.csproj +++ b/Beam.Dynamic/Beam.Dynamic.csproj @@ -6,8 +6,8 @@ enable - - + + diff --git a/Beam.Dynamic/DataBindings.cs b/Beam.Dynamic/DataBindings.cs index 193ffc3..77b4371 100644 --- a/Beam.Dynamic/DataBindings.cs +++ b/Beam.Dynamic/DataBindings.cs @@ -1,22 +1,161 @@ using HtmlAgilityPack; +using System.Text.Json.Serialization; namespace Beam.Dynamic { public record class DataBindings { - public IDataProvider? Title { get; set; } - public IDataProvider? Authors { get; set; } - public IDataProvider? Description { get; set; } - public IDataProvider? Content { get; set; } - public IDataProvider? Language { get; set; } - public IDataProvider? Tags { get; set; } + #region ---------------------- Common Bindings ---------------------- + [JsonIgnore] + public IDataProvider? Title { + get => Get(nameof(Title)); + set => Providers[nameof(Title)] = value; + } + [JsonIgnore] + public IDataProvider? Authors { + get => Get(nameof(Authors)); + set => Providers[nameof(Authors)] = value; + } + [JsonIgnore] + public IDataProvider? Description { + get => Get(nameof(Description)); + set => Providers[nameof(Description)] = value; + } + [JsonIgnore] + public IDataProvider? Content { + get => Get(nameof(Content)); + set => Providers[nameof(Content)] = value; + } + [JsonIgnore] + public IDataProvider? Language { + get => Get(nameof(Language)); + set => Providers[nameof(Language)] = value; + } + [JsonIgnore] + public IDataProvider? Tags { + get => Get(nameof(Tags)); + set => Providers[nameof(Tags)] = value; + } + [JsonIgnore] + public IDataProvider? Publisher { + get => Get(nameof(Publisher)); + set => Providers[nameof(Publisher)] = value; + } + [JsonIgnore] + public IDataProvider? PublicationDate { + get => Get(nameof(PublicationDate)); + set => Providers[nameof(PublicationDate)] = value; + } + [JsonIgnore] + public IDataProvider? ISBN { + get => Get(nameof(ISBN)); + set => Providers[nameof(ISBN)] = value; + } + [JsonIgnore] + public IDataProvider? PageCount { + get => Get(nameof(PageCount)); + set => Providers[nameof(PageCount)] = value; + } + [JsonIgnore] + public IDataProvider? CoverImage { + get => Get(nameof(CoverImage)); + set => Providers[nameof(CoverImage)] = value; + } + [JsonIgnore] + public IDataProvider? Series { + get => Get(nameof(Series)); + set => Providers[nameof(Series)] = value; + } + [JsonIgnore] + public IDataProvider? Edition { + get => Get(nameof(Edition)); + set => Providers[nameof(Edition)] = value; + } + [JsonIgnore] + public IDataProvider? Contributors { + get => Get(nameof(Contributors)); + set => Providers[nameof(Contributors)] = value; + } + [JsonIgnore] + public IDataProvider? Subjects { + get => Get(nameof(Subjects)); + set => Providers[nameof(Subjects)] = value; + } + [JsonIgnore] + public IDataProvider? Rights { + get => Get(nameof(Rights)); + set => Providers[nameof(Rights)] = value; + } + [JsonIgnore] + public IDataProvider? TableOfContents { + get => Get(nameof(TableOfContents)); + set => Providers[nameof(TableOfContents)] = value; + } + [JsonIgnore] + public IDataProvider? PagesDropDown { + get => Get(nameof(PagesDropDown)); + set => Providers[nameof(PagesDropDown)] = value; + } + [JsonIgnore] + public IDataProvider? NextPageButton { + get => Get(nameof(NextPageButton)); + set => Providers[nameof(NextPageButton)] = value; + } + [JsonIgnore] + public IDataProvider? PreviousPageButton { + get => Get(nameof(PreviousPageButton)); + set => Providers[nameof(PreviousPageButton)] = value; + } + #endregion + + public Dictionary Providers { get; set; } = []; + + private IDataProvider? Get(string key) { + if (Providers.TryGetValue(key, out var k) && k is IDataProvider ks) + return ks; + return default; + } public virtual ResolvedBindings Resolve(HtmlDocument doc) { - return new ResolvedBindings() { + // explicit fields already handled below + var mappedKeys = new HashSet { + nameof(Title), nameof(Authors), nameof(Description), nameof(Content), + nameof(Language), nameof(Tags), nameof(Publisher), nameof(PublicationDate), + nameof(ISBN), nameof(PageCount), nameof(CoverImage), nameof(Series), + nameof(Edition), nameof(Contributors), nameof(Subjects), nameof(Rights), + nameof(TableOfContents), nameof(PagesDropDown), nameof(NextPageButton), + nameof(PreviousPageButton) + }; + + var additional = new Dictionary(); + + foreach (var (key, provider) in Providers) { + if (!mappedKeys.Contains(key) && provider is not null) { + // dynamic call so any IDataProvider works + additional[key] = ((dynamic)provider).Get(doc); + } + } + + return new ResolvedBindings { Title = Title?.Get(doc), Authors = Authors?.Get(doc) ?? [], - Language = Language?.Get(doc), - Content = Content?.Get(doc), Description = Description?.Get(doc), - Tags = Tags?.Get(doc) ?? [] + Content = Content?.Get(doc), + Language = Language?.Get(doc), + Tags = Tags?.Get(doc) ?? [], + Publisher = Publisher?.Get(doc), + PublicationDate = PublicationDate?.Get(doc), + ISBN = ISBN?.Get(doc), + PageCount = PageCount?.Get(doc), + CoverImage = CoverImage?.Get(doc), + Series = Series?.Get(doc) ?? [], + Edition = Edition?.Get(doc), + Contributors = Contributors?.Get(doc) ?? [], + Subjects = Subjects?.Get(doc) ?? [], + Rights = Rights?.Get(doc), + TableOfContents = TableOfContents?.Get(doc) ?? [], + PagesDropDown = PagesDropDown?.Get(doc), + NextPageButton = NextPageButton?.Get(doc), + PreviousPageButton = PreviousPageButton?.Get(doc), + Additional = additional }; } } @@ -28,5 +167,25 @@ namespace Beam.Dynamic { public string? Content { get; set; } public string[]? Language { get; set; } public string[]? Tags { get; set; } + public string? Publisher { get; set; } + public DateTimeOffset? PublicationDate { get; set; } + public string? ISBN { get; set; } + public int? PageCount { get; set; } + public SourceLink? CoverImage { get; set; } + public string[]? Series { get; set; } + public int? Edition { get; set; } + public string[]? Contributors { get; set; } + public string[]? Subjects { get; set; } + public string? Rights { get; set; } + public SourceLink[]? TableOfContents { get; set; } + public SourceLink[]? PagesDropDown { get; set; } + public SourceLink? NextPageButton { get; set; } + public SourceLink? PreviousPageButton { get; set; } + + /// + /// Values resolved from any providers whose keys aren’t represented + /// by the named properties above. + /// + public Dictionary Additional { get; set; } = []; } } diff --git a/Beam.Dynamic/DropDownDataProvider.cs b/Beam.Dynamic/DropDownDataProvider.cs index 6750758..ad2ef23 100644 --- a/Beam.Dynamic/DropDownDataProvider.cs +++ b/Beam.Dynamic/DropDownDataProvider.cs @@ -2,6 +2,8 @@ using System; using System.Collections.Generic; using System.Linq; +using System.Runtime.InteropServices; +using System.Runtime.InteropServices.Marshalling; using System.Text; using System.Text.Json; using System.Threading.Tasks; @@ -12,6 +14,18 @@ namespace Beam.Dynamic { IDataProvider, IDataProvider { public IBinding? Content { get; set; } + public string? RelativeTo { get; set; } + + private string GetAbsolute(string? @base, string relative) { + if (@base is null) + return relative; + + if (@base.EndsWith('/')) + @base = @base[..^1]; + if (relative.StartsWith('/')) + relative = relative[1..]; + return @base + '/' + relative; + } public SourceLink[] Get(HtmlDocument document) { if (Content is null) @@ -22,9 +36,9 @@ namespace Beam.Dynamic { List links = []; foreach (var child in node.ChildNodes.Where(x => x.Name == "option")) { var childValue = child.GetAttributeValue("value", null); - if (!Uri.TryCreate(childValue, UriKind.Absolute, out _)) + if (!Uri.TryCreate(GetAbsolute(RelativeTo, childValue), UriKind.Absolute, out _)) continue; - links.Add(new SourceLink(childValue)); + links.Add(new SourceLink(GetAbsolute(RelativeTo, childValue))); } return links.ToArray(); diff --git a/Beam.Dynamic/IDataProvider.cs b/Beam.Dynamic/IDataProvider.cs index d46edad..2988b22 100644 --- a/Beam.Dynamic/IDataProvider.cs +++ b/Beam.Dynamic/IDataProvider.cs @@ -7,7 +7,14 @@ namespace Beam.Dynamic { [JsonDerivedType(typeof(ContentsArrayDataProvider), "array")] [JsonDerivedType(typeof(ContentsDataProvider), "single")] [JsonDerivedType(typeof(DropDownDataProvider), "dropdown")] - public interface IDataProvider { + [JsonDerivedType(typeof(AnchorCollectionDataProvider), "anchor-list")] + [JsonDerivedType(typeof(AnchorDataProvider), "anchor")] + public interface IDataProvider { + public string GetString(HtmlDocument document) + => (this as IDataProvider)?.Get(document)?.ToString() ?? ""; + } + + public interface IDataProvider : IDataProvider { public T Get(HtmlDocument document); //public HtmlNode? GetNode(HtmlDocument document); } diff --git a/Beam.Dynamic/StateChangerFactory.cs b/Beam.Dynamic/StateChangerFactory.cs new file mode 100644 index 0000000..a788464 --- /dev/null +++ b/Beam.Dynamic/StateChangerFactory.cs @@ -0,0 +1,34 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Text.Json.Serialization; +using System.Threading.Tasks; + +namespace Beam.Dynamic { + public class StateChangerFactory { + [JsonIgnore] + public IStateChangeBehaviour Behavior => FactoryTable[StateChangerKey](); + + [JsonInclude] + public string StateChangerKey { get; set; } + + [JsonConstructor] + public StateChangerFactory(string stateChangerKey) { + if (!Keys.Contains(stateChangerKey)) + throw new ArgumentException($"{stateChangerKey} not in keys list", nameof(stateChangerKey)); + StateChangerKey = stateChangerKey; + } + + public static Dictionary> FactoryTable = new() { + { LastAsNumber, () => CommonStateChangers.LastAsNumber }, + { LastAsNumberPrefixed, () => CommonStateChangers.NthAsNumber(^1, true) }, + { Constant, () => CommonStateChangers.Constant }, + }; + + public HashSet Keys = [LastAsNumber, LastAsNumberPrefixed, Constant]; + public const string LastAsNumber = "LastAsNumber"; + public const string LastAsNumberPrefixed = "LastAsNumberPrefixed"; + public const string Constant = "Constant"; + } +} diff --git a/Beam.Fluent/Beam.Fluent.csproj b/Beam.Fluent/Beam.Fluent.csproj new file mode 100644 index 0000000..62ea023 --- /dev/null +++ b/Beam.Fluent/Beam.Fluent.csproj @@ -0,0 +1,31 @@ + + + + net9.0 + enable + enable + + + + + + + + + + + + all + + + + all + + + all + + + all + + + \ No newline at end of file diff --git a/Beam.Fluent/DownloadBuilder.ContextStage.cs b/Beam.Fluent/DownloadBuilder.ContextStage.cs new file mode 100644 index 0000000..aedf227 --- /dev/null +++ b/Beam.Fluent/DownloadBuilder.ContextStage.cs @@ -0,0 +1,186 @@ +using HtmlAgilityPack; +using Beam.Playwright; +using Beam.Stealth; + +namespace Beam.Fluent { +public static partial class DownloadBuilder { + private sealed class ContextStage : IContextStage { + private readonly DownloadContextBuilder _ctxBuilder; + private readonly AsyncTransformer _transformer; + private int _parallelism = 4; + private bool _useFragments = false; + private PlaywrightAsyncManipulator? _usePlaywrightManipulator = null; + private StealthAsyncManipulator? _useStealthManipulator = null; + private StealthConfig? _stealthConfig = null; + + public ContextStage(DownloadContextBuilder ctxBuilder, AsyncTransformer transformer) { + _ctxBuilder = ctxBuilder; + _transformer = transformer; + } + + public IContextStage Configure(Action> configure) { + configure(_ctxBuilder); + return this; + } + + public IContextStage WithParallelism(int degree) { + _parallelism = Math.Max(1, degree); + return this; + } + + public IContextStage WithTimeout(TimeSpan timeout) { + _ctxBuilder.WithTimeOut(timeout); + return this; + } + + public IContextStage WithRetryReporter(IProgress reporter) { + _ctxBuilder.WithRetryReporter(reporter); + return this; + } + + /// + /// Uses fragments to download multiple links in parallel. This strategy is mutually exclusive with + /// + /// + public IContextStage UseFragments() { + if (_usePlaywrightManipulator is not null) + _usePlaywrightManipulator = null; + + _useFragments = true; + return this; + } + + /// + /// Use a puppet browser to download the links. This strategy is mutually exclusive with + /// + /// The page manipulator + /// + public IContextStage UsePlaywright(PlaywrightAsyncManipulator manipulator) { + if (_useFragments) + _useFragments = false; + if (_useStealthManipulator is not null) + _useStealthManipulator = null; + + _usePlaywrightManipulator = manipulator; + return this; + } + + public IContextStage UseStealth(StealthAsyncManipulator manipulator, StealthConfig config) { + if (_usePlaywrightManipulator is not null) + _usePlaywrightManipulator = null; + + _useStealthManipulator = manipulator; + _stealthConfig = config; + return this; + } + + private object ConstructUnitDownloader(DownloadContext context) { + return (_useFragments, _useStealthManipulator, _usePlaywrightManipulator, _transformer, context.AsyncFailurePredicates) switch { + // ──────────────── fragmented HTML ──────────────── + (true, null, _, AsyncTransformer asyncHtmlTransformer, + AsyncDownloadFailurePredicate[] documentFailurePredicates) + => new UnitFragmentDownloader( + context.Web, + asyncHtmlTransformer, + documentFailurePredicates, + _parallelism, + context.DownloadLogger), + // ──────────────── fragmented binary ──────────────── + (true, null, _, AsyncTransformer asyncBinaryTransformer, + AsyncDownloadFailurePredicate[] responseFailurePredicates) + => new UnitFragmentDownloaderBinary( + context.Client, + asyncBinaryTransformer, + responseFailurePredicates, + _parallelism, + context.DownloadLogger), + // ──────────────── single HTML ──────────────── + (false, null, null, AsyncTransformer asyncHtmlTransformer, + AsyncDownloadFailurePredicate[] documentFailurePredicates) + => new UnitDownloader( + context.Web, + asyncHtmlTransformer, + documentFailurePredicates), + // ──────────────── single binary ──────────────── + (false, null, null, AsyncTransformer asyncBinaryTransformer, + AsyncDownloadFailurePredicate[] responseFailurePredicates) + => new UnitDownloaderBinary( + context.Client, + asyncBinaryTransformer, + responseFailurePredicates), + // ──────────────── single playwright binary ──────────────── + (false, _, PlaywrightAsyncManipulator manipulator, AsyncTransformer asyncBinaryTransformer, + AsyncDownloadFailurePredicate[] responseFailurePredicates) + => new PlaywrightUnitDownloader( + context.Client, + manipulator, + asyncBinaryTransformer, + responseFailurePredicates), + // ──────────────── single playwrigt HTML ──────────────── + (false, _, PlaywrightAsyncManipulator manipulator, AsyncTransformer asyncHtmlTransformer, + AsyncDownloadFailurePredicate[] documentFailurePredicates) + => new PlaywrightUnitPageDownloader( + context.Web, + manipulator, + asyncHtmlTransformer, + documentFailurePredicates), + // ──────────────── single stealth HTML ──────────────── + (false, StealthAsyncManipulator manipulator, _, AsyncTransformer asyncHtmlTransformer, + AsyncDownloadFailurePredicate[] documentFailurePredicates) + => new StealthUnitPageDownloader( + context.Web, + _stealthConfig ?? throw new Exception($"Stealth config is null"), + manipulator, + asyncHtmlTransformer, + documentFailurePredicates), + // ──────────────── single stealth binary ──────────────── + (false, StealthAsyncManipulator manipulator, _, AsyncTransformer asyncBinaryTransformer, + AsyncDownloadFailurePredicate[] responseFailurePredicates) + => new StealthUnitDownloader( + context.Client, + _stealthConfig ?? throw new Exception($"Stealth config is null"), + manipulator, + asyncBinaryTransformer, + responseFailurePredicates), + // ──────────────── fragment stealth HTML ──────────────── + (true, StealthAsyncManipulator manipulator, _, AsyncTransformer asyncHtmlTransformer, + AsyncDownloadFailurePredicate[] downloadFailurePredicates) + => new StealthFragmentPageDownloader( + context.Web, + _stealthConfig ?? throw new Exception($"Stealth config is null"), + manipulator, + asyncHtmlTransformer), + // ──────────────── fragment stealth binary ──────────────── + (true, StealthAsyncManipulator manipulator, _, AsyncTransformer asyncBinaryTransformer, + AsyncDownloadFailurePredicate[] responseFailurePredicates) + => new StealthFragmentDownloader( + context.Client, + _stealthConfig ?? throw new Exception($"Stealth config is null"), + manipulator, + asyncBinaryTransformer), + _ => throw new Exception($"Unsupported transformer / failure-predicate combinatAsyion. Missing pattern: {_useFragments} , {_transformer.GetType().AsUniqueName()} , {context.AsyncFailurePredicates?.GetType().AsUniqueName()}"), + }; + } + + private IAsyncEnumerator> ConstructDownloader(DownloadContext context) { + var copyOfContext = context.CreateBuilder().Build(); + return _useFragments switch { + true => new SequentialFragmentDownloader( + copyOfContext, + ctx => (IUnitDownloader>>)ConstructUnitDownloader(ctx), + context.DownloadLogger).UnwrapFragmented(), + false => new SequentialDownloader( + copyOfContext, + ctx => (IUnitDownloader)ConstructUnitDownloader(ctx), + context.DownloadLogger).WrapOrdered() + }; + } + + public DownloadEnumerable Build() { + var context = _ctxBuilder.Build(); + var enumerable = new DownloadEnumerable(ConstructDownloader(context)); + return enumerable; + } + } + } +} diff --git a/Beam.Fluent/DownloadBuilder.DownloadStage.cs b/Beam.Fluent/DownloadBuilder.DownloadStage.cs new file mode 100644 index 0000000..4909c6f --- /dev/null +++ b/Beam.Fluent/DownloadBuilder.DownloadStage.cs @@ -0,0 +1,43 @@ +using System.Collections.Concurrent; +using System.Text.Json; + +namespace Beam.Fluent { +public static partial class DownloadBuilder { + private sealed class DownloadStage(DownloadEnumerable download) : IDownloadStage { + private IAsyncEnumerable> _download = download; + + public DownloadEnumerable AsAsyncEnumerable() { + return new DownloadEnumerable(_download.GetAsyncEnumerator()); + } + + private async IAsyncEnumerable> _SaveToDirectory(string dir) { + Directory.CreateDirectory(dir); + await foreach(var download in _download) { + await System.IO.File.WriteAllTextAsync(Path.Combine(dir, $"{Path.GetRandomFileName()}.{download.Order}.json"), JsonSerializer.Serialize(dir)); + yield return download; + } + } + + public IDownloadStage SaveToDirectory(string dir) { + _download = _SaveToDirectory(dir); + return this; + } + + public IDownloadStage SaveToFiles(IEnumerable files) { + throw new NotImplementedException(); + } + + public IDownloadStage SaveToMemory(ConcurrentBag bag) { + throw new NotImplementedException(); + } + + public void WaitForDownload() { + throw new NotImplementedException(); + } + + public Task WaitForDownloadAsync() { + throw new NotImplementedException(); + } + } + } +} diff --git a/Beam.Fluent/DownloadBuilder.IAlternativeLinkStage.cs b/Beam.Fluent/DownloadBuilder.IAlternativeLinkStage.cs new file mode 100644 index 0000000..25b64d9 --- /dev/null +++ b/Beam.Fluent/DownloadBuilder.IAlternativeLinkStage.cs @@ -0,0 +1,7 @@ +namespace Beam.Fluent { +public static partial class DownloadBuilder { + public interface IAlternativeLinkStage { + IAlternativeTransformStage WithLinks(IEnumerable links); + } + } +} diff --git a/Beam.Fluent/DownloadBuilder.IAlternativeTransformStage.cs b/Beam.Fluent/DownloadBuilder.IAlternativeTransformStage.cs new file mode 100644 index 0000000..b28f376 --- /dev/null +++ b/Beam.Fluent/DownloadBuilder.IAlternativeTransformStage.cs @@ -0,0 +1,10 @@ +namespace Beam.Fluent { +public static partial class DownloadBuilder { + public interface IAlternativeTransformStage { + IContextStage WithTransformer(AsyncTransformer transformer); + IContextStage WithTransformer(Func transformer) { + return WithTransformer(rt => Task.FromResult(transformer(rt))); + } + } + } +} diff --git a/Beam.Fluent/DownloadBuilder.IContextStage.cs b/Beam.Fluent/DownloadBuilder.IContextStage.cs new file mode 100644 index 0000000..074d7dc --- /dev/null +++ b/Beam.Fluent/DownloadBuilder.IContextStage.cs @@ -0,0 +1,17 @@ +using Beam.Playwright; +using Beam.Stealth; + +namespace Beam.Fluent { +public static partial class DownloadBuilder { + public interface IContextStage { + IContextStage Configure(Action> configure); + IContextStage WithParallelism(int degree); + IContextStage WithTimeout(TimeSpan timeout); + IContextStage WithRetryReporter(IProgress reporter); + DownloadEnumerable Build(); + IContextStage UseFragments(); + IContextStage UsePlaywright(PlaywrightAsyncManipulator manipulator); + IContextStage UseStealth(StealthAsyncManipulator manipulator, StealthConfig config); + } + } +} diff --git a/Beam.Fluent/DownloadBuilder.IDownloadStage.cs b/Beam.Fluent/DownloadBuilder.IDownloadStage.cs new file mode 100644 index 0000000..99b7f85 --- /dev/null +++ b/Beam.Fluent/DownloadBuilder.IDownloadStage.cs @@ -0,0 +1,14 @@ +using System.Collections.Concurrent; + +namespace Beam.Fluent { +public static partial class DownloadBuilder { + public interface IDownloadStage { + IDownloadStage SaveToDirectory(string dir); + IDownloadStage SaveToFiles(IEnumerable files); + IDownloadStage SaveToMemory(ConcurrentBag bag); + DownloadEnumerable AsAsyncEnumerable(); + void WaitForDownload(); + Task WaitForDownloadAsync(); + } + } +} diff --git a/Beam.Fluent/DownloadBuilder.ILinkStage.cs b/Beam.Fluent/DownloadBuilder.ILinkStage.cs new file mode 100644 index 0000000..17712bf --- /dev/null +++ b/Beam.Fluent/DownloadBuilder.ILinkStage.cs @@ -0,0 +1,13 @@ +using Beam.Models; + +namespace Beam.Fluent { + public static partial class DownloadBuilder { + /* ────────────────────────────── Stages ─────────────────────────────── */ + + public interface ILinkStage { + ITransformStage WithLink(); + ITransformStage WithLinkGenerator(); + ILinkStage WithRange(Range range); + } + } +} diff --git a/Beam.Fluent/DownloadBuilder.ITransformStage.cs b/Beam.Fluent/DownloadBuilder.ITransformStage.cs new file mode 100644 index 0000000..07af773 --- /dev/null +++ b/Beam.Fluent/DownloadBuilder.ITransformStage.cs @@ -0,0 +1,9 @@ +using Beam.Dynamic; + +namespace Beam.Fluent { +public static partial class DownloadBuilder { + public interface ITransformStage { + IContextStage WithTransformer(Func> factory); + } + } +} diff --git a/Beam.Fluent/DownloadBuilder.LinkStage.cs b/Beam.Fluent/DownloadBuilder.LinkStage.cs new file mode 100644 index 0000000..a2796fb --- /dev/null +++ b/Beam.Fluent/DownloadBuilder.LinkStage.cs @@ -0,0 +1,53 @@ +using Beam.Models; + +namespace Beam.Fluent { + public static partial class DownloadBuilder { + /* ──────────────────────────── Stage types ─────────────────────────── */ + + + private sealed record LinkStage( + WebResource Source, + State Initial, + BeamDataContext Data, + DownloadContextBuilder CtxBuilder) : ILinkStage, IAlternativeLinkStage { + + private State? endState; + private bool linksFrozen = false; + + public ITransformStage WithLink() { + var link = Data.Templates[Source.Key].Builder.Build(Initial); + CtxBuilder.WithLinks([link]); + return new TransformStage(Source, Data, CtxBuilder); + } + + public ITransformStage WithLinkGenerator() { + var template = Data.Templates[Source.Key]; + var generator = SourceLinkEnumerable.FromGenerator(new OrderedSourceLinkGenerator( + template.Builder, + new NumberedStateChanger(template.Factory.Behavior), + Initial, endState)); + CtxBuilder.WithLinks(generator); + linksFrozen = true; + return new TransformStage(Source, Data, CtxBuilder); + } + + public IAlternativeTransformStage WithLinks(IEnumerable links) { + CtxBuilder.WithLinks(links); + return new TransformStage(Source, Data, CtxBuilder); + } + + public ILinkStage WithRange(Range range) { + if (linksFrozen) + throw new InvalidOperationException($"WithRange must be called before WithLinkGenerator"); + if (range.End.Value < range.Start.Value) + throw new ArgumentOutOfRangeException(nameof(range), $" start must be < end"); + var template = Data.Templates[Source.Key]; + var stateChanger = new NumberedStateChanger(template.Factory.Behavior); + endState = Initial.Copy(); + stateChanger.Apply(Initial, range.Start.Value - 1); + stateChanger.Apply(endState, range.End.Value - 1); + return this; + } + } + } +} diff --git a/Beam.Fluent/DownloadBuilder.SourceKind.cs b/Beam.Fluent/DownloadBuilder.SourceKind.cs new file mode 100644 index 0000000..46de3e9 --- /dev/null +++ b/Beam.Fluent/DownloadBuilder.SourceKind.cs @@ -0,0 +1,6 @@ +namespace Beam.Fluent { + public static partial class DownloadBuilder { + /* ────────────────────────── Implementation ────────────────────────── */ + private enum SourceKind { Meta, Text } + } +} diff --git a/Beam.Fluent/DownloadBuilder.TransformStage.cs b/Beam.Fluent/DownloadBuilder.TransformStage.cs new file mode 100644 index 0000000..bd77cf3 --- /dev/null +++ b/Beam.Fluent/DownloadBuilder.TransformStage.cs @@ -0,0 +1,20 @@ +using Beam.Dynamic; +using Beam.Models; + +namespace Beam.Fluent { +public static partial class DownloadBuilder { + private sealed record TransformStage( + WebResource Source, + BeamDataContext Data, + DownloadContextBuilder CtxBuilder) : ITransformStage, IAlternativeTransformStage { + public IContextStage WithTransformer(Func> factory) { + var transformer = factory(Data.Bindings[Source.Bindings]); + return new ContextStage(CtxBuilder, transformer); + } + + public IContextStage WithTransformer(AsyncTransformer transformer) { + return new ContextStage(CtxBuilder, transformer); + } + } + } +} diff --git a/Beam.Fluent/DownloadBuilder.cs b/Beam.Fluent/DownloadBuilder.cs new file mode 100644 index 0000000..fae600f --- /dev/null +++ b/Beam.Fluent/DownloadBuilder.cs @@ -0,0 +1,41 @@ +using aeqw89.DataKeys; +using Beam; +using Microsoft.Extensions.Logging; +using System; +using System.Collections.Generic; +using Beam.Models; + +namespace Beam.Fluent { + /// + /// Type‑safe, staged builder that prevents callers from forgetting the mandatory steps + /// (source → link selection → transformer) and surfaces operational knobs as first‑class + /// methods instead of magic parameters. + /// + public static partial class DownloadBuilder { + /* ──────────────────────────── Entry points ─────────────────────────── */ + + public static ILinkStage FromResource(DataKey dict, string kind, BeamDataContext beamDataDictionary) + => Create(dict, beamDataDictionary, kind); + + public static IAlternativeLinkStage FromScratch() + => new LinkStage(null!, null!, null!, new()); + + private static ILinkStage Create(DataKey resourceDict, BeamDataContext data, string kind) { + var (source, initial) = Resolve(resourceDict, kind, data); + var ctxBuilder = new DownloadContextBuilder().WithLinks(Array.Empty()); // placeholder, filled later. + return new LinkStage(source, initial, data, ctxBuilder); + } + + private static (WebResource Source, State Initial) Resolve(DataKey resourceDict, string kind, BeamDataContext data) { + if (!data.ResourceDictionaries.TryGetValue(resourceDict, out var dict)) + throw new KeyNotFoundException($"Novel '{resourceDict}' not found in BeamDataDictionary."); + if (!dict.Resources.TryGetValue(kind, out var sourceKey)) + throw new KeyNotFoundException($"Novel kind '{kind}' not found in '{resourceDict}'"); + if (!data.Resources.TryGetValue(sourceKey, out var source)) + throw new KeyNotFoundException($"Novel source '{sourceKey}' was not found"); + if (!data.InitialStates.TryGetValue(sourceKey.To(), out var istate)) + throw new KeyNotFoundException($"Immutable state for kind '{kind}' not found"); + return (source, istate); + } + } +} diff --git a/Beam.Models/Beam.Models.csproj b/Beam.Models/Beam.Models.csproj new file mode 100644 index 0000000..a30c29b --- /dev/null +++ b/Beam.Models/Beam.Models.csproj @@ -0,0 +1,26 @@ + + + + net9.0 + enable + enable + + + + + + + + + + + runtime; build; native; contentfiles; analyzers; buildtransitive + all + + + runtime; build; native; contentfiles; analyzers; buildtransitive + all + + + + diff --git a/Beam.Models/BeamDataContext.cs b/Beam.Models/BeamDataContext.cs new file mode 100644 index 0000000..c9d7c59 --- /dev/null +++ b/Beam.Models/BeamDataContext.cs @@ -0,0 +1,50 @@ +using aeqw89.PersistentData; +using aeqw89.DataKeys; +using Beam.Dynamic; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Text.Json; +using System.Threading.Tasks; +using System.Data.Entity; + + +namespace Beam.Models { + public class BeamDataContext : BaseDataDictionary { + public Dictionary, Template> Templates { + get => GetOrCreateDictionary, Template>(nameof(Templates)); + set => Set(nameof(Templates), value); + } + + public Dictionary, DataBindings> Bindings { + get => GetOrCreateDictionary, DataBindings>(nameof(Bindings)); + set => Set(nameof(Bindings), value); + } + + public Dictionary, HashSet>> AggregatorNovels { + get => GetOrCreateDictionary, HashSet>>(nameof(AggregatorNovels)); + set => Set(nameof(AggregatorNovels), value); + } + + public Dictionary, WebResource> Resources { + get => GetOrCreateDictionary, WebResource>(nameof(Resources)); + set => Set(nameof(Resources), value); + } + + public Dictionary, ResourceDictionary> ResourceDictionaries { + get => GetOrCreateDictionary, ResourceDictionary>(nameof(ResourceDictionaries)); + set => Set(nameof(ResourceDictionaries), value); + } + + public Dictionary, ImmutableState> InitialStates { + get => GetOrCreateDictionary, ImmutableState>(nameof(InitialStates)); + set => Set(nameof(InitialStates), value); + } + + internal Dictionary, File> Files { + get => GetOrCreateDictionary, File>(nameof(Files)); + set => Set(nameof(Files), value); + } + } +} diff --git a/Beam.Temporary.Cli/File.cs b/Beam.Models/File.cs similarity index 83% rename from Beam.Temporary.Cli/File.cs rename to Beam.Models/File.cs index 6b3b95f..32c3313 100644 --- a/Beam.Temporary.Cli/File.cs +++ b/Beam.Models/File.cs @@ -1,4 +1,4 @@ -namespace Beam.Temporary.Cli { +namespace Beam.Models { internal class File(string path, params string[] tags) { public string Path { get; set; } = path; public string[] Tags { get; set; } = tags; diff --git a/Beam.Temporary.Cli/LinkCollection.cs b/Beam.Models/LinkCollection.cs similarity index 91% rename from Beam.Temporary.Cli/LinkCollection.cs rename to Beam.Models/LinkCollection.cs index 93cc059..ee5172d 100644 --- a/Beam.Temporary.Cli/LinkCollection.cs +++ b/Beam.Models/LinkCollection.cs @@ -5,7 +5,7 @@ using System.Linq; using System.Text; using System.Threading.Tasks; -namespace Beam.Temporary.Cli { +namespace Beam.Models { internal class LinkCollection(DataKey key, List links) { public DataKey Key { get; set; } = key; public List Links { get; set; } = links; diff --git a/Beam.Models/ResourceDictionary.cs b/Beam.Models/ResourceDictionary.cs new file mode 100644 index 0000000..88370dd --- /dev/null +++ b/Beam.Models/ResourceDictionary.cs @@ -0,0 +1,25 @@ + + +using aeqw89.DataKeys; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; +using System.Text; +using System.Text.Json.Serialization; +using System.Threading.Tasks; + +namespace Beam.Models { + public class ResourceDictionary : IKeyed { + public required DataKey Key { get; set; } + + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + public string? FriendlyName { get; set; } + + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] + public Dictionary> Resources { get; set; } = []; + + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] + public Dictionary, ImmutableState> InitialStates { get; set; } = []; + } +} diff --git a/Beam.Models/Template.cs b/Beam.Models/Template.cs new file mode 100644 index 0000000..2b6a701 --- /dev/null +++ b/Beam.Models/Template.cs @@ -0,0 +1,15 @@ +using aeqw89.DataKeys; +using Beam.Dynamic; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Beam.Models { + public record class Template : IKeyed