diff --git a/Beam.Abstractions/Beam.Abstractions.csproj b/Beam.Abstractions/Beam.Abstractions.csproj index bea4d88..cfe70e6 100644 --- a/Beam.Abstractions/Beam.Abstractions.csproj +++ b/Beam.Abstractions/Beam.Abstractions.csproj @@ -9,7 +9,7 @@ - + diff --git a/Beam.Abstractions/IStateChangerFactory.cs b/Beam.Abstractions/IStateChangerFactory.cs index e5e95e2..55b495d 100644 --- a/Beam.Abstractions/IStateChangerFactory.cs +++ b/Beam.Abstractions/IStateChangerFactory.cs @@ -1,3 +1,5 @@ namespace Beam.Abstractions; -public interface IStateChangerFactory { } \ No newline at end of file +public interface IStateChangerFactory { + IStateChangeBehaviour Behavior { get; } +} \ No newline at end of file diff --git a/Beam.Api/Beam.Api.csproj b/Beam.Api/Beam.Api.csproj new file mode 100644 index 0000000..bcbfc5d --- /dev/null +++ b/Beam.Api/Beam.Api.csproj @@ -0,0 +1,17 @@ + + + + net9.0 + enable + enable + + + + + + + + + + + diff --git a/Beam.Data/Beam.Data.csproj b/Beam.Data/Beam.Data.csproj new file mode 100644 index 0000000..0c8ec0c --- /dev/null +++ b/Beam.Data/Beam.Data.csproj @@ -0,0 +1,14 @@ + + + + net9.0 + enable + enable + + + + + + + + diff --git a/Beam.Downloaders/Beam.Downloaders.csproj b/Beam.Downloaders/Beam.Downloaders.csproj new file mode 100644 index 0000000..3e5ee31 --- /dev/null +++ b/Beam.Downloaders/Beam.Downloaders.csproj @@ -0,0 +1,19 @@ + + + + net9.0 + enable + enable + + + + + + + + + + + + + diff --git a/Beam.Dynamic/Beam.Dynamic.csproj b/Beam.Dynamic/Beam.Dynamic.csproj index dbea70f..3d6d4c8 100644 --- a/Beam.Dynamic/Beam.Dynamic.csproj +++ b/Beam.Dynamic/Beam.Dynamic.csproj @@ -7,7 +7,7 @@ - + diff --git a/Beam.Exceptions/Beam.Exceptions.csproj b/Beam.Exceptions/Beam.Exceptions.csproj new file mode 100644 index 0000000..347f818 --- /dev/null +++ b/Beam.Exceptions/Beam.Exceptions.csproj @@ -0,0 +1,24 @@ + + + + net9.0 + enable + enable + + + + + PublicResXFileCodeGenerator + Exceptions.Designer.cs + + + + + + True + True + Exceptions.resx + + + 
+ diff --git a/Beam.Exceptions/Exceptions.Designer.cs b/Beam.Exceptions/Exceptions.Designer.cs index 1d9d671..5b5cfc9 100644 --- a/Beam.Exceptions/Exceptions.Designer.cs +++ b/Beam.Exceptions/Exceptions.Designer.cs @@ -59,6 +59,42 @@ namespace Beam.Exceptions { } } + /// + /// Looks up a localized string similar to The state of the builder is invalid.. + /// + public static string fluent_invalid_state { + get { + return ResourceManager.GetString("fluent_invalid_state", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to A type conversion from '{0}' to '{1}' has failed, indicating an invalid state.. + /// + public static string fluent_type_conversion_failure { + get { + return ResourceManager.GetString("fluent_type_conversion_failure", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to The builder state '{0}' points to an unsupported unit downloader.. + /// + public static string fluent_unsupported_pattern { + get { + return ResourceManager.GetString("fluent_unsupported_pattern", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to The transformer type '{0}' is unsupported by this fluent pathway. Consider manually instantiating DownloadContext.. + /// + public static string fluent_unsupported_transformer { + get { + return ResourceManager.GetString("fluent_unsupported_transformer", resourceCulture); + } + } + /// /// Looks up a localized string similar to A fragment is locked when it should be free; failed to obtain updater.. /// @@ -104,6 +140,15 @@ namespace Beam.Exceptions { } } + /// + /// Looks up a localized string similar to There must be at least one state in resource definition.. + /// + public static string resource_definition_invalid_states_count { + get { + return ResourceManager.GetString("resource_definition_invalid_states_count", resourceCulture); + } + } + /// /// Looks up a localized string similar to Encountered an error while changing state. 
/// diff --git a/Beam.Exceptions/Exceptions.resx b/Beam.Exceptions/Exceptions.resx index faae748..0adb3c5 100644 --- a/Beam.Exceptions/Exceptions.resx +++ b/Beam.Exceptions/Exceptions.resx @@ -36,4 +36,19 @@ A fragment is locked when it should be free; failed to obtain updater. + + The transformer type '{0}' is unsupported by this fluent pathway. Consider manually instantiating DownloadContext. + + + The state of the builder is invalid. + + + A type conversion from '{0}' to '{1}' has failed, indicating an invalid state. + + + The builder state '{0}' points to an unsupported unit downloader. + + + There must be at least one state in resource definition. + \ No newline at end of file diff --git a/Beam.Fluent/Beam.Fluent.csproj b/Beam.Fluent/Beam.Fluent.csproj index f0c3c21..2d6a5ab 100644 --- a/Beam.Fluent/Beam.Fluent.csproj +++ b/Beam.Fluent/Beam.Fluent.csproj @@ -7,7 +7,7 @@ - + diff --git a/Beam.Fluent/ContextStage.cs b/Beam.Fluent/ContextStage.cs new file mode 100644 index 0000000..be079d9 --- /dev/null +++ b/Beam.Fluent/ContextStage.cs @@ -0,0 +1,230 @@ +using Beam.Models; +using HtmlAgilityPack; +using Beam.Playwright; +using Beam.Stealth; +using Beam; +using Beam.Abstractions; +using Beam.Downloaders; + +namespace Beam.Fluent; + +internal sealed class ContextStage : IContextStage { + private readonly DownloadContextBuilder _ctxBuilder; + private readonly AsyncTransformer _transformer; + private FragmentMode _fragmentMode = FragmentMode.Single; + private Channel _channel = Channel.Plain; + private readonly ContentKind _contentKind; + private int _parallelism = 4; + + // ──────────────── playwright ──────────────── + private PlaywrightAsyncManipulator? _playwrightManipulator = null; + // ──────────────────────────────────────────── + + // ──────────────── stealth ─────────────────── + private StealthAsyncManipulator? _stealthManipulator = null; + private StealthConfig? 
_stealthConfig = null; + // ──────────────────────────────────────────── + + + public ContextStage(DownloadContextBuilder ctxBuilder, + AsyncTransformer transformer) { + _ctxBuilder = ctxBuilder; + _transformer = transformer; + _contentKind = transformer switch { + AsyncTransformer => ContentKind.Html, + AsyncTransformer => ContentKind.Binary, + _ => throw new ArgumentException(string.Format(Exceptions.Exceptions.fluent_unsupported_transformer, + transformer.GetType() + .AsUniqueName())) + }; + } + + public IContextStage Configure(Action> configure) { + configure(_ctxBuilder); + return this; + } + + public IContextStage WithParallelism(int degree) { + _parallelism = Math.Max(1, degree); + return this; + } + + public IContextStage WithTimeout(TimeSpan timeout) { + _ctxBuilder.WithTimeOut(timeout); + return this; + } + + public IContextStage WithRetryReporter(IProgress reporter) { + _ctxBuilder.WithRetryReporter(reporter); + return this; + } + + /// + /// Uses fragments to download multiple links in parallel. This strategy is mutually exclusive with + /// + /// + public IContextStage UseFragments() { + if (_playwrightManipulator is not null) + _playwrightManipulator = null; + if (_channel == Channel.Playwright) + _channel = Channel.Plain; + + _fragmentMode = FragmentMode.Fragmented; + return this; + } + + /// + /// Use a puppet browser to download the links. 
This strategy is mutually exclusive with + /// + /// The page manipulator + /// + public IContextStage UsePlaywright(PlaywrightAsyncManipulator manipulator) { + if (_fragmentMode == FragmentMode.Fragmented) + _fragmentMode = FragmentMode.Single; + if (_stealthManipulator is not null) + _stealthManipulator = null; + + _channel = Channel.Playwright; + _playwrightManipulator = manipulator; + return this; + } + + public IContextStage UseStealth(StealthAsyncManipulator manipulator, StealthConfig config) { + if (_playwrightManipulator is not null) + _playwrightManipulator = null; + + _channel = Channel.Stealth; + _stealthManipulator = manipulator; + _stealthConfig = config; + return this; + } + + private object ConstructUnitDownloader(DownloadContext context) { + #region Utility functions + + T To(object? o) where T : class + => (o as T) ?? + throw new Exception( + string.Format(Exceptions.Exceptions.fluent_type_conversion_failure, + o?.GetType().AsUniqueName() ?? "null", typeof(T).AsUniqueName())); + + AsyncTransformer HtmlTransformer() + => To>(_transformer); + + AsyncTransformer ByteTransformer() + => To>(_transformer); + + AsyncDownloadFailurePredicate[] HtmlFailurePredicates() + => To[]>(context.AsyncFailurePredicates); + + AsyncDownloadFailurePredicate[] ByteFailurePredicates() + => To[]>(context.AsyncFailurePredicates); + + T EnsureExists(T? o) where T : class + => (o ?? 
throw new Exception(Exceptions.Exceptions.fluent_invalid_state)); + + #endregion + + return (_channel, _fragmentMode, _contentKind) switch { + // ──────────────── fragmented HTML ──────────────── + (Channel.Plain, FragmentMode.Fragmented, ContentKind.Html) + => new UnitFragmentDownloader( + context.Web, + HtmlTransformer(), + HtmlFailurePredicates(), + _parallelism, + context.DownloadLogger), + // ──────────────── fragmented binary ──────────────── + (Channel.Plain, FragmentMode.Fragmented, ContentKind.Binary) + => new UnitFragmentDownloaderBinary( + context.Client, + ByteTransformer(), + ByteFailurePredicates(), + _parallelism, + context.DownloadLogger), + // ──────────────── single HTML ──────────────── + (Channel.Plain, FragmentMode.Single, ContentKind.Html) + => new UnitDownloader( + context.Web, + HtmlTransformer(), + HtmlFailurePredicates()), + // ──────────────── single binary ──────────────── + (Channel.Plain, FragmentMode.Single, ContentKind.Binary) + => new UnitDownloaderBinary( + context.Client, + ByteTransformer(), + ByteFailurePredicates()), + // ──────────────── single playwright binary ──────────────── + (Channel.Playwright, FragmentMode.Single, ContentKind.Binary) + => new PlaywrightUnitDownloader( + context.Client, + EnsureExists(_playwrightManipulator), + ByteTransformer(), + ByteFailurePredicates() + ), + // ──────────────── single playwrigt HTML ──────────────── + (Channel.Playwright, FragmentMode.Single, ContentKind.Html) + => new PlaywrightUnitPageDownloader( + context.Web, + EnsureExists(_playwrightManipulator), + HtmlTransformer(), + HtmlFailurePredicates()), + // ──────────────── single stealth HTML ──────────────── + (Channel.Stealth, FragmentMode.Single, ContentKind.Html) + => new StealthUnitPageDownloader( + context.Web, + EnsureExists(_stealthConfig), + EnsureExists(_stealthManipulator), + HtmlTransformer(), + HtmlFailurePredicates()), + // ──────────────── single stealth binary ──────────────── + (Channel.Stealth, FragmentMode.Single, 
ContentKind.Binary) + => new StealthUnitDownloader( + context.Client, + EnsureExists(_stealthConfig), + EnsureExists(_stealthManipulator), + ByteTransformer(), + ByteFailurePredicates()), + // ──────────────── fragment stealth HTML ──────────────── + (Channel.Stealth, FragmentMode.Fragmented, ContentKind.Html) + => new StealthFragmentPageDownloader( + context.Web, + EnsureExists(_stealthConfig), + EnsureExists(_stealthManipulator), + HtmlTransformer(), + HtmlFailurePredicates()), + // ──────────────── fragment stealth binary ──────────────── + (Channel.Stealth, FragmentMode.Fragmented, ContentKind.Binary) + => new StealthFragmentDownloader( + context.Client, + EnsureExists(_stealthConfig), + EnsureExists(_stealthManipulator), + ByteTransformer(), + ByteFailurePredicates()), + _ => throw new Exception(string.Format(Exceptions.Exceptions.fluent_unsupported_pattern, + $"({_channel}, {_fragmentMode}, {_contentKind})")), + }; + } + + private IAsyncEnumerator> ConstructDownloader(DownloadContext context) { + var copyOfContext = DownloadContextBuilder.FromContext(context).Build(); + return _fragmentMode switch { + FragmentMode.Fragmented => new SequentialFragmentDownloader( + copyOfContext, + ctx => (IUnitDownloader>>)ConstructUnitDownloader(ctx), + context.DownloadLogger).UnwrapFragmented(), + FragmentMode.Single => new SequentialDownloader( + copyOfContext, + ctx => (IUnitDownloader)ConstructUnitDownloader(ctx), + context.DownloadLogger).WrapOrdered(), + _ => throw new Exception(string.Format(Exceptions.Exceptions.fluent_unsupported_pattern, + $"{_fragmentMode}")), + }; + } + + public DownloadEnumerable Build() { + var context = _ctxBuilder.Build(); + var enumerable = new DownloadEnumerable(ConstructDownloader(context)); + return enumerable; + } +} \ No newline at end of file diff --git a/Beam.Fluent/Core/FragmentMode.cs b/Beam.Fluent/Core/FragmentMode.cs new file mode 100644 index 0000000..a2e29b1 --- /dev/null +++ b/Beam.Fluent/Core/FragmentMode.cs @@ -0,0 +1,17 @@ 
+namespace Beam.Fluent; + +public enum FragmentMode { + Single, + Fragmented +} + +public enum Channel { + Plain, + Stealth, + Playwright +} + +public enum ContentKind { + Html, + Binary +} \ No newline at end of file diff --git a/Beam.Fluent/Core/IContextStage.cs b/Beam.Fluent/Core/IContextStage.cs new file mode 100644 index 0000000..c10728f --- /dev/null +++ b/Beam.Fluent/Core/IContextStage.cs @@ -0,0 +1,18 @@ +using Beam.Abstractions; +using Beam.Downloaders; +using Beam.Models; +using Beam.Playwright; +using Beam.Stealth; + +namespace Beam.Fluent; + +public interface IContextStage { + IContextStage Configure(Action> configure); + IContextStage WithParallelism(int degree); + IContextStage WithTimeout(TimeSpan timeout); + IContextStage WithRetryReporter(IProgress reporter); + IContextStage UseFragments(); + IContextStage UsePlaywright(PlaywrightAsyncManipulator manipulator); + IContextStage UseStealth(StealthAsyncManipulator manipulator, StealthConfig config); + DownloadEnumerable Build(); +} \ No newline at end of file diff --git a/Beam.Fluent/Core/IDownloadStage.cs b/Beam.Fluent/Core/IDownloadStage.cs new file mode 100644 index 0000000..4c2c6bc --- /dev/null +++ b/Beam.Fluent/Core/IDownloadStage.cs @@ -0,0 +1,12 @@ +using System.Collections.Concurrent; + +namespace Beam.Fluent; + +public interface IDownloadStage { + IDownloadStage SaveToDirectory(string dir); + IDownloadStage SaveToFiles(IEnumerable files); + IDownloadStage SaveToMemory(ConcurrentBag bag); + void WaitForDownload(); + Task WaitForDownloadAsync(); + DownloadEnumerable AsAsyncEnumerable(); +} \ No newline at end of file diff --git a/Beam.Fluent/Core/ITransformStage.cs b/Beam.Fluent/Core/ITransformStage.cs new file mode 100644 index 0000000..34e2ae5 --- /dev/null +++ b/Beam.Fluent/Core/ITransformStage.cs @@ -0,0 +1,8 @@ +using Beam.Dynamic; +using Beam.Models; + +namespace Beam.Fluent; + +public interface ITransformStage { + IContextStage WithTransformer(AsyncTransformer factory); +} \ No newline 
at end of file diff --git a/Beam.Fluent/DownloadBuilder.ContextStage.cs b/Beam.Fluent/DownloadBuilder.ContextStage.cs deleted file mode 100644 index 1940bd2..0000000 --- a/Beam.Fluent/DownloadBuilder.ContextStage.cs +++ /dev/null @@ -1,190 +0,0 @@ -using Beam.Models; -using HtmlAgilityPack; -using Beam.Playwright; -using Beam.Stealth; -using Beam; -using Beam.Abstractions; -using Beam.Downloaders; - -namespace Beam.Fluent { -public static partial class DownloadBuilder { - private sealed class ContextStage : IContextStage { - private readonly DownloadContextBuilder _ctxBuilder; - private readonly AsyncTransformer _transformer; - private int _parallelism = 4; - private bool _useFragments = false; - private PlaywrightAsyncManipulator? _usePlaywrightManipulator = null; - private StealthAsyncManipulator? _useStealthManipulator = null; - private StealthConfig? _stealthConfig = null; - - public ContextStage(DownloadContextBuilder ctxBuilder, AsyncTransformer transformer) { - _ctxBuilder = ctxBuilder; - _transformer = transformer; - } - - public IContextStage Configure(Action> configure) { - configure(_ctxBuilder); - return this; - } - - public IContextStage WithParallelism(int degree) { - _parallelism = Math.Max(1, degree); - return this; - } - - public IContextStage WithTimeout(TimeSpan timeout) { - _ctxBuilder.WithTimeOut(timeout); - return this; - } - - public IContextStage WithRetryReporter(IProgress reporter) { - _ctxBuilder.WithRetryReporter(reporter); - return this; - } - - /// - /// Uses fragments to download multiple links in parallel. This strategy is mutually exclusive with - /// - /// - public IContextStage UseFragments() { - if (_usePlaywrightManipulator is not null) - _usePlaywrightManipulator = null; - - _useFragments = true; - return this; - } - - /// - /// Use a puppet browser to download the links. 
This strategy is mutually exclusive with - /// - /// The page manipulator - /// - public IContextStage UsePlaywright(PlaywrightAsyncManipulator manipulator) { - if (_useFragments) - _useFragments = false; - if (_useStealthManipulator is not null) - _useStealthManipulator = null; - - _usePlaywrightManipulator = manipulator; - return this; - } - - public IContextStage UseStealth(StealthAsyncManipulator manipulator, StealthConfig config) { - if (_usePlaywrightManipulator is not null) - _usePlaywrightManipulator = null; - - _useStealthManipulator = manipulator; - _stealthConfig = config; - return this; - } - - private object ConstructUnitDownloader(DownloadContext context) { - return (_useFragments, _useStealthManipulator, _usePlaywrightManipulator, _transformer, context.AsyncFailurePredicates) switch { - // ──────────────── fragmented HTML ──────────────── - (true, null, _, AsyncTransformer asyncHtmlTransformer, - AsyncDownloadFailurePredicate[] documentFailurePredicates) - => new UnitFragmentDownloader( - context.Web, - asyncHtmlTransformer, - documentFailurePredicates, - _parallelism, - context.DownloadLogger), - // ──────────────── fragmented binary ──────────────── - (true, null, _, AsyncTransformer asyncBinaryTransformer, - AsyncDownloadFailurePredicate[] responseFailurePredicates) - => new UnitFragmentDownloaderBinary( - context.Client, - asyncBinaryTransformer, - responseFailurePredicates, - _parallelism, - context.DownloadLogger), - // ──────────────── single HTML ──────────────── - (false, null, null, AsyncTransformer asyncHtmlTransformer, - AsyncDownloadFailurePredicate[] documentFailurePredicates) - => new UnitDownloader( - context.Web, - asyncHtmlTransformer, - documentFailurePredicates), - // ──────────────── single binary ──────────────── - (false, null, null, AsyncTransformer asyncBinaryTransformer, - AsyncDownloadFailurePredicate[] responseFailurePredicates) - => new UnitDownloaderBinary( - context.Client, - asyncBinaryTransformer, - 
responseFailurePredicates), - // ──────────────── single playwright binary ──────────────── - (false, _, PlaywrightAsyncManipulator manipulator, AsyncTransformer asyncBinaryTransformer, - AsyncDownloadFailurePredicate[] responseFailurePredicates) - => new PlaywrightUnitDownloader( - context.Client, - manipulator, - asyncBinaryTransformer, - responseFailurePredicates), - // ──────────────── single playwrigt HTML ──────────────── - (false, _, PlaywrightAsyncManipulator manipulator, AsyncTransformer asyncHtmlTransformer, - AsyncDownloadFailurePredicate[] documentFailurePredicates) - => new PlaywrightUnitPageDownloader( - context.Web, - manipulator, - asyncHtmlTransformer, - documentFailurePredicates), - // ──────────────── single stealth HTML ──────────────── - (false, StealthAsyncManipulator manipulator, _, AsyncTransformer asyncHtmlTransformer, - AsyncDownloadFailurePredicate[] documentFailurePredicates) - => new StealthUnitPageDownloader( - context.Web, - _stealthConfig ?? throw new Exception($"Stealth config is null"), - manipulator, - asyncHtmlTransformer, - documentFailurePredicates), - // ──────────────── single stealth binary ──────────────── - (false, StealthAsyncManipulator manipulator, _, AsyncTransformer asyncBinaryTransformer, - AsyncDownloadFailurePredicate[] responseFailurePredicates) - => new StealthUnitDownloader( - context.Client, - _stealthConfig ?? throw new Exception($"Stealth config is null"), - manipulator, - asyncBinaryTransformer, - responseFailurePredicates), - // ──────────────── fragment stealth HTML ──────────────── - (true, StealthAsyncManipulator manipulator, _, AsyncTransformer asyncHtmlTransformer, - AsyncDownloadFailurePredicate[] downloadFailurePredicates) - => new StealthFragmentPageDownloader( - context.Web, - _stealthConfig ?? 
throw new Exception($"Stealth config is null"), - manipulator, - asyncHtmlTransformer), - // ──────────────── fragment stealth binary ──────────────── - (true, StealthAsyncManipulator manipulator, _, AsyncTransformer asyncBinaryTransformer, - AsyncDownloadFailurePredicate[] responseFailurePredicates) - => new StealthFragmentDownloader( - context.Client, - _stealthConfig ?? throw new Exception($"Stealth config is null"), - manipulator, - asyncBinaryTransformer), - _ => throw new Exception($"Unsupported transformer / failure-predicate combinatAsyion. Missing pattern: {_useFragments} , {_transformer.GetType().AsUniqueName()} , {context.AsyncFailurePredicates?.GetType().AsUniqueName()}"), - }; - } - - private IAsyncEnumerator> ConstructDownloader(DownloadContext context) { - var copyOfContext = DownloadContextBuilder.FromContext(context).Build(); - return _useFragments switch { - true => new SequentialFragmentDownloader( - copyOfContext, - ctx => (IUnitDownloader>>)ConstructUnitDownloader(ctx), - context.DownloadLogger).UnwrapFragmented(), - false => new SequentialDownloader( - copyOfContext, - ctx => (IUnitDownloader)ConstructUnitDownloader(ctx), - context.DownloadLogger).WrapOrdered() - }; - } - - public DownloadEnumerable Build() { - var context = _ctxBuilder.Build(); - var enumerable = new DownloadEnumerable(ConstructDownloader(context)); - return enumerable; - } - } - } -} diff --git a/Beam.Fluent/DownloadBuilder.DownloadStage.cs b/Beam.Fluent/DownloadBuilder.DownloadStage.cs deleted file mode 100644 index 6dcd6ed..0000000 --- a/Beam.Fluent/DownloadBuilder.DownloadStage.cs +++ /dev/null @@ -1,44 +0,0 @@ -using System.Collections.Concurrent; -using System.Text.Json; -using Beam.Models; - -namespace Beam.Fluent { -public static partial class DownloadBuilder { - private sealed class DownloadStage(DownloadEnumerable download) : IDownloadStage { - private IAsyncEnumerable> _download = download; - - public DownloadEnumerable AsAsyncEnumerable() { - return new 
DownloadEnumerable(_download.GetAsyncEnumerator()); - } - - private async IAsyncEnumerable> _SaveToDirectory(string dir) { - Directory.CreateDirectory(dir); - await foreach(var download in _download) { - await System.IO.File.WriteAllTextAsync(Path.Combine(dir, $"{Path.GetRandomFileName()}.{download.Order}.json"), JsonSerializer.Serialize(dir)); - yield return download; - } - } - - public IDownloadStage SaveToDirectory(string dir) { - _download = _SaveToDirectory(dir); - return this; - } - - public IDownloadStage SaveToFiles(IEnumerable files) { - throw new NotImplementedException(); - } - - public IDownloadStage SaveToMemory(ConcurrentBag bag) { - throw new NotImplementedException(); - } - - public void WaitForDownload() { - throw new NotImplementedException(); - } - - public Task WaitForDownloadAsync() { - throw new NotImplementedException(); - } - } - } -} diff --git a/Beam.Fluent/DownloadBuilder.IAlternativeLinkStage.cs b/Beam.Fluent/DownloadBuilder.IAlternativeLinkStage.cs deleted file mode 100644 index 47ead72..0000000 --- a/Beam.Fluent/DownloadBuilder.IAlternativeLinkStage.cs +++ /dev/null @@ -1,9 +0,0 @@ -using Beam.Models; - -namespace Beam.Fluent { -public static partial class DownloadBuilder { - public interface IAlternativeLinkStage { - IAlternativeTransformStage WithLinks(IEnumerable links); - } - } -} diff --git a/Beam.Fluent/DownloadBuilder.IAlternativeTransformStage.cs b/Beam.Fluent/DownloadBuilder.IAlternativeTransformStage.cs deleted file mode 100644 index bad6e38..0000000 --- a/Beam.Fluent/DownloadBuilder.IAlternativeTransformStage.cs +++ /dev/null @@ -1,12 +0,0 @@ -using Beam.Models; - -namespace Beam.Fluent { -public static partial class DownloadBuilder { - public interface IAlternativeTransformStage { - IContextStage WithTransformer(AsyncTransformer transformer); - IContextStage WithTransformer(Func transformer) { - return WithTransformer(rt => Task.FromResult(transformer(rt))); - } - } - } -} diff --git 
a/Beam.Fluent/DownloadBuilder.IContextStage.cs b/Beam.Fluent/DownloadBuilder.IContextStage.cs deleted file mode 100644 index 8212f60..0000000 --- a/Beam.Fluent/DownloadBuilder.IContextStage.cs +++ /dev/null @@ -1,20 +0,0 @@ -using Beam.Abstractions; -using Beam.Downloaders; -using Beam.Models; -using Beam.Playwright; -using Beam.Stealth; - -namespace Beam.Fluent { -public static partial class DownloadBuilder { - public interface IContextStage { - IContextStage Configure(Action> configure); - IContextStage WithParallelism(int degree); - IContextStage WithTimeout(TimeSpan timeout); - IContextStage WithRetryReporter(IProgress reporter); - DownloadEnumerable Build(); - IContextStage UseFragments(); - IContextStage UsePlaywright(PlaywrightAsyncManipulator manipulator); - IContextStage UseStealth(StealthAsyncManipulator manipulator, StealthConfig config); - } - } -} diff --git a/Beam.Fluent/DownloadBuilder.IDownloadStage.cs b/Beam.Fluent/DownloadBuilder.IDownloadStage.cs deleted file mode 100644 index 99b7f85..0000000 --- a/Beam.Fluent/DownloadBuilder.IDownloadStage.cs +++ /dev/null @@ -1,14 +0,0 @@ -using System.Collections.Concurrent; - -namespace Beam.Fluent { -public static partial class DownloadBuilder { - public interface IDownloadStage { - IDownloadStage SaveToDirectory(string dir); - IDownloadStage SaveToFiles(IEnumerable files); - IDownloadStage SaveToMemory(ConcurrentBag bag); - DownloadEnumerable AsAsyncEnumerable(); - void WaitForDownload(); - Task WaitForDownloadAsync(); - } - } -} diff --git a/Beam.Fluent/DownloadBuilder.ILinkStage.cs b/Beam.Fluent/DownloadBuilder.ILinkStage.cs deleted file mode 100644 index 17712bf..0000000 --- a/Beam.Fluent/DownloadBuilder.ILinkStage.cs +++ /dev/null @@ -1,13 +0,0 @@ -using Beam.Models; - -namespace Beam.Fluent { - public static partial class DownloadBuilder { - /* ────────────────────────────── Stages ─────────────────────────────── */ - - public interface ILinkStage { - ITransformStage WithLink(); - ITransformStage 
WithLinkGenerator(); - ILinkStage WithRange(Range range); - } - } -} diff --git a/Beam.Fluent/DownloadBuilder.ITransformStage.cs b/Beam.Fluent/DownloadBuilder.ITransformStage.cs deleted file mode 100644 index 7790df5..0000000 --- a/Beam.Fluent/DownloadBuilder.ITransformStage.cs +++ /dev/null @@ -1,10 +0,0 @@ -using Beam.Dynamic; -using Beam.Models; - -namespace Beam.Fluent { -public static partial class DownloadBuilder { - public interface ITransformStage { - IContextStage WithTransformer(Func> factory); - } - } -} diff --git a/Beam.Fluent/DownloadBuilder.LinkStage.cs b/Beam.Fluent/DownloadBuilder.LinkStage.cs deleted file mode 100644 index 8145bad..0000000 --- a/Beam.Fluent/DownloadBuilder.LinkStage.cs +++ /dev/null @@ -1,54 +0,0 @@ -using Beam.Data; -using Beam.Downloaders; -using Beam.Dynamic; -using Beam.Models; - -namespace Beam.Fluent { - public static partial class DownloadBuilder { - /* ──────────────────────────── Stage types ─────────────────────────── */ - private sealed record LinkStage( - WebResource Source, - State Initial, - BeamDataContext Data, - DownloadContextBuilder CtxBuilder) : ILinkStage, IAlternativeLinkStage { - - private State? 
endState; - private bool linksFrozen = false; - - public ITransformStage WithLink() { - var link = Data.Templates[Source.Key].Builder.Build(Initial); - CtxBuilder.WithLinks([link]); - return new TransformStage(Source, Data, CtxBuilder); - } - - public ITransformStage WithLinkGenerator() { - var template = Data.Templates[Source.Key]; - var generator = StringEnumerable.FromGenerator(new OrderedLinkGenerator( - template.Builder, - new NumberedStateChanger(template.Factory.Behavior), - Initial, endState)); - CtxBuilder.WithLinks(generator); - linksFrozen = true; - return new TransformStage(Source, Data, CtxBuilder); - } - - public IAlternativeTransformStage WithLinks(IEnumerable links) { - CtxBuilder.WithLinks(links); - return new TransformStage(Source, Data, CtxBuilder); - } - - public ILinkStage WithRange(Range range) { - if (linksFrozen) - throw new InvalidOperationException($"WithRange must be called before WithLinkGenerator"); - if (range.End.Value < range.Start.Value) - throw new ArgumentOutOfRangeException(nameof(range), $" start must be < end"); - var template = Data.Templates[Source.Key]; - var stateChanger = new NumberedStateChanger(template.Factory.Behavior); - endState = Initial.Copy(); - stateChanger.Apply(Initial, range.Start.Value - 1); - stateChanger.Apply(endState, range.End.Value - 1); - return this; - } - } - } -} diff --git a/Beam.Fluent/DownloadBuilder.SourceKind.cs b/Beam.Fluent/DownloadBuilder.SourceKind.cs deleted file mode 100644 index 46de3e9..0000000 --- a/Beam.Fluent/DownloadBuilder.SourceKind.cs +++ /dev/null @@ -1,6 +0,0 @@ -namespace Beam.Fluent { - public static partial class DownloadBuilder { - /* ────────────────────────── Implementation ────────────────────────── */ - private enum SourceKind { Meta, Text } - } -} diff --git a/Beam.Fluent/DownloadBuilder.TransformStage.cs b/Beam.Fluent/DownloadBuilder.TransformStage.cs deleted file mode 100644 index 1b04cc0..0000000 --- a/Beam.Fluent/DownloadBuilder.TransformStage.cs +++ /dev/null @@ 
-1,22 +0,0 @@ -using Beam.Data; -using Beam.Downloaders; -using Beam.Dynamic; -using Beam.Models; - -namespace Beam.Fluent { -public static partial class DownloadBuilder { - private sealed record TransformStage( - WebResource Source, - BeamDataContext Data, - DownloadContextBuilder CtxBuilder) : ITransformStage, IAlternativeTransformStage { - public IContextStage WithTransformer(Func> factory) { - var transformer = factory(Data.Bindings[Source.Bindings]); - return new ContextStage(CtxBuilder, transformer); - } - - public IContextStage WithTransformer(AsyncTransformer transformer) { - return new ContextStage(CtxBuilder, transformer); - } - } - } -} diff --git a/Beam.Fluent/DownloadBuilder.cs b/Beam.Fluent/DownloadBuilder.cs deleted file mode 100644 index fcb85b6..0000000 --- a/Beam.Fluent/DownloadBuilder.cs +++ /dev/null @@ -1,43 +0,0 @@ -using aeqw89.DataKeys; -using Beam; -using Microsoft.Extensions.Logging; -using System; -using System.Collections.Generic; -using Beam.Data; -using Beam.Downloaders; -using Beam.Models; - -namespace Beam.Fluent { - /// - /// Type‑safe, staged builder that prevents callers from forgetting the mandatory steps - /// (source → link selection → transformer) and surfaces operational knobs as first‑class - /// methods instead of magic parameters. - /// - public static partial class DownloadBuilder { - /* ──────────────────────────── Entry points ─────────────────────────── */ - - public static ILinkStage FromResource(DataKey dict, string kind, BeamDataContext beamDataDictionary) - => Create(dict, beamDataDictionary, kind); - - public static IAlternativeLinkStage FromScratch() - => new LinkStage(null!, null!, null!, new()); - - private static ILinkStage Create(DataKey resourceDict, BeamDataContext data, string kind) { - var (source, initial) = Resolve(resourceDict, kind, data); - var ctxBuilder = new DownloadContextBuilder().WithLinks([]); // placeholder, filled later. 
- return new LinkStage(source, initial, data, ctxBuilder); - } - - private static (WebResource Source, State Initial) Resolve(DataKey resourceDict, string kind, BeamDataContext data) { - if (!data.ResourceDictionaries.TryGetValue(resourceDict, out var dict)) - throw new KeyNotFoundException($"Novel '{resourceDict}' not found in BeamDataDictionary."); - if (!dict.Resources.TryGetValue(kind, out var sourceKey)) - throw new KeyNotFoundException($"Novel kind '{kind}' not found in '{resourceDict}'"); - if (!data.Resources.TryGetValue(sourceKey, out var source)) - throw new KeyNotFoundException($"Novel source '{sourceKey}' was not found"); - if (!data.InitialStates.TryGetValue(sourceKey.To(), out var istate)) - throw new KeyNotFoundException($"Immutable state for kind '{kind}' not found"); - return (source, istate); - } - } -} diff --git a/Beam.Fluent/DownloadStage.cs b/Beam.Fluent/DownloadStage.cs new file mode 100644 index 0000000..3186cc1 --- /dev/null +++ b/Beam.Fluent/DownloadStage.cs @@ -0,0 +1,44 @@ +using System.Collections.Concurrent; +using System.Text.Json; +using Beam.Models; + +namespace Beam.Fluent; + +internal sealed class DownloadStage(DownloadEnumerable download) : IDownloadStage { + private IAsyncEnumerable> _download = download; + + public DownloadEnumerable AsAsyncEnumerable() { + return new DownloadEnumerable(_download.GetAsyncEnumerator()); + } + + private async IAsyncEnumerable> _SaveToDirectory(string dir) { + Directory.CreateDirectory(dir); + await foreach (var download in _download) { + await System.IO.File.WriteAllTextAsync( + Path.Combine(dir, $"{Path.GetRandomFileName()}.{download.Order}.json"), + JsonSerializer.Serialize(dir)); + yield return download; + } + } + + public IDownloadStage SaveToDirectory(string dir) { + _download = _SaveToDirectory(dir); + return this; + } + + public IDownloadStage SaveToFiles(IEnumerable files) { + throw new NotImplementedException(); + } + + public IDownloadStage SaveToMemory(ConcurrentBag bag) { + throw new 
NotImplementedException(); + } + + public void WaitForDownload() { /* Not implemented: always throws NotImplementedException. */ + throw new NotImplementedException(); + } + + public Task WaitForDownloadAsync() { /* Not implemented: always throws NotImplementedException. */ + throw new NotImplementedException(); + } +} \ No newline at end of file diff --git a/Beam.Fluent/FluentDownload.cs b/Beam.Fluent/FluentDownload.cs new file mode 100644 index 0000000..745329d --- /dev/null +++ b/Beam.Fluent/FluentDownload.cs @@ -0,0 +1,28 @@ +using aeqw89.DataKeys; +using Beam.Data; +using Beam.Downloaders; +using Beam.Dynamic; +using Beam.Models; + +namespace Beam.Fluent; + +public static class FluentDownload { /* Static entry points; each one wraps a DownloadContextBuilder in a new TransformStage. */ + public static ITransformStage Links(params IEnumerable links) { /* Starts from an explicit set of links, passed to DownloadContextBuilder.WithLinks. */ + return new TransformStage(new DownloadContextBuilder() + .WithLinks(links)); + } + + public static ITransformStage + ResourceDefinition(ResourceDefinition definition) { /* Builds the links from definition.Location; throws ArgumentException when Location.States is empty. */ + if (definition.Location.States.Count == 0) + throw new ArgumentException(Exceptions.Exceptions.resource_definition_invalid_states_count, nameof(definition)); + var linkGenerator = new OrderedLinkGenerator(definition.Location.Segments, (NumberedStateChanger)definition.Location.StateChanger.Behavior, /* NOTE(review): hard cast; presumably only NumberedStateChanger behaviours are supported here, confirm */ + definition.Location.States.First().Copy()); /* the generator receives a Copy() of the first state, not the instance held by the definition */ + return new TransformStage(new DownloadContextBuilder() + .WithLinks(StringEnumerable.FromGenerator(linkGenerator!))); + } + + public static ITransformStage FromContext(DownloadContext existing) { /* Builds the stage from an existing DownloadContext via DownloadContextBuilder.FromContext. */ + return new TransformStage(DownloadContextBuilder.FromContext(existing)); + } +} \ No newline at end of file diff --git a/Beam.Fluent/TransformStage.cs b/Beam.Fluent/TransformStage.cs new file mode 100644 index 0000000..778c1c2 --- /dev/null +++ b/Beam.Fluent/TransformStage.cs @@ -0,0 +1,12 @@ +using Beam.Data; +using Beam.Downloaders; +using Beam.Dynamic; +using Beam.Models; + +namespace Beam.Fluent; + +internal sealed class TransformStage(DownloadContextBuilder CtxBuilder) : ITransformStage { /* Stage that holds the context builder captured by the primary constructor. */ + public IContextStage WithTransformer(AsyncTransformer transformer) { /* Hands the captured builder and the given transformer to a new ContextStage. */ + return new ContextStage(CtxBuilder, transformer); + } +}
\ No newline at end of file diff --git a/Beam.Models/Beam.Models.csproj b/Beam.Models/Beam.Models.csproj index 9464a24..7ace109 100644 --- a/Beam.Models/Beam.Models.csproj +++ b/Beam.Models/Beam.Models.csproj @@ -11,7 +11,7 @@ - + runtime; build; native; contentfiles; analyzers; buildtransitive diff --git a/Beam.Models/ResourceDefinition.cs b/Beam.Models/ResourceDefinition.cs index d5d3e39..ff74b31 100644 --- a/Beam.Models/ResourceDefinition.cs +++ b/Beam.Models/ResourceDefinition.cs @@ -6,16 +6,9 @@ namespace Beam.Models; public class ResourceDefinition { public required DataKey Key { get; init; } - public required MetaData Meta { get; init; } - - /// Map of element name to extraction config. Keys must match ^[A-Za-z0-9_-]+$ public required Table Elements { get; init; } - - /// Minimum 1 item; polymorphic segments discriminated by "type". - public required ILinkBuilder Url { get; init; } - - /// Keys must match ^[A-Za-z0-9_-]+$ + public required UrlLocation Location { get; init; } public required Table Relations { get; init; } public class MetaData { @@ -24,4 +17,10 @@ public class ResourceDefinition { public string? Description { get; init; } public string? ProjectUrl { get; init; } } + + public class UrlLocation { + public ILinkBuilder Segments { get; init; } + public List States { get; init; } + public IStateChangerFactory StateChanger { get; init; } + } } \ No newline at end of file diff --git a/aeqw89.Beam/aeqw89.Beam.csproj b/aeqw89.Beam/aeqw89.Beam.csproj index 8d1c4ef..9474961 100644 --- a/aeqw89.Beam/aeqw89.Beam.csproj +++ b/aeqw89.Beam/aeqw89.Beam.csproj @@ -35,7 +35,7 @@ true - + true