Refactor fluent download pipelines
This commit is contained in:
@@ -9,7 +9,7 @@
|
|||||||
|
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<PackageReference Include="aeqw89.DataKeys" Version="2.1.0" />
|
<PackageReference Include="aeqw89.DataKeys" Version="2.1.0" />
|
||||||
<PackageReference Include="aeqw89.PersistentData" Version="1.3.3" />
|
<PackageReference Include="aeqw89.PersistentData" Version="1.4.5" />
|
||||||
<PackageReference Include="HtmlAgilityPack" Version="1.11.72" />
|
<PackageReference Include="HtmlAgilityPack" Version="1.11.72" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
|
||||||
|
|||||||
@@ -1,3 +1,5 @@
|
|||||||
namespace Beam.Abstractions;
|
namespace Beam.Abstractions;
|
||||||
|
|
||||||
public interface IStateChangerFactory { }
|
/// <summary>
/// Factory abstraction for state changers. Besides acting as a marker type it
/// exposes the <see cref="IStateChangeBehaviour"/> associated with the factory.
/// </summary>
public interface IStateChangerFactory {
|
||||||
|
/// <summary>
/// The state-change behaviour associated with this factory.
/// </summary>
// NOTE(review): the type is spelled "Behaviour" while the property is spelled "Behavior" — confirm which spelling the API should standardise on.
IStateChangeBehaviour Behavior { get; }
|
||||||
|
}
|
||||||
@@ -0,0 +1,17 @@
|
|||||||
|
<Project Sdk="Microsoft.NET.Sdk">
|
||||||
|
|
||||||
|
<PropertyGroup>
|
||||||
|
<TargetFramework>net9.0</TargetFramework>
|
||||||
|
<ImplicitUsings>enable</ImplicitUsings>
|
||||||
|
<Nullable>enable</Nullable>
|
||||||
|
</PropertyGroup>
|
||||||
|
|
||||||
|
<ItemGroup>
|
||||||
|
<PackageReference Include="Microsoft.Extensions.Logging" Version="10.0.0-rc.1.25451.107" />
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
|
<ItemGroup>
|
||||||
|
<ProjectReference Include="..\Beam.Abstractions\Beam.Abstractions.csproj" />
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
|
</Project>
|
||||||
@@ -0,0 +1,14 @@
|
|||||||
|
<Project Sdk="Microsoft.NET.Sdk">
|
||||||
|
|
||||||
|
<PropertyGroup>
|
||||||
|
<TargetFramework>net9.0</TargetFramework>
|
||||||
|
<ImplicitUsings>enable</ImplicitUsings>
|
||||||
|
<Nullable>enable</Nullable>
|
||||||
|
</PropertyGroup>
|
||||||
|
|
||||||
|
<ItemGroup>
|
||||||
|
<ProjectReference Include="..\Beam.Dynamic\Beam.Dynamic.csproj" />
|
||||||
|
<ProjectReference Include="..\Beam.Models\Beam.Models.csproj" />
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
|
</Project>
|
||||||
@@ -0,0 +1,19 @@
|
|||||||
|
<Project Sdk="Microsoft.NET.Sdk">
|
||||||
|
|
||||||
|
<PropertyGroup>
|
||||||
|
<TargetFramework>net9.0</TargetFramework>
|
||||||
|
<ImplicitUsings>enable</ImplicitUsings>
|
||||||
|
<Nullable>enable</Nullable>
|
||||||
|
</PropertyGroup>
|
||||||
|
|
||||||
|
<ItemGroup>
|
||||||
|
<ProjectReference Include="..\Beam.Abstractions\Beam.Abstractions.csproj" />
|
||||||
|
<ProjectReference Include="..\Beam.Exceptions\Beam.Exceptions.csproj" />
|
||||||
|
<ProjectReference Include="..\Beam.Models\Beam.Models.csproj" />
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
|
<ItemGroup>
|
||||||
|
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="9.0.9" />
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
|
</Project>
|
||||||
@@ -7,7 +7,7 @@
|
|||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<PackageReference Include="aeqw89.DataKeys" Version="2.0.1" />
|
<PackageReference Include="aeqw89.DataKeys" Version="2.0.1" />
|
||||||
<PackageReference Include="aeqw89.PersistentData" Version="1.3.3" />
|
<PackageReference Include="aeqw89.PersistentData" Version="1.4.5" />
|
||||||
<PackageReference Include="HtmlAgilityPack" Version="1.11.72" />
|
<PackageReference Include="HtmlAgilityPack" Version="1.11.72" />
|
||||||
<PackageReference Include="Microsoft.Recognizers.Text.Number" Version="1.8.13" />
|
<PackageReference Include="Microsoft.Recognizers.Text.Number" Version="1.8.13" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
|||||||
@@ -0,0 +1,24 @@
|
|||||||
|
<Project Sdk="Microsoft.NET.Sdk">
|
||||||
|
|
||||||
|
<PropertyGroup>
|
||||||
|
<TargetFramework>net9.0</TargetFramework>
|
||||||
|
<ImplicitUsings>enable</ImplicitUsings>
|
||||||
|
<Nullable>enable</Nullable>
|
||||||
|
</PropertyGroup>
|
||||||
|
|
||||||
|
<ItemGroup>
|
||||||
|
<EmbeddedResource Update="Exceptions.resx">
|
||||||
|
<Generator>PublicResXFileCodeGenerator</Generator>
|
||||||
|
<LastGenOutput>Exceptions.Designer.cs</LastGenOutput>
|
||||||
|
</EmbeddedResource>
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
|
<ItemGroup>
|
||||||
|
<Compile Update="Exceptions.Designer.cs">
|
||||||
|
<DesignTime>True</DesignTime>
|
||||||
|
<AutoGen>True</AutoGen>
|
||||||
|
<DependentUpon>Exceptions.resx</DependentUpon>
|
||||||
|
</Compile>
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
|
</Project>
|
||||||
Generated
+45
@@ -59,6 +59,42 @@ namespace Beam.Exceptions {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Looks up a localized string similar to The state of the builder is invalid..
|
||||||
|
/// </summary>
|
||||||
|
public static string fluent_invalid_state {
|
||||||
|
get {
|
||||||
|
return ResourceManager.GetString("fluent_invalid_state", resourceCulture);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Looks up a localized string similar to A type conversion from '{0}' to '{1}' has failed, indicating an invalid state..
|
||||||
|
/// </summary>
|
||||||
|
public static string fluent_type_conversion_failure {
|
||||||
|
get {
|
||||||
|
return ResourceManager.GetString("fluent_type_conversion_failure", resourceCulture);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Looks up a localized string similar to The builder state '{0}' points to an unsupported unit downloader..
|
||||||
|
/// </summary>
|
||||||
|
public static string fluent_unsupported_pattern {
|
||||||
|
get {
|
||||||
|
return ResourceManager.GetString("fluent_unsupported_pattern", resourceCulture);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Looks up a localized string similar to The transformer type '{0}' is unsupported by this fluent pathway. Consider manually instantiating DownloadContext..
|
||||||
|
/// </summary>
|
||||||
|
public static string fluent_unsupported_transformer {
|
||||||
|
get {
|
||||||
|
return ResourceManager.GetString("fluent_unsupported_transformer", resourceCulture);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Looks up a localized string similar to A fragment is locked when it should be free; failed to obtain updater..
|
/// Looks up a localized string similar to A fragment is locked when it should be free; failed to obtain updater..
|
||||||
/// </summary>
|
/// </summary>
|
||||||
@@ -104,6 +140,15 @@ namespace Beam.Exceptions {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Looks up a localized string similar to There must be at least one state in resource definition..
|
||||||
|
/// </summary>
|
||||||
|
public static string resource_definition_invalid_states_count {
|
||||||
|
get {
|
||||||
|
return ResourceManager.GetString("resource_definition_invalid_states_count", resourceCulture);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Looks up a localized string similar to Encountered an error while changing state.
|
/// Looks up a localized string similar to Encountered an error while changing state.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
|
|||||||
@@ -36,4 +36,19 @@
|
|||||||
<data name="fragment_locked" xml:space="preserve">
|
<data name="fragment_locked" xml:space="preserve">
|
||||||
<value>A fragment is locked when it should be free; failed to obtain updater.</value>
|
<value>A fragment is locked when it should be free; failed to obtain updater.</value>
|
||||||
</data>
|
</data>
|
||||||
|
<data name="fluent_unsupported_transformer" xml:space="preserve">
|
||||||
|
<value>The transformer type '{0}' is unsupported by this fluent pathway. Consider manually instantiating DownloadContext.</value>
|
||||||
|
</data>
|
||||||
|
<data name="fluent_invalid_state" xml:space="preserve">
|
||||||
|
<value>The state of the builder is invalid.</value>
|
||||||
|
</data>
|
||||||
|
<data name="fluent_type_conversion_failure" xml:space="preserve">
|
||||||
|
<value>A type conversion from '{0}' to '{1}' has failed, indicating an invalid state.</value>
|
||||||
|
</data>
|
||||||
|
<data name="fluent_unsupported_pattern" xml:space="preserve">
|
||||||
|
<value>The builder state '{0}' points to an unsupported unit downloader.</value>
|
||||||
|
</data>
|
||||||
|
<data name="resource_definition_invalid_states_count" xml:space="preserve">
|
||||||
|
<value>There must be at least one state in resource definition.</value>
|
||||||
|
</data>
|
||||||
</root>
|
</root>
|
||||||
@@ -7,7 +7,7 @@
|
|||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<PackageReference Include="aeqw89.DataKeys" Version="2.0.1" />
|
<PackageReference Include="aeqw89.DataKeys" Version="2.0.1" />
|
||||||
<PackageReference Include="aeqw89.PersistentData" Version="1.3.3" />
|
<PackageReference Include="aeqw89.PersistentData" Version="1.4.5" />
|
||||||
<PackageReference Include="Microsoft.Extensions.Logging" Version="9.0.9" />
|
<PackageReference Include="Microsoft.Extensions.Logging" Version="9.0.9" />
|
||||||
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="9.0.9" />
|
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="9.0.9" />
|
||||||
<PackageReference Include="Microsoft.Extensions.Logging.Console" Version="9.0.9" />
|
<PackageReference Include="Microsoft.Extensions.Logging.Console" Version="9.0.9" />
|
||||||
|
|||||||
@@ -0,0 +1,230 @@
|
|||||||
|
using Beam.Models;
|
||||||
|
using HtmlAgilityPack;
|
||||||
|
using Beam.Playwright;
|
||||||
|
using Beam.Stealth;
|
||||||
|
using Beam;
|
||||||
|
using Beam.Abstractions;
|
||||||
|
using Beam.Downloaders;
|
||||||
|
|
||||||
|
namespace Beam.Fluent;
|
||||||
|
|
||||||
|
/// <summary>
/// Fluent stage that collects the download options (channel, fragment mode,
/// parallelism, context settings) for a given transformer and finally builds
/// the <see cref="DownloadEnumerable{OutType}"/>.
/// </summary>
/// <typeparam name="RawType">Raw document type consumed by the transformer.</typeparam>
/// <typeparam name="OutType">Type produced by the transformer.</typeparam>
internal sealed class ContextStage<RawType, OutType> : IContextStage<RawType, OutType> {
    private readonly DownloadContextBuilder<RawType> _ctxBuilder;
    private readonly AsyncTransformer<RawType, OutType> _transformer;
    private readonly ContentKind _contentKind;
    private FragmentMode _fragmentMode = FragmentMode.Single;
    private Channel _channel = Channel.Plain;
    private int _parallelism = 4;

    // ──────────────── playwright ────────────────
    private PlaywrightAsyncManipulator? _playwrightManipulator;
    // ────────────────────────────────────────────

    // ──────────────── stealth ───────────────────
    private StealthAsyncManipulator? _stealthManipulator;
    private StealthConfig? _stealthConfig;
    // ────────────────────────────────────────────

    /// <summary>
    /// Creates the stage and derives the content kind from the transformer's raw document type.
    /// </summary>
    /// <param name="ctxBuilder">Builder that accumulates the download context settings.</param>
    /// <param name="transformer">Transformer applied to every downloaded document.</param>
    /// <exception cref="ArgumentNullException">An argument is <c>null</c>.</exception>
    /// <exception cref="ArgumentException">
    /// The transformer consumes neither <c>HtmlDocument</c> nor <c>ByteDocument</c>.
    /// </exception>
    public ContextStage(DownloadContextBuilder<RawType> ctxBuilder,
                        AsyncTransformer<RawType, OutType> transformer) {
        // Fail fast: a null transformer would otherwise hit the fallback arm below and
        // surface as a NullReferenceException from transformer.GetType().
        ArgumentNullException.ThrowIfNull(ctxBuilder);
        ArgumentNullException.ThrowIfNull(transformer);

        _ctxBuilder = ctxBuilder;
        _transformer = transformer;
        _contentKind = transformer switch {
            AsyncTransformer<HtmlDocument, OutType> => ContentKind.Html,
            AsyncTransformer<ByteDocument, OutType> => ContentKind.Binary,
            _ => throw new ArgumentException(string.Format(Exceptions.Exceptions.fluent_unsupported_transformer,
                                                           transformer.GetType()
                                                                      .AsUniqueName()))
        };
    }

    /// <summary>
    /// Hands the underlying context builder to <paramref name="configure"/> for arbitrary customisation.
    /// </summary>
    /// <param name="configure">Callback that receives the context builder.</param>
    /// <returns>This stage, for chaining.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="configure"/> is <c>null</c>.</exception>
    public IContextStage<RawType, OutType> Configure(Action<DownloadContextBuilder<RawType>> configure) {
        ArgumentNullException.ThrowIfNull(configure);
        configure(_ctxBuilder);
        return this;
    }

    /// <summary>
    /// Sets the degree of parallelism; values below 1 are clamped to 1.
    /// The value is only passed to the plain-channel fragment downloaders.
    /// </summary>
    /// <param name="degree">Requested degree of parallelism.</param>
    /// <returns>This stage, for chaining.</returns>
    public IContextStage<RawType, OutType> WithParallelism(int degree) {
        _parallelism = Math.Max(1, degree);
        return this;
    }

    /// <summary>
    /// Forwards the timeout to the context builder.
    /// </summary>
    /// <param name="timeout">Timeout to apply.</param>
    /// <returns>This stage, for chaining.</returns>
    public IContextStage<RawType, OutType> WithTimeout(TimeSpan timeout) {
        _ctxBuilder.WithTimeOut(timeout);
        return this;
    }

    /// <summary>
    /// Forwards the retry reporter to the context builder.
    /// </summary>
    /// <param name="reporter">Progress sink for retry reports.</param>
    /// <returns>This stage, for chaining.</returns>
    public IContextStage<RawType, OutType> WithRetryReporter(IProgress<IRetryReport> reporter) {
        _ctxBuilder.WithRetryReporter(reporter);
        return this;
    }

    /// <summary>
    /// Uses fragments to download multiple links in parallel. This strategy is mutually exclusive with <see cref="UsePlaywright(PlaywrightAsyncManipulator)"/>
    /// </summary>
    /// <returns>This stage, for chaining.</returns>
    public IContextStage<RawType, OutType> UseFragments() {
        // Fragments cannot run on the Playwright channel: fall back to the plain channel.
        // A previously selected stealth channel is left untouched.
        _playwrightManipulator = null;
        if (_channel == Channel.Playwright) {
            _channel = Channel.Plain;
        }

        _fragmentMode = FragmentMode.Fragmented;
        return this;
    }

    /// <summary>
    /// Use a puppet browser to download the links. This strategy is mutually exclusive with <see cref="UseFragments"/>
    /// </summary>
    /// <param name="manipulator">The page manipulator</param>
    /// <returns>This stage, for chaining.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="manipulator"/> is <c>null</c>.</exception>
    public IContextStage<RawType, OutType> UsePlaywright(PlaywrightAsyncManipulator manipulator) {
        ArgumentNullException.ThrowIfNull(manipulator);

        // Playwright replaces both the fragment strategy and any stealth setup.
        _fragmentMode = FragmentMode.Single;
        _stealthManipulator = null;
        _stealthConfig = null;

        _channel = Channel.Playwright;
        _playwrightManipulator = manipulator;
        return this;
    }

    /// <summary>
    /// Downloads through the stealth channel. Replaces a previously selected Playwright
    /// channel; the fragment mode is kept, so stealth can be combined with <see cref="UseFragments"/>.
    /// </summary>
    /// <param name="manipulator">The stealth page manipulator.</param>
    /// <param name="config">The stealth configuration.</param>
    /// <returns>This stage, for chaining.</returns>
    /// <exception cref="ArgumentNullException">An argument is <c>null</c>.</exception>
    public IContextStage<RawType, OutType> UseStealth(StealthAsyncManipulator manipulator, StealthConfig config) {
        ArgumentNullException.ThrowIfNull(manipulator);
        ArgumentNullException.ThrowIfNull(config);

        _playwrightManipulator = null;

        _channel = Channel.Stealth;
        _stealthManipulator = manipulator;
        _stealthConfig = config;
        return this;
    }

    /// <summary>
    /// Creates the unit downloader matching the selected (channel, fragment mode, content kind).
    /// The result is returned as <see cref="object"/> because the fragmented and single
    /// downloaders implement different closed interfaces; the caller casts it.
    /// </summary>
    /// <param name="context">Context supplying the clients, failure predicates and logger.</param>
    /// <exception cref="InvalidOperationException">
    /// The transformer or failure predicates do not match the content kind, or a required
    /// manipulator/config is missing.
    /// </exception>
    /// <exception cref="NotSupportedException">No downloader exists for the selected combination.</exception>
    private object ConstructUnitDownloader(DownloadContext<RawType> context) {
        #region Utility functions

        // Checked cast: RawType is only known at runtime to be HtmlDocument or ByteDocument.
        T To<T>(object? o) where T : class
            => (o as T) ??
               throw new InvalidOperationException(
                   string.Format(Exceptions.Exceptions.fluent_type_conversion_failure,
                                 o?.GetType().AsUniqueName() ?? "null", typeof(T).AsUniqueName()));

        AsyncTransformer<HtmlDocument, OutType> HtmlTransformer()
            => To<AsyncTransformer<HtmlDocument, OutType>>(_transformer);

        AsyncTransformer<ByteDocument, OutType> ByteTransformer()
            => To<AsyncTransformer<ByteDocument, OutType>>(_transformer);

        AsyncDownloadFailurePredicate<HtmlDocument>[] HtmlFailurePredicates()
            => To<AsyncDownloadFailurePredicate<HtmlDocument>[]>(context.AsyncFailurePredicates);

        AsyncDownloadFailurePredicate<ByteDocument>[] ByteFailurePredicates()
            => To<AsyncDownloadFailurePredicate<ByteDocument>[]>(context.AsyncFailurePredicates);

        T EnsureExists<T>(T? o) where T : class
            => o ?? throw new InvalidOperationException(Exceptions.Exceptions.fluent_invalid_state);

        #endregion

        return (_channel, _fragmentMode, _contentKind) switch {
            // ──────────────── fragmented HTML ────────────────
            (Channel.Plain, FragmentMode.Fragmented, ContentKind.Html)
                => new UnitFragmentDownloader<OutType>(
                    context.Web,
                    HtmlTransformer(),
                    HtmlFailurePredicates(),
                    _parallelism,
                    context.DownloadLogger),
            // ──────────────── fragmented binary ────────────────
            (Channel.Plain, FragmentMode.Fragmented, ContentKind.Binary)
                => new UnitFragmentDownloaderBinary<OutType>(
                    context.Client,
                    ByteTransformer(),
                    ByteFailurePredicates(),
                    _parallelism,
                    context.DownloadLogger),
            // ──────────────── single HTML ────────────────
            (Channel.Plain, FragmentMode.Single, ContentKind.Html)
                => new UnitDownloader<OutType>(
                    context.Web,
                    HtmlTransformer(),
                    HtmlFailurePredicates()),
            // ──────────────── single binary ────────────────
            (Channel.Plain, FragmentMode.Single, ContentKind.Binary)
                => new UnitDownloaderBinary<OutType>(
                    context.Client,
                    ByteTransformer(),
                    ByteFailurePredicates()),
            // ──────────────── single playwright binary ────────────────
            (Channel.Playwright, FragmentMode.Single, ContentKind.Binary)
                => new PlaywrightUnitDownloader<OutType>(
                    context.Client,
                    EnsureExists(_playwrightManipulator),
                    ByteTransformer(),
                    ByteFailurePredicates()),
            // ──────────────── single playwright HTML ────────────────
            (Channel.Playwright, FragmentMode.Single, ContentKind.Html)
                => new PlaywrightUnitPageDownloader<OutType>(
                    context.Web,
                    EnsureExists(_playwrightManipulator),
                    HtmlTransformer(),
                    HtmlFailurePredicates()),
            // ──────────────── single stealth HTML ────────────────
            (Channel.Stealth, FragmentMode.Single, ContentKind.Html)
                => new StealthUnitPageDownloader<OutType>(
                    context.Web,
                    EnsureExists(_stealthConfig),
                    EnsureExists(_stealthManipulator),
                    HtmlTransformer(),
                    HtmlFailurePredicates()),
            // ──────────────── single stealth binary ────────────────
            (Channel.Stealth, FragmentMode.Single, ContentKind.Binary)
                => new StealthUnitDownloader<OutType>(
                    context.Client,
                    EnsureExists(_stealthConfig),
                    EnsureExists(_stealthManipulator),
                    ByteTransformer(),
                    ByteFailurePredicates()),
            // ──────────────── fragment stealth HTML ────────────────
            (Channel.Stealth, FragmentMode.Fragmented, ContentKind.Html)
                => new StealthFragmentPageDownloader<OutType>(
                    context.Web,
                    EnsureExists(_stealthConfig),
                    EnsureExists(_stealthManipulator),
                    HtmlTransformer(),
                    HtmlFailurePredicates()),
            // ──────────────── fragment stealth binary ────────────────
            (Channel.Stealth, FragmentMode.Fragmented, ContentKind.Binary)
                => new StealthFragmentDownloader<OutType>(
                    context.Client,
                    EnsureExists(_stealthConfig),
                    EnsureExists(_stealthManipulator),
                    ByteTransformer(),
                    ByteFailurePredicates()),
            // e.g. (Playwright, Fragmented, *): not reachable through the fluent methods.
            _ => throw new NotSupportedException(string.Format(Exceptions.Exceptions.fluent_unsupported_pattern,
                                                               $"({_channel}, {_fragmentMode}, {_contentKind})")),
        };
    }

    /// <summary>
    /// Wraps the unit downloader factory into the sequential downloader that matches the
    /// fragment mode and normalises its output to ordered items.
    /// </summary>
    /// <param name="context">Context built from the accumulated settings.</param>
    /// <exception cref="NotSupportedException">The fragment mode has no matching downloader.</exception>
    private IAsyncEnumerator<Ordered<OutType>> ConstructDownloader(DownloadContext<RawType> context) {
        // The sequential downloader receives a copy rebuilt from the context rather than the instance itself.
        var copyOfContext = DownloadContextBuilder<RawType>.FromContext(context).Build();
        return _fragmentMode switch {
            FragmentMode.Fragmented => new SequentialFragmentDownloader<RawType, OutType>(
                copyOfContext,
                ctx => (IUnitDownloader<Fragment<Ordered<OutType>>>)ConstructUnitDownloader(ctx),
                context.DownloadLogger).UnwrapFragmented(),
            FragmentMode.Single => new SequentialDownloader<RawType, OutType>(
                copyOfContext,
                ctx => (IUnitDownloader<OutType>)ConstructUnitDownloader(ctx),
                context.DownloadLogger).WrapOrdered(),
            _ => throw new NotSupportedException(string.Format(Exceptions.Exceptions.fluent_unsupported_pattern,
                                                               $"{_fragmentMode}")),
        };
    }

    /// <summary>
    /// Builds the download context from the accumulated settings and returns the
    /// enumerable that performs the download.
    /// </summary>
    public DownloadEnumerable<OutType> Build() {
        var context = _ctxBuilder.Build();
        return new DownloadEnumerable<OutType>(ConstructDownloader(context));
    }
}
|
||||||
@@ -0,0 +1,17 @@
|
|||||||
|
namespace Beam.Fluent;
|
||||||
|
|
||||||
|
/// <summary>
/// Selects whether the fluent pipeline downloads items one by one or as fragments.
/// </summary>
public enum FragmentMode {
|
||||||
|
/// <summary>Non-fragmented download; ContextStage pairs this with SequentialDownloader.</summary>
Single,
|
||||||
|
/// <summary>Fragmented download; ContextStage pairs this with SequentialFragmentDownloader.</summary>
Fragmented
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Transport used by the fluent pipeline to fetch content.
/// </summary>
public enum Channel {
|
||||||
|
/// <summary>Default channel; used when neither stealth nor Playwright was selected.</summary>
Plain,
|
||||||
|
/// <summary>Stealth channel; selected by UseStealth and requires a stealth manipulator and config.</summary>
Stealth,
|
||||||
|
/// <summary>Playwright (puppet browser) channel; selected by UsePlaywright.</summary>
Playwright
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Kind of raw content a transformer consumes; ContextStage derives it from the transformer type.
/// </summary>
public enum ContentKind {
|
||||||
|
/// <summary>The transformer consumes HtmlDocument.</summary>
Html,
|
||||||
|
/// <summary>The transformer consumes ByteDocument.</summary>
Binary
|
||||||
|
}
|
||||||
@@ -0,0 +1,18 @@
|
|||||||
|
using Beam.Abstractions;
|
||||||
|
using Beam.Downloaders;
|
||||||
|
using Beam.Models;
|
||||||
|
using Beam.Playwright;
|
||||||
|
using Beam.Stealth;
|
||||||
|
|
||||||
|
namespace Beam.Fluent;
|
||||||
|
|
||||||
|
/// <summary>
/// Fluent stage for configuring how a download is executed (context settings,
/// parallelism, channel, fragmentation) before building the download enumerable.
/// Every configuration method returns a stage so calls can be chained.
/// </summary>
/// <typeparam name="RawType">Raw document type consumed by the transformer.</typeparam>
/// <typeparam name="OutType">Type produced by the transformer.</typeparam>
public interface IContextStage<RawType, OutType> {
|
||||||
|
/// <summary>Gives <paramref name="configure"/> access to the underlying download context builder.</summary>
IContextStage<RawType, OutType> Configure(Action<DownloadContextBuilder<RawType>> configure);
|
||||||
|
/// <summary>Sets the degree of parallelism. The ContextStage implementation clamps values below 1 to 1.</summary>
IContextStage<RawType, OutType> WithParallelism(int degree);
|
||||||
|
/// <summary>Sets the download timeout.</summary>
IContextStage<RawType, OutType> WithTimeout(TimeSpan timeout);
|
||||||
|
/// <summary>Registers a progress sink that receives retry reports.</summary>
IContextStage<RawType, OutType> WithRetryReporter(IProgress<IRetryReport> reporter);
|
||||||
|
/// <summary>Downloads using fragments. Mutually exclusive with <see cref="UsePlaywright(PlaywrightAsyncManipulator)"/>.</summary>
IContextStage<RawType, OutType> UseFragments();
|
||||||
|
/// <summary>Downloads through a Playwright puppet browser. Mutually exclusive with <see cref="UseFragments"/>.</summary>
IContextStage<RawType, OutType> UsePlaywright(PlaywrightAsyncManipulator manipulator);
|
||||||
|
/// <summary>Downloads through the stealth channel using the given manipulator and configuration.</summary>
IContextStage<RawType, OutType> UseStealth(StealthAsyncManipulator manipulator, StealthConfig config);
|
||||||
|
/// <summary>Finishes configuration and returns the enumerable that yields the downloaded items.</summary>
DownloadEnumerable<OutType> Build();
|
||||||
|
}
|
||||||
@@ -0,0 +1,12 @@
|
|||||||
|
using System.Collections.Concurrent;
|
||||||
|
|
||||||
|
namespace Beam.Fluent;
|
||||||
|
|
||||||
|
/// <summary>
/// Fluent stage describing what happens to downloaded items: where they are
/// saved and how the caller waits for or enumerates the download.
/// </summary>
// NOTE(review): no implementation of this generic interface is visible here; the member
// descriptions below are derived from the signatures only — confirm against the implementation.
public interface IDownloadStage<RawType, OutType> {
|
||||||
|
/// <summary>Saves downloaded items into the directory <paramref name="dir"/>.</summary>
IDownloadStage<RawType, OutType> SaveToDirectory(string dir);
|
||||||
|
/// <summary>Saves downloaded items to the given file paths.</summary>
IDownloadStage<RawType, OutType> SaveToFiles(IEnumerable<string> files);
|
||||||
|
/// <summary>Collects downloaded items into <paramref name="bag"/>.</summary>
IDownloadStage<RawType, OutType> SaveToMemory(ConcurrentBag<OutType> bag);
|
||||||
|
/// <summary>Synchronous counterpart of <see cref="WaitForDownloadAsync"/>.</summary>
void WaitForDownload();
|
||||||
|
/// <summary>Returns a task representing the wait for the download.</summary>
Task WaitForDownloadAsync();
|
||||||
|
/// <summary>Exposes the download as a <see cref="DownloadEnumerable{OutType}"/>.</summary>
DownloadEnumerable<OutType> AsAsyncEnumerable();
|
||||||
|
}
|
||||||
@@ -0,0 +1,8 @@
|
|||||||
|
using Beam.Dynamic;
|
||||||
|
using Beam.Models;
|
||||||
|
|
||||||
|
namespace Beam.Fluent;
|
||||||
|
|
||||||
|
/// <summary>
/// Fluent stage at which the transformer from <typeparamref name="RawType"/> to
/// <typeparamref name="OutType"/> is chosen; leads to the context stage.
/// </summary>
public interface ITransformStage<RawType, OutType> {
|
||||||
|
/// <summary>Selects the transformer and moves on to the context configuration stage.</summary>
// NOTE(review): the parameter is named "factory" although its type is a transformer — confirm whether "transformer" was intended.
IContextStage<RawType, OutType> WithTransformer(AsyncTransformer<RawType, OutType> factory);
|
||||||
|
}
|
||||||
@@ -1,190 +0,0 @@
|
|||||||
using Beam.Models;
|
|
||||||
using HtmlAgilityPack;
|
|
||||||
using Beam.Playwright;
|
|
||||||
using Beam.Stealth;
|
|
||||||
using Beam;
|
|
||||||
using Beam.Abstractions;
|
|
||||||
using Beam.Downloaders;
|
|
||||||
|
|
||||||
namespace Beam.Fluent {
    public static partial class DownloadBuilder<RawType, OutType> {
        /// <summary>
        /// Fluent stage that collects the download options (fragments, Playwright,
        /// stealth, parallelism, context settings) and builds the download enumerable.
        /// </summary>
        private sealed class ContextStage : IContextStage {
            private readonly DownloadContextBuilder<RawType> _ctxBuilder;
            private readonly AsyncTransformer<RawType, OutType> _transformer;
            private int _parallelism = 4;
            private bool _useFragments = false;
            private PlaywrightAsyncManipulator? _usePlaywrightManipulator = null;
            private StealthAsyncManipulator? _useStealthManipulator = null;
            private StealthConfig? _stealthConfig = null;

            /// <summary>Creates the stage for the given context builder and transformer.</summary>
            public ContextStage(DownloadContextBuilder<RawType> ctxBuilder, AsyncTransformer<RawType, OutType> transformer) {
                _ctxBuilder = ctxBuilder;
                _transformer = transformer;
            }

            /// <summary>Hands the underlying context builder to <paramref name="configure"/>.</summary>
            public IContextStage Configure(Action<DownloadContextBuilder<RawType>> configure) {
                configure(_ctxBuilder);
                return this;
            }

            /// <summary>Sets the degree of parallelism; values below 1 are clamped to 1.</summary>
            public IContextStage WithParallelism(int degree) {
                _parallelism = Math.Max(1, degree);
                return this;
            }

            /// <summary>Forwards the timeout to the context builder.</summary>
            public IContextStage WithTimeout(TimeSpan timeout) {
                _ctxBuilder.WithTimeOut(timeout);
                return this;
            }

            /// <summary>Forwards the retry reporter to the context builder.</summary>
            public IContextStage WithRetryReporter(IProgress<IRetryReport> reporter) {
                _ctxBuilder.WithRetryReporter(reporter);
                return this;
            }

            /// <summary>
            /// Uses fragments to download multiple links in parallel. This strategy is mutually exclusive with <see cref="UsePlaywright(PlaywrightAsyncManipulator)"/>
            /// </summary>
            /// <returns>This stage, for chaining.</returns>
            public IContextStage UseFragments() {
                _usePlaywrightManipulator = null;
                _useFragments = true;
                return this;
            }

            /// <summary>
            /// Use a puppet browser to download the links. This strategy is mutually exclusive with <see cref="UseFragments"/>
            /// </summary>
            /// <param name="manipulator">The page manipulator</param>
            /// <returns>This stage, for chaining.</returns>
            public IContextStage UsePlaywright(PlaywrightAsyncManipulator manipulator) {
                _useFragments = false;
                _useStealthManipulator = null;

                _usePlaywrightManipulator = manipulator;
                return this;
            }

            /// <summary>
            /// Downloads through the stealth channel; replaces a previously selected Playwright manipulator.
            /// </summary>
            /// <param name="manipulator">The stealth page manipulator.</param>
            /// <param name="config">The stealth configuration.</param>
            /// <returns>This stage, for chaining.</returns>
            public IContextStage UseStealth(StealthAsyncManipulator manipulator, StealthConfig config) {
                _usePlaywrightManipulator = null;

                _useStealthManipulator = manipulator;
                _stealthConfig = config;
                return this;
            }

            /// <summary>
            /// Creates the unit downloader matching the selected options by pattern matching on
            /// (fragments, stealth manipulator, Playwright manipulator, transformer, failure predicates).
            /// Returned as <see cref="object"/>; the caller casts it to the expected interface.
            /// </summary>
            /// <exception cref="Exception">No pattern matches, or the stealth config is missing.</exception>
            private object ConstructUnitDownloader(DownloadContext<RawType> context) {
                return (_useFragments, _useStealthManipulator, _usePlaywrightManipulator, _transformer, context.AsyncFailurePredicates) switch {
                    // ──────────────── fragmented HTML ────────────────
                    (true, null, _, AsyncTransformer<HtmlDocument, OutType> asyncHtmlTransformer,
                     AsyncDownloadFailurePredicate<HtmlDocument>[] documentFailurePredicates)
                        => new UnitFragmentDownloader<OutType>(
                            context.Web,
                            asyncHtmlTransformer,
                            documentFailurePredicates,
                            _parallelism,
                            context.DownloadLogger),
                    // ──────────────── fragmented binary ────────────────
                    (true, null, _, AsyncTransformer<ByteDocument, OutType> asyncBinaryTransformer,
                     AsyncDownloadFailurePredicate<ByteDocument>[] responseFailurePredicates)
                        => new UnitFragmentDownloaderBinary<OutType>(
                            context.Client,
                            asyncBinaryTransformer,
                            responseFailurePredicates,
                            _parallelism,
                            context.DownloadLogger),
                    // ──────────────── single HTML ────────────────
                    (false, null, null, AsyncTransformer<HtmlDocument, OutType> asyncHtmlTransformer,
                     AsyncDownloadFailurePredicate<HtmlDocument>[] documentFailurePredicates)
                        => new UnitDownloader<OutType>(
                            context.Web,
                            asyncHtmlTransformer,
                            documentFailurePredicates),
                    // ──────────────── single binary ────────────────
                    (false, null, null, AsyncTransformer<ByteDocument, OutType> asyncBinaryTransformer,
                     AsyncDownloadFailurePredicate<ByteDocument>[] responseFailurePredicates)
                        => new UnitDownloaderBinary<OutType>(
                            context.Client,
                            asyncBinaryTransformer,
                            responseFailurePredicates),
                    // ──────────────── single playwright binary ────────────────
                    (false, _, PlaywrightAsyncManipulator manipulator, AsyncTransformer<ByteDocument, OutType> asyncBinaryTransformer,
                     AsyncDownloadFailurePredicate<ByteDocument>[] responseFailurePredicates)
                        => new PlaywrightUnitDownloader<OutType>(
                            context.Client,
                            manipulator,
                            asyncBinaryTransformer,
                            responseFailurePredicates),
                    // ──────────────── single playwright HTML ────────────────
                    (false, _, PlaywrightAsyncManipulator manipulator, AsyncTransformer<HtmlDocument, OutType> asyncHtmlTransformer,
                     AsyncDownloadFailurePredicate<HtmlDocument>[] documentFailurePredicates)
                        => new PlaywrightUnitPageDownloader<OutType>(
                            context.Web,
                            manipulator,
                            asyncHtmlTransformer,
                            documentFailurePredicates),
                    // ──────────────── single stealth HTML ────────────────
                    (false, StealthAsyncManipulator manipulator, _, AsyncTransformer<HtmlDocument, OutType> asyncHtmlTransformer,
                     AsyncDownloadFailurePredicate<HtmlDocument>[] documentFailurePredicates)
                        => new StealthUnitPageDownloader<OutType>(
                            context.Web,
                            _stealthConfig ?? throw new Exception("Stealth config is null"),
                            manipulator,
                            asyncHtmlTransformer,
                            documentFailurePredicates),
                    // ──────────────── single stealth binary ────────────────
                    (false, StealthAsyncManipulator manipulator, _, AsyncTransformer<ByteDocument, OutType> asyncBinaryTransformer,
                     AsyncDownloadFailurePredicate<ByteDocument>[] responseFailurePredicates)
                        => new StealthUnitDownloader<OutType>(
                            context.Client,
                            _stealthConfig ?? throw new Exception("Stealth config is null"),
                            manipulator,
                            asyncBinaryTransformer,
                            responseFailurePredicates),
                    // ──────────────── fragment stealth HTML ────────────────
                    // NOTE(review): the matched failure predicates are not passed to the fragment stealth downloaders — confirm this is intended.
                    (true, StealthAsyncManipulator manipulator, _, AsyncTransformer<HtmlDocument, OutType> asyncHtmlTransformer,
                     AsyncDownloadFailurePredicate<HtmlDocument>[] downloadFailurePredicates)
                        => new StealthFragmentPageDownloader<OutType>(
                            context.Web,
                            _stealthConfig ?? throw new Exception("Stealth config is null"),
                            manipulator,
                            asyncHtmlTransformer),
                    // ──────────────── fragment stealth binary ────────────────
                    (true, StealthAsyncManipulator manipulator, _, AsyncTransformer<ByteDocument, OutType> asyncBinaryTransformer,
                     AsyncDownloadFailurePredicate<ByteDocument>[] responseFailurePredicates)
                        => new StealthFragmentDownloader<OutType>(
                            context.Client,
                            _stealthConfig ?? throw new Exception("Stealth config is null"),
                            manipulator,
                            asyncBinaryTransformer),
                    _ => throw new Exception($"Unsupported transformer / failure-predicate combination. Missing pattern: {_useFragments} , {_transformer.GetType().AsUniqueName()} , {context.AsyncFailurePredicates?.GetType().AsUniqueName()}"),
                };
            }

            /// <summary>
            /// Wraps the unit downloader factory into the sequential downloader matching the
            /// fragment setting and normalises its output to ordered items.
            /// </summary>
            private IAsyncEnumerator<Ordered<OutType>> ConstructDownloader(DownloadContext<RawType> context) {
                // The sequential downloader receives a copy rebuilt from the context rather than the instance itself.
                var copyOfContext = DownloadContextBuilder<RawType>.FromContext(context).Build();
                return _useFragments switch {
                    true => new SequentialFragmentDownloader<RawType, OutType>(
                        copyOfContext,
                        ctx => (IUnitDownloader<Fragment<Ordered<OutType>>>)ConstructUnitDownloader(ctx),
                        context.DownloadLogger).UnwrapFragmented(),
                    false => new SequentialDownloader<RawType, OutType>(
                        copyOfContext,
                        ctx => (IUnitDownloader<OutType>)ConstructUnitDownloader(ctx),
                        context.DownloadLogger).WrapOrdered()
                };
            }

            /// <summary>
            /// Builds the download context from the accumulated settings and returns the
            /// enumerable that performs the download.
            /// </summary>
            public DownloadEnumerable<OutType> Build() {
                var context = _ctxBuilder.Build();
                return new DownloadEnumerable<OutType>(ConstructDownloader(context));
            }
        }
    }
}
|
|
||||||
@@ -1,44 +0,0 @@
|
|||||||
using System.Collections.Concurrent;
|
|
||||||
using System.Text.Json;
|
|
||||||
using Beam.Models;
|
|
||||||
|
|
||||||
namespace Beam.Fluent {
    public static partial class DownloadBuilder<RawType, OutType> {
        /// <summary>
        /// Fluent stage that decorates a download stream with persistence steps and
        /// exposes it for enumeration.
        /// </summary>
        private sealed class DownloadStage(DownloadEnumerable<OutType> download) : IDownloadStage {
            // Current pipeline; each Save* call replaces it with a decorated stream.
            private IAsyncEnumerable<Ordered<OutType>> _download = download;

            /// <summary>Exposes the current pipeline as a <see cref="DownloadEnumerable{OutType}"/>.</summary>
            public DownloadEnumerable<OutType> AsAsyncEnumerable() {
                return new DownloadEnumerable<OutType>(_download.GetAsyncEnumerator());
            }

            /// <summary>
            /// Streams <paramref name="source"/>, writing each item as JSON into <paramref name="dir"/>
            /// before yielding it unchanged.
            /// </summary>
            /// <remarks>
            /// The source is a parameter on purpose: an async iterator body runs lazily, so reading
            /// the <c>_download</c> field here would observe the field after it was reassigned to
            /// this very iterator and enumerate itself without end.
            /// </remarks>
            private static async IAsyncEnumerable<Ordered<OutType>> _SaveToDirectory(
                IAsyncEnumerable<Ordered<OutType>> source, string dir) {
                Directory.CreateDirectory(dir);
                await foreach (var item in source) {
                    // The random prefix keeps file names unique; the order is kept in the name.
                    var path = Path.Combine(dir, $"{Path.GetRandomFileName()}.{item.Order}.json");
                    // Serialize the downloaded item itself (previously the directory path was written).
                    await System.IO.File.WriteAllTextAsync(path, JsonSerializer.Serialize(item));
                    yield return item;
                }
            }

            /// <summary>
            /// Adds a step that saves every downloaded item as a JSON file in <paramref name="dir"/>.
            /// Nothing is written until the pipeline is enumerated.
            /// </summary>
            /// <param name="dir">Target directory; created on first enumeration if missing.</param>
            /// <returns>This stage, for chaining.</returns>
            public IDownloadStage SaveToDirectory(string dir) {
                // Capture the current pipeline as an argument before replacing the field.
                _download = _SaveToDirectory(_download, dir);
                return this;
            }

            /// <summary>Not implemented.</summary>
            /// <exception cref="NotImplementedException">Always.</exception>
            public IDownloadStage SaveToFiles(IEnumerable<string> files) {
                throw new NotImplementedException();
            }

            /// <summary>Not implemented.</summary>
            /// <exception cref="NotImplementedException">Always.</exception>
            public IDownloadStage SaveToMemory(ConcurrentBag<OutType> bag) {
                throw new NotImplementedException();
            }

            /// <summary>Not implemented.</summary>
            /// <exception cref="NotImplementedException">Always.</exception>
            public void WaitForDownload() {
                throw new NotImplementedException();
            }

            /// <summary>Not implemented.</summary>
            /// <exception cref="NotImplementedException">Always.</exception>
            public Task WaitForDownloadAsync() {
                throw new NotImplementedException();
            }
        }
    }
}
|
|
||||||
@@ -1,9 +0,0 @@
|
|||||||
using Beam.Models;
|
|
||||||
|
|
||||||
namespace Beam.Fluent {
    public static partial class DownloadBuilder<RawType, OutType> {
        /// <summary>
        /// Link-selection stage for pipelines that are given an explicit set of links.
        /// </summary>
        public interface IAlternativeLinkStage {
            /// <summary>Supplies the links to download.</summary>
            /// <param name="links">The links to download.</param>
            /// <returns>The stage on which the transformer is selected.</returns>
            IAlternativeTransformStage WithLinks(IEnumerable<string> links);
        }
    }
}
|
|
||||||
@@ -1,12 +0,0 @@
|
|||||||
using Beam.Models;
|
|
||||||
|
|
||||||
namespace Beam.Fluent {
    public static partial class DownloadBuilder<RawType, OutType> {
        /// <summary>
        /// Transformer-selection stage that accepts a ready-made transformer.
        /// </summary>
        public interface IAlternativeTransformStage {
            /// <summary>Selects the asynchronous transformer from raw to output values.</summary>
            /// <param name="transformer">The asynchronous transformer.</param>
            /// <returns>The configuration stage.</returns>
            IContextStage WithTransformer(AsyncTransformer<RawType, OutType> transformer);

            /// <summary>
            /// Convenience overload for a synchronous transformer: its result is wrapped in a
            /// completed task and forwarded to the asynchronous overload.
            /// </summary>
            /// <param name="transformer">The synchronous transformer.</param>
            /// <returns>The configuration stage.</returns>
            IContextStage WithTransformer(Func<RawType, OutType> transformer)
                => WithTransformer(raw => Task.FromResult(transformer(raw)));
        }
    }
}
|
|
||||||
@@ -1,20 +0,0 @@
|
|||||||
using Beam.Abstractions;
|
|
||||||
using Beam.Downloaders;
|
|
||||||
using Beam.Models;
|
|
||||||
using Beam.Playwright;
|
|
||||||
using Beam.Stealth;
|
|
||||||
|
|
||||||
namespace Beam.Fluent {
    public static partial class DownloadBuilder<RawType, OutType> {
        /// <summary>
        /// Configuration stage of the builder. Every option returns an <see cref="IContextStage"/>
        /// so calls can be chained; <see cref="Build"/> ends the chain.
        /// </summary>
        public interface IContextStage {
            /// <summary>Hands the underlying context builder to <paramref name="configure"/>.</summary>
            /// <param name="configure">Callback receiving the context builder.</param>
            IContextStage Configure(Action<DownloadContextBuilder<RawType>> configure);

            /// <summary>Sets the degree of parallelism.</summary>
            /// <param name="degree">Requested degree of parallelism.</param>
            IContextStage WithParallelism(int degree);

            /// <summary>Sets the timeout.</summary>
            /// <param name="timeout">Timeout to apply.</param>
            IContextStage WithTimeout(TimeSpan timeout);

            /// <summary>Registers a reporter for retry reports.</summary>
            /// <param name="reporter">Receiver of <c>IRetryReport</c> instances.</param>
            IContextStage WithRetryReporter(IProgress<IRetryReport> reporter);

            /// <summary>Finishes configuration and creates the download.</summary>
            /// <returns>The enumerable yielding the download results.</returns>
            DownloadEnumerable<OutType> Build();

            /// <summary>Enables fragment-based downloading.</summary>
            IContextStage UseFragments();

            /// <summary>Selects Playwright-based downloading.</summary>
            /// <param name="manipulator">Manipulator used with Playwright.</param>
            IContextStage UsePlaywright(PlaywrightAsyncManipulator manipulator);

            /// <summary>Selects stealth downloading.</summary>
            /// <param name="manipulator">Manipulator used by the stealth downloader.</param>
            /// <param name="config">Stealth configuration.</param>
            IContextStage UseStealth(StealthAsyncManipulator manipulator, StealthConfig config);
        }
    }
}
|
|
||||||
@@ -1,14 +0,0 @@
|
|||||||
using System.Collections.Concurrent;
|
|
||||||
|
|
||||||
namespace Beam.Fluent {
    public static partial class DownloadBuilder<RawType, OutType> {
        /// <summary>
        /// Stage that decides what happens with the downloaded items.
        /// </summary>
        public interface IDownloadStage {
            /// <summary>Adds a step that stores the downloads in <paramref name="dir"/>.</summary>
            /// <param name="dir">Target directory.</param>
            /// <returns>A download stage, for chaining.</returns>
            IDownloadStage SaveToDirectory(string dir);

            /// <summary>Adds a step that stores the downloads in the given files.</summary>
            /// <param name="files">Target files.</param>
            /// <returns>A download stage, for chaining.</returns>
            IDownloadStage SaveToFiles(IEnumerable<string> files);

            /// <summary>Adds a step that collects the downloads in <paramref name="bag"/>.</summary>
            /// <param name="bag">Bag receiving the downloaded values.</param>
            /// <returns>A download stage, for chaining.</returns>
            IDownloadStage SaveToMemory(ConcurrentBag<OutType> bag);

            /// <summary>Exposes the pipeline as a <c>DownloadEnumerable</c>.</summary>
            DownloadEnumerable<OutType> AsAsyncEnumerable();

            /// <summary>Blocks until the download has finished.</summary>
            void WaitForDownload();

            /// <summary>Asynchronous counterpart of <see cref="WaitForDownload"/>.</summary>
            Task WaitForDownloadAsync();
        }
    }
}
|
|
||||||
@@ -1,13 +0,0 @@
|
|||||||
using Beam.Models;
|
|
||||||
|
|
||||||
namespace Beam.Fluent {
    public static partial class DownloadBuilder<RawType, OutType> {
        /* ────────────────────────────── Stages ─────────────────────────────── */

        /// <summary>
        /// Link-selection stage for pipelines created from a stored resource.
        /// </summary>
        public interface ILinkStage {
            /// <summary>Downloads a single link.</summary>
            /// <returns>The stage on which the transformer is selected.</returns>
            ITransformStage WithLink();

            /// <summary>Downloads a generated sequence of links.</summary>
            /// <returns>The stage on which the transformer is selected.</returns>
            ITransformStage WithLinkGenerator();

            /// <summary>Restricts link generation to <paramref name="range"/>.</summary>
            /// <param name="range">Range of links to generate.</param>
            /// <returns>A link stage, for chaining.</returns>
            ILinkStage WithRange(Range range);
        }
    }
}
|
|
||||||
@@ -1,10 +0,0 @@
|
|||||||
using Beam.Dynamic;
|
|
||||||
using Beam.Models;
|
|
||||||
|
|
||||||
namespace Beam.Fluent {
    public static partial class DownloadBuilder<RawType, OutType> {
        /// <summary>
        /// Transformer-selection stage whose transformer is created from data bindings.
        /// </summary>
        public interface ITransformStage {
            /// <summary>Selects the transformer produced by <paramref name="factory"/>.</summary>
            /// <param name="factory">Creates the transformer from the supplied <c>DataBindings</c>.</param>
            /// <returns>The configuration stage.</returns>
            IContextStage WithTransformer(Func<DataBindings, AsyncTransformer<RawType, OutType>> factory);
        }
    }
}
|
|
||||||
@@ -1,54 +0,0 @@
|
|||||||
using Beam.Data;
|
|
||||||
using Beam.Downloaders;
|
|
||||||
using Beam.Dynamic;
|
|
||||||
using Beam.Models;
|
|
||||||
|
|
||||||
namespace Beam.Fluent {
    public static partial class DownloadBuilder<RawType, OutType> {
        /* ──────────────────────────── Stage types ─────────────────────────── */

        /// <summary>
        /// Link-selection stage. It registers the links to download on the context builder
        /// and hands over to the transformer stage.
        /// </summary>
        private sealed record LinkStage(
            WebResource Source,
            State Initial,
            BeamDataContext Data,
            DownloadContextBuilder<RawType> CtxBuilder) : ILinkStage, IAlternativeLinkStage {

            // End state for generated links; stays null until WithRange is called.
            private State? _endState;

            // Set once WithLinkGenerator has consumed the range.
            private bool _linksFrozen = false;

            // Creates the follow-up stage sharing this stage's source, data and context builder.
            private TransformStage NextStage() => new TransformStage(Source, Data, CtxBuilder);

            /// <summary>Registers the single link built from the initial state.</summary>
            public ITransformStage WithLink() {
                CtxBuilder.WithLinks([Data.Templates[Source.Key].Builder.Build(Initial)]);
                return NextStage();
            }

            /// <summary>
            /// Registers a link generator running from the initial state to the end state
            /// (if one was set through <see cref="WithRange"/>) and freezes the range.
            /// </summary>
            public ITransformStage WithLinkGenerator() {
                var template = Data.Templates[Source.Key];
                var changer = new NumberedStateChanger(template.Factory.Behavior);
                var linkGenerator = new OrderedLinkGenerator(template.Builder, changer, Initial, _endState);
                CtxBuilder.WithLinks(StringEnumerable.FromGenerator(linkGenerator));
                _linksFrozen = true;
                return NextStage();
            }

            /// <summary>Registers an explicit set of links.</summary>
            /// <param name="links">The links to download.</param>
            public IAlternativeTransformStage WithLinks(IEnumerable<string> links) {
                CtxBuilder.WithLinks(links);
                return NextStage();
            }

            /// <summary>
            /// Moves the initial state to the start of <paramref name="range"/> and derives the end
            /// state from a copy of the initial state taken before that move.
            /// </summary>
            /// <param name="range">Range whose start and end values select the links.</param>
            /// <exception cref="InvalidOperationException">The link generator was already created.</exception>
            /// <exception cref="ArgumentOutOfRangeException">The range end is smaller than its start.</exception>
            public ILinkStage WithRange(Range range) {
                if (_linksFrozen) {
                    throw new InvalidOperationException($"WithRange must be called before WithLinkGenerator");
                }

                var first = range.Start.Value;
                var last = range.End.Value;

                // NOTE(review): the message says "<" but equal bounds pass this check — confirm which is intended.
                if (last < first) {
                    throw new ArgumentOutOfRangeException(nameof(range), $" start must be < end");
                }

                var changer = new NumberedStateChanger(Data.Templates[Source.Key].Factory.Behavior);

                // Copy first: Apply mutates the state it is given.
                _endState = Initial.Copy();
                changer.Apply(Initial, first - 1);
                changer.Apply(_endState, last - 1);
                return this;
            }
        }
    }
}
|
|
||||||
@@ -1,6 +0,0 @@
|
|||||||
namespace Beam.Fluent {
    public static partial class DownloadBuilder<RawType, OutType> {
        /* ────────────────────────── Implementation ────────────────────────── */

        /// <summary>Kinds of source distinguished by the builder.</summary>
        private enum SourceKind {
            Meta,
            Text
        }
    }
}
|
|
||||||
@@ -1,22 +0,0 @@
|
|||||||
using Beam.Data;
|
|
||||||
using Beam.Downloaders;
|
|
||||||
using Beam.Dynamic;
|
|
||||||
using Beam.Models;
|
|
||||||
|
|
||||||
namespace Beam.Fluent {
    public static partial class DownloadBuilder<RawType, OutType> {
        /// <summary>
        /// Transformer-selection stage. It pairs the chosen transformer with the context
        /// builder and moves on to the configuration stage.
        /// </summary>
        private sealed record TransformStage(
            WebResource Source,
            BeamDataContext Data,
            DownloadContextBuilder<RawType> CtxBuilder) : ITransformStage, IAlternativeTransformStage {

            /// <summary>
            /// Creates the transformer from the bindings registered for the source and continues
            /// with it.
            /// </summary>
            /// <param name="factory">Builds the transformer from the source's data bindings.</param>
            public IContextStage WithTransformer(Func<DataBindings, AsyncTransformer<RawType, OutType>> factory) {
                AsyncTransformer<RawType, OutType> created = factory(Data.Bindings[Source.Bindings]);
                return new ContextStage(CtxBuilder, created);
            }

            /// <summary>Continues with a ready-made transformer.</summary>
            /// <param name="transformer">The transformer to use.</param>
            public IContextStage WithTransformer(AsyncTransformer<RawType, OutType> transformer)
                => new ContextStage(CtxBuilder, transformer);
        }
    }
}
|
|
||||||
@@ -1,43 +0,0 @@
|
|||||||
using aeqw89.DataKeys;
|
|
||||||
using Beam;
|
|
||||||
using Microsoft.Extensions.Logging;
|
|
||||||
using System;
|
|
||||||
using System.Collections.Generic;
|
|
||||||
using Beam.Data;
|
|
||||||
using Beam.Downloaders;
|
|
||||||
using Beam.Models;
|
|
||||||
|
|
||||||
namespace Beam.Fluent {
    /// <summary>
    /// Staged, type-safe builder: the mandatory steps (source → link selection → transformer)
    /// are enforced by the stage interfaces, and operational settings are exposed as
    /// dedicated methods rather than loosely typed parameters.
    /// </summary>
    public static partial class DownloadBuilder<RawType, OutType> {
        /* ──────────────────────────── Entry points ─────────────────────────── */

        /// <summary>Starts a pipeline from a resource stored in the data context.</summary>
        /// <param name="dict">Key of the resource dictionary to look in.</param>
        /// <param name="kind">Name of the resource inside that dictionary.</param>
        /// <param name="beamDataDictionary">Data context holding dictionaries, resources and initial states.</param>
        /// <returns>The link-selection stage.</returns>
        /// <exception cref="KeyNotFoundException">One of the lookups failed.</exception>
        public static ILinkStage FromResource(DataKey<ResourceDictionary> dict, string kind, BeamDataContext beamDataDictionary) {
            return Create(dict, beamDataDictionary, kind);
        }

        /// <summary>
        /// Starts a pipeline without any stored resource; source, initial state and data context
        /// of the returned stage are null.
        /// </summary>
        /// <returns>The stage accepting an explicit set of links.</returns>
        public static IAlternativeLinkStage FromScratch() {
            return new LinkStage(null!, null!, null!, new DownloadContextBuilder<RawType>());
        }

        // Resolves the resource and pairs it with a context builder whose links are filled in later.
        private static ILinkStage Create(DataKey<ResourceDictionary> resourceDict, BeamDataContext data, string kind) {
            var resolved = Resolve(resourceDict, kind, data);
            var ctxBuilder = new DownloadContextBuilder<RawType>().WithLinks([]); // placeholder, filled later.
            return new LinkStage(resolved.Source, resolved.Initial, data, ctxBuilder);
        }

        // Walks dictionary → resource key → resource → initial state, failing on the first missing entry.
        private static (WebResource Source, State Initial) Resolve(DataKey<ResourceDictionary> resourceDict, string kind, BeamDataContext data) {
            if (!data.ResourceDictionaries.TryGetValue(resourceDict, out var dictionary)) {
                throw new KeyNotFoundException($"Novel '{resourceDict}' not found in BeamDataDictionary.");
            }

            if (!dictionary.Resources.TryGetValue(kind, out var resourceKey)) {
                throw new KeyNotFoundException($"Novel kind '{kind}' not found in '{resourceDict}'");
            }

            if (!data.Resources.TryGetValue(resourceKey, out var resource)) {
                throw new KeyNotFoundException($"Novel source '{resourceKey}' was not found");
            }

            if (!data.InitialStates.TryGetValue(resourceKey.To<ImmutableState>(), out var initialState)) {
                throw new KeyNotFoundException($"Immutable state for kind '{kind}' not found");
            }

            return (resource, initialState);
        }
    }
}
|
|
||||||
@@ -0,0 +1,44 @@
|
|||||||
|
using System.Collections.Concurrent;
|
||||||
|
using System.Text.Json;
|
||||||
|
using Beam.Models;
|
||||||
|
|
||||||
|
namespace Beam.Fluent;
|
||||||
|
|
||||||
|
/// <summary>
/// Stage that post-processes a download stream. Each <c>SaveTo*</c> call wraps the current
/// stream in another lazy step; nothing runs until the stream is enumerated.
/// </summary>
internal sealed class DownloadStage<RawType, OutType>(DownloadEnumerable<OutType> download) : IDownloadStage<RawType, OutType> {
    // Current head of the pipeline; replaced every time a step is appended.
    private IAsyncEnumerable<Ordered<OutType>> _download = download;

    /// <summary>Wraps the enumerator of the current pipeline in a new <c>DownloadEnumerable</c>.</summary>
    public DownloadEnumerable<OutType> AsAsyncEnumerable() {
        return new DownloadEnumerable<OutType>(_download.GetAsyncEnumerator());
    }

    /// <summary>
    /// Lazily forwards every item of <paramref name="source"/> after writing it as JSON into
    /// <paramref name="dir"/>. The directory is created when enumeration starts.
    /// </summary>
    /// <param name="source">Upstream stream, captured when the step is appended.</param>
    /// <param name="dir">Target directory.</param>
    private static async IAsyncEnumerable<Ordered<OutType>> SaveToDirectoryCore(
        IAsyncEnumerable<Ordered<OutType>> source,
        string dir) {
        Directory.CreateDirectory(dir);
        await foreach (var item in source) {
            var path = Path.Combine(dir, $"{Path.GetRandomFileName()}.{item.Order}.json");
            // Serialize the downloaded item itself; previously the directory path was written to every file.
            // NOTE(review): assumes Ordered<OutType> exposes its payload through public properties — confirm.
            await System.IO.File.WriteAllTextAsync(path, JsonSerializer.Serialize(item));
            yield return item;
        }
    }

    /// <summary>Appends a step that writes every item to a JSON file under <paramref name="dir"/>.</summary>
    /// <param name="dir">Directory the files are written to.</param>
    /// <returns>This stage, for chaining.</returns>
    public IDownloadStage<RawType, OutType> SaveToDirectory(string dir) {
        // The upstream must be passed by value here: an iterator body that reads the
        // _download field at enumeration time would see itself and recurse forever.
        _download = SaveToDirectoryCore(_download, dir);
        return this;
    }

    /// <summary>Not implemented yet.</summary>
    /// <exception cref="NotImplementedException">Always.</exception>
    public IDownloadStage<RawType, OutType> SaveToFiles(IEnumerable<string> files) {
        throw new NotImplementedException();
    }

    /// <summary>Not implemented yet.</summary>
    /// <exception cref="NotImplementedException">Always.</exception>
    public IDownloadStage<RawType, OutType> SaveToMemory(ConcurrentBag<OutType> bag) {
        throw new NotImplementedException();
    }

    /// <summary>Not implemented yet.</summary>
    /// <exception cref="NotImplementedException">Always.</exception>
    public void WaitForDownload() {
        throw new NotImplementedException();
    }

    /// <summary>Not implemented yet.</summary>
    /// <exception cref="NotImplementedException">Always.</exception>
    public Task WaitForDownloadAsync() {
        throw new NotImplementedException();
    }
}
|
||||||
@@ -0,0 +1,28 @@
|
|||||||
|
using aeqw89.DataKeys;
|
||||||
|
using Beam.Data;
|
||||||
|
using Beam.Downloaders;
|
||||||
|
using Beam.Dynamic;
|
||||||
|
using Beam.Models;
|
||||||
|
|
||||||
|
namespace Beam.Fluent;
|
||||||
|
|
||||||
|
/// <summary>
/// Entry points of the fluent download API. Each method prepares a context builder and
/// returns the stage on which the transformer is selected.
/// </summary>
public static class FluentDownload {
    /// <summary>Starts a pipeline from an explicit set of links.</summary>
    /// <param name="links">The links to download.</param>
    public static ITransformStage<RawType, OutType> Links<RawType, OutType>(params IEnumerable<string> links)
        => new TransformStage<RawType, OutType>(new DownloadContextBuilder<RawType>().WithLinks(links));

    /// <summary>
    /// Starts a pipeline whose links are generated from a resource definition, beginning at a
    /// copy of the first state listed in its location.
    /// </summary>
    /// <param name="definition">Definition providing segments, states and state changer.</param>
    /// <exception cref="ArgumentException">The definition's location lists no states.</exception>
    public static ITransformStage<RawType, OutType>
        ResourceDefinition<RawType, OutType>(ResourceDefinition definition) {
        if (definition.Location.States.Count == 0) {
            throw new ArgumentException(Exceptions.Exceptions.resource_definition_invalid_states_count, nameof(definition));
        }

        var generatedLinks = StringEnumerable.FromGenerator(
            new OrderedLinkGenerator(
                definition.Location.Segments,
                (NumberedStateChanger)definition.Location.StateChanger.Behavior,
                definition.Location.States.First().Copy()));

        return new TransformStage<RawType, OutType>(new DownloadContextBuilder<RawType>().WithLinks(generatedLinks));
    }

    /// <summary>Starts a pipeline from a context builder seeded with an existing context.</summary>
    /// <param name="existing">The context to start from.</param>
    public static ITransformStage<RawType, OutType> FromContext<RawType, OutType>(DownloadContext<RawType> existing)
        => new TransformStage<RawType, OutType>(DownloadContextBuilder<RawType>.FromContext(existing));
}
|
||||||
@@ -0,0 +1,12 @@
|
|||||||
|
using Beam.Data;
|
||||||
|
using Beam.Downloaders;
|
||||||
|
using Beam.Dynamic;
|
||||||
|
using Beam.Models;
|
||||||
|
|
||||||
|
namespace Beam.Fluent;
|
||||||
|
|
||||||
|
/// <summary>
/// Transformer-selection stage: pairs the chosen transformer with the context builder and
/// moves on to the configuration stage.
/// </summary>
internal sealed class TransformStage<RawType, OutType>(DownloadContextBuilder<RawType> CtxBuilder) : ITransformStage<RawType, OutType> {
    /// <summary>Continues with the given transformer.</summary>
    /// <param name="transformer">Transformer from raw to output values.</param>
    /// <returns>The configuration stage.</returns>
    public IContextStage<RawType, OutType> WithTransformer(AsyncTransformer<RawType, OutType> transformer)
        => new ContextStage<RawType, OutType>(CtxBuilder, transformer);
}
|
||||||
@@ -11,7 +11,7 @@
|
|||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<PackageReference Include="aeqw89.PersistentData" Version="1.3.3" />
|
<PackageReference Include="aeqw89.PersistentData" Version="1.4.5" />
|
||||||
<PackageReference Include="EntityFramework" Version="6.5.1" />
|
<PackageReference Include="EntityFramework" Version="6.5.1" />
|
||||||
<PackageReference Include="Microsoft.EntityFrameworkCore.Design" Version="9.0.8">
|
<PackageReference Include="Microsoft.EntityFrameworkCore.Design" Version="9.0.8">
|
||||||
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
|
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
|
||||||
|
|||||||
@@ -6,16 +6,9 @@ namespace Beam.Models;
|
|||||||
|
|
||||||
public class ResourceDefinition {
|
public class ResourceDefinition {
|
||||||
public required DataKey<ResourceDefinition> Key { get; init; }
|
public required DataKey<ResourceDefinition> Key { get; init; }
|
||||||
|
|
||||||
public required MetaData Meta { get; init; }
|
public required MetaData Meta { get; init; }
|
||||||
|
|
||||||
/// <summary>Map of element name to extraction config. Keys must match ^[A-Za-z0-9_-]+$</summary>
|
|
||||||
public required Table<IDataProvider> Elements { get; init; }
|
public required Table<IDataProvider> Elements { get; init; }
|
||||||
|
public required UrlLocation Location { get; init; }
|
||||||
/// <summary>Minimum 1 item; polymorphic segments discriminated by "type".</summary>
|
|
||||||
public required ILinkBuilder Url { get; init; }
|
|
||||||
|
|
||||||
/// <summary>Keys must match ^[A-Za-z0-9_-]+$</summary>
|
|
||||||
public required Table<ResourceRelation> Relations { get; init; }
|
public required Table<ResourceRelation> Relations { get; init; }
|
||||||
|
|
||||||
public class MetaData {
|
public class MetaData {
|
||||||
@@ -24,4 +17,10 @@ public class ResourceDefinition {
|
|||||||
public string? Description { get; init; }
|
public string? Description { get; init; }
|
||||||
public string? ProjectUrl { get; init; }
|
public string? ProjectUrl { get; init; }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public class UrlLocation {
|
||||||
|
public ILinkBuilder Segments { get; init; }
|
||||||
|
public List<ImmutableState> States { get; init; }
|
||||||
|
public IStateChangerFactory StateChanger { get; init; }
|
||||||
|
}
|
||||||
}
|
}
|
||||||
@@ -35,7 +35,7 @@
|
|||||||
<PackageReference Include="aeqw89.DataKeys" Version="2.0.1">
|
<PackageReference Include="aeqw89.DataKeys" Version="2.0.1">
|
||||||
<Transitive>true</Transitive>
|
<Transitive>true</Transitive>
|
||||||
</PackageReference>
|
</PackageReference>
|
||||||
<PackageReference Include="aeqw89.PersistentData" Version="1.3.3">
|
<PackageReference Include="aeqw89.PersistentData" Version="1.4.5">
|
||||||
<Transitive>true</Transitive>
|
<Transitive>true</Transitive>
|
||||||
</PackageReference>
|
</PackageReference>
|
||||||
<PackageReference Include="HtmlAgilityPack" Version="1.11.72">
|
<PackageReference Include="HtmlAgilityPack" Version="1.11.72">
|
||||||
|
|||||||
Reference in New Issue
Block a user