Enhance project metadata and refactor core classes
Updated project files for `Beam.Dynamic`, `Beam.Exports`, `Beam.Temporary.Cli`, and `Beam` to include additional metadata and specific package versions. Refactored `DataBindings` and `ResolvedBindings` to records, added a new `Text` property in `Binding.cs`, and introduced `ParseNumbers` in `OnlineCleaner`. New classes `PuppetContext` and `PuppetUnitDownloader` added for Playwright integration. Introduced `ImmutableState` struct and `UnitDownloaderBinary` class for improved download management. Updated tests in `UnitTest1.cs` for number localization. Added `Beam.Puppeteer` project to the solution.
This commit is contained in:
@@ -4,19 +4,21 @@
|
|||||||
<TargetFramework>net9.0</TargetFramework>
|
<TargetFramework>net9.0</TargetFramework>
|
||||||
<ImplicitUsings>enable</ImplicitUsings>
|
<ImplicitUsings>enable</ImplicitUsings>
|
||||||
<Nullable>enable</Nullable>
|
<Nullable>enable</Nullable>
|
||||||
|
<Title>Beam Dynamic</Title>
|
||||||
|
<Authors>aeqw89</Authors>
|
||||||
|
<Company>qwsdcvghyu</Company>
|
||||||
|
<Product />
|
||||||
|
<Description>Beam utilities facilitating dynamic fetching of elements of webpages</Description>
|
||||||
|
<RepositoryUrl>https://github.com/qwsdcvghyu89/Beam</RepositoryUrl>
|
||||||
|
<PackageId>aeqw89.Beam.Dynamic</PackageId>
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
|
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<ProjectReference Include="..\Beam\Beam.csproj" />
|
<PackageReference Include="aeqw89.DataKeys" Version="1.0.1" />
|
||||||
</ItemGroup>
|
<PackageReference Include="aeqw89.PersistentData" Version="1.0.0" />
|
||||||
|
<PackageReference Include="aeqw89.Beam" Version="1.0.0" />
|
||||||
<ItemGroup>
|
<PackageReference Include="HtmlAgilityPack" Version="1.11.72" />
|
||||||
<Reference Include="aeqw89.DataKeys">
|
<PackageReference Include="Microsoft.Recognizers.Text.Number" Version="1.8.13" />
|
||||||
<HintPath>..\..\aeqw89.DataKeys\aeqw89.DataKeys\bin\Debug\net9.0\aeqw89.DataKeys.dll</HintPath>
|
|
||||||
</Reference>
|
|
||||||
<Reference Include="aeqw89.PersistentData">
|
|
||||||
<HintPath>..\..\aeqw89.PersistentData\aeqw89.PersistentData\bin\Release\net9.0\aeqw89.PersistentData.dll</HintPath>
|
|
||||||
</Reference>
|
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
|
||||||
</Project>
|
</Project>
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ namespace Beam.Dynamic {
|
|||||||
public string? ArrayDelimiters { get; set; }
|
public string? ArrayDelimiters { get; set; }
|
||||||
public string? XPath { get; set; }
|
public string? XPath { get; set; }
|
||||||
public string? CssPath { get; set; }
|
public string? CssPath { get; set; }
|
||||||
|
public string? Text { get; set; }
|
||||||
private IDataProvider? Provider_;
|
private IDataProvider? Provider_;
|
||||||
public IDataProvider? Provider {
|
public IDataProvider? Provider {
|
||||||
get => Provider_;
|
get => Provider_;
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
using HtmlAgilityPack;
|
using HtmlAgilityPack;
|
||||||
|
|
||||||
namespace Beam.Dynamic {
|
namespace Beam.Dynamic {
|
||||||
public class DataBindings {
|
public record class DataBindings {
|
||||||
public Binding? Title { get; set; }
|
public Binding? Title { get; set; }
|
||||||
public Binding? Authors { get; set; }
|
public Binding? Authors { get; set; }
|
||||||
public Binding? Description { get; set; }
|
public Binding? Description { get; set; }
|
||||||
@@ -9,7 +9,7 @@ namespace Beam.Dynamic {
|
|||||||
public Binding? Language { get; set; }
|
public Binding? Language { get; set; }
|
||||||
public Binding? Tags { get; set; }
|
public Binding? Tags { get; set; }
|
||||||
|
|
||||||
public ResolvedBindings Resolve(HtmlDocument doc) {
|
public virtual ResolvedBindings Resolve(HtmlDocument doc) {
|
||||||
return new ResolvedBindings() {
|
return new ResolvedBindings() {
|
||||||
Title = Title?.Resolve(doc),
|
Title = Title?.Resolve(doc),
|
||||||
Authors = Authors?.Resolve(doc) ?? Array.Empty<string>(),
|
Authors = Authors?.Resolve(doc) ?? Array.Empty<string>(),
|
||||||
@@ -21,7 +21,7 @@ namespace Beam.Dynamic {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public class ResolvedBindings {
|
public record class ResolvedBindings {
|
||||||
public string? Title { get; set; }
|
public string? Title { get; set; }
|
||||||
public string[]? Authors { get; set; }
|
public string[]? Authors { get; set; }
|
||||||
public string? Description { get; set; }
|
public string? Description { get; set; }
|
||||||
|
|||||||
@@ -1,8 +1,8 @@
|
|||||||
using HtmlAgilityPack;
|
using HtmlAgilityPack;
|
||||||
|
|
||||||
namespace Beam.Dynamic {
|
namespace Beam.Dynamic {
|
||||||
[System.Text.Json.Serialization.JsonDerivedType(typeof(ParagraphedContentDataProvider), 20)]
|
[System.Text.Json.Serialization.JsonDerivedType(typeof(ParagraphedContentDataProvider), "paragraphed-data-provider")]
|
||||||
[System.Text.Json.Serialization.JsonDerivedType(typeof(ListContentDataProvider), 21)]
|
[System.Text.Json.Serialization.JsonDerivedType(typeof(ListContentDataProvider), "list-data-provider")]
|
||||||
public interface IDataProvider {
|
public interface IDataProvider {
|
||||||
public string Get(HtmlDocument document);
|
public string Get(HtmlDocument document);
|
||||||
public HtmlNode? GetNode(HtmlDocument document);
|
public HtmlNode? GetNode(HtmlDocument document);
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
using HtmlAgilityPack;
|
using HtmlAgilityPack;
|
||||||
|
using Microsoft.Recognizers.Text.Number;
|
||||||
using System;
|
using System;
|
||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
|
using System.Globalization;
|
||||||
using System.Linq;
|
using System.Linq;
|
||||||
using System.Text;
|
using System.Text;
|
||||||
using System.Text.RegularExpressions;
|
using System.Text.RegularExpressions;
|
||||||
@@ -25,6 +27,19 @@ namespace Beam.Dynamic {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Runs the number recognizer over <paramref name="text"/> for the given culture and
/// returns every recognized value that can be represented as an <see cref="int"/>.
/// </summary>
/// <param name="text">Text to scan. Null or whitespace-only text yields an empty list.</param>
/// <param name="from">Culture identifier forwarded unchanged to <c>NumberRecognizer.RecognizeNumber</c>.</param>
/// <returns>
/// The recognized values converted to <see cref="int"/>. Fractional values are truncated
/// toward zero; results without a parsable "value" entry, non-finite values and values
/// outside the <see cref="int"/> range are skipped. Never null.
/// </returns>
public static List<int> ParseNumbers(string text, string from) {
    if (string.IsNullOrWhiteSpace(text)) {
        return [];
    }

    var numbers = new List<int>();
    foreach (var result in NumberRecognizer.RecognizeNumber(text, from, NumberOptions.None, false)) {
        if (result.Resolution is null || !result.Resolution.TryGetValue("value", out var value)) {
            continue;
        }

        // Parse with the invariant culture so the outcome does not depend on the machine's
        // current culture (e.g. "1.5" must not be read as 15 on a comma-decimal locale).
        // NOTE(review): assumes the recognizer emits invariant-formatted values — confirm.
        if (!double.TryParse(
                value?.ToString(),
                NumberStyles.Float | NumberStyles.AllowThousands,
                CultureInfo.InvariantCulture,
                out var number)) {
            continue;
        }

        // Casting NaN or an out-of-range double to int gives an unspecified result, so skip those.
        if (double.IsNaN(number) || number < int.MinValue || number > int.MaxValue) {
            continue;
        }

        numbers.Add((int)number);
    }

    return numbers;
}
|
||||||
|
|
||||||
public static string Clean(string? onlineText) {
|
public static string Clean(string? onlineText) {
|
||||||
if (string.IsNullOrWhiteSpace(onlineText))
|
if (string.IsNullOrWhiteSpace(onlineText))
|
||||||
return "";
|
return "";
|
||||||
|
|||||||
@@ -4,10 +4,17 @@
|
|||||||
<TargetFramework>net9.0</TargetFramework>
|
<TargetFramework>net9.0</TargetFramework>
|
||||||
<ImplicitUsings>enable</ImplicitUsings>
|
<ImplicitUsings>enable</ImplicitUsings>
|
||||||
<Nullable>enable</Nullable>
|
<Nullable>enable</Nullable>
|
||||||
|
<Title>Beam.Exports</Title>
|
||||||
|
<Authors>aeqw89</Authors>
|
||||||
|
<Company>qwsdcvghyu</Company>
|
||||||
|
<Description>Beam library that facilitates exporting different kinds of views for IDocuments</Description>
|
||||||
|
<PackageProjectUrl>https://github.com/qwsdcvghyu89/Beam</PackageProjectUrl>
|
||||||
|
<RepositoryUrl>https://github.com/qwsdcvghyu89/Beam</RepositoryUrl>
|
||||||
|
<PackageId>aeqw89.Beam.Exports</PackageId>
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
|
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<ProjectReference Include="..\Beam\Beam.csproj" />
|
<PackageReference Include="aeqw89.Beam" Version="1.0.0" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
|
||||||
</Project>
|
</Project>
|
||||||
|
|||||||
@@ -0,0 +1,17 @@
|
|||||||
|
<Project Sdk="Microsoft.NET.Sdk">
|
||||||
|
|
||||||
|
<PropertyGroup>
|
||||||
|
<TargetFramework>net9.0</TargetFramework>
|
||||||
|
<ImplicitUsings>enable</ImplicitUsings>
|
||||||
|
<Nullable>enable</Nullable>
|
||||||
|
</PropertyGroup>
|
||||||
|
|
||||||
|
<ItemGroup>
|
||||||
|
<PackageReference Include="Microsoft.Playwright" Version="1.52.0" />
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
|
<ItemGroup>
|
||||||
|
<ProjectReference Include="..\Beam\Beam.csproj" />
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
|
</Project>
|
||||||
@@ -0,0 +1,39 @@
|
|||||||
|
|
||||||
|
using HtmlAgilityPack;
|
||||||
|
using Microsoft.Playwright;
|
||||||
|
|
||||||
|
namespace Beam.Puppeteer {
    /// <summary>
    /// Holds the Playwright driver and the browser instance that downloaders open pages on.
    /// </summary>
    public class PuppetContext(IPlaywright playwright, IBrowser browser) {
        /// <summary>The Playwright instance supplied at construction.</summary>
        public IPlaywright Playwright { get; set; } = playwright;

        /// <summary>The browser used to open one page per download attempt.</summary>
        public IBrowser Browser { get; set; } = browser;
    }

    /// <summary>
    /// A <see cref="UnitDownloader{T}"/> that loads each link in a Playwright page and
    /// feeds the resulting HTML to the transformer and failure predicates.
    /// </summary>
    public class PuppetUnitDownloader<T> : UnitDownloader<T> {
        /// <summary>The Playwright context whose browser is used for downloads.</summary>
        public PuppetContext Context { get; }

        /// <summary>
        /// Creates a downloader that takes its web client, transformer and failure predicates
        /// from <paramref name="context"/> and its browser from <paramref name="pc"/>.
        /// </summary>
        public PuppetUnitDownloader(PuppetContext pc, DownloadContext<T> context)
            : base(context.Web, context.AsyncTranformer, context.AsyncFailurePredicates) {
            Context = pc;
        }

        /// <summary>
        /// Makes a single attempt to load <paramref name="link"/> in a fresh browser page and transform its HTML.
        /// </summary>
        /// <param name="link">Address to navigate to.</param>
        /// <param name="ct">
        /// Cancellation token. It is not observed here; the Playwright calls used below are
        /// invoked without one.
        /// </param>
        /// <returns>
        /// <c>(true, value)</c> when the page loaded, was transformed and was not flagged by the
        /// failure predicates; <c>(false, default)</c> when it was flagged or any exception occurred.
        /// </returns>
        protected override async Task<(bool, T?)> TryDownloadWithNoRetries(string link, CancellationToken ct) {
            var page = await Context.Browser.NewPageAsync();
            try {
                // Navigate first: a freshly created page is blank, so reading its content
                // without this call would never return the document behind `link`.
                await page.GotoAsync(link);
                var content = await page.ContentAsync();

                // The HTML is captured, so release the page before the transform work.
                await page.CloseAsync();

                HtmlDocument doc = new();
                doc.LoadHtml(content);

                var transformed = await Transformer(doc);
                if (FailurePredicates is null || !(await IsFailure(doc))) {
                    return (true, transformed);
                }

                return (false, default);
            } catch (Exception) {
                // Any failure (navigation, parsing, transform) counts as a failed attempt.
                return (false, default);
            } finally {
                // Covers the paths where an exception was thrown before the page was closed above.
                if (!page.IsClosed) {
                    await page.CloseAsync();
                }
            }
        }
    }
}
|
||||||
@@ -5,9 +5,22 @@
|
|||||||
<TargetFramework>net9.0</TargetFramework>
|
<TargetFramework>net9.0</TargetFramework>
|
||||||
<ImplicitUsings>enable</ImplicitUsings>
|
<ImplicitUsings>enable</ImplicitUsings>
|
||||||
<Nullable>enable</Nullable>
|
<Nullable>enable</Nullable>
|
||||||
|
|
||||||
|
<Title>Beam.Temporary.Cli</Title>
|
||||||
|
<Authors>aeqw89</Authors>
|
||||||
|
<Company>qwsdcvghyu</Company>
|
||||||
|
<Description>A temporary CLI for Beam providing several useful mechanisms</Description>
|
||||||
|
<PackageProjectUrl>https://github.com/qwsdcvghyu89/Beam</PackageProjectUrl>
|
||||||
|
<RepositoryUrl>https://github.com/qwsdcvghyu89/Beam</RepositoryUrl>
|
||||||
|
<PackageId>aeqw89.Beam.Temporary.Cli</PackageId>
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
|
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
|
<PackageReference Include="aeqw89.Beam.Exports" Version="1.0.0" />
|
||||||
|
<PackageReference Include="aeqw89.DataKeys" Version="1.0.1" />
|
||||||
|
<PackageReference Include="aeqw89.PersistentData" Version="1.0.0" />
|
||||||
|
<PackageReference Include="aeqw89.Beam" Version="1.0.0" />
|
||||||
|
<PackageReference Include="aeqw89.Beam.Dynamic" Version="1.0.0" />
|
||||||
<PackageReference Include="Microsoft.Extensions.Logging" Version="9.0.1" />
|
<PackageReference Include="Microsoft.Extensions.Logging" Version="9.0.1" />
|
||||||
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="9.0.1" />
|
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="9.0.1" />
|
||||||
<PackageReference Include="Microsoft.Extensions.Logging.Console" Version="9.0.1" />
|
<PackageReference Include="Microsoft.Extensions.Logging.Console" Version="9.0.1" />
|
||||||
@@ -16,19 +29,4 @@
|
|||||||
<PackageReference Include="System.Linq.Async" Version="6.0.1" />
|
<PackageReference Include="System.Linq.Async" Version="6.0.1" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
|
||||||
<ItemGroup>
|
|
||||||
<ProjectReference Include="..\Beam.Dynamic\Beam.Dynamic.csproj" />
|
|
||||||
<ProjectReference Include="..\Beam.Exports\Beam.Exports.csproj" />
|
|
||||||
<ProjectReference Include="..\Beam\Beam.csproj" />
|
|
||||||
</ItemGroup>
|
|
||||||
|
|
||||||
<ItemGroup>
|
|
||||||
<Reference Include="aeqw89.DataKeys">
|
|
||||||
<HintPath>..\..\aeqw89.DataKeys\aeqw89.DataKeys\bin\Debug\net9.0\aeqw89.DataKeys.dll</HintPath>
|
|
||||||
</Reference>
|
|
||||||
<Reference Include="aeqw89.PersistentData">
|
|
||||||
<HintPath>..\..\aeqw89.PersistentData\aeqw89.PersistentData\bin\Release\net9.0\aeqw89.PersistentData.dll</HintPath>
|
|
||||||
</Reference>
|
|
||||||
</ItemGroup>
|
|
||||||
|
|
||||||
</Project>
|
</Project>
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
using aeqw89.DataKeys;
|
using aeqw89.DataKeys;
|
||||||
using Beam.Dynamic;
|
using Beam.Dynamic;
|
||||||
|
using Beam;
|
||||||
using Microsoft.Extensions.Logging;
|
using Microsoft.Extensions.Logging;
|
||||||
using System;
|
using System;
|
||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
@@ -24,17 +25,18 @@ namespace Beam.Temporary.Cli {
|
|||||||
public interface ILinkStage {
|
public interface ILinkStage {
|
||||||
ITransformStage WithLink();
|
ITransformStage WithLink();
|
||||||
ITransformStage WithLinkGenerator();
|
ITransformStage WithLinkGenerator();
|
||||||
|
ILinkStage WithRange(Range range);
|
||||||
}
|
}
|
||||||
|
|
||||||
public interface ITransformStage {
|
public interface ITransformStage {
|
||||||
IContextStage WithTransformer(Func<DataBindings, HtmlTransformer<T>> factory);
|
IContextStage<U> WithTransformer<U>(Func<DataBindings, AsyncTransformer<T, U>> factory);
|
||||||
}
|
}
|
||||||
|
|
||||||
public interface IContextStage {
|
public interface IContextStage<U> {
|
||||||
IContextStage Configure(Action<DownloadContextBuilder<T>> configure);
|
IContextStage<U> Configure(Action<DownloadContextBuilder<T>> configure);
|
||||||
IContextStage WithParallelism(int degree);
|
IContextStage<U> WithParallelism(int degree);
|
||||||
IContextStage WithTimeout(TimeSpan timeout);
|
IContextStage<U> WithTimeout(TimeSpan timeout);
|
||||||
IContextStage WithRetryReporter(IProgress<RetryReport> reporter);
|
IContextStage<U> WithRetryReporter(IProgress<RetryReport> reporter);
|
||||||
DownloadEnumerable<T> Build();
|
DownloadEnumerable<T> Build();
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -61,7 +63,7 @@ namespace Beam.Temporary.Cli {
|
|||||||
initial = textRecord.Resource.MetaTemplateInitialData ?? throw new InvalidOperationException("Meta template data missing.");
|
initial = textRecord.Resource.MetaTemplateInitialData ?? throw new InvalidOperationException("Meta template data missing.");
|
||||||
} else {
|
} else {
|
||||||
source = textRecord.AssociatedSource ?? throw new InvalidOperationException($"Text source missing for '{novelKey}'.");
|
source = textRecord.AssociatedSource ?? throw new InvalidOperationException($"Text source missing for '{novelKey}'.");
|
||||||
initial = textRecord.Resource.TemplateInitialData ?? throw new InvalidOperationException("Template initial data missing.");
|
initial = textRecord.Resource.TemplateInitialData;
|
||||||
}
|
}
|
||||||
|
|
||||||
return (source, initial);
|
return (source, initial);
|
||||||
@@ -74,6 +76,10 @@ namespace Beam.Temporary.Cli {
|
|||||||
State Initial,
|
State Initial,
|
||||||
BeamDataDictionary Data,
|
BeamDataDictionary Data,
|
||||||
DownloadContextBuilder<T> CtxBuilder) : ILinkStage {
|
DownloadContextBuilder<T> CtxBuilder) : ILinkStage {
|
||||||
|
|
||||||
|
private State? endState;
|
||||||
|
private bool linksFrozen = false;
|
||||||
|
|
||||||
public ITransformStage WithLink() {
|
public ITransformStage WithLink() {
|
||||||
var link = Data.Templates[Source.Key].Builder.Build(Initial);
|
var link = Data.Templates[Source.Key].Builder.Build(Initial);
|
||||||
CtxBuilder.WithLinks(new[] { link });
|
CtxBuilder.WithLinks(new[] { link });
|
||||||
@@ -85,28 +91,45 @@ namespace Beam.Temporary.Cli {
|
|||||||
var generator = SourceLinkEnumerable.FromGenerator(new OrderedSourceLinkGenerator(
|
var generator = SourceLinkEnumerable.FromGenerator(new OrderedSourceLinkGenerator(
|
||||||
template.Builder,
|
template.Builder,
|
||||||
new NumberedStateChanger(template.Factory.Behavior),
|
new NumberedStateChanger(template.Factory.Behavior),
|
||||||
Initial));
|
Initial, endState));
|
||||||
CtxBuilder.WithLinks(generator);
|
CtxBuilder.WithLinks(generator);
|
||||||
|
linksFrozen = true;
|
||||||
return new TransformStage(Source, Data, CtxBuilder);
|
return new TransformStage(Source, Data, CtxBuilder);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Offsets the initial state to the start of <paramref name="range"/> and records an end
/// state for the link generator created by a later <c>WithLinkGenerator</c> call.
/// </summary>
/// <param name="range">
/// Start and end positions, both counted from the start. Each offset is applied as
/// <c>position - 1</c>.
/// </param>
/// <returns>This stage, so further calls can be chained.</returns>
/// <exception cref="InvalidOperationException">Thrown when <c>WithLinkGenerator</c> has already been called.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown when <paramref name="range"/> uses a from-end (<c>^</c>) index or its end is before its start.
/// </exception>
public ILinkStage WithRange(Range range) {
    if (linksFrozen) {
        throw new InvalidOperationException("WithRange must be called before WithLinkGenerator");
    }

    // Index.Value ignores the from-end flag, so `^n` would otherwise be silently treated as `n`.
    if (range.Start.IsFromEnd || range.End.IsFromEnd) {
        throw new ArgumentOutOfRangeException(nameof(range), "range must not use from-end (^) indices");
    }

    int first = range.Start.Value;
    int last = range.End.Value;
    if (last < first) {
        throw new ArgumentOutOfRangeException(nameof(range), "start must be <= end");
    }

    var template = Data.Templates[Source.Key];
    var stateChanger = new NumberedStateChanger(template.Factory.Behavior);

    // Copy before touching Initial so both offsets are applied to the same starting state.
    // NOTE(review): Apply's result is discarded, so it presumably mutates its argument in
    // place; a second WithRange call would then offset Initial again — confirm.
    endState = Initial.Copy();
    stateChanger.Apply(Initial, first - 1);
    stateChanger.Apply(endState, last - 1);
    return this;
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private sealed record TransformStage(
|
private sealed record TransformStage(
|
||||||
WebResource Source,
|
WebResource Source,
|
||||||
BeamDataDictionary Data,
|
BeamDataDictionary Data,
|
||||||
DownloadContextBuilder<T> CtxBuilder) : ITransformStage {
|
DownloadContextBuilder<T> CtxBuilder) : ITransformStage {
|
||||||
public IContextStage WithTransformer(Func<DataBindings, HtmlTransformer<T>> factory) {
|
public IContextStage WithTransformer<U>(Func<DataBindings, Func<object, T>> factory) {
|
||||||
var transformer = factory(Data.Bindings[Source.Bindings]);
|
var transformer = factory(Data.Bindings[Source.Bindings]);
|
||||||
CtxBuilder.WithTransformer(transformer);
|
return new ContextStage<U>(CtxBuilder, transformer);
|
||||||
return new ContextStage(CtxBuilder);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private sealed class ContextStage : IContextStage {
|
private sealed class ContextStage<U> : IContextStage {
|
||||||
private readonly DownloadContextBuilder<T> _ctxBuilder;
|
private readonly DownloadContextBuilder<T> _ctxBuilder;
|
||||||
|
private readonly Func<object, T> _transformer;
|
||||||
private int _parallelism = 4;
|
private int _parallelism = 4;
|
||||||
|
|
||||||
public ContextStage(DownloadContextBuilder<T> ctxBuilder) => _ctxBuilder = ctxBuilder;
|
public ContextStage(DownloadContextBuilder<T> ctxBuilder, Func<object, T> transformer) {
|
||||||
|
_ctxBuilder = ctxBuilder;
|
||||||
|
_transformer = transformer;
|
||||||
|
}
|
||||||
|
|
||||||
public IContextStage Configure(Action<DownloadContextBuilder<T>> configure) {
|
public IContextStage Configure(Action<DownloadContextBuilder<T>> configure) {
|
||||||
configure(_ctxBuilder);
|
configure(_ctxBuilder);
|
||||||
@@ -134,7 +157,7 @@ namespace Beam.Temporary.Cli {
|
|||||||
context,
|
context,
|
||||||
ctx => new UnitFragmentDownloader<T>(
|
ctx => new UnitFragmentDownloader<T>(
|
||||||
context.Web,
|
context.Web,
|
||||||
context.AsyncTranformer,
|
_transformer,
|
||||||
context.AsyncFailurePredicates,
|
context.AsyncFailurePredicates,
|
||||||
_parallelism,
|
_parallelism,
|
||||||
context.DownloadLogger),
|
context.DownloadLogger),
|
||||||
|
|||||||
@@ -0,0 +1,13 @@
|
|||||||
|
using aeqw89.DataKeys;
|
||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Text;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
|
||||||
|
namespace Beam.Temporary.Cli {
    /// <summary>
    /// Bundles a <see cref="DataKey{T}"/> of string with a list of <c>SourceLink</c> values.
    /// </summary>
    internal class LinkCollection {
        /// <summary>Creates a collection holding the given key and link list.</summary>
        public LinkCollection(DataKey<string> key, List<SourceLink> links) {
            Key = key;
            Links = links;
        }

        /// <summary>The key this collection was created with; may be reassigned.</summary>
        public DataKey<string> Key { get; set; }

        /// <summary>The link list this collection was created with; may be reassigned.</summary>
        public List<SourceLink> Links { get; set; }
    }
}
|
||||||
@@ -50,8 +50,8 @@ namespace Beam.Temporary.Cli {
|
|||||||
FriendlyName = "My House Of Horrors",
|
FriendlyName = "My House Of Horrors",
|
||||||
AssociatedSource = wdsAgg,
|
AssociatedSource = wdsAgg,
|
||||||
AssociatedMetaSource = wdsAux,
|
AssociatedMetaSource = wdsAux,
|
||||||
TemplateInitialData = new State(["24349", "2896325"]),
|
TemplateInitialData = new ImmutableState(["24349", "2896325"]),
|
||||||
MetaTemplateInitialData = new State(["24349"])
|
MetaTemplateInitialData = new ImmutableState(["24349"])
|
||||||
};
|
};
|
||||||
|
|
||||||
sdd.Novels.TryAdd(novel.Key, novel);
|
sdd.Novels.TryAdd(novel.Key, novel);
|
||||||
|
|||||||
@@ -49,6 +49,8 @@ namespace Beam.Temporary.Cli {
|
|||||||
NovelStatics.Define_WoDuShu_HouseOfHorrors(BeamData);
|
NovelStatics.Define_WoDuShu_HouseOfHorrors(BeamData);
|
||||||
ClassicTemplates.Register(BeamData);
|
ClassicTemplates.Register(BeamData);
|
||||||
|
|
||||||
|
await sharedContext.ForceSave();
|
||||||
|
|
||||||
CancellationTokenSource cts = new();
|
CancellationTokenSource cts = new();
|
||||||
|
|
||||||
HtmlTransformer<IDocumentMetaData> ArticleDataTransformer(DataBindings? binding) => (x) => {
|
HtmlTransformer<IDocumentMetaData> ArticleDataTransformer(DataBindings? binding) => (x) => {
|
||||||
@@ -75,38 +77,23 @@ namespace Beam.Temporary.Cli {
|
|||||||
};
|
};
|
||||||
|
|
||||||
var novel = new DataKey<TextResource>("novels:house_of_horrors");
|
var novel = new DataKey<TextResource>("novels:house_of_horrors");
|
||||||
var context_aux = Architecture.GetMeta(web, novel, BeamData, cts.Token);
|
|
||||||
context_aux.RetryReporter = new Progress<RetryReport>((x) => Console.WriteLine($"Failed. Trying again. {x.TryNumber}"));
|
|
||||||
var metaDownloader = new DownloadEnumerable<IDocumentMetaData>(
|
|
||||||
new SequentialFragmentDownloader<IDocumentMetaData>(
|
|
||||||
context_aux,
|
|
||||||
(c) => new UnitFragmentDownloader<IDocumentMetaData>(c.Web, c.AsyncTranformer, c.AsyncFailurePredicates, 4, logger),
|
|
||||||
logger)
|
|
||||||
.UnwrapFragmented());
|
|
||||||
var metadata = (await metaDownloader.FirstAsync());
|
|
||||||
|
|
||||||
var metadata2 = await DownloadBuilder<IDocumentMetaData>.FromMeta(novel, BeamData)
|
var metadata2 = await DownloadBuilder<IDocumentMetaData>.FromMeta(novel, BeamData)
|
||||||
.WithLink()
|
.WithLink()
|
||||||
.WithTransformer(ArticleDataTransformer)
|
.WithTransformer(ArticleDataTransformer)
|
||||||
.Configure((x) => x
|
.Configure((x) => x
|
||||||
|
.WithDownloadLogger(logger)
|
||||||
.WithRetryReporter(new Progress<RetryReport>())
|
.WithRetryReporter(new Progress<RetryReport>())
|
||||||
.WithTimeOut(TimeSpan.FromSeconds(15)))
|
.WithTimeOut(TimeSpan.FromSeconds(15)))
|
||||||
.Build()
|
.Build()
|
||||||
.FirstAsync();
|
.FirstAsync();
|
||||||
|
|
||||||
var context = Architecture.GetTextRecord(web, novel, BeamData, metadata.Data, cts.Token);
|
|
||||||
context.DownloadReporter = new Progress<DownloadReport>((x) => Console.WriteLine(x));
|
|
||||||
var downloader = new DownloadEnumerable<IDocument>(
|
|
||||||
new SequentialFragmentDownloader<IDocument>(
|
|
||||||
context,
|
|
||||||
(c) => new UnitFragmentDownloader<IDocument>(c.Web, c.AsyncTranformer, c.AsyncFailurePredicates, 4, logger),
|
|
||||||
logger)
|
|
||||||
.UnwrapFragmented());
|
|
||||||
|
|
||||||
var downloader2 = DownloadBuilder<IDocument>.FromText(novel, BeamData)
|
var downloader2 = DownloadBuilder<IDocument>.FromText(novel, BeamData)
|
||||||
|
.WithRange(1..5)
|
||||||
.WithLinkGenerator()
|
.WithLinkGenerator()
|
||||||
.WithTransformer((x) => DocumentTransformer(x, metadata2.Data))
|
.WithTransformer((x) => DocumentTransformer(x, metadata2.Data))
|
||||||
.Configure((x) => x
|
.Configure((x) => x
|
||||||
|
.WithDownloadLogger(logger)
|
||||||
.WithDownloadReporter(new Progress<DownloadReport>((x) => logger.LogInformation(x.ToString())))
|
.WithDownloadReporter(new Progress<DownloadReport>((x) => logger.LogInformation(x.ToString())))
|
||||||
.WithTimeOut(TimeSpan.FromSeconds(15))
|
.WithTimeOut(TimeSpan.FromSeconds(15))
|
||||||
)
|
)
|
||||||
@@ -122,11 +109,16 @@ namespace Beam.Temporary.Cli {
|
|||||||
continue;
|
continue;
|
||||||
if (meta is not ArticleData articleMetaData)
|
if (meta is not ArticleData articleMetaData)
|
||||||
continue;
|
continue;
|
||||||
|
// Skip downloads that carry no book-level metadata entry.
if (!download.Data.MetaData.TryGetValue(Architecture.BookKey, out var bookmeta)) {
    continue;
}

// Test the book entry fetched above, not `meta` again: re-testing `meta` always passes
// here and would make bookMetaData an alias of the chapter metadata.
if (bookmeta is not ArticleData bookMetaData) {
    continue;
}
|
||||||
//Console.WriteLine($"Title: {data.Name}");
|
//Console.WriteLine($"Title: {data.Name}");
|
||||||
//Console.WriteLine($"Description: {data.Description}");
|
//Console.WriteLine($"Description: {data.Description}");
|
||||||
//Console.WriteLine($"Categories: {data.Categories.Aggregate((x, y) => $"{x}; {y}")}");
|
//Console.WriteLine($"Categories: {data.Categories.Aggregate((x, y) => $"{x}; {y}")}");
|
||||||
//Console.WriteLine($"Authors: {data.Authors.Aggregate((x,y) => $"{x}; {y}")}");
|
//Console.WriteLine($"Authors: {data.Authors.Aggregate((x,y) => $"{x}; {y}")}");
|
||||||
Console.WriteLine($"Chapter title: {articleMetaData.Name}");
|
Console.WriteLine($"Chapter title: {articleMetaData.Name}");
|
||||||
|
Console.WriteLine($"Book title: {bookMetaData.Name}");
|
||||||
//Console.WriteLine($"Content: {download}");
|
//Console.WriteLine($"Content: {download}");
|
||||||
|
|
||||||
//translationTasks.Add(Task.Run(async () => {
|
//translationTasks.Add(Task.Run(async () => {
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ using System.Threading.Tasks;
|
|||||||
|
|
||||||
namespace Beam.Temporary.Cli {
|
namespace Beam.Temporary.Cli {
|
||||||
public class StateChangerFactory {
|
public class StateChangerFactory {
|
||||||
|
[JsonIgnore]
|
||||||
public IStateChangeBehaviour Behavior => FactoryTable[StateChangerKey]();
|
public IStateChangeBehaviour Behavior => FactoryTable[StateChangerKey]();
|
||||||
|
|
||||||
[JsonInclude]
|
[JsonInclude]
|
||||||
|
|||||||
@@ -13,8 +13,8 @@ namespace Beam.Temporary.Cli {
|
|||||||
public string? FriendlyName { get; set; }
|
public string? FriendlyName { get; set; }
|
||||||
public DataKey<WebResource>? AssociatedSource { get; set; }
|
public DataKey<WebResource>? AssociatedSource { get; set; }
|
||||||
public DataKey<WebResource>? AssociatedMetaSource { get; set; }
|
public DataKey<WebResource>? AssociatedMetaSource { get; set; }
|
||||||
public required State TemplateInitialData { get; set; }
|
public required ImmutableState TemplateInitialData { get; set; }
|
||||||
public State? MetaTemplateInitialData { get; set; }
|
public ImmutableState? MetaTemplateInitialData { get; set; }
|
||||||
|
|
||||||
public TextResourceRecord ToRecord(BeamDataDictionary sdd) {
|
public TextResourceRecord ToRecord(BeamDataDictionary sdd) {
|
||||||
return new(this,
|
return new(this,
|
||||||
|
|||||||
@@ -0,0 +1,34 @@
|
|||||||
|
using Beam.Dynamic;
|
||||||
|
using Microsoft.Recognizers.Text;
|
||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Globalization;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Text;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
|
||||||
|
namespace Beam.Tests {
    /// <summary>
    /// Tests for <c>OnlineCleaner.ParseNumbers</c>.
    /// </summary>
    public class OnlineCleanerTests {
        /// <summary>
        /// A run of ASCII digits recognized with the English culture yields exactly one value, 1234.
        /// </summary>
        [Fact]
        public void Should_LocalizeArabic() {
            var parsed = OnlineCleaner.ParseNumbers("1234", Culture.English);

            var only = Assert.Single(parsed);
            Assert.Equal(1234, only);
        }

        /// <summary>
        /// The numerals "九一五" recognized with the Chinese culture yield exactly one value, 915.
        /// </summary>
        // NOTE(review): the method name says "Indian" but the input and culture are Chinese — confirm the intended name.
        [Fact]
        public void Should_LocalizeIndian() {
            var parsed = OnlineCleaner.ParseNumbers("九一五", Culture.Chinese);

            var only = Assert.Single(parsed);
            Assert.Equal(915, only);
        }
    }
}
|
||||||
@@ -1,7 +1,12 @@
|
|||||||
using System.Globalization;

namespace Beam.Tests {
    /// <summary>
    /// Pins the framework behavior that motivates a dedicated number recognizer.
    /// </summary>
    public class UnitTest1 {
        /// <summary>
        /// <see cref="int.TryParse(string?, System.IFormatProvider?, out int)"/> only accepts the
        /// ASCII digits 0-9, so Chinese numerals are rejected even when a Chinese number format
        /// is supplied. The previous assertion expected success and could never pass.
        /// </summary>
        [Fact]
        public void Test1() {
            const string test = "九一五";

            Assert.False(int.TryParse(test, CultureInfo.GetCultureInfo("zh-Hans").NumberFormat, out _));
        }
    }
}
|
||||||
|
|||||||
@@ -13,6 +13,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Beam.Exports", "Beam.Export
|
|||||||
EndProject
|
EndProject
|
||||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Beam.Tests", "Beam.Tests\Beam.Tests.csproj", "{E26800C2-0518-49E8-88DF-A0B6ED97D4AB}"
|
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Beam.Tests", "Beam.Tests\Beam.Tests.csproj", "{E26800C2-0518-49E8-88DF-A0B6ED97D4AB}"
|
||||||
EndProject
|
EndProject
|
||||||
|
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Beam.Puppeteer", "Beam.Puppeteer\Beam.Puppeteer.csproj", "{1A967563-D643-401D-A031-68DD43FACE8D}"
|
||||||
|
EndProject
|
||||||
Global
|
Global
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||||
Debug|Any CPU = Debug|Any CPU
|
Debug|Any CPU = Debug|Any CPU
|
||||||
@@ -39,8 +41,15 @@ Global
|
|||||||
{E26800C2-0518-49E8-88DF-A0B6ED97D4AB}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
{E26800C2-0518-49E8-88DF-A0B6ED97D4AB}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||||
{E26800C2-0518-49E8-88DF-A0B6ED97D4AB}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
{E26800C2-0518-49E8-88DF-A0B6ED97D4AB}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||||
{E26800C2-0518-49E8-88DF-A0B6ED97D4AB}.Release|Any CPU.Build.0 = Release|Any CPU
|
{E26800C2-0518-49E8-88DF-A0B6ED97D4AB}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||||
|
{1A967563-D643-401D-A031-68DD43FACE8D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||||
|
{1A967563-D643-401D-A031-68DD43FACE8D}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||||
|
{1A967563-D643-401D-A031-68DD43FACE8D}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||||
|
{1A967563-D643-401D-A031-68DD43FACE8D}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||||
EndGlobalSection
|
EndGlobalSection
|
||||||
GlobalSection(SolutionProperties) = preSolution
|
GlobalSection(SolutionProperties) = preSolution
|
||||||
HideSolutionNode = FALSE
|
HideSolutionNode = FALSE
|
||||||
EndGlobalSection
|
EndGlobalSection
|
||||||
|
GlobalSection(ExtensibilityGlobals) = postSolution
|
||||||
|
SolutionGuid = {363CAF17-9E48-45B9-AA3F-78BB5E95DB0E}
|
||||||
|
EndGlobalSection
|
||||||
EndGlobal
|
EndGlobal
|
||||||
|
|||||||
+11
-7
@@ -4,22 +4,26 @@
|
|||||||
<TargetFramework>net9.0</TargetFramework>
|
<TargetFramework>net9.0</TargetFramework>
|
||||||
<ImplicitUsings>enable</ImplicitUsings>
|
<ImplicitUsings>enable</ImplicitUsings>
|
||||||
<Nullable>enable</Nullable>
|
<Nullable>enable</Nullable>
|
||||||
|
|
||||||
|
<GeneratePackageOnBuild>True</GeneratePackageOnBuild>
|
||||||
|
<Title>Beam</Title>
|
||||||
|
<Authors>aeqw89</Authors>
|
||||||
|
<Company>qwsdcvghyu</Company>
|
||||||
|
<Description>A library for downloading internet resources</Description>
|
||||||
|
<PackageProjectUrl>https://github.com/qwsdcvghyu89/Beam</PackageProjectUrl>
|
||||||
|
<RepositoryUrl>https://github.com/qwsdcvghyu89/Beam</RepositoryUrl>
|
||||||
|
<PackageId>aeqw89.Beam</PackageId>
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
|
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<PackageReference Include="FluentBuilder" Version="0.10.0">
|
<PackageReference Include="aeqw89.DataKeys" Version="1.0.1" />
|
||||||
<PrivateAssets>all</PrivateAssets>
|
|
||||||
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
|
|
||||||
</PackageReference>
|
|
||||||
<PackageReference Include="HtmlAgilityPack" Version="1.11.72" />
|
<PackageReference Include="HtmlAgilityPack" Version="1.11.72" />
|
||||||
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="9.0.1" />
|
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="9.0.1" />
|
||||||
<PackageReference Include="System.Linq.Async" Version="6.0.1" />
|
<PackageReference Include="System.Linq.Async" Version="6.0.1" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<Reference Include="aeqw89.DataKeys">
|
|
||||||
<HintPath>..\..\aeqw89.DataKeys\aeqw89.DataKeys\bin\Debug\net9.0\aeqw89.DataKeys.dll</HintPath>
|
|
||||||
</Reference>
|
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
|
||||||
</Project>
|
</Project>
|
||||||
|
|||||||
+2
-15
@@ -5,12 +5,13 @@ using System.Collections.Generic;
|
|||||||
using System.Linq;
|
using System.Linq;
|
||||||
using System.Text;
|
using System.Text;
|
||||||
using System.Threading.Tasks;
|
using System.Threading.Tasks;
|
||||||
using FluentBuilder;
|
|
||||||
using Microsoft.Extensions.Logging;
|
using Microsoft.Extensions.Logging;
|
||||||
|
|
||||||
namespace Beam {
|
namespace Beam {
|
||||||
public delegate T HtmlTransformer<out T>(HtmlDocument doc);
|
public delegate T HtmlTransformer<out T>(HtmlDocument doc);
|
||||||
|
public delegate Task<U> AsyncTransformer<in T, U>(T elem);
|
||||||
public delegate Task<T> AsyncHtmlTransformer<T>(HtmlDocument doc);
|
public delegate Task<T> AsyncHtmlTransformer<T>(HtmlDocument doc);
|
||||||
|
public delegate Task<T> AsyncBinaryTransformer<T>(byte[] bin);
|
||||||
|
|
||||||
public class DownloadContext<T> : IDisposable {
|
public class DownloadContext<T> : IDisposable {
|
||||||
private bool disposedValue;
|
private bool disposedValue;
|
||||||
@@ -19,8 +20,6 @@ namespace Beam {
|
|||||||
=> DownloadContextBuilder<T>.FromContext(this);
|
=> DownloadContextBuilder<T>.FromContext(this);
|
||||||
|
|
||||||
public HtmlWeb Web { get; }
|
public HtmlWeb Web { get; }
|
||||||
public HtmlTransformer<T> Transformer { get; }
|
|
||||||
public AsyncHtmlTransformer<T> AsyncTranformer { get; }
|
|
||||||
public IProgress<DownloadReport>? DownloadReporter { get; set; }
|
public IProgress<DownloadReport>? DownloadReporter { get; set; }
|
||||||
public IProgress<RetryReport>? RetryReporter { get; set; }
|
public IProgress<RetryReport>? RetryReporter { get; set; }
|
||||||
public AsyncDownloadFailurePredicate<HtmlDocument>?[]? AsyncFailurePredicates { get; }
|
public AsyncDownloadFailurePredicate<HtmlDocument>?[]? AsyncFailurePredicates { get; }
|
||||||
@@ -33,8 +32,6 @@ namespace Beam {
|
|||||||
public DownloadContext(HtmlWeb web,
|
public DownloadContext(HtmlWeb web,
|
||||||
IEnumerable<SourceLink> links,
|
IEnumerable<SourceLink> links,
|
||||||
CancellationToken cancellationToken = default,
|
CancellationToken cancellationToken = default,
|
||||||
HtmlTransformer<T>? transformer = null,
|
|
||||||
AsyncHtmlTransformer<T>? asyncTransformer = null,
|
|
||||||
IProgress<DownloadReport>? downloadReporter = null,
|
IProgress<DownloadReport>? downloadReporter = null,
|
||||||
IProgress<RetryReport>? retryReporter = null,
|
IProgress<RetryReport>? retryReporter = null,
|
||||||
AsyncDownloadFailurePredicate<HtmlDocument>?[]? asyncFailurePredicates = null,
|
AsyncDownloadFailurePredicate<HtmlDocument>?[]? asyncFailurePredicates = null,
|
||||||
@@ -46,16 +43,6 @@ namespace Beam {
|
|||||||
Web = web;
|
Web = web;
|
||||||
Links = links;
|
Links = links;
|
||||||
CancellationToken = cancellationToken;
|
CancellationToken = cancellationToken;
|
||||||
if (transformer is null && asyncTransformer is null)
|
|
||||||
throw new ArgumentException($"Either {nameof(transformer)} or {nameof(asyncTransformer)} must be not null.");
|
|
||||||
|
|
||||||
Transformer = transformer!;
|
|
||||||
AsyncTranformer = asyncTransformer!;
|
|
||||||
if (transformer is null && asyncTransformer is not null)
|
|
||||||
Transformer = (x) => asyncTransformer(x).Result;
|
|
||||||
if (asyncTransformer is null && transformer is not null)
|
|
||||||
AsyncTranformer = (x) => Task.FromResult(transformer(x));
|
|
||||||
|
|
||||||
DownloadReporter = downloadReporter;
|
DownloadReporter = downloadReporter;
|
||||||
RetryReporter = retryReporter;
|
RetryReporter = retryReporter;
|
||||||
AsyncFailurePredicates = asyncFailurePredicates;
|
AsyncFailurePredicates = asyncFailurePredicates;
|
||||||
|
|||||||
@@ -10,8 +10,6 @@ namespace Beam {
|
|||||||
|
|
||||||
public class DownloadContextBuilder<T> {
|
public class DownloadContextBuilder<T> {
|
||||||
private HtmlWeb _web;
|
private HtmlWeb _web;
|
||||||
private HtmlTransformer<T> _transformer;
|
|
||||||
private AsyncHtmlTransformer<T> _asyncTransformer;
|
|
||||||
private IProgress<DownloadReport>? _downloadReporter;
|
private IProgress<DownloadReport>? _downloadReporter;
|
||||||
private IProgress<RetryReport>? _retryReporter;
|
private IProgress<RetryReport>? _retryReporter;
|
||||||
private AsyncDownloadFailurePredicate<HtmlDocument>?[]? _asyncFailurePredicates;
|
private AsyncDownloadFailurePredicate<HtmlDocument>?[]? _asyncFailurePredicates;
|
||||||
@@ -34,16 +32,6 @@ namespace Beam {
|
|||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
public DownloadContextBuilder<T> WithTransformer(HtmlTransformer<T> transformer) {
|
|
||||||
_transformer = transformer;
|
|
||||||
return this;
|
|
||||||
}
|
|
||||||
|
|
||||||
public DownloadContextBuilder<T> WithAsyncTransformer(AsyncHtmlTransformer<T> asyncTransformer) {
|
|
||||||
_asyncTransformer = asyncTransformer;
|
|
||||||
return this;
|
|
||||||
}
|
|
||||||
|
|
||||||
public DownloadContextBuilder<T> WithDownloadReporter(IProgress<DownloadReport> downloadReporter) {
|
public DownloadContextBuilder<T> WithDownloadReporter(IProgress<DownloadReport> downloadReporter) {
|
||||||
_downloadReporter = downloadReporter;
|
_downloadReporter = downloadReporter;
|
||||||
return this;
|
return this;
|
||||||
@@ -91,8 +79,6 @@ namespace Beam {
|
|||||||
web: _web,
|
web: _web,
|
||||||
links: _links,
|
links: _links,
|
||||||
cancellationToken: _cancellationToken,
|
cancellationToken: _cancellationToken,
|
||||||
transformer: _transformer,
|
|
||||||
asyncTransformer: _asyncTransformer,
|
|
||||||
downloadReporter: _downloadReporter,
|
downloadReporter: _downloadReporter,
|
||||||
retryReporter: _retryReporter,
|
retryReporter: _retryReporter,
|
||||||
asyncFailurePredicates: _asyncFailurePredicates,
|
asyncFailurePredicates: _asyncFailurePredicates,
|
||||||
@@ -115,8 +101,6 @@ namespace Beam {
|
|||||||
return new DownloadContextBuilder<T>(existing.Web)
|
return new DownloadContextBuilder<T>(existing.Web)
|
||||||
.WithLinks(existing.Links)
|
.WithLinks(existing.Links)
|
||||||
.WithCancellationToken(existing.CancellationToken)
|
.WithCancellationToken(existing.CancellationToken)
|
||||||
.WithTransformer(existing.Transformer)
|
|
||||||
.WithAsyncTransformer(existing.AsyncTranformer)
|
|
||||||
.WithDownloadReporter(existing.DownloadReporter!)
|
.WithDownloadReporter(existing.DownloadReporter!)
|
||||||
.WithRetryReporter(existing.RetryReporter!)
|
.WithRetryReporter(existing.RetryReporter!)
|
||||||
.WithAsyncFailurePredicates(existing.AsyncFailurePredicates ?? Array.Empty<AsyncDownloadFailurePredicate<HtmlDocument>>())
|
.WithAsyncFailurePredicates(existing.AsyncFailurePredicates ?? Array.Empty<AsyncDownloadFailurePredicate<HtmlDocument>>())
|
||||||
|
|||||||
@@ -0,0 +1,23 @@
|
|||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Text;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
|
||||||
|
namespace Beam {
|
||||||
|
/// <summary>
/// A read-only snapshot of a state vector. The elements are copied on the way in
/// and on the way out, so neither the array passed to the constructor nor any span
/// or <see cref="State"/> obtained from this value can be used to change it.
/// </summary>
/// <param name="state">Elements to snapshot; <c>null</c> is treated as an empty state.</param>
public readonly struct ImmutableState(object[] state) {
    // Defensive copy: without it the caller keeps a reference to the backing
    // array and can mutate the "immutable" state after construction.
    readonly object[] state = state is null ? Array.Empty<object>() : (object[])state.Clone();

    // default(ImmutableState) bypasses the field initializer and leaves the field
    // null; treat that case as an empty state instead of throwing.
    private object[] Items => state ?? Array.Empty<object>();

    /// <summary>
    /// Returns the elements as a span over a fresh copy. Writing through the
    /// returned span does not affect this instance.
    /// </summary>
    public readonly Span<object> GetState() => (object[])Items.Clone();

    /// <summary>Creates a mutable <see cref="State"/> holding a copy of the elements.</summary>
    public readonly State Copy()
        => new((object[])Items.Clone());

    /// <summary>Gets the element at position <paramref name="i"/>.</summary>
    public readonly object this[Index i] {
        get => Items[i];
    }

    /// <summary>Converts to a mutable <see cref="State"/> by copying (see <see cref="Copy"/>).</summary>
    public static implicit operator State(ImmutableState state)
        => state.Copy();
}
|
||||||
|
}
|
||||||
@@ -13,15 +13,17 @@ namespace Beam {
|
|||||||
public class OrderedSourceLinkGenerator : IEnumerator<SourceLink> {
|
public class OrderedSourceLinkGenerator : IEnumerator<SourceLink> {
|
||||||
public SourceLinkBuilder Builder { get; set; }
|
public SourceLinkBuilder Builder { get; set; }
|
||||||
public NumberedStateChanger Behaviour { get; }
|
public NumberedStateChanger Behaviour { get; }
|
||||||
private State InitialState;
|
|
||||||
|
|
||||||
|
private State? EndState;
|
||||||
|
private State InitialState;
|
||||||
|
|
||||||
public OrderedSourceLinkGenerator(SourceLinkBuilder builder, NumberedStateChanger behaviour, params object[] initialState)
|
public OrderedSourceLinkGenerator(SourceLinkBuilder builder, NumberedStateChanger behaviour, params object[] initialState)
|
||||||
: this(builder, behaviour, new State(initialState)) { }
|
: this(builder, behaviour, new State(initialState)) { }
|
||||||
public OrderedSourceLinkGenerator(SourceLinkBuilder builder, NumberedStateChanger behaviour, State initialState) {
|
public OrderedSourceLinkGenerator(SourceLinkBuilder builder, NumberedStateChanger behaviour, State initialState, State? endState = null) {
|
||||||
Builder = builder;
|
Builder = builder;
|
||||||
Behaviour = behaviour;
|
Behaviour = behaviour;
|
||||||
InitialState = initialState;
|
InitialState = initialState.Copy();
|
||||||
|
EndState = endState?.Copy();
|
||||||
State = InitialState.Copy();
|
State = InitialState.Copy();
|
||||||
|
|
||||||
Reset();
|
Reset();
|
||||||
@@ -37,8 +39,15 @@ namespace Beam {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public bool MoveNext() {
|
public bool MoveNext() {
|
||||||
|
if (!Current.HasValue || (EndState is not null && State.GetState().SequenceEqual(EndState.GetState()))) {
|
||||||
|
Current = SourceLink.InvalidLink;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
Behaviour.Apply(State, 1);
|
Behaviour.Apply(State, 1);
|
||||||
Current = Builder.Build(State);
|
Current = Builder.Build(State);
|
||||||
|
if (!Current.HasValue || (EndState is not null && State.GetState().SequenceEqual(EndState.GetState()))) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
return Current.HasValue;
|
return Current.HasValue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -35,13 +35,16 @@ namespace Beam {
|
|||||||
|
|
||||||
links.Add(new Ordered<string>(LinksEnumerator.Current.Link.ToString(), LastOrder++));
|
links.Add(new Ordered<string>(LinksEnumerator.Current.Link.ToString(), LastOrder++));
|
||||||
|
|
||||||
while (LinksEnumerator.MoveNext() && links.Count < idealLinkCount)
|
while (LinksEnumerator.MoveNext() && LinksEnumerator.Current != SourceLink.InvalidLink && links.Count < idealLinkCount)
|
||||||
links.Add(new Ordered<string>(LinksEnumerator.Current.Link.ToString(), LastOrder++));
|
links.Add(new Ordered<string>(LinksEnumerator.Current.Link.ToString(), LastOrder++));
|
||||||
if (links.Count == 0) {
|
if (links.Count == 0) {
|
||||||
Logger?.LogInformation("Out of links!");
|
Logger?.LogInformation("Out of links!");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (links.Any((x) => x.Data == SourceLink.InvalidLink.Link.ToString()))
|
||||||
|
return false;
|
||||||
|
|
||||||
var (result, downloadedT) = await unit.TryDownload(
|
var (result, downloadedT) = await unit.TryDownload(
|
||||||
links.ToArray(),
|
links.ToArray(),
|
||||||
Context.CancellationToken,
|
Context.CancellationToken,
|
||||||
|
|||||||
@@ -0,0 +1,73 @@
|
|||||||
|
using System.Net.Http;
|
||||||
|
using System.Threading;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
|
||||||
|
namespace Beam {
|
||||||
|
/// <summary>
/// A download-managing class that retrieves binary data through <see cref="HttpClient"/>,
/// applies an <see cref="AsyncBinaryTransformer{T}"/>, and supports failure detection
/// plus exponential-back-off retries. Safe to instantiate per request.
/// </summary>
/// <param name="client">HTTP client used for every request.</param>
/// <param name="transformer">Converts the downloaded bytes into the result value.</param>
/// <param name="failurePredicates">
/// Optional checks run against the raw response; if any returns <c>true</c> the attempt
/// counts as failed. <c>null</c> entries are ignored.
/// </param>
public class UnitDownloaderBinary<T>(
    HttpClient client,
    AsyncBinaryTransformer<T> transformer,
    AsyncDownloadFailurePredicate<HttpResponseMessage>?[]? failurePredicates = null)
    : IUnitDownloader<T> {
    // Largest exponent for which (1000 << exponent) milliseconds still fits in an int.
    private const int MaxBackoffExponent = 21;

    public HttpClient Client { get; } = client;
    public virtual AsyncBinaryTransformer<T> Transformer { get; } = transformer;
    public virtual AsyncDownloadFailurePredicate<HttpResponseMessage>?[]? FailurePredicates { get; } = failurePredicates;

    public int LinksPerDownload { get; } = 1;

    /// <summary>Runs all configured failure predicates in parallel on the raw HTTP response.</summary>
    /// <returns><c>true</c> if at least one predicate reported a failure.</returns>
    protected virtual async Task<bool> IsFailure(HttpResponseMessage response) {
        if (FailurePredicates is null) return false;

        // `failed` only ever goes false -> true; reading it inside the loop is a
        // best-effort short-circuit for predicates that have not started yet.
        var failed = false;
        await Parallel.ForEachAsync(FailurePredicates, async (pred, _) => {
            if (failed || pred is null) return;
            if (await pred(response)) failed = true;
        });
        return failed;
    }

    /// <summary>One attempt without retries or back-off.</summary>
    /// <returns>
    /// <c>(true, value)</c> on success; <c>(false, default)</c> on a non-success status code,
    /// a failure predicate hit, or any exception other than caller-requested cancellation.
    /// </returns>
    /// <exception cref="OperationCanceledException">Thrown when <paramref name="ct"/> is cancelled.</exception>
    protected virtual async Task<(bool Success, T? Result)> TryDownloadWithNoRetries(string link, CancellationToken ct) {
        try {
            using var response = await Client.GetAsync(link, HttpCompletionOption.ResponseHeadersRead, ct);
            if (!response.IsSuccessStatusCode) return (false, default);

            if (await IsFailure(response)) return (false, default);

            var bytes = await response.Content.ReadAsByteArrayAsync(ct);
            return (true, await Transformer(bytes));
        } catch (OperationCanceledException) when (ct.IsCancellationRequested) {
            // Caller-requested cancellation is not a failed attempt: propagate it rather
            // than reporting a retry. HttpClient timeouts (token not cancelled) still fall
            // through to the generic handler below and are retried.
            throw;
        } catch {
            return (false, default);
        }
    }

    /// <summary>
    /// Downloads <c>link[0]</c>, retrying up to <paramref name="maximumRetryCount"/> times with
    /// a delay of 2^attempt seconds between attempts. Only the first link is used.
    /// </summary>
    /// <param name="link">Links to download; an empty array yields <c>(false, default)</c>.</param>
    /// <param name="ct">Token that cancels the request and the back-off delay.</param>
    /// <param name="maximumRetryCount">Maximum number of attempts.</param>
    /// <param name="tryProgress">Receives a <see cref="RetryReport"/> after every failed attempt.</param>
    /// <returns><c>(true, value)</c> on success, otherwise <c>false</c> with the last attempt's result.</returns>
    public async Task<(bool, T?)> TryDownload(
        Ordered<string>[] link,
        CancellationToken ct,
        int maximumRetryCount = 7,
        IProgress<RetryReport>? tryProgress = null) {
        if (link.Length == 0) return (false, default);

        T? result = default;
        var attempt = 0;

        while (attempt < maximumRetryCount) {
            ct.ThrowIfCancellationRequested();

            (var success, result) = await TryDownloadWithNoRetries(link[0].Data, ct);
            if (success && result is not null) return (true, result);

            ++attempt;
            tryProgress?.Report(new RetryReport(attempt, link[0].Data));

            // No further attempt will follow, so do not make the caller wait out a
            // back-off period just to be told the download failed.
            if (attempt >= maximumRetryCount) break;

            // 2^attempt seconds, with the exponent capped so the millisecond count
            // cannot overflow int for very large retry budgets.
            await Task.Delay(1000 << Math.Min(attempt, MaxBackoffExponent), ct);
        }

        return (false, result);
    }
}
|
||||||
|
}
|
||||||
@@ -13,11 +13,12 @@ namespace Beam {
|
|||||||
AsyncHtmlTransformer<T> transformer,
|
AsyncHtmlTransformer<T> transformer,
|
||||||
AsyncDownloadFailurePredicate<HtmlDocument>?[]? failurePredicate = null,
|
AsyncDownloadFailurePredicate<HtmlDocument>?[]? failurePredicate = null,
|
||||||
int fragmentSize = 4,
|
int fragmentSize = 4,
|
||||||
ILogger? logger = null) {
|
ILogger? logger = null,
|
||||||
|
IUnitDownloader<T>? internalDownloader = null) {
|
||||||
Web = web;
|
Web = web;
|
||||||
Transformer = transformer;
|
Transformer = transformer;
|
||||||
FailurePredicate = failurePredicate;
|
FailurePredicate = failurePredicate;
|
||||||
UnitDownloader = new UnitDownloader<T>(Web, Transformer, FailurePredicate);
|
UnitDownloader = internalDownloader ?? new UnitDownloader<T>(Web, Transformer, FailurePredicate);
|
||||||
LinksPerDownload = fragmentSize;
|
LinksPerDownload = fragmentSize;
|
||||||
Logger = logger;
|
Logger = logger;
|
||||||
}
|
}
|
||||||
@@ -28,7 +29,7 @@ namespace Beam {
|
|||||||
public int LinksPerDownload { get; set; }
|
public int LinksPerDownload { get; set; }
|
||||||
public ILogger? Logger { get; set; }
|
public ILogger? Logger { get; set; }
|
||||||
|
|
||||||
private readonly UnitDownloader<T> UnitDownloader;
|
private readonly IUnitDownloader<T> UnitDownloader;
|
||||||
|
|
||||||
async Task<(bool, Fragment<Ordered<T>>?)> IUnitDownloader<Fragment<Ordered<T>>>.TryDownload(Ordered<string>[] link, CancellationToken ct, int maximumRetryCount, IProgress<RetryReport>? tryProgress) {
|
async Task<(bool, Fragment<Ordered<T>>?)> IUnitDownloader<Fragment<Ordered<T>>>.TryDownload(Ordered<string>[] link, CancellationToken ct, int maximumRetryCount, IProgress<RetryReport>? tryProgress) {
|
||||||
Fragment<Ordered<T>> fragment = new Fragment<Ordered<T>>(link.Length);
|
Fragment<Ordered<T>> fragment = new Fragment<Ordered<T>>(link.Length);
|
||||||
|
|||||||
Reference in New Issue
Block a user