From 482a46b568a66c90f4dc5d53c4f2a391f88de83e Mon Sep 17 00:00:00 2001
From: qwsdcvghyu89 <61093706+qwsdcvghyu89@users.noreply.github.com>
Date: Mon, 23 Jun 2025 02:11:19 +0300
Subject: [PATCH] Enhance project metadata and refactor core classes
Updated project files for `Beam.Dynamic`, `Beam.Exports`, `Beam.Temporary.Cli`, and `Beam` to include additional metadata and specific package versions. Refactored `DataBindings` and `ResolvedBindings` to records, added a new `Text` property in `Binding.cs`, and introduced `ParseNumbers` in `OnlineCleaner`. New classes `PuppetContext` and `PuppetUnitDownloader` added for Playwright integration. Introduced `ImmutableState` struct and `UnitDownloaderBinary` class for improved download management. Updated tests in `UnitTest1.cs` for number localization. Added `Beam.Puppeteer` project to the solution.
---
Beam.Dynamic/Beam.Dynamic.csproj | 22 +++---
Beam.Dynamic/Binding.cs | 1 +
Beam.Dynamic/DataBindings.cs | 6 +-
Beam.Dynamic/IDataProvider.cs | 4 +-
Beam.Dynamic/OnlineCleaner.cs | 15 ++++
Beam.Exports/Beam.Exports.csproj | 9 ++-
Beam.Puppeteer/Beam.Puppeteer.csproj | 17 +++++
Beam.Puppeteer/PuppetedUnitDownloader.cs | 39 +++++++++++
Beam.Temporary.Cli/Beam.Temporary.Cli.csproj | 28 ++++----
Beam.Temporary.Cli/DownloadBuilder.cs | 51 ++++++++++----
Beam.Temporary.Cli/LinkCollection.cs | 13 ++++
Beam.Temporary.Cli/NovelStatics.cs | 4 +-
Beam.Temporary.Cli/Program.cs | 28 +++-----
Beam.Temporary.Cli/StateChangerFactory.cs | 1 +
Beam.Temporary.Cli/TextResource.cs | 4 +-
Beam.Tests/OnlineCleaner.Tests.cs | 34 +++++++++
Beam.Tests/UnitTest1.cs | 7 +-
Beam.sln | 9 +++
Beam/Beam.csproj | 18 +++--
Beam/DownloadContext.cs | 17 +----
Beam/DownloadContextBuilder.cs | 16 -----
Beam/ImmutableState.cs | 23 ++++++
Beam/OrderedSourceLinkGenerator.cs | 15 +++-
Beam/SequentialDownloader.cs | 5 +-
Beam/State.cs | 2 +-
Beam/UnitDownloaderBinary.cs | 73 ++++++++++++++++++++
Beam/UnitFragmentDownloader.cs | 7 +-
27 files changed, 354 insertions(+), 114 deletions(-)
create mode 100644 Beam.Puppeteer/Beam.Puppeteer.csproj
create mode 100644 Beam.Puppeteer/PuppetedUnitDownloader.cs
create mode 100644 Beam.Temporary.Cli/LinkCollection.cs
create mode 100644 Beam.Tests/OnlineCleaner.Tests.cs
create mode 100644 Beam/ImmutableState.cs
create mode 100644 Beam/UnitDownloaderBinary.cs
diff --git a/Beam.Dynamic/Beam.Dynamic.csproj b/Beam.Dynamic/Beam.Dynamic.csproj
index 7dc4e64..2a6cd5c 100644
--- a/Beam.Dynamic/Beam.Dynamic.csproj
+++ b/Beam.Dynamic/Beam.Dynamic.csproj
@@ -4,19 +4,21 @@
net9.0
enable
enable
+
Beam Dynamic
+ aeqw89
+ qwsdcvghyu
+
+ Beam utilities facilitating dynamic fetching of elements of webpages
+ https://github.com/qwsdcvghyu89/Beam
+ aeqw89.Beam.Dynamic
-
-
-
-
-
- ..\..\aeqw89.DataKeys\aeqw89.DataKeys\bin\Debug\net9.0\aeqw89.DataKeys.dll
-
-
- ..\..\aeqw89.PersistentData\aeqw89.PersistentData\bin\Release\net9.0\aeqw89.PersistentData.dll
-
+
+
+
+
+
diff --git a/Beam.Dynamic/Binding.cs b/Beam.Dynamic/Binding.cs
index c4897d6..a66624f 100644
--- a/Beam.Dynamic/Binding.cs
+++ b/Beam.Dynamic/Binding.cs
@@ -16,6 +16,7 @@ namespace Beam.Dynamic {
public string? ArrayDelimiters { get; set; }
public string? XPath { get; set; }
public string? CssPath { get; set; }
+ public string? Text { get; set; }
private IDataProvider? Provider_;
public IDataProvider? Provider {
get => Provider_;
diff --git a/Beam.Dynamic/DataBindings.cs b/Beam.Dynamic/DataBindings.cs
index d0c89c9..95ad480 100644
--- a/Beam.Dynamic/DataBindings.cs
+++ b/Beam.Dynamic/DataBindings.cs
@@ -1,7 +1,7 @@
using HtmlAgilityPack;
namespace Beam.Dynamic {
- public class DataBindings {
+ public record class DataBindings {
public Binding? Title { get; set; }
public Binding? Authors { get; set; }
public Binding? Description { get; set; }
@@ -9,7 +9,7 @@ namespace Beam.Dynamic {
public Binding? Language { get; set; }
public Binding? Tags { get; set; }
- public ResolvedBindings Resolve(HtmlDocument doc) {
+ public virtual ResolvedBindings Resolve(HtmlDocument doc) {
return new ResolvedBindings() {
Title = Title?.Resolve(doc),
Authors = Authors?.Resolve(doc) ?? Array.Empty(),
@@ -21,7 +21,7 @@ namespace Beam.Dynamic {
}
}
- public class ResolvedBindings {
+ public record class ResolvedBindings {
public string? Title { get; set; }
public string[]? Authors { get; set; }
public string? Description { get; set; }
diff --git a/Beam.Dynamic/IDataProvider.cs b/Beam.Dynamic/IDataProvider.cs
index e2e43b8..95e961e 100644
--- a/Beam.Dynamic/IDataProvider.cs
+++ b/Beam.Dynamic/IDataProvider.cs
@@ -1,8 +1,8 @@
using HtmlAgilityPack;
namespace Beam.Dynamic {
- [System.Text.Json.Serialization.JsonDerivedType(typeof(ParagraphedContentDataProvider), 20)]
- [System.Text.Json.Serialization.JsonDerivedType(typeof(ListContentDataProvider), 21)]
+ [System.Text.Json.Serialization.JsonDerivedType(typeof(ParagraphedContentDataProvider), "paragraphed-data-provider")]
+ [System.Text.Json.Serialization.JsonDerivedType(typeof(ListContentDataProvider), "list-data-provider")]
public interface IDataProvider {
public string Get(HtmlDocument document);
public HtmlNode? GetNode(HtmlDocument document);
diff --git a/Beam.Dynamic/OnlineCleaner.cs b/Beam.Dynamic/OnlineCleaner.cs
index 6ccd781..7a8097d 100644
--- a/Beam.Dynamic/OnlineCleaner.cs
+++ b/Beam.Dynamic/OnlineCleaner.cs
@@ -1,6 +1,8 @@
using HtmlAgilityPack;
+using Microsoft.Recognizers.Text.Number;
using System;
using System.Collections.Generic;
+using System.Globalization;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
@@ -25,6 +27,42 @@ namespace Beam.Dynamic {
});
}
+        // Extracts every number the recognizer finds in `text`, using `from` as the recognizer culture,
+        // and returns them as ints in the order the recognizer reported them. Fractional values are
+        // truncated toward zero; values that cannot be parsed or do not fit in an int are skipped.
+        public static List ParseNumbers(string text, string from) {
+            var results = NumberRecognizer.RecognizeNumber(text, from, NumberOptions.None, false);
+
+            // The "value" resolution is machine-formatted text, so it must not be parsed with the ambient
+            // thread culture: there, a separator can be read as a thousands mark ("1,5" -> 15 under en-US).
+            // NOTE(review): the recognizer appears to format decimals per its own culture - confirm. Until
+            // then accept both shapes: invariant ('.') first, then the culture named by `from`.
+            CultureInfo? recognizerCulture = null;
+            try {
+                recognizerCulture = CultureInfo.GetCultureInfo(from);
+            } catch (ArgumentException) {
+                // Not a culture name .NET knows (or null); only the invariant format is tried.
+            }
+
+            bool TryParseValue(object? raw, out double number) {
+                var valueText = raw?.ToString();
+                // NumberStyles.Float excludes AllowThousands, so a separator is never taken as a group mark.
+                return double.TryParse(valueText, NumberStyles.Float, CultureInfo.InvariantCulture, out number)
+                    || (recognizerCulture is not null
+                        && double.TryParse(valueText, NumberStyles.Float, recognizerCulture, out number));
+            }
+
+            return results
+                .Select((x) => x.Resolution.TryGetValue("value", out var value)
+                        && TryParseValue(value, out var number)
+                        && number is >= int.MinValue and <= int.MaxValue
+                    ? (int?)number
+                    : null)
+                .Where((x) => x.HasValue)
+                .Select((x) => x!.Value)
+                .ToList();
+        }
+
public static string Clean(string? onlineText) {
if (string.IsNullOrWhiteSpace(onlineText))
return "";
diff --git a/Beam.Exports/Beam.Exports.csproj b/Beam.Exports/Beam.Exports.csproj
index 9d971be..1ad0212 100644
--- a/Beam.Exports/Beam.Exports.csproj
+++ b/Beam.Exports/Beam.Exports.csproj
@@ -4,10 +4,17 @@
net9.0
enable
enable
+ Beam.Exports
+ aeqw89
+ qwsdcvghyu
+ Beam library that facilitates exporting different kinds of views for IDocuments
+ https://github.com/qwsdcvghyu89/Beam
+ https://github.com/qwsdcvghyu89/Beam
+ aeqw89.Beam.Exports
-
+
diff --git a/Beam.Puppeteer/Beam.Puppeteer.csproj b/Beam.Puppeteer/Beam.Puppeteer.csproj
new file mode 100644
index 0000000..6e23fcf
--- /dev/null
+++ b/Beam.Puppeteer/Beam.Puppeteer.csproj
@@ -0,0 +1,17 @@
+
+
+
+ net9.0
+ enable
+ enable
+
+
+
+
+
+
+
+
+
+
+
diff --git a/Beam.Puppeteer/PuppetedUnitDownloader.cs b/Beam.Puppeteer/PuppetedUnitDownloader.cs
new file mode 100644
index 0000000..dd571af
--- /dev/null
+++ b/Beam.Puppeteer/PuppetedUnitDownloader.cs
@@ -0,0 +1,56 @@
+
+using HtmlAgilityPack;
+using Microsoft.Playwright;
+
+namespace Beam.Puppeteer {
+    // Holds the IPlaywright instance and the IBrowser that puppeted downloaders open pages on.
+    public class PuppetContext(IPlaywright playwright, IBrowser browser) {
+        public IPlaywright Playwright { get; set; } = playwright;
+        public IBrowser Browser { get; set; } = browser;
+    }
+
+    // Unit downloader that loads each link in a Playwright browser page and passes the page's
+    // HTML to the base downloader's Transformer.
+    public class PuppetUnitDownloader : UnitDownloader {
+        public PuppetContext Context { get; }
+
+        public PuppetUnitDownloader(PuppetContext pc, DownloadContext context)
+            : base(context.Web, context.AsyncTranformer, context.AsyncFailurePredicates) {
+            Context = pc;
+        }
+
+        // Makes one download attempt for `link`: opens a page, navigates to the link, parses the
+        // page's HTML and runs the transformer over it.
+        // Returns (true, value) on success, and (false, default) when cancellation was already
+        // requested, when IsFailure flags the document, or when anything inside the attempt throws.
+        protected override async Task<(bool, T?)> TryDownloadWithNoRetries(string link, CancellationToken ct) {
+            // Do not spend a browser page on a request that has already been cancelled.
+            if (ct.IsCancellationRequested)
+                return (false, default);
+
+            var page = await Context.Browser.NewPageAsync();
+            try {
+                // A new page starts out blank, so without this navigation ContentAsync would
+                // return the empty document instead of the content behind `link`.
+                await page.GotoAsync(link);
+                var content = await page.ContentAsync();
+                // The page is no longer needed once its HTML has been captured.
+                await page.CloseAsync();
+
+                HtmlDocument doc = new();
+                doc.LoadHtml(content);
+                var transformed = await Transformer(doc);
+                if (FailurePredicates is null || !(await IsFailure(doc)))
+                    return (true, transformed);
+                return (false, default);
+            } catch (Exception) {
+                // Best effort: every failure of the attempt is reported as "not downloaded".
+                return (false, default);
+            } finally {
+                // Covers the paths that threw before the page was closed above.
+                if (!page.IsClosed)
+                    await page.CloseAsync();
+            }
+        }
+    }
+}
diff --git a/Beam.Temporary.Cli/Beam.Temporary.Cli.csproj b/Beam.Temporary.Cli/Beam.Temporary.Cli.csproj
index 1482db0..a404a59 100644
--- a/Beam.Temporary.Cli/Beam.Temporary.Cli.csproj
+++ b/Beam.Temporary.Cli/Beam.Temporary.Cli.csproj
@@ -5,9 +5,22 @@
net9.0
enable
enable
+
+ Beam.Temporary.Cli
+ aeqw89
+ qwsdcvghyu
+ A temporary CLI for Beam providing several useful mechanisms
+ https://github.com/qwsdcvghyu89/Beam
+ https://github.com/qwsdcvghyu89/Beam
+ aeqw89.Beam.Temporary.Cli
+
+
+
+
+
@@ -16,19 +29,4 @@
-
-
-
-
-
-
-
-
- ..\..\aeqw89.DataKeys\aeqw89.DataKeys\bin\Debug\net9.0\aeqw89.DataKeys.dll
-
-
- ..\..\aeqw89.PersistentData\aeqw89.PersistentData\bin\Release\net9.0\aeqw89.PersistentData.dll
-
-
-
diff --git a/Beam.Temporary.Cli/DownloadBuilder.cs b/Beam.Temporary.Cli/DownloadBuilder.cs
index fdf314b..8d71ead 100644
--- a/Beam.Temporary.Cli/DownloadBuilder.cs
+++ b/Beam.Temporary.Cli/DownloadBuilder.cs
@@ -1,5 +1,6 @@
using aeqw89.DataKeys;
using Beam.Dynamic;
+using Beam;
using Microsoft.Extensions.Logging;
using System;
using System.Collections.Generic;
@@ -24,17 +25,18 @@ namespace Beam.Temporary.Cli {
public interface ILinkStage {
ITransformStage WithLink();
ITransformStage WithLinkGenerator();
+ ILinkStage WithRange(Range range); // Sets the start/end states later passed to the link generator; the implementation throws InvalidOperationException if called after WithLinkGenerator.
}
public interface ITransformStage {
- IContextStage WithTransformer(Func> factory);
+ IContextStage WithTransformer(Func> factory);
}
- public interface IContextStage {
- IContextStage Configure(Action> configure);
- IContextStage WithParallelism(int degree);
- IContextStage WithTimeout(TimeSpan timeout);
- IContextStage WithRetryReporter(IProgress reporter);
+ public interface IContextStage {
+ IContextStage Configure(Action> configure);
+ IContextStage WithParallelism(int degree);
+ IContextStage WithTimeout(TimeSpan timeout);
+ IContextStage WithRetryReporter(IProgress reporter);
DownloadEnumerable Build();
}
@@ -61,7 +63,7 @@ namespace Beam.Temporary.Cli {
initial = textRecord.Resource.MetaTemplateInitialData ?? throw new InvalidOperationException("Meta template data missing.");
} else {
source = textRecord.AssociatedSource ?? throw new InvalidOperationException($"Text source missing for '{novelKey}'.");
- initial = textRecord.Resource.TemplateInitialData ?? throw new InvalidOperationException("Template initial data missing.");
+ initial = textRecord.Resource.TemplateInitialData;
}
return (source, initial);
@@ -74,6 +76,10 @@ namespace Beam.Temporary.Cli {
State Initial,
BeamDataDictionary Data,
DownloadContextBuilder CtxBuilder) : ILinkStage {
+
+ private State? endState;
+ private bool linksFrozen = false;
+
public ITransformStage WithLink() {
var link = Data.Templates[Source.Key].Builder.Build(Initial);
CtxBuilder.WithLinks(new[] { link });
@@ -85,28 +91,45 @@ namespace Beam.Temporary.Cli {
var generator = SourceLinkEnumerable.FromGenerator(new OrderedSourceLinkGenerator(
template.Builder,
new NumberedStateChanger(template.Factory.Behavior),
- Initial));
+ Initial, endState));
CtxBuilder.WithLinks(generator);
+ linksFrozen = true;
return new TransformStage(Source, Data, CtxBuilder);
}
+
+        // Bounds link generation: applies Start - 1 steps to Initial and End - 1 steps to a copy kept as the end state. Throws InvalidOperationException once WithLinkGenerator has run, ArgumentOutOfRangeException for from-end (^) or reversed bounds.
+        public ILinkStage WithRange(Range range) {
+            if (linksFrozen)
+                throw new InvalidOperationException("WithRange must be called before WithLinkGenerator");
+            if (range.Start.IsFromEnd || range.End.IsFromEnd || range.End.Value < range.Start.Value) // Index.Value ignores the ^ flag, so from-end bounds would be silently misread as from-start
+                throw new ArgumentOutOfRangeException(nameof(range), "start must be <= end and neither bound may be from-end (^)");
+            var stateChanger = new NumberedStateChanger(Data.Templates[Source.Key].Factory.Behavior);
+            endState = Initial.Copy(); // copied before Initial is advanced; Apply's result is discarded, so it presumably mutates in place - TODO confirm
+            stateChanger.Apply(Initial, range.Start.Value - 1);
+            stateChanger.Apply(endState, range.End.Value - 1);
+            return this;
+        }
}
private sealed record TransformStage(
WebResource Source,
BeamDataDictionary Data,
DownloadContextBuilder CtxBuilder) : ITransformStage {
- public IContextStage WithTransformer(Func> factory) {
+ public IContextStage WithTransformer(Func> factory) {
var transformer = factory(Data.Bindings[Source.Bindings]);
- CtxBuilder.WithTransformer(transformer);
- return new ContextStage(CtxBuilder);
+ return new ContextStage(CtxBuilder, transformer);
}
}
- private sealed class ContextStage : IContextStage {
+ private sealed class ContextStage : IContextStage {
private readonly DownloadContextBuilder _ctxBuilder;
+ private readonly Func