Introduced some unit testing. Cleaned up some classes in Beam. Overhauled source link generation.

This commit is contained in:
2025-05-10 17:20:33 +03:00
parent bfdcdb1f3b
commit a086cfa02b
23 changed files with 386 additions and 185 deletions
+1
View File
@@ -36,6 +36,7 @@ namespace Beam.Temporary.Cli {
await using var sharedContext = await DataDictionaryContext<SharedDataDictionary>.Create(
SharedDataPath,
false,
DataKind.Shared,
logger,
ConversionOptions
+27
View File
@@ -0,0 +1,27 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Test project: targets net9.0 and is marked as not packable. -->
<PropertyGroup>
<TargetFramework>net9.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<IsPackable>false</IsPackable>
</PropertyGroup>
<!-- Test tooling: coverage collector, the .NET test SDK, xUnit and its Visual Studio runner. -->
<ItemGroup>
<PackageReference Include="coverlet.collector" Version="6.0.2" />
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.12.0" />
<PackageReference Include="xunit" Version="2.9.2" />
<PackageReference Include="xunit.runner.visualstudio" Version="2.8.2" />
</ItemGroup>
<!-- Projects whose types are available to the tests. -->
<ItemGroup>
<ProjectReference Include="..\Beam.Dynamic\Beam.Dynamic.csproj" />
<ProjectReference Include="..\Beam.Exports\Beam.Exports.csproj" />
<ProjectReference Include="..\Beam\Beam.csproj" />
</ItemGroup>
<!-- Global using for the Xunit namespace, so test files need no explicit "using Xunit;". -->
<ItemGroup>
<Using Include="Xunit" />
</ItemGroup>
</Project>
+92
View File
@@ -0,0 +1,92 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Beam.Tests {
    /// <summary>
    /// Unit tests for <see cref="Beam.SourceLinkBuilder"/>: construction, path segments,
    /// parameters and the separator placed between parameters.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the class name is misspelled ("Souce"). It is kept as-is here; if it is renamed,
    /// prefer a suffixed name such as "SourceLinkBuilderTests" — a test class called exactly
    /// "SourceLinkBuilder" inside Beam.Tests would hide the type under test for the simple name used below.
    /// </remarks>
    public class SouceLinkBuilder {
        /// <summary>Constructing a builder with only a host must not throw.</summary>
        [Fact]
        public void ShouldConstruct_NoErrors() {
            _ = new SourceLinkBuilder("example.com");
        }

        /// <summary>Building without segments or parameters must not throw.</summary>
        [Fact]
        public void ShouldBuild_NoErrors() {
            _ = new SourceLinkBuilder("example.com").Build();
        }

        /// <summary>A bare host builds to the https URL of that host.</summary>
        [Fact]
        public void ShouldBuild_Correctly() {
            var link = new SourceLinkBuilder("example.com").Build();

            // SourceLink.Link is a Uri; Uri renders an empty path as "/", hence the trailing slash.
            Assert.Equal("https://example.com/", link.Link.ToString());
        }

        /// <summary>A single segment is appended after the host.</summary>
        [Fact]
        public void ShouldBuild_SegmentAddsCorrectly() {
            var k = new SourceLinkBuilder("example.com");
            k.AddSegment("folder1");

            var link = k.Build();

            Assert.Equal("https://example.com/folder1", link.Link.ToString());
        }

        /// <summary>An empty segment name is rejected with <see cref="ArgumentException"/>.</summary>
        [Fact]
        public void ShouldThrow_EmptySegmentsDisallowed() {
            var k = new SourceLinkBuilder("example.com");

            Assert.Throws<ArgumentException>(() => k.AddSegment(""));
        }

        /// <summary>Several segments are emitted in insertion order, separated by '/'.</summary>
        [Theory]
        [InlineData("folder1", "folder2", "folder3")]
        [InlineData("f1", "f5", "f6")]
        public void ShouldBuild_MultipleSegmentsCorrect(params string[] segments) {
            var k = new SourceLinkBuilder("example.com");
            foreach (var segment in segments) {
                k.AddSegment(segment);
            }

            // string.Join adds no trailing slash, so nothing has to be trimmed afterwards
            // (trimming would cut the host's own slash if no segments were supplied).
            var expected = "https://example.com/" + string.Join("/", segments);

            var link = k.Build();

            Assert.Equal(expected, link.Link.ToString());
        }

        /// <summary>One parameter on a segment is emitted as name followed by value.</summary>
        [Fact]
        public void ShouldBuild_SingleParameterCorrect() {
            var k = new SourceLinkBuilder("example.com");
            k.AddSegment("f1");
            k.AddParameters(0, "?q=");

            var link = k.Build("foo");

            Assert.Equal("https://example.com/f1?q=foo", link.Link.ToString());
        }

        /// <summary>Two parameters without a separator are emitted back to back.</summary>
        [Fact]
        public void ShouldBuild_MultiParameterCorrect() {
            var k = new SourceLinkBuilder("example.com");
            k.AddSegment("f1");
            k.AddParameters(0, "?q=", "?m=");

            var link = k.Build("foo", "bar");

            Assert.Equal("https://example.com/f1?q=foo?m=bar", link.Link.ToString());
        }

        /// <summary>
        /// Two parameters on a segment created with "&amp;" are joined by that string.
        /// </summary>
        /// <remarks>
        /// Despite the "WithSuffix" name, the second AddSegment argument is the segment's
        /// separator (placed between consecutive parameters), not its suffix.
        /// </remarks>
        [Fact]
        public void ShouldBuild_MultiParameterCorrectWithSuffix() {
            var k = new SourceLinkBuilder("example.com");
            k.AddSegment("f1", "&");
            k.AddParameters(0, "?q=", "?m=");

            var link = k.Build("foo", "bar");

            Assert.Equal("https://example.com/f1?q=foo&?m=bar", link.Link.ToString());
        }
    }
}
+7
View File
@@ -0,0 +1,7 @@
namespace Beam.Tests {
// Placeholder test class: its only test has an empty body and asserts nothing,
// so it passes trivially.
public class UnitTest1 {
[Fact]
public void Test1() {
}
}
}
+6
View File
@@ -11,6 +11,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Beam.Dynamic", "Beam.Dynami
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Beam.Exports", "Beam.Exports\Beam.Exports.csproj", "{7C0ADBC0-44D4-48F8-901B-9C93F1B1FFDC}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Beam.Tests", "Beam.Tests\Beam.Tests.csproj", "{E26800C2-0518-49E8-88DF-A0B6ED97D4AB}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@@ -33,6 +35,10 @@ Global
{7C0ADBC0-44D4-48F8-901B-9C93F1B1FFDC}.Debug|Any CPU.Build.0 = Debug|Any CPU
{7C0ADBC0-44D4-48F8-901B-9C93F1B1FFDC}.Release|Any CPU.ActiveCfg = Release|Any CPU
{7C0ADBC0-44D4-48F8-901B-9C93F1B1FFDC}.Release|Any CPU.Build.0 = Release|Any CPU
{E26800C2-0518-49E8-88DF-A0B6ED97D4AB}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{E26800C2-0518-49E8-88DF-A0B6ED97D4AB}.Debug|Any CPU.Build.0 = Debug|Any CPU
{E26800C2-0518-49E8-88DF-A0B6ED97D4AB}.Release|Any CPU.ActiveCfg = Release|Any CPU
{E26800C2-0518-49E8-88DF-A0B6ED97D4AB}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
+16
View File
@@ -0,0 +1,16 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Beam {
    /// <summary>
    /// Ready-made <see cref="IStateChangeBehaviour"/> implementations.
    /// </summary>
    public static class CommonStateChangers {
        /// <summary>
        /// Gets a state changer that reads the last state element as an integer and replaces it
        /// with that integer plus the step amount. A new changer instance is created on every access.
        /// </summary>
        /// <remarks>
        /// The changer throws <see cref="InvalidOperationException"/> when the last element is
        /// <c>null</c> or its string form cannot be parsed as an <see cref="int"/>.
        /// The replaced element is stored as an <see cref="int"/>, whatever type it had before.
        /// </remarks>
        public static IStateChangeBehaviour LastAsNumber => new NumberedStateChanger((x, i) => {
            object? last = x[^1];

            int number;
            if (last is int boxed) {
                // Already an int (e.g. after a previous step): no string round-trip needed.
                number = boxed;
            } else if (!int.TryParse(last?.ToString(), out number)) {
                // TryParse returns false for a null string, so a null element lands here too
                // instead of failing with a NullReferenceException.
                throw new InvalidOperationException(S.M.StateChangeError);
            }

            x[^1] = number + i;
        });
    }
}
-9
View File
@@ -1,9 +0,0 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Beam {
/// <summary>
/// A <see cref="DelegateBackedSourceLinkGenerator"/> configured from a
/// <see cref="PackagedSourceLinkGenerationData"/>: the data's GenerateLink method is passed as the
/// link generator, the result of its GetBehaviour() as the incrementation behaviour, and
/// <paramref name="initialState"/> is forwarded unchanged. The class adds no members of its own.
/// </summary>
public class DataBackedSourceLinkGenerator(PackagedSourceLinkGenerationData data, params object[] initialState) : DelegateBackedSourceLinkGenerator(data.GenerateLink, data.GetBehaviour(), initialState) {}
}
-48
View File
@@ -1,48 +0,0 @@
using System;
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Beam {
    /// <summary>Produces a link from a list of state values.</summary>
    public delegate DocumentSourceLink LinkGenerator(params object[] ps);

    /// <summary>Returns the replacement for <paramref name="obj"/> after moving it by <paramref name="amount"/>.</summary>
    public delegate object Incrementor(object obj, int amount);

    /// <summary>
    /// Enumerator that derives each link by applying an <see cref="IncrementationBehaviour"/>
    /// to a state array and handing that array to a <see cref="LinkGenerator"/>.
    /// </summary>
    public class DelegateBackedSourceLinkGenerator : IEnumerator<DocumentSourceLink> {
        // Copy of the state passed to the constructor; Reset() starts over from a clone of it.
        private object[] _startingState;

        /// <summary>
        /// Stores private copies of <paramref name="initialState"/> and positions the generator
        /// one step before the initial state (see <see cref="Reset"/>).
        /// </summary>
        public DelegateBackedSourceLinkGenerator(LinkGenerator generator, IncrementationBehaviour behaviour, params object[] initialState) {
            Generator = generator;
            Behaviour = behaviour;
            _startingState = CloneOf(initialState);
            State = CloneOf(initialState);
            Reset();
        }

        /// <summary>Delegate that turns the current state into a link.</summary>
        public LinkGenerator Generator { get; set; }

        /// <summary>Behaviour used to step the state forwards and backwards.</summary>
        public IncrementationBehaviour Behaviour { get; }

        /// <summary>The working state array that is stepped in place.</summary>
        public object[] State { get; set; }

        /// <summary>The link generated from the state after the most recent step.</summary>
        public DocumentSourceLink Current { get; private set; }

        object IEnumerator.Current => Current;

        /// <summary>Nothing to release.</summary>
        public void Dispose() { }

        /// <summary>Steps the state by +1, regenerates <see cref="Current"/> and reports whether it has a value.</summary>
        public bool MoveNext() {
            Step(1);
            return Current.HasValue;
        }

        /// <summary>
        /// Restores a fresh clone of the starting state and steps it by -1, so that the next
        /// <see cref="MoveNext"/> lands on the starting state again.
        /// </summary>
        public void Reset() {
            State = CloneOf(_startingState);
            Step(-1);
        }

        // Applies the behaviour to the working state and refreshes Current from it.
        private void Step(int amount) {
            Behaviour.Apply(State, amount);
            Current = Generator(State);
        }

        // Shallow copy of a state array.
        private static object[] CloneOf(object[] source) {
            return (object[])source.Clone();
        }
    }
}
+2 -2
View File
@@ -22,13 +22,13 @@ namespace Beam {
public IProgress<int>? RetryReporter { get; set; }
public AsyncDownloadFailurePredicate<HtmlDocument>?[]? AsyncFailurePredicates { get; }
public TimeSpan TimeOut { get; set; }
public IEnumerable<DocumentSourceLink> Links { get; }
public IEnumerable<SourceLink> Links { get; }
public CancellationToken CancellationToken { get; }
public DocumentCache Cache { get; private set; } = [];
public ILogger? DownloadLogger { get; set; }
public DownloadContext(HtmlWeb web,
IEnumerable<DocumentSourceLink> links,
IEnumerable<SourceLink> links,
CancellationToken cancellationToken = default,
HtmlTransformer<T>? transformer = null,
AsyncHtmlTransformer<T>? asyncTransformer = null,
+3 -3
View File
@@ -1,8 +1,8 @@
namespace Beam {
internal interface IDocumentSourceLinkFactory {
DocumentSourceLink GetNextLink(DocumentSourceLink current);
DocumentSourceLink GetPrecedingLink(DocumentSourceLink current);
DocumentSourceLink GetArbitraryLink(DocumentSourceLink current, int offset) => offset switch {
SourceLink GetNextLink(SourceLink current);
SourceLink GetPrecedingLink(SourceLink current);
SourceLink GetArbitraryLink(SourceLink current, int offset) => offset switch {
0 => current,
> 0 => GetArbitraryLink(GetNextLink(current), offset - 1),
< 0 => GetArbitraryLink(GetPrecedingLink(current), offset + 1)
+8
View File
@@ -0,0 +1,8 @@
namespace Beam {
    /// <summary>
    /// Describes how a <see cref="State"/> is changed in response to a stimulus.
    /// </summary>
    public interface IStateChangeBehaviour {
        /// <summary>
        /// Changes <paramref name="state"/> in place according to <paramref name="stimulus"/>.
        /// </summary>
        /// <param name="state">The state to modify.</param>
        /// <param name="stimulus">
        /// Implementation-defined input that drives the change; which types are accepted is up to
        /// the implementation.
        /// </param>
        void Apply(State state, object stimulus);
    }
}
-17
View File
@@ -1,17 +0,0 @@
namespace Beam {
    /// <summary>
    /// Maps positions of a state array to the <see cref="Incrementor"/> that moves the value at that position.
    /// </summary>
    public struct IncrementationBehaviour {
        /// <summary>Position in the state array → incrementor applied to the value stored there.</summary>
        public Dictionary<int, Incrementor> Map { get; set; }

        /// <summary>
        /// Runs every mapped incrementor on its position of <paramref name="objects"/> and stores
        /// the string form of the result back into that position.
        /// </summary>
        /// <param name="objects">State array, modified in place.</param>
        /// <param name="amount">Step passed to each incrementor.</param>
        /// <exception cref="S.MapException">A mapped position is at or beyond the end of <paramref name="objects"/>.</exception>
        public readonly void Apply(object[] objects, int amount) {
            foreach (KeyValuePair<int, Incrementor> entry in Map) {
                int position = entry.Key;
                if (position >= objects.Length)
                    throw new S.MapException(S.M.MapDoesNotMatchArgs);

                Incrementor step = entry.Value;
                // A null result from the incrementor is stored as null; anything else as its string form.
                objects[position] = step(objects[position], amount)?.ToString();
            }
        }
    }
}
+16
View File
@@ -0,0 +1,16 @@
namespace Beam {
    /// <summary>
    /// An <see cref="IStateChangeBehaviour"/> whose stimulus is an integer step; the actual state
    /// change is delegated to a <see cref="MoveState"/> callback.
    /// </summary>
    public class NumberedStateChanger(NumberedStateChanger.MoveState moveState) : IStateChangeBehaviour {
        /// <summary>Callback that moves <paramref name="state"/> by <paramref name="amount"/>.</summary>
        public delegate void MoveState(State state, int amount);

        /// <summary>The callback invoked by <see cref="Apply(State, int)"/>.</summary>
        public MoveState MoveStateDlgte { get; set; } = moveState;

        /// <summary>
        /// Forwards to <see cref="Apply(State, int)"/> when <paramref name="stimulus"/> is an <see cref="int"/>.
        /// </summary>
        /// <exception cref="ArgumentException"><paramref name="stimulus"/> is not an <see cref="int"/>.</exception>
        public virtual void Apply(State state, object stimulus) {
            if (stimulus is int amount) {
                Apply(state, amount);
                return;
            }

            throw new ArgumentException(S.M.StimulusMustBeInt, nameof(stimulus));
        }

        /// <summary>Invokes <see cref="MoveStateDlgte"/> with the given state and step.</summary>
        public virtual void Apply(State state, int amount) => MoveStateDlgte(state, amount);
    }
}
+3
View File
@@ -0,0 +1,3 @@
namespace Beam {
/// <summary>
/// Pairs a value with an integer ordinal.
/// </summary>
/// <typeparam name="T">Type of the carried value.</typeparam>
/// <param name="Data">The carried value.</param>
/// <param name="Order">The ordinal associated with <paramref name="Data"/>.</param>
public record Ordered<T>(T Data, int Order);
}
+48
View File
@@ -0,0 +1,48 @@
using System;
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Beam {
    /// <summary>
    /// Enumerator that produces <see cref="SourceLink"/> values by stepping a <see cref="State"/>
    /// with a <see cref="NumberedStateChanger"/> and feeding the state's values to a
    /// <see cref="SourceLinkBuilder"/> as parameter values.
    /// </summary>
    /// <remarks>
    /// The number of state values must equal the builder's parameter count, otherwise
    /// <see cref="SourceLinkBuilder.Build"/> throws.
    /// </remarks>
    public class OrderedSourceLinkGenerator : IEnumerator<SourceLink> {
        /// <summary>Builder that turns the current state values into a link.</summary>
        public SourceLinkBuilder Builder { get; set; }

        /// <summary>Behaviour used to step the state by +1 / -1.</summary>
        public NumberedStateChanger Behaviour { get; }

        // Snapshot of the state handed to the constructor; Reset() restarts from a copy of it.
        private readonly State InitialState;

        /// <summary>
        /// Creates the generator and positions it one step before <paramref name="initialState"/>
        /// (see <see cref="Reset"/>).
        /// </summary>
        /// <param name="builder">Builder used to produce each link.</param>
        /// <param name="behaviour">Behaviour that steps the state.</param>
        /// <param name="initialState">Parameter values of the first link returned after <see cref="MoveNext"/>.</param>
        public OrderedSourceLinkGenerator(SourceLinkBuilder builder, NumberedStateChanger behaviour, params object[] initialState) {
            Builder = builder;
            Behaviour = behaviour;
            // Clone so that later changes to a caller-owned array cannot move the reset point.
            InitialState = new State((object[])initialState.Clone());
            State = InitialState.Copy();
            Reset();
        }

        /// <summary>The working state that is stepped in place.</summary>
        public State State { get; set; }

        /// <summary>The link built from the state after the most recent step.</summary>
        public SourceLink Current { get; private set; }

        object IEnumerator.Current => Current;

        /// <summary>Nothing to release.</summary>
        public void Dispose() {
        }

        /// <summary>
        /// Steps the state by +1, rebuilds <see cref="Current"/> and reports whether it has a value.
        /// </summary>
        /// <remarks>
        /// NOTE(review): links produced by the builder look always non-empty, so this probably
        /// never returns false on its own — confirm that callers bound the enumeration.
        /// </remarks>
        public bool MoveNext() {
            Behaviour.Apply(State, 1);
            Current = BuildCurrent();
            return Current.HasValue;
        }

        /// <summary>
        /// Restores a copy of the initial state and steps it by -1, so that the next
        /// <see cref="MoveNext"/> lands on the initial state.
        /// </summary>
        public void Reset() {
            State = InitialState.Copy();
            Behaviour.Apply(State, -1);
            Current = BuildCurrent();
        }

        // Build takes "params object[]": passing the State object itself would wrap it as ONE
        // parameter value (rendered via State.ToString()), so the underlying array is passed instead.
        private SourceLink BuildCurrent() => Builder.Build(State.GetState());
    }
}
-18
View File
@@ -1,18 +0,0 @@
namespace Beam {
    /// <summary>
    /// Bundles a composite format string with the position of the chapter index among its arguments.
    /// </summary>
    public struct PackagedSourceLinkGenerationData {
        /// <summary>Composite format string passed to <see cref="string.Format(string, object[])"/>.</summary>
        public string Template { get; set; }

        /// <summary>Position, within the argument list, of the value that is incremented.</summary>
        public int IndexOfChapterIndex { get; set; }

        /// <summary>Formats <see cref="Template"/> with <paramref name="ps"/> and wraps the result in a link.</summary>
        public readonly DocumentSourceLink GenerateLink(params object[] ps) {
            string formatted = string.Format(Template, ps);
            return new DocumentSourceLink(formatted);
        }

        /// <summary>
        /// Creates a behaviour with a single map entry at <see cref="IndexOfChapterIndex"/> that
        /// parses the value's string form as an integer and adds the step to it.
        /// </summary>
        public IncrementationBehaviour GetBehaviour() {
            Incrementor addStep = (x, i) => {
                string text = x.ToString() ?? throw new ArgumentException();
                return int.Parse(text) + i;
            };

            var map = new Dictionary<int, Incrementor>();
            map.Add(IndexOfChapterIndex, addStep);

            return new IncrementationBehaviour { Map = map };
        }
    }
}
-78
View File
@@ -1,78 +0,0 @@
using HtmlAgilityPack;
using System.Collections;
using System.Collections.Concurrent;
namespace Beam {
/// <summary>Pairs a value with an integer ordinal.</summary>
public record Ordered<T>(T Data, int Order);
/// <summary>
/// Obsolete asynchronous enumerator that downloads the context's links in parallel into an
/// unordered cache and hands the results out one at a time, each tagged with its ordinal.
/// </summary>
[Obsolete("Use chunk downloader instead.")]
public class ParallelDownloader<T>(DownloadContext<T> context, int maximumConcurrentDownloads = 4) : IAsyncEnumerator<Ordered<T>> {
/// <summary>Download settings: links, web client, transformer, reporters, timeout, cancellation.</summary>
public DownloadContext<T> Context { get; } = context;
/// <summary>Degree of parallelism for a chunk; a chunk holds up to twice this many links.</summary>
public int MaximumConcurrentDownloads { get; } = maximumConcurrentDownloads;
// The running fill task, or null when no fill is in progress.
private Task? CacheFiller { get; set; }
// Total number of successful downloads; only incremented in the code shown here.
private int Count = 0;
// Downloaded results waiting to be handed out; a bag, so retrieval order is not the download order.
private ConcurrentBag<Ordered<T>> Cache { get; set; } = [];
/// <summary>The item most recently taken from the cache by <see cref="MoveNextAsync"/>.</summary>
public Ordered<T> Current { get; set; }
// Creates a downloader from the context's web client, async transformer and failure predicates.
private UnitDownloader<T> GetUnitDownloader()
=> new(Context.Web, Context.AsyncTranformer, Context.AsyncFailurePredicates);
// Parallel options carrying the context's cancellation token and the concurrency limit.
private ParallelOptions GetOptions()
=> new() {
CancellationToken = Context.CancellationToken,
MaxDegreeOfParallelism = MaximumConcurrentDownloads
};
// Downloads one chunk of links in parallel and adds every successful result to the cache.
private async Task FillCache() {
List<Ordered<DocumentSourceLink>> chunk = [];
// Ordinals restart at 0 for every chunk.
int i = 0;
// NOTE(review): Take() enumerates Context.Links anew on every call; whether that continues
// after the previous chunk or restarts depends on the enumerable supplied — confirm.
foreach (var link in Context.Links.Take(MaximumConcurrentDownloads * 2))
chunk.Add(new Ordered<DocumentSourceLink>(link, i++));
// Logs the chunk as "order: link" lines.
// NOTE(review): Aggregate without a seed throws on an empty sequence, i.e. when no links were taken.
Console.WriteLine(chunk.Select((x) => $"{x.Order}: {x.Data.Link}").Aggregate((x, y) => $"{x}\n{y}"));
var unitDownloader = GetUnitDownloader();
// Per-chunk success counter; incremented below but not read afterwards.
int downloadedCount = 0;
await Parallel.ForEachAsync(chunk, GetOptions(), async (x, ct) => {
var (result, doc) = await unitDownloader.TryDownload([new Ordered<string>(x.Data.Link.ToString(), x.Order)], ct, tryProgress: Context.RetryReporter);
// A failed download is logged and skipped; it is not added to the cache.
if (!result || doc is null) {
Console.WriteLine($"FAILED to download {x.Data.Link}");
return;
}
Cache.Add(new(doc, x.Order));
Context.DownloadReporter?.Report(doc);
Interlocked.Increment(ref downloadedCount);
Interlocked.Increment(ref Count);
});
Console.WriteLine("Downloaded Chunk");
// Marks the fill as finished so MoveNextAsync may start another one.
CacheFiller = null;
}
/// <summary>
/// Polls the cache every 10 ms until an item is available or the accumulated wait reaches
/// <c>Context.TimeOut</c>, starting a fill when the cache runs low and none is in progress.
/// </summary>
/// <returns><c>true</c> when <see cref="Current"/> was set to a new item; <c>false</c> on timeout.</returns>
public async ValueTask<bool> MoveNextAsync() {
TimeSpan waited = TimeSpan.Zero;
TimeSpan delta = TimeSpan.FromSeconds(0.01);
while(waited < Context.TimeOut) {
// CacheFiller is already known to be null inside the branch, so "??=" behaves like "=" here.
if (Cache.Count < MaximumConcurrentDownloads && CacheFiller is null) // strange
CacheFiller ??= FillCache();
Cache.TryTake(out var k);
if (k is not null) {
Current = k;
return true;
}
// Only the nominal delay is accumulated, not the time actually elapsed.
waited += delta;
await Task.Delay(delta);
}
return false;
}
/// <summary>Suppresses finalization for this instance; nothing else is released.</summary>
public ValueTask DisposeAsync() {
GC.SuppressFinalize(this);
return ValueTask.CompletedTask;
}
}
}
+3
View File
@@ -34,6 +34,9 @@ namespace Beam {
public const string MapDoesNotMatchArgs = "Error; Map contains indicies that exceed the argument list passed.";
public const string NewFragmentShouldBeFree = "Assertion Error: Could not acquire lock of newly created fragment";
public const string LinksCannotBeEmpty = "Cannot construct downloader with empty links collection!";
public const string StimulusMustBeInt = "Stimulus must be an integer";
public const string StateCastException = "State cannot be cast to T";
public const string StateChangeError = "Something went wrong while changing the state.";
}
}
}
+1 -1
View File
@@ -8,7 +8,7 @@ namespace Beam {
public ILogger? Logger { get; set; }
public int LastOrder { get; set; } = 0;
protected IEnumerator<DocumentSourceLink> LinksEnumerator;
protected IEnumerator<SourceLink> LinksEnumerator;
public Func<IUnitDownloader<TOutput>> GetUnitDownloader { get; set; }
@@ -5,18 +5,18 @@ using System.Text;
using System.Threading.Tasks;
namespace Beam {
public readonly struct DocumentSourceLink(string link) {
public readonly struct SourceLink(string link) {
private readonly string Link_ { get; } = link;
public readonly Uri Link => new(Link_);
public bool HasValue => !string.IsNullOrWhiteSpace(Link_);
public static DocumentSourceLink InvalidLink { get; } = new("https://invalid.link");
public static SourceLink InvalidLink { get; } = new("invalid://link");
public static bool operator ==(DocumentSourceLink lhs, DocumentSourceLink rhs) {
public static bool operator ==(SourceLink lhs, SourceLink rhs) {
return lhs.Link == rhs.Link;
}
public static bool operator !=(DocumentSourceLink lhs, DocumentSourceLink rhs) {
public static bool operator !=(SourceLink lhs, SourceLink rhs) {
return lhs.Link != rhs.Link;
}
+122
View File
@@ -0,0 +1,122 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Beam {
    /// <summary>
    /// A parameter of a link segment. <see cref="Name"/> is emitted verbatim directly before the
    /// parameter's value when the link is built.
    /// </summary>
    public class Parameter(string name) {
        public string Name { get; set; } = name;
    }

    /// <summary>
    /// One path segment of a link together with its parameters.
    /// </summary>
    /// <remarks>
    /// <see cref="Separator"/> is written between consecutive parameters of the segment.
    /// NOTE(review): <see cref="Suffix"/> can be read and written through the builder but is not
    /// emitted by <see cref="SourceLinkBuilder.Build"/> — confirm whether that is intended.
    /// </remarks>
    public class LinkSegment(string name, string separator = "", string suffix = "") {
        public string Name { get; set; } = name;
        public List<Parameter> Parameters { get; set; } = [];
        public string Separator { get; set; } = separator;
        public string Suffix { get; set; } = suffix;
    }

    /// <summary>
    /// Assembles links of the form <c>protocol://host/segment…</c>, where each segment may carry
    /// parameters whose values are supplied at build time.
    /// </summary>
    public class SourceLinkBuilder(string host, string protocol = "https") {
        public string Protocol { get; set; } = protocol;
        public string Host { get; set; } = host;
        public List<LinkSegment> Segments { get; set; } = [];

        /// <summary>Returns the suffix of the segment at <paramref name="segmentIndex"/>.</summary>
        /// <exception cref="ArgumentOutOfRangeException">The index is negative or not below the segment count.</exception>
        public string GetSuffix(int segmentIndex)
            => GetSegment(segmentIndex).Suffix;

        /// <summary>Returns the suffix of the last segment; throws when there are no segments.</summary>
        public string GetSuffix()
            => GetSuffix(Segments.Count - 1);

        /// <summary>Returns the separator of the segment at <paramref name="segmentIndex"/>.</summary>
        /// <exception cref="ArgumentOutOfRangeException">The index is negative or not below the segment count.</exception>
        public string GetSeparator(int segmentIndex)
            => GetSegment(segmentIndex).Separator;

        /// <summary>Returns the separator of the last segment; throws when there are no segments.</summary>
        public string GetSeparator()
            => GetSeparator(Segments.Count - 1);

        /// <summary>Replaces the suffix of the segment at <paramref name="segmentIndex"/>.</summary>
        /// <exception cref="ArgumentOutOfRangeException">The index is negative or not below the segment count.</exception>
        public void SetSuffix(int segmentIndex, string suffix)
            => GetSegment(segmentIndex).Suffix = suffix;

        /// <summary>Replaces the suffix of the last segment; throws when there are no segments.</summary>
        public void SetSuffix(string suffix)
            => SetSuffix(Segments.Count - 1, suffix);

        /// <summary>Replaces the separator of the segment at <paramref name="segmentIndex"/>.</summary>
        /// <exception cref="ArgumentOutOfRangeException">The index is negative or not below the segment count.</exception>
        public void SetSeparator(int segmentIndex, string separator)
            => GetSegment(segmentIndex).Separator = separator;

        /// <summary>Replaces the separator of the last segment; throws when there are no segments.</summary>
        public void SetSeparator(string separator)
            => SetSeparator(Segments.Count - 1, separator);

        /// <summary>Appends a new segment without parameters.</summary>
        /// <param name="name">Segment text; must not be null, empty or whitespace.</param>
        /// <param name="separator">Text written between the segment's parameters; <c>null</c> means none.</param>
        /// <exception cref="ArgumentException"><paramref name="name"/> is null, empty or whitespace.</exception>
        public void AddSegment(string name, string? separator = null) {
            ArgumentException.ThrowIfNullOrWhiteSpace(name);
            // LinkSegment.Separator is declared non-nullable; "no separator" is stored as "",
            // which produces exactly the same link as a null separator did.
            Segments.Add(new LinkSegment(name, separator ?? string.Empty));
        }

        /// <summary>Appends parameters to the segment at <paramref name="segmentIndex"/>.</summary>
        /// <remarks>All names are validated before the segment is touched, so a failure adds nothing.</remarks>
        /// <exception cref="ArgumentOutOfRangeException">The index is negative or not below the segment count.</exception>
        /// <exception cref="ArgumentException">A parameter name is null, empty or whitespace.</exception>
        public void AddParameters(int segmentIndex, params string[] parameters) {
            var seg = GetSegment(segmentIndex);
            seg.Parameters.AddRange(CreateParameters(parameters));
        }

        /// <summary>Appends parameters to the last segment; throws when there are no segments.</summary>
        public void AddParameters(params string[] parameters)
            => AddParameters(Segments.Count - 1, parameters);

        /// <summary>Replaces all parameters of the segment at <paramref name="segmentIndex"/>.</summary>
        /// <remarks>All names are validated before the old parameters are cleared, so a failure leaves them intact.</remarks>
        /// <exception cref="ArgumentOutOfRangeException">The index is negative or not below the segment count.</exception>
        /// <exception cref="ArgumentException">A parameter name is null, empty or whitespace.</exception>
        public void SetParameters(int segmentIndex, params string[] parameters) {
            var seg = GetSegment(segmentIndex);
            var replacement = CreateParameters(parameters);
            seg.Parameters.Clear();
            seg.Parameters.AddRange(replacement);
        }

        /// <summary>Replaces all parameters of the last segment; throws when there are no segments.</summary>
        public void SetParameters(params string[] parameters)
            => SetParameters(Segments.Count - 1, parameters);

        /// <summary>Returns the total number of parameters across all segments.</summary>
        public int GetParameterCount()
            => Segments.Sum(segment => segment.Parameters.Count);

        /// <summary>
        /// Builds the link: protocol, "://", host, then for every segment "/" + name followed by each
        /// parameter as name + value, with the segment's separator between consecutive parameters.
        /// </summary>
        /// <param name="parameterValues">
        /// One value per parameter, in segment order then parameter order; each is rendered with its
        /// <c>ToString()</c>.
        /// </param>
        /// <exception cref="ArgumentNullException"><paramref name="parameterValues"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">The number of values differs from <see cref="GetParameterCount"/>.</exception>
        public SourceLink Build(params object[] parameterValues) {
            ArgumentNullException.ThrowIfNull(parameterValues);
            ArgumentOutOfRangeException.ThrowIfNotEqual(parameterValues.Length, GetParameterCount());

            StringBuilder link = new();
            link.Append(Protocol).Append("://").Append(Host);

            // Index of the next unused value; runs across segment boundaries.
            int nextValue = 0;
            foreach (var segment in Segments) {
                link.Append('/').Append(segment.Name);

                int parameterCount = segment.Parameters.Count;
                for (int i = 0; i < parameterCount; i++) {
                    link.Append(segment.Parameters[i].Name);
                    link.Append(parameterValues[nextValue++]);

                    bool isLastParameter = i + 1 == parameterCount;
                    if (!isLastParameter && segment.Separator is not null)
                        link.Append(segment.Separator);
                }
            }

            return new SourceLink(link.ToString());
        }

        // Returns the segment at the index, or throws ArgumentOutOfRangeException when the index
        // is negative or not below Segments.Count.
        private LinkSegment GetSegment(int segmentIndex) {
            ArgumentOutOfRangeException.ThrowIfGreaterThanOrEqual(segmentIndex, Segments.Count);
            ArgumentOutOfRangeException.ThrowIfNegative(segmentIndex);
            return Segments[segmentIndex];
        }

        // Validates every name and only then returns the Parameter objects for them.
        private static List<Parameter> CreateParameters(string[] parameters) {
            ArgumentNullException.ThrowIfNull(parameters);

            var created = new List<Parameter>(parameters.Length);
            foreach (var parameter in parameters) {
                ArgumentException.ThrowIfNullOrWhiteSpace(parameter);
                created.Add(new Parameter(parameter));
            }
            return created;
        }
    }
}
+5 -5
View File
@@ -6,17 +6,17 @@ using System.Text;
using System.Threading.Tasks;
namespace Beam {
public class SourceLinkEnumerable : IEnumerable<DocumentSourceLink> {
private SourceLinkEnumerable(IEnumerator<DocumentSourceLink> enumerator) {
public class SourceLinkEnumerable : IEnumerable<SourceLink> {
private SourceLinkEnumerable(IEnumerator<SourceLink> enumerator) {
Enumerator = enumerator;
}
public IEnumerator<DocumentSourceLink> Enumerator { get; }
public IEnumerator<SourceLink> Enumerator { get; }
public static SourceLinkEnumerable FromGenerator(IEnumerator<DocumentSourceLink> generator)
public static SourceLinkEnumerable FromGenerator(IEnumerator<SourceLink> generator)
=> new SourceLinkEnumerable(generator);
public IEnumerator<DocumentSourceLink> GetEnumerator() {
public IEnumerator<SourceLink> GetEnumerator() {
return Enumerator;
}
+22
View File
@@ -0,0 +1,22 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Beam {
    /// <summary>
    /// Mutable wrapper around an object array, with index access and shallow copying.
    /// </summary>
    public class State(object[] state) {
        // The wrapped array; it is the caller's array, not a copy.
        private object[] _values = state;

        /// <summary>Returns the wrapped array itself (not a copy).</summary>
        public object[] GetState() {
            return _values;
        }

        /// <summary>Replaces the wrapped array with <paramref name="state"/>.</summary>
        public void SetState(object[] state) {
            _values = state;
        }

        /// <summary>
        /// Creates a new <see cref="State"/> over a clone of the wrapped array; the elements
        /// themselves are shared between original and copy.
        /// </summary>
        public State Copy() {
            var duplicate = (object[])_values.Clone();
            return new State(duplicate);
        }

        /// <summary>Reads or writes the element at <paramref name="i"/> (supports from-end indices such as <c>^1</c>).</summary>
        public object this[Index i] {
            get { return _values[i]; }
            set { _values[i] = value; }
        }
    }
}