"))
+ //(x) => Task.FromResult(!x.DocumentNode.InnerHtml.Contains("
"))
],
timeOut: TimeSpan.FromSeconds(15),
downloadLogger: logger
diff --git a/Beam.Temporary.Cli/NovelStatics.cs b/Beam.Temporary.Cli/NovelStatics.cs
index 7183ee7..9dd621d 100644
--- a/Beam.Temporary.Cli/NovelStatics.cs
+++ b/Beam.Temporary.Cli/NovelStatics.cs
@@ -5,133 +5,131 @@ using Beam.Dynamic;
using System;
using System.Collections.Generic;
using System.Linq;
+using System.Runtime.CompilerServices;
using System.Text;
using System.Threading.Tasks;
namespace Beam.Temporary.Cli {
- internal static class NovelStatics {
- public static void Define_LightNovelWorld_Novel_TheLegendaryMechanic(SharedDataDictionary sdd) {
- var lnwAggregator = new DataKey
("aeqw89:document:aggregators:light_novel_world");
- var lnwAuxiliary = new DataKey("aeqw89:document:auxillaries:light_novel_world");
- var novel = new TextResource() {
- Key = new DataKey("novels:the_legendary_mechanic"),
- AssociatedSource = lnwAggregator,
- AssociatedMetaSource = lnwAuxiliary,
- TemplateInitialData = ["the-legendary-mechanic-245", "1"],
- MetaTemplateInitialData = ["the-legendary-mechanic"]
- };
- sdd.Novels.TryAdd(novel.Key, novel);
+ public static class NovelStatics {
+ //public static void Define_LightNovelWorld_Novel_TheLegendaryMechanic(SharedDataDictionary sdd) {
+ // var lnwAggregator = new DataKey("aeqw89:document:aggregators:light_novel_world");
+ // var lnwAuxiliary = new DataKey("aeqw89:document:auxillaries:light_novel_world");
+ // var novel = new TextResource() {
+ // Key = new DataKey("novels:the_legendary_mechanic"),
+ // AssociatedSource = lnwAggregator,
+ // AssociatedMetaSource = lnwAuxiliary,
+ // TemplateInitialData = ["the-legendary-mechanic-245", "1"],
+ // MetaTemplateInitialData = ["the-legendary-mechanic"]
+ // };
+ // sdd.Novels.TryAdd(novel.Key, novel);
- sdd.AggregatorNovels.TryAdd(lnwAggregator, [novel.Key]);
- }
+ // sdd.AggregatorNovels.TryAdd(lnwAggregator, [novel.Key]);
+ //}
- public static void Define_LightNovelWorl_Novel_IAloneLevelUp(SharedDataDictionary sdd) {
- var lnwAggregator = new DataKey("light_novel_world").ToAggregator().As();
- var lnwAuxiliary = new DataKey("light_novel_world").ToAuxiliary().As();
+ //public static void Define_LightNovelWorl_Novel_IAloneLevelUp(SharedDataDictionary sdd) {
+ // var lnwAggregator = new DataKey("light_novel_world").ToAggregator().As();
+ // var lnwAuxiliary = new DataKey("light_novel_world").ToAuxiliary().As();
+ // var novel = new TextResource() {
+ // Key = new DataKey("novels:i_alone_level_up"),
+ // AssociatedSource = lnwAggregator,
+ // AssociatedMetaSource = lnwAuxiliary,
+ // TemplateInitialData = ["i-alone-level-up-236", "1"],
+ // MetaTemplateInitialData = ["i-alone-level-up-solo-leveling-05122225"]
+ // };
+
+ // sdd.Novels.TryAdd(novel.Key, novel);
+
+ // sdd.AggregatorNovels.TryAdd(lnwAggregator, [novel.Key]);
+ //}
+
+ public static void Define_WoDuShu_HouseOfHorrors(BeamDataDictionary sdd) {
+ var (wdsAgg, wdsAux) = CreateKeyPair("aggregators", "auxillaries", "wodushu", "aeqw89:document");
var novel = new TextResource() {
- Key = new DataKey("novels:i_alone_level_up"),
- AssociatedSource = lnwAggregator,
- AssociatedMetaSource = lnwAuxiliary,
- TemplateInitialData = ["i-alone-level-up-236", "1"],
- MetaTemplateInitialData = ["i-alone-level-up-solo-leveling-05122225"]
+ Key = new DataKey("novels:house_of_horrors"),
+ FriendlyName = "My House Of Horrors",
+ AssociatedSource = wdsAgg,
+ AssociatedMetaSource = wdsAux,
+ TemplateInitialData = new State(["24349", "2896325"]),
+ MetaTemplateInitialData = new State(["24349"])
};
sdd.Novels.TryAdd(novel.Key, novel);
- sdd.AggregatorNovels.TryAdd(lnwAggregator, [novel.Key]);
+ sdd.AggregatorNovels.TryAdd(wdsAgg, [novel.Key]);
}
- public static void Define_NovelFull(SharedDataDictionary sdd) {
- var docNamespace = "aeqw89:document";
- var nfAgg = new DataKey("aggregators:novel_full").WithNamespace(docNamespace);
- var nfAux = new DataKey("auxillaries:novel_full").WithNamespace(docNamespace);
- var nfBindings = new DataKey("aeqw89:bindings:light_novel_world");
- var aggregator = new WebResource(nfAgg) {
- Name = "Novel Full",
- Description = "A novel aggregator site",
- Domain = "https://novelfull.net",
- Bindings = nfBindings
- };
- var auxiliary = new WebResource(nfAux) {
- Name = "Novel Full",
- Description = "A novel aggregator site",
- Domain = "https://novelfull.net",
- Bindings = nfBindings.WithSuffix("_aux")
- };
-
- sdd.Templates.TryAdd(nfAgg, new() {
- Template = ""
- });
+        /// <summary>
+        /// Builds two keys that share the same trailing segment and namespace but differ in prefix.
+        /// </summary>
+        /// <param name="pref1">Prefix of the first key (placed before the colon).</param>
+        /// <param name="pref2">Prefix of the second key (placed before the colon).</param>
+        /// <param name="common">Segment shared by both keys (placed after the colon).</param>
+        /// <param name="namespace">Namespace applied to both keys via <c>WithNamespace</c>.</param>
+        /// <returns>The first and second key, in that order.</returns>
+        private static (DataKey, DataKey) CreateKeyPair(string pref1, string pref2, string common, string @namespace) {
+            var first = new DataKey($"{pref1}:{common}").WithNamespace(@namespace);
+            var second = new DataKey($"{pref2}:{common}").WithNamespace(@namespace);
+            return (first, second);
         }
- public static void Define_LightNovelWorld(SharedDataDictionary sdd) {
- var lnwAggregator = new DataKey("aeqw89:document:aggregators:light_novel_world");
- var lnwAuxiliary = new DataKey("aeqw89:document:auxillaries:light_novel_world");
- const string lnwBindingsA = "aeqw89:bindings:light_novel_world";
- var aggregator = new WebResource(lnwAggregator) {
- Name = "Light Novel World",
- Description = "A novel aggregator site maintained by NetherClaw",
- Domain = "https://www.lightnovelworld.co",
- Bindings = new DataKey(lnwBindingsA)
+ public static void Define_WoDuShu(BeamDataDictionary sdd) {
+ var (wdsAgg, wdsAux) = CreateKeyPair("aggregators", "auxillaries", "wodushu", "aeqw89:document");
+ var bindings = new DataKey("aeqw89:bindings:wodushu");
+ var aggregator = new WebResource(wdsAgg) {
+ Name = "WoDuShu.com",
+ Description = "A Chinese novel aggregator site",
+ Domain = "https://wodushu.com",
+ Bindings = bindings
};
- const string lnwBindingsB = "aeqw89:bindings:light_novel_world_aux";
- var auxiliary = new WebResource(lnwAuxiliary) {
- Name = "Light Novel World",
- Description = "A novel aggregator site maintained by NetherClaw",
- Domain = "https://www.lightnovelworld.co",
- Bindings = new DataKey(lnwBindingsB)
+ var auxiliary = new WebResource(wdsAux) {
+ Name = "WoDuShu.com",
+ Description = "A Chinese novel aggregator site",
+ Domain = "https://wodushu.com",
+ Bindings = bindings.WithSuffix("_aux")
};
- sdd.Templates.TryAdd(lnwAuxiliary, new() {
- Template = "https://www.lightnovelworld.co/novel/{0}",
- IndexOfChapterIndex = -1
+ sdd.Templates.TryAdd(wdsAgg, new() {
+ Factory = new(StateChangerFactory.LastAsNumber),
+ Builder = new SourceLinkBuilder("www.wodushu.com")
+ .WithSegments("read", "", "")
+ .WithParameters(1, "")
+ .WithParameters(2, (".html", Position.After))
});
- sdd.Templates.TryAdd(lnwAggregator, new() {
- Template = "https://www.lightnovelworld.co/novel/{0}/chapter-{1}",
- IndexOfChapterIndex = 1
+ sdd.Templates.TryAdd(wdsAux, new() {
+ Factory = new(StateChangerFactory.Constant),
+ Builder = new SourceLinkBuilder("www.wodushu.com")
+ .WithSegments("book", "")
+ .WithParameters(1, "")
});
- sdd.Aggregators.TryAdd(aggregator.Key, aggregator);
- sdd.Auxillaries.TryAdd(auxiliary.Key, auxiliary);
+ sdd.Aggregators.TryAdd(wdsAgg, aggregator);
+ sdd.Auxillaries.TryAdd(wdsAux, auxiliary);
- var lnwBindings = new DataKey(lnwBindingsA);
- var lnwBindingsAux = new DataKey(lnwBindingsB);
- sdd.Bindings.TryAdd(lnwBindings, new DataBindings() {
- Title = new Binding("aeqw89:binding:light_novel_world:title") {
- XPath = "/html/body/main/article/section/div[1]/h1/span[2]",
+ var binding_agg = new DataKey("aeqw89:bindings:wodushu");
+ var binding_aux = new DataKey("aeqw89:bindings:wodushu_aux");
+
+ sdd.Bindings.Add(binding_agg, new() {
+ Title = new Binding() {
+ XPath = "/html/body/div[4]/div/div/div[2]/h1",
Type = BindingType.Single
},
- Content = new("aeqw89:binding:light_novel_world:content") {
+ Content = new Binding() {
+ Type = BindingType.UseProvider,
Provider = new ParagraphedContentDataProvider() {
Content = new Binding() {
- XPath = "//*[@id=\"chapter-container\"]"
+ XPath = "//*[@id=\"content\"]"
}
- },
- Type = BindingType.UseProvider
+ }
},
});
- sdd.Bindings.TryAdd(lnwBindingsAux, new DataBindings() {
- Title = new("aeqw89:binding:light_novel_world_aux:title") {
- XPath = "/html/body/main/article/header/div[2]/div[2]/div[1]/h1",
+
+ sdd.Bindings.Add(binding_aux, new() {
+ Title = new Binding() {
+ XPath = "/html/body/div[3]/div[1]/div/div/div[2]/div[1]/h1",
Type = BindingType.Single
},
- Authors = new("aeqw89:binding:light_novel_world_aux:authors") {
- XPath = "/html/body/main/article/header/div[2]/div[2]/div[1]/div[1]/a",
+ Authors = new Binding() {
+ XPath = "/html/body/div[3]/div[1]/div/div/div[2]/div[1]/div/p[1]/a",
Type = BindingType.Single
},
- Description = new("aeqw89:binding:light_novel_world_aux:description") {
+ Description = new Binding() {
Provider = new ParagraphedContentDataProvider() {
- Content = new() {
- XPath = "/html/body/main/article/div/section/div[1]/div"
- }
- },
- Type = BindingType.UseProvider
- },
- Tags = new("aeqw89:binding:light_novel_world_aux:tags") {
- Provider = new ListContentDataProvider() {
- Content = new() {
- XPath = "/html/body/main/article/header/div[2]/div[2]/div[3]/ul"
+ Content = new Binding() {
+ XPath = "/html/body/div[3]/div[1]/div/div/div[2]/div[2]"
}
},
Type = BindingType.UseProvider
@@ -139,6 +137,107 @@ namespace Beam.Temporary.Cli {
});
}
+ //public static void Define_NovelFull(SharedDataDictionary sdd) {
+ // var docNamespace = "aeqw89:document";
+ // var nfAgg = new DataKey("aggregators:novel_full").WithNamespace(docNamespace);
+ // var nfAux = new DataKey("auxillaries:novel_full").WithNamespace(docNamespace);
+ // var nfBindings = new DataKey("aeqw89:bindings:light_novel_world");
+ // var aggregator = new WebResource(nfAgg) {
+ // Name = "Novel Full",
+ // Description = "A novel aggregator site",
+ // Domain = "https://novelfull.net",
+ // Bindings = nfBindings
+ // };
+ // var auxiliary = new WebResource(nfAux) {
+ // Name = "Novel Full",
+ // Description = "A novel aggregator site",
+ // Domain = "https://novelfull.net",
+ // Bindings = nfBindings.WithSuffix("_aux")
+ // };
+
+ // sdd.Templates.TryAdd(nfAux, new(StateChangerFactory.LastAsNumberPrefixed));
+
+ // sdd.Aggregators.TryAdd(nfAgg, aggregator);
+ // sdd.Auxillaries.TryAdd(nfAux, auxiliary);
+
+ // var binding_agg = new DataKey("aeqw89:bindings:be")
+
+ //}
+
+ //public static void Define_LightNovelWorld(SharedDataDictionary sdd) {
+ // var lnwAggregator = new DataKey("aeqw89:document:aggregators:light_novel_world");
+ // var lnwAuxiliary = new DataKey("aeqw89:document:auxillaries:light_novel_world");
+ // const string lnwBindingsA = "aeqw89:bindings:light_novel_world";
+ // var aggregator = new WebResource(lnwAggregator) {
+ // Name = "Light Novel World",
+ // Description = "A novel aggregator site maintained by NetherClaw",
+ // Domain = "https://www.lightnovelworld.co",
+ // Bindings = new DataKey(lnwBindingsA)
+ // };
+ // const string lnwBindingsB = "aeqw89:bindings:light_novel_world_aux";
+ // var auxiliary = new WebResource(lnwAuxiliary) {
+ // Name = "Light Novel World",
+ // Description = "A novel aggregator site maintained by NetherClaw",
+ // Domain = "https://www.lightnovelworld.co",
+ // Bindings = new DataKey(lnwBindingsB)
+ // };
+
+ // sdd.Templates.TryAdd(lnwAuxiliary, new() {
+ // Template = "https://www.lightnovelworld.co/novel/{0}",
+ // IndexOfChapterIndex = -1
+ // });
+ // sdd.Templates.TryAdd(lnwAggregator, new() {
+ // Template = "https://www.lightnovelworld.co/novel/{0}/chapter-{1}",
+ // IndexOfChapterIndex = 1
+ // });
+
+ // sdd.Aggregators.TryAdd(aggregator.Key, aggregator);
+ // sdd.Auxillaries.TryAdd(auxiliary.Key, auxiliary);
+
+ // var lnwBindings = new DataKey(lnwBindingsA);
+ // var lnwBindingsAux = new DataKey(lnwBindingsB);
+ // sdd.Bindings.TryAdd(lnwBindings, new DataBindings() {
+ // Title = new Binding("aeqw89:binding:light_novel_world:title") {
+ // XPath = "/html/body/main/article/section/div[1]/h1/span[2]",
+ // Type = BindingType.Single
+ // },
+ // Content = new("aeqw89:binding:light_novel_world:content") {
+ // Provider = new ParagraphedContentDataProvider() {
+ // Content = new Binding() {
+ // XPath = "//*[@id=\"chapter-container\"]"
+ // }
+ // },
+ // Type = BindingType.UseProvider
+ // },
+ // });
+ // sdd.Bindings.TryAdd(lnwBindingsAux, new DataBindings() {
+ // Title = new("aeqw89:binding:light_novel_world_aux:title") {
+ // XPath = "/html/body/main/article/header/div[2]/div[2]/div[1]/h1",
+ // Type = BindingType.Single
+ // },
+ // Authors = new("aeqw89:binding:light_novel_world_aux:authors") {
+ // XPath = "/html/body/main/article/header/div[2]/div[2]/div[1]/div[1]/a",
+ // Type = BindingType.Single
+ // },
+ // Description = new("aeqw89:binding:light_novel_world_aux:description") {
+ // Provider = new ParagraphedContentDataProvider() {
+ // Content = new() {
+ // XPath = "/html/body/main/article/div/section/div[1]/div"
+ // }
+ // },
+ // Type = BindingType.UseProvider
+ // },
+ // Tags = new("aeqw89:binding:light_novel_world_aux:tags") {
+ // Provider = new ListContentDataProvider() {
+ // Content = new() {
+ // XPath = "/html/body/main/article/header/div[2]/div[2]/div[3]/ul"
+ // }
+ // },
+ // Type = BindingType.UseProvider
+ // }
+ // });
+ //}
+
}
}
diff --git a/Beam.Temporary.Cli/Program.cs b/Beam.Temporary.Cli/Program.cs
index 21ce3e1..1a01804 100644
--- a/Beam.Temporary.Cli/Program.cs
+++ b/Beam.Temporary.Cli/Program.cs
@@ -15,11 +15,11 @@ namespace Beam.Temporary.Cli {
public static JsonSerializerOptions ConversionOptions { get; internal set; } = new();
- public static SharedDataDictionary Shared { get; set; } = [];
+ public static BeamDataDictionary BeamData { get; set; } = [];
public static IArchitecture Architecture = IArchitecture.Default;
- const string SharedDataPath = "data/.dat";
+ const string BeamDataPath = "data/.dat";
static async Task Main(string[] args) {
ConversionOptions.Converters.AddPersistentDataRequiredConverters();
@@ -34,24 +34,49 @@ namespace Beam.Temporary.Cli {
ILogger logger = lf
.CreateLogger("Program");
- await using var sharedContext = await DataDictionaryContext.Create(
- SharedDataPath,
+ await using var sharedContext = await DataDictionaryContext.Create(
+ BeamDataPath,
false,
DataKind.Shared,
logger,
ConversionOptions
);
- Shared = sharedContext.Data;
+ BeamData = sharedContext.Data;
- Shared.Clear();
- NovelStatics.Define_LightNovelWorld(Shared);
- NovelStatics.Define_LightNovelWorld_Novel_TheLegendaryMechanic(Shared);
- NovelStatics.Define_LightNovelWorl_Novel_IAloneLevelUp(Shared);
- ClassicTemplates.Register(Shared);
+ BeamData.Clear();
+ NovelStatics.Define_WoDuShu(BeamData);
+ NovelStatics.Define_WoDuShu_HouseOfHorrors(BeamData);
+ ClassicTemplates.Register(BeamData);
- var novel = new DataKey("novels:i_alone_level_up");
- var context_aux = Architecture.GetMeta(web, novel, Shared);
+ CancellationTokenSource cts = new();
+
+ // Returns a transformer that maps a downloaded page (x) to an ArticleData by resolving the
+ // Authors, Title, Tags and Description bindings. A missing binding set or a missing individual
+ // binding falls back to the empty string before cleaning.
+ HtmlTransformer ArticleDataTransformer(DataBindings? binding) => (x) => {
+ return new ArticleData() {
+ Authors = [OnlineCleaner.Clean(binding?.Authors?.Resolve(x) ?? "")],
+ Name = OnlineCleaner.Clean(binding?.Title?.ResolveString(x) ?? ""),
+ // NOTE(review): string.Split never returns null, so the `?? []` fallback is dead code.
+ // If Clean returns "" here (e.g. no Tags binding), Split(';') yields a single empty
+ // entry rather than an empty list - confirm whether RemoveEmptyEntries is wanted.
+ Categories = OnlineCleaner.Clean(binding?.Tags?.ResolveString(x) ?? "").Split(';') ?? [],
+ Description = OnlineCleaner.Clean(binding?.Description?.ResolveString(x) ?? "")
+ };
+ };
+
+ // Returns a transformer that turns a downloaded page (x) into a StringDocument.
+ // The document gets a random file name, the cleaned resolved content, and a metadata table
+ // holding the chapter-level ArticleData plus, when supplied, the book-level metaData.
+ HtmlTransformer DocumentTransformer(DataBindings? binding, IDocumentMetaData? metaData = null) => (x) => {
+ // resolved is null when no bindings were supplied; Title/Content are then passed to Clean as null.
+ var resolved = binding?.Resolve(x);
+ var articleData = new ArticleData() {
+ Name = OnlineCleaner.Clean(resolved?.Title),
+ };
+ Dictionary, IDocumentMetaData> meta = [];
+ meta.Add(IArchitecture.Default.ChapterKey, articleData);
+ // Book-level metadata is optional and only attached when the caller provided it.
+ if (metaData is not null)
+ meta.Add(IArchitecture.Default.BookKey, metaData);
+ return new StringDocument(Path.GetRandomFileName(), OnlineCleaner.Clean(resolved?.Content)) {
+ MetaData = meta
+ };
+ };
+
+ var novel = new DataKey("novels:house_of_horrors");
+ var context_aux = Architecture.GetMeta(web, novel, BeamData, cts.Token);
+ context_aux.RetryReporter = new Progress((x) => Console.WriteLine($"Failed. Trying again. {x.TryNumber}"));
var metaDownloader = new DownloadEnumerable(
new SequentialFragmentDownloader(
context_aux,
@@ -60,8 +85,17 @@ namespace Beam.Temporary.Cli {
.UnwrapFragmented());
var metadata = (await metaDownloader.FirstAsync());
- var context = Architecture.GetTextRecord(web, novel, Shared, metadata.Data);
- context.DownloadReporter = new Progress((x) => Console.WriteLine(x.Filename));
+ var metadata2 = await DownloadBuilder.FromMeta(novel, BeamData)
+ .WithLink()
+ .WithTransformer(ArticleDataTransformer)
+ .Configure((x) => x
+ .WithRetryReporter(new Progress())
+ .WithTimeOut(TimeSpan.FromSeconds(15)))
+ .Build()
+ .FirstAsync();
+
+ var context = Architecture.GetTextRecord(web, novel, BeamData, metadata.Data, cts.Token);
+ context.DownloadReporter = new Progress((x) => Console.WriteLine(x));
var downloader = new DownloadEnumerable(
new SequentialFragmentDownloader(
context,
@@ -69,9 +103,21 @@ namespace Beam.Temporary.Cli {
logger)
.UnwrapFragmented());
- List> documents = [];
+ var downloader2 = DownloadBuilder.FromText(novel, BeamData)
+ .WithLinkGenerator()
+ .WithTransformer((x) => DocumentTransformer(x, metadata2.Data))
+ .Configure((x) => x
+ .WithDownloadReporter(new Progress((x) => logger.LogInformation(x.ToString())))
+ .WithTimeOut(TimeSpan.FromSeconds(15))
+ )
+ .Build();
+
- await foreach (var download in downloader.Take(20)) {
+
+ List>> translationTasks = [];
+ List> documents = [];
+
+ await foreach (var download in downloader2.Take(10)) {
if (!download.Data.MetaData.TryGetValue(Architecture.ChapterKey, out var meta))
continue;
if (meta is not ArticleData articleMetaData)
@@ -83,8 +129,15 @@ namespace Beam.Temporary.Cli {
Console.WriteLine($"Chapter title: {articleMetaData.Name}");
//Console.WriteLine($"Content: {download}");
- documents.Add(download);
- }
+ //translationTasks.Add(Task.Run(async () => {
+ // logger.LogInformation("Beginning translation {} task for {}", download.Order, articleMetaData.Name);
+ // var ret = new Ordered(await QuickAndDirtyJanitor.TranslateAsync(download.Data), download.Order);
+ // logger.LogInformation("Finished translation {} task for {}", download.Order, articleMetaData.Name);
+ // return ret;
+ //}));
+ }
+
+ documents = (await Task.WhenAll(translationTasks)).ToList();
string testDir = Path.Combine("txt", Path.GetRandomFileName());
Directory.CreateDirectory(testDir);
@@ -113,7 +166,7 @@ namespace Beam.Temporary.Cli {
// HtmlBook.Keys.TitlePage,
// HtmlBook.Keys.StylesPage,
//}.Select(
- // (x) => Shared.Files.ReadToString(x.WithNamespace("aeqw89:files:templates:classic"))
+ // (x) => BeamData.Files.ReadToString(x.WithNamespace("aeqw89:files:templates:classic"))
//).ToArray();
//HtmlBook book = new(
diff --git a/Beam.Temporary.Cli/Properties/launchSettings.json b/Beam.Temporary.Cli/Properties/launchSettings.json
new file mode 100644
index 0000000..2cccade
--- /dev/null
+++ b/Beam.Temporary.Cli/Properties/launchSettings.json
@@ -0,0 +1,10 @@
+{
+ "profiles": {
+ "Beam.Temporary.Cli": {
+ "commandName": "Project",
+ "environmentVariables": {
+ "OPEN_AI_KEY": "<REDACTED - revoke the committed key and supply a new one via user secrets or the shell environment>"
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/Beam.Temporary.Cli/QuickAndDirtyJanitor.cs b/Beam.Temporary.Cli/QuickAndDirtyJanitor.cs
new file mode 100644
index 0000000..7b03e06
--- /dev/null
+++ b/Beam.Temporary.Cli/QuickAndDirtyJanitor.cs
@@ -0,0 +1,25 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using OpenAI;
+using OpenAI.Chat;
+
+namespace Beam.Temporary.Cli {
+    /// <summary>
+    /// Throwaway helper that asks the OpenAI chat API to translate a document into English.
+    /// </summary>
+    public class QuickAndDirtyJanitor {
+        private const string ApiKeyVariable = "OPEN_AI_KEY";
+        private const string Model = "gpt-4.1";
+
+        // Built on first use instead of in a static constructor, so a missing key is reported as a
+        // readable InvalidOperationException rather than a TypeInitializationException, and the
+        // chat client is created once instead of on every call.
+        private static readonly Lazy<ChatClient> s_chat = new(CreateChatClient);
+
+        /// <summary>Creates the chat client from the API key held in the environment.</summary>
+        /// <exception cref="InvalidOperationException">The key variable is unset or blank.</exception>
+        private static ChatClient CreateChatClient() {
+            var key = Environment.GetEnvironmentVariable(ApiKeyVariable);
+            if (string.IsNullOrWhiteSpace(key))
+                throw new InvalidOperationException($"Environment variable '{ApiKeyVariable}' is not set; cannot create the OpenAI client.");
+            return new OpenAIClient(key).GetChatClient(Model);
+        }
+
+        /// <summary>
+        /// Sends the document text to the chat model and returns the reply as a new document
+        /// that keeps the original file name.
+        /// </summary>
+        /// <param name="document">Document whose <c>ToString()</c> text is translated.</param>
+        /// <returns>A <c>StringDocument</c> containing the concatenated text parts of the reply.</returns>
+        /// <exception cref="ArgumentNullException"><paramref name="document"/> is null.</exception>
+        // NOTE(review): the generic argument of the return type was restored from usage - confirm
+        // against the original declaration.
+        public static async Task<IDocument> TranslateAsync(IDocument document) {
+            ArgumentNullException.ThrowIfNull(document);
+
+            var chatCompletion = await s_chat.Value.CompleteChatAsync(
+                ChatMessage.CreateSystemMessage("Translate the following text into english. If any part of the text has no direct English translation, you may choose to leave it as is. In either case, make sure to leave footnotes for any difficult to translate words. You must translate the whole text and output only your translation and footnotes. No other comments are necessary."),
+                ChatMessage.CreateUserMessage("From UNKNOWN to ENGLISH.\n" + document.ToString()));
+
+            // string.Concat treats null parts as empty and returns "" for an empty reply, so the
+            // resulting document never carries null content.
+            var translated = string.Concat(chatCompletion.Value.Content.Select((part) => part.Text));
+            return new StringDocument(document.Filename, translated);
+        }
+    }
+}
diff --git a/Beam.Temporary.Cli/StateChangerFactory.cs b/Beam.Temporary.Cli/StateChangerFactory.cs
new file mode 100644
index 0000000..caaf663
--- /dev/null
+++ b/Beam.Temporary.Cli/StateChangerFactory.cs
@@ -0,0 +1,33 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Text.Json.Serialization;
+using System.Threading.Tasks;
+
+namespace Beam.Temporary.Cli {
+    /// <summary>
+    /// Serializable handle to a state-change behaviour: only the string key is persisted, and the
+    /// behaviour itself is looked up in <see cref="FactoryTable"/> on demand.
+    /// </summary>
+    public class StateChangerFactory {
+        public const string LastAsNumber = "LastAsNumber";
+        public const string LastAsNumberPrefixed = "LastAsNumberPrefixed";
+        public const string Constant = "Constant";
+
+        /// <summary>Maps each known key to a delegate that produces its behaviour.</summary>
+        public static Dictionary<string, Func<IStateChangeBehaviour>> FactoryTable = new() {
+            { LastAsNumber, () => CommonStateChangers.LastAsNumber },
+            { LastAsNumberPrefixed, () => CommonStateChangers.NthAsNumber(^1, true) },
+            { Constant, () => CommonStateChangers.Constant },
+        };
+
+        /// <summary>The built-in keys. Kept for existing callers; validation uses <see cref="FactoryTable"/>.</summary>
+        public HashSet<string> Keys = [LastAsNumber, LastAsNumberPrefixed, Constant];
+
+        /// <summary>Behaviour produced by the table entry for <see cref="StateChangerKey"/>.</summary>
+        // Derived from the key, so it is excluded from the serialized form.
+        [JsonIgnore]
+        public IStateChangeBehaviour Behavior => FactoryTable[StateChangerKey]();
+
+        [JsonInclude]
+        public string StateChangerKey { get; set; }
+
+        /// <summary>Creates a factory for one of the keys registered in <see cref="FactoryTable"/>.</summary>
+        /// <param name="stateChangerKey">
+        /// Key to look up. The parameter name must match the <see cref="StateChangerKey"/> property
+        /// (case-insensitively) for System.Text.Json to bind it during deserialization.
+        /// </param>
+        /// <exception cref="ArgumentException">The key is null or not registered.</exception>
+        [JsonConstructor]
+        public StateChangerFactory(string stateChangerKey) {
+            // Validate against the same table Behavior reads from, so an accepted key always resolves.
+            if (stateChangerKey is null || !FactoryTable.ContainsKey(stateChangerKey))
+                throw new ArgumentException($"{stateChangerKey} not in keys list", nameof(stateChangerKey));
+            StateChangerKey = stateChangerKey;
+        }
+    }
+}
diff --git a/Beam.Temporary.Cli/Template.cs b/Beam.Temporary.Cli/Template.cs
new file mode 100644
index 0000000..071e7a8
--- /dev/null
+++ b/Beam.Temporary.Cli/Template.cs
@@ -0,0 +1,12 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Beam.Temporary.Cli {
+ /// <summary>
+ /// Bundles a <see cref="StateChangerFactory"/> with a <see cref="SourceLinkBuilder"/>.
+ /// </summary>
+ // NOTE(review): both properties are non-nullable but have no initializer or `required`
+ // modifier, so a default-constructed Template holds nulls - confirm this is intended.
+ public class Template {
+ public StateChangerFactory Factory { get; set; }
+ public SourceLinkBuilder Builder { get; set; }
+ }
+}
diff --git a/Beam.Temporary.Cli/Templates/Classic/ClassicTemplates.cs b/Beam.Temporary.Cli/Templates/Classic/ClassicTemplates.cs
index 6b2ac8a..d8f8914 100644
--- a/Beam.Temporary.Cli/Templates/Classic/ClassicTemplates.cs
+++ b/Beam.Temporary.Cli/Templates/Classic/ClassicTemplates.cs
@@ -6,7 +6,7 @@ using System.Threading.Tasks;
namespace Beam.Temporary.Cli.Templates.Classic {
internal class ClassicTemplates {
- public static void Register(SharedDataDictionary sdd) {
+ public static void Register(BeamDataDictionary sdd) {
sdd.Files.TryAdd(
new("aeqw89:files:templates:classic:content_page"),
new("C:\\Users\\qwsdc\\source\\repos\\Beam\\Beam.Temporary.Cli\\Templates\\Classic\\Content.template.html", "htmlpage", "templates"));
diff --git a/Beam.Temporary.Cli/TextResource.cs b/Beam.Temporary.Cli/TextResource.cs
index ff27c57..0d39516 100644
--- a/Beam.Temporary.Cli/TextResource.cs
+++ b/Beam.Temporary.Cli/TextResource.cs
@@ -10,12 +10,13 @@ using System.Threading.Tasks;
namespace Beam.Temporary.Cli {
public class TextResource : IKeyed {
public required DataKey Key { get; set; }
+ public string? FriendlyName { get; set; }
public DataKey? AssociatedSource { get; set; }
public DataKey? AssociatedMetaSource { get; set; }
- public required string[] TemplateInitialData { get; set; }
- public string?[]? MetaTemplateInitialData { get; set; }
+ public required State TemplateInitialData { get; set; }
+ public State? MetaTemplateInitialData { get; set; }
- public TextResourceRecord ToRecord(SharedDataDictionary sdd) {
+ public TextResourceRecord ToRecord(BeamDataDictionary sdd) {
return new(this,
AssociatedSource is null ? null : sdd.Aggregators[AssociatedSource],
AssociatedMetaSource is null ? null : sdd.Auxillaries[AssociatedMetaSource]);
diff --git a/Beam.Temporary.Cli/WebResource.cs b/Beam.Temporary.Cli/WebResource.cs
index f7fcc71..17fbf87 100644
--- a/Beam.Temporary.Cli/WebResource.cs
+++ b/Beam.Temporary.Cli/WebResource.cs
@@ -19,7 +19,7 @@ namespace Beam.Temporary.Cli {
public WebResource() : this(new(string.Empty)) { }
- public WebResourceRecord ToRecord(SharedDataDictionary sdd) {
+ public WebResourceRecord ToRecord(BeamDataDictionary sdd) {
return new WebResourceRecord(this, sdd.Bindings[Bindings]);
}
}
diff --git a/Beam/CommonStateChangers.cs b/Beam/CommonStateChangers.cs
index fca5416..2c58329 100644
--- a/Beam/CommonStateChangers.cs
+++ b/Beam/CommonStateChangers.cs
@@ -12,5 +12,26 @@ namespace Beam {
throw new InvalidOperationException(S.M.StateChangeError);
x[^1] = number + i;
});
+
+ /// <summary>Returns a new <see cref="ConstantStateChanger"/> on every access.</summary>
+ public static IStateChangeBehaviour Constant => new ConstantStateChanger();
+
+        /// <summary>
+        /// Creates a changer that advances the numeric value stored at position <paramref name="n"/>.
+        /// </summary>
+        /// <param name="n">Index of the state entry to advance.</param>
+        /// <param name="keepSuffix">
+        /// When true, an entry of the form "&lt;int&gt;.&lt;suffix&gt;" is accepted: the integer part is
+        /// advanced and the suffix, including its leading dot, is kept. When false, such an entry
+        /// is rejected.
+        /// </param>
+        /// <returns>A <see cref="NumberedStateChanger"/> applying the rule above.</returns>
+        /// <exception cref="InvalidOperationException">
+        /// Thrown by the changer when the entry is null or no integer can be parsed from it.
+        /// </exception>
+        public static IStateChangeBehaviour NthAsNumber(Index n, bool keepSuffix = true)
+            => new NumberedStateChanger((state, amount) => {
+                string? nth = state[n]?.ToString();
+                if (nth is null)
+                    throw new InvalidOperationException(S.M.StateChangeError);
+
+                // Plain integer entry: store the advanced value as a number.
+                if (int.TryParse(nth, out var number)) {
+                    state[n] = number + amount;
+                    return;
+                }
+
+                // Otherwise only "<int>.<suffix>" is acceptable, and only when suffixes are allowed.
+                int dot = nth.IndexOf('.');
+                if (!keepSuffix || dot < 0 || !int.TryParse(nth[..dot], out number))
+                    throw new InvalidOperationException(S.M.StateChangeError);
+
+                // nth[dot..] starts at the dot, so "5.html" becomes "6.html" rather than "6html".
+                state[n] = (number + amount) + nth[dot..];
+            });
+
+
}
}
diff --git a/Beam/ConstantStateChanger.cs b/Beam/ConstantStateChanger.cs
new file mode 100644
index 0000000..72a1480
--- /dev/null
+++ b/Beam/ConstantStateChanger.cs
@@ -0,0 +1,13 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Beam {
+    /// <summary>
+    /// State-change behaviour that leaves the state untouched whatever the stimulus is.
+    /// </summary>
+    public class ConstantStateChanger : IStateChangeBehaviour {
+        /// <summary>Does nothing; both arguments are ignored.</summary>
+        public void Apply(State state, object stimulus) { }
+    }
+}
diff --git a/Beam/DownloadContext.cs b/Beam/DownloadContext.cs
index 6e1691e..ebd34e1 100644
--- a/Beam/DownloadContext.cs
+++ b/Beam/DownloadContext.cs
@@ -15,11 +15,14 @@ namespace Beam {
public class DownloadContext : IDisposable {
private bool disposedValue;
+ /// <summary>
+ /// Returns a builder seeded from this context by delegating to <c>DownloadContextBuilder.FromContext</c>.
+ /// </summary>
+ public DownloadContextBuilder CreateBuilder()
+ => DownloadContextBuilder.FromContext(this);
+
public HtmlWeb Web { get; }
public HtmlTransformer Transformer { get; }
public AsyncHtmlTransformer AsyncTranformer { get; }
- public IProgress? DownloadReporter { get; set; }
- public IProgress? RetryReporter { get; set; }
+ public IProgress? DownloadReporter { get; set; }
+ public IProgress? RetryReporter { get; set; }
public AsyncDownloadFailurePredicate?[]? AsyncFailurePredicates { get; }
public TimeSpan TimeOut { get; set; }
public IEnumerable Links { get; }
@@ -32,8 +35,8 @@ namespace Beam {
CancellationToken cancellationToken = default,
HtmlTransformer? transformer = null,
AsyncHtmlTransformer? asyncTransformer = null,
- IProgress? downloadReporter = null,
- IProgress? retryReporter = null,
+ IProgress? downloadReporter = null,
+ IProgress? retryReporter = null,
AsyncDownloadFailurePredicate?[]? asyncFailurePredicates = null,
TimeSpan? timeOut = null,
ILogger? downloadLogger = null) {
diff --git a/Beam/DownloadContextBuilder.cs b/Beam/DownloadContextBuilder.cs
new file mode 100644
index 0000000..9fd67ca
--- /dev/null
+++ b/Beam/DownloadContextBuilder.cs
@@ -0,0 +1,129 @@
+using HtmlAgilityPack;
+using Microsoft.Extensions.Logging;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Beam {
+
+ /// <summary>
+ /// Fluent builder for <c>DownloadContext</c>: each <c>With*</c> method stores one value and returns
+ /// the same builder, and <see cref="Build"/> passes the collected values to the context constructor.
+ /// </summary>
+ public class DownloadContextBuilder {
+ private HtmlWeb _web;
+ // NOTE(review): the two transformers are declared non-nullable but are only assigned by their
+ // With* methods, so Build() forwards null for whichever one was never set.
+ private HtmlTransformer _transformer;
+ private AsyncHtmlTransformer _asyncTransformer;
+ private IProgress? _downloadReporter;
+ private IProgress? _retryReporter;
+ private AsyncDownloadFailurePredicate?[]? _asyncFailurePredicates;
+ // NOTE(review): non-nullable and never given a default, so Build() passes TimeSpan.Zero unless
+ // WithTimeOut was called, whereas the DownloadContext constructor declares `TimeSpan? timeOut = null`.
+ // Confirm a zero timeout is really intended as the builder default.
+ private TimeSpan _timeOut;
+ private IEnumerable _links;
+ private CancellationToken _cancellationToken;
+ // NOTE(review): stored by WithCache but not used by Build() (the assignment there is commented out).
+ private DocumentCache _cache;
+ private ILogger? _downloadLogger;
+
+ /// <summary>Creates a builder with an empty link list.</summary>
+ /// <param name="web">Web client to use; a new <c>HtmlWeb</c> is created when null.</param>
+ public DownloadContextBuilder(HtmlWeb? web = null) {
+ // You can initialize defaults here if needed, e.g.:
+ // _timeOut = TimeSpan.FromSeconds(30);
+ // _cancellationToken = CancellationToken.None;
+ _web = web ?? new();
+ _links = [];
+ }
+
+ /// <summary>Replaces the web client.</summary>
+ public DownloadContextBuilder WithWeb(HtmlWeb web) {
+ _web = web;
+ return this;
+ }
+
+ /// <summary>Sets the synchronous transformer.</summary>
+ public DownloadContextBuilder WithTransformer(HtmlTransformer transformer) {
+ _transformer = transformer;
+ return this;
+ }
+
+ /// <summary>Sets the asynchronous transformer.</summary>
+ public DownloadContextBuilder WithAsyncTransformer(AsyncHtmlTransformer asyncTransformer) {
+ _asyncTransformer = asyncTransformer;
+ return this;
+ }
+
+ /// <summary>Sets the download progress reporter.</summary>
+ public DownloadContextBuilder WithDownloadReporter(IProgress downloadReporter) {
+ _downloadReporter = downloadReporter;
+ return this;
+ }
+
+ /// <summary>Sets the retry progress reporter.</summary>
+ public DownloadContextBuilder WithRetryReporter(IProgress retryReporter) {
+ _retryReporter = retryReporter;
+ return this;
+ }
+
+ /// <summary>Replaces the failure predicates with the given array.</summary>
+ public DownloadContextBuilder WithAsyncFailurePredicates(params AsyncDownloadFailurePredicate[] predicates) {
+ _asyncFailurePredicates = predicates;
+ return this;
+ }
+
+ /// <summary>Sets the timeout passed to the context.</summary>
+ public DownloadContextBuilder WithTimeOut(TimeSpan timeOut) {
+ _timeOut = timeOut;
+ return this;
+ }
+
+ /// <summary>Replaces the link sequence.</summary>
+ public DownloadContextBuilder WithLinks(IEnumerable links) {
+ _links = links;
+ return this;
+ }
+
+ /// <summary>Sets the cancellation token passed to the context.</summary>
+ public DownloadContextBuilder WithCancellationToken(CancellationToken cancellationToken) {
+ _cancellationToken = cancellationToken;
+ return this;
+ }
+
+ /// <summary>Stores a cache on the builder; see the note on <c>_cache</c> - Build() does not apply it.</summary>
+ public DownloadContextBuilder WithCache(DocumentCache cache) {
+ _cache = cache;
+ return this;
+ }
+
+ /// <summary>Sets the logger passed to the context.</summary>
+ public DownloadContextBuilder WithDownloadLogger(ILogger downloadLogger) {
+ _downloadLogger = downloadLogger;
+ return this;
+ }
+
+
+ /// <summary>Creates a new <c>DownloadContext</c> from the values collected so far.</summary>
+ public DownloadContext Build() {
+ // Construct the DownloadContext using the collected values.
+ var context = new DownloadContext(
+ web: _web,
+ links: _links,
+ cancellationToken: _cancellationToken,
+ transformer: _transformer,
+ asyncTransformer: _asyncTransformer,
+ downloadReporter: _downloadReporter,
+ retryReporter: _retryReporter,
+ asyncFailurePredicates: _asyncFailurePredicates,
+ timeOut: _timeOut,
+ downloadLogger: _downloadLogger
+ );
+
+ //// Assign the DocumentCache if it's been set in the builder.
+ //// (Even though Cache has a private setter, this code assumes builder
+ //// is in the same assembly or that the setter will be made internal.
+ //// Otherwise, remove or adjust this line.)
+ //context.Cache = _cache;
+
+ return context;
+ }
+
+ /// <summary>Creates a builder pre-populated with every setting read from an existing context.</summary>
+ /// <exception cref="ArgumentNullException"><paramref name="existing"/> is null.</exception>
+ public static DownloadContextBuilder FromContext(DownloadContext existing) {
+ if (existing == null) throw new ArgumentNullException(nameof(existing));
+
+ // The reporters and logger are nullable on the context; `!` only silences the compiler,
+ // so a null value is copied across as null.
+ return new DownloadContextBuilder(existing.Web)
+ .WithLinks(existing.Links)
+ .WithCancellationToken(existing.CancellationToken)
+ .WithTransformer(existing.Transformer)
+ .WithAsyncTransformer(existing.AsyncTranformer)
+ .WithDownloadReporter(existing.DownloadReporter!)
+ .WithRetryReporter(existing.RetryReporter!)
+ .WithAsyncFailurePredicates(existing.AsyncFailurePredicates ?? Array.Empty>())
+ .WithTimeOut(existing.TimeOut)
+ .WithDownloadLogger(existing.DownloadLogger!)
+ .WithCache(existing.Cache);
+ }
+ }
+
+}
diff --git a/Beam/DownloadReport.cs b/Beam/DownloadReport.cs
new file mode 100644
index 0000000..1dfdf51
--- /dev/null
+++ b/Beam/DownloadReport.cs
@@ -0,0 +1,6 @@
+namespace Beam {
+    /// <summary>Download progress payload; it currently declares no members.</summary>
+    public struct DownloadReport { }
+
+}
diff --git a/Beam/IUnitDownloader.cs b/Beam/IUnitDownloader.cs
index a66c202..443b5ea 100644
--- a/Beam/IUnitDownloader.cs
+++ b/Beam/IUnitDownloader.cs
@@ -1,6 +1,7 @@
namespace Beam {
public interface IUnitDownloader<T> {
public int LinksPerDownload { get; }
- public Task<(bool, T?)> TryDownload(Ordered<string>[] link, CancellationToken ct, int maximumRetryCount = 7, IProgress<int>? tryProgress = null);
+ /// <summary>Tries to download <paramref name="link"/>; retry notifications are delivered to <paramref name="tryProgress"/> as <see cref="RetryReport"/> values.</summary>
+ public Task<(bool, T?)> TryDownload(Ordered<string>[] link, CancellationToken ct, int maximumRetryCount = 7, IProgress<RetryReport>? tryProgress = null);
}
}
\ No newline at end of file
diff --git a/Beam/NumberedStateChanger.cs b/Beam/NumberedStateChanger.cs
index c97a523..de597a6 100644
--- a/Beam/NumberedStateChanger.cs
+++ b/Beam/NumberedStateChanger.cs
@@ -12,5 +12,9 @@
public virtual void Apply(State state, int amount) {
MoveStateDlgte(state, amount);
}
+
+ public NumberedStateChanger(IStateChangeBehaviour behavior)
+ // Wraps behavior.Apply in a lambda and hands it to the delegate-taking constructor.
+ : this((state, amount) => { behavior.Apply(state, amount); }) { }
}
}
diff --git a/Beam/OrderedSourceLinkGenerator.cs b/Beam/OrderedSourceLinkGenerator.cs
index 2edee29..5d32eb3 100644
--- a/Beam/OrderedSourceLinkGenerator.cs
+++ b/Beam/OrderedSourceLinkGenerator.cs
@@ -15,10 +15,13 @@ namespace Beam {
public NumberedStateChanger Behaviour { get; }
private State InitialState;
- public OrderedSourceLinkGenerator(SourceLinkBuilder builder, NumberedStateChanger behaviour, params object[] initialState) {
+
+ public OrderedSourceLinkGenerator(SourceLinkBuilder builder, NumberedStateChanger behaviour, params object[] initialState)
+ : this(builder, behaviour, new State(initialState)) { } // wraps the loose values in a new State and delegates to the State-taking overload
+ public OrderedSourceLinkGenerator(SourceLinkBuilder builder, NumberedStateChanger behaviour, State initialState) {
Builder = builder;
Behaviour = behaviour;
- InitialState = new State(initialState);
+ InitialState = initialState;
State = InitialState.Copy();
Reset();
diff --git a/Beam/RetryReport.cs b/Beam/RetryReport.cs
new file mode 100644
index 0000000..e099673
--- /dev/null
+++ b/Beam/RetryReport.cs
@@ -0,0 +1,12 @@
+namespace Beam {
+ /// <summary>Immutable pair of an attempt number and the link it refers to.</summary>
+ public readonly struct RetryReport(int tryNumber, string link) {
+ /// <summary>Attempt number passed to the constructor.</summary>
+ public int TryNumber { get; } = tryNumber;
+
+ /// <summary>Link passed to the constructor.</summary>
+ public string Link { get; } = link;
+
+ }
+
+}
diff --git a/Beam/SequentialChunkDownloader.cs b/Beam/SequentialFragmentDownloader.cs
similarity index 100%
rename from Beam/SequentialChunkDownloader.cs
rename to Beam/SequentialFragmentDownloader.cs
diff --git a/Beam/SourceLinkBuilder.cs b/Beam/SourceLinkBuilder.cs
index 1236b76..8a8c21e 100644
--- a/Beam/SourceLinkBuilder.cs
+++ b/Beam/SourceLinkBuilder.cs
@@ -5,8 +5,16 @@ using System.Text;
using System.Threading.Tasks;
namespace Beam {
- public class Parameter(string name) {
+ [Flags]
+ public enum Position {
+ Before = 0b01, // Build appends the parameter name ahead of its value
+ After = 0b10, // Build appends the parameter name behind its value
+ BeforeAndAfter = 0b11 // both bits set: the name is appended on each side of the value
+ }
+
+ public class Parameter(string name, Position position = Position.Before) {
public string Name { get; set; } = name;
+ public Position Position { get; set; } = position; // where Name is placed relative to the value; Position.Before when not specified
}
public class LinkSegment(string name, string separator = "", string suffix = "") {
@@ -14,6 +22,16 @@ namespace Beam {
public List Parameters { get; set; } = [];
public string Separator { get; set; } = separator;
public string Suffix { get; set; } = suffix;
+
+ public LinkSegment WithParameters(params string[] parameters) {
+ Parameters = parameters.Select((x) => new Parameter(x)).ToList(); // replaces the current list; each Parameter gets the default Position.Before
+ return this; // same segment, for chaining
+ }
+
+ public LinkSegment WithParameters(params (string, Position)[] parameters) {
+ Parameters = parameters.Select((x) => new Parameter(x.Item1, x.Item2)).ToList(); // replaces the current list; Item1 is the name, Item2 its Position
+ return this; // same segment, for chaining
+ }
}
public class SourceLinkBuilder(string host, string protocol = "https") {
@@ -64,6 +82,26 @@ namespace Beam {
Segments.Add(new LinkSegment(name, separator));
}
+ public SourceLinkBuilder WithSegments(params IEnumerable segments) {
+ Segments = segments.Select((x) => new LinkSegment(x)).ToList(); // replaces Segments; each new LinkSegment keeps the default separator and suffix ("")
+ return this; // same builder, for chaining
+ }
+
+ public SourceLinkBuilder WithSegments(int count)
+ => WithSegments(Enumerable.Repeat("", count)); // replaces Segments with `count` segments whose Name is the empty string
+
+ public SourceLinkBuilder WithParameters(int i, params string[] parameters) {
+ Segments[i] // i is used directly as an index into Segments; no range check is made here
+ .WithParameters(parameters); // replaces that segment's Parameters list
+ return this;
+ }
+
+ public SourceLinkBuilder WithParameters(int i, params (string, Position)[] parameters) {
+ Segments[i] // i is used directly as an index into Segments; no range check is made here
+ .WithParameters(parameters); // replaces that segment's Parameters list, keeping each tuple's Position
+ return this;
+ }
+
public void AddParameters(int segmentIndex, params string[] parameters) {
ArgumentOutOfRangeException.ThrowIfGreaterThanOrEqual(segmentIndex, Segments.Count);
ArgumentOutOfRangeException.ThrowIfNegative(segmentIndex);
@@ -97,6 +135,9 @@ namespace Beam {
return count;
}
+ public SourceLink Build(State parameterValues)
+ => Build(parameterValues.GetState()); // forwards the result of GetState() to the params object[] overload
+
public SourceLink Build(params object[] parameterValues) {
ArgumentOutOfRangeException.ThrowIfNotEqual(parameterValues.Length, GetParameterCount());
@@ -109,8 +150,11 @@ namespace Beam {
link.Append('/');
link.Append(segment.Name);
for (int i = 0; i < segment.Parameters.Count; i++) {
- link.Append(segment.Parameters[i].Name);
+ if (segment.Parameters[i].Position.HasFlag(Position.Before))
+ link.Append(segment.Parameters[i].Name);
link.Append(parameterValues[pvC++]);
+ if (segment.Parameters[i].Position.HasFlag(Position.After))
+ link.Append(segment.Parameters[i].Name);
if (i + 1 < segment.Parameters.Count && segment.Separator is not null)
link.Append(segment.Separator);
}
diff --git a/Beam/UnitDownloader.cs b/Beam/UnitDownloader.cs
index 7370ae3..e1035d2 100644
--- a/Beam/UnitDownloader.cs
+++ b/Beam/UnitDownloader.cs
@@ -45,7 +45,7 @@ namespace Beam {
}
}
- public async Task<(bool, T?)> TryDownload(Ordered[] link, CancellationToken ct, int maximumRetryCount = 7, IProgress? tryProgress = null) {
+ public async Task<(bool, T?)> TryDownload(Ordered[] link, CancellationToken ct, int maximumRetryCount = 7, IProgress? tryProgress = null) {
if (link.Length == 0)
return (false, default);
@@ -56,7 +56,8 @@ namespace Beam {
(var success, doc) = await TryDownloadWithNoRetries(link[0].Data, ct);
if (success && doc != null)
return (true, doc);
- tryProgress?.Report(++tryCount);
+ ++tryCount;
+ tryProgress?.Report(new RetryReport(tryCount, link[0].Data));
await Task.Delay((int)Math.Pow(2, tryCount) * 1000);
}
diff --git a/Beam/UnitFragmentDownloader.cs b/Beam/UnitFragmentDownloader.cs
index 380a910..a297630 100644
--- a/Beam/UnitFragmentDownloader.cs
+++ b/Beam/UnitFragmentDownloader.cs
@@ -30,7 +30,7 @@ namespace Beam {
private readonly UnitDownloader UnitDownloader;
- async Task<(bool, Fragment>?)> IUnitDownloader>>.TryDownload(Ordered[] link, CancellationToken ct, int maximumRetryCount, IProgress? tryProgress) {
+ async Task<(bool, Fragment>?)> IUnitDownloader>>.TryDownload(Ordered[] link, CancellationToken ct, int maximumRetryCount, IProgress? tryProgress) {
Fragment> fragment = new Fragment>(link.Length);
if (!Fragment>.TryAcquireUpdater(fragment, out var updater))
throw new S.AssertionException(S.M.NewFragmentShouldBeFree);