From 6ab2faa5876656c76756191aa8da4070b1562d81 Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Fri, 15 Nov 2024 11:47:32 -0500 Subject: [PATCH] More reviewer-requested changes --- src/SIL.Machine/Corpora/CorporaExtensions.cs | 12 ++++-------- src/SIL.Machine/Corpora/MergedTextCorpus.cs | 14 ++++++-------- .../Corpora/CorporaExtensionsTests.cs | 14 +++++--------- 3 files changed, 15 insertions(+), 25 deletions(-) diff --git a/src/SIL.Machine/Corpora/CorporaExtensions.cs b/src/SIL.Machine/Corpora/CorporaExtensions.cs index f5e1b4b1..e20be3f7 100644 --- a/src/SIL.Machine/Corpora/CorporaExtensions.cs +++ b/src/SIL.Machine/Corpora/CorporaExtensions.cs @@ -538,18 +538,14 @@ public static INParallelTextCorpus AlignMany( return nParallelTextCorpus; } - public static ITextCorpus ChooseRandom( - this IEnumerable corpora, - IEnumerable allRows, - int seed - ) + public static ITextCorpus ChooseRandom(this IEnumerable corpora, int seed) { - return new MergedTextCorpus(corpora, allRows.ToArray(), MergeRule.Random, seed); + return new MergedTextCorpus(corpora, MergeRule.Random, seed); } - public static ITextCorpus ChooseFirst(this IEnumerable corpora, IEnumerable allRows) + public static ITextCorpus ChooseFirst(this IEnumerable corpora) { - return new MergedTextCorpus(corpora, allRows.ToArray(), MergeRule.First, 0); + return new MergedTextCorpus(corpora, MergeRule.First); } #endregion diff --git a/src/SIL.Machine/Corpora/MergedTextCorpus.cs b/src/SIL.Machine/Corpora/MergedTextCorpus.cs index 1bcbf822..5e85b60b 100644 --- a/src/SIL.Machine/Corpora/MergedTextCorpus.cs +++ b/src/SIL.Machine/Corpora/MergedTextCorpus.cs @@ -13,16 +13,14 @@ public class MergedTextCorpus : TextCorpusBase private readonly Random _random; - public MergedTextCorpus( - IEnumerable corpora, - IEnumerable allRows, - MergeRule mergeRule, - int seed - ) + public MergedTextCorpus(IEnumerable corpora, MergeRule mergeRule, int? seed = null) { - _corpus = new NParallelTextCorpus(corpora) { AllRows = allRows.ToList() }; + _corpus = new NParallelTextCorpus(corpora) { AllRows = Enumerable.Repeat(true, corpora.Count()).ToArray() }; _mergeRule = mergeRule; - _random = new Random(seed); + if (seed != null) + _random = new Random(seed.Value); + else + _random = new Random(); } public override IEnumerable Texts => _corpus.Corpora.SelectMany(c => c.Texts); diff --git a/tests/SIL.Machine.Tests/Corpora/CorporaExtensionsTests.cs b/tests/SIL.Machine.Tests/Corpora/CorporaExtensionsTests.cs index db5e85ac..836f3bda 100644 --- a/tests/SIL.Machine.Tests/Corpora/CorporaExtensionsTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/CorporaExtensionsTests.cs @@ -94,7 +94,7 @@ public void MergedCorpus_SelectFirst() } ) ); - var mergedCorpus = new List { corpus1, corpus2, corpus3 }.ChooseFirst([true, true, true]); + var mergedCorpus = new List { corpus1, corpus2, corpus3 }.ChooseFirst(); var rows = mergedCorpus.ToArray(); Assert.That(rows, Has.Length.EqualTo(3), JsonSerializer.Serialize(rows)); Assert.That(rows[0].Text, Is.EqualTo("source 1 segment 1 .")); @@ -138,7 +138,7 @@ public void MergedCorpus_SelectRandom_Seed123456() } ) ); - var mergedCorpus = new List { corpus1, corpus2, corpus3 }.ChooseRandom([true, true, true], 123456); + var mergedCorpus = new List { corpus1, corpus2, corpus3 }.ChooseRandom(123456); var rows = mergedCorpus.ToArray(); Assert.That(rows, Has.Length.EqualTo(3), JsonSerializer.Serialize(rows)); Assert.Multiple(() => @@ -185,7 +185,7 @@ public void MergedCorpus_SelectRandom_Seed4501() } ) ); - var mergedCorpus = new List { corpus1, corpus2, corpus3 }.ChooseRandom([true, true, true], 4501); + var mergedCorpus = new List { corpus1, corpus2, corpus3 }.ChooseRandom(4501); var rows = mergedCorpus.ToArray(); Assert.That(rows, Has.Length.EqualTo(3), JsonSerializer.Serialize(rows)); Assert.Multiple(() => @@ -233,9 +233,7 @@ public void AlignMergedCorpora() ) ); - ITextCorpus sourceCorpus = new List { sourceCorpus1, sourceCorpus2, sourceCorpus3 }.ChooseFirst( - [true, true, true] - ); + ITextCorpus sourceCorpus = new List { sourceCorpus1, sourceCorpus2, sourceCorpus3 }.ChooseFirst(); var targetCorpus1 = new DictionaryTextCorpus( new MemoryText( @@ -271,9 +269,7 @@ public void AlignMergedCorpora() ) ); - ITextCorpus targetCorpus = new List { targetCorpus1, targetCorpus2, targetCorpus3 }.ChooseFirst( - [true, true, true] - ); + ITextCorpus targetCorpus = new List { targetCorpus1, targetCorpus2, targetCorpus3 }.ChooseFirst(); IParallelTextCorpus alignedCorpus = sourceCorpus.AlignRows(targetCorpus); ParallelTextRow[] rows = alignedCorpus.GetRows().ToArray();