Skip to content

Commit

Permalink
Improve OCR replace list guesses
Browse files Browse the repository at this point in the history
  • Loading branch information
niksedk committed Jun 14, 2020
1 parent a1c35e3 commit c78dda9
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 9 deletions.
2 changes: 1 addition & 1 deletion Dictionaries/eng_OCRFixReplaceList.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2820,7 +2820,7 @@
</PartialWordsAlways>
<PartialWords>
<!-- Will be used to check words not in dictionary.
If new word(s) and longer than 5 chars and exists in spelling dictionary, it is (or they are) accepted -->
If new word(s) and longer than 4 chars and exists in spelling dictionary, it is (or they are) accepted -->
<WordPart from="IVI" to="M" />
<WordPart from="/" to="l" />
<WordPart from="|" to="I" />
Expand Down
2 changes: 1 addition & 1 deletion Dictionaries/pol_OCRFixReplaceList.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
<PartialWordsAlways />
<PartialWords>
<!-- Will be used to check words not in dictionary.
If new word(s) and longer than 5 chars and exists
If new word(s) and longer than 4 chars and exists
in spelling dictionary, it is (or they are) accepted -->
<!-- "f " will be two words -->
<WordPart from="f" to="f " />
Expand Down
21 changes: 15 additions & 6 deletions libse/Dictionaries/OcrFixReplaceList.cs
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,7 @@ public IEnumerable<string> CreateGuessesFromLetters(string word)
if (word.Substring(i).StartsWith(letter, StringComparison.Ordinal))
{

if (i == word.Length - letter.Length && !_partialWordReplaceList[letter].Contains(" "))
if (i == word.Length - letter.Length && !_partialWordReplaceList[letter].Contains(' '))
{
var guess = word.Remove(i, letter.Length).Insert(i, _partialWordReplaceList[letter]);
AddToGuessList(list, guess);
Expand All @@ -379,14 +379,23 @@ public IEnumerable<string> CreateGuessesFromLetters(string word)

if (indexes.Count > 1)
{
var multiGuess = word;
for (int i = indexes.Count - 1; i >= 0; i--)
if (!_partialWordReplaceList[letter].Contains(' '))
{
var idx = indexes[i];
multiGuess = multiGuess.Remove(idx, letter.Length).Insert(idx, _partialWordReplaceList[letter]);
AddToGuessList(list, multiGuess);
var multiGuess = word;
for (int i = indexes.Count - 1; i >= 0; i--)
{
var idx = indexes[i];
multiGuess = multiGuess.Remove(idx, letter.Length).Insert(idx, _partialWordReplaceList[letter]);
AddToGuessList(list, multiGuess);
}

AddToGuessList(list, word.Replace(letter, _partialWordReplaceList[letter]));
}
}
else if (indexes.Count > 0)
{
AddToGuessList(list, word.Replace(letter, _partialWordReplaceList[letter]));
}

if (indexes.Count > 0)
{
Expand Down
2 changes: 1 addition & 1 deletion src/Logic/Ocr/OcrFixEngine.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1592,7 +1592,7 @@ public string FixUnknownWordsViaGuessOrPrompt(out int wordsNotFound, string line
guesses.Add(wordWithVerticalLine);
}

if (word.Length > 5 && autoGuess == AutoGuessLevel.Aggressive)
if (word.Length > 4 && autoGuess == AutoGuessLevel.Aggressive)
{
guesses.AddRange((List<string>)_ocrFixReplaceList.CreateGuessesFromLetters(word));

Expand Down

0 comments on commit c78dda9

Please sign in to comment.