Skip to content

Commit

Permalink
A few improvements to the Tesseract API.
Browse files Browse the repository at this point in the history
GeReV committed Dec 10, 2024
1 parent 3e72291 commit 3d32a1c
Showing 3 changed files with 55 additions and 40 deletions.
4 changes: 2 additions & 2 deletions HocrEditor.Tesseract/TesseractApi.cs
Original file line number Diff line number Diff line change
@@ -136,8 +136,8 @@ public bool GetDoubleVariable(string name, out double value)
/// <summary>
/// Forwards <paramref name="name"/> to the native <c>TessBaseAPISetInputName</c> entry point
/// for this API instance.
/// </summary>
/// <param name="name">The input name handed to the native call.</param>
public void SetInputName(string name)
{
    tesseractDllHandle.TessBaseAPISetInputName(
        apiHandle.DangerousGetHandle(),
        name
    );
}

/// <summary>
/// Passes a raw pixel buffer held in a managed array to the native
/// <c>TessBaseAPISetImage</c> entry point for this API instance.
/// </summary>
/// <param name="data">Array containing the pixel bytes.</param>
/// <param name="width">Image width forwarded to the native call.</param>
/// <param name="height">Image height forwarded to the native call.</param>
/// <param name="bytesPerPixel">Bytes per pixel forwarded to the native call.</param>
/// <param name="bytesPerLine">Bytes per row forwarded to the native call.</param>
public void SetImage(byte[] data, int width, int height, int bytesPerPixel, int bytesPerLine)
{
    tesseractDllHandle.TessBaseAPISetImage(
        apiHandle.DangerousGetHandle(),
        data,
        width,
        height,
        bytesPerPixel,
        bytesPerLine
    );
}
/// <summary>
/// Passes a raw pixel buffer to the native <c>TessBaseAPISetImage</c> entry point for this
/// API instance without first copying it into an array.
/// </summary>
/// <param name="data">
/// Span over the pixel bytes; a reference to its first element is what gets handed to the native call.
/// </param>
/// <param name="width">Image width forwarded to the native call.</param>
/// <param name="height">Image height forwarded to the native call.</param>
/// <param name="bytesPerPixel">Bytes per pixel forwarded to the native call.</param>
/// <param name="bytesPerLine">Bytes per row forwarded to the native call.</param>
public void SetImage(ReadOnlySpan<byte> data, int width, int height, int bytesPerPixel, int bytesPerLine)
{
    tesseractDllHandle.TessBaseAPISetImage(
        apiHandle.DangerousGetHandle(),
        MemoryMarshal.GetReference(data),
        width,
        height,
        bytesPerPixel,
        bytesPerLine
    );
}

/// <summary>
/// Forwards <paramref name="ppi"/> to the native <c>TessBaseAPISetSourceResolution</c> entry point
/// for this API instance.
/// </summary>
/// <param name="ppi">Resolution value handed to the native call.</param>
public void SetSourceResolution(int ppi)
{
    tesseractDllHandle.TessBaseAPISetSourceResolution(
        apiHandle.DangerousGetHandle(),
        ppi
    );
}
2 changes: 1 addition & 1 deletion HocrEditor.Tesseract/TesseractDllHandle.cs
Original file line number Diff line number Diff line change
@@ -126,7 +126,7 @@ int configSize

internal delegate void TessBaseAPISetImage(
IntPtr handle,
byte[] data,
in byte data,
int width,
int height,
int bytesPerPixel,
89 changes: 52 additions & 37 deletions HocrEditor.Tesseract/TesseractService.cs
Original file line number Diff line number Diff line change
@@ -1,15 +1,41 @@
using System.Diagnostics;
using System.Drawing;
using System.Drawing;
using System.Security;
using Microsoft.Win32;
using Optional;
using SkiaSharp;

namespace HocrEditor.Tesseract;

public sealed class TesseractService : IDisposable
{
private bool isDisposed;
private readonly object lck = new();
private readonly Lock lck = new();
private readonly TesseractApi tesseractApi;

/// <summary>
/// Reads the <c>Path</c> value stored under the <c>SOFTWARE\Tesseract-OCR</c> registry key.
/// </summary>
/// <remarks>
/// The current-user hive is consulted first, then the local-machine hive. The first hive that
/// yields a string <c>Path</c> value wins. A hive that cannot be read (insufficient rights, key
/// being deleted) is skipped rather than aborting the lookup, so this getter never throws for
/// registry access problems.
/// </remarks>
/// <value>
/// <c>Some(path)</c> when a string <c>Path</c> value was found; otherwise <c>None</c>.
/// </value>
public static Option<string> DefaultPath
{
    get
    {
        const string tesseractKey = "SOFTWARE\\Tesseract-OCR";

        foreach (var hive in new[] { Registry.CurrentUser, Registry.LocalMachine })
        {
            try
            {
                using var tesseractRegistryKey = hive.OpenSubKey(tesseractKey);

                // Fall through to the next hive when the key exists but carries no usable
                // "Path" value, instead of giving up on the first key that opens.
                if (tesseractRegistryKey?.GetValue("Path") is string path)
                {
                    return Option.Some(path);
                }
            }
            catch (Exception e) when (
                e is SecurityException or UnauthorizedAccessException or System.IO.IOException
            )
            {
                // Best effort: an unreadable hive is treated the same as a missing entry.
            }
        }

        return Option.None<string>();
    }
}

public TesseractService(string tesseractPath, IEnumerable<string> languages)
{
tesseractApi = TesseractFactory.CreateApi(tesseractPath);
@@ -46,58 +72,50 @@ public string[] GetLanguages()

/// <summary>
/// Loads <paramref name="image"/> into the Tesseract API and returns the thresholded image
/// the API reports for it.
/// </summary>
/// <param name="image">
/// Bitmap whose pixel span, width, height, bytes-per-pixel and row-bytes are handed to
/// <c>tesseractApi.SetImage</c>.
/// </param>
/// <param name="region">
/// NOTE(review): this parameter is not applied here, whereas <c>Recognize</c> forwards a
/// non-empty region through <c>SetRectangle</c> — confirm whether that is intentional.
/// </param>
/// <returns>The bitmap returned by <c>tesseractApi.GetThresholdedImage()</c>.</returns>
/// <exception cref="ObjectDisposedException">The service has already been disposed.</exception>
public SKBitmap GetThresholdedImage(SKBitmap image, Rectangle region = new())
{
    ObjectDisposedException.ThrowIf(isDisposed, this);

    lock (lck)
    {
        // Dispose() takes the same lock, so the flag is checked again once the lock is held,
        // mirroring what Recognize does.
        ObjectDisposedException.ThrowIf(isDisposed, this);

        // Hand the pixel span over directly; no intermediate byte[] copy is needed.
        tesseractApi.SetImage(
            image.GetPixelSpan(),
            image.Width,
            image.Height,
            image.BytesPerPixel,
            image.RowBytes
        );

        return tesseractApi.GetThresholdedImage();
    }
}

/// <summary>
/// Runs recognition on <paramref name="image"/> on a thread-pool task and returns the hOCR text
/// produced by the Tesseract API.
/// </summary>
/// <param name="image">
/// Bitmap whose pixel span, width, height, bytes-per-pixel and row-bytes are handed to
/// <c>tesseractApi.SetImage</c>.
/// </param>
/// <param name="imageFilename">Value passed to <c>tesseractApi.SetInputName</c>.</param>
/// <param name="region">
/// Optional rectangle; when it is not empty it is applied through <c>tesseractApi.SetRectangle</c>
/// after the image has been set.
/// </param>
/// <returns>The string returned by <c>tesseractApi.GetHocrText()</c>.</returns>
/// <exception cref="ObjectDisposedException">
/// The service was disposed before the call, or before the queued work acquired the lock.
/// </exception>
public async Task<string> Recognize(SKBitmap image, string imageFilename, Rectangle region = new())
{
    ObjectDisposedException.ThrowIf(isDisposed, this);

    return await Task.Run(
            () =>
            {
                lock (lck)
                {
                    // The work item may start after the service was disposed, so the flag is
                    // checked again once the lock is held.
                    ObjectDisposedException.ThrowIf(isDisposed, this);

                    tesseractApi.SetInputName(imageFilename);
                    tesseractApi.SetImage(
                        image.GetPixelSpan(),
                        image.Width,
                        image.Height,
                        image.BytesPerPixel,
                        image.RowBytes
                    );

                    // tesseractApi.SetSourceResolution(300);

                    if (!region.IsEmpty)
                    {
                        tesseractApi.SetRectangle(region.X, region.Y, region.Width, region.Height);
                    }

                    return tesseractApi.GetHocrText();
                }
            }
        )
        .ConfigureAwait(false);
}

/// <summary>
/// Copies the pixel span of <paramref name="image"/> into a newly allocated byte array.
/// </summary>
/// <param name="image">Bitmap whose pixel span is copied.</param>
/// <returns>A new array holding the bytes of <c>image.GetPixelSpan()</c>.</returns>
private static byte[] GetBitmapBytes(SKBitmap image)
{
    var pixels = image.GetPixelSpan();

    return pixels.ToArray();
}

public void Dispose()
{
if (isDisposed)
@@ -107,10 +125,7 @@ public void Dispose()

lock (lck)
{
if (isDisposed)
{
throw new ObjectDisposedException(nameof(tesseractApi));
}
ObjectDisposedException.ThrowIf(isDisposed, this);

tesseractApi.Clear();
tesseractApi.Dispose();

0 comments on commit 3d32a1c

Please sign in to comment.