From 9eb9d6db1c32212832a72f67af3738a0d4eb7f9b Mon Sep 17 00:00:00 2001
From: Emmanuel Marty <emmanuel@fgl.com>
Date: Wed, 3 Nov 2021 15:35:47 +0100
Subject: [PATCH] Initial checkin

---
 LICENSE                                       |    3 +
 LICENSE.cc0.md                                |   43 +
 LICENSE.zlib.md                               |   19 +
 Makefile                                      |   28 +
 README.md                                     |   23 +
 VS2019/salvador.sln                           |   31 +
 VS2019/salvador.vcxproj                       |  178 ++
 VS2019/salvador.vcxproj.filters               |   78 +
 VS2019/salvador.vcxproj.user                  |   19 +
 src/expand.c                                  |  311 +++
 src/expand.h                                  |   69 +
 src/format.h                                  |   43 +
 src/libdivsufsort/CHANGELOG.md                |   21 +
 src/libdivsufsort/CMakeLists.txt              |   99 +
 .../CMakeModules/AppendCompilerFlags.cmake    |   38 +
 .../CMakeModules/CheckFunctionKeywords.cmake  |   15 +
 src/libdivsufsort/CMakeModules/CheckLFS.cmake |  109 +
 .../CMakeModules/ProjectCPack.cmake           |   38 +
 .../CMakeModules/cmake_uninstall.cmake.in     |   36 +
 src/libdivsufsort/LICENSE                     |   21 +
 src/libdivsufsort/README.md                   |  140 ++
 src/libdivsufsort/VERSION.cmake               |   23 +
 src/libdivsufsort/examples/CMakeLists.txt     |   11 +
 src/libdivsufsort/examples/bwt.c              |  220 ++
 src/libdivsufsort/examples/mksary.c           |  193 ++
 src/libdivsufsort/examples/sasearch.c         |  165 ++
 src/libdivsufsort/examples/suftest.c          |  164 ++
 src/libdivsufsort/examples/unbwt.c            |  207 ++
 src/libdivsufsort/include/CMakeLists.txt      |  162 ++
 src/libdivsufsort/include/config.h.cmake      |   81 +
 src/libdivsufsort/include/divsufsort.h        |  189 ++
 src/libdivsufsort/include/divsufsort.h.cmake  |  180 ++
 src/libdivsufsort/include/divsufsort_config.h |    9 +
 .../include/divsufsort_private.h              |  205 ++
 src/libdivsufsort/include/lfs.h.cmake         |   56 +
 src/libdivsufsort/lib/CMakeLists.txt          |   31 +
 src/libdivsufsort/lib/divsufsort.c            |  431 ++++
 src/libdivsufsort/lib/divsufsort_utils.c      |  383 ++++
 src/libdivsufsort/lib/sssort.c                |  815 ++++++++
 src/libdivsufsort/lib/trsort.c                |  586 ++++++
 src/libdivsufsort/pkgconfig/CMakeLists.txt    |    9 +
 .../pkgconfig/libdivsufsort.pc.cmake          |   11 +
 src/libsalvador.h                             |   40 +
 src/matchfinder.c                             |  410 ++++
 src/matchfinder.h                             |   77 +
 src/salvador.c                                | 1233 +++++++++++
 src/shrink.c                                  | 1820 +++++++++++++++++
 src/shrink.h                                  |  178 ++
 48 files changed, 9251 insertions(+)
 create mode 100644 LICENSE
 create mode 100644 LICENSE.cc0.md
 create mode 100644 LICENSE.zlib.md
 create mode 100644 Makefile
 create mode 100644 README.md
 create mode 100644 VS2019/salvador.sln
 create mode 100644 VS2019/salvador.vcxproj
 create mode 100644 VS2019/salvador.vcxproj.filters
 create mode 100644 VS2019/salvador.vcxproj.user
 create mode 100644 src/expand.c
 create mode 100644 src/expand.h
 create mode 100644 src/format.h
 create mode 100644 src/libdivsufsort/CHANGELOG.md
 create mode 100644 src/libdivsufsort/CMakeLists.txt
 create mode 100644 src/libdivsufsort/CMakeModules/AppendCompilerFlags.cmake
 create mode 100644 src/libdivsufsort/CMakeModules/CheckFunctionKeywords.cmake
 create mode 100644 src/libdivsufsort/CMakeModules/CheckLFS.cmake
 create mode 100644 src/libdivsufsort/CMakeModules/ProjectCPack.cmake
 create mode 100644 src/libdivsufsort/CMakeModules/cmake_uninstall.cmake.in
 create mode 100644 src/libdivsufsort/LICENSE
 create mode 100644 src/libdivsufsort/README.md
 create mode 100644 src/libdivsufsort/VERSION.cmake
 create mode 100644 src/libdivsufsort/examples/CMakeLists.txt
 create mode 100644 src/libdivsufsort/examples/bwt.c
 create mode 100644 src/libdivsufsort/examples/mksary.c
 create mode 100644 src/libdivsufsort/examples/sasearch.c
 create mode 100644 src/libdivsufsort/examples/suftest.c
 create mode 100644 src/libdivsufsort/examples/unbwt.c
 create mode 100644 src/libdivsufsort/include/CMakeLists.txt
 create mode 100644 src/libdivsufsort/include/config.h.cmake
 create mode 100644 src/libdivsufsort/include/divsufsort.h
 create mode 100644 src/libdivsufsort/include/divsufsort.h.cmake
 create mode 100644 src/libdivsufsort/include/divsufsort_config.h
 create mode 100644 src/libdivsufsort/include/divsufsort_private.h
 create mode 100644 src/libdivsufsort/include/lfs.h.cmake
 create mode 100644 src/libdivsufsort/lib/CMakeLists.txt
 create mode 100644 src/libdivsufsort/lib/divsufsort.c
 create mode 100644 src/libdivsufsort/lib/divsufsort_utils.c
 create mode 100644 src/libdivsufsort/lib/sssort.c
 create mode 100644 src/libdivsufsort/lib/trsort.c
 create mode 100644 src/libdivsufsort/pkgconfig/CMakeLists.txt
 create mode 100644 src/libdivsufsort/pkgconfig/libdivsufsort.pc.cmake
 create mode 100644 src/libsalvador.h
 create mode 100644 src/matchfinder.c
 create mode 100644 src/matchfinder.h
 create mode 100644 src/salvador.c
 create mode 100644 src/shrink.c
 create mode 100644 src/shrink.h

diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..213be1a
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,3 @@
+The salvador code is available under the Zlib license, except for src/matchfinder.c which is placed under the Creative Commons CC0 license.
+
+Please consult LICENSE.zlib.md and LICENSE.cc0.md for more information.
diff --git a/LICENSE.cc0.md b/LICENSE.cc0.md
new file mode 100644
index 0000000..139c68e
--- /dev/null
+++ b/LICENSE.cc0.md
@@ -0,0 +1,43 @@
+## creative commons
+
+# CC0 1.0 Universal
+
+CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED HEREUNDER.
+
+### Statement of Purpose
+
+The laws of most jurisdictions throughout the world automatically confer exclusive Copyright and Related Rights (defined below) upon the creator and subsequent owner(s) (each and all, an "owner") of an original work of authorship and/or a database (each, a "Work").
+
+Certain owners wish to permanently relinquish those rights to a Work for the purpose of contributing to a commons of creative, cultural and scientific works ("Commons") that the public can reliably and without fear of later claims of infringement build upon, modify, incorporate in other works, reuse and redistribute as freely as possible in any form whatsoever and for any purposes, including without limitation commercial purposes. These owners may contribute to the Commons to promote the ideal of a free culture and the further production of creative, cultural and scientific works, or to gain reputation or greater distribution for their Work in part through the use and efforts of others.
+
+For these and/or other purposes and motivations, and without any expectation of additional consideration or compensation, the person associating CC0 with a Work (the "Affirmer"), to the extent that he or she is an owner of Copyright and Related Rights in the Work, voluntarily elects to apply CC0 to the Work and publicly distribute the Work under its terms, with knowledge of his or her Copyright and Related Rights in the Work and the meaning and intended legal effect of CC0 on those rights.
+
+1. __Copyright and Related Rights.__ A Work made available under CC0 may be protected by copyright and related or neighboring rights ("Copyright and Related Rights"). Copyright and Related Rights include, but are not limited to, the following:
+
+    i. the right to reproduce, adapt, distribute, perform, display, communicate, and translate a Work;
+
+    ii. moral rights retained by the original author(s) and/or performer(s);
+
+    iii. publicity and privacy rights pertaining to a person's image or likeness depicted in a Work;
+
+    iv. rights protecting against unfair competition in regards to a Work, subject to the limitations in paragraph 4(a), below;
+
+    v. rights protecting the extraction, dissemination, use and reuse of data in a Work;
+
+    vi. database rights (such as those arising under Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, and under any national implementation thereof, including any amended or successor version of such directive); and
+
+    vii. other similar, equivalent or corresponding rights throughout the world based on applicable law or treaty, and any national implementations thereof.
+
+2. __Waiver.__ To the greatest extent permitted by, but not in contravention of, applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and unconditionally waives, abandons, and surrenders all of Affirmer's Copyright and Related Rights and associated claims and causes of action, whether now known or unknown (including existing as well as future claims and causes of action), in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each member of the public at large and to the detriment of Affirmer's heirs and successors, fully intending that such Waiver shall not be subject to revocation, rescission, cancellation, termination, or any other legal or equitable action to disrupt the quiet enjoyment of the Work by the public as contemplated by Affirmer's express Statement of Purpose.
+
+3. __Public License Fallback.__ Should any part of the Waiver for any reason be judged legally invalid or ineffective under applicable law, then the Waiver shall be preserved to the maximum extent permitted taking into account Affirmer's express Statement of Purpose. In addition, to the extent the Waiver is so judged Affirmer hereby grants to each affected person a royalty-free, non transferable, non sublicensable, non exclusive, irrevocable and unconditional license to exercise Affirmer's Copyright and Related Rights in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "License"). The License shall be deemed effective as of the date CC0 was applied by Affirmer to the Work. Should any part of the License for any reason be judged legally invalid or ineffective under applicable law, such partial invalidity or ineffectiveness shall not invalidate the remainder of the License, and in such case Affirmer hereby affirms that he or she will not (i) exercise any of his or her remaining Copyright and Related Rights in the Work or (ii) assert any associated claims and causes of action with respect to the Work, in either case contrary to Affirmer's express Statement of Purpose.
+
+4. __Limitations and Disclaimers.__
+
+    a. No trademark or patent rights held by Affirmer are waived, abandoned, surrendered, licensed or otherwise affected by this document.
+
+    b. Affirmer offers the Work as-is and makes no representations or warranties of any kind concerning the Work, express, implied, statutory or otherwise, including without limitation warranties of title, merchantability, fitness for a particular purpose, non infringement, or the absence of latent or other defects, accuracy, or the present or absence of errors, whether or not discoverable, all to the greatest extent permissible under applicable law.
+
+    c. Affirmer disclaims responsibility for clearing rights of other persons that may apply to the Work or any use thereof, including without limitation any person's Copyright and Related Rights in the Work. Further, Affirmer disclaims responsibility for obtaining any necessary consents, permissions or other rights required for any use of the Work.
+
+    d. Affirmer understands and acknowledges that Creative Commons is not a party to this document and has no duty or obligation with respect to this CC0 or use of the Work.
diff --git a/LICENSE.zlib.md b/LICENSE.zlib.md
new file mode 100644
index 0000000..e1296a1
--- /dev/null
+++ b/LICENSE.zlib.md
@@ -0,0 +1,19 @@
+Copyright (c) 2019 Emmanuel Marty
+
+This software is provided 'as-is', without any express or implied warranty. In
+no event will the authors be held liable for any damages arising from the use of
+this software.
+
+Permission is granted to anyone to use this software for any purpose, including
+commercial applications, and to alter it and redistribute it freely, subject to
+the following restrictions:
+
+1.  The origin of this software must not be misrepresented; you must not claim
+    that you wrote the original software. If you use this software in a product,
+    an acknowledgment in the product documentation would be appreciated but is
+    not required.
+
+2.  Altered source versions must be plainly marked as such, and must not be
+    misrepresented as being the original software.
+
+3.  This notice may not be removed or altered from any source distribution.
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..c09b0e9
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,28 @@
+CC=clang
+CFLAGS=-O3 -g -fomit-frame-pointer -Isrc/libdivsufsort/include -Isrc
+OBJDIR=obj
+LDFLAGS=
+
+$(OBJDIR)/%.o: src/../%.c
+	@mkdir -p '$(@D)'
+	$(CC) $(CFLAGS) -c $< -o $@
+
+APP := salvador
+
+OBJS += $(OBJDIR)/src/salvador.o
+OBJS += $(OBJDIR)/src/expand.o
+OBJS += $(OBJDIR)/src/matchfinder.o
+OBJS += $(OBJDIR)/src/shrink.o
+OBJS += $(OBJDIR)/src/libdivsufsort/lib/divsufsort.o
+OBJS += $(OBJDIR)/src/libdivsufsort/lib/divsufsort_utils.o
+OBJS += $(OBJDIR)/src/libdivsufsort/lib/sssort.o
+OBJS += $(OBJDIR)/src/libdivsufsort/lib/trsort.o
+
+all: $(APP)
+
+$(APP): $(OBJS)
+	$(CC) $^ $(LDFLAGS) -o $(APP)
+
+clean:
+	@rm -rf $(APP) $(OBJDIR)
+
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..1bdc088
--- /dev/null
+++ b/README.md
@@ -0,0 +1,23 @@
+salvador -- a fast, near-optimal compressor for the ZX0 format
+==============================================================
+
+salvador is a command-line tool and a library that compresses bitstreams in the ZX0 format. 
+
+The tool outputs compressed files that are, on average, within 0.02% of the size of the files produced by the zx0 packer itself. The compressor is, however, several orders of magnitude faster, with compression speed similar to [apultra](https://github.com/emmanuel-marty/apultra).
+
+The compressor can pack files of any size; however, due to the 31.5 KB window size, files larger than 128-256 KB will get a better ratio with apultra. This will not be an issue when compressing for the main target, 8-bit micros. By default, salvador compresses for the modern (V2) format. The classic, legacy format is also supported; use the -classic flag on the command line.
+
+salvador is written in portable C. It is fully open-source under a liberal license. You can use the ZX0 decompression libraries for your target environment. As with LZSA and apultra, you can do whatever you like with it.
+
+The output is fully compatible with the [ZX0](https://github.com/einar-saukas/ZX0) compressor by Einar Saukas.
+
+Decompression code:
+
+ * [z80](https://github.com/einar-saukas/ZX0/tree/main/z80) by Einar Saukas, Urusergi and spke.
+ * [8088](https://github.com/emmanuel-marty/unzx0_x86) by Emmanuel Marty. 
+ * [68000](https://github.com/emmanuel-marty/unzx0_68000) by Emmanuel Marty. 
+
+License:
+
+* The salvador code is available under the Zlib license.
+* The match finder (matchfinder.c) is available under the CC0 license due to using portions of code from Eric Biggers' wimlib in the suffix array-based matchfinder.
diff --git a/VS2019/salvador.sln b/VS2019/salvador.sln
new file mode 100644
index 0000000..e3eb04b
--- /dev/null
+++ b/VS2019/salvador.sln
@@ -0,0 +1,31 @@
+﻿
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio Version 16
+VisualStudioVersion = 16.0.31729.503
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "salvador", "salvador.vcxproj", "{F4C10DBA-8808-4418-A78F-719C6A7761EF}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|x64 = Debug|x64
+		Debug|x86 = Debug|x86
+		Release|x64 = Release|x64
+		Release|x86 = Release|x86
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{F4C10DBA-8808-4418-A78F-719C6A7761EF}.Debug|x64.ActiveCfg = Debug|x64
+		{F4C10DBA-8808-4418-A78F-719C6A7761EF}.Debug|x64.Build.0 = Debug|x64
+		{F4C10DBA-8808-4418-A78F-719C6A7761EF}.Debug|x86.ActiveCfg = Debug|Win32
+		{F4C10DBA-8808-4418-A78F-719C6A7761EF}.Debug|x86.Build.0 = Debug|Win32
+		{F4C10DBA-8808-4418-A78F-719C6A7761EF}.Release|x64.ActiveCfg = Release|x64
+		{F4C10DBA-8808-4418-A78F-719C6A7761EF}.Release|x64.Build.0 = Release|x64
+		{F4C10DBA-8808-4418-A78F-719C6A7761EF}.Release|x86.ActiveCfg = Release|Win32
+		{F4C10DBA-8808-4418-A78F-719C6A7761EF}.Release|x86.Build.0 = Release|Win32
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+	GlobalSection(ExtensibilityGlobals) = postSolution
+		SolutionGuid = {6A45AD4E-9B6B-4AAB-9FEA-CA453AE1822D}
+	EndGlobalSection
+EndGlobal
diff --git a/VS2019/salvador.vcxproj b/VS2019/salvador.vcxproj
new file mode 100644
index 0000000..82b324f
--- /dev/null
+++ b/VS2019/salvador.vcxproj
@@ -0,0 +1,178 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <VCProjectVersion>16.0</VCProjectVersion>
+    <Keyword>Win32Proj</Keyword>
+    <ProjectGuid>{f4c10dba-8808-4418-a78f-719c6a7761ef}</ProjectGuid>
+    <RootNamespace>salvador</RootNamespace>
+    <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="Shared">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <LinkIncremental>true</LinkIncremental>
+    <OutDir>$(ProjectDir)bin\</OutDir>
+    <TargetName>$(ProjectName)_debug</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(ProjectDir)bin\</OutDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+    <OutDir>$(ProjectDir)bin\</OutDir>
+    <TargetName>$(ProjectName)_debug</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(ProjectDir)bin\</OutDir>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <SDLCheck>true</SDLCheck>
+      <PreprocessorDefinitions>_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <ConformanceMode>true</ConformanceMode>
+      <AdditionalIncludeDirectories>..\src\libdivsufsort\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <SDLCheck>true</SDLCheck>
+      <PreprocessorDefinitions>_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <ConformanceMode>true</ConformanceMode>
+      <AdditionalIncludeDirectories>..\src\libdivsufsort\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <SDLCheck>true</SDLCheck>
+      <PreprocessorDefinitions>_CRT_SECURE_NO_WARNINGS;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <ConformanceMode>true</ConformanceMode>
+      <AdditionalIncludeDirectories>..\src\libdivsufsort\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <SDLCheck>true</SDLCheck>
+      <PreprocessorDefinitions>_CRT_SECURE_NO_WARNINGS;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <ConformanceMode>true</ConformanceMode>
+      <AdditionalIncludeDirectories>..\src\libdivsufsort\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <OmitFramePointers>true</OmitFramePointers>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\src\expand.c" />
+    <ClCompile Include="..\src\libdivsufsort\lib\divsufsort.c" />
+    <ClCompile Include="..\src\libdivsufsort\lib\divsufsort_utils.c" />
+    <ClCompile Include="..\src\libdivsufsort\lib\sssort.c" />
+    <ClCompile Include="..\src\libdivsufsort\lib\trsort.c" />
+    <ClCompile Include="..\src\matchfinder.c" />
+    <ClCompile Include="..\src\salvador.c" />
+    <ClCompile Include="..\src\shrink.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\src\expand.h" />
+    <ClInclude Include="..\src\format.h" />
+    <ClInclude Include="..\src\libdivsufsort\include\divsufsort.h" />
+    <ClInclude Include="..\src\libdivsufsort\include\divsufsort_config.h" />
+    <ClInclude Include="..\src\libdivsufsort\include\divsufsort_private.h" />
+    <ClInclude Include="..\src\libsalvador.h" />
+    <ClInclude Include="..\src\matchfinder.h" />
+    <ClInclude Include="..\src\shrink.h" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/VS2019/salvador.vcxproj.filters b/VS2019/salvador.vcxproj.filters
new file mode 100644
index 0000000..607fa47
--- /dev/null
+++ b/VS2019/salvador.vcxproj.filters
@@ -0,0 +1,78 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Fichiers sources">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Fichiers d%27en-tête">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd</Extensions>
+    </Filter>
+    <Filter Include="Fichiers de ressources">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
+    </Filter>
+    <Filter Include="Fichiers sources\libdivsufsort">
+      <UniqueIdentifier>{86e66d4d-937b-4037-af93-856105377549}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="Fichiers sources\libdivsufsort\include">
+      <UniqueIdentifier>{c4baa29b-3731-40b1-bcc9-d5aa5673114f}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="Fichiers sources\libdivsufsort\lib">
+      <UniqueIdentifier>{56092ba4-e514-4de2-8528-adff2da7ac3f}</UniqueIdentifier>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\src\expand.c">
+      <Filter>Fichiers sources</Filter>
+    </ClCompile>
+    <ClCompile Include="..\src\matchfinder.c">
+      <Filter>Fichiers sources</Filter>
+    </ClCompile>
+    <ClCompile Include="..\src\salvador.c">
+      <Filter>Fichiers sources</Filter>
+    </ClCompile>
+    <ClCompile Include="..\src\shrink.c">
+      <Filter>Fichiers sources</Filter>
+    </ClCompile>
+    <ClCompile Include="..\src\libdivsufsort\lib\divsufsort.c">
+      <Filter>Fichiers sources\libdivsufsort\lib</Filter>
+    </ClCompile>
+    <ClCompile Include="..\src\libdivsufsort\lib\divsufsort_utils.c">
+      <Filter>Fichiers sources\libdivsufsort\lib</Filter>
+    </ClCompile>
+    <ClCompile Include="..\src\libdivsufsort\lib\sssort.c">
+      <Filter>Fichiers sources\libdivsufsort\lib</Filter>
+    </ClCompile>
+    <ClCompile Include="..\src\libdivsufsort\lib\trsort.c">
+      <Filter>Fichiers sources\libdivsufsort\lib</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\src\expand.h">
+      <Filter>Fichiers sources</Filter>
+    </ClInclude>
+    <ClInclude Include="..\src\format.h">
+      <Filter>Fichiers sources</Filter>
+    </ClInclude>
+    <ClInclude Include="..\src\libsalvador.h">
+      <Filter>Fichiers sources</Filter>
+    </ClInclude>
+    <ClInclude Include="..\src\matchfinder.h">
+      <Filter>Fichiers sources</Filter>
+    </ClInclude>
+    <ClInclude Include="..\src\shrink.h">
+      <Filter>Fichiers sources</Filter>
+    </ClInclude>
+    <ClInclude Include="..\src\libdivsufsort\include\divsufsort.h">
+      <Filter>Fichiers sources\libdivsufsort\include</Filter>
+    </ClInclude>
+    <ClInclude Include="..\src\libdivsufsort\include\divsufsort_config.h">
+      <Filter>Fichiers sources\libdivsufsort\include</Filter>
+    </ClInclude>
+    <ClInclude Include="..\src\libdivsufsort\include\divsufsort_private.h">
+      <Filter>Fichiers sources\libdivsufsort\include</Filter>
+    </ClInclude>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/VS2019/salvador.vcxproj.user b/VS2019/salvador.vcxproj.user
new file mode 100644
index 0000000..217273f
--- /dev/null
+++ b/VS2019/salvador.vcxproj.user
@@ -0,0 +1,19 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="Current" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <LocalDebuggerCommandArguments>-c -v -test</LocalDebuggerCommandArguments>
+    <DebuggerFlavor>WindowsLocalDebugger</DebuggerFlavor>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <LocalDebuggerCommandArguments>-c -v -test</LocalDebuggerCommandArguments>
+    <DebuggerFlavor>WindowsLocalDebugger</DebuggerFlavor>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LocalDebuggerCommandArguments>-c -v -test</LocalDebuggerCommandArguments>
+    <DebuggerFlavor>WindowsLocalDebugger</DebuggerFlavor>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LocalDebuggerCommandArguments>-c -v -test</LocalDebuggerCommandArguments>
+    <DebuggerFlavor>WindowsLocalDebugger</DebuggerFlavor>
+  </PropertyGroup>
+</Project>
\ No newline at end of file
diff --git a/src/expand.c b/src/expand.c
new file mode 100644
index 0000000..711fff2
--- /dev/null
+++ b/src/expand.c
@@ -0,0 +1,311 @@
+/*
+ * expand.c - decompressor implementation
+ *
+ * Copyright (C) 2021 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Implements the ZX0 encoding designed by Einar Saukas. https://github.com/einar-saukas/ZX0
+ * Also inspired by Charles Bloom's compression blog. http://cbloomrants.blogspot.com/
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include "format.h"
+#include "expand.h"
+#include "libsalvador.h"
+
+#ifdef _MSC_VER
+#define FORCE_INLINE __forceinline /* MSVC keyword requesting unconditional inlining */
+#else /* _MSC_VER */
+#define FORCE_INLINE __attribute__((always_inline)) /* GCC/Clang-style attribute; used for every compiler that is not MSVC */
+#endif /* _MSC_VER */
+
+/* Fetch the next bit of the stream, most significant bit first. Returns 0 or 1, or -1 once the input is exhausted. */
+static inline FORCE_INLINE int salvador_read_bit(const unsigned char **ppInBlock, const unsigned char *pDataEnd, int *nCurBitMask, unsigned char *bits) {
+   const unsigned char *pCursor = *ppInBlock;
+   int nResult;
+
+   if (*nCurBitMask == 0) {
+      /* The current byte is used up: load the next one, or report end of input */
+      if (pCursor >= pDataEnd)
+         return -1;
+      *bits = *pCursor++;
+      *nCurBitMask = 128;
+   }
+
+   nResult = (*bits >> 7) & 1;
+   *bits = (unsigned char)(*bits << 1);
+   *nCurBitMask >>= 1;
+   *ppInBlock = pCursor;
+   return nResult;
+}
+
+/* Decode an Elias-coded value: every 0 control bit is followed by one data bit shifted into the value; any other control result (1, or -1 at end of input) stops. */
+static inline FORCE_INLINE int salvador_read_elias(const unsigned char** ppInBlock, const unsigned char* pDataEnd, const int nInitialValue, int* nCurBitMask, unsigned char* bits) {
+   unsigned int nValue = (unsigned int)nInitialValue;
+
+   /* Shift as unsigned: a long crafted code would overflow a signed int, which is undefined behavior. A failed data-bit read (-1) still sets every bit, as before. */
+   while (!salvador_read_bit(ppInBlock, pDataEnd, nCurBitMask, bits))
+      nValue = (nValue << 1) | (unsigned int)salvador_read_bit(ppInBlock, pDataEnd, nCurBitMask, bits);
+   return (int)nValue;
+}
+
+/* Same as salvador_read_elias, except that every data bit is flipped (XOR 1) before being shifted into the value. */
+static inline FORCE_INLINE int salvador_read_elias_inverted(const unsigned char** ppInBlock, const unsigned char* pDataEnd, const int nInitialValue, int* nCurBitMask, unsigned char* bits) {
+   unsigned int nValue = (unsigned int)nInitialValue;
+
+   /* Shift as unsigned: a long crafted code would overflow a signed int, which is undefined behavior. The resulting bit pattern is unchanged. */
+   while (!salvador_read_bit(ppInBlock, pDataEnd, nCurBitMask, bits))
+      nValue = (nValue << 1) | (unsigned int)(salvador_read_bit(ppInBlock, pDataEnd, nCurBitMask, bits) ^ 1);
+   return (int)nValue;
+}
+
+/* Decode an Elias-coded value whose first control bit (nFirstBit) was already obtained by the caller: non-zero means the value is just nInitialValue. */
+static inline FORCE_INLINE int salvador_read_elias_prefix(const unsigned char** ppInBlock, const unsigned char* pDataEnd, const int nInitialValue, int* nCurBitMask, unsigned char* bits, unsigned int nFirstBit) {
+   unsigned int nValue = (unsigned int)nInitialValue;
+
+   if (!nFirstBit) {
+      /* Shift as unsigned: a long crafted code would overflow a signed int, which is undefined behavior */
+      nValue = (nValue << 1) | (unsigned int)salvador_read_bit(ppInBlock, pDataEnd, nCurBitMask, bits);
+      while (!salvador_read_bit(ppInBlock, pDataEnd, nCurBitMask, bits))
+         nValue = (nValue << 1) | (unsigned int)salvador_read_bit(ppInBlock, pDataEnd, nCurBitMask, bits);
+   }
+   return (int)nValue;
+}
+
+/**
+ * Get maximum decompressed size of compressed data
+ *
+ * @param pInputData compressed data
+ * @param nInputSize compressed size in bytes
+ * @param nFlags compression flags (set to 0)
+ *
+ * @return maximum decompressed size, or -1 (converted to size_t) if the data is empty, truncated or malformed
+ */
+size_t salvador_get_max_decompressed_size(const unsigned char *pInputData, size_t nInputSize, const unsigned int nFlags) {
+   const unsigned char* pInputDataEnd = pInputData + nInputSize;
+   int nCurBitMask = 0;
+   unsigned char bits = 0;
+   int nIsFirstCommand = 1;
+   const int nIsInverted = (nFlags & FLG_IS_INVERTED) ? 1 : 0;
+   /* Accumulated in a size_t: a crafted stream can describe more bytes than an int can count */
+   size_t nDecompressedSize = 0;
+
+   if (pInputData >= pInputDataEnd)
+      return -1;
+
+   while (1) {
+      unsigned int nIsMatchWithOffset;
+
+      if (nIsFirstCommand) {
+         /* The first command is always literals */
+         nIsFirstCommand = 0;
+         nIsMatchWithOffset = 0;
+      }
+      else {
+         /* Read match with offset / literals bit */
+         nIsMatchWithOffset = salvador_read_bit(&pInputData, pInputDataEnd, &nCurBitMask, &bits);
+         if (nIsMatchWithOffset == -1)
+            return -1;
+      }
+
+      if (nIsMatchWithOffset == 0) {
+         unsigned int nLiterals = salvador_read_elias(&pInputData, pInputDataEnd, 1, &nCurBitMask, &bits);
+
+         /* Count literals; sizes are compared because pInputData + nLiterals can wrap around on corrupt data */
+
+         if (nLiterals <= (size_t)(pInputDataEnd - pInputData)) {
+            pInputData += nLiterals;
+            nDecompressedSize += nLiterals;
+         }
+         else {
+            return -1;
+         }
+
+         /* Read match with offset / rep match bit */
+
+         nIsMatchWithOffset = salvador_read_bit(&pInputData, pInputDataEnd, &nCurBitMask, &bits);
+         if (nIsMatchWithOffset == -1)
+            return -1;
+      }
+
+      unsigned int nMatchLen;
+
+      if (nIsMatchWithOffset) {
+         /* Match with offset */
+
+         unsigned int nMatchOffsetHighByte;
+
+         if (nIsInverted)
+            nMatchOffsetHighByte = salvador_read_elias_inverted(&pInputData, pInputDataEnd, 1, &nCurBitMask, &bits);
+         else
+            nMatchOffsetHighByte = salvador_read_elias(&pInputData, pInputDataEnd, 1, &nCurBitMask, &bits);
+
+         /* A high byte of 256 marks the end of the data */
+         if (nMatchOffsetHighByte == 256)
+            break;
+
+         if (pInputData >= pInputDataEnd)
+            return -1;
+
+         /* The offset itself does not influence the size; only bit 0 of the low byte is needed, */
+         /* as it carries the first control bit of the match length */
+         unsigned int nMatchOffsetLowByte = (unsigned int)(*pInputData++);
+
+         nMatchLen = salvador_read_elias_prefix(&pInputData, pInputDataEnd, 1, &nCurBitMask, &bits, nMatchOffsetLowByte & 1);
+
+         nMatchLen += (2 - 1);
+      }
+      else {
+         /* Rep-match */
+
+         nMatchLen = salvador_read_elias(&pInputData, pInputDataEnd, 1, &nCurBitMask, &bits);
+      }
+
+      /* Count matched bytes */
+      nDecompressedSize += nMatchLen;
+   }
+
+   return nDecompressedSize;
+}
+
+/**
+ * Decompress data in memory
+ *
+ * Every size decoded from the stream is validated against the remaining input and output
+ * space before any byte is copied, so corrupt or truncated data is rejected with -1.
+ *
+ * @param pInputData compressed data
+ * @param pOutData buffer for decompressed data; its first nDictionarySize bytes hold the dictionary
+ * @param nInputSize compressed size in bytes
+ * @param nMaxOutBufferSize maximum capacity of decompression buffer, not counting the dictionary
+ * @param nDictionarySize size of dictionary at the start of pOutData (0 for none)
+ * @param nFlags compression flags (set to 0)
+ *
+ * @return actual decompressed size (excluding the dictionary), or -1 (converted to size_t) for error
+ */
+size_t salvador_decompress(const unsigned char *pInputData, unsigned char *pOutData, size_t nInputSize, size_t nMaxOutBufferSize, size_t nDictionarySize, const unsigned int nFlags) {
+   const unsigned char *pInputDataEnd = pInputData + nInputSize;
+   unsigned char *pCurOutData = pOutData + nDictionarySize;
+   const unsigned char *pOutDataEnd = pCurOutData + nMaxOutBufferSize;
+   int nCurBitMask = 0;
+   unsigned char bits = 0;
+   int nMatchOffset = 1;
+   int nIsFirstCommand = 1;
+   const int nIsInverted = (nFlags & FLG_IS_INVERTED) ? 1 : 0;
+
+   if (pInputData >= pInputDataEnd && pCurOutData < pOutDataEnd)
+      return -1;
+
+   while (1) {
+      unsigned int nIsMatchWithOffset;
+
+      if (nIsFirstCommand) {
+         /* The first command is always literals */
+         nIsFirstCommand = 0;
+         nIsMatchWithOffset = 0;
+      }
+      else {
+         /* Read match with offset / literals bit */
+         nIsMatchWithOffset = salvador_read_bit(&pInputData, pInputDataEnd, &nCurBitMask, &bits);
+         if (nIsMatchWithOffset == -1)
+            return -1;
+      }
+
+      if (nIsMatchWithOffset == 0) {
+         unsigned int nLiterals = salvador_read_elias(&pInputData, pInputDataEnd, 1, &nCurBitMask, &bits);
+
+         /* Copy literals; sizes are compared because pointer + nLiterals can wrap around on corrupt data */
+
+         if (nLiterals <= (size_t)(pInputDataEnd - pInputData) &&
+            nLiterals <= (size_t)(pOutDataEnd - pCurOutData)) {
+            memcpy(pCurOutData, pInputData, nLiterals);
+            pInputData += nLiterals;
+            pCurOutData += nLiterals;
+         }
+         else {
+            return -1;
+         }
+
+         /* Read match with offset / rep match bit */
+
+         nIsMatchWithOffset = salvador_read_bit(&pInputData, pInputDataEnd, &nCurBitMask, &bits);
+         if (nIsMatchWithOffset == -1)
+            return -1;
+      }
+
+      unsigned int nMatchLen;
+
+      if (nIsMatchWithOffset) {
+         /* Match with offset */
+
+         unsigned int nMatchOffsetHighByte;
+
+         if (nIsInverted)
+            nMatchOffsetHighByte = salvador_read_elias_inverted(&pInputData, pInputDataEnd, 1, &nCurBitMask, &bits);
+         else
+            nMatchOffsetHighByte = salvador_read_elias(&pInputData, pInputDataEnd, 1, &nCurBitMask, &bits);
+
+         if (nMatchOffsetHighByte == 256)
+            break;
+         nMatchOffsetHighByte--;
+
+         if (pInputData >= pInputDataEnd)
+            return -1;
+
+         unsigned int nMatchOffsetLowByte = (unsigned int)(*pInputData++);
+         /* Built in unsigned arithmetic so an oversized high byte wraps instead of overflowing a signed int */
+         nMatchOffset = (int)(((nMatchOffsetHighByte << 7) | (127 - (nMatchOffsetLowByte >> 1))) + 1);
+
+         nMatchLen = salvador_read_elias_prefix(&pInputData, pInputDataEnd, 1, &nCurBitMask, &bits, nMatchOffsetLowByte & 1);
+
+         nMatchLen += (2 - 1);
+      }
+      else {
+         /* Rep-match */
+
+         nMatchLen = salvador_read_elias(&pInputData, pInputDataEnd, 1, &nCurBitMask, &bits);
+      }
+
+      /* Copy matched bytes. Offset and length are checked as sizes before any pointer is */
+      /* formed from them: on corrupt data pCurOutData - nMatchOffset or pCurOutData + nMatchLen */
+      /* could leave the buffer or wrap around, making pointer comparisons unreliable. */
+
+      /* The source must start inside the dictionary/output written so far */
+      if (nMatchOffset <= 0 || (size_t)nMatchOffset > (size_t)(pCurOutData - pOutData))
+         return -1;
+
+      /* The match must fit in what is left of the output buffer */
+      if (nMatchLen > (size_t)(pOutDataEnd - pCurOutData))
+         return -1;
+
+      /* Copy forward one byte at a time: source and destination overlap when offset < length */
+      const unsigned char* pSrc = pCurOutData - nMatchOffset;
+      while (nMatchLen) {
+         *pCurOutData++ = *pSrc++;
+         nMatchLen--;
+      }
+   }
+
+   return (size_t)(pCurOutData - pOutData) - nDictionarySize;
+}
diff --git a/src/expand.h b/src/expand.h
new file mode 100644
index 0000000..d7efe44
--- /dev/null
+++ b/src/expand.h
@@ -0,0 +1,69 @@
+/*
+ * expand.h - decompressor definitions
+ *
+ * Copyright (C) 2021 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Implements the ZX0 encoding designed by Einar Saukas. https://github.com/einar-saukas/ZX0
+ * Also inspired by Charles Bloom's compression blog. http://cbloomrants.blogspot.com/
+ *
+ */
+
+#ifndef _EXPAND_H
+#define _EXPAND_H
+
+#include <stdlib.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Get maximum decompressed size of compressed data
+ *
+ * @param pInputData compressed data
+ * @param nInputSize compressed size in bytes
+ * @param nFlags compression flags (set to 0)
+ *
+ * @return maximum decompressed size, or -1 (converted to size_t) for error
+ */
+size_t salvador_get_max_decompressed_size(const unsigned char *pInputData, size_t nInputSize, const unsigned int nFlags);
+
+/**
+ * Decompress data in memory
+ *
+ * @param pInputData compressed data
+ * @param pOutData buffer for decompressed data; its first nDictionarySize bytes hold the dictionary
+ * @param nInputSize compressed size in bytes
+ * @param nMaxOutBufferSize maximum capacity of decompression buffer, not counting the dictionary
+ * @param nDictionarySize size of dictionary at the start of pOutData (0 for none)
+ * @param nFlags compression flags (set to 0)
+ *
+ * @return actual decompressed size (excluding the dictionary), or -1 (converted to size_t) for error
+ */
+size_t salvador_decompress(const unsigned char *pInputData, unsigned char *pOutData, size_t nInputSize, size_t nMaxOutBufferSize, size_t nDictionarySize, const unsigned int nFlags);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _EXPAND_H */
diff --git a/src/format.h b/src/format.h
new file mode 100644
index 0000000..705ea66
--- /dev/null
+++ b/src/format.h
@@ -0,0 +1,43 @@
+/*
+ * format.h - byte stream format definitions
+ *
+ * Copyright (C) 2021 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Implements the ZX0 encoding designed by Einar Saukas. https://github.com/einar-saukas/ZX0
+ * Also inspired by Charles Bloom's compression blog. http://cbloomrants.blogspot.com/
+ *
+ */
+
+#ifndef _FORMAT_H
+#define _FORMAT_H
+
+#define MIN_OFFSET 1 /* smallest match offset */
+#define MAX_OFFSET 0x7f7f /* 32639 -- NOTE(review): the decoder in expand.c yields 32640 for a high byte of 255; confirm this bound is intended */
+
+#define MAX_VARLEN 0xffff /* 65535 -- NOTE(review): presumably the longest encodable length; confirm against the compressor */
+
+#define BLOCK_SIZE 0x10000 /* 65536 bytes (64 KiB) -- NOTE(review): presumably the compressor's working block size; confirm */
+
+#define MIN_MATCH_SIZE 1 /* smallest match length */
+
+#endif /* _FORMAT_H */
diff --git a/src/libdivsufsort/CHANGELOG.md b/src/libdivsufsort/CHANGELOG.md
new file mode 100644
index 0000000..fe9d004
--- /dev/null
+++ b/src/libdivsufsort/CHANGELOG.md
@@ -0,0 +1,21 @@
+# libdivsufsort Change Log
+
+See full changelog at: https://github.com/y-256/libdivsufsort/commits
+
+## [2.0.1] - 2010-11-11
+### Fixed
+* Wrong variable used in `divbwt` function
+* Enclose some string variables with double quotation marks in include/CMakeLists.txt
+* Fix typo in include/CMakeLists.txt
+
+## 2.0.0 - 2008-08-23
+### Changed
+* Switch the build system to [CMake](http://www.cmake.org/)
+* Improve the performance of the suffix-sorting algorithm
+
+### Added
+* OpenMP support
+* 64-bit version of divsufsort
+
+[Unreleased]: https://github.com/y-256/libdivsufsort/compare/2.0.1...HEAD
+[2.0.1]: https://github.com/y-256/libdivsufsort/compare/2.0.0...2.0.1
diff --git a/src/libdivsufsort/CMakeLists.txt b/src/libdivsufsort/CMakeLists.txt
new file mode 100644
index 0000000..7859943
--- /dev/null
+++ b/src/libdivsufsort/CMakeLists.txt
@@ -0,0 +1,99 @@
+### cmake file for building libdivsufsort Package ###
+cmake_minimum_required(VERSION 2.4.4)
+set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules")
+include(AppendCompilerFlags)
+
+## Project information ##
+project(libdivsufsort C)
+set(PROJECT_VENDOR "Yuta Mori")
+set(PROJECT_CONTACT "yuta.256@gmail.com")
+set(PROJECT_URL "https://github.com/y-256/libdivsufsort")
+set(PROJECT_DESCRIPTION "A lightweight suffix sorting library")
+include(VERSION.cmake)
+
+## CPack configuration ##
+set(CPACK_GENERATOR "TGZ;TBZ2;ZIP")
+set(CPACK_SOURCE_GENERATOR "TGZ;TBZ2;ZIP")
+include(ProjectCPack)
+
+## Project options ##
+option(BUILD_SHARED_LIBS "Set to OFF to build static libraries" ON)
+option(BUILD_EXAMPLES "Build examples" ON)
+option(BUILD_DIVSUFSORT64 "Build libdivsufsort64" OFF)
+option(USE_OPENMP "Use OpenMP for parallelization" OFF)
+option(WITH_LFS "Enable Large File Support" ON)
+
+## Installation directories ##
+set(LIB_SUFFIX "" CACHE STRING "Define suffix of directory name (32 or 64)")
+
+set(CMAKE_INSTALL_RUNTIMEDIR "" CACHE PATH "Specify the output directory for dll runtimes (default is bin)")
+if(NOT CMAKE_INSTALL_RUNTIMEDIR)
+  set(CMAKE_INSTALL_RUNTIMEDIR "${CMAKE_INSTALL_PREFIX}/bin")
+endif(NOT CMAKE_INSTALL_RUNTIMEDIR)
+
+set(CMAKE_INSTALL_LIBDIR "" CACHE PATH "Specify the output directory for libraries (default is lib)")
+if(NOT CMAKE_INSTALL_LIBDIR)
+  set(CMAKE_INSTALL_LIBDIR "${CMAKE_INSTALL_PREFIX}/lib${LIB_SUFFIX}")
+endif(NOT CMAKE_INSTALL_LIBDIR)
+
+set(CMAKE_INSTALL_INCLUDEDIR "" CACHE PATH "Specify the output directory for header files (default is include)")
+if(NOT CMAKE_INSTALL_INCLUDEDIR)
+  set(CMAKE_INSTALL_INCLUDEDIR "${CMAKE_INSTALL_PREFIX}/include")
+endif(NOT CMAKE_INSTALL_INCLUDEDIR)
+
+set(CMAKE_INSTALL_PKGCONFIGDIR "" CACHE PATH "Specify the output directory for pkgconfig files (default is lib/pkgconfig)")
+if(NOT CMAKE_INSTALL_PKGCONFIGDIR)
+  set(CMAKE_INSTALL_PKGCONFIGDIR "${CMAKE_INSTALL_LIBDIR}/pkgconfig")
+endif(NOT CMAKE_INSTALL_PKGCONFIGDIR)
+
+## Build type ##
+if(NOT CMAKE_BUILD_TYPE)
+  set(CMAKE_BUILD_TYPE "Release")
+elseif(CMAKE_BUILD_TYPE STREQUAL "Debug")
+  set(CMAKE_VERBOSE_MAKEFILE ON)
+endif(NOT CMAKE_BUILD_TYPE)
+
+## Compiler options ##
+if(MSVC)
+  append_c_compiler_flags("/W4" "VC" CMAKE_C_FLAGS)
+  append_c_compiler_flags("/Oi;/Ot;/Ox;/Oy" "VC" CMAKE_C_FLAGS_RELEASE)
+  if(USE_OPENMP)
+    append_c_compiler_flags("/openmp" "VC" CMAKE_C_FLAGS)
+  endif(USE_OPENMP)
+elseif(BORLAND)
+  append_c_compiler_flags("-w" "BCC" CMAKE_C_FLAGS)
+  append_c_compiler_flags("-Oi;-Og;-Os;-Ov;-Ox" "BCC" CMAKE_C_FLAGS_RELEASE)
+else(MSVC)
+  if(CMAKE_COMPILER_IS_GNUCC)
+    append_c_compiler_flags("-Wall" "GCC" CMAKE_C_FLAGS)
+    append_c_compiler_flags("-fomit-frame-pointer" "GCC" CMAKE_C_FLAGS_RELEASE)
+    if(USE_OPENMP)
+      append_c_compiler_flags("-fopenmp" "GCC" CMAKE_C_FLAGS)
+    endif(USE_OPENMP)
+  else(CMAKE_COMPILER_IS_GNUCC)
+    append_c_compiler_flags("-Wall" "UNKNOWN" CMAKE_C_FLAGS)
+    append_c_compiler_flags("-fomit-frame-pointer" "UNKNOWN" CMAKE_C_FLAGS_RELEASE)
+    if(USE_OPENMP)
+      append_c_compiler_flags("-fopenmp;-openmp;-omp" "UNKNOWN" CMAKE_C_FLAGS)
+    endif(USE_OPENMP)
+  endif(CMAKE_COMPILER_IS_GNUCC)
+endif(MSVC)
+
+## Add definitions ##
+add_definitions(-DHAVE_CONFIG_H=1 -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS)
+
+## Add subdirectories ##
+add_subdirectory(pkgconfig)
+add_subdirectory(include)
+add_subdirectory(lib)
+if(BUILD_EXAMPLES)
+  add_subdirectory(examples)
+endif(BUILD_EXAMPLES)
+
+## Add 'uninstall' target ##
+CONFIGURE_FILE(
+  "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules/cmake_uninstall.cmake.in"
+  "${CMAKE_CURRENT_BINARY_DIR}/CMakeModules/cmake_uninstall.cmake"
+  IMMEDIATE @ONLY)
+ADD_CUSTOM_TARGET(uninstall
+  "${CMAKE_COMMAND}" -P "${CMAKE_CURRENT_BINARY_DIR}/CMakeModules/cmake_uninstall.cmake")
diff --git a/src/libdivsufsort/CMakeModules/AppendCompilerFlags.cmake b/src/libdivsufsort/CMakeModules/AppendCompilerFlags.cmake
new file mode 100644
index 0000000..58d3f99
--- /dev/null
+++ b/src/libdivsufsort/CMakeModules/AppendCompilerFlags.cmake
@@ -0,0 +1,38 @@
+include(CheckCSourceCompiles)
+include(CheckCXXSourceCompiles)
+
+macro(append_c_compiler_flags _flags _name _result) # append every flag of _flags that the C compiler accepts to the variable named by _result
+  set(SAFE_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) # saved here, restored after the loop
+  string(REGEX REPLACE "[-+/ ]" "_" cname "${_name}")
+  string(TOUPPER "${cname}" cname)
+  foreach(flag ${_flags})
+    string(REGEX REPLACE "^[-+/ ]+(.*)[-+/ ]*$" "\\1" flagname "${flag}") # strip the leading flag punctuation
+    string(REGEX REPLACE "[-+/ ]" "_" flagname "${flagname}")
+    string(TOUPPER "${flagname}" flagname)
+    set(have_flag "HAVE_${cname}_${flagname}") # result variable of the probe, e.g. HAVE_GCC_WALL for "-Wall" with name "GCC"
+    set(CMAKE_REQUIRED_FLAGS "${flag}")
+    check_c_source_compiles("int main() { return 0; }" ${have_flag}) # compile a trivial program with only this flag
+    if(${have_flag})
+      set(${_result} "${${_result}} ${flag}")
+    endif(${have_flag})
+  endforeach(flag)
+  set(CMAKE_REQUIRED_FLAGS ${SAFE_CMAKE_REQUIRED_FLAGS})
+endmacro(append_c_compiler_flags)
+
+macro(append_cxx_compiler_flags _flags _name _result) # append every flag of _flags that the C++ compiler accepts to the variable named by _result
+  set(SAFE_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) # saved here, restored after the loop
+  string(REGEX REPLACE "[-+/ ]" "_" cname "${_name}")
+  string(TOUPPER "${cname}" cname)
+  foreach(flag ${_flags})
+    string(REGEX REPLACE "^[-+/ ]+(.*)[-+/ ]*$" "\\1" flagname "${flag}") # strip the leading flag punctuation
+    string(REGEX REPLACE "[-+/ ]" "_" flagname "${flagname}")
+    string(TOUPPER "${flagname}" flagname)
+    set(have_flag "HAVE_${cname}_${flagname}") # result variable of the probe for this flag
+    set(CMAKE_REQUIRED_FLAGS "${flag}")
+    check_cxx_source_compiles("int main() { return 0; }" ${have_flag}) # compile a trivial program with only this flag
+    if(${have_flag})
+      set(${_result} "${${_result}} ${flag}")
+    endif(${have_flag})
+  endforeach(flag)
+  set(CMAKE_REQUIRED_FLAGS ${SAFE_CMAKE_REQUIRED_FLAGS})
+endmacro(append_cxx_compiler_flags)
diff --git a/src/libdivsufsort/CMakeModules/CheckFunctionKeywords.cmake b/src/libdivsufsort/CMakeModules/CheckFunctionKeywords.cmake
new file mode 100644
index 0000000..44601fd
--- /dev/null
+++ b/src/libdivsufsort/CMakeModules/CheckFunctionKeywords.cmake
@@ -0,0 +1,15 @@
+include(CheckCSourceCompiles)
+
+macro(check_function_keywords _wordlist) # probe each keyword of _wordlist and record the outcome in a HAVE_<KEYWORD> variable
+  set(${_result} "") # NOTE(review): _result is not a parameter of this macro, so this expands to an empty name -- confirm intent
+  foreach(flag ${_wordlist})
+    string(REGEX REPLACE "[-+/ ()]" "_" flagname "${flag}")
+    string(TOUPPER "${flagname}" flagname)
+    set(have_flag "HAVE_${flagname}")
+    check_c_source_compiles("${flag} void func(); void func() { } int main() { func(); return 0; }" ${have_flag}) # does a function declared with the keyword compile?
+    if(${have_flag} AND NOT ${_result})
+      set(${_result} "${flag}")
+#      break()
+    endif(${have_flag} AND NOT ${_result})
+  endforeach(flag)
+endmacro(check_function_keywords)
diff --git a/src/libdivsufsort/CMakeModules/CheckLFS.cmake b/src/libdivsufsort/CMakeModules/CheckLFS.cmake
new file mode 100644
index 0000000..e2b0099
--- /dev/null
+++ b/src/libdivsufsort/CMakeModules/CheckLFS.cmake
@@ -0,0 +1,109 @@
+## Checks for large file support ##
+include(CheckIncludeFile)
+include(CheckSymbolExists)
+include(CheckTypeSize)
+
+macro(check_lfs _isenable) # sets LFS_OFF_T, LFS_FOPEN, LFS_FSEEK, LFS_FTELL and LFS_PRID to the first large-file API found
+  set(LFS_OFF_T "")
+  set(LFS_FOPEN "")
+  set(LFS_FSEEK "")
+  set(LFS_FTELL "")
+  set(LFS_PRID "")
+
+  if(${_isenable})
+    set(SAFE_CMAKE_REQUIRED_DEFINITIONS "${CMAKE_REQUIRED_DEFINITIONS}")
+    set(CMAKE_REQUIRED_DEFINITIONS ${CMAKE_REQUIRED_DEFINITIONS}
+        -D_LARGEFILE_SOURCE -D_LARGE_FILES -D_FILE_OFFSET_BITS=64
+        -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS)
+
+    check_include_file("sys/types.h" HAVE_SYS_TYPES_H)
+    check_include_file("inttypes.h" HAVE_INTTYPES_H)
+    check_include_file("stddef.h" HAVE_STDDEF_H)
+    check_include_file("stdint.h" HAVE_STDINT_H)
+
+    # LFS type 1: off_t is at least 8 bytes and fseeko/ftello exist; plain fopen is used
+    check_type_size("off_t" SIZEOF_OFF_T)
+    if(SIZEOF_OFF_T GREATER 7)
+      check_symbol_exists("fseeko" "stdio.h" HAVE_FSEEKO)
+      check_symbol_exists("ftello" "stdio.h" HAVE_FTELLO)
+      if(HAVE_FSEEKO AND HAVE_FTELLO)
+        set(LFS_OFF_T "off_t")
+        set(LFS_FOPEN "fopen")
+        set(LFS_FSEEK "fseeko")
+        set(LFS_FTELL "ftello")
+        check_symbol_exists("PRIdMAX" "inttypes.h" HAVE_PRIDMAX)
+        if(HAVE_PRIDMAX)
+          set(LFS_PRID "PRIdMAX")
+        else(HAVE_PRIDMAX)
+          check_type_size("long" SIZEOF_LONG)
+          check_type_size("int" SIZEOF_INT)
+          if(SIZEOF_OFF_T GREATER SIZEOF_LONG)
+            set(LFS_PRID "\"lld\"")
+          elseif(SIZEOF_LONG GREATER SIZEOF_INT)
+            set(LFS_PRID "\"ld\"")
+          else(SIZEOF_OFF_T GREATER SIZEOF_LONG)
+            set(LFS_PRID "\"d\"")
+          endif(SIZEOF_OFF_T GREATER SIZEOF_LONG)
+        endif(HAVE_PRIDMAX)
+      endif(HAVE_FSEEKO AND HAVE_FTELLO)
+    endif(SIZEOF_OFF_T GREATER 7)
+
+    # LFS type 2 (only if type 1 was not found): off64_t is at least 8 bytes and fopen64/fseeko64/ftello64 exist
+    if(NOT LFS_OFF_T)
+      check_type_size("off64_t" SIZEOF_OFF64_T)
+      if(SIZEOF_OFF64_T GREATER 7)
+        check_symbol_exists("fopen64" "stdio.h" HAVE_FOPEN64)
+        check_symbol_exists("fseeko64" "stdio.h" HAVE_FSEEKO64)
+        check_symbol_exists("ftello64" "stdio.h" HAVE_FTELLO64)
+        if(HAVE_FOPEN64 AND HAVE_FSEEKO64 AND HAVE_FTELLO64)
+          set(LFS_OFF_T "off64_t")
+          set(LFS_FOPEN "fopen64")
+          set(LFS_FSEEK "fseeko64")
+          set(LFS_FTELL "ftello64")
+          check_symbol_exists("PRIdMAX" "inttypes.h" HAVE_PRIDMAX)
+          if(HAVE_PRIDMAX)
+            set(LFS_PRID "PRIdMAX")
+          else(HAVE_PRIDMAX)
+            check_type_size("long" SIZEOF_LONG)
+            check_type_size("int" SIZEOF_INT)
+            if(SIZEOF_OFF64_T GREATER SIZEOF_LONG)
+              set(LFS_PRID "\"lld\"")
+            elseif(SIZEOF_LONG GREATER SIZEOF_INT)
+              set(LFS_PRID "\"ld\"")
+            else(SIZEOF_OFF64_T GREATER SIZEOF_LONG)
+              set(LFS_PRID "\"d\"")
+            endif(SIZEOF_OFF64_T GREATER SIZEOF_LONG)
+          endif(HAVE_PRIDMAX)
+        endif(HAVE_FOPEN64 AND HAVE_FSEEKO64 AND HAVE_FTELLO64)
+      endif(SIZEOF_OFF64_T GREATER 7)
+    endif(NOT LFS_OFF_T)
+
+    # LFS type 3 (only if types 1 and 2 were not found): __int64 is at least 8 bytes and _fseeki64/_ftelli64 exist
+    if(NOT LFS_OFF_T)
+      check_type_size("__int64" SIZEOF___INT64)
+      if(SIZEOF___INT64 GREATER 7)
+        check_symbol_exists("_fseeki64" "stdio.h" HAVE__FSEEKI64)
+        check_symbol_exists("_ftelli64" "stdio.h" HAVE__FTELLI64)
+        if(HAVE__FSEEKI64 AND HAVE__FTELLI64)
+          set(LFS_OFF_T "__int64")
+          set(LFS_FOPEN "fopen")
+          set(LFS_FSEEK "_fseeki64")
+          set(LFS_FTELL "_ftelli64")
+          set(LFS_PRID  "\"I64d\"")
+        endif(HAVE__FSEEKI64 AND HAVE__FTELLI64)
+      endif(SIZEOF___INT64 GREATER 7)
+    endif(NOT LFS_OFF_T)
+
+    set(CMAKE_REQUIRED_DEFINITIONS "${SAFE_CMAKE_REQUIRED_DEFINITIONS}")
+  endif(${_isenable})
+
+  if(NOT LFS_OFF_T)
+    ## no large-file API selected (none found, or the check is disabled): fall back to long with fseek/ftell
+    set(LFS_OFF_T "long")
+    set(LFS_FOPEN "fopen")
+    set(LFS_FSEEK "fseek")
+    set(LFS_FTELL "ftell")
+    set(LFS_PRID  "\"ld\"")
+  endif(NOT LFS_OFF_T)
+
+endmacro(check_lfs)
diff --git a/src/libdivsufsort/CMakeModules/ProjectCPack.cmake b/src/libdivsufsort/CMakeModules/ProjectCPack.cmake
new file mode 100644
index 0000000..7c105f9
--- /dev/null
+++ b/src/libdivsufsort/CMakeModules/ProjectCPack.cmake
@@ -0,0 +1,38 @@
+# If the cmake version includes cpack, use it
+IF(EXISTS "${CMAKE_ROOT}/Modules/CPack.cmake")
+  SET(CPACK_PACKAGE_DESCRIPTION_SUMMARY "${PROJECT_DESCRIPTION}")
+  SET(CPACK_PACKAGE_VENDOR "${PROJECT_VENDOR}")
+  SET(CPACK_PACKAGE_DESCRIPTION_FILE "${CMAKE_CURRENT_SOURCE_DIR}/README.md")
+  SET(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE")
+  SET(CPACK_PACKAGE_VERSION_MAJOR "${PROJECT_VERSION_MAJOR}")
+  SET(CPACK_PACKAGE_VERSION_MINOR "${PROJECT_VERSION_MINOR}")
+  SET(CPACK_PACKAGE_VERSION_PATCH "${PROJECT_VERSION_PATCH}")
+#  SET(CPACK_PACKAGE_INSTALL_DIRECTORY "${PROJECT_NAME} ${PROJECT_VERSION}")
+  SET(CPACK_SOURCE_PACKAGE_FILE_NAME "${PROJECT_NAME}-${PROJECT_VERSION_FULL}")
+
+  IF(NOT DEFINED CPACK_SYSTEM_NAME)
+    SET(CPACK_SYSTEM_NAME "${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}")
+  ENDIF(NOT DEFINED CPACK_SYSTEM_NAME)
+
+  IF(${CPACK_SYSTEM_NAME} MATCHES Windows)
+    IF(CMAKE_CL_64)
+      SET(CPACK_SYSTEM_NAME win64-${CMAKE_SYSTEM_PROCESSOR})
+    ELSE(CMAKE_CL_64)
+      SET(CPACK_SYSTEM_NAME win32-${CMAKE_SYSTEM_PROCESSOR})
+    ENDIF(CMAKE_CL_64)
+  ENDIF(${CPACK_SYSTEM_NAME} MATCHES Windows)
+
+  IF(NOT DEFINED CPACK_PACKAGE_FILE_NAME)
+    SET(CPACK_PACKAGE_FILE_NAME "${CPACK_SOURCE_PACKAGE_FILE_NAME}-${CPACK_SYSTEM_NAME}")
+  ENDIF(NOT DEFINED CPACK_PACKAGE_FILE_NAME)
+
+  SET(CPACK_PACKAGE_CONTACT "${PROJECT_CONTACT}")
+  IF(UNIX)
+    SET(CPACK_STRIP_FILES "")
+    SET(CPACK_SOURCE_STRIP_FILES "")
+#    SET(CPACK_PACKAGE_EXECUTABLES "ccmake" "CMake")
+  ENDIF(UNIX)
+  SET(CPACK_SOURCE_IGNORE_FILES "/CVS/" "/build/" "/\\\\.build/" "/\\\\.svn/" "~$")
+  # include CPack model once all variables are set
+  INCLUDE(CPack)
+ENDIF(EXISTS "${CMAKE_ROOT}/Modules/CPack.cmake")
diff --git a/src/libdivsufsort/CMakeModules/cmake_uninstall.cmake.in b/src/libdivsufsort/CMakeModules/cmake_uninstall.cmake.in
new file mode 100644
index 0000000..8366a83
--- /dev/null
+++ b/src/libdivsufsort/CMakeModules/cmake_uninstall.cmake.in
@@ -0,0 +1,36 @@
+IF(NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt")
+  MESSAGE(FATAL_ERROR "Cannot find install manifest: \"@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt\"")
+ENDIF(NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt")
+
+FILE(READ "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt" files)
+STRING(REGEX REPLACE "\n" ";" files "${files}")
+
+SET(NUM 0)
+FOREACH(file ${files})
+  IF(EXISTS "$ENV{DESTDIR}${file}")
+    MESSAGE(STATUS "Looking for \"$ENV{DESTDIR}${file}\" - found")
+    SET(UNINSTALL_CHECK_${NUM} 1)
+  ELSE(EXISTS "$ENV{DESTDIR}${file}")
+    MESSAGE(STATUS "Looking for \"$ENV{DESTDIR}${file}\" - not found")
+    SET(UNINSTALL_CHECK_${NUM} 0)
+  ENDIF(EXISTS "$ENV{DESTDIR}${file}")
+  MATH(EXPR NUM "1 + ${NUM}")
+ENDFOREACH(file)
+
+SET(NUM 0)
+FOREACH(file ${files})
+  IF(${UNINSTALL_CHECK_${NUM}})
+    MESSAGE(STATUS "Uninstalling \"$ENV{DESTDIR}${file}\"")
+    EXEC_PROGRAM(
+      "@CMAKE_COMMAND@" ARGS "-E remove \"$ENV{DESTDIR}${file}\""
+      OUTPUT_VARIABLE rm_out
+      RETURN_VALUE rm_retval
+      )
+    IF(NOT "${rm_retval}" STREQUAL 0)
+      MESSAGE(FATAL_ERROR "Problem when removing \"$ENV{DESTDIR}${file}\"")
+    ENDIF(NOT "${rm_retval}" STREQUAL 0)
+  ENDIF(${UNINSTALL_CHECK_${NUM}})
+  MATH(EXPR NUM "1 + ${NUM}")
+ENDFOREACH(file)
+
+FILE(REMOVE "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt")
diff --git a/src/libdivsufsort/LICENSE b/src/libdivsufsort/LICENSE
new file mode 100644
index 0000000..249efa4
--- /dev/null
+++ b/src/libdivsufsort/LICENSE
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2003 Yuta Mori All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/src/libdivsufsort/README.md b/src/libdivsufsort/README.md
new file mode 100644
index 0000000..381a188
--- /dev/null
+++ b/src/libdivsufsort/README.md
@@ -0,0 +1,140 @@
+# libdivsufsort
+
+libdivsufsort is a software library that implements a lightweight suffix array construction algorithm.
+
+## News
+* 2015-03-21: The project has moved from [Google Code](http://code.google.com/p/libdivsufsort/) to [GitHub](https://github.com/y-256/libdivsufsort)
+
+## Introduction
+This library provides a simple and efficient C API to construct a suffix array and a Burrows-Wheeler transformed string from a given string over a constant-size alphabet.
+The algorithm runs in O(n log n) worst-case time using only 5n+O(1) bytes of memory space, where n is the length of
+the string.
+
+## Build requirements
+* An ANSI C Compiler (e.g. GNU GCC)
+* [CMake](http://www.cmake.org/ "CMake") version 2.4.2 or newer
+* CMake-supported build tool
+
+## Building on GNU/Linux
+1. Get the source code from GitHub. You can either
+    * use git to clone the repository
+    ```
+    git clone https://github.com/y-256/libdivsufsort.git
+    ```
+    * or download a [zip file](../../archive/master.zip) directly
+2. Create a `build` directory in the package source directory.
+```shell
+$ cd libdivsufsort
+$ mkdir build
+$ cd build
+```
+3. Configure the package for your system.
+If you want to install to a different location, change the -DCMAKE_INSTALL_PREFIX option.
+```shell
+$ cmake -DCMAKE_BUILD_TYPE="Release" \
+-DCMAKE_INSTALL_PREFIX="/usr/local" ..
+```
+4. Compile the package.
+```shell
+$ make
+```
+5. (Optional) Install the library and header files.
+```shell
+$ sudo make install
+```
+
+## API
+```c
+/* Data types */
+typedef int32_t saint_t;
+typedef int32_t saidx_t;
+typedef uint8_t sauchar_t;
+
+/*
+ * Constructs the suffix array of a given string.
+ * @param T[0..n-1] The input string.
+ * @param SA[0..n-1] The output array of suffixes.
+ * @param n The length of the given string.
+ * @return 0 if no error occurred, -1 or -2 otherwise.
+ */
+saint_t
+divsufsort(const sauchar_t *T, saidx_t *SA, saidx_t n);
+
+/*
+ * Constructs the burrows-wheeler transformed string of a given string.
+ * @param T[0..n-1] The input string.
+ * @param U[0..n-1] The output string. (can be T)
+ * @param A[0..n-1] The temporary array. (can be NULL)
+ * @param n The length of the given string.
+ * @return The primary index if no error occurred, -1 or -2 otherwise.
+ */
+saidx_t
+divbwt(const sauchar_t *T, sauchar_t *U, saidx_t *A, saidx_t n);
+```
+
+## Example Usage
+```c
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <divsufsort.h>
+
+int main() {
+    // input data
+    char *Text = "abracadabra";
+    int n = strlen(Text);
+    int i, j;
+
+    // allocate
+    int *SA = (int *)malloc(n * sizeof(int));
+
+    // sort
+    divsufsort((unsigned char *)Text, SA, n);
+
+    // output
+    for(i = 0; i < n; ++i) {
+        printf("SA[%2d] = %2d: ", i, SA[i]);
+        for(j = SA[i]; j < n; ++j) {
+            printf("%c", Text[j]);
+        }
+        printf("$\n");
+    }
+
+    // deallocate
+    free(SA);
+
+    return 0;
+}
+```
+See the [examples](examples) directory for a few other examples.
+
+## Benchmarks
+See [Benchmarks](https://github.com/y-256/libdivsufsort/blob/wiki/SACA_Benchmarks.md) page for details.
+
+## License
+libdivsufsort is released under the [MIT license](LICENSE "MIT license").
+> The MIT License (MIT)
+>
+> Copyright (c) 2003 Yuta Mori All rights reserved.
+>
+> Permission is hereby granted, free of charge, to any person obtaining a copy
+> of this software and associated documentation files (the "Software"), to deal
+> in the Software without restriction, including without limitation the rights
+> to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+> copies of the Software, and to permit persons to whom the Software is
+> furnished to do so, subject to the following conditions:
+>
+> The above copyright notice and this permission notice shall be included in all
+> copies or substantial portions of the Software.
+>
+> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+> SOFTWARE.
+
+## Author
+* Yuta Mori
diff --git a/src/libdivsufsort/VERSION.cmake b/src/libdivsufsort/VERSION.cmake
new file mode 100644
index 0000000..3f11ac1
--- /dev/null
+++ b/src/libdivsufsort/VERSION.cmake
@@ -0,0 +1,23 @@
+set(PROJECT_VERSION_MAJOR "2")  # Version components; with these values PROJECT_VERSION_FULL starts out as "2.0.2-1".
+set(PROJECT_VERSION_MINOR "0")
+set(PROJECT_VERSION_PATCH "2")
+set(PROJECT_VERSION_EXTRA "-1")
+set(PROJECT_VERSION "${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}")
+set(PROJECT_VERSION_FULL "${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}${PROJECT_VERSION_EXTRA}")
+## Library version and soname version (presumably applied to the library target elsewhere -- not visible here) ##
+set(LIBRARY_VERSION "3.0.1")
+set(LIBRARY_SOVERSION "3")
+
+## Git revision number: in a git checkout, PROJECT_VERSION_FULL becomes `git describe --tags HEAD` minus a leading "v" ##
+if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git")
+  execute_process(COMMAND git describe --tags HEAD
+    WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
+    OUTPUT_VARIABLE GIT_DESCRIBE_TAGS ERROR_QUIET)
+  if(GIT_DESCRIBE_TAGS)  # empty when git is unavailable or produced no output
+    string(REGEX REPLACE "^v(.*)" "\\1" GIT_REVISION "${GIT_DESCRIBE_TAGS}")
+    string(STRIP "${GIT_REVISION}" GIT_REVISION)
+    if(GIT_REVISION)
+      set(PROJECT_VERSION_FULL "${GIT_REVISION}")
+    endif(GIT_REVISION)
+  endif(GIT_DESCRIBE_TAGS)
+endif(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git")
diff --git a/src/libdivsufsort/examples/CMakeLists.txt b/src/libdivsufsort/examples/CMakeLists.txt
new file mode 100644
index 0000000..e801c81
--- /dev/null
+++ b/src/libdivsufsort/examples/CMakeLists.txt
@@ -0,0 +1,11 @@
+## Add definitions: large-file-support macros for every example program ##
+add_definitions(-D_LARGEFILE_SOURCE -D_LARGE_FILES -D_FILE_OFFSET_BITS=64)
+
+## Targets: one executable per example source, each linked against divsufsort ##
+include_directories("${CMAKE_CURRENT_SOURCE_DIR}/../include"
+                    "${CMAKE_CURRENT_BINARY_DIR}/../include")
+link_directories("${CMAKE_CURRENT_BINARY_DIR}/../lib")
+foreach(src suftest mksary sasearch bwt unbwt)
+  add_executable(${src} ${src}.c)
+  target_link_libraries(${src} divsufsort)
+endforeach(src)
diff --git a/src/libdivsufsort/examples/bwt.c b/src/libdivsufsort/examples/bwt.c
new file mode 100644
index 0000000..5a362d0
--- /dev/null
+++ b/src/libdivsufsort/examples/bwt.c
@@ -0,0 +1,220 @@
+/*
+ * bwt.c for libdivsufsort
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+#include <stdio.h>
+#if HAVE_STRING_H
+# include <string.h>
+#endif
+#if HAVE_STDLIB_H
+# include <stdlib.h>
+#endif
+#if HAVE_MEMORY_H
+# include <memory.h>
+#endif
+#if HAVE_STDDEF_H
+# include <stddef.h>
+#endif
+#if HAVE_STRINGS_H
+# include <strings.h>
+#endif
+#if HAVE_SYS_TYPES_H
+# include <sys/types.h>
+#endif
+#if HAVE_IO_H && HAVE_FCNTL_H
+# include <io.h>
+# include <fcntl.h>
+#endif
+#include <time.h>
+#include <divsufsort.h>
+#include "lfs.h"
+
+
+/* Writes n to fp as 4 bytes, least significant byte first; returns fwrite's
+ * item count, which is 4 when all bytes were written. */
+static size_t
+write_int(FILE *fp, saidx_t n) {
+  unsigned char bytes[4]; int k;
+  for(k = 0; k < 4; ++k) { bytes[k] = (unsigned char)((n >> (8 * k)) & 0xff); }
+  return fwrite(bytes, sizeof(unsigned char), 4, fp);
+}
+
+/* Prints the bwt banner (with the library version) and the usage text to
+ * stderr, then terminates the process with exit code `status`. */
+static void
+print_help(const char *progname, int status) {
+  FILE *out = stderr;
+  fprintf(out, "bwt, a burrows-wheeler transform program, version %s.\n", divsufsort_version());
+  fprintf(out, "usage: %s [-b num] INFILE OUTFILE\n", progname);
+  fputs("  -b num    set block size to num MiB [1..512] (default: 32)\n\n", out);
+  exit(status);
+}
+
+int
+main(int argc, const char *argv[]) { /* usage: bwt [-b num] INFILE OUTFILE; "-" selects stdin/stdout */
+  FILE *fp, *ofp;
+  const char *fname, *ofname;
+  sauchar_t *T;
+  saidx_t *SA;
+  LFS_OFF_T n;
+  size_t m;
+  saidx_t pidx;
+  clock_t start,finish;
+  saint_t i, blocksize = 32, needclose = 3; /* needclose bit 0: close fp, bit 1: close ofp */
+
+  /* Check arguments; an optional "-b num" pair must come first. */
+  if((argc == 1) ||
+     (strcmp(argv[1], "-h") == 0) ||
+     (strcmp(argv[1], "--help") == 0)) { print_help(argv[0], EXIT_SUCCESS); }
+  if((argc != 3) && (argc != 5)) { print_help(argv[0], EXIT_FAILURE); }
+  i = 1;
+  if(argc == 5) {
+    if(strcmp(argv[i], "-b") != 0) { print_help(argv[0], EXIT_FAILURE); }
+    blocksize = atoi(argv[i + 1]);
+    if(blocksize < 0) { blocksize = 1; } /* 0 is kept and resolved after the size probe below */
+    else if(512 < blocksize) { blocksize = 512; }
+    i += 2;
+  }
+  blocksize <<= 20; /* MiB -> bytes */
+
+  /* Open a file for reading. */
+  if(strcmp(argv[i], "-") != 0) {
+#if HAVE_FOPEN_S
+    if(fopen_s(&fp, fname = argv[i], "rb") != 0) {
+#else
+    if((fp = LFS_FOPEN(fname = argv[i], "rb")) == NULL) {
+#endif
+      fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], fname);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+  } else {
+#if HAVE__SETMODE && HAVE__FILENO
+    if(_setmode(_fileno(stdin), _O_BINARY) == -1) {
+      fprintf(stderr, "%s: Cannot set mode: ", argv[0]);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+#endif
+    fp = stdin;
+    fname = "stdin";
+    needclose ^= 1; /* stdin is not ours to close */
+  }
+  i += 1;
+
+  /* Open a file for writing. */
+  if(strcmp(argv[i], "-") != 0) {
+#if HAVE_FOPEN_S
+    if(fopen_s(&ofp, ofname = argv[i], "wb") != 0) {
+#else
+    if((ofp = LFS_FOPEN(ofname = argv[i], "wb")) == NULL) {
+#endif
+      fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], ofname);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+  } else {
+#if HAVE__SETMODE && HAVE__FILENO
+    if(_setmode(_fileno(stdout), _O_BINARY) == -1) {
+      fprintf(stderr, "%s: Cannot set mode: ", argv[0]);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+#endif
+    ofp = stdout;
+    ofname = "stdout";
+    needclose ^= 2; /* stdout is not ours to close */
+  }
+
+  /* Get the file size if the stream can seek; cap it at 0x20000000 (512 MiB) and shrink the block to fit. */
+  if(LFS_FSEEK(fp, 0, SEEK_END) == 0) {
+    n = LFS_FTELL(fp);
+    rewind(fp);
+    if(n < 0) {
+      fprintf(stderr, "%s: Cannot ftell `%s': ", argv[0], fname);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+    if(0x20000000L < n) { n = 0x20000000L; }
+    if((blocksize == 0) || (n < blocksize)) { blocksize = (saidx_t)n; }
+  } else if(blocksize == 0) { blocksize = 32 << 20; } /* cannot seek: fall back to 32 MiB */
+
+  /* Allocate blocksize bytes for T plus blocksize saidx_t entries for SA. */
+  T = (sauchar_t *)malloc(blocksize * sizeof(sauchar_t));
+  SA = (saidx_t *)malloc(blocksize * sizeof(saidx_t));
+  if((T == NULL) || (SA == NULL)) {
+    fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
+    exit(EXIT_FAILURE);
+  }
+
+  /* Write the blocksize as the first 4 bytes of the output. */
+  if(write_int(ofp, blocksize) != 4) {
+    fprintf(stderr, "%s: Cannot write to `%s': ", argv[0], ofname);
+    perror(NULL);
+    exit(EXIT_FAILURE);
+  }
+
+  fprintf(stderr, "  BWT (blocksize %" PRIdSAINT_T ") ... ", blocksize);
+  start = clock();
+  for(n = 0; 0 < (m = fread(T, sizeof(sauchar_t), blocksize, fp)); n += m) { /* one block per pass; n totals the bytes read */
+    /* Burrows-Wheeler Transform, in place in T; a negative result is an error code. */
+    pidx = divbwt(T, T, SA, m);
+    if(pidx < 0) {
+      fprintf(stderr, "%s (bw_transform): %s.\n",
+        argv[0],
+        (pidx == -1) ? "Invalid arguments" : "Cannot allocate memory");
+      exit(EXIT_FAILURE);
+    }
+
+    /* Write the 4-byte primary index followed by the m transformed bytes. */
+    if((write_int(ofp, pidx) != 4) ||
+       (fwrite(T, sizeof(sauchar_t), m, ofp) != m)) {
+      fprintf(stderr, "%s: Cannot write to `%s': ", argv[0], ofname);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+  }
+  if(ferror(fp)) { /* the loop also ends on a read error, not only at EOF */
+    fprintf(stderr, "%s: Cannot read from `%s': ", argv[0], fname);
+    perror(NULL);
+    exit(EXIT_FAILURE);
+  }
+  finish = clock();
+  fprintf(stderr, "%" PRIdOFF_T " bytes: %.4f sec\n",
+    n, (double)(finish - start) / (double)CLOCKS_PER_SEC);
+
+  /* Close only the streams this program opened itself. */
+  if(needclose & 1) { fclose(fp); }
+  if(needclose & 2) { fclose(ofp); }
+
+  /* Deallocate memory. */
+  free(SA);
+  free(T);
+
+  return 0;
+}
diff --git a/src/libdivsufsort/examples/mksary.c b/src/libdivsufsort/examples/mksary.c
new file mode 100644
index 0000000..b48177c
--- /dev/null
+++ b/src/libdivsufsort/examples/mksary.c
@@ -0,0 +1,193 @@
+/*
+ * mksary.c for libdivsufsort
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+#include <stdio.h>
+#if HAVE_STRING_H
+# include <string.h>
+#endif
+#if HAVE_STDLIB_H
+# include <stdlib.h>
+#endif
+#if HAVE_MEMORY_H
+# include <memory.h>
+#endif
+#if HAVE_STDDEF_H
+# include <stddef.h>
+#endif
+#if HAVE_STRINGS_H
+# include <strings.h>
+#endif
+#if HAVE_SYS_TYPES_H
+# include <sys/types.h>
+#endif
+#if HAVE_IO_H && HAVE_FCNTL_H
+# include <io.h>
+# include <fcntl.h>
+#endif
+#include <time.h>
+#include <divsufsort.h>
+#include "lfs.h"
+
+
+/* Prints the mksary banner (with the library version) and the usage line to
+ * stderr, then terminates the process with exit code `status`. */
+static void
+print_help(const char *progname, int status) {
+  FILE *out = stderr;
+  fprintf(out, "mksary, a simple suffix array builder, version %s.\n", divsufsort_version());
+  fprintf(out, "usage: %s INFILE OUTFILE\n\n", progname);
+  exit(status);
+}
+
+int
+main(int argc, const char *argv[]) { /* usage: mksary INFILE OUTFILE; "-" selects stdin/stdout */
+  FILE *fp, *ofp;
+  const char *fname, *ofname;
+  sauchar_t *T;
+  saidx_t *SA;
+  LFS_OFF_T n;
+  clock_t start, finish;
+  saint_t needclose = 3; /* bit 0: close fp, bit 1: close ofp */
+
+  /* Check arguments. */
+  if((argc == 1) ||
+     (strcmp(argv[1], "-h") == 0) ||
+     (strcmp(argv[1], "--help") == 0)) { print_help(argv[0], EXIT_SUCCESS); }
+  if(argc != 3) { print_help(argv[0], EXIT_FAILURE); }
+
+  /* Open a file for reading. */
+  if(strcmp(argv[1], "-") != 0) {
+#if HAVE_FOPEN_S
+    if(fopen_s(&fp, fname = argv[1], "rb") != 0) {
+#else
+    if((fp = LFS_FOPEN(fname = argv[1], "rb")) == NULL) {
+#endif
+      fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], fname);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+  } else {
+#if HAVE__SETMODE && HAVE__FILENO
+    if(_setmode(_fileno(stdin), _O_BINARY) == -1) {
+      fprintf(stderr, "%s: Cannot set mode: ", argv[0]);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+#endif
+    fp = stdin;
+    fname = "stdin";
+    needclose ^= 1; /* stdin is not ours to close */
+  }
+
+  /* Open a file for writing. */
+  if(strcmp(argv[2], "-") != 0) {
+#if HAVE_FOPEN_S
+    if(fopen_s(&ofp, ofname = argv[2], "wb") != 0) {
+#else
+    if((ofp = LFS_FOPEN(ofname = argv[2], "wb")) == NULL) {
+#endif
+      fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], ofname);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+  } else {
+#if HAVE__SETMODE && HAVE__FILENO
+    if(_setmode(_fileno(stdout), _O_BINARY) == -1) {
+      fprintf(stderr, "%s: Cannot set mode: ", argv[0]);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+#endif
+    ofp = stdout;
+    ofname = "stdout";
+    needclose ^= 2; /* stdout is not ours to close */
+  }
+
+  /* Get the file size; the input must be seekable and smaller than 0x7fffffff bytes. */
+  if(LFS_FSEEK(fp, 0, SEEK_END) == 0) {
+    n = LFS_FTELL(fp);
+    rewind(fp);
+    if(n < 0) {
+      fprintf(stderr, "%s: Cannot ftell `%s': ", argv[0], fname);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+    if(0x7fffffff <= n) {
+      fprintf(stderr, "%s: Input file `%s' is too big.\n", argv[0], fname);
+      exit(EXIT_FAILURE);
+    }
+  } else {
+    fprintf(stderr, "%s: Cannot fseek `%s': ", argv[0], fname);
+    perror(NULL);
+    exit(EXIT_FAILURE);
+  }
+
+  /* Allocate n bytes for T and n saidx_t entries for SA. */
+  T = (sauchar_t *)malloc((size_t)n * sizeof(sauchar_t));
+  SA = (saidx_t *)malloc((size_t)n * sizeof(saidx_t));
+  if((T == NULL) || (SA == NULL)) {
+    fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
+    exit(EXIT_FAILURE);
+  }
+
+  /* Read n bytes of data; a short read is reported as a read error or an unexpected EOF. */
+  if(fread(T, sizeof(sauchar_t), (size_t)n, fp) != (size_t)n) {
+    fprintf(stderr, "%s: %s `%s': ",
+      argv[0],
+      (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in",
+      fname);
+    perror(NULL);
+    exit(EXIT_FAILURE);
+  }
+  if(needclose & 1) { fclose(fp); }
+
+  /* Construct the suffix array; any non-zero result is reported as an allocation failure. */
+  fprintf(stderr, "%s: %" PRIdOFF_T " bytes ... ", fname, n);
+  start = clock();
+  if(divsufsort(T, SA, (saidx_t)n) != 0) {
+    fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
+    exit(EXIT_FAILURE);
+  }
+  finish = clock();
+  fprintf(stderr, "%.4f sec\n", (double)(finish - start) / (double)CLOCKS_PER_SEC);
+
+  /* Write the suffix array as n raw saidx_t values, exactly as they sit in memory. */
+  if(fwrite(SA, sizeof(saidx_t), (size_t)n, ofp) != (size_t)n) {
+    fprintf(stderr, "%s: Cannot write to `%s': ", argv[0], ofname);
+    perror(NULL);
+    exit(EXIT_FAILURE);
+  }
+  if(needclose & 2) { fclose(ofp); }
+
+  /* Deallocate memory. */
+  free(SA);
+  free(T);
+
+  return 0;
+}
diff --git a/src/libdivsufsort/examples/sasearch.c b/src/libdivsufsort/examples/sasearch.c
new file mode 100644
index 0000000..7e5ca4f
--- /dev/null
+++ b/src/libdivsufsort/examples/sasearch.c
@@ -0,0 +1,165 @@
+/*
+ * sasearch.c for libdivsufsort
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+#include <stdio.h>
+#if HAVE_STRING_H
+# include <string.h>
+#endif
+#if HAVE_STDLIB_H
+# include <stdlib.h>
+#endif
+#if HAVE_MEMORY_H
+# include <memory.h>
+#endif
+#if HAVE_STDDEF_H
+# include <stddef.h>
+#endif
+#if HAVE_STRINGS_H
+# include <strings.h>
+#endif
+#if HAVE_SYS_TYPES_H
+# include <sys/types.h>
+#endif
+#if HAVE_IO_H && HAVE_FCNTL_H
+# include <io.h>
+# include <fcntl.h>
+#endif
+#include <divsufsort.h>
+#include "lfs.h"
+
+
+/* Prints the sasearch banner (with the library version) and the usage line to
+ * stderr, then terminates the process with exit code `status`. */
+static void
+print_help(const char *progname, int status) {
+  FILE *out = stderr;
+  fprintf(out, "sasearch, a simple SA-based full-text search tool, version %s\n", divsufsort_version());
+  fprintf(out, "usage: %s PATTERN FILE SAFILE\n\n", progname);
+  exit(status);
+}
+
+int
+main(int argc, const char *argv[]) { /* usage: sasearch PATTERN FILE SAFILE */
+  FILE *fp;
+  const char *P;
+  sauchar_t *T;
+  saidx_t *SA;
+  LFS_OFF_T n;
+  size_t Psize;
+  saidx_t i, size, left;
+
+  if((argc == 1) ||
+     (strcmp(argv[1], "-h") == 0) ||
+     (strcmp(argv[1], "--help") == 0)) { print_help(argv[0], EXIT_SUCCESS); }
+  if(argc != 4) { print_help(argv[0], EXIT_FAILURE); }
+
+  P = argv[1];
+  Psize = strlen(P);
+
+  /* Open the text file for reading. */
+#if HAVE_FOPEN_S
+  if(fopen_s(&fp, argv[2], "rb") != 0) {
+#else
+  if((fp = LFS_FOPEN(argv[2], "rb")) == NULL) {
+#endif
+    fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], argv[2]);
+    perror(NULL);
+    exit(EXIT_FAILURE);
+  }
+
+  /* Get the file size. NOTE(review): n has no upper-bound check before the (saidx_t)n casts below -- confirm large inputs are rejected. */
+  if(LFS_FSEEK(fp, 0, SEEK_END) == 0) {
+    n = LFS_FTELL(fp);
+    rewind(fp);
+    if(n < 0) {
+      fprintf(stderr, "%s: Cannot ftell `%s': ", argv[0], argv[2]);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+  } else {
+    fprintf(stderr, "%s: Cannot fseek `%s': ", argv[0], argv[2]);
+    perror(NULL);
+    exit(EXIT_FAILURE);
+  }
+
+  /* Allocate n bytes for T and n saidx_t entries for SA. */
+  T = (sauchar_t *)malloc((size_t)n * sizeof(sauchar_t));
+  SA = (saidx_t *)malloc((size_t)n * sizeof(saidx_t));
+  if((T == NULL) || (SA == NULL)) {
+    fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
+    exit(EXIT_FAILURE);
+  }
+
+  /* Read n bytes of data; a short read is reported as a read error or an unexpected EOF. */
+  if(fread(T, sizeof(sauchar_t), (size_t)n, fp) != (size_t)n) {
+    fprintf(stderr, "%s: %s `%s': ",
+      argv[0],
+      (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in",
+      argv[2]);
+    perror(NULL);
+    exit(EXIT_FAILURE);
+  }
+  fclose(fp);
+
+  /* Open the SA file for reading; it must hold at least n saidx_t entries. */
+#if HAVE_FOPEN_S
+  if(fopen_s(&fp, argv[3], "rb") != 0) {
+#else
+  if((fp = LFS_FOPEN(argv[3], "rb")) == NULL) {
+#endif
+    fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], argv[3]);
+    perror(NULL);
+    exit(EXIT_FAILURE);
+  }
+
+  /* Read n * sizeof(saidx_t) bytes of data. */
+  if(fread(SA, sizeof(saidx_t), (size_t)n, fp) != (size_t)n) {
+    fprintf(stderr, "%s: %s `%s': ",
+      argv[0],
+      (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in",
+      argv[3]);
+    perror(NULL);
+    exit(EXIT_FAILURE);
+  }
+  fclose(fp);
+
+  /* Search and print SA[left .. left+size-1], one per line (sa_search presumably returns the match count -- confirm in divsufsort.h). */
+  size = sa_search(T, (saidx_t)n,
+                   (const sauchar_t *)P, (saidx_t)Psize,
+                   SA, (saidx_t)n, &left);
+  for(i = 0; i < size; ++i) {
+    fprintf(stdout, "%" PRIdSAIDX_T "\n", SA[left + i]);
+  }
+
+  /* Deallocate memory. */
+  free(SA);
+  free(T);
+
+  return 0;
+}
diff --git a/src/libdivsufsort/examples/suftest.c b/src/libdivsufsort/examples/suftest.c
new file mode 100644
index 0000000..71892ac
--- /dev/null
+++ b/src/libdivsufsort/examples/suftest.c
@@ -0,0 +1,164 @@
+/*
+ * suftest.c for libdivsufsort
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+#include <stdio.h>
+#if HAVE_STRING_H
+# include <string.h>
+#endif
+#if HAVE_STDLIB_H
+# include <stdlib.h>
+#endif
+#if HAVE_MEMORY_H
+# include <memory.h>
+#endif
+#if HAVE_STDDEF_H
+# include <stddef.h>
+#endif
+#if HAVE_STRINGS_H
+# include <strings.h>
+#endif
+#if HAVE_SYS_TYPES_H
+# include <sys/types.h>
+#endif
+#if HAVE_IO_H && HAVE_FCNTL_H
+# include <io.h>
+# include <fcntl.h>
+#endif
+#include <time.h>
+#include <divsufsort.h>
+#include "lfs.h"
+
+
+/* Prints the suftest banner (with the library version) and the usage line to
+ * stderr, then terminates the process with exit code `status`. */
+static void
+print_help(const char *progname, int status) {
+  FILE *out = stderr;
+  fprintf(out, "suftest, a suffixsort tester, version %s.\n", divsufsort_version());
+  fprintf(out, "usage: %s FILE\n\n", progname);
+  exit(status);
+}
+
+int
+main(int argc, const char *argv[]) { /* usage: suftest FILE; "-" selects stdin */
+  FILE *fp;
+  const char *fname;
+  sauchar_t *T;
+  saidx_t *SA;
+  LFS_OFF_T n;
+  clock_t start, finish;
+  saint_t needclose = 1; /* non-zero while fp is a file this program opened */
+
+  /* Check arguments. */
+  if((argc == 1) ||
+     (strcmp(argv[1], "-h") == 0) ||
+     (strcmp(argv[1], "--help") == 0)) { print_help(argv[0], EXIT_SUCCESS); }
+  if(argc != 2) { print_help(argv[0], EXIT_FAILURE); }
+
+  /* Open a file for reading. */
+  if(strcmp(argv[1], "-") != 0) {
+#if HAVE_FOPEN_S
+    if(fopen_s(&fp, fname = argv[1], "rb") != 0) {
+#else
+    if((fp = LFS_FOPEN(fname = argv[1], "rb")) == NULL) {
+#endif
+      fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], fname);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+  } else {
+#if HAVE__SETMODE && HAVE__FILENO
+    if(_setmode(_fileno(stdin), _O_BINARY) == -1) {
+      fprintf(stderr, "%s: Cannot set mode: ", argv[0]);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+#endif
+    fp = stdin;
+    fname = "stdin";
+    needclose = 0; /* stdin is not ours to close */
+  }
+
+  /* Get the file size; the input must be seekable and smaller than 0x7fffffff bytes. */
+  if(LFS_FSEEK(fp, 0, SEEK_END) == 0) {
+    n = LFS_FTELL(fp);
+    rewind(fp);
+    if(n < 0) {
+      fprintf(stderr, "%s: Cannot ftell `%s': ", argv[0], fname);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+    if(0x7fffffff <= n) {
+      fprintf(stderr, "%s: Input file `%s' is too big.\n", argv[0], fname);
+      exit(EXIT_FAILURE);
+    }
+  } else {
+    fprintf(stderr, "%s: Cannot fseek `%s': ", argv[0], fname);
+    perror(NULL);
+    exit(EXIT_FAILURE);
+  }
+
+  /* Allocate n bytes for T and n saidx_t entries for SA. */
+  T = (sauchar_t *)malloc((size_t)n * sizeof(sauchar_t));
+  SA = (saidx_t *)malloc((size_t)n * sizeof(saidx_t));
+  if((T == NULL) || (SA == NULL)) {
+    fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
+    exit(EXIT_FAILURE);
+  }
+
+  /* Read n bytes of data; report fname (not argv[1]) so stdin shows as "stdin", like every other message. */
+  if(fread(T, sizeof(sauchar_t), (size_t)n, fp) != (size_t)n) {
+    fprintf(stderr, "%s: %s `%s': ",
+      argv[0],
+      (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in",
+      fname);
+    perror(NULL);
+    exit(EXIT_FAILURE);
+  }
+  if(needclose & 1) { fclose(fp); }
+
+  /* Construct the suffix array; any non-zero result is reported as an allocation failure. */
+  fprintf(stderr, "%s: %" PRIdOFF_T " bytes ... ", fname, n);
+  start = clock();
+  if(divsufsort(T, SA, (saidx_t)n) != 0) {
+    fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
+    exit(EXIT_FAILURE);
+  }
+  finish = clock();
+  fprintf(stderr, "%.4f sec\n", (double)(finish - start) / (double)CLOCKS_PER_SEC);
+
+  /* Check the suffix array; a non-zero result from sufcheck ends the run with EXIT_FAILURE. */
+  if(sufcheck(T, SA, (saidx_t)n, 1) != 0) { exit(EXIT_FAILURE); }
+
+  /* Deallocate memory. */
+  free(SA);
+  free(T);
+
+  return 0;
+}
diff --git a/src/libdivsufsort/examples/unbwt.c b/src/libdivsufsort/examples/unbwt.c
new file mode 100644
index 0000000..c0f19e9
--- /dev/null
+++ b/src/libdivsufsort/examples/unbwt.c
@@ -0,0 +1,207 @@
+/*
+ * unbwt.c for libdivsufsort
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+#include <stdio.h>
+#if HAVE_STRING_H
+# include <string.h>
+#endif
+#if HAVE_STDLIB_H
+# include <stdlib.h>
+#endif
+#if HAVE_MEMORY_H
+# include <memory.h>
+#endif
+#if HAVE_STDDEF_H
+# include <stddef.h>
+#endif
+#if HAVE_STRINGS_H
+# include <strings.h>
+#endif
+#if HAVE_SYS_TYPES_H
+# include <sys/types.h>
+#endif
+#if HAVE_IO_H && HAVE_FCNTL_H
+# include <io.h>
+# include <fcntl.h>
+#endif
+#include <time.h>
+#include <divsufsort.h>
+#include "lfs.h"
+
+
+/* Reads a 4-byte little-endian integer into *n; returns fread's count (4 on success). */
+static size_t
+read_int(FILE *fp, saidx_t *n) {
+  unsigned char c[4];
+  size_t m = fread(c, sizeof(unsigned char), 4, fp);
+  if(m == 4) { /* widen first: (c[3] << 24) on a promoted int is undefined for bytes >= 0x80 */
+    *n = (saidx_t)(((unsigned long)c[0] <<  0) | ((unsigned long)c[1] <<  8) |
+                   ((unsigned long)c[2] << 16) | ((unsigned long)c[3] << 24));
+  }
+  return m;
+}
+
+/* Writes the version banner and the usage line to stderr, then exits with `status`. */
+static void
+print_help(const char *progname, int status) {
+  const char *version = divsufsort_version();
+  fprintf(stderr,
+          "unbwt, an inverse burrows-wheeler transform program, version %s.\n", version);
+  fprintf(stderr, "usage: %s INFILE OUTFILE\n\n", progname);
+  exit(status);
+}
+
+/* unbwt: reads a blocksize header, then inverts each (primary index, block) record; "-" = stdin/stdout. */
+int
+main(int argc, const char *argv[]) {
+  FILE *fp, *ofp;
+  const char *fname, *ofname;
+  sauchar_t *T;
+  saidx_t *A;
+  LFS_OFF_T n;
+  size_t m;
+  saidx_t pidx;
+  clock_t start, finish;
+  saint_t err, blocksize, needclose = 3; /* needclose: bit 0 = fclose(fp), bit 1 = fclose(ofp) */
+
+  /* Check arguments. */
+  if((argc == 1) || (strcmp(argv[1], "-h") == 0) ||
+     (strcmp(argv[1], "--help") == 0)) { print_help(argv[0], EXIT_SUCCESS); }
+  if(argc != 3) { print_help(argv[0], EXIT_FAILURE); }
+
+  /* Open a file for reading. */
+  if(strcmp(argv[1], "-") != 0) {
+#if HAVE_FOPEN_S
+    if(fopen_s(&fp, fname = argv[1], "rb") != 0) {
+#else
+    if((fp = LFS_FOPEN(fname = argv[1], "rb")) == NULL) {
+#endif
+      fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], fname);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+  } else {
+#if HAVE__SETMODE && HAVE__FILENO
+    if(_setmode(_fileno(stdin), _O_BINARY) == -1) {
+      fprintf(stderr, "%s: Cannot set mode: ", argv[0]);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+#endif
+    fp = stdin;
+    fname = "stdin";
+    needclose ^= 1;
+  }
+
+  /* Open a file for writing. */
+  if(strcmp(argv[2], "-") != 0) {
+#if HAVE_FOPEN_S
+    if(fopen_s(&ofp, ofname = argv[2], "wb") != 0) {
+#else
+    if((ofp = LFS_FOPEN(ofname = argv[2], "wb")) == NULL) {
+#endif
+      fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], ofname);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+  } else {
+#if HAVE__SETMODE && HAVE__FILENO
+    if(_setmode(_fileno(stdout), _O_BINARY) == -1) {
+      fprintf(stderr, "%s: Cannot set mode: ", argv[0]);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+#endif
+    ofp = stdout;
+    ofname = "stdout";
+    needclose ^= 2;
+  }
+
+  /* Read the blocksize and reject values that are negative or overflow the A[] allocation. */
+  if(read_int(fp, &blocksize) != 4) {
+    fprintf(stderr, "%s: Cannot read from `%s': ", argv[0], fname);
+    perror(NULL);
+    exit(EXIT_FAILURE);
+  }
+  if((blocksize < 0) || ((size_t)-1 / sizeof(saidx_t) < (size_t)blocksize)) {
+    fprintf(stderr, "%s: Invalid blocksize in `%s'.\n", argv[0], fname);
+    exit(EXIT_FAILURE);
+  }
+
+  /* Allocate 5blocksize bytes of memory. */
+  T = (sauchar_t *)malloc(blocksize * sizeof(sauchar_t));
+  A = (saidx_t *)malloc(blocksize * sizeof(saidx_t));
+  if((T == NULL) || (A == NULL)) {
+    fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
+    exit(EXIT_FAILURE);
+  }
+
+  fprintf(stderr, "UnBWT (blocksize %" PRIdSAINT_T ") ... ", blocksize);
+  start = clock();
+  for(n = 0; (m = read_int(fp, &pidx)) != 0; n += m) {
+    /* m is the index byte count here, then it is reused for the block length just read. */
+    if((m != 4) || ((m = fread(T, sizeof(sauchar_t), blocksize, fp)) == 0)) {
+      fprintf(stderr, "%s: %s `%s': ", argv[0],
+        (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in", fname);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+
+    /* Inverse Burrows-Wheeler Transform. */
+    if((err = inverse_bw_transform(T, T, A, m, pidx)) != 0) {
+      fprintf(stderr, "%s (reverseBWT): %s.\n", argv[0],
+        (err == -1) ? "Invalid data" : "Cannot allocate memory");
+      exit(EXIT_FAILURE);
+    }
+
+    /* Write m bytes of data; flush now so a failed write is reported instead of lost at close. */
+    if((fwrite(T, sizeof(sauchar_t), m, ofp) != m) || (fflush(ofp) != 0)) {
+      fprintf(stderr, "%s: Cannot write to `%s': ", argv[0], ofname);
+      perror(NULL);
+      exit(EXIT_FAILURE);
+    }
+  }
+  if(ferror(fp)) {
+    fprintf(stderr, "%s: Cannot read from `%s': ", argv[0], fname);
+    perror(NULL);
+    exit(EXIT_FAILURE);
+  }
+  finish = clock();
+  fprintf(stderr, "%" PRIdOFF_T " bytes: %.4f sec\n", n, (double)(finish - start) / (double)CLOCKS_PER_SEC);
+
+  /* Close files */
+  if(needclose & 1) { fclose(fp); }
+  if((needclose & 2) && (fclose(ofp) != 0)) { perror(ofname); exit(EXIT_FAILURE); }
+
+  /* Deallocate memory. */
+  free(A);
+  free(T);
+
+  return 0;
+}
diff --git a/src/libdivsufsort/include/CMakeLists.txt b/src/libdivsufsort/include/CMakeLists.txt
new file mode 100644
index 0000000..37781cc
--- /dev/null
+++ b/src/libdivsufsort/include/CMakeLists.txt
@@ -0,0 +1,162 @@
+include(CheckIncludeFiles)
+include(CheckIncludeFile)
+include(CheckSymbolExists)
+include(CheckTypeSize)
+include(CheckFunctionKeywords)
+include(CheckLFS)
+
+## Checks for header files; INCFILE becomes the include line substituted into divsufsort.h.cmake ##
+check_include_file("inttypes.h" HAVE_INTTYPES_H)
+check_include_file("memory.h" HAVE_MEMORY_H)
+check_include_file("stddef.h" HAVE_STDDEF_H)
+check_include_file("stdint.h" HAVE_STDINT_H)
+check_include_file("stdlib.h" HAVE_STDLIB_H)
+check_include_file("string.h" HAVE_STRING_H)
+check_include_file("strings.h" HAVE_STRINGS_H)
+check_include_file("sys/types.h" HAVE_SYS_TYPES_H)
+if(HAVE_INTTYPES_H)
+  set(INCFILE "#include <inttypes.h>")
+elseif(HAVE_STDINT_H)
+  set(INCFILE "#include <stdint.h>")
+else(HAVE_INTTYPES_H)
+  set(INCFILE "")  # neither header found: the generated header includes nothing
+endif(HAVE_INTTYPES_H)
+
+## Example-only checks (Windows binary I/O, large file support) and generation of lfs.h ##
+if(BUILD_EXAMPLES)
+  ## Checks for WinIO; the results feed the WinIO entries of config.h.cmake ##
+  if(WIN32)
+    check_include_file("io.h" HAVE_IO_H)
+    check_include_file("fcntl.h" HAVE_FCNTL_H)
+    check_symbol_exists("_setmode" "io.h;fcntl.h" HAVE__SETMODE)
+    if(NOT HAVE__SETMODE)
+      check_symbol_exists("setmode" "io.h;fcntl.h" HAVE_SETMODE)
+    endif(NOT HAVE__SETMODE)
+    check_symbol_exists("_fileno" "stdio.h" HAVE__FILENO)
+    check_symbol_exists("fopen_s" "stdio.h" HAVE_FOPEN_S)
+    check_symbol_exists("_O_BINARY" "fcntl.h" HAVE__O_BINARY)
+  endif(WIN32)
+
+  ## Checks for large file support (check_lfs comes from the CheckLFS module included above) ##
+  check_lfs(WITH_LFS)
+  configure_file("${CMAKE_CURRENT_SOURCE_DIR}/lfs.h.cmake" "${CMAKE_CURRENT_BINARY_DIR}/lfs.h" @ONLY)
+endif(BUILD_EXAMPLES)
+
+## generate config.h; INLINE is the first accepted spelling of the inline keyword, else empty ##
+check_function_keywords("inline;__inline;__inline__;__declspec(dllexport);__declspec(dllimport)")
+if(HAVE_INLINE)
+  set(INLINE "inline")
+elseif(HAVE___INLINE)
+  set(INLINE "__inline")
+elseif(HAVE___INLINE__)
+  set(INLINE "__inline__")
+else(HAVE_INLINE)
+  set(INLINE "")  # no keyword accepted: INLINE expands to nothing
+endif(HAVE_INLINE)
+configure_file("${CMAKE_CURRENT_SOURCE_DIR}/config.h.cmake" "${CMAKE_CURRENT_BINARY_DIR}/config.h")
+
+## Checks for types: choose the C types and printf formats substituted into divsufsort.h.cmake ##
+# sauchar_t (8bit): uint8_t when available, otherwise unsigned char if it is one byte wide
+check_type_size("uint8_t" UINT8_T)
+if(HAVE_UINT8_T)
+  set(SAUCHAR_TYPE "uint8_t")
+else(HAVE_UINT8_T)
+  check_type_size("unsigned char" SIZEOF_UNSIGNED_CHAR)
+  if("${SIZEOF_UNSIGNED_CHAR}" STREQUAL "1")
+    set(SAUCHAR_TYPE "unsigned char")
+  else("${SIZEOF_UNSIGNED_CHAR}" STREQUAL "1")
+    message(FATAL_ERROR "Cannot find unsigned 8-bit integer type")
+  endif("${SIZEOF_UNSIGNED_CHAR}" STREQUAL "1")
+endif(HAVE_UINT8_T)
+# saint_t (32bit): int32_t when available, otherwise the first of int/long/short/__int32 that is 4 bytes
+check_type_size("int32_t" INT32_T)
+if(HAVE_INT32_T)
+  set(SAINT32_TYPE "int32_t")
+  check_symbol_exists("PRId32" "inttypes.h" HAVE_PRID32)
+  if(HAVE_PRID32)
+    set(SAINT32_PRId "PRId32")
+  else(HAVE_PRID32)
+    set(SAINT32_PRId "\"d\"")
+  endif(HAVE_PRID32)
+else(HAVE_INT32_T)
+  check_type_size("int" SIZEOF_INT)
+  check_type_size("long" SIZEOF_LONG)
+  check_type_size("short" SIZEOF_SHORT)
+  check_type_size("__int32" SIZEOF___INT32)
+  if("${SIZEOF_INT}" STREQUAL "4")
+    set(SAINT32_TYPE "int")
+    set(SAINT32_PRId "\"d\"")
+  elseif("${SIZEOF_LONG}" STREQUAL "4")
+    set(SAINT32_TYPE "long")
+    set(SAINT32_PRId "\"ld\"")
+  elseif("${SIZEOF_SHORT}" STREQUAL "4")
+    set(SAINT32_TYPE "short")
+    set(SAINT32_PRId "\"d\"")
+  elseif("${SIZEOF___INT32}" STREQUAL "4")
+    set(SAINT32_TYPE "__int32")
+    set(SAINT32_PRId "\"d\"")
+  else("${SIZEOF_INT}" STREQUAL "4")
+    message(FATAL_ERROR "Cannot find 32-bit integer type")
+  endif("${SIZEOF_INT}" STREQUAL "4")
+endif(HAVE_INT32_T)
+# saint64_t (64bit): probed only when BUILD_DIVSUFSORT64 is on; int64_t, otherwise the first 8-byte type
+if(BUILD_DIVSUFSORT64)
+  check_type_size("int64_t" INT64_T)
+  if(HAVE_INT64_T)
+    set(SAINT64_TYPE "int64_t")
+    check_symbol_exists("PRId64" "inttypes.h" HAVE_PRID64)
+    if(HAVE_PRID64)
+      set(SAINT64_PRId "PRId64")
+    else(HAVE_PRID64)
+      set(SAINT64_PRId "\"lld\"")
+    endif(HAVE_PRID64)
+  else(HAVE_INT64_T)
+    check_type_size("int" SIZEOF_INT)
+    check_type_size("long" SIZEOF_LONG)
+    check_type_size("long long" SIZEOF_LONG_LONG)
+    check_type_size("__int64" SIZEOF___INT64)
+    if("${SIZEOF_INT}" STREQUAL "8")
+      set(SAINT64_TYPE "int")
+      set(SAINT64_PRId "\"d\"")
+    elseif("${SIZEOF_LONG}" STREQUAL "8")
+      set(SAINT64_TYPE "long")
+      set(SAINT64_PRId "\"ld\"")
+    elseif("${SIZEOF_LONG_LONG}" STREQUAL "8")
+      set(SAINT64_TYPE "long long")
+      set(SAINT64_PRId "\"lld\"")
+    elseif("${SIZEOF___INT64}" STREQUAL "8")
+      set(SAINT64_TYPE "__int64")
+      set(SAINT64_PRId "\"I64d\"")
+    else("${SIZEOF_INT}" STREQUAL "8")
+      message(SEND_ERROR "Cannot find 64-bit integer type")
+      set(BUILD_DIVSUFSORT64 OFF)  # so the divsufsort64.h generation further down is skipped
+    endif("${SIZEOF_INT}" STREQUAL "8")
+  endif(HAVE_INT64_T)
+endif(BUILD_DIVSUFSORT64)
+
+## generate divsufsort.h (32-bit index) and, when enabled, divsufsort64.h from the same template ##
+set(DIVSUFSORT_IMPORT "")
+set(DIVSUFSORT_EXPORT "")
+if(BUILD_SHARED_LIBS)
+  if(HAVE___DECLSPEC_DLLIMPORT_)
+    set(DIVSUFSORT_IMPORT "__declspec(dllimport)")
+  endif(HAVE___DECLSPEC_DLLIMPORT_)
+  if(HAVE___DECLSPEC_DLLEXPORT_)
+    set(DIVSUFSORT_EXPORT "__declspec(dllexport)")
+  endif(HAVE___DECLSPEC_DLLEXPORT_)
+endif(BUILD_SHARED_LIBS)
+set(W64BIT "")  # empty suffix: the template yields the plain divsufsort names
+set(SAINDEX_TYPE "${SAINT32_TYPE}")
+set(SAINDEX_PRId "${SAINT32_PRId}")
+set(SAINT_PRId "${SAINT32_PRId}")
+configure_file("${CMAKE_CURRENT_SOURCE_DIR}/divsufsort.h.cmake"
+               "${CMAKE_CURRENT_BINARY_DIR}/divsufsort.h" @ONLY)
+install(FILES "${CMAKE_CURRENT_BINARY_DIR}/divsufsort.h" DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
+if(BUILD_DIVSUFSORT64)
+  set(W64BIT "64")  # suffix appended to the guard, type and function names in the template
+  set(SAINDEX_TYPE "${SAINT64_TYPE}")
+  set(SAINDEX_PRId "${SAINT64_PRId}")
+  configure_file("${CMAKE_CURRENT_SOURCE_DIR}/divsufsort.h.cmake"
+                 "${CMAKE_CURRENT_BINARY_DIR}/divsufsort64.h" @ONLY)
+  install(FILES "${CMAKE_CURRENT_BINARY_DIR}/divsufsort64.h" DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
+endif(BUILD_DIVSUFSORT64)
diff --git a/src/libdivsufsort/include/config.h.cmake b/src/libdivsufsort/include/config.h.cmake
new file mode 100644
index 0000000..6a1cf47
--- /dev/null
+++ b/src/libdivsufsort/include/config.h.cmake
@@ -0,0 +1,81 @@
+/*
+ * config.h for libdivsufsort
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _CONFIG_H
+#define _CONFIG_H 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/** Define to the version of this package. **/
+#cmakedefine PROJECT_VERSION_FULL "${PROJECT_VERSION_FULL}"
+
+/** Each is defined to 1 when the corresponding header was found at configure time. **/
+#cmakedefine HAVE_INTTYPES_H 1
+#cmakedefine HAVE_STDDEF_H 1
+#cmakedefine HAVE_STDINT_H 1
+#cmakedefine HAVE_STDLIB_H 1
+#cmakedefine HAVE_STRING_H 1
+#cmakedefine HAVE_STRINGS_H 1
+#cmakedefine HAVE_MEMORY_H 1
+#cmakedefine HAVE_SYS_TYPES_H 1
+
+/** for WinIO: if only setmode() exists it is aliased to _setmode, and a missing _O_BINARY becomes 0 **/
+#cmakedefine HAVE_IO_H 1
+#cmakedefine HAVE_FCNTL_H 1
+#cmakedefine HAVE__SETMODE 1
+#cmakedefine HAVE_SETMODE 1
+#cmakedefine HAVE__FILENO 1
+#cmakedefine HAVE_FOPEN_S 1
+#cmakedefine HAVE__O_BINARY 1
+#ifndef HAVE__SETMODE
+# if HAVE_SETMODE
+#  define _setmode setmode
+#  define HAVE__SETMODE 1
+# endif
+# if HAVE__SETMODE && !HAVE__O_BINARY
+#  define _O_BINARY 0
+#  define HAVE__O_BINARY 1
+# endif
+#endif
+
+/** for inline: INLINE is the configured inline keyword spelling unless the build already defined it **/
+#ifndef INLINE
+# define INLINE @INLINE@
+#endif
+
+/** for VC++ warning: C4127 (conditional expression is constant) is silenced **/
+#ifdef _MSC_VER
+#pragma warning(disable: 4127)
+#endif
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif /* __cplusplus */
+
+#endif /* _CONFIG_H */
diff --git a/src/libdivsufsort/include/divsufsort.h b/src/libdivsufsort/include/divsufsort.h
new file mode 100644
index 0000000..7ebb412
--- /dev/null
+++ b/src/libdivsufsort/include/divsufsort.h
@@ -0,0 +1,189 @@
+/*
+ * divsufsort.h for libdivsufsort
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _DIVSUFSORT_H
+#define _DIVSUFSORT_H 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#define DIVSUFSORT_API
+
+/*- Datatypes (each typedef is skipped when its guard macro is already defined) -*/
+#ifndef SAUCHAR_T
+#define SAUCHAR_T
+typedef unsigned char sauchar_t; /* element type of the input string T */
+#endif /* SAUCHAR_T */
+#ifndef SAINT_T
+#define SAINT_T
+typedef int saint_t; /* status/result type returned by the API */
+#endif /* SAINT_T */
+#ifndef SAIDX_T
+#define SAIDX_T
+typedef int saidx_t; /* suffix array element and length type */
+#endif /* SAIDX_T */
+#ifndef PRIdSAIDX_T
+#define PRIdSAIDX_T "d"
+#endif /* PRIdSAIDX_T: printf conversion matching saidx_t */
+
+/*- divsufsort context: the two bucket arrays handed to divsufsort_build_array -*/
+typedef struct _divsufsort_ctx_t {
+   saidx_t *bucket_A; /* NOTE(review): presumably BUCKET_A_SIZE entries (divsufsort_private.h) - confirm */
+   saidx_t *bucket_B; /* NOTE(review): presumably BUCKET_B_SIZE entries (divsufsort_private.h) - confirm */
+} divsufsort_ctx_t;
+
+/*- Prototypes -*/
+
+/**
+ * Initialize suffix array context
+ * @param ctx suffix array context to initialize
+ * @return 0 for success, or non-zero in case of an error
+ */
+int divsufsort_init(divsufsort_ctx_t *ctx);
+
+/**
+ * Destroy suffix array context
+ *
+ * @param ctx suffix array context to destroy
+ */
+void divsufsort_destroy(divsufsort_ctx_t *ctx);
+
+/**
+ * Constructs the suffix array of a given string.
+ * @param ctx suffix array context (NOTE(review): presumably one set up by divsufsort_init - confirm)
+ * @param T[0..n-1] The input string.
+ * @param SA[0..n-1] The output array of suffixes.
+ * @param n The length of the given string.
+ * @return 0 if no error occurred, -1 or -2 otherwise.
+ */
+DIVSUFSORT_API
+saint_t divsufsort_build_array(divsufsort_ctx_t *ctx, const sauchar_t *T, saidx_t *SA, saidx_t n);
+
+#if 0
+/**
+ * Constructs the burrows-wheeler transformed string of a given string.
+ * @param T[0..n-1] The input string.
+ * @param U[0..n-1] The output string. (can be T)
+ * @param A[0..n-1] The temporary array. (can be NULL)
+ * @param n The length of the given string.
+ * @return The primary index if no error occurred, -1 or -2 otherwise.
+ */
+DIVSUFSORT_API
+saidx_t
+divbwt(const sauchar_t *T, sauchar_t *U, saidx_t *A, saidx_t n);
+
+/**
+ * Returns the version of the divsufsort library.
+ * @return The version number string.
+ */
+DIVSUFSORT_API
+const char *
+divsufsort_version(void);
+
+
+/**
+ * Constructs the burrows-wheeler transformed string of a given string and suffix array.
+ * @param T[0..n-1] The input string.
+ * @param U[0..n-1] The output string. (can be T)
+ * @param SA[0..n-1] The suffix array. (can be NULL)
+ * @param n The length of the given string.
+ * @param idx The output primary index.
+ * @return 0 if no error occurred, -1 or -2 otherwise.
+ */
+DIVSUFSORT_API
+saint_t
+bw_transform(const sauchar_t *T, sauchar_t *U,
+             saidx_t *SA /* can NULL */,
+             saidx_t n, saidx_t *idx);
+
+/**
+ * Inverse BW-transforms a given BWTed string.
+ * @param T[0..n-1] The input string.
+ * @param U[0..n-1] The output string. (can be T)
+ * @param A[0..n-1] The temporary array. (can be NULL)
+ * @param n The length of the given string.
+ * @param idx The primary index.
+ * @return 0 if no error occurred, -1 or -2 otherwise.
+ */
+DIVSUFSORT_API
+saint_t
+inverse_bw_transform(const sauchar_t *T, sauchar_t *U,
+                     saidx_t *A /* can NULL */,
+                     saidx_t n, saidx_t idx);
+
+/**
+ * Checks the correctness of a given suffix array.
+ * @param T[0..n-1] The input string.
+ * @param SA[0..n-1] The input suffix array.
+ * @param n The length of the given string.
+ * @param verbose The verbose mode.
+ * @return 0 if no error occurred.
+ */
+DIVSUFSORT_API
+saint_t
+sufcheck(const sauchar_t *T, const saidx_t *SA, saidx_t n, saint_t verbose);
+
+/**
+ * Search for the pattern P in the string T.
+ * @param T[0..Tsize-1] The input string.
+ * @param Tsize The length of the given string.
+ * @param P[0..Psize-1] The input pattern string.
+ * @param Psize The length of the given pattern string.
+ * @param SA[0..SAsize-1] The input suffix array.
+ * @param SAsize The length of the given suffix array.
+ * @param left The output index.
+ * @return The count of matches if no error occurred, -1 otherwise.
+ */
+DIVSUFSORT_API
+saidx_t
+sa_search(const sauchar_t *T, saidx_t Tsize,
+          const sauchar_t *P, saidx_t Psize,
+          const saidx_t *SA, saidx_t SAsize,
+          saidx_t *left);
+
+/**
+ * Search for the character c in the string T.
+ * @param T[0..Tsize-1] The input string.
+ * @param Tsize The length of the given string.
+ * @param SA[0..SAsize-1] The input suffix array.
+ * @param SAsize The length of the given suffix array.
+ * @param c The input character.
+ * @param left The output index.
+ * @return The count of matches if no error occurred, -1 otherwise.
+ */
+DIVSUFSORT_API
+saidx_t
+sa_simplesearch(const sauchar_t *T, saidx_t Tsize,
+                const saidx_t *SA, saidx_t SAsize,
+                saint_t c, saidx_t *left);
+#endif
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif /* __cplusplus */
+
+#endif /* _DIVSUFSORT_H */
diff --git a/src/libdivsufsort/include/divsufsort.h.cmake b/src/libdivsufsort/include/divsufsort.h.cmake
new file mode 100644
index 0000000..bcaba7c
--- /dev/null
+++ b/src/libdivsufsort/include/divsufsort.h.cmake
@@ -0,0 +1,180 @@
+/*
+ * divsufsort@W64BIT@.h for libdivsufsort@W64BIT@
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _DIVSUFSORT@W64BIT@_H
+#define _DIVSUFSORT@W64BIT@_H 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+@INCFILE@
+
+#ifndef DIVSUFSORT_API /* export/import decoration; both forms are empty unless configured for a shared build */
+# ifdef DIVSUFSORT_BUILD_DLL /* NOTE(review): presumably set only while compiling the library - confirm */
+#  define DIVSUFSORT_API @DIVSUFSORT_EXPORT@
+# else
+#  define DIVSUFSORT_API @DIVSUFSORT_IMPORT@
+# endif
+#endif
+
+/*- Datatypes (types and printf formats are substituted by include/CMakeLists.txt) -*/
+#ifndef SAUCHAR_T
+#define SAUCHAR_T
+typedef @SAUCHAR_TYPE@ sauchar_t;
+#endif /* SAUCHAR_T */
+#ifndef SAINT_T
+#define SAINT_T
+typedef @SAINT32_TYPE@ saint_t;
+#endif /* SAINT_T */
+#ifndef SAIDX@W64BIT@_T
+#define SAIDX@W64BIT@_T
+typedef @SAINDEX_TYPE@ saidx@W64BIT@_t;
+#endif /* SAIDX@W64BIT@_T */
+#ifndef PRIdSAINT_T
+#define PRIdSAINT_T @SAINT_PRId@
+#endif /* PRIdSAINT_T: printf conversion for saint_t */
+#ifndef PRIdSAIDX@W64BIT@_T
+#define PRIdSAIDX@W64BIT@_T @SAINDEX_PRId@
+#endif /* PRIdSAIDX@W64BIT@_T: printf conversion for the index type */
+
+
+/*- Prototypes -*/
+
+/**
+ * Constructs the suffix array of a given string.
+ * @param T[0..n-1] The input string.
+ * @param SA[0..n-1] The output array of suffixes.
+ * @param n The length of the given string.
+ * @return 0 if no error occurred, -1 or -2 otherwise.
+ */
+DIVSUFSORT_API
+saint_t
+divsufsort@W64BIT@(const sauchar_t *T, saidx@W64BIT@_t *SA, saidx@W64BIT@_t n);
+
+/**
+ * Constructs the burrows-wheeler transformed string of a given string.
+ * @param T[0..n-1] The input string.
+ * @param U[0..n-1] The output string. (can be T)
+ * @param A[0..n-1] The temporary array. (can be NULL)
+ * @param n The length of the given string.
+ * @return The primary index if no error occurred, -1 or -2 otherwise.
+ */
+DIVSUFSORT_API
+saidx@W64BIT@_t
+divbwt@W64BIT@(const sauchar_t *T, sauchar_t *U, saidx@W64BIT@_t *A, saidx@W64BIT@_t n);
+
+/**
+ * Returns the version of the divsufsort library.
+ * @return The version number string.
+ */
+DIVSUFSORT_API
+const char *
+divsufsort@W64BIT@_version(void);
+
+
+/**
+ * Constructs the burrows-wheeler transformed string of a given string and suffix array.
+ * @param T[0..n-1] The input string.
+ * @param U[0..n-1] The output string. (can be T)
+ * @param SA[0..n-1] The suffix array. (can be NULL)
+ * @param n The length of the given string.
+ * @param idx The output primary index.
+ * @return 0 if no error occurred, -1 or -2 otherwise.
+ */
+DIVSUFSORT_API
+saint_t
+bw_transform@W64BIT@(const sauchar_t *T, sauchar_t *U,
+             saidx@W64BIT@_t *SA /* can NULL */,
+             saidx@W64BIT@_t n, saidx@W64BIT@_t *idx);
+
+/**
+ * Inverse BW-transforms a given BWTed string.
+ * @param T[0..n-1] The input string.
+ * @param U[0..n-1] The output string. (can be T)
+ * @param A[0..n-1] The temporary array. (can be NULL)
+ * @param n The length of the given string.
+ * @param idx The primary index.
+ * @return 0 if no error occurred, -1 or -2 otherwise.
+ */
+DIVSUFSORT_API
+saint_t
+inverse_bw_transform@W64BIT@(const sauchar_t *T, sauchar_t *U,
+                     saidx@W64BIT@_t *A /* can NULL */,
+                     saidx@W64BIT@_t n, saidx@W64BIT@_t idx);
+
+/**
+ * Checks the correctness of a given suffix array.
+ * @param T[0..n-1] The input string.
+ * @param SA[0..n-1] The input suffix array.
+ * @param n The length of the given string.
+ * @param verbose The verbose mode.
+ * @return 0 if no error occurred.
+ */
+DIVSUFSORT_API
+saint_t
+sufcheck@W64BIT@(const sauchar_t *T, const saidx@W64BIT@_t *SA, saidx@W64BIT@_t n, saint_t verbose);
+
+/**
+ * Search for the pattern P in the string T.
+ * @param T[0..Tsize-1] The input string.
+ * @param Tsize The length of the given string.
+ * @param P[0..Psize-1] The input pattern string.
+ * @param Psize The length of the given pattern string.
+ * @param SA[0..SAsize-1] The input suffix array.
+ * @param SAsize The length of the given suffix array.
+ * @param left The output index.
+ * @return The count of matches if no error occurred, -1 otherwise.
+ */
+DIVSUFSORT_API
+saidx@W64BIT@_t
+sa_search@W64BIT@(const sauchar_t *T, saidx@W64BIT@_t Tsize,
+          const sauchar_t *P, saidx@W64BIT@_t Psize,
+          const saidx@W64BIT@_t *SA, saidx@W64BIT@_t SAsize,
+          saidx@W64BIT@_t *left);
+
+/**
+ * Search for the character c in the string T.
+ * @param T[0..Tsize-1] The input string.
+ * @param Tsize The length of the given string.
+ * @param SA[0..SAsize-1] The input suffix array.
+ * @param SAsize The length of the given suffix array.
+ * @param c The input character.
+ * @param left The output index.
+ * @return The count of matches if no error occurred, -1 otherwise.
+ */
+DIVSUFSORT_API
+saidx@W64BIT@_t
+sa_simplesearch@W64BIT@(const sauchar_t *T, saidx@W64BIT@_t Tsize,
+                const saidx@W64BIT@_t *SA, saidx@W64BIT@_t SAsize,
+                saint_t c, saidx@W64BIT@_t *left);
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif /* __cplusplus */
+
+#endif /* _DIVSUFSORT@W64BIT@_H */
diff --git a/src/libdivsufsort/include/divsufsort_config.h b/src/libdivsufsort/include/divsufsort_config.h
new file mode 100644
index 0000000..4054a8a
--- /dev/null
+++ b/src/libdivsufsort/include/divsufsort_config.h
@@ -0,0 +1,9 @@
+#define HAVE_STRING_H 1 /* feature switches tested by divsufsort_private.h's conditional includes */
+#define HAVE_STDLIB_H 1
+#define HAVE_MEMORY_H 1
+#define HAVE_STDINT_H 1
+#define INLINE inline
+/* MSVC only: silence C4244 (conversion with possible loss of data). */
+#ifdef _MSC_VER
+#pragma warning( disable : 4244 )
+#endif /* _MSC_VER */
diff --git a/src/libdivsufsort/include/divsufsort_private.h b/src/libdivsufsort/include/divsufsort_private.h
new file mode 100644
index 0000000..b4d97ad
--- /dev/null
+++ b/src/libdivsufsort/include/divsufsort_private.h
@@ -0,0 +1,205 @@
+/*
+ * divsufsort_private.h for libdivsufsort
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _DIVSUFSORT_PRIVATE_H
+#define _DIVSUFSORT_PRIVATE_H 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#include "divsufsort_config.h"
+#include <assert.h>
+#include <stdio.h>
+#if HAVE_STRING_H
+# include <string.h>
+#endif
+#if HAVE_STDLIB_H
+# include <stdlib.h>
+#endif
+#if HAVE_MEMORY_H
+# include <memory.h>
+#endif
+#if HAVE_STDDEF_H
+# include <stddef.h>
+#endif
+#if HAVE_STRINGS_H
+# include <strings.h>
+#endif
+#if HAVE_INTTYPES_H
+# include <inttypes.h>
+#else
+# if HAVE_STDINT_H
+#  include <stdint.h>
+# endif
+#endif
+#if defined(BUILD_DIVSUFSORT64)
+# include "divsufsort64.h"
+# ifndef SAIDX_T
+#  define SAIDX_T
+#  define saidx_t saidx64_t
+# endif /* SAIDX_T */
+# ifndef PRIdSAIDX_T
+#  define PRIdSAIDX_T PRIdSAIDX64_T
+# endif /* PRIdSAIDX_T */
+# define divsufsort divsufsort64
+# define divbwt divbwt64
+# define divsufsort_version divsufsort64_version
+# define bw_transform bw_transform64
+# define inverse_bw_transform inverse_bw_transform64
+# define sufcheck sufcheck64
+# define sa_search sa_search64
+# define sa_simplesearch sa_simplesearch64
+# define sssort sssort64
+# define trsort trsort64
+#else
+# include "divsufsort.h"
+#endif
+
+
+/*- Constants (ALPHABET_SIZE and the SS_* knobs may be predefined; out-of-range values are replaced) -*/
+#if !defined(UINT8_MAX)
+# define UINT8_MAX (255)
+#endif /* UINT8_MAX */
+#if defined(ALPHABET_SIZE) && (ALPHABET_SIZE < 1)
+# undef ALPHABET_SIZE
+#endif
+#if !defined(ALPHABET_SIZE)
+# define ALPHABET_SIZE (UINT8_MAX + 1)
+#endif
+/* for divsufsort.c: one bucket_A slot per symbol, one bucket_B slot per ordered symbol pair */
+#define BUCKET_A_SIZE (ALPHABET_SIZE)
+#define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE)
+/* for sssort.c: threshold is forced to >= 1 (default 8); blocksize is forced into 0..32767 (default 1024) */
+#if defined(SS_INSERTIONSORT_THRESHOLD)
+# if SS_INSERTIONSORT_THRESHOLD < 1
+#  undef SS_INSERTIONSORT_THRESHOLD
+#  define SS_INSERTIONSORT_THRESHOLD (1)
+# endif
+#else
+# define SS_INSERTIONSORT_THRESHOLD (8)
+#endif
+#if defined(SS_BLOCKSIZE)
+# if SS_BLOCKSIZE < 0
+#  undef SS_BLOCKSIZE
+#  define SS_BLOCKSIZE (0)
+# elif 32768 <= SS_BLOCKSIZE
+#  undef SS_BLOCKSIZE
+#  define SS_BLOCKSIZE (32767)
+# endif
+#else
+# define SS_BLOCKSIZE (1024)
+#endif
+/* minstacksize = log(SS_BLOCKSIZE) / log(3) * 2; SS_BLOCKSIZE == 0 gets the largest stacks */
+#if SS_BLOCKSIZE == 0
+# if defined(BUILD_DIVSUFSORT64)
+#  define SS_MISORT_STACKSIZE (96)
+# else
+#  define SS_MISORT_STACKSIZE (64)
+# endif
+#elif SS_BLOCKSIZE <= 4096
+# define SS_MISORT_STACKSIZE (16)
+#else
+# define SS_MISORT_STACKSIZE (24)
+#endif
+#if defined(BUILD_DIVSUFSORT64)
+# define SS_SMERGE_STACKSIZE (64)
+#else
+# define SS_SMERGE_STACKSIZE (32)
+#endif
+/* for trsort.c: the 64-bit index build gets the deeper stack */
+#define TR_INSERTIONSORT_THRESHOLD (8)
+#if defined(BUILD_DIVSUFSORT64)
+# define TR_STACKSIZE (96)
+#else
+# define TR_STACKSIZE (64)
+#endif
+
+
+/*- Macros (several rely on names in the expanding scope: t, stack, ssize, STACK_SIZE, bucket_A, bucket_B) -*/
+#ifndef SWAP
+# define SWAP(_a, _b) do { t = (_a); (_a) = (_b); (_b) = t; } while(0)
+#endif /* SWAP: needs a temporary named t declared by the caller */
+#ifndef MIN
+# define MIN(_a, _b) (((_a) < (_b)) ? (_a) : (_b))
+#endif /* MIN: an argument may be evaluated twice */
+#ifndef MAX
+# define MAX(_a, _b) (((_a) > (_b)) ? (_a) : (_b))
+#endif /* MAX: an argument may be evaluated twice */
+#define STACK_PUSH(_a, _b, _c, _d)\
+  do {\
+    assert(ssize < STACK_SIZE);\
+    stack[ssize].a = (_a), stack[ssize].b = (_b),\
+    stack[ssize].c = (_c), stack[ssize++].d = (_d);\
+  } while(0) /* stores one four-field frame at stack[ssize], then increments ssize */
+#define STACK_PUSH5(_a, _b, _c, _d, _e)\
+  do {\
+    assert(ssize < STACK_SIZE);\
+    stack[ssize].a = (_a), stack[ssize].b = (_b),\
+    stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\
+  } while(0) /* five-field variant of STACK_PUSH */
+#define STACK_POP(_a, _b, _c, _d)\
+  do {\
+    assert(0 <= ssize);\
+    if(ssize == 0) { return; }\
+    (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
+    (_c) = stack[ssize].c, (_d) = stack[ssize].d;\
+  } while(0) /* NOTE: returns from the enclosing function when the stack is empty */
+#define STACK_POP5(_a, _b, _c, _d, _e)\
+  do {\
+    assert(0 <= ssize);\
+    if(ssize == 0) { return; }\
+    (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
+    (_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\
+  } while(0) /* five-field variant; also returns from the enclosing function when empty */
+/* for divsufsort.c: BUCKET_B and BUCKET_BSTAR index the same bucket_B array with the symbols swapped */
+#define BUCKET_A(_c0) bucket_A[(_c0)]
+#if ALPHABET_SIZE == 256
+#define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)])
+#define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)])
+#else
+#define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)])
+#define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)])
+#endif
+
+
+/*- Private Prototypes (both names are remapped to their 64 variants when BUILD_DIVSUFSORT64 is defined) -*/
+/* sssort.c */
+void
+sssort(const sauchar_t *Td, const saidx_t *PA,
+       saidx_t *first, saidx_t *last,
+       saidx_t *buf, saidx_t bufsize,
+       saidx_t depth, saidx_t n, saint_t lastsuffix);
+/* trsort.c */
+void
+trsort(saidx_t *ISA, saidx_t *SA, saidx_t n, saidx_t depth);
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif /* __cplusplus */
+
+#endif /* _DIVSUFSORT_PRIVATE_H */
diff --git a/src/libdivsufsort/include/lfs.h.cmake b/src/libdivsufsort/include/lfs.h.cmake
new file mode 100644
index 0000000..d5b84a8
--- /dev/null
+++ b/src/libdivsufsort/include/lfs.h.cmake
@@ -0,0 +1,56 @@
+/*
+ * lfs.h for libdivsufsort
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _LFS_H
+#define _LFS_H 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#ifndef __STRICT_ANSI__
+# define LFS_OFF_T @LFS_OFF_T@
+# define LFS_FOPEN @LFS_FOPEN@
+# define LFS_FTELL @LFS_FTELL@
+# define LFS_FSEEK @LFS_FSEEK@
+# define LFS_PRId  @LFS_PRID@
+#else
+# define LFS_OFF_T long
+# define LFS_FOPEN fopen
+# define LFS_FTELL ftell
+# define LFS_FSEEK fseek
+# define LFS_PRId "ld"
+#endif
+#ifndef PRIdOFF_T
+# define PRIdOFF_T LFS_PRId
+#endif
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif /* __cplusplus */
+
+#endif /* _LFS_H */
diff --git a/src/libdivsufsort/lib/CMakeLists.txt b/src/libdivsufsort/lib/CMakeLists.txt
new file mode 100644
index 0000000..abc90e6
--- /dev/null
+++ b/src/libdivsufsort/lib/CMakeLists.txt
@@ -0,0 +1,31 @@
+include_directories("${CMAKE_CURRENT_SOURCE_DIR}/../include"
+                    "${CMAKE_CURRENT_BINARY_DIR}/../include")
+
+set(divsufsort_SRCS divsufsort.c sssort.c trsort.c divsufsort_utils.c) # utils.c is named divsufsort_utils.c in this tree
+
+## libdivsufsort ##
+add_library(divsufsort ${divsufsort_SRCS})
+install(TARGETS divsufsort
+  RUNTIME DESTINATION ${CMAKE_INSTALL_RUNTIMEDIR}
+  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+  ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
+set_target_properties(divsufsort PROPERTIES
+  VERSION   "${LIBRARY_VERSION}"
+  SOVERSION "${LIBRARY_SOVERSION}"
+  DEFINE_SYMBOL DIVSUFSORT_BUILD_DLL
+  RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/../examples")
+
+## libdivsufsort64 ##
+if(BUILD_DIVSUFSORT64)
+  add_library(divsufsort64 ${divsufsort_SRCS})
+  install(TARGETS divsufsort64
+    RUNTIME DESTINATION ${CMAKE_INSTALL_RUNTIMEDIR}
+    LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+    ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
+  set_target_properties(divsufsort64 PROPERTIES
+    VERSION   "${LIBRARY_VERSION}"
+    SOVERSION "${LIBRARY_SOVERSION}"
+    DEFINE_SYMBOL DIVSUFSORT_BUILD_DLL
+    COMPILE_FLAGS "-DBUILD_DIVSUFSORT64"
+    RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/../examples")
+endif(BUILD_DIVSUFSORT64)
diff --git a/src/libdivsufsort/lib/divsufsort.c b/src/libdivsufsort/lib/divsufsort.c
new file mode 100644
index 0000000..50631ac
--- /dev/null
+++ b/src/libdivsufsort/lib/divsufsort.c
@@ -0,0 +1,431 @@
+/*
+ * divsufsort.c for libdivsufsort
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "divsufsort_private.h"
+#ifdef _OPENMP
+# include <omp.h>
+#endif
+
+
+/*- Private Functions -*/
+
+/* Sorts suffixes of type B*; returns m, the number of type B* suffixes found in T. */
+static
+saidx_t
+sort_typeBstar(const sauchar_t *T, saidx_t *SA,
+               saidx_t *bucket_A, saidx_t *bucket_B,
+               saidx_t n) {
+  saidx_t *PAb, *ISAb, *buf;
+#ifdef _OPENMP
+  saidx_t *curbuf;
+  saidx_t l;
+#endif
+  saidx_t i, j, k, t, m, bufsize;
+  saint_t c0, c1;
+#ifdef _OPENMP
+  saint_t d0, d1;
+  int tmp;
+#endif
+
+  /* Initialize bucket arrays. */
+  for(i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; }
+  for(i = 0; i < BUCKET_B_SIZE; ++i) { bucket_B[i] = 0; }
+
+  /* Count the number of occurrences of the first one or two characters of each
+     type A, B and B* suffix. Moreover, store the beginning position of all
+     type B* suffixes into the array SA. */
+  for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) {
+    /* type A suffix. */
+    do { ++BUCKET_A(c1 = c0); } while((0 <= --i) && ((c0 = T[i]) >= c1));
+    if(0 <= i) {
+      /* type B* suffix. */
+      ++BUCKET_BSTAR(c0, c1);
+      SA[--m] = i;
+      /* type B suffix. */
+      for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) {
+        ++BUCKET_B(c0, c1);
+      }
+    }
+  }
+  m = n - m;
+/*
+note:
+  A type B* suffix is lexicographically smaller than a type B suffix that
+  begins with the same first two characters.
+*/
+
+  /* Calculate the index of start/end point of each bucket. */
+  for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) {
+    t = i + BUCKET_A(c0);
+    BUCKET_A(c0) = i + j; /* start point */
+    i = t + BUCKET_B(c0, c0);
+    for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) {
+      j += BUCKET_BSTAR(c0, c1);
+      BUCKET_BSTAR(c0, c1) = j; /* end point */
+      i += BUCKET_B(c0, c1);
+    }
+  }
+
+  if(0 < m) {
+    /* Sort the type B* suffixes by their first two characters. */
+    PAb = SA + n - m; ISAb = SA + m;
+    for(i = m - 2; 0 <= i; --i) {
+      t = PAb[i], c0 = T[t], c1 = T[t + 1];
+      SA[--BUCKET_BSTAR(c0, c1)] = i;
+    }
+    t = PAb[m - 1], c0 = T[t], c1 = T[t + 1];
+    SA[--BUCKET_BSTAR(c0, c1)] = m - 1;
+
+    /* Sort the type B* substrings using sssort. */
+#ifdef _OPENMP
+    tmp = omp_get_max_threads();
+    buf = SA + m, bufsize = (n - (2 * m)) / tmp;
+    c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m;
+#pragma omp parallel default(shared) private(curbuf, k, l, d0, d1, tmp)
+    {
+      tmp = omp_get_thread_num();
+      curbuf = buf + tmp * bufsize;
+      k = 0;
+      for(;;) {
+        #pragma omp critical(sssort_lock)
+        {
+          if(0 < (l = j)) {
+            d0 = c0, d1 = c1;
+            do {
+              k = BUCKET_BSTAR(d0, d1);
+              if(--d1 <= d0) {
+                d1 = ALPHABET_SIZE - 1;
+                if(--d0 < 0) { break; }
+              }
+            } while(((l - k) <= 1) && (0 < (l = k)));
+            c0 = d0, c1 = d1, j = k;
+          }
+        }
+        if(l == 0) { break; }
+        sssort(T, PAb, SA + k, SA + l,
+               curbuf, bufsize, 2, n, *(SA + k) == (m - 1));
+      }
+    }
+#else
+    buf = SA + m, bufsize = n - (2 * m);
+    for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) {
+      for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) {
+        i = BUCKET_BSTAR(c0, c1);
+        if(1 < (j - i)) {
+          sssort(T, PAb, SA + i, SA + j,
+                 buf, bufsize, 2, n, *(SA + i) == (m - 1));
+        }
+      }
+    }
+#endif
+
+    /* Compute ranks of type B* substrings. */
+    for(i = m - 1; 0 <= i; --i) {
+      if(0 <= SA[i]) {
+        j = i;
+        do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i]));
+        SA[i + 1] = i - j;
+        if(i <= 0) { break; }
+      }
+      j = i;
+      do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0);
+      ISAb[SA[i]] = j;
+    }
+
+    /* Construct the inverse suffix array of type B* suffixes using trsort. */
+    trsort(ISAb, SA, m, 1);
+
+    /* Set the sorted order of type B* suffixes. */
+    for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
+      for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
+      if(0 <= i) {
+        t = i;
+        for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { }
+        SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t;
+      }
+    }
+
+    /* Calculate the index of start/end point of each bucket. */
+    BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */
+    for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) {
+      i = BUCKET_A(c0 + 1) - 1;
+      for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) {
+        t = i - BUCKET_B(c0, c1);
+        BUCKET_B(c0, c1) = i; /* end point */
+
+        /* Move all type B* suffixes to the correct position. */
+        for(i = t, j = BUCKET_BSTAR(c0, c1);
+            j <= k;
+            --i, --k) { SA[i] = SA[k]; }
+      }
+      BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */
+      BUCKET_B(c0, c0) = i; /* end point */
+    }
+  }
+
+  return m;
+}
+
+/* Constructs the suffix array by using the sorted order of type B* suffixes. */
+static
+void
+construct_SA(const sauchar_t *T, saidx_t *SA,
+             saidx_t *bucket_A, saidx_t *bucket_B,
+             saidx_t n, saidx_t m) {
+  saidx_t *i, *j, *k;
+  saidx_t s;
+  saint_t c0, c1, c2;
+
+  if(0 < m) {
+    /* Construct the sorted order of type B suffixes by using
+       the sorted order of type B* suffixes. */
+    for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
+      /* Scan the suffix array from right to left. */
+      for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
+          j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
+          i <= j;
+          --j) {
+        if(0 < (s = *j)) {
+          assert(T[s] == c1);
+          assert(((s + 1) < n) && (T[s] <= T[s + 1]));
+          assert(T[s - 1] <= T[s]);
+          *j = ~s;
+          c0 = T[--s];
+          if((0 < s) && (T[s - 1] > c0)) { s = ~s; } /* complement when the preceding character is larger */
+          if(c0 != c2) {
+            if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
+            k = SA + BUCKET_B(c2 = c0, c1);
+          }
+          assert(k < j);
+          *k-- = s;
+        } else {
+          assert(((s == 0) && (T[s] == c1)) || (s < 0));
+          *j = ~s;
+        }
+      }
+    }
+  }
+
+  /* Construct the suffix array by using
+     the sorted order of type B suffixes. */
+  k = SA + BUCKET_A(c2 = T[n - 1]);
+  *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1); /* seed the scan with the last suffix, n - 1 */
+  /* Scan the suffix array from left to right. */
+  for(i = SA, j = SA + n; i < j; ++i) {
+    if(0 < (s = *i)) {
+      assert(T[s - 1] >= T[s]);
+      c0 = T[--s];
+      if((s == 0) || (T[s - 1] < c0)) { s = ~s; } /* complement at text start or when the preceding character is smaller */
+      if(c0 != c2) {
+        BUCKET_A(c2) = k - SA;
+        k = SA + BUCKET_A(c2 = c0);
+      }
+      assert(i < k);
+      *k++ = s;
+    } else {
+      assert(s < 0);
+      *i = ~s; /* undo the complement so the slot holds the plain index */
+    }
+  }
+}
+
+#if 0
+/* Constructs the burrows-wheeler transformed string directly
+   by using the sorted order of type B* suffixes. */
+static
+saidx_t
+construct_BWT(const sauchar_t *T, saidx_t *SA,
+              saidx_t *bucket_A, saidx_t *bucket_B,
+              saidx_t n, saidx_t m) {
+  saidx_t *i, *j, *k, *orig;
+  saidx_t s;
+  saint_t c0, c1, c2;
+
+  if(0 < m) {
+    /* Construct the sorted order of type B suffixes by using
+       the sorted order of type B* suffixes. */
+    for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
+      /* Scan the suffix array from right to left. */
+      for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
+          j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
+          i <= j;
+          --j) {
+        if(0 < (s = *j)) {
+          assert(T[s] == c1);
+          assert(((s + 1) < n) && (T[s] <= T[s + 1]));
+          assert(T[s - 1] <= T[s]);
+          c0 = T[--s];
+          *j = ~((saidx_t)c0);
+          if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
+          if(c0 != c2) {
+            if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
+            k = SA + BUCKET_B(c2 = c0, c1);
+          }
+          assert(k < j);
+          *k-- = s;
+        } else if(s != 0) {
+          *j = ~s;
+#ifndef NDEBUG
+        } else {
+          assert(T[s] == c1);
+#endif
+        }
+      }
+    }
+  }
+
+  /* Construct the BWTed string by using
+     the sorted order of type B suffixes. */
+  k = SA + BUCKET_A(c2 = T[n - 1]);
+  *k++ = (T[n - 2] < c2) ? ~((saidx_t)T[n - 2]) : (n - 1);
+  /* Scan the suffix array from left to right. */
+  for(i = SA, j = SA + n, orig = SA; i < j; ++i) {
+    if(0 < (s = *i)) {
+      assert(T[s - 1] >= T[s]);
+      c0 = T[--s];
+      *i = c0;
+      if((0 < s) && (T[s - 1] < c0)) { s = ~((saidx_t)T[s - 1]); }
+      if(c0 != c2) {
+        BUCKET_A(c2) = k - SA;
+        k = SA + BUCKET_A(c2 = c0);
+      }
+      assert(i < k);
+      *k++ = s;
+    } else if(s != 0) {
+      *i = ~s;
+    } else {
+      orig = i;
+    }
+  }
+
+  return orig - SA;
+}
+#endif
+
+/*---------------------------------------------------------------------------*/
+
+/**
+ * Initialize suffix array context
+ * @param ctx suffix array context whose two bucket arrays are allocated here
+ * @return 0 for success, or -1 if either bucket array could not be allocated
+ */
+int divsufsort_init(divsufsort_ctx_t *ctx) {
+   ctx->bucket_A = (saidx_t *)malloc(BUCKET_A_SIZE * sizeof(saidx_t));
+   ctx->bucket_B = NULL;
+
+   if (ctx->bucket_A) {
+      ctx->bucket_B = (saidx_t *)malloc(BUCKET_B_SIZE * sizeof(saidx_t));
+
+      if (ctx->bucket_B)
+         return 0;
+   }
+
+   divsufsort_destroy(ctx); /* release whatever was allocated before the failure */
+   return -1;
+}
+
+/**
+ * Destroy suffix array context: frees both bucket arrays and resets their pointers to NULL
+ *
+ * @param ctx suffix array context to destroy
+ */
+void divsufsort_destroy(divsufsort_ctx_t *ctx) {
+   if (ctx->bucket_B) {
+      free(ctx->bucket_B);
+      ctx->bucket_B = NULL;
+   }
+
+   if (ctx->bucket_A) {
+      free(ctx->bucket_A);
+      ctx->bucket_A = NULL;
+   }
+}
+
+/*- Function -*/
+
+saint_t
+divsufsort_build_array(divsufsort_ctx_t *ctx, const sauchar_t *T, saidx_t *SA, saidx_t n) {
+  saidx_t m;
+  saint_t err = 0;
+
+  /* Check arguments; inputs of 0..2 symbols are answered directly, without touching ctx. */
+  if((T == NULL) || (SA == NULL) || (n < 0)) { return -1; }
+  else if(n == 0) { return 0; }
+  else if(n == 1) { SA[0] = 0; return 0; }
+  else if(n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; }
+
+  /* Suffixsort. A NULL context is reported as error -2 instead of being dereferenced. */
+  if((ctx != NULL) && (ctx->bucket_A != NULL) && (ctx->bucket_B != NULL)) {
+    m = sort_typeBstar(T, SA, ctx->bucket_A, ctx->bucket_B, n);
+    construct_SA(T, SA, ctx->bucket_A, ctx->bucket_B, n, m);
+  } else {
+    err = -2; /* context missing or its bucket arrays are not allocated */
+  }
+
+  return err;
+}
+
+#if 0
+saidx_t
+divbwt(const sauchar_t *T, sauchar_t *U, saidx_t *A, saidx_t n) {
+  saidx_t *B;
+  saidx_t *bucket_A, *bucket_B;
+  saidx_t m, pidx, i;
+
+  /* Check arguments. */
+  if((T == NULL) || (U == NULL) || (n < 0)) { return -1; }
+  else if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; }
+
+  if((B = A) == NULL) { B = (saidx_t *)malloc((size_t)(n + 1) * sizeof(saidx_t)); }
+  bucket_A = (saidx_t *)malloc(BUCKET_A_SIZE * sizeof(saidx_t));
+  bucket_B = (saidx_t *)malloc(BUCKET_B_SIZE * sizeof(saidx_t));
+
+  /* Burrows-Wheeler Transform. */
+  if((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) {
+    m = sort_typeBstar(T, B, bucket_A, bucket_B, n);
+    pidx = construct_BWT(T, B, bucket_A, bucket_B, n, m);
+
+    /* Copy to output string. */
+    U[0] = T[n - 1];
+    for(i = 0; i < pidx; ++i) { U[i + 1] = (sauchar_t)B[i]; }
+    for(i += 1; i < n; ++i) { U[i] = (sauchar_t)B[i]; }
+    pidx += 1;
+  } else {
+    pidx = -2;
+  }
+
+  free(bucket_B);
+  free(bucket_A);
+  if(A == NULL) { free(B); }
+
+  return pidx;
+}
+
+const char *
+divsufsort_version(void) {
+  return PROJECT_VERSION_FULL;
+}
+#endif
diff --git a/src/libdivsufsort/lib/divsufsort_utils.c b/src/libdivsufsort/lib/divsufsort_utils.c
new file mode 100644
index 0000000..f7cbc0d
--- /dev/null
+++ b/src/libdivsufsort/lib/divsufsort_utils.c
@@ -0,0 +1,383 @@
+/*
+ * divsufsort_utils.c for libdivsufsort
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "divsufsort_private.h"
+
+
+/*- Private Function -*/
+
+#if 0
+/* Binary search for inverse bwt. */
+static
+saidx_t
+binarysearch_lower(const saidx_t *A, saidx_t size, saidx_t value) {
+  saidx_t half, i;
+  for(i = 0, half = size >> 1;
+      0 < size;
+      size = half, half >>= 1) {
+    if(A[i + half] < value) {
+      i += half + 1;
+      half -= (size & 1) ^ 1;
+    }
+  }
+  return i;
+}
+
+
+/*- Functions -*/
+
+/* Burrows-Wheeler transform. */
+saint_t
+bw_transform(const sauchar_t *T, sauchar_t *U, saidx_t *SA,
+             saidx_t n, saidx_t *idx) {
+  saidx_t *A, i, j, p, t;
+  saint_t c;
+
+  /* Check arguments. */
+  if((T == NULL) || (U == NULL) || (n < 0) || (idx == NULL)) { return -1; }
+  if(n <= 1) {
+    if(n == 1) { U[0] = T[0]; }
+    *idx = n;
+    return 0;
+  }
+
+  if((A = SA) == NULL) {
+    i = divbwt(T, U, NULL, n);
+    if(0 <= i) { *idx = i; i = 0; }
+    return (saint_t)i;
+  }
+
+  /* BW transform. */
+  if(T == U) {
+    t = n;
+    for(i = 0, j = 0; i < n; ++i) {
+      p = t - 1;
+      t = A[i];
+      if(0 <= p) {
+        c = T[j];
+        U[j] = (j <= p) ? T[p] : (sauchar_t)A[p];
+        A[j] = c;
+        j++;
+      } else {
+        *idx = i;
+      }
+    }
+    p = t - 1;
+    if(0 <= p) {
+      c = T[j];
+      U[j] = (j <= p) ? T[p] : (sauchar_t)A[p];
+      A[j] = c;
+    } else {
+      *idx = i;
+    }
+  } else {
+    U[0] = T[n - 1];
+    for(i = 0; A[i] != 0; ++i) { U[i + 1] = T[A[i] - 1]; }
+    *idx = i + 1;
+    for(++i; i < n; ++i) { U[i] = T[A[i] - 1]; }
+  }
+
+  if(SA == NULL) {
+    /* Deallocate memory. */
+    free(A);
+  }
+
+  return 0;
+}
+
+/* Inverse Burrows-Wheeler transform. */
+saint_t
+inverse_bw_transform(const sauchar_t *T, sauchar_t *U, saidx_t *A,
+                     saidx_t n, saidx_t idx) {
+  saidx_t C[ALPHABET_SIZE];
+  sauchar_t D[ALPHABET_SIZE];
+  saidx_t *B;
+  saidx_t i, p;
+  saint_t c, d;
+
+  /* Check arguments. */
+  if((T == NULL) || (U == NULL) || (n < 0) || (idx < 0) ||
+     (n < idx) || ((0 < n) && (idx == 0))) {
+    return -1;
+  }
+  if(n <= 1) { return 0; }
+
+  if((B = A) == NULL) {
+    /* Allocate n*sizeof(saidx_t) bytes of memory. */
+    if((B = (saidx_t *)malloc((size_t)n * sizeof(saidx_t))) == NULL) { return -2; }
+  }
+
+  /* Inverse BW transform. */
+  for(c = 0; c < ALPHABET_SIZE; ++c) { C[c] = 0; }
+  for(i = 0; i < n; ++i) { ++C[T[i]]; }
+  for(c = 0, d = 0, i = 0; c < ALPHABET_SIZE; ++c) {
+    p = C[c];
+    if(0 < p) {
+      C[c] = i;
+      D[d++] = (sauchar_t)c;
+      i += p;
+    }
+  }
+  for(i = 0; i < idx; ++i) { B[C[T[i]]++] = i; }
+  for( ; i < n; ++i)       { B[C[T[i]]++] = i + 1; }
+  for(c = 0; c < d; ++c) { C[c] = C[D[c]]; }
+  for(i = 0, p = idx; i < n; ++i) {
+    U[i] = D[binarysearch_lower(C, d, p)];
+    p = B[p - 1];
+  }
+
+  if(A == NULL) {
+    /* Deallocate memory. */
+    free(B);
+  }
+
+  return 0;
+}
+
+/* Checks the suffix array SA of the string T. */
+saint_t
+sufcheck(const sauchar_t *T, const saidx_t *SA,
+         saidx_t n, saint_t verbose) {
+  saidx_t C[ALPHABET_SIZE];
+  saidx_t i, p, q, t;
+  saint_t c;
+
+  if(verbose) { fprintf(stderr, "sufcheck: "); }
+
+  /* Check arguments. */
+  if((T == NULL) || (SA == NULL) || (n < 0)) {
+    if(verbose) { fprintf(stderr, "Invalid arguments.\n"); }
+    return -1;
+  }
+  if(n == 0) {
+    if(verbose) { fprintf(stderr, "Done.\n"); }
+    return 0;
+  }
+
+  /* check range: [0..n-1] */
+  for(i = 0; i < n; ++i) {
+    if((SA[i] < 0) || (n <= SA[i])) {
+      if(verbose) {
+        fprintf(stderr, "Out of the range [0,%" PRIdSAIDX_T "].\n"
+                        "  SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "\n",
+                        n - 1, i, SA[i]);
+      }
+      return -2;
+    }
+  }
+
+  /* check first characters. */
+  for(i = 1; i < n; ++i) {
+    if(T[SA[i - 1]] > T[SA[i]]) {
+      if(verbose) {
+        fprintf(stderr, "Suffixes in wrong order.\n"
+                        "  T[SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "]=%d"
+                        " > T[SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "]=%d\n",
+                        i - 1, SA[i - 1], T[SA[i - 1]], i, SA[i], T[SA[i]]);
+      }
+      return -3;
+    }
+  }
+
+  /* check suffixes. */
+  for(i = 0; i < ALPHABET_SIZE; ++i) { C[i] = 0; }
+  for(i = 0; i < n; ++i) { ++C[T[i]]; }
+  for(i = 0, p = 0; i < ALPHABET_SIZE; ++i) {
+    t = C[i];
+    C[i] = p;
+    p += t;
+  }
+
+  q = C[T[n - 1]];
+  C[T[n - 1]] += 1;
+  for(i = 0; i < n; ++i) {
+    p = SA[i];
+    if(0 < p) {
+      c = T[--p];
+      t = C[c];
+    } else {
+      c = T[p = n - 1];
+      t = q;
+    }
+    if((t < 0) || (p != SA[t])) {
+      if(verbose) {
+        fprintf(stderr, "Suffix in wrong position.\n"
+                        "  SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T " or\n"
+                        "  SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "\n",
+                        t, (0 <= t) ? SA[t] : -1, i, SA[i]);
+      }
+      return -4;
+    }
+    if(t != q) {
+      ++C[c];
+      if((n <= C[c]) || (T[SA[C[c]]] != c)) { C[c] = -1; }
+    }
+  }
+
+  if(1 <= verbose) { fprintf(stderr, "Done.\n"); }
+  return 0;
+}
+
+
+static
+int
+_compare(const sauchar_t *T, saidx_t Tsize,
+         const sauchar_t *P, saidx_t Psize,
+         saidx_t suf, saidx_t *match) {
+  saidx_t i, j;
+  saint_t r;
+  for(i = suf + *match, j = *match, r = 0;
+      (i < Tsize) && (j < Psize) && ((r = T[i] - P[j]) == 0); ++i, ++j) { }
+  *match = j;
+  return (r == 0) ? -(j != Psize) : r;
+}
+
+/* Search for the pattern P in the string T. */
+saidx_t
+sa_search(const sauchar_t *T, saidx_t Tsize,
+          const sauchar_t *P, saidx_t Psize,
+          const saidx_t *SA, saidx_t SAsize,
+          saidx_t *idx) {
+  saidx_t size, lsize, rsize, half;
+  saidx_t match, lmatch, rmatch;
+  saidx_t llmatch, lrmatch, rlmatch, rrmatch;
+  saidx_t i, j, k;
+  saint_t r;
+
+  if(idx != NULL) { *idx = -1; }
+  if((T == NULL) || (P == NULL) || (SA == NULL) ||
+     (Tsize < 0) || (Psize < 0) || (SAsize < 0)) { return -1; }
+  if((Tsize == 0) || (SAsize == 0)) { return 0; }
+  if(Psize == 0) { if(idx != NULL) { *idx = 0; } return SAsize; }
+
+  for(i = j = k = 0, lmatch = rmatch = 0, size = SAsize, half = size >> 1;
+      0 < size;
+      size = half, half >>= 1) {
+    match = MIN(lmatch, rmatch);
+    r = _compare(T, Tsize, P, Psize, SA[i + half], &match);
+    if(r < 0) {
+      i += half + 1;
+      half -= (size & 1) ^ 1;
+      lmatch = match;
+    } else if(r > 0) {
+      rmatch = match;
+    } else {
+      lsize = half, j = i, rsize = size - half - 1, k = i + half + 1;
+
+      /* left part */
+      for(llmatch = lmatch, lrmatch = match, half = lsize >> 1;
+          0 < lsize;
+          lsize = half, half >>= 1) {
+        lmatch = MIN(llmatch, lrmatch);
+        r = _compare(T, Tsize, P, Psize, SA[j + half], &lmatch);
+        if(r < 0) {
+          j += half + 1;
+          half -= (lsize & 1) ^ 1;
+          llmatch = lmatch;
+        } else {
+          lrmatch = lmatch;
+        }
+      }
+
+      /* right part */
+      for(rlmatch = match, rrmatch = rmatch, half = rsize >> 1;
+          0 < rsize;
+          rsize = half, half >>= 1) {
+        rmatch = MIN(rlmatch, rrmatch);
+        r = _compare(T, Tsize, P, Psize, SA[k + half], &rmatch);
+        if(r <= 0) {
+          k += half + 1;
+          half -= (rsize & 1) ^ 1;
+          rlmatch = rmatch;
+        } else {
+          rrmatch = rmatch;
+        }
+      }
+
+      break;
+    }
+  }
+
+  if(idx != NULL) { *idx = (0 < (k - j)) ? j : i; }
+  return k - j;
+}
+
+/* Search for the character c in the string T. */
+saidx_t
+sa_simplesearch(const sauchar_t *T, saidx_t Tsize,
+                const saidx_t *SA, saidx_t SAsize,
+                saint_t c, saidx_t *idx) {
+  saidx_t size, lsize, rsize, half;
+  saidx_t i, j, k, p;
+  saint_t r;
+
+  if(idx != NULL) { *idx = -1; }
+  if((T == NULL) || (SA == NULL) || (Tsize < 0) || (SAsize < 0)) { return -1; }
+  if((Tsize == 0) || (SAsize == 0)) { return 0; }
+
+  for(i = j = k = 0, size = SAsize, half = size >> 1;
+      0 < size;
+      size = half, half >>= 1) {
+    p = SA[i + half];
+    r = (p < Tsize) ? T[p] - c : -1;
+    if(r < 0) {
+      i += half + 1;
+      half -= (size & 1) ^ 1;
+    } else if(r == 0) {
+      lsize = half, j = i, rsize = size - half - 1, k = i + half + 1;
+
+      /* left part */
+      for(half = lsize >> 1;
+          0 < lsize;
+          lsize = half, half >>= 1) {
+        p = SA[j + half];
+        r = (p < Tsize) ? T[p] - c : -1;
+        if(r < 0) {
+          j += half + 1;
+          half -= (lsize & 1) ^ 1;
+        }
+      }
+
+      /* right part */
+      for(half = rsize >> 1;
+          0 < rsize;
+          rsize = half, half >>= 1) {
+        p = SA[k + half];
+        r = (p < Tsize) ? T[p] - c : -1;
+        if(r <= 0) {
+          k += half + 1;
+          half -= (rsize & 1) ^ 1;
+        }
+      }
+
+      break;
+    }
+  }
+
+  if(idx != NULL) { *idx = (0 < (k - j)) ? j : i; }
+  return k - j;
+}
+#endif
diff --git a/src/libdivsufsort/lib/sssort.c b/src/libdivsufsort/lib/sssort.c
new file mode 100644
index 0000000..4a18fd2
--- /dev/null
+++ b/src/libdivsufsort/lib/sssort.c
@@ -0,0 +1,815 @@
+/*
+ * sssort.c for libdivsufsort
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "divsufsort_private.h"
+
+
+/*- Private Functions -*/
+
+static const saint_t lg_table[256]= {  /* lg_table[i] = floor(log2(i)) for 1 <= i <= 255; lg_table[0] = -1 */
+ -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+  5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
+};
+
+#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
+
+static INLINE
+saint_t
+ss_ilg(saidx_t n) {
+#if SS_BLOCKSIZE == 0
+# if defined(BUILD_DIVSUFSORT64)
+  /* Integer base-2 logarithm for 64-bit indices.  The bytes of n are
+     probed from the most significant one downwards; the first non-zero
+     byte decides the result, which is that byte's bit offset plus the
+     lg_table value of the byte itself. */
+  if(n >> 56) { return 56 + lg_table[(n >> 56) & 0xff]; }
+  if(n >> 48) { return 48 + lg_table[(n >> 48) & 0xff]; }
+  if(n >> 40) { return 40 + lg_table[(n >> 40) & 0xff]; }
+  if(n >> 32) { return 32 + lg_table[(n >> 32) & 0xff]; }
+  if(n & 0xff000000) { return 24 + lg_table[(n >> 24) & 0xff]; }
+  if(n & 0x00ff0000) { return 16 + lg_table[(n >> 16) & 0xff]; }
+  if(n & 0x0000ff00) { return  8 + lg_table[(n >>  8) & 0xff]; }
+
+  /* Nothing above bit 7 is set, so the low byte is the whole answer
+     (n == 0 gives lg_table[0], i.e. -1). */
+  return lg_table[n & 0xff];
+# else
+  /* Integer base-2 logarithm for 32-bit indices: the highest non-zero
+     byte of n contributes its bit offset plus its lg_table value;
+     n == 0 falls through to lg_table[0], i.e. -1. */
+  if(n & 0xff000000) { return 24 + lg_table[(n >> 24) & 0xff]; }
+  if(n & 0x00ff0000) { return 16 + lg_table[(n >> 16) & 0xff]; }
+  if(n & 0x0000ff00) { return  8 + lg_table[(n >>  8) & 0xff]; }
+  return lg_table[n & 0xff];
+# endif
+#elif SS_BLOCKSIZE < 256
+  return lg_table[n];
+#else
+  /* Block size of 256 or more: only the low 16 bits of n are examined. */
+  if(n & 0xff00) { return 8 + lg_table[(n >> 8) & 0xff]; }
+  return lg_table[n & 0xff];
+#endif
+}
+
+#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
+
+#if SS_BLOCKSIZE != 0
+
+static const saint_t sqq_table[256] = {  /* entries follow floor(sqrt(i << 8)), e.g. [1] = 16, [4] = 32, [255] = 255 */
+  0,  16,  22,  27,  32,  35,  39,  42,  45,  48,  50,  53,  55,  57,  59,  61,
+ 64,  65,  67,  69,  71,  73,  75,  76,  78,  80,  81,  83,  84,  86,  87,  89,
+ 90,  91,  93,  94,  96,  97,  98,  99, 101, 102, 103, 104, 106, 107, 108, 109,
+110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
+128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
+143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155,
+156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168,
+169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180,
+181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191,
+192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201,
+202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211,
+212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221,
+221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230,
+230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238,
+239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247,
+247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255
+};
+
+static INLINE
+saidx_t
+ss_isqrt(saidx_t x) {  /* integer square root of x, capped at SS_BLOCKSIZE */
+  saidx_t y, e;
+
+  if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; }  /* cap reached: skip the computation */
+  e = (x & 0xffff0000) ?  /* e = index of the highest set bit of x, found through lg_table (-1 for x == 0) */
+        ((x & 0xff000000) ?
+          24 + lg_table[(x >> 24) & 0xff] :
+          16 + lg_table[(x >> 16) & 0xff]) :
+        ((x & 0x0000ff00) ?
+           8 + lg_table[(x >>  8) & 0xff] :
+           0 + lg_table[(x >>  0) & 0xff]);
+
+  if(e >= 16) {
+    y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7);  /* first estimate from the top bits of x */
+    if(e >= 24) { y = (y + 1 + x / y) >> 1; }  /* extra refinement step for the largest inputs */
+    y = (y + 1 + x / y) >> 1;  /* refine: rounded average of y and x / y */
+  } else if(e >= 8) {
+    y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1;  /* table estimate, biased up by one */
+  } else {
+    return sqq_table[x] >> 4;  /* x < 256: direct table lookup */
+  }
+
+  return (x < (y * y)) ? y - 1 : y;  /* step back by one if the estimate overshoots */
+}
+
+#endif /* SS_BLOCKSIZE != 0 */
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Three-way comparison of the two substrings addressed through p1 and p2. */
+static INLINE
+saint_t
+ss_compare(const sauchar_t *T,
+           const saidx_t *p1, const saidx_t *p2,
+           saidx_t depth) {
+  /* Each substring starts depth bytes after the position stored in its PA
+     slot and stops two bytes past the position stored in the next slot. */
+  const sauchar_t *s1 = T + depth + *p1;
+  const sauchar_t *s2 = T + depth + *p2;
+  const sauchar_t *const end1 = T + *(p1 + 1) + 2;
+  const sauchar_t *const end2 = T + *(p2 + 1) + 2;
+
+  /* Skip the common prefix. */
+  while((s1 < end1) && (s2 < end2) && (*s1 == *s2)) { ++s1; ++s2; }
+
+  if(end1 <= s1) { return (s2 < end2) ? -1 : 0; }  /* first ran out: shorter or equal */
+  if(end2 <= s2) { return 1; }                     /* second ran out first */
+  return *s1 - *s2;                                /* first differing byte decides */
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+#if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1)
+
+/* Insertionsort for small size groups: orders [first, last) with ss_compare at the given depth. */
+static
+void
+ss_insertionsort(const sauchar_t *T, const saidx_t *PA,
+                 saidx_t *first, saidx_t *last, saidx_t depth) {
+  saidx_t *i, *j;
+  saidx_t t;
+  saint_t r;
+
+  for(i = last - 2; first <= i; --i) {  /* grow the already ordered tail [i + 1, last) leftwards */
+    for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) {
+      do { *(j - 1) = *j; } while((++j < last) && (*j < 0));  /* shift *j left, together with the negative entries after it */
+      if(last <= j) { break; }
+    }
+    if(r == 0) { *j = ~*j; }  /* t compares equal to *j: store *j bit-complemented (negative) */
+    *(j - 1) = t;
+  }
+}
+
+#endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */
+
+
+/*---------------------------------------------------------------------------*/
+
+#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
+
+static INLINE
+void
+ss_fixdown(const sauchar_t *Td, const saidx_t *PA,
+           saidx_t *SA, saidx_t i, saidx_t size) {
+  /* Sift SA[i] down a max-heap keyed by Td[PA[.]] until no child is larger. */
+  const saidx_t v = SA[i];
+  const saint_t key = Td[PA[v]];
+  saidx_t child;
+  while((child = 2 * i + 1) < size) {
+    if(Td[PA[SA[child]]] < Td[PA[SA[child + 1]]]) { ++child; }  /* sibling is read without a bounds check */
+    if(Td[PA[SA[child]]] <= key) { break; }                     /* larger child does not beat v: done */
+    SA[i] = SA[child], i = child;                               /* pull the child up and descend */
+  }
+  SA[i] = v;
+}
+
+/* Simple top-down heapsort of SA[0 .. size), ascending by the key Td[PA[SA[.]]]. */
+static
+void
+ss_heapsort(const sauchar_t *Td, const saidx_t *PA, saidx_t *SA, saidx_t size) {
+  saidx_t i, m;
+  saidx_t t;  /* also referenced by the SWAP macro, presumably as its temporary */
+
+  m = size;
+  if((size % 2) == 0) {  /* even size: the heap is built on the first size - 1 entries only */
+    m--;
+    if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); }
+  }
+
+  for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); }  /* heapify SA[0 .. m) */
+  if((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); }  /* park the root in the extra slot */
+  for(i = m - 1; 0 < i; --i) {  /* repeatedly move the current root to slot i and shrink the heap */
+    t = SA[0], SA[0] = SA[i];
+    ss_fixdown(Td, PA, SA, 0, i);
+    SA[i] = t;
+  }
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Picks, among v1, v2 and v3, the pointer whose key Td[PA[*v]] is the median. */
+static INLINE
+saidx_t *
+ss_median3(const sauchar_t *Td, const saidx_t *PA,
+           saidx_t *v1, saidx_t *v2, saidx_t *v3) {
+  saidx_t *t;  /* presumably the temporary the SWAP macro expects */
+
+  /* Order the first pair, then place v3 relative to it. */
+  if(Td[PA[*v2]] < Td[PA[*v1]]) { SWAP(v1, v2); }
+  if(Td[PA[*v2]] <= Td[PA[*v3]]) { return v2; }
+  /* v3 lies below v2: the median is whichever of v1 and v3 is larger. */
+  return (Td[PA[*v1]] > Td[PA[*v3]]) ? v1 : v3;
+}
+
+/* Returns the median of five elements, compared by the key Td[PA[*v]]; only the local pointers are swapped. */
+static INLINE
+saidx_t *
+ss_median5(const sauchar_t *Td, const saidx_t *PA,
+           saidx_t *v1, saidx_t *v2, saidx_t *v3, saidx_t *v4, saidx_t *v5) {
+  saidx_t *t;  /* presumably the temporary the SWAP macro expects */
+  if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); }  /* order the pair (v2, v3) */
+  if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); }  /* order the pair (v4, v5) */
+  if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); }  /* pair with the smaller minimum goes first */
+  if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); }
+  if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); }
+  if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; }
+  return v3;
+}
+
+/* Chooses a pivot in [first, last): median of 3, of 5, or of three medians, by range size. */
+static INLINE
+saidx_t *
+ss_pivot(const sauchar_t *Td, const saidx_t *PA, saidx_t *first, saidx_t *last) {
+  saidx_t count = last - first;          /* number of candidates */
+  saidx_t *mid = first + count / 2;
+  saidx_t *tail = last - 1;              /* last element of the range */
+  saidx_t *lo, *md, *hi;
+  saidx_t step;
+
+  /* Small ranges: median of the two ends and the middle. */
+  if(count <= 32) { return ss_median3(Td, PA, first, mid, tail); }
+  /* Medium ranges: median of five samples, a quarter of the range apart. */
+  if(count <= 512) {
+    step = count >> 2;
+    return ss_median5(Td, PA, first, first + step, mid, tail - step, tail);
+  }
+  /* Large ranges: median of three medians taken at the front, middle and back. */
+  step = count >> 3;
+  lo = ss_median3(Td, PA, first, first + step, first + (step << 1));
+  md = ss_median3(Td, PA, mid - step, mid, mid + step);
+  hi = ss_median3(Td, PA, tail - (step << 1), tail - step, tail);
+  return ss_median3(Td, PA, lo, md, hi);
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Binary partition for substrings: entries with PA[x] + depth >= PA[x + 1] + 1 are moved to the front. */
+static INLINE
+saidx_t *
+ss_partition(const saidx_t *PA,
+                    saidx_t *first, saidx_t *last, saidx_t depth) {
+  saidx_t *a, *b;
+  saidx_t t;
+  for(a = first - 1, b = last;;) {
+    for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; }  /* front entry stays; store it complemented */
+    for(; (a < --b) && ((PA[*b] + depth) <  (PA[*b + 1] + 1));) { }            /* back entry stays as it is */
+    if(b <= a) { break; }
+    t = ~*b;  /* exchange the misplaced pair; the one moving to the front is complemented */
+    *b = *a;
+    *a = t;
+  }
+  if(first < a) { *first = ~*first; }  /* undo the complement on the first entry of a non-empty front part */
+  return a;  /* start of the back part */
+}
+
+/* Multikey introsort for medium size groups: sorts [first, last), comparing one character at a time from depth on. */
+static
+void
+ss_mintrosort(const sauchar_t *T, const saidx_t *PA,
+              saidx_t *first, saidx_t *last,
+              saidx_t depth) {
+#define STACK_SIZE SS_MISORT_STACKSIZE
+  struct { saidx_t *a, *b, c; saint_t d; } stack[STACK_SIZE];  /* pending (first, last, depth, limit) jobs */
+  const sauchar_t *Td;
+  saidx_t *a, *b, *c, *d, *e, *f;
+  saidx_t s, t;
+  saint_t ssize;
+  saint_t limit;
+  saint_t v, x = 0;
+
+  for(ssize = 0, limit = ss_ilg(last - first);;) {  /* NOTE(review): no explicit exit here; STACK_POP presumably returns once the stack is empty - confirm in divsufsort_private.h */
+
+    if((last - first) <= SS_INSERTIONSORT_THRESHOLD) {
+#if 1 < SS_INSERTIONSORT_THRESHOLD
+      if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); }
+#endif
+      STACK_POP(first, last, depth, limit);
+      continue;
+    }
+
+    Td = T + depth;  /* Td[PA[x]] is the character of entry x at the current depth */
+    if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); }  /* budget used up: order the range by that character with heapsort */
+    if(limit < 0) {  /* range is ordered by the current character: peel off its leading run of equal characters */
+      for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) {
+        if((x = Td[PA[*a]]) != v) {
+          if(1 < (a - first)) { break; }  /* found a run [first, a) of two or more entries */
+          v = x;
+          first = a;
+        }
+      }
+      if(Td[PA[*first] - 1] < v) {
+        first = ss_partition(PA, first, a, depth);
+      }
+      if((a - first) <= (last - a)) {  /* continue with the smaller part, stack the larger one */
+        if(1 < (a - first)) {
+          STACK_PUSH(a, last, depth, -1);
+          last = a, depth += 1, limit = ss_ilg(a - first);
+        } else {
+          first = a, limit = -1;
+        }
+      } else {
+        if(1 < (last - a)) {
+          STACK_PUSH(first, a, depth + 1, ss_ilg(a - first));
+          first = a, limit = -1;
+        } else {
+          last = a, depth += 1, limit = ss_ilg(a - first);
+        }
+      }
+      continue;
+    }
+
+    /* choose pivot */
+    a = ss_pivot(Td, PA, first, last);
+    v = Td[PA[*a]];
+    SWAP(*first, *a);
+
+    /* partition: three-way split on v; entries equal to v are parked at both ends first */
+    for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { }
+    if(((a = b) < last) && (x < v)) {
+      for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) {
+        if(x == v) { SWAP(*b, *a); ++a; }
+      }
+    }
+    for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { }
+    if((b < (d = c)) && (x > v)) {
+      for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
+        if(x == v) { SWAP(*c, *d); --d; }
+      }
+    }
+    for(; b < c;) {
+      SWAP(*b, *c);
+      for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) {
+        if(x == v) { SWAP(*b, *a); ++a; }
+      }
+      for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
+        if(x == v) { SWAP(*c, *d); --d; }
+      }
+    }
+
+    if(a <= d) {  /* at least one entry differs from the pivot */
+      c = b - 1;
+
+      if((s = a - first) > (t = b - a)) { s = t; }  /* swap the parked equal entries into the middle */
+      for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
+      if((s = d - c) > (t = last - d - 1)) { s = t; }
+      for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
+
+      a = first + (b - a), c = last - (d - c);  /* now [first, a) < v, [a, c) == v, [c, last) > v */
+      b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth);
+
+      if((a - first) <= (last - c)) {  /* continue with the smallest part; stack the others, largest pushed first */
+        if((last - c) <= (c - b)) {
+          STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
+          STACK_PUSH(c, last, depth, limit);
+          last = a;
+        } else if((a - first) <= (c - b)) {
+          STACK_PUSH(c, last, depth, limit);
+          STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
+          last = a;
+        } else {
+          STACK_PUSH(c, last, depth, limit);
+          STACK_PUSH(first, a, depth, limit);
+          first = b, last = c, depth += 1, limit = ss_ilg(c - b);
+        }
+      } else {
+        if((a - first) <= (c - b)) {
+          STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
+          STACK_PUSH(first, a, depth, limit);
+          first = c;
+        } else if((last - c) <= (c - b)) {
+          STACK_PUSH(first, a, depth, limit);
+          STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
+          first = c;
+        } else {
+          STACK_PUSH(first, a, depth, limit);
+          STACK_PUSH(c, last, depth, limit);
+          first = b, last = c, depth += 1, limit = ss_ilg(c - b);
+        }
+      }
+    } else {  /* every entry carries the pivot character: move on to the next depth */
+      limit += 1;  /* give back the budget unit taken above */
+      if(Td[PA[*first] - 1] < v) {
+        first = ss_partition(PA, first, last, depth);
+        limit = ss_ilg(last - first);
+      }
+      depth += 1;
+    }
+  }
+#undef STACK_SIZE
+}
+
+#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
+
+
+/*---------------------------------------------------------------------------*/
+
+#if SS_BLOCKSIZE != 0
+
+static INLINE
+void
+ss_blockswap(saidx_t *a, saidx_t *b, saidx_t n) {
+  saidx_t pos, held;  /* exchanges a[0 .. n) with b[0 .. n), front to back */
+  for(pos = 0; pos < n; ++pos) {
+    held = a[pos]; a[pos] = b[pos]; b[pos] = held;
+  }
+}
+
+static INLINE
+void
+ss_rotate(saidx_t *first, saidx_t *middle, saidx_t *last) {  /* in-place exchange of the adjacent blocks [first, middle) and [middle, last) */
+  saidx_t *a, *b, t;
+  saidx_t l, r;
+  l = middle - first, r = last - middle;  /* lengths of the left and right block still to be exchanged */
+  for(; (0 < l) && (0 < r);) {
+    if(l == r) { ss_blockswap(first, middle, l); break; }  /* equal lengths: a plain swap finishes the job */
+    if(l < r) {  /* shorter left block: work backwards from the end */
+      a = last - 1, b = middle - 1;
+      t = *a;
+      do {
+        *a-- = *b, *b-- = *a;
+        if(b < first) {  /* one pass over the left block done */
+          *a = t;
+          last = a;
+          if((r -= l + 1) <= l) { break; }  /* remaining right part no longer exceeds the left: re-dispatch */
+          a -= 1, b = middle - 1;
+          t = *a;
+        }
+      } while(1);
+    } else {  /* shorter right block: mirror image, working forwards from the start */
+      a = first, b = middle;
+      t = *a;
+      do {
+        *a++ = *b, *b++ = *a;
+        if(last <= b) {  /* one pass over the right block done */
+          *a = t;
+          first = a + 1;
+          if((l -= r + 1) <= r) { break; }
+          a += 1, b = middle;
+          t = *a;
+        }
+      } while(1);
+    }
+  }
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+static
+void
+ss_inplacemerge(const sauchar_t *T, const saidx_t *PA,
+                saidx_t *first, saidx_t *middle, saidx_t *last,  /* runs [first, middle) and [middle, last) */
+                saidx_t depth) {  /* merges the two runs without a buffer, using binary search plus ss_rotate */
+  const saidx_t *p;
+  saidx_t *a, *b;
+  saidx_t len, half;
+  saint_t q, r;
+  saint_t x;
+
+  for(;;) {
+    if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); }  /* last entry is stored complemented: decode it, remember in x */
+    else                { x = 0; p = PA +  *(last - 1); }
+    for(a = first, len = middle - first, half = len >> 1, r = -1;  /* binary search in the left run for the slot of p */
+        0 < len;
+        len = half, half >>= 1) {
+      b = a + half;
+      q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth);
+      if(q < 0) {
+        a = b + 1;
+        half -= (len & 1) ^ 1;
+      } else {
+        r = q;  /* remember the last non-negative result; 0 means an equal entry was met */
+      }
+    }
+    if(a < middle) {
+      if(r == 0) { *a = ~*a; }  /* equal to p: store *a complemented */
+      ss_rotate(a, middle, last);  /* bring the right run in front of the left entries from a on */
+      last -= middle - a;
+      middle = a;
+      if(first == middle) { break; }  /* left run exhausted */
+    }
+    --last;
+    if(x != 0) { while(*--last < 0) { } }  /* p was complemented: step back over the negative entries before it */
+    if(middle == last) { break; }  /* right run exhausted */
+  }
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Merge-forward with internal buffer: the left run is swapped into buf and merged back from the front. */
+static
+void
+ss_mergeforward(const sauchar_t *T, const saidx_t *PA,
+                saidx_t *first, saidx_t *middle, saidx_t *last,
+                saidx_t *buf, saidx_t depth) {  /* buf must hold at least middle - first entries */
+  saidx_t *a, *b, *c, *bufend;
+  saidx_t t;
+  saint_t r;
+
+  bufend = buf + (middle - first) - 1;  /* last buffered entry */
+  ss_blockswap(buf, first, middle - first);  /* left run goes to buf; the old buf contents now sit in [first, middle) */
+
+  for(t = *(a = first), b = buf, c = middle;;) {  /* a: output, b: buffered left run, c: right run; t keeps the first displaced value */
+    r = ss_compare(T, PA + *b, PA + *c, depth);
+    if(r < 0) {  /* left entry is smaller: emit it along with the negative entries following it */
+      do {
+        *a++ = *b;
+        if(bufend <= b) { *bufend = t; return; }  /* buffer drained: the rest of the right run is already in place */
+        *b++ = *a;  /* hand the displaced value back to the buffer */
+      } while(*b < 0);
+    } else if(r > 0) {  /* right entry is smaller */
+      do {
+        *a++ = *c, *c++ = *a;
+        if(last <= c) {  /* right run drained: flush what is left in the buffer */
+          while(b < bufend) { *a++ = *b, *b++ = *a; }
+          *a = *b, *b = t;
+          return;
+        }
+      } while(*c < 0);
+    } else {  /* equal: store the right entry complemented, then emit the left group followed by the right group */
+      *c = ~*c;
+      do {
+        *a++ = *b;
+        if(bufend <= b) { *bufend = t; return; }
+        *b++ = *a;
+      } while(*b < 0);
+
+      do {
+        *a++ = *c, *c++ = *a;
+        if(last <= c) {
+          while(b < bufend) { *a++ = *b, *b++ = *a; }
+          *a = *b, *b = t;
+          return;
+        }
+      } while(*c < 0);
+    }
+  }
+}
+
+/* Merge-backward with internal buffer: the right run is swapped into buf and merged back from the end. */
+static
+void
+ss_mergebackward(const sauchar_t *T, const saidx_t *PA,
+                 saidx_t *first, saidx_t *middle, saidx_t *last,
+                 saidx_t *buf, saidx_t depth) {  /* buf must hold at least last - middle entries */
+  const saidx_t *p1, *p2;
+  saidx_t *a, *b, *c, *bufend;
+  saidx_t t;
+  saint_t r;
+  saint_t x;
+
+  bufend = buf + (last - middle) - 1;  /* last buffered entry */
+  ss_blockswap(buf, middle, last - middle);  /* right run goes to buf; the old buf contents now sit in [middle, last) */
+
+  x = 0;  /* bit 1: *b is stored complemented, bit 2: *c is stored complemented */
+  if(*bufend < 0)       { p1 = PA + ~*bufend; x |= 1; }
+  else                  { p1 = PA +  *bufend; }
+  if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; }
+  else                  { p2 = PA +  *(middle - 1); }
+  for(t = *(a = last - 1), b = bufend, c = middle - 1;;) {  /* a: output, b: buffered right run, c: left run; t keeps the first displaced value */
+    r = ss_compare(T, p1, p2, depth);
+    if(0 < r) {  /* buffered entry is larger: emit it (after the negative entries trailing it) */
+      if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
+      *a-- = *b;
+      if(b <= buf) { *buf = t; break; }  /* buffer drained: the rest of the left run is already in place */
+      *b-- = *a;
+      if(*b < 0) { p1 = PA + ~*b; x |= 1; }
+      else       { p1 = PA +  *b; }
+    } else if(r < 0) {  /* left-run entry is larger */
+      if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
+      *a-- = *c, *c-- = *a;
+      if(c < first) {  /* left run drained: flush what is left in the buffer */
+        while(buf < b) { *a-- = *b, *b-- = *a; }
+        *a = *b, *b = t;
+        break;
+      }
+      if(*c < 0) { p2 = PA + ~*c; x |= 2; }
+      else       { p2 = PA +  *c; }
+    } else {  /* equal: the buffered entry is written complemented, then the left-run entry follows */
+      if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
+      *a-- = ~*b;
+      if(b <= buf) { *buf = t; break; }
+      *b-- = *a;
+      if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
+      *a-- = *c, *c-- = *a;
+      if(c < first) {
+        while(buf < b) { *a-- = *b, *b-- = *a; }
+        *a = *b, *b = t;
+        break;
+      }
+      if(*b < 0) { p1 = PA + ~*b; x |= 1; }
+      else       { p1 = PA +  *b; }
+      if(*c < 0) { p2 = PA + ~*c; x |= 2; }
+      else       { p2 = PA +  *c; }
+    }
+  }
+}
+
+/* D&C based merge: splits the two runs until one side fits into buf, then merges with the buffered routines. */
+static
+void
+ss_swapmerge(const sauchar_t *T, const saidx_t *PA,
+             saidx_t *first, saidx_t *middle, saidx_t *last,
+             saidx_t *buf, saidx_t bufsize, saidx_t depth) {
+#define STACK_SIZE SS_SMERGE_STACKSIZE
+#define GETIDX(a) ((0 <= (a)) ? (a) : (~(a)))
+#define MERGE_CHECK(a, b, c)\
+  do {\
+    if(((c) & 1) ||\
+       (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\
+      *(a) = ~*(a);\
+    }\
+    if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\
+      *(b) = ~*(b);\
+    }\
+  } while(0)
+  struct { saidx_t *a, *b, *c; saint_t d; } stack[STACK_SIZE];  /* pending (first, middle, last, check) jobs */
+  saidx_t *l, *r, *lm, *rm;
+  saidx_t m, len, half;
+  saint_t ssize;
+  saint_t check, next;  /* bit flags consumed by MERGE_CHECK: 1 and 2 concern *first, 4 concerns *last */
+
+  for(check = 0, ssize = 0;;) {  /* NOTE(review): no explicit exit here; STACK_POP presumably returns once the stack is empty - confirm in divsufsort_private.h */
+    if((last - middle) <= bufsize) {  /* right run fits into the buffer */
+      if((first < middle) && (middle < last)) {
+        ss_mergebackward(T, PA, first, middle, last, buf, depth);
+      }
+      MERGE_CHECK(first, last, check);
+      STACK_POP(first, middle, last, check);
+      continue;
+    }
+
+    if((middle - first) <= bufsize) {  /* left run fits into the buffer */
+      if(first < middle) {
+        ss_mergeforward(T, PA, first, middle, last, buf, depth);
+      }
+      MERGE_CHECK(first, last, check);
+      STACK_POP(first, middle, last, check);
+      continue;
+    }
+
+    for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1;  /* binary search for m: how many entries around middle must trade sides */
+        0 < len;
+        len = half, half >>= 1) {
+      if(ss_compare(T, PA + GETIDX(*(middle + m + half)),
+                       PA + GETIDX(*(middle - m - half - 1)), depth) < 0) {
+        m += half + 1;
+        half -= (len & 1) ^ 1;
+      }
+    }
+
+    if(0 < m) {
+      lm = middle - m, rm = middle + m;
+      ss_blockswap(lm, middle, m);  /* exchange the m entries on either side of middle */
+      l = r = middle, next = 0;
+      if(rm < last) {
+        if(*rm < 0) {
+          *rm = ~*rm;  /* decode the entry that now starts the right remainder */
+          if(first < lm) { for(; *--l < 0;) { } next |= 4; }
+          next |= 1;
+        } else if(first < lm) {
+          for(; *r < 0; ++r) { }
+          next |= 2;
+        }
+      }
+
+      if((l - first) <= (last - r)) {  /* continue with the smaller half, stack the larger one */
+        STACK_PUSH(r, rm, last, (next & 3) | (check & 4));
+        middle = lm, last = l, check = (check & 3) | (next & 4);
+      } else {
+        if((next & 2) && (r == middle)) { next ^= 6; }
+        STACK_PUSH(first, lm, l, (check & 3) | (next & 4));
+        first = r, middle = rm, check = (next & 3) | (check & 4);
+      }
+    } else {  /* nothing to exchange: the runs are already in merged order */
+      if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) {
+        *middle = ~*middle;  /* boundary entries are equal: store *middle complemented */
+      }
+      MERGE_CHECK(first, last, check);
+      STACK_POP(first, middle, last, check);
+    }
+  }
+#undef STACK_SIZE
+}
+
+#endif /* SS_BLOCKSIZE != 0 */
+
+
+/*---------------------------------------------------------------------------*/
+
+/*- Function -*/
+
+/* Substring sort: orders the entries of [first, last) by the substrings they address through PA, from depth on. */
+void
+sssort(const sauchar_t *T, const saidx_t *PA,
+       saidx_t *first, saidx_t *last,
+       saidx_t *buf, saidx_t bufsize,  /* scratch space for merging; may be replaced by a tail of the range, see below */
+       saidx_t depth, saidx_t n, saint_t lastsuffix) {  /* lastsuffix != 0: *first is held back and inserted at the end */
+  saidx_t *a;
+#if SS_BLOCKSIZE != 0
+  saidx_t *b, *middle, *curbuf;
+  saidx_t j, k, curbufsize, limit;
+#endif
+  saidx_t i;
+
+  if(lastsuffix != 0) { ++first; }
+
+#if SS_BLOCKSIZE == 0
+  ss_mintrosort(T, PA, first, last, depth);
+#else
+  if((bufsize < SS_BLOCKSIZE) &&
+      (bufsize < (last - first)) &&
+      (bufsize < (limit = ss_isqrt(last - first)))) {
+    if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; }
+    buf = middle = last - limit, bufsize = limit;  /* supplied buffer is too small: use the last limit entries of the range instead */
+  } else {
+    middle = last, limit = 0;
+  }
+  for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) {  /* sort full blocks one by one */
+#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
+    ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth);
+#elif 1 < SS_BLOCKSIZE
+    ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth);
+#endif
+    curbufsize = last - (a + SS_BLOCKSIZE);  /* the not yet sorted remainder can serve as a larger merge buffer */
+    curbuf = a + SS_BLOCKSIZE;
+    if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; }
+    for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) {  /* merge equal-sized neighbours, one per trailing 1 bit of i */
+      ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth);
+    }
+  }
+#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
+  ss_mintrosort(T, PA, a, middle, depth);
+#elif 1 < SS_BLOCKSIZE
+  ss_insertionsort(T, PA, a, middle, depth);
+#endif
+  for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) {  /* fold the remaining sorted runs into the final partial block */
+    if(i & 1) {
+      ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth);
+      a -= k;
+    }
+  }
+  if(limit != 0) {  /* the tail borrowed as buffer still has to be sorted and merged in */
+#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
+    ss_mintrosort(T, PA, middle, last, depth);
+#elif 1 < SS_BLOCKSIZE
+    ss_insertionsort(T, PA, middle, last, depth);
+#endif
+    ss_inplacemerge(T, PA, first, middle, last, depth);
+  }
+#endif
+
+  if(lastsuffix != 0) {
+    /* Insert last type B* suffix. */
+    saidx_t PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2;  /* two-slot stand-in so ss_compare can read a following slot */
+    for(a = first, i = *(first - 1);
+        (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth)));
+        ++a) {
+      *(a - 1) = *a;  /* shift smaller (and negative) entries one slot to the left */
+    }
+    *(a - 1) = i;
+  }
+}
diff --git a/src/libdivsufsort/lib/trsort.c b/src/libdivsufsort/lib/trsort.c
new file mode 100644
index 0000000..6fe3e67
--- /dev/null
+++ b/src/libdivsufsort/lib/trsort.c
@@ -0,0 +1,586 @@
+/*
+ * trsort.c for libdivsufsort
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "divsufsort_private.h"
+
+
+/*- Private Functions -*/
+
+static const saint_t lg_table[256]= {  /* lg_table[i] = floor(log2(i)) for 1 <= i <= 255; lg_table[0] = -1 */
+ -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+  5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
+};
+
+static INLINE
+saint_t
+tr_ilg(saidx_t n) {
+#if defined(BUILD_DIVSUFSORT64)
+  /* Integer base-2 logarithm for 64-bit indices.  The bytes of n are
+     probed from the most significant one downwards; the first non-zero
+     byte decides the result, which is that byte's bit offset plus the
+     lg_table value of the byte itself. */
+  if(n >> 56) { return 56 + lg_table[(n >> 56) & 0xff]; }
+  if(n >> 48) { return 48 + lg_table[(n >> 48) & 0xff]; }
+  if(n >> 40) { return 40 + lg_table[(n >> 40) & 0xff]; }
+  if(n >> 32) { return 32 + lg_table[(n >> 32) & 0xff]; }
+  if(n & 0xff000000) { return 24 + lg_table[(n >> 24) & 0xff]; }
+  if(n & 0x00ff0000) { return 16 + lg_table[(n >> 16) & 0xff]; }
+  if(n & 0x0000ff00) { return  8 + lg_table[(n >>  8) & 0xff]; }
+
+  /* Nothing above bit 7 is set, so the low byte is the whole answer
+     (n == 0 gives lg_table[0], i.e. -1). */
+  return lg_table[n & 0xff];
+#else
+  /* Integer base-2 logarithm for 32-bit indices: the highest non-zero
+     byte of n contributes its bit offset plus its lg_table value;
+     n == 0 falls through to lg_table[0], i.e. -1. */
+  if(n & 0xff000000) { return 24 + lg_table[(n >> 24) & 0xff]; }
+  if(n & 0x00ff0000) { return 16 + lg_table[(n >> 16) & 0xff]; }
+  if(n & 0x0000ff00) { return  8 + lg_table[(n >>  8) & 0xff]; }
+  return lg_table[n & 0xff];
+#endif
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Simple insertionsort for small size groups: orders [first, last) ascending by the key ISAd[.]. */
+static
+void
+tr_insertionsort(const saidx_t *ISAd, saidx_t *first, saidx_t *last) {
+  saidx_t *a, *b;
+  saidx_t t, r;
+
+  for(a = first + 1; a < last; ++a) {  /* grow the already ordered head [first, a) rightwards */
+    for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) {
+      do { *(b + 1) = *b; } while((first <= --b) && (*b < 0));  /* shift *b right, together with the negative entries before it */
+      if(b < first) { break; }
+    }
+    if(r == 0) { *b = ~*b; }  /* same key as *b: store *b bit-complemented (negative) */
+    *(b + 1) = t;
+  }
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+static INLINE
+void
+tr_fixdown(const saidx_t *ISAd, saidx_t *SA, saidx_t i, saidx_t size) {
+  /* Sift SA[i] down a max-heap keyed by ISAd[.] until no child is larger. */
+  const saidx_t v = SA[i];
+  const saidx_t key = ISAd[v];
+  saidx_t child;
+  while((child = 2 * i + 1) < size) {
+    if(ISAd[SA[child]] < ISAd[SA[child + 1]]) { ++child; }  /* sibling is read without a bounds check */
+    if(ISAd[SA[child]] <= key) { break; }                   /* larger child does not beat v: done */
+    SA[i] = SA[child], i = child;                           /* pull the child up and descend */
+  }
+  SA[i] = v;
+}
+
+/* Simple top-down heapsort of SA[0 .. size), ascending by the key ISAd[SA[.]]. */
+static
+void
+tr_heapsort(const saidx_t *ISAd, saidx_t *SA, saidx_t size) {
+  saidx_t i, m;
+  saidx_t t;  /* also referenced by the SWAP macro, presumably as its temporary */
+
+  m = size;
+  if((size % 2) == 0) {  /* even size: the heap is built on the first size - 1 entries only */
+    m--;
+    if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); }
+  }
+
+  for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); }  /* heapify SA[0 .. m) */
+  if((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); }  /* park the root in the extra slot */
+  for(i = m - 1; 0 < i; --i) {  /* repeatedly move the current root to slot i and shrink the heap */
+    t = SA[0], SA[0] = SA[i];
+    tr_fixdown(ISAd, SA, 0, i);
+    SA[i] = t;
+  }
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Picks, among v1, v2 and v3, the pointer whose key ISAd[*v] is the median. */
+static INLINE
+saidx_t *
+tr_median3(const saidx_t *ISAd, saidx_t *v1, saidx_t *v2, saidx_t *v3) {
+  saidx_t *t;  /* presumably the temporary the SWAP macro expects */
+
+  /* Order the first pair, then place v3 relative to it. */
+  if(ISAd[*v2] < ISAd[*v1]) { SWAP(v1, v2); }
+  if(ISAd[*v2] <= ISAd[*v3]) { return v2; }
+  /* v3 lies below v2: the median is whichever of v1 and v3 is larger. */
+  return (ISAd[*v1] > ISAd[*v3]) ? v1 : v3;
+}
+
+/* Returns the median of five elements, compared by the key ISAd[*v]; only the local pointers are swapped. */
+static INLINE
+saidx_t *
+tr_median5(const saidx_t *ISAd,
+           saidx_t *v1, saidx_t *v2, saidx_t *v3, saidx_t *v4, saidx_t *v5) {
+  saidx_t *t;  /* presumably the temporary the SWAP macro expects */
+  if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); }  /* order the pair (v2, v3) */
+  if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); }  /* order the pair (v4, v5) */
+  if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); }  /* pair with the smaller minimum goes first */
+  if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); }
+  if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); }
+  if(ISAd[*v3] > ISAd[*v4]) { return v4; }
+  return v3;
+}
+
+/* Chooses a pivot in [first, last): median of 3, of 5, or of three medians, by range size. */
+static INLINE
+saidx_t *
+tr_pivot(const saidx_t *ISAd, saidx_t *first, saidx_t *last) {
+  saidx_t count = last - first;          /* number of candidates */
+  saidx_t *mid = first + count / 2;
+  saidx_t *tail = last - 1;              /* last element of the range */
+  saidx_t *lo, *md, *hi;
+  saidx_t step;
+
+  /* Small ranges: median of the two ends and the middle. */
+  if(count <= 32) { return tr_median3(ISAd, first, mid, tail); }
+  /* Medium ranges: median of five samples, a quarter of the range apart. */
+  if(count <= 512) {
+    step = count >> 2;
+    return tr_median5(ISAd, first, first + step, mid, tail - step, tail);
+  }
+  /* Large ranges: median of three medians taken at the front, middle and back. */
+  step = count >> 3;
+  lo = tr_median3(ISAd, first, first + step, first + (step << 1));
+  md = tr_median3(ISAd, mid - step, mid, mid + step);
+  hi = tr_median3(ISAd, tail - (step << 1), tail - step, tail);
+  return tr_median3(ISAd, lo, md, hi);
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+typedef struct _trbudget_t trbudget_t;  /* work budget consulted through trbudget_check */
+struct _trbudget_t {
+  saidx_t chance;  /* refills still available; trbudget_check spends one per refill */
+  saidx_t remain;  /* budget currently left */
+  saidx_t incval;  /* amount added to remain by each refill */
+  saidx_t count;   /* sum of the sizes trbudget_check has refused; not set by trbudget_init */
+};
+
+static INLINE
+void
+trbudget_init(trbudget_t *budget, saidx_t chance, saidx_t incval) {  /* chance: refills allowed, incval: size of each refill */
+  budget->chance = chance;
+  budget->remain = budget->incval = incval;  /* start with one full increment available */
+  budget->count = 0; }  /* zeroed here because trbudget_check accumulates into it with += */
+
+static INLINE
+saint_t
+trbudget_check(trbudget_t *budget, saidx_t size) {
+  if(budget->remain >= size) { budget->remain -= size; return 1; }  /* enough budget left: charge it */
+  if(budget->chance == 0) { budget->count += size; return 0; }      /* no refill left: refuse and record the size */
+  --budget->chance;                                                 /* spend one refill ... */
+  budget->remain += budget->incval - size;                          /* ... and charge the request against it */
+  return 1;
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+static INLINE
+void
+tr_partition(const saidx_t *ISAd,
+             saidx_t *first, saidx_t *middle, saidx_t *last,  /* scanning starts at middle; the final regrouping starts at first */
+             saidx_t **pa, saidx_t **pb, saidx_t v) {  /* three-way split of the range on the key value v */
+  saidx_t *a, *b, *c, *d, *e, *f;
+  saidx_t t, s;
+  saidx_t x = 0;
+
+  for(b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { }  /* skip the leading entries equal to v */
+  if(((a = b) < last) && (x < v)) {
+    for(; (++b < last) && ((x = ISAd[*b]) <= v);) {
+      if(x == v) { SWAP(*b, *a); ++a; }  /* park equal entries at the left end */
+    }
+  }
+  for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { }  /* skip the trailing entries equal to v */
+  if((b < (d = c)) && (x > v)) {
+    for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
+      if(x == v) { SWAP(*c, *d); --d; }  /* park equal entries at the right end */
+    }
+  }
+  for(; b < c;) {  /* exchange misplaced pairs until the two scans meet */
+    SWAP(*b, *c);
+    for(; (++b < c) && ((x = ISAd[*b]) <= v);) {
+      if(x == v) { SWAP(*b, *a); ++a; }
+    }
+    for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
+      if(x == v) { SWAP(*c, *d); --d; }
+    }
+  }
+
+  if(a <= d) {  /* some entry differs from v: swap the parked equal entries into the middle */
+    c = b - 1;
+    if((s = a - first) > (t = b - a)) { s = t; }
+    for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
+    if((s = d - c) > (t = last - d - 1)) { s = t; }
+    for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
+    first += (b - a), last -= (d - c);  /* [first, last) is now the block of entries equal to v */
+  }
+  *pa = first, *pb = last;  /* report the bounds of the equal block (the whole range if nothing differed) */
+}
+
+static
+void
+tr_copy(saidx_t *ISA, const saidx_t *SA,
+        saidx_t *first, saidx_t *a, saidx_t *b, saidx_t *last,
+        saidx_t depth) {
+  /* sort suffixes of middle partition
+     by using sorted order of suffixes of left and right partition. */
+  saidx_t *c, *d, *e;
+  saidx_t s, v;
+
+  v = b - SA - 1;  /* rank value used below to recognise members of the middle partition [a, b) */
+  for(c = first, d = a - 1; c <= d; ++c) {  /* forward scan; d advances as entries are appended, so appended entries are scanned too */
+    if((0 <= (s = *c - depth)) && (ISA[s] == v)) {  /* s lies depth positions before *c and still carries rank v */
+      *++d = s;
+      ISA[s] = d - SA;  /* its rank becomes the index of the slot it was written to */
+    }
+  }
+  for(c = last - 1, e = d + 1, d = b; e < d; --c) {  /* backward scan fills the remaining slots from b - 1 down to e */
+    if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
+      *--d = s;
+      ISA[s] = d - SA;
+    }
+  }
+}
+
+static  /* variant of tr_copy in which entries with equal ISA[s + depth] share one rank */
+void
+tr_partialcopy(saidx_t *ISA, const saidx_t *SA,
+               saidx_t *first, saidx_t *a, saidx_t *b, saidx_t *last,
+               saidx_t depth) {
+  saidx_t *c, *d, *e;
+  saidx_t s, v;
+  saidx_t rank, lastrank, newrank = -1;
+
+  v = b - SA - 1;  /* rank value used below to recognise members of the middle partition [a, b) */
+  lastrank = -1;
+  for(c = first, d = a - 1; c <= d; ++c) {  /* forward scan; d advances as entries are appended */
+    if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
+      *++d = s;
+      rank = ISA[s + depth];
+      if(lastrank != rank) { lastrank = rank; newrank = d - SA; }  /* a new rank starts only when ISA[s + depth] changes */
+      ISA[s] = newrank;
+    }
+  }
+
+  lastrank = -1;
+  for(e = d; first <= e; --e) {  /* backward pass over [first, d]: each run of equal ranks is given the index of its last slot */
+    rank = ISA[*e];
+    if(lastrank != rank) { lastrank = rank; newrank = e - SA; }
+    if(newrank != rank) { ISA[*e] = newrank; }
+  }
+
+  lastrank = -1;
+  for(c = last - 1, e = d + 1, d = b; e < d; --c) {  /* backward scan fills the remaining slots from b - 1 down to e */
+    if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
+      *--d = s;
+      rank = ISA[s + depth];
+      if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
+      ISA[s] = newrank;
+    }
+  }
+}
+
+static
+void
+tr_introsort(saidx_t *ISA, const saidx_t *ISAd,
+             saidx_t *SA, saidx_t *first, saidx_t *last,
+             trbudget_t *budget) {
+#define STACK_SIZE TR_STACKSIZE
+  struct { const saidx_t *a; saidx_t *b, *c; saint_t d, e; }stack[STACK_SIZE];  /* presumably holds (ISAd, first, last, limit, trlink) in a..e via STACK_PUSH5 -- macro defined elsewhere */
+  saidx_t *a, *b, *c;
+  saidx_t t;
+  saidx_t v, x = 0;
+  saidx_t incr = ISAd - ISA;  /* added to ISAd to compare one level deeper, subtracted to go back */
+  saint_t limit, next;
+  saint_t ssize, trlink = -1;
+
+  for(ssize = 0, limit = tr_ilg(last - first);;) {  /* limit >= 0 counts down to the heapsort fallback; negative values select the states below */
+
+    if(limit < 0) {
+      if(limit == -1) {
+        /* tandem repeat partition */
+        tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1);
+
+        /* update ranks */
+        if(a < last) {
+          for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
+        }
+        if(b < last) {
+          for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; }
+        }
+
+        /* push */
+        if(1 < (b - a)) {
+          STACK_PUSH5(NULL, a, b, 0, 0);  /* bounds of the middle partition, read back by the -2 state */
+          STACK_PUSH5(ISAd - incr, first, last, -2, trlink);
+          trlink = ssize - 2;  /* index of the (NULL, a, b, 0, 0) entry pushed just above */
+        }
+        if((a - first) <= (last - b)) {
+          if(1 < (a - first)) {
+            STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink);
+            last = a, limit = tr_ilg(a - first);
+          } else if(1 < (last - b)) {
+            first = b, limit = tr_ilg(last - b);
+          } else {
+            STACK_POP5(ISAd, first, last, limit, trlink);
+          }
+        } else {
+          if(1 < (last - b)) {
+            STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink);
+            first = b, limit = tr_ilg(last - b);
+          } else if(1 < (a - first)) {
+            last = a, limit = tr_ilg(a - first);
+          } else {
+            STACK_POP5(ISAd, first, last, limit, trlink);
+          }
+        }
+      } else if(limit == -2) {
+        /* tandem repeat copy */
+        a = stack[--ssize].b, b = stack[ssize].c;  /* take the middle-partition bounds pushed by the -1 state */
+        if(stack[ssize].d == 0) {
+          tr_copy(ISA, SA, first, a, b, last, ISAd - ISA);
+        } else {  /* d was set to -1 through trlink: use the partial copy */
+          if(0 <= trlink) { stack[trlink].d = -1; }
+          tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA);
+        }
+        STACK_POP5(ISAd, first, last, limit, trlink);
+      } else {
+        /* sorted partition */
+        if(0 <= *first) {
+          a = first;
+          do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a));  /* non-negative entries get their own index as rank */
+          first = a;
+        }
+        if(first < last) {
+          a = first; do { *a = ~*a; } while(*++a < 0);  /* undo the complement marking of one run; a stops on its unmarked last entry */
+          next = (ISA[*a] != ISAd[*a]) ? tr_ilg(a - first + 1) : -1;
+          if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } }
+
+          /* push */
+          if(trbudget_check(budget, a - first)) {
+            if((a - first) <= (last - a)) {
+              STACK_PUSH5(ISAd, a, last, -3, trlink);
+              ISAd += incr, last = a, limit = next;
+            } else {
+              if(1 < (last - a)) {
+                STACK_PUSH5(ISAd + incr, first, a, next, trlink);
+                first = a, limit = -3;
+              } else {
+                ISAd += incr, last = a, limit = next;
+              }
+            }
+          } else {
+            if(0 <= trlink) { stack[trlink].d = -1; }  /* budget refused: flag the linked tandem repeat entry */
+            if(1 < (last - a)) {
+              first = a, limit = -3;
+            } else {
+              STACK_POP5(ISAd, first, last, limit, trlink);
+            }
+          }
+        } else {
+          STACK_POP5(ISAd, first, last, limit, trlink);
+        }
+      }
+      continue;
+    }
+
+    if((last - first) <= TR_INSERTIONSORT_THRESHOLD) {
+      tr_insertionsort(ISAd, first, last);
+      limit = -3;  /* continue in the sorted partition state */
+      continue;
+    }
+
+    if(limit-- == 0) {  /* depth allowance used up: heapsort this range */
+      tr_heapsort(ISAd, first, last - first);
+      for(a = last - 1; first < a; a = b) {
+        for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; }  /* complement every entry of a run of equal keys except its last */
+      }
+      limit = -3;
+      continue;
+    }
+
+    /* choose pivot */
+    a = tr_pivot(ISAd, first, last);
+    SWAP(*first, *a);
+    v = ISAd[*first];
+
+    /* partition */
+    tr_partition(ISAd, first, first + 1, last, &a, &b, v);
+    if((last - first) != (b - a)) {  /* at least one key differs from the pivot key */
+      next = (ISA[*a] != v) ? tr_ilg(b - a) : -1;  /* -1 selects the tandem repeat partition state */
+
+      /* update ranks */
+      for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
+      if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } }
+
+      /* push */
+      if((1 < (b - a)) && (trbudget_check(budget, b - a))) {
+        if((a - first) <= (last - b)) {
+          if((last - b) <= (b - a)) {
+            if(1 < (a - first)) {
+              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
+              STACK_PUSH5(ISAd, b, last, limit, trlink);
+              last = a;
+            } else if(1 < (last - b)) {
+              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
+              first = b;
+            } else {
+              ISAd += incr, first = a, last = b, limit = next;
+            }
+          } else if((a - first) <= (b - a)) {
+            if(1 < (a - first)) {
+              STACK_PUSH5(ISAd, b, last, limit, trlink);
+              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
+              last = a;
+            } else {
+              STACK_PUSH5(ISAd, b, last, limit, trlink);
+              ISAd += incr, first = a, last = b, limit = next;
+            }
+          } else {
+            STACK_PUSH5(ISAd, b, last, limit, trlink);
+            STACK_PUSH5(ISAd, first, a, limit, trlink);
+            ISAd += incr, first = a, last = b, limit = next;
+          }
+        } else {
+          if((a - first) <= (b - a)) {
+            if(1 < (last - b)) {
+              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
+              STACK_PUSH5(ISAd, first, a, limit, trlink);
+              first = b;
+            } else if(1 < (a - first)) {
+              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
+              last = a;
+            } else {
+              ISAd += incr, first = a, last = b, limit = next;
+            }
+          } else if((last - b) <= (b - a)) {
+            if(1 < (last - b)) {
+              STACK_PUSH5(ISAd, first, a, limit, trlink);
+              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
+              first = b;
+            } else {
+              STACK_PUSH5(ISAd, first, a, limit, trlink);
+              ISAd += incr, first = a, last = b, limit = next;
+            }
+          } else {
+            STACK_PUSH5(ISAd, first, a, limit, trlink);
+            STACK_PUSH5(ISAd, b, last, limit, trlink);
+            ISAd += incr, first = a, last = b, limit = next;
+          }
+        }
+      } else {
+        if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; }  /* middle partition left unrefined: flag the linked tandem repeat entry */
+        if((a - first) <= (last - b)) {
+          if(1 < (a - first)) {
+            STACK_PUSH5(ISAd, b, last, limit, trlink);
+            last = a;
+          } else if(1 < (last - b)) {
+            first = b;
+          } else {
+            STACK_POP5(ISAd, first, last, limit, trlink);
+          }
+        } else {
+          if(1 < (last - b)) {
+            STACK_PUSH5(ISAd, first, a, limit, trlink);
+            first = b;
+          } else if(1 < (a - first)) {
+            last = a;
+          } else {
+            STACK_POP5(ISAd, first, last, limit, trlink);
+          }
+        }
+      }
+    } else {  /* every key equals the pivot key */
+      if(trbudget_check(budget, last - first)) {
+        limit = tr_ilg(last - first), ISAd += incr;  /* retry the same range one level deeper */
+      } else {
+        if(0 <= trlink) { stack[trlink].d = -1; }
+        STACK_POP5(ISAd, first, last, limit, trlink);
+      }
+    }
+  }
+#undef STACK_SIZE
+}
+
+
+
+/*---------------------------------------------------------------------------*/
+
+/*- Function -*/
+
+/* Tandem repeat sort: refines the unsorted groups of SA with tr_introsort, doubling the ISAd offset on every pass. */
+void
+trsort(saidx_t *ISA, saidx_t *SA, saidx_t n, saidx_t depth) {
+  saidx_t *ISAd;
+  saidx_t *first, *last;
+  trbudget_t budget;
+  saidx_t t, skip, unsorted;
+
+  trbudget_init(&budget, tr_ilg(n) * 2 / 3, n);
+/*  trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */
+  for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) {  /* runs until SA[0] == -n, i.e. one sorted run spans all n entries */
+    first = SA;
+    skip = 0;  /* negated length of the sorted run currently being accumulated */
+    unsorted = 0;
+    do {
+      if((t = *first) < 0) { first -= t; skip += t; }  /* negative entry: negated length of an already sorted run; jump over it */
+      else {
+        if(skip != 0) { *(first + skip) = skip; skip = 0; }  /* store the accumulated run length at the start of that run */
+        last = SA + ISA[t] + 1;  /* the rank of t gives the last index of its group */
+        if(1 < (last - first)) {
+          budget.count = 0;  /* trbudget_init does not set count */
+          tr_introsort(ISA, ISAd, SA, first, last, &budget);
+          if(budget.count != 0) { unsorted += budget.count; }
+          else { skip = first - last; }  /* nothing was refused: the group starts a new sorted run */
+        } else if((last - first) == 1) {
+          skip = -1;
+        }
+        first = last;
+      }
+    } while(first < (SA + n));
+    if(skip != 0) { *(first + skip) = skip; }
+    if(unsorted == 0) { break; }
+  }
+}
diff --git a/src/libdivsufsort/pkgconfig/CMakeLists.txt b/src/libdivsufsort/pkgconfig/CMakeLists.txt
new file mode 100644
index 0000000..ee7063c
--- /dev/null
+++ b/src/libdivsufsort/pkgconfig/CMakeLists.txt
@@ -0,0 +1,9 @@
+## generate libdivsufsort.pc (and libdivsufsort64.pc when BUILD_DIVSUFSORT64 is set) ##
+set(W64BIT "")  # substituted for @W64BIT@ in libdivsufsort.pc.cmake; empty for the default library
+configure_file("${CMAKE_CURRENT_SOURCE_DIR}/libdivsufsort.pc.cmake" "${CMAKE_CURRENT_BINARY_DIR}/libdivsufsort.pc" @ONLY)
+install(FILES "${CMAKE_CURRENT_BINARY_DIR}/libdivsufsort.pc" DESTINATION ${CMAKE_INSTALL_PKGCONFIGDIR})
+if(BUILD_DIVSUFSORT64)
+  set(W64BIT "64")  # same template, expanded a second time with the 64 suffix
+  configure_file("${CMAKE_CURRENT_SOURCE_DIR}/libdivsufsort.pc.cmake" "${CMAKE_CURRENT_BINARY_DIR}/libdivsufsort64.pc" @ONLY)
+  install(FILES "${CMAKE_CURRENT_BINARY_DIR}/libdivsufsort64.pc" DESTINATION ${CMAKE_INSTALL_PKGCONFIGDIR})
+endif(BUILD_DIVSUFSORT64)
diff --git a/src/libdivsufsort/pkgconfig/libdivsufsort.pc.cmake b/src/libdivsufsort/pkgconfig/libdivsufsort.pc.cmake
new file mode 100644
index 0000000..6419d1e
--- /dev/null
+++ b/src/libdivsufsort/pkgconfig/libdivsufsort.pc.cmake
@@ -0,0 +1,11 @@
+prefix=@CMAKE_INSTALL_PREFIX@
+exec_prefix=${prefix}
+libdir=@CMAKE_INSTALL_LIBDIR@
+includedir=@CMAKE_INSTALL_INCLUDEDIR@
+
+Name: @PROJECT_NAME@@W64BIT@
+Description: @PROJECT_DESCRIPTION@
+Version: @PROJECT_VERSION_FULL@
+URL: @PROJECT_URL@
+Libs: -L${libdir} -ldivsufsort@W64BIT@
+Cflags: -I${includedir}
diff --git a/src/libsalvador.h b/src/libsalvador.h
new file mode 100644
index 0000000..5158a31
--- /dev/null
+++ b/src/libsalvador.h
@@ -0,0 +1,40 @@
+/*
+ * libsalvador.h - library definitions
+ *
+ * Copyright (C) 2021 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Implements the ZX0 encoding designed by Einar Saukas. https://github.com/einar-saukas/ZX0
+ * Also inspired by Charles Bloom's compression blog. http://cbloomrants.blogspot.com/
+ *
+ */
+
+#ifndef _LIB_SALVADOR_H
+#define _LIB_SALVADOR_H
+
+#include "format.h"
+#include "shrink.h"
+#include "expand.h"
+
+#define FLG_IS_INVERTED  1       /**< Use inverted (V2) format */
+
+#endif /* _LIB_SALVADOR_H */
diff --git a/src/matchfinder.c b/src/matchfinder.c
new file mode 100644
index 0000000..fcb1242
--- /dev/null
+++ b/src/matchfinder.c
@@ -0,0 +1,410 @@
+/*
+ * matchfinder.c - LZ match finder implementation
+ *
+ * The following copying information applies to this specific source code file:
+ *
+ * Written in 2019-2021 by Emmanuel Marty <marty.emmanuel@gmail.com>
+ * Portions written in 2014-2015 by Eric Biggers <ebiggers3@gmail.com>
+ *
+ * To the extent possible under law, the author(s) have dedicated all copyright
+ * and related and neighboring rights to this software to the public domain
+ * worldwide via the Creative Commons Zero 1.0 Universal Public Domain
+ * Dedication (the "CC0").
+ *
+ * This software is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the CC0 for more details.
+ *
+ * You should have received a copy of the CC0 along with this software; if not
+ * see <http://creativecommons.org/publicdomain/zero/1.0/>.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Implements the ZX0 encoding designed by Einar Saukas. https://github.com/einar-saukas/ZX0
+ * Also inspired by Charles Bloom's compression blog. http://cbloomrants.blogspot.com/
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include "matchfinder.h"
+#include "format.h"
+#include "libsalvador.h"
+
+/**
+ * Reduce an index to a TAG_BITS wide multiplicative hash
+ *
+ * @param nIndex index value
+ * @return the top TAG_BITS bits of the 64-bit product of nIndex and a fixed odd multiplier
+ */
+static inline int salvador_get_index_tag(unsigned int nIndex) {
+   const unsigned long long nProduct = (unsigned long long)nIndex * 11400714819323198485ULL;
+   return (int)(nProduct >> (64ULL - TAG_BITS));
+}
+
+/**
+ * Parse input data, build suffix array and overlaid data structures to speed up match finding
+ *
+ * @param pCompressor compression context
+ * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
+ * @param nInWindowSize total input size in bytes (previously compressed bytes + bytes to compress)
+ *
+ * @return 0 for success, non-zero for failure
+ */
+int salvador_build_suffix_array(salvador_compressor *pCompressor, const unsigned char *pInWindow, const int nInWindowSize) {
+   unsigned long long *intervals = pCompressor->intervals;
+
+   /* Build suffix array from input data */
+   saidx_t *suffixArray = (saidx_t*)intervals;  /* the suffix array is built in the storage of intervals */
+   if (divsufsort_build_array(&pCompressor->divsufsort_context, pInWindow, suffixArray, nInWindowSize) != 0) {
+      return 100;
+   }
+
+   int i, r;
+
+   for (i = nInWindowSize - 1; i >= 0; i--) {  /* widen in place, last entry first; presumably relies on sizeof(saidx_t) <= sizeof(unsigned long long) so unread entries are never overwritten -- confirm */
+      intervals[i] = suffixArray[i];
+   }
+
+   int *PLCP = (int*)pCompressor->pos_data;  /* Use temporarily */
+   int *Phi = PLCP;  /* shares storage with PLCP: Phi[i] is read before PLCP[i] is written */
+   int nCurLen = 0;
+
+   /* Compute the permuted LCP first (Kärkkäinen method) */
+   Phi[intervals[0]] = -1;  /* the first suffix in sorted order has no predecessor */
+   for (i = 1; i < nInWindowSize; i++)
+      Phi[intervals[i]] = (unsigned int)intervals[i - 1];  /* Phi[pos] = position of the suffix sorted immediately before pos */
+   for (i = 0; i < nInWindowSize; i++) {
+      if (Phi[i] == -1) {
+         PLCP[i] = 0;
+         continue;
+      }
+      int nMaxLen = (i > Phi[i]) ? (nInWindowSize - i) : (nInWindowSize - Phi[i]);  /* bytes left after the later of the two positions */
+      while (nCurLen < nMaxLen && pInWindow[i + nCurLen] == pInWindow[Phi[i] + nCurLen]) nCurLen++;
+      PLCP[i] = nCurLen;
+      if (nCurLen > 0)
+         nCurLen--;  /* carry the matched length, minus one, over to position i + 1 */
+   }
+
+   /* Rotate permuted LCP into the LCP. This has better cache locality than the direct Kasai LCP method. This also
+    * saves us from having to build the inverse suffix array index, as the LCP is calculated without it using this method,
+    * and the interval builder below doesn't need it either. */
+   intervals[0] &= POS_MASK;
+
+   for (i = 1; i < nInWindowSize; i++) {
+      int nIndex = (int)(intervals[i] & POS_MASK);
+      int nLen = PLCP[nIndex];
+      if (nLen < MIN_MATCH_SIZE)
+         nLen = 0;  /* too short to be a match: stored as no common prefix */
+      if (nLen > LCP_MAX)
+         nLen = LCP_MAX;
+      int nTaggedLen = 0;
+      if (nLen)
+         nTaggedLen = (nLen << TAG_BITS) | (salvador_get_index_tag((unsigned int)nIndex) & ((1 << TAG_BITS) - 1));
+      intervals[i] = ((unsigned long long)nIndex) | (((unsigned long long)nTaggedLen) << LCP_SHIFT);  /* position in the low bits, tagged length above LCP_SHIFT */
+   }
+
+   /**
+    * Build intervals for finding matches
+    *
+    * Methodology and code fragment taken from wimlib (CC0 license):
+    * https://wimlib.net/git/?p=wimlib;a=blob_plain;f=src/lcpit_matchfinder.c;h=a2d6a1e0cd95200d1f3a5464d8359d5736b14cbe;hb=HEAD
+    */
+   unsigned long long * const SA_and_LCP = intervals;
+   unsigned long long *pos_data = pCompressor->pos_data;  /* same buffer that held PLCP above; it is rewritten from here on */
+   unsigned long long next_interval_idx;
+   unsigned long long *top = pCompressor->open_intervals;  /* stack of the currently open intervals */
+   unsigned long long prev_pos = SA_and_LCP[0] & POS_MASK;
+
+   *top = 0;
+   intervals[0] = 0;
+   next_interval_idx = 1;
+
+   for (r = 1; r < nInWindowSize; r++) {
+      const unsigned long long next_pos = SA_and_LCP[r] & POS_MASK;
+      const unsigned long long next_lcp = SA_and_LCP[r] & LCP_MASK;
+      const unsigned long long top_lcp = *top & LCP_MASK;
+
+      if (next_lcp == top_lcp) {
+         /* Continuing the deepest open interval  */
+         pos_data[prev_pos] = *top;
+      }
+      else if (next_lcp > top_lcp) {
+         /* Opening a new interval  */
+         *++top = next_lcp | next_interval_idx++;
+         pos_data[prev_pos] = *top;
+      }
+      else {
+         /* Closing the deepest open interval  */
+         pos_data[prev_pos] = *top;
+         for (;;) {
+            const unsigned long long closed_interval_idx = *top-- & POS_MASK;
+            const unsigned long long superinterval_lcp = *top & LCP_MASK;
+
+            if (next_lcp == superinterval_lcp) {
+               /* Continuing the superinterval */
+               intervals[closed_interval_idx] = *top;
+               break;
+            }
+            else if (next_lcp > superinterval_lcp) {
+               /* Creating a new interval that is a
+                * superinterval of the one being
+                * closed, but still a subinterval of
+                * its superinterval  */
+               *++top = next_lcp | next_interval_idx++;
+               intervals[closed_interval_idx] = *top;
+               break;
+            }
+            else {
+               /* Also closing the superinterval  */
+               intervals[closed_interval_idx] = *top;
+            }
+         }
+      }
+      prev_pos = next_pos;
+   }
+
+   /* Close any still-open intervals.  */
+   pos_data[prev_pos] = *top;
+   for (; top > pCompressor->open_intervals; top--)
+      intervals[*top & POS_MASK] = *(top - 1);  /* each closed interval records its parent */
+
+   /* Success */
+   return 0;
+}
+
+/**
+ * Find matches at the specified offset in the input window
+ *
+ * @param pCompressor compression context
+ * @param nOffset offset to find matches at, in the input window
+ * @param pMatches pointer to returned matches
+ * @param pMatchDepth pointer to returned match depths
+ * @param nMaxMatches maximum number of matches to return (0 for none)
+ * @param nBlockFlags bit 0: 1 for first block, 0 otherwise; bit 1: 1 for last block, 0 otherwise
+ *
+ * @return number of matches
+ */
+static int salvador_find_matches_at(salvador_compressor *pCompressor, const int nOffset, salvador_match *pMatches, unsigned short *pMatchDepth, const int nMaxMatches, const int nBlockFlags) {  /* nBlockFlags is not referenced in this body */
+   unsigned long long *intervals = pCompressor->intervals;
+   unsigned long long *pos_data = pCompressor->pos_data;
+   unsigned long long ref;
+   unsigned long long super_ref;
+   unsigned long long match_pos;
+   salvador_match *matchptr;
+   unsigned short *depthptr;
+   const int nMaxOffset = pCompressor->max_offset;
+
+   /**
+    * Find matches using intervals
+    *
+    * Taken from wimlib (CC0 license):
+    * https://wimlib.net/git/?p=wimlib;a=blob_plain;f=src/lcpit_matchfinder.c;h=a2d6a1e0cd95200d1f3a5464d8359d5736b14cbe;hb=HEAD
+    */
+
+    /* Get the deepest lcp-interval containing the current suffix. */
+   ref = pos_data[nOffset];
+
+   pos_data[nOffset] = 0;
+
+   /* Ascend until we reach a visited interval, the root, or a child of the
+    * root.  Link unvisited intervals to the current suffix as we go.  */
+   while ((super_ref = intervals[ref & POS_MASK]) & LCP_MASK) {
+      intervals[ref & POS_MASK] = nOffset | VISITED_FLAG;
+      ref = super_ref;
+   }
+
+   if (super_ref == 0) {
+      /* In this case, the current interval may be any of:
+       * (1) the root;
+       * (2) an unvisited child of the root */
+
+      if (ref != 0)  /* Not the root?  */
+         intervals[ref & POS_MASK] = nOffset | VISITED_FLAG;
+      return 0;
+   }
+
+   /* Ascend indirectly via pos_data[] links.  */
+   match_pos = super_ref & EXCL_VISITED_MASK;
+   matchptr = pMatches;
+   depthptr = pMatchDepth;
+   int nPrevOffset = 0;  /* offset of the last match considered; 0 while there is none */
+   int nPrevLen = 0;
+   int nCurDepth = 0;
+   unsigned short *cur_depth = NULL;  /* depth slot of the match most recently stored */
+   
+   if (nOffset >= match_pos) {
+      int nMatchOffset = (int)(nOffset - match_pos);
+      int nMatchLen = (int)(ref >> (LCP_SHIFT + TAG_BITS));
+
+      if ((matchptr - pMatches) < nMaxMatches) {  /* with nMaxMatches == 0 nothing is stored, but the interval updates below still run */
+         if (nMatchOffset <= nMaxOffset) {
+            if (nPrevOffset && nPrevLen > 2 && nMatchOffset == (nPrevOffset - 1) && nMatchLen == (nPrevLen - 1) && cur_depth && nCurDepth < LCP_MAX) {  /* cannot hold here: nPrevOffset is still 0 */
+               nCurDepth++;
+               *cur_depth = nCurDepth;
+            }
+            else {
+               nCurDepth = 0;
+
+               cur_depth = depthptr;
+               matchptr->length = nMatchLen;
+               matchptr->offset = nMatchOffset;
+               *depthptr = 0;
+               matchptr++;
+               depthptr++;
+            }
+
+            nPrevLen = nMatchLen;
+            nPrevOffset = nMatchOffset;
+         }
+      }
+   }
+
+   for (;;) {
+      if ((super_ref = pos_data[match_pos]) > ref) {
+         match_pos = intervals[super_ref & POS_MASK] & EXCL_VISITED_MASK;
+
+         if (nOffset >= match_pos) {
+            int nMatchOffset = (int)(nOffset - match_pos);
+            int nMatchLen = (int)(ref >> (LCP_SHIFT + TAG_BITS));
+
+            if ((matchptr - pMatches) < nMaxMatches) {
+               if (nMatchOffset <= nMaxOffset && nMatchOffset != nPrevOffset) {
+                  if (nPrevOffset && nPrevLen > 2 && nMatchOffset == (nPrevOffset - 1) && nMatchLen == (nPrevLen - 1) && cur_depth && nCurDepth < LCP_MAX) {  /* one offset and one length below the previous match: fold into its depth count */
+                     nCurDepth++;
+                     *cur_depth = nCurDepth | 0x8000;
+                  }
+                  else {
+                     nCurDepth = 0;
+
+                     cur_depth = depthptr;
+                     matchptr->length = nMatchLen;
+                     matchptr->offset = nMatchOffset;
+                     *depthptr = 0x8000;  /* NOTE(review): bit 15 is set only on the two paths that take match_pos from an intervals[] link -- meaning to be confirmed with the consumer */
+                     matchptr++;
+                     depthptr++;
+                  }
+
+                  nPrevLen = nMatchLen;
+                  nPrevOffset = nMatchOffset;
+               }
+            }
+         }
+      }
+
+      while ((super_ref = pos_data[match_pos]) > ref)
+         match_pos = intervals[super_ref & POS_MASK] & EXCL_VISITED_MASK;
+
+      intervals[ref & POS_MASK] = nOffset | VISITED_FLAG;  /* link the interval to the current position */
+      pos_data[match_pos] = (unsigned long long)ref;
+
+      int nMatchOffset = (int)(nOffset - match_pos);
+      int nMatchLen = (int)(ref >> (LCP_SHIFT + TAG_BITS));
+
+      if ((matchptr - pMatches) < nMaxMatches) {
+         if (nMatchOffset <= nMaxOffset && nMatchOffset != nPrevOffset) {
+            if (nPrevOffset && nPrevLen > 2 && nMatchOffset == (nPrevOffset - 1) && nMatchLen == (nPrevLen - 1) && cur_depth && nCurDepth < LCP_MAX) {
+               nCurDepth++;
+               *cur_depth = nCurDepth;
+            }
+            else {
+               nCurDepth = 0;
+
+               cur_depth = depthptr;
+               matchptr->length = nMatchLen;
+               matchptr->offset = nMatchOffset;
+               *depthptr = 0;
+               matchptr++;
+               depthptr++;
+            }
+
+            nPrevLen = nMatchLen;
+            nPrevOffset = nMatchOffset;
+         }
+      }
+
+      if (super_ref == 0)
+         break;
+      ref = super_ref;  /* continue with the next interval up */
+      match_pos = intervals[ref & POS_MASK] & EXCL_VISITED_MASK;
+
+      if (nOffset >= match_pos) {
+         int nMatchOffset = (int)(nOffset - match_pos);
+         int nMatchLen = (int)(ref >> (LCP_SHIFT + TAG_BITS));
+
+         if ((matchptr - pMatches) < nMaxMatches) {
+            if (nMatchOffset <= nMaxOffset && nMatchOffset != nPrevOffset) {
+               if (nPrevOffset && nPrevLen > 2 && nMatchOffset == (nPrevOffset - 1) && nMatchLen == (nPrevLen - 1) && cur_depth && nCurDepth < LCP_MAX) {
+                  nCurDepth++;
+                  *cur_depth = nCurDepth | 0x8000;
+               }
+               else {
+                  nCurDepth = 0;
+
+                  cur_depth = depthptr;
+                  matchptr->length = nMatchLen;
+                  matchptr->offset = nMatchOffset;
+                  *depthptr = 0x8000;
+                  matchptr++;
+                  depthptr++;
+               }
+
+               nPrevLen = nMatchLen;
+               nPrevOffset = nMatchOffset;
+            }
+         }
+      }
+   }
+
+   return (int)(matchptr - pMatches);
+}
+
+/**
+ * Advance the match finder over previously compressed bytes
+ *
+ * @param pCompressor compression context
+ * @param nStartOffset current offset in input window (typically 0)
+ * @param nEndOffset offset to skip to in input window (typically the number of previously compressed bytes)
+ */
+void salvador_skip_matches(salvador_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
+   salvador_match scratchMatch;
+   unsigned short nScratchDepth;
+   int nOffset = nStartOffset;
+
+   /* Every position still has to be visited, because the visit is what lazily updates the intervals.
+    * A capacity of 0 means that no match is written to the scratch variables. */
+   while (nOffset < nEndOffset) {
+      salvador_find_matches_at(pCompressor, nOffset++, &scratchMatch, &nScratchDepth, 0, 0);
+   }
+}
+
+/**
+ * Collect the matches for every offset of the data to be compressed
+ *
+ * @param pCompressor compression context
+ * @param nMatchesPerOffset maximum number of matches to store for each offset
+ * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
+ * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
+ * @param nBlockFlags bit 0: 1 for first block, 0 otherwise; bit 1: 1 for last block, 0 otherwise
+ */
+void salvador_find_all_matches(salvador_compressor *pCompressor, const int nMatchesPerOffset, const int nStartOffset, const int nEndOffset, const int nBlockFlags) {
+   salvador_match *pSlotMatch = pCompressor->match;
+   unsigned short *pSlotDepth = pCompressor->match_depth;
+   int nOffset, nSlot;
+
+   for (nOffset = nStartOffset; nOffset < nEndOffset; nOffset++) {
+      /* Every offset owns a fixed group of nMatchesPerOffset slots */
+      nSlot = salvador_find_matches_at(pCompressor, nOffset, pSlotMatch, pSlotDepth, nMatchesPerOffset, nBlockFlags);
+      /* Zero the slots that were not filled */
+      for (; nSlot < nMatchesPerOffset; nSlot++) {
+         pSlotMatch[nSlot].length = 0;
+         pSlotMatch[nSlot].offset = 0;
+         pSlotDepth[nSlot] = 0;
+      }
+
+      pSlotMatch += nMatchesPerOffset;
+      pSlotDepth += nMatchesPerOffset;
+   }
+}
diff --git a/src/matchfinder.h b/src/matchfinder.h
new file mode 100644
index 0000000..783c92b
--- /dev/null
+++ b/src/matchfinder.h
@@ -0,0 +1,77 @@
+/*
+ * matchfinder.h - LZ match finder definitions
+ *
+ * Copyright (C) 2021 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Implements the ZX0 encoding designed by Einar Saukas. https://github.com/einar-saukas/ZX0
+ * Also inspired by Charles Bloom's compression blog. http://cbloomrants.blogspot.com/
+ *
+ */
+
+#ifndef _MATCHFINDER_H
+#define _MATCHFINDER_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Forward declarations */
+typedef struct _salvador_match salvador_match;
+typedef struct _salvador_compressor salvador_compressor;
+
+/**
+ * Parse input data, build suffix array and overlaid data structures to speed up match finding
+ *
+ * @param pCompressor compression context
+ * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
+ * @param nInWindowSize total input size in bytes (previously compressed bytes + bytes to compress)
+ *
+ * @return 0 for success, non-zero for failure
+ */
+int salvador_build_suffix_array(salvador_compressor *pCompressor, const unsigned char *pInWindow, const int nInWindowSize);
+
+/**
+ * Skip previously compressed bytes
+ *
+ * @param pCompressor compression context
+ * @param nStartOffset current offset in input window (typically 0)
+ * @param nEndOffset offset to skip to in input window (typically the number of previously compressed bytes)
+ */
+void salvador_skip_matches(salvador_compressor *pCompressor, const int nStartOffset, const int nEndOffset);
+
+/**
+ * Find all matches for the data to be compressed
+ *
+ * @param pCompressor compression context
+ * @param nMatchesPerOffset maximum number of matches to store for each offset
+ * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
+ * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
+ * @param nBlockFlags bit 0: 1 for first block, 0 otherwise; bit 1: 1 for last block, 0 otherwise
+ */
+void salvador_find_all_matches(salvador_compressor *pCompressor, const int nMatchesPerOffset, const int nStartOffset, const int nEndOffset, const int nBlockFlags);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _MATCHFINDER_H */
diff --git a/src/salvador.c b/src/salvador.c
new file mode 100644
index 0000000..56d0381
--- /dev/null
+++ b/src/salvador.c
@@ -0,0 +1,1233 @@
+/*
+ * salvador.c - command line compression utility for the salvador library
+ *
+ * Copyright (C) 2021 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Implements the ZX0 encoding designed by Einar Saukas. https://github.com/einar-saukas/ZX0
+ * Also inspired by Charles Bloom's compression blog. http://cbloomrants.blogspot.com/
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#ifdef _WIN32
+#include <windows.h>
+#include <sys/timeb.h>
+#else
+#include <sys/time.h>
+#endif
+#include "libsalvador.h"
+
+#define OPT_VERBOSE        1   /* print a timing summary once the operation completes */
+#define OPT_STATS          2   /* print literal/match/RLE statistics after compressing */
+#define OPT_BACKWARD       4   /* backward mode: buffers are byte-reversed around the codec calls */
+#define OPT_CLASSIC        8   /* compress/decompress/compare: leave FLG_IS_INVERTED out of the library flags */
+/* The bit flags above are OR-ed together into the nOptions argument of the do_*() functions */
+#define TOOL_VERSION "1.0.0"
+
+/*---------------------------------------------------------------------------*/
+
+#ifdef _WIN32
+LARGE_INTEGER hpc_frequency;        /* performance counter frequency, filled in by do_init_time() */
+BOOL hpc_available = FALSE;         /* result of QueryPerformanceFrequency(); selects the timer used by do_get_time() */
+#endif
+/* Prepare the timer read by do_get_time(): on Windows, query the performance counter frequency; elsewhere, nothing to do */
+static void do_init_time(void) {
+#ifdef _WIN32
+   hpc_frequency.QuadPart = 0;
+   hpc_available = QueryPerformanceFrequency(&hpc_frequency);
+#endif
+}
+/* Return a timestamp in microseconds; callers subtract two readings to measure elapsed time */
+static long long do_get_time(void) {
+   long long nTime;
+
+#ifdef _WIN32
+   if (hpc_available) {
+      LARGE_INTEGER nCurTime;
+      /* Use HPC hardware for best precision. Whole seconds and the sub-second remainder are converted separately:
+       * multiplying the raw tick count by 1000000 overflows 64 bits once the counter has grown large enough */
+      QueryPerformanceCounter(&nCurTime);
+      nTime = (long long)((nCurTime.QuadPart / hpc_frequency.QuadPart) * 1000000LL +
+                          ((nCurTime.QuadPart % hpc_frequency.QuadPart) * 1000000LL) / hpc_frequency.QuadPart);
+   }
+   else {
+      struct _timeb tb;
+      _ftime(&tb);
+      nTime = ((long long)tb.time * 1000LL + (long long)tb.millitm) * 1000LL;   /* millisecond-resolution fallback */
+   }
+#else
+   struct timeval tm;
+   gettimeofday(&tm, NULL);
+
+   nTime = (long long)tm.tv_sec * 1000000LL + (long long)tm.tv_usec;
+#endif
+   return nTime;
+}
+/* Reverse the order of the nBufferSize bytes stored at pBuffer, in place */
+static void do_reverse_buffer(unsigned char *pBuffer, size_t nBufferSize) {
+   size_t nLow = 0;
+   size_t nHigh = nBufferSize;
+   /* Walk both ends towards the middle; the middle byte of an odd-sized buffer stays put */
+   while (nLow + 1 < nHigh) {
+      const unsigned char nSaved = pBuffer[nLow];
+      pBuffer[nLow++] = pBuffer[--nHigh];
+      pBuffer[nHigh] = nSaved;
+   }
+}
+
+/*---------------------------------------------------------------------------*/
+/* Progress callback given to salvador_compress(): prints the sizes so far and the ratio; silent below 512 Kb of input */
+static void compression_progress(long long nOriginalSize, long long nCompressedSize) {
+   if (nOriginalSize < 512 * 1024)
+      return;
+   fprintf(stdout, "\r%lld => %lld (%g %%)     \b\b\b\b\b", nOriginalSize, nCompressedSize, (double)(nCompressedSize * 100.0 / nOriginalSize));
+   fflush(stdout);
+}
+/* Compress pszInFilename into pszOutFilename (skipped when NULL), optionally priming the window with up to
+ * BLOCK_SIZE bytes of pszDictionaryFilename. nOptions is a mask of OPT_* flags; nMaxWindowSize is passed
+ * through to salvador_compress(). Returns 0 on success, or 100 after printing an error to stderr.
+ */
+static int do_compress(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions, const unsigned int nMaxWindowSize) {
+   long long nStartTime = 0LL, nEndTime = 0LL;
+   size_t nOriginalSize = 0L, nCompressedSize = 0L, nMaxCompressedSize;
+   int nFlags = (nOptions & OPT_CLASSIC) ? 0 : FLG_IS_INVERTED;
+   salvador_stats stats;
+   unsigned char *pDecompressedData;
+   unsigned char *pCompressedData;
+
+   if (nOptions & OPT_VERBOSE) {
+      nStartTime = do_get_time();
+   }
+
+   FILE* f_dict = NULL;
+   size_t nDictionarySize = 0;
+   if (pszDictionaryFilename) {
+      /* Open the dictionary */
+      f_dict = fopen(pszDictionaryFilename, "rb");
+      if (!f_dict) {
+         fprintf(stderr, "error opening dictionary '%s' for reading\n", pszDictionaryFilename);
+         return 100;
+      }
+
+      /* Get dictionary size, capped at BLOCK_SIZE */
+      fseek(f_dict, 0, SEEK_END);
+      nDictionarySize = (size_t)ftell(f_dict);
+      fseek(f_dict, 0, SEEK_SET);
+      if (nDictionarySize > BLOCK_SIZE) nDictionarySize = BLOCK_SIZE;
+   }
+
+   /* Read the whole original file in memory */
+   FILE *f_in = fopen(pszInFilename, "rb");
+   if (!f_in) {
+      if (f_dict) fclose(f_dict);
+      fprintf(stderr, "error opening '%s' for reading\n", pszInFilename);
+      return 100;
+   }
+
+   fseek(f_in, 0, SEEK_END);
+   nOriginalSize = (size_t)ftell(f_in);
+   fseek(f_in, 0, SEEK_SET);
+
+   pDecompressedData = (unsigned char*)malloc(nDictionarySize + nOriginalSize);
+   if (!pDecompressedData) {
+      fclose(f_in);
+      if (f_dict) fclose(f_dict);
+      fprintf(stderr, "out of memory for reading '%s', %zu bytes needed\n", pszInFilename, nOriginalSize);
+      return 100;
+   }
+
+   if (f_dict) {
+      /* Read dictionary data: placed before the input, or after it when the whole buffer gets reversed below */
+      if (fread(pDecompressedData + ((nOptions & OPT_BACKWARD) ? nOriginalSize : 0), 1, nDictionarySize, f_dict) != nDictionarySize) {
+         free(pDecompressedData);
+         fclose(f_in);
+         fclose(f_dict);
+         fprintf(stderr, "I/O error while reading dictionary '%s'\n", pszDictionaryFilename);
+         return 100;
+      }
+
+      fclose(f_dict);
+      f_dict = NULL;
+   }
+
+   /* Read input file data */
+   if (fread(pDecompressedData + ((nOptions & OPT_BACKWARD) ? 0 : nDictionarySize), 1, nOriginalSize, f_in) != nOriginalSize) {
+      free(pDecompressedData);
+      fclose(f_in);
+      fprintf(stderr, "I/O error while reading '%s'\n", pszInFilename);
+      return 100;
+   }
+
+   fclose(f_in);
+
+   if (nOptions & OPT_BACKWARD)
+      do_reverse_buffer(pDecompressedData, nDictionarySize + nOriginalSize);
+
+   /* Allocate max compressed size */
+   nMaxCompressedSize = salvador_get_max_compressed_size(nDictionarySize + nOriginalSize);
+
+   pCompressedData = (unsigned char*)malloc(nMaxCompressedSize);
+   if (!pCompressedData) {
+      free(pDecompressedData);
+      fprintf(stderr, "out of memory for compressing '%s', %zu bytes needed\n", pszInFilename, nMaxCompressedSize);
+      return 100;
+   }
+
+   memset(pCompressedData, 0, nMaxCompressedSize);
+
+   nCompressedSize = salvador_compress(pDecompressedData, pCompressedData, nDictionarySize + nOriginalSize, nMaxCompressedSize, nFlags, nMaxWindowSize, nDictionarySize, compression_progress, &stats);
+
+   if ((nOptions & OPT_VERBOSE)) {
+      nEndTime = do_get_time();
+   }
+
+   if (nCompressedSize == -1) {
+      free(pCompressedData);
+      free(pDecompressedData);
+      fprintf(stderr, "compression error for '%s'\n", pszInFilename);
+      return 100;
+   }
+
+   if (nOptions & OPT_BACKWARD)
+      do_reverse_buffer(pCompressedData, nCompressedSize);
+
+   if (pszOutFilename) {
+      /* Write whole compressed file out; a failed open, short write or failed close is reported as an error */
+      FILE *f_out = fopen(pszOutFilename, "wb");
+      size_t nWritten = f_out ? fwrite(pCompressedData, 1, nCompressedSize, f_out) : 0;
+      if (!f_out || fclose(f_out) != 0 || nWritten != nCompressedSize) {
+         free(pCompressedData);
+         free(pDecompressedData);
+         fprintf(stderr, "error writing compressed file '%s'\n", pszOutFilename);
+         return 100;
+      }
+   }
+
+   free(pCompressedData);
+   free(pDecompressedData);
+
+   if ((nOptions & OPT_VERBOSE)) {
+      double fDelta = ((double)(nEndTime - nStartTime)) / 1000000.0;
+      double fSpeed = ((double)nOriginalSize / 1048576.0) / fDelta;
+      fprintf(stdout, "\rCompressed '%s' in %g seconds, %.02g Mb/s, %d tokens (%g bytes/token), %d into %d bytes ==> %g %%\n",
+         pszInFilename, fDelta, fSpeed, stats.commands_divisor, (double)nOriginalSize / (double)stats.commands_divisor,
+         (int)nOriginalSize, (int)nCompressedSize, (double)(nCompressedSize * 100.0 / nOriginalSize));
+   }
+
+   if (nOptions & OPT_STATS) {
+      if (stats.literals_divisor > 0)
+         fprintf(stdout, "Literals: min: %d avg: %d max: %d count: %d\n", stats.min_literals, stats.total_literals / stats.literals_divisor, stats.max_literals, stats.literals_divisor);
+      else
+         fprintf(stdout, "Literals: none\n");
+
+      if (stats.match_divisor > 0) {
+         fprintf(stdout, "Offsets: min: %d avg: %d max: %d count: %d\n", stats.min_offset, (int)(stats.total_offsets / (long long)stats.match_divisor), stats.max_offset, stats.match_divisor);
+         fprintf(stdout, "Match lens: min: %d avg: %d max: %d count: %d\n", stats.min_match_len, stats.total_match_lens / stats.match_divisor, stats.max_match_len, stats.match_divisor);
+      }
+      else {
+         fprintf(stdout, "Offsets: none\n");
+         fprintf(stdout, "Match lens: none\n");
+      }
+      if (stats.rle1_divisor > 0) {
+         fprintf(stdout, "RLE1 lens: min: %d avg: %d max: %d count: %d\n", stats.min_rle1_len, stats.total_rle1_lens / stats.rle1_divisor, stats.max_rle1_len, stats.rle1_divisor);
+      }
+      else {
+         fprintf(stdout, "RLE1 lens: none\n");
+      }
+      if (stats.rle2_divisor > 0) {
+         fprintf(stdout, "RLE2 lens: min: %d avg: %d max: %d count: %d\n", stats.min_rle2_len, stats.total_rle2_lens / stats.rle2_divisor, stats.max_rle2_len, stats.rle2_divisor);
+      }
+      else {
+         fprintf(stdout, "RLE2 lens: none\n");
+      }
+      fprintf(stdout, "Safe distance: %d (0x%X)\n", stats.safe_dist, stats.safe_dist);
+   }
+   return 0;
+}
+
+/*---------------------------------------------------------------------------*/
+/* Decompress pszInFilename into pszOutFilename (skipped when NULL), optionally priming the window with up to
+ * BLOCK_SIZE bytes of pszDictionaryFilename. nOptions is a mask of OPT_* flags.
+ * Returns 0 on success, or 100 after printing an error to stderr.
+ */
+static int do_decompress(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions) {
+   long long nStartTime = 0LL, nEndTime = 0LL;
+   size_t nCompressedSize, nMaxDecompressedSize, nOriginalSize;
+   unsigned char *pCompressedData;
+   unsigned char *pDecompressedData;
+   int nFlags = (nOptions & OPT_CLASSIC) ? 0 : FLG_IS_INVERTED;
+
+   /* Read the whole compressed file in memory */
+   FILE *f_in = fopen(pszInFilename, "rb");
+   if (!f_in) {
+      fprintf(stderr, "error opening '%s' for reading\n", pszInFilename);
+      return 100;
+   }
+
+   fseek(f_in, 0, SEEK_END);
+   nCompressedSize = (size_t)ftell(f_in);
+   fseek(f_in, 0, SEEK_SET);
+
+   pCompressedData = (unsigned char*)malloc(nCompressedSize);
+   if (!pCompressedData) {
+      fclose(f_in);
+      fprintf(stderr, "out of memory for reading '%s', %zu bytes needed\n", pszInFilename, nCompressedSize);
+      return 100;
+   }
+
+   if (fread(pCompressedData, 1, nCompressedSize, f_in) != nCompressedSize) {
+      free(pCompressedData);
+      fclose(f_in);
+      fprintf(stderr, "I/O error while reading '%s'\n", pszInFilename);
+      return 100;
+   }
+
+   fclose(f_in);
+
+   if (nOptions & OPT_BACKWARD)
+      do_reverse_buffer(pCompressedData, nCompressedSize);
+
+   /* Get max decompressed size */
+   nMaxDecompressedSize = salvador_get_max_decompressed_size(pCompressedData, nCompressedSize, nFlags);
+   if (nMaxDecompressedSize == -1) {
+      free(pCompressedData);
+      fprintf(stderr, "invalid compressed format for file '%s'\n", pszInFilename);
+      return 100;
+   }
+
+   FILE* f_dict = NULL;
+   size_t nDictionarySize = 0;
+   if (pszDictionaryFilename) {
+      /* Open the dictionary */
+      f_dict = fopen(pszDictionaryFilename, "rb");
+      if (!f_dict) {
+         free(pCompressedData);
+         fprintf(stderr, "error opening dictionary '%s' for reading\n", pszDictionaryFilename);
+         return 100;
+      }
+
+      /* Get dictionary size, capped at BLOCK_SIZE */
+      fseek(f_dict, 0, SEEK_END);
+      nDictionarySize = (size_t)ftell(f_dict);
+      fseek(f_dict, 0, SEEK_SET);
+
+      if (nDictionarySize > BLOCK_SIZE) nDictionarySize = BLOCK_SIZE;
+   }
+
+   /* Allocate max decompressed size */
+   pDecompressedData = (unsigned char*)malloc(nDictionarySize + nMaxDecompressedSize);
+   if (!pDecompressedData) {
+      free(pCompressedData);
+      if (f_dict) fclose(f_dict);
+      fprintf(stderr, "out of memory for decompressing '%s', %zu bytes needed\n", pszInFilename, nMaxDecompressedSize);
+      return 100;
+   }
+
+   memset(pDecompressedData, 0, nDictionarySize + nMaxDecompressedSize);
+
+   if (f_dict) {
+      /* Read dictionary data; the input file is already closed here, so only the buffers and f_dict are released */
+      if (fread(pDecompressedData, 1, nDictionarySize, f_dict) != nDictionarySize) {
+         free(pDecompressedData);
+         free(pCompressedData);
+         fclose(f_dict);
+         fprintf(stderr, "I/O error while reading dictionary '%s'\n", pszDictionaryFilename);
+         return 100;
+      }
+
+      fclose(f_dict);
+      f_dict = NULL;
+
+      if (nOptions & OPT_BACKWARD)
+         do_reverse_buffer(pDecompressedData, nDictionarySize);
+   }
+
+   if (nOptions & OPT_VERBOSE) {
+      nStartTime = do_get_time();
+   }
+
+   nOriginalSize = salvador_decompress(pCompressedData, pDecompressedData, nCompressedSize, nMaxDecompressedSize, nDictionarySize, nFlags);
+   if (nOriginalSize == -1) {
+      free(pDecompressedData);
+      free(pCompressedData);
+      fprintf(stderr, "decompression error for '%s'\n", pszInFilename);
+      return 100;
+   }
+
+   if (nOptions & OPT_BACKWARD)
+      do_reverse_buffer(pDecompressedData + nDictionarySize, nOriginalSize);
+
+   if (pszOutFilename) {
+      /* Write whole decompressed file out; a failed open, short write or failed close is reported as an error */
+      FILE *f_out = fopen(pszOutFilename, "wb");
+      size_t nWritten = f_out ? fwrite(pDecompressedData + nDictionarySize, 1, nOriginalSize, f_out) : 0;
+      if (!f_out || fclose(f_out) != 0 || nWritten != nOriginalSize) {
+         free(pDecompressedData);
+         free(pCompressedData);
+         fprintf(stderr, "error writing decompressed file '%s'\n", pszOutFilename);
+         return 100;
+      }
+   }
+
+   free(pDecompressedData);
+   free(pCompressedData);
+
+   if (nOptions & OPT_VERBOSE) {
+      nEndTime = do_get_time();
+      double fDelta = ((double)(nEndTime - nStartTime)) / 1000000.0;
+      double fSpeed = ((double)nOriginalSize / 1048576.0) / fDelta;
+      fprintf(stdout, "Decompressed '%s' in %g seconds, %g Mb/s\n",
+         pszInFilename, fDelta, fSpeed);
+   }
+
+   return 0;
+}
+
+/*---------------------------------------------------------------------------*/
+/* Decompress pszInFilename in memory and check the result against the contents of pszOutFilename (the original
+ * file), optionally using up to BLOCK_SIZE bytes of pszDictionaryFilename. nOptions is a mask of OPT_* flags.
+ * Returns 0 when both match, or 100 after printing an error to stderr.
+ */
+static int do_compare(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions) {
+   long long nStartTime = 0LL, nEndTime = 0LL;
+   size_t nCompressedSize, nMaxDecompressedSize, nOriginalSize, nDecompressedSize;
+   unsigned char *pCompressedData = NULL;
+   unsigned char *pOriginalData = NULL;
+   unsigned char *pDecompressedData = NULL;
+   int nFlags = (nOptions & OPT_CLASSIC) ? 0 : FLG_IS_INVERTED;
+
+   /* Read the whole compressed file in memory */
+   FILE *f_in = fopen(pszInFilename, "rb");
+   if (!f_in) {
+      fprintf(stderr, "error opening '%s' for reading\n", pszInFilename);
+      return 100;
+   }
+
+   fseek(f_in, 0, SEEK_END);
+   nCompressedSize = (size_t)ftell(f_in);
+   fseek(f_in, 0, SEEK_SET);
+
+   pCompressedData = (unsigned char*)malloc(nCompressedSize);
+   if (!pCompressedData) {
+      fclose(f_in);
+      fprintf(stderr, "out of memory for reading '%s', %zu bytes needed\n", pszInFilename, nCompressedSize);
+      return 100;
+   }
+
+   if (fread(pCompressedData, 1, nCompressedSize, f_in) != nCompressedSize) {
+      free(pCompressedData);
+      fclose(f_in);
+      fprintf(stderr, "I/O error while reading '%s'\n", pszInFilename);
+      return 100;
+   }
+
+   fclose(f_in);
+
+   if (nOptions & OPT_BACKWARD)
+      do_reverse_buffer(pCompressedData, nCompressedSize);
+
+   /* Read the whole original file in memory; errors from here on name pszOutFilename, the file actually being read */
+   f_in = fopen(pszOutFilename, "rb");
+   if (!f_in) {
+      free(pCompressedData);
+      fprintf(stderr, "error opening '%s' for reading\n", pszOutFilename);
+      return 100;
+   }
+
+   fseek(f_in, 0, SEEK_END);
+   nOriginalSize = (size_t)ftell(f_in);
+   fseek(f_in, 0, SEEK_SET);
+
+   pOriginalData = (unsigned char*)malloc(nOriginalSize);
+   if (!pOriginalData) {
+      fclose(f_in);
+      free(pCompressedData);
+      fprintf(stderr, "out of memory for reading '%s', %zu bytes needed\n", pszOutFilename, nOriginalSize);
+      return 100;
+   }
+
+   if (fread(pOriginalData, 1, nOriginalSize, f_in) != nOriginalSize) {
+      free(pOriginalData);
+      fclose(f_in);
+      free(pCompressedData);
+      fprintf(stderr, "I/O error while reading '%s'\n", pszOutFilename);
+      return 100;
+   }
+
+   fclose(f_in);
+
+   /* Get max decompressed size */
+   nMaxDecompressedSize = salvador_get_max_decompressed_size(pCompressedData, nCompressedSize, nFlags);
+   if (nMaxDecompressedSize == -1) {
+      free(pOriginalData);
+      free(pCompressedData);
+      fprintf(stderr, "invalid compressed format for file '%s'\n", pszInFilename);
+      return 100;
+   }
+
+   FILE* f_dict = NULL;
+   size_t nDictionarySize = 0;
+   if (pszDictionaryFilename) {
+      /* Open the dictionary */
+      f_dict = fopen(pszDictionaryFilename, "rb");
+      if (!f_dict) {
+         free(pOriginalData);
+         free(pCompressedData);
+         fprintf(stderr, "error opening dictionary '%s' for reading\n", pszDictionaryFilename);
+         return 100;
+      }
+
+      /* Get dictionary size, capped at BLOCK_SIZE */
+      fseek(f_dict, 0, SEEK_END);
+      nDictionarySize = (size_t)ftell(f_dict);
+      fseek(f_dict, 0, SEEK_SET);
+      if (nDictionarySize > BLOCK_SIZE) nDictionarySize = BLOCK_SIZE;
+   }
+
+   /* Allocate max decompressed size */
+   pDecompressedData = (unsigned char*)malloc(nDictionarySize + nMaxDecompressedSize);
+   if (!pDecompressedData) {
+      free(pOriginalData);
+      free(pCompressedData);
+      if (f_dict) fclose(f_dict);
+      fprintf(stderr, "out of memory for decompressing '%s', %zu bytes needed\n", pszInFilename, nMaxDecompressedSize);
+      return 100;
+   }
+
+   memset(pDecompressedData, 0, nDictionarySize + nMaxDecompressedSize);
+
+   if (f_dict) {
+      /* Read dictionary data; f_in is already closed here, so only the buffers and f_dict are released */
+      if (fread(pDecompressedData, 1, nDictionarySize, f_dict) != nDictionarySize) {
+         free(pDecompressedData);
+         free(pOriginalData);
+         free(pCompressedData);
+         fclose(f_dict);
+         fprintf(stderr, "I/O error while reading dictionary '%s'\n", pszDictionaryFilename);
+         return 100;
+      }
+
+      fclose(f_dict);
+      f_dict = NULL;
+
+      if (nOptions & OPT_BACKWARD)
+         do_reverse_buffer(pDecompressedData, nDictionarySize);
+   }
+
+   if (nOptions & OPT_VERBOSE) {
+      nStartTime = do_get_time();
+   }
+
+   nDecompressedSize = salvador_decompress(pCompressedData, pDecompressedData, nCompressedSize, nMaxDecompressedSize, nDictionarySize, nFlags);
+   if (nDecompressedSize == -1) {
+      free(pDecompressedData);
+      free(pOriginalData);
+      free(pCompressedData);
+      fprintf(stderr, "decompression error for '%s'\n", pszInFilename);
+      return 100;
+   }
+
+   if (nOptions & OPT_BACKWARD)
+      do_reverse_buffer(pDecompressedData + nDictionarySize, nDecompressedSize);
+
+   int nMismatch = (nDecompressedSize != nOriginalSize || memcmp(pDecompressedData + nDictionarySize, pOriginalData, nOriginalSize));   /* compare first, report after the buffers are released */
+   free(pDecompressedData);
+   free(pOriginalData);
+   free(pCompressedData);
+   if (nMismatch) {
+      fprintf(stderr, "error comparing compressed file '%s' with original '%s'\n", pszInFilename, pszOutFilename);
+      return 100;
+   }
+
+   if (nOptions & OPT_VERBOSE) {
+      nEndTime = do_get_time();
+      double fDelta = ((double)(nEndTime - nStartTime)) / 1000000.0;
+      double fSpeed = ((double)nOriginalSize / 1048576.0) / fDelta;
+      fprintf(stdout, "Compared '%s' in %g seconds, %g Mb/s\n",
+         pszInFilename, fDelta, fSpeed);
+   }
+
+   return 0;
+}
+
+/*---------------------------------------------------------------------------*/
+/* Fill pBuffer with nBufferSize bytes of pseudo-random data: runs of random literals mixed with copies of earlier bytes */
+static void generate_compressible_data(unsigned char *pBuffer, size_t nBufferSize, unsigned int nSeed, int nNumLiteralValues, float fMatchProbability) {
+   size_t nIndex = 0;
+   int nMatchProbability = (int)(fMatchProbability * 1023.0f);   /* threshold compared against a 10-bit random draw */
+   /* nSeed seeds rand(), so the same arguments replay the same sequence of draws */
+   srand(nSeed);
+   /* Literal bytes are rand() % nNumLiteralValues; the first byte is always a literal */
+   if (nIndex >= nBufferSize) return;
+   pBuffer[nIndex++] = rand() % nNumLiteralValues;
+
+   while (nIndex < nBufferSize) {
+      if ((rand() & 1023) >= nMatchProbability) {   /* literal run */
+         size_t nLiteralCount = rand() & 127;   /* 0..127 literals, clipped to the space left */
+         if (nLiteralCount > (nBufferSize - nIndex))
+            nLiteralCount = nBufferSize - nIndex;
+
+         while (nLiteralCount--)
+            pBuffer[nIndex++] = rand() % nNumLiteralValues;
+      }
+      else {   /* match: repeat the bytes found nMatchOffset positions back */
+         size_t nMatchLength = MIN_MATCH_SIZE + (rand() & 1023);
+         size_t nMatchOffset;
+
+         if (nMatchLength > (nBufferSize - nIndex))
+            nMatchLength = nBufferSize - nIndex;   /* clip to the space left */
+         if (nMatchLength > nIndex)
+            nMatchLength = nIndex;   /* and to the number of bytes generated so far */
+
+         if (nMatchLength < nIndex)
+            nMatchOffset = rand() % (nIndex - nMatchLength);
+         else
+            nMatchOffset = 0;
+         /* NOTE(review): an offset of 0 makes the loop below copy each byte onto itself, keeping whatever the buffer already held; confirm this is intended */
+         while (nMatchLength--) {
+            pBuffer[nIndex] = pBuffer[nIndex - nMatchOffset];
+            nIndex++;
+         }
+      }
+   }
+}
+/* Corrupt pBuffer in place: invert every bit of bytes picked at random, roughly fXorProbability of them.
+ * rand() is re-seeded with nSeed first, so a given seed always replays the same draws. */
+static void xor_data(unsigned char *pBuffer, size_t nBufferSize, unsigned int nSeed, float fXorProbability) {
+   const int nThreshold = (int)(fXorProbability * 1023.0f);
+   size_t nPos;
+
+   srand(nSeed);
+
+   for (nPos = 0; nPos < nBufferSize; nPos++) {
+      /* One 10-bit draw per byte; draws at or above the threshold leave the byte alone */
+      if ((rand() & 1023) >= nThreshold)
+         continue;
+
+      pBuffer[nPos] ^= 0xff;
+   }
+}
+/* Self-test: round-trip generated data of growing sizes through salvador_compress()/salvador_decompress(), then feed the decompressor corrupted streams. Returns 0 if every check passes, 100 otherwise. */
+static int do_self_test(const unsigned int nOptions, const unsigned int nMaxWindowSize, const int nIsQuickTest) {
+   unsigned char *pGeneratedData;
+   unsigned char *pCompressedData;
+   unsigned char *pTmpCompressedData;
+   unsigned char *pTmpDecompressedData;
+   size_t nGeneratedDataSize;
+   size_t nMaxCompressedDataSize;
+   unsigned int nSeed = 123;   /* incremented after every match-probability step */
+   int nFlags = FLG_IS_INVERTED;   /* fixed: nOptions is not referenced anywhere in this function */
+   int i;
+   /* All four buffers are sized for 4 * BLOCK_SIZE bytes of input, the largest size the main loop generates */
+   pGeneratedData = (unsigned char*)malloc(4 * BLOCK_SIZE);
+   if (!pGeneratedData) {
+      fprintf(stderr, "out of memory, %d bytes needed\n", 4 * BLOCK_SIZE);
+      return 100;
+   }
+
+   nMaxCompressedDataSize = salvador_get_max_compressed_size(4 * BLOCK_SIZE);
+   pCompressedData = (unsigned char*)malloc(nMaxCompressedDataSize);
+   if (!pCompressedData) {
+      free(pGeneratedData);
+      pGeneratedData = NULL;
+
+      fprintf(stderr, "out of memory, %zd bytes needed\n", nMaxCompressedDataSize);
+      return 100;
+   }
+
+   pTmpCompressedData = (unsigned char*)malloc(nMaxCompressedDataSize);
+   if (!pTmpCompressedData) {
+      free(pCompressedData);
+      pCompressedData = NULL;
+      free(pGeneratedData);
+      pGeneratedData = NULL;
+
+      fprintf(stderr, "out of memory, %zd bytes needed\n", nMaxCompressedDataSize);
+      return 100;
+   }
+
+   pTmpDecompressedData = (unsigned char*)malloc(4 * BLOCK_SIZE);
+   if (!pTmpDecompressedData) {
+      free(pTmpCompressedData);
+      pTmpCompressedData = NULL;
+      free(pCompressedData);
+      pCompressedData = NULL;
+      free(pGeneratedData);
+      pGeneratedData = NULL;
+
+      fprintf(stderr, "out of memory, %d bytes needed\n", 4 * BLOCK_SIZE);
+      return 100;
+   }
+
+   memset(pGeneratedData, 0, 4 * BLOCK_SIZE);
+   memset(pCompressedData, 0, nMaxCompressedDataSize);
+   memset(pTmpCompressedData, 0, nMaxCompressedDataSize);
+
+   /* Test compressing with a too small buffer to do anything, expect to fail cleanly */
+   for (i = 0; i < 12; i++) {
+      generate_compressible_data(pGeneratedData, i, nSeed, 256, 0.5f);
+      salvador_compress(pGeneratedData, pCompressedData, i, i, nFlags, nMaxWindowSize, 0 /* dictionary size */, NULL, NULL);   /* return value deliberately not checked */
+   }
+
+   size_t nDataSizeStep = 128;   /* size increment; doubled after each size, up to 128 * 4096 */
+   float fProbabilitySizeStep = nIsQuickTest ? 0.005f : 0.0005f;   /* match-probability increment; grows by 25% after each size */
+   /* Sizes start at 1024 bytes; a quick test stops after that first size, a full test goes up to 4 * BLOCK_SIZE */
+   for (nGeneratedDataSize = 1024; nGeneratedDataSize <= (nIsQuickTest ? 1024U : (4U * BLOCK_SIZE)); nGeneratedDataSize += nDataSizeStep) {
+      float fMatchProbability;
+
+      fprintf(stdout, "size %zd", nGeneratedDataSize);
+      for (fMatchProbability = 0; fMatchProbability <= 0.995f; fMatchProbability += fProbabilitySizeStep) {
+         int nNumLiteralValues[12] = { 1, 2, 3, 15, 30, 56, 96, 137, 178, 191, 255, 256 };   /* literal alphabet sizes tried for each step */
+         float fXorProbability;
+
+         fputc('.', stdout);
+         fflush(stdout);
+
+         for (i = 0; i < 12; i++) {
+            /* Generate data to compress */
+            generate_compressible_data(pGeneratedData, nGeneratedDataSize, nSeed, nNumLiteralValues[i], fMatchProbability);
+
+            /* Try to compress it, expected to succeed */
+            size_t nActualCompressedSize = salvador_compress(pGeneratedData, pCompressedData, nGeneratedDataSize, salvador_get_max_compressed_size(nGeneratedDataSize),
+               nFlags, nMaxWindowSize, 0 /* dictionary size */, NULL, NULL);
+            if (nActualCompressedSize == -1 || nActualCompressedSize < (1 + 1 + 1 /* footer */)) {
+               free(pTmpDecompressedData);
+               pTmpDecompressedData = NULL;
+               free(pTmpCompressedData);
+               pTmpCompressedData = NULL;
+               free(pCompressedData);
+               pCompressedData = NULL;
+               free(pGeneratedData);
+               pGeneratedData = NULL;
+
+               fprintf(stderr, "\nself-test: error compressing size %zd, seed %d, match probability %f, literals range %d\n", nGeneratedDataSize, nSeed, fMatchProbability, nNumLiteralValues[i]);
+               return 100;
+            }
+
+            /* Try to decompress it, expected to succeed */
+            size_t nActualDecompressedSize;
+            nActualDecompressedSize = salvador_decompress(pCompressedData, pTmpDecompressedData, nActualCompressedSize, nGeneratedDataSize, 0 /* dictionary size */, nFlags);
+            if (nActualDecompressedSize == -1) {
+               free(pTmpDecompressedData);
+               pTmpDecompressedData = NULL;
+               free(pTmpCompressedData);
+               pTmpCompressedData = NULL;
+               free(pCompressedData);
+               pCompressedData = NULL;
+               free(pGeneratedData);
+               pGeneratedData = NULL;
+
+               fprintf(stderr, "\nself-test: error decompressing size %zd, seed %d, match probability %f, literals range %d\n", nGeneratedDataSize, nSeed, fMatchProbability, nNumLiteralValues[i]);
+               return 100;
+            }
+            /* The round trip must reproduce the generated bytes exactly */
+            if (memcmp(pGeneratedData, pTmpDecompressedData, nGeneratedDataSize)) {
+               free(pTmpDecompressedData);
+               pTmpDecompressedData = NULL;
+               free(pTmpCompressedData);
+               pTmpCompressedData = NULL;
+               free(pCompressedData);
+               pCompressedData = NULL;
+               free(pGeneratedData);
+               pGeneratedData = NULL;
+
+               fprintf(stderr, "\nself-test: error comparing decompressed and original data, size %zd, seed %d, match probability %f, literals range %d\n", nGeneratedDataSize, nSeed, fMatchProbability, nNumLiteralValues[i]);
+               return 100;
+            }
+
+            /* Try to decompress corrupted data, expected to fail cleanly, without crashing or corrupting memory outside the output buffer */
+            for (fXorProbability = 0.05f; fXorProbability <= 0.5f; fXorProbability += 0.05f) {
+               memcpy(pTmpCompressedData, pCompressedData, nActualCompressedSize);
+               xor_data(pTmpCompressedData, nActualCompressedSize, nSeed, fXorProbability);
+               salvador_decompress(pTmpCompressedData, pGeneratedData, nActualCompressedSize, nGeneratedDataSize, 0 /* dictionary size */, nFlags);   /* output lands in pGeneratedData; result not checked */
+            }
+         }
+
+         nSeed++;
+      }
+
+      fputc(10, stdout);
+      fflush(stdout);
+      /* Coarsen both steps for the next, larger size */
+      nDataSizeStep <<= 1;
+      if (nDataSizeStep > (128 * 4096))
+         nDataSizeStep = 128 * 4096;
+      fProbabilitySizeStep *= 1.25;
+      if (fProbabilitySizeStep > (0.0005f * 4096))
+         fProbabilitySizeStep = 0.0005f * 4096;
+   }
+
+   free(pTmpDecompressedData);
+   pTmpDecompressedData = NULL;
+
+   free(pTmpCompressedData);
+   pTmpCompressedData = NULL;
+
+   free(pCompressedData);
+   pCompressedData = NULL;
+
+   free(pGeneratedData);
+   pGeneratedData = NULL;
+
+   fprintf(stdout, "All tests passed.\n");
+   return 0;
+}
+
+/*---------------------------------------------------------------------------*/
+/* Benchmark in-memory compression of pszInFilename: compress it 5 times, keep the best time, check guard bytes
+ * placed around the output buffer after every run, then write the result to pszOutFilename (skipped when NULL).
+ * Dictionaries are not supported. Returns 0 on success, or 100 after printing an error to stderr.
+ */
+static int do_compr_benchmark(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions, const unsigned int nMaxWindowSize) {
+   size_t nFileSize, nMaxCompressedSize;
+   unsigned char *pFileData;
+   unsigned char *pCompressedData;
+   int nFlags = FLG_IS_INVERTED;
+   int i;
+
+   if (pszDictionaryFilename) {
+      fprintf(stderr, "in-memory benchmarking does not support dictionaries\n");
+      return 100;
+   }
+
+   /* Read the whole original file in memory */
+   FILE *f_in = fopen(pszInFilename, "rb");
+   if (!f_in) {
+      fprintf(stderr, "error opening '%s' for reading\n", pszInFilename);
+      return 100;
+   }
+
+   fseek(f_in, 0, SEEK_END);
+   nFileSize = (size_t)ftell(f_in);
+   fseek(f_in, 0, SEEK_SET);
+
+   pFileData = (unsigned char*)malloc(nFileSize);
+   if (!pFileData) {
+      fclose(f_in);
+      fprintf(stderr, "out of memory for reading '%s', %zu bytes needed\n", pszInFilename, nFileSize);
+      return 100;
+   }
+
+   if (fread(pFileData, 1, nFileSize, f_in) != nFileSize) {
+      free(pFileData);
+      fclose(f_in);
+      fprintf(stderr, "I/O error while reading '%s'\n", pszInFilename);
+      return 100;
+   }
+
+   fclose(f_in);
+
+   if (nOptions & OPT_BACKWARD)
+      do_reverse_buffer(pFileData, nFileSize);
+
+   /* Allocate max compressed size, plus 1024 guard bytes on either side of the output area */
+   nMaxCompressedSize = salvador_get_max_compressed_size(nFileSize);
+
+   pCompressedData = (unsigned char*)malloc(nMaxCompressedSize + 2048);
+   if (!pCompressedData) {
+      free(pFileData);
+      fprintf(stderr, "out of memory for compressing '%s', %zu bytes needed\n", pszInFilename, nMaxCompressedSize);
+      return 100;
+   }
+
+   memset(pCompressedData + 1024, 0, nMaxCompressedSize);
+
+   long long nBestCompTime = -1;
+   size_t nActualCompressedSize = 0;
+   size_t nRightGuardPos = nMaxCompressedSize;
+
+   for (i = 0; i < 5; i++) {
+      unsigned char nGuard = 0x33 + i;
+      int j;
+
+      /* Write guard bytes around the output buffer, to help check for writes outside of it by the compressor */
+      memset(pCompressedData, nGuard, 1024);
+      memset(pCompressedData + 1024 + nRightGuardPos, nGuard, 1024);
+
+      long long t0 = do_get_time();
+      nActualCompressedSize = salvador_compress(pFileData, pCompressedData + 1024, nFileSize, nRightGuardPos, nFlags, nMaxWindowSize, 0 /* dictionary size */, NULL, NULL);
+      long long t1 = do_get_time();
+      if (nActualCompressedSize == -1) {
+         free(pCompressedData);
+         free(pFileData);
+         fprintf(stderr, "compression error\n");
+         return 100;
+      }
+
+      long long nCurDecTime = t1 - t0;
+      if (nBestCompTime == -1 || nBestCompTime > nCurDecTime)
+         nBestCompTime = nCurDecTime;
+
+      /* Check guard bytes before the output buffer */
+      for (j = 0; j < 1024; j++) {
+         if (pCompressedData[j] != nGuard) {
+            free(pCompressedData);
+            free(pFileData);
+            fprintf(stderr, "error, wrote outside of output buffer at %d!\n", j - 1024);
+            return 100;
+         }
+      }
+
+      /* Check guard bytes after the output buffer */
+      for (j = 0; j < 1024; j++) {
+         if (pCompressedData[1024 + nRightGuardPos + j] != nGuard) {
+            free(pCompressedData);
+            free(pFileData);
+            fprintf(stderr, "error, wrote outside of output buffer at %d!\n", j);
+            return 100;
+         }
+      }
+
+      nRightGuardPos = nActualCompressedSize;   /* later runs get exactly the space the previous run needed */
+   }
+
+   if (nOptions & OPT_BACKWARD)
+      do_reverse_buffer(pCompressedData + 1024, nActualCompressedSize);
+
+   if (pszOutFilename) {
+      /* Write whole compressed file out; a failed open, short write or failed close is reported as an error */
+      FILE *f_out = fopen(pszOutFilename, "wb");
+      size_t nWritten = f_out ? fwrite(pCompressedData + 1024, 1, nActualCompressedSize, f_out) : 0;
+      if (!f_out || fclose(f_out) != 0 || nWritten != nActualCompressedSize) {
+         free(pCompressedData);
+         free(pFileData);
+         fprintf(stderr, "error writing compressed file '%s'\n", pszOutFilename);
+         return 100;
+      }
+   }
+
+   free(pCompressedData);
+   free(pFileData);
+
+   fprintf(stdout, "compressed size: %zu bytes\n", nActualCompressedSize);
+   fprintf(stdout, "compression time: %lld microseconds (%g Mb/s)\n", nBestCompTime, ((double)nActualCompressedSize / 1024.0) / ((double)nBestCompTime / 1000.0));   /* NOTE(review): rate is computed from the compressed size, not the input size; confirm intended */
+
+   return 0;
+}
+
+/*---------------------------------------------------------------------------*/
+
+/**
+ * Benchmark in-memory decompression: load a whole compressed file, decompress it 50 times, report the best
+ * (shortest) run and write the decompressed data out.
+ * @param pszInFilename name of the compressed file to load
+ * @param pszOutFilename name of the file to write the decompressed data to, NULL to skip writing
+ * @param pszDictionaryFilename must be NULL: in-memory benchmarking does not support dictionaries
+ * @param nOptions OPT_BACKWARD reverses the input before, and the output after, decompression
+ * @return 0 for success, 100 for any error (a message is printed to stderr)
+ */
+static int do_dec_benchmark(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions) {
+   size_t nFileSize, nMaxDecompressedSize, nActualDecompressedSize = 0;
+   unsigned char *pFileData = NULL, *pDecompressedData = NULL;
+   const int nFlags = FLG_IS_INVERTED;
+   long long nBestDecTime = -1;
+   long nFilePos;
+   int nResult = 100, i;
+
+   if (pszDictionaryFilename) {
+      fprintf(stderr, "in-memory benchmarking does not support dictionaries\n");
+      return 100;
+   }
+
+   /* Read the whole compressed file in memory */
+   FILE *f_in = fopen(pszInFilename, "rb");
+   if (!f_in) {
+      fprintf(stderr, "error opening '%s' for reading\n", pszInFilename);
+      return 100;
+   }
+
+   /* ftell() reports failure as -1; catch it here rather than casting it to a huge size_t */
+   if (fseek(f_in, 0, SEEK_END) != 0 || (nFilePos = ftell(f_in)) < 0 || fseek(f_in, 0, SEEK_SET) != 0) {
+      fclose(f_in);
+      fprintf(stderr, "I/O error while reading '%s'\n", pszInFilename);
+      return 100;
+   }
+   nFileSize = (size_t)nFilePos;
+
+   pFileData = (unsigned char*)malloc(nFileSize ? nFileSize : 1); /* malloc(0) may return NULL without being out of memory */
+   if (!pFileData) {
+      fclose(f_in);
+      fprintf(stderr, "out of memory for reading '%s', %zu bytes needed\n", pszInFilename, nFileSize);
+      return 100;
+   }
+
+   const size_t nBytesRead = fread(pFileData, 1, nFileSize, f_in);
+   fclose(f_in);
+   if (nBytesRead != nFileSize) {
+      fprintf(stderr, "I/O error while reading '%s'\n", pszInFilename);
+      goto cleanup;
+   }
+
+   if (nOptions & OPT_BACKWARD)
+      do_reverse_buffer(pFileData, nFileSize);
+
+   /* Allocate max decompressed size */
+   nMaxDecompressedSize = salvador_get_max_decompressed_size(pFileData, nFileSize, nFlags);
+   if (nMaxDecompressedSize == (size_t)-1) {
+      fprintf(stderr, "invalid compressed format for file '%s'\n", pszInFilename);
+      goto cleanup;
+   }
+
+   pDecompressedData = (unsigned char*)malloc(nMaxDecompressedSize ? nMaxDecompressedSize : 1);
+   if (!pDecompressedData) {
+      fprintf(stderr, "out of memory for decompressing '%s', %zu bytes needed\n", pszInFilename, nMaxDecompressedSize);
+      goto cleanup;
+   }
+   memset(pDecompressedData, 0, nMaxDecompressedSize);
+
+   for (i = 0; i < 50; i++) {
+      long long t0 = do_get_time();
+      nActualDecompressedSize = salvador_decompress(pFileData, pDecompressedData, nFileSize, nMaxDecompressedSize, 0 /* dictionary size */, nFlags);
+      long long t1 = do_get_time();
+      if (nActualDecompressedSize == (size_t)-1) {
+         fprintf(stderr, "decompression error\n");
+         goto cleanup;
+      }
+
+      long long nCurDecTime = t1 - t0;
+      if (nBestDecTime == -1 || nBestDecTime > nCurDecTime)
+         nBestDecTime = nCurDecTime;
+   }
+
+   if (nOptions & OPT_BACKWARD)
+      do_reverse_buffer(pDecompressedData, nActualDecompressedSize);
+
+   if (pszOutFilename) {
+      /* Write whole decompressed file out; a failed open, short write or failed close is reported as an error */
+      FILE *f_out = fopen(pszOutFilename, "wb");
+      const int nWriteFailed = !f_out || fwrite(pDecompressedData, 1, nActualDecompressedSize, f_out) != nActualDecompressedSize;
+      if ((f_out && fclose(f_out) != 0) || nWriteFailed) {
+         fprintf(stderr, "error writing '%s'\n", pszOutFilename);
+         goto cleanup;
+      }
+   }
+
+   fprintf(stdout, "decompressed size: %zu bytes\n", nActualDecompressedSize);
+   fprintf(stdout, "decompression time: %lld microseconds (%g Mb/s)\n", nBestDecTime, ((double)nActualDecompressedSize / 1024.0) / ((double)nBestDecTime / 1000.0));
+   nResult = 0;
+cleanup:
+   free(pDecompressedData);
+   free(pFileData);
+   return nResult;
+}
+
+/*---------------------------------------------------------------------------*/
+
+/**
+ * Command-line entry point: parse the arguments, then run the selected command (compression by default)
+ *
+ * @return exit code of the command that was run, or 100 if the arguments are invalid
+ */
+int main(int argc, char **argv) {
+   int i;
+   const char *pszInFilename = NULL;
+   const char *pszOutFilename = NULL;
+   const char *pszDictionaryFilename = NULL;
+   int nArgsError = 0;
+   int nCommandDefined = 0;
+   int nVerifyCompression = 0;
+   char cCommand = 'z';
+   unsigned int nOptions = 0;
+   unsigned int nMaxWindowSize = 0;
+
+   for (i = 1; i < argc; i++) {
+      if (!strcmp(argv[i], "-d")) {
+         if (!nCommandDefined) {
+            nCommandDefined = 1;
+            cCommand = 'd';
+         }
+         else
+            nArgsError = 1;
+      }
+      else if (!strcmp(argv[i], "-z")) {
+         if (!nCommandDefined) {
+            nCommandDefined = 1;
+            cCommand = 'z';
+         }
+         else
+            nArgsError = 1;
+      }
+      else if (!strcmp(argv[i], "-c")) {
+         if (!nVerifyCompression)
+            nVerifyCompression = 1;
+         else
+            nArgsError = 1;
+      }
+      else if (!strcmp(argv[i], "-cbench")) {
+         if (!nCommandDefined) {
+            nCommandDefined = 1;
+            cCommand = 'B';
+         }
+         else
+            nArgsError = 1;
+      }
+      else if (!strcmp(argv[i], "-dbench")) {
+         if (!nCommandDefined) {
+            nCommandDefined = 1;
+            cCommand = 'b';
+         }
+         else
+            nArgsError = 1;
+      }
+      else if (!strcmp(argv[i], "-test")) {
+         if (!nCommandDefined) {
+            nCommandDefined = 1;
+            cCommand = 't';
+         }
+         else
+            nArgsError = 1;
+      }
+      else if (!strcmp(argv[i], "-quicktest")) {
+         if (!nCommandDefined) {
+            nCommandDefined = 1;
+            cCommand = 'T';
+         }
+         else
+            nArgsError = 1;
+      }
+      else if (!strcmp(argv[i], "-D")) {
+         if (!pszDictionaryFilename && (i + 1) < argc) {
+            pszDictionaryFilename = argv[i + 1];
+            i++;
+         }
+         else
+            nArgsError = 1;
+      }
+      else if (!strncmp(argv[i], "-D", 2)) {
+         if (!pszDictionaryFilename)
+            pszDictionaryFilename = argv[i] + 2;
+         else
+            nArgsError = 1;
+      }
+      else if (!strcmp(argv[i], "-v")) {
+         if ((nOptions & OPT_VERBOSE) == 0)
+            nOptions |= OPT_VERBOSE;
+         else
+            nArgsError = 1;
+      }
+      else if (!strcmp(argv[i], "-w")) {
+         if (!nMaxWindowSize && (i + 1) < argc) {
+            char *pEnd = NULL;
+            const long nValue = strtol(argv[i + 1], &pEnd, 10); /* range-check as long: narrowing first could wrap a huge value into range */
+            if (pEnd && pEnd != argv[i + 1] && *pEnd == '\0' && nValue >= 16 && nValue <= MAX_OFFSET) {
+               nMaxWindowSize = (unsigned int)nValue;
+               i++;
+            }
+            else
+               nArgsError = 1; /* not a number, trailing characters, or out of range */
+         }
+         else
+            nArgsError = 1;
+      }
+      else if (!strncmp(argv[i], "-w", 2)) {
+         if (!nMaxWindowSize) {
+            char *pEnd = NULL;
+            const long nValue = strtol(argv[i] + 2, &pEnd, 10); /* same checks as for the separate "-w <size>" form */
+            if (pEnd && pEnd != (argv[i] + 2) && *pEnd == '\0' && nValue >= 16 && nValue <= MAX_OFFSET)
+               nMaxWindowSize = (unsigned int)nValue;
+            else
+               nArgsError = 1;
+         }
+         else
+            nArgsError = 1;
+      }
+      else if (!strcmp(argv[i], "-stats")) {
+         if ((nOptions & OPT_STATS) == 0)
+            nOptions |= OPT_STATS;
+         else
+            nArgsError = 1;
+      }
+      else if (!strcmp(argv[i], "-b")) {
+         if ((nOptions & OPT_BACKWARD) == 0)
+            nOptions |= OPT_BACKWARD;
+         else
+            nArgsError = 1;
+      }
+      else if (!strcmp(argv[i], "-classic")) {
+         if ((nOptions & OPT_CLASSIC) == 0)
+            nOptions |= OPT_CLASSIC;
+         else
+            nArgsError = 1;
+      }
+      else {
+         if (!pszInFilename)
+            pszInFilename = argv[i];
+         else {
+            if (!pszOutFilename)
+               pszOutFilename = argv[i];
+            else
+               nArgsError = 1;
+         }
+      }
+   }
+
+   if (!nArgsError && cCommand == 't') {
+      return do_self_test(nOptions, nMaxWindowSize, 0);
+   }
+   else if (!nArgsError && cCommand == 'T') {
+      return do_self_test(nOptions, nMaxWindowSize, 1);
+   }
+
+   if (nArgsError || !pszInFilename || !pszOutFilename) {
+      fprintf(stderr, "salvador command-line tool v" TOOL_VERSION " by Emmanuel Marty\n");
+      fprintf(stderr, "usage: %s [-c] [-d] [-v] [-b] <infile> <outfile>\n", argv[0]);
+      fprintf(stderr, "        -c: check resulting stream after compressing\n");
+      fprintf(stderr, "        -d: decompress (default: compress)\n");
+      fprintf(stderr, "        -b: backwards compression or decompression\n");
+      fprintf(stderr, " -w <size>: maximum window size, in bytes (16..32639), defaults to maximum\n");
+      fprintf(stderr, " -D <file>: use dictionary file\n");
+      fprintf(stderr, "   -cbench: benchmark in-memory compression\n");
+      fprintf(stderr, "   -dbench: benchmark in-memory decompression\n");
+      fprintf(stderr, "     -test: run full automated self-tests\n");
+      fprintf(stderr, "-quicktest: run quick automated self-tests\n");
+      fprintf(stderr, "    -stats: show compressed data stats\n");
+      fprintf(stderr, "  -classic: encode and decode using classical (V1) format, defaults to modern (V2)\n");
+      fprintf(stderr, "        -v: be verbose\n");
+      return 100;
+   }
+
+   do_init_time();
+
+   if (cCommand == 'z') {
+      int nResult = do_compress(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions, nMaxWindowSize);
+      if (nResult == 0 && nVerifyCompression) {
+         return do_compare(pszOutFilename, pszInFilename, pszDictionaryFilename, nOptions);
+      } else {
+         return nResult;
+      }
+   }
+   else if (cCommand == 'd') {
+      return do_decompress(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions);
+   }
+   else if (cCommand == 'B') {
+      return do_compr_benchmark(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions, nMaxWindowSize);
+   }
+   else if (cCommand == 'b') {
+      return do_dec_benchmark(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions);
+   }
+   else {
+      return 100;
+   }
+}
diff --git a/src/shrink.c b/src/shrink.c
new file mode 100644
index 0000000..a3c3b9f
--- /dev/null
+++ b/src/shrink.c
@@ -0,0 +1,1820 @@
+/*
+ * shrink.c - compressor implementation
+ *
+ * Copyright (C) 2021 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Implements the ZX0 encoding designed by Einar Saukas. https://github.com/einar-saukas/ZX0
+ * Also inspired by Charles Bloom's compression blog. http://cbloomrants.blogspot.com/
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include "libsalvador.h"
+#include "matchfinder.h"
+#include "shrink.h"
+#include "format.h"
+
+#define MIN_ENCODED_MATCH_SIZE   2 /* per the length helpers' docs: encoded match length = actual match length - this */
+#define TOKEN_SIZE               1 /* added to the gamma size of a literals run length when sizing it */
+#define OFFSET_COST(__offset)    (((__offset) <= 128) ? 8 : (7 + salvador_get_elias_size((((__offset) - 1) >> 7) + 1))) /* size of a match offset: 8 up to 128, else 7 + gamma size of ((offset - 1) >> 7) + 1 */
+
+/**
+ * Append the low nBits bits of a value to the output (compressed) buffer, most significant bit first
+ *
+ * @param pOutData pointer to output buffer
+ * @param nOutOffset current write index into output buffer (a negative index is reported as an error)
+ * @param nMaxOutDataSize maximum size of output buffer, in bytes
+ * @param nValue value to take the bits from
+ * @param nBits number of least significant bits of nValue to write
+ * @param nCurBitsOffset index in output buffer of the byte currently being filled, INT_MIN if none is open
+ * @param nCurBitShift position the next bit is stored at in that byte (counts down from 7)
+ *
+ * @return updated write index into output buffer, or -1 in case of an error
+ */
+static int salvador_write_bits(unsigned char *pOutData, int nOutOffset, const int nMaxOutDataSize, const int nValue, const int nBits, int *nCurBitsOffset, int *nCurBitShift) {
+   int nBitIdx = nBits;
+
+   if (nOutOffset < 0)
+      return -1;
+
+   while (--nBitIdx >= 0) {
+      if (*nCurBitsOffset == INT_MIN) {
+         /* No partially filled byte: reserve and clear the next byte of the stream */
+         if (nOutOffset >= nMaxOutDataSize)
+            return -1;
+         *nCurBitsOffset = nOutOffset;
+         *nCurBitShift = 7;
+         pOutData[nOutOffset++] = 0;
+      }
+
+      const int nBit = (nValue >> nBitIdx) & 1;
+      pOutData[*nCurBitsOffset] |= nBit << *nCurBitShift;
+      /* Once the bit at shift 0 has been stored, the byte is complete */
+      if (--(*nCurBitShift) == -1)
+         *nCurBitsOffset = INT_MIN;
+   }
+
+   return nOutOffset;
+}
+
+/**
+ * Compute how many bits the elias gamma encoding of a value takes up
+ *
+ * @param nValue value to encode as gamma
+ *
+ * @return size of the encoded value, in bits
+ */
+static int salvador_get_elias_size(const int nValue) {
+   int nTopBit = 2;
+   int nNumBits = 1; /* the bit that terminates the code */
+
+   /* Find the smallest power of two, starting at 2, that exceeds nValue */
+   while (nTopBit <= nValue)
+      nTopBit <<= 1;
+
+   /* Every bit of nValue below its most significant set bit costs a flag bit plus a data bit */
+   nTopBit >>= 2;
+   while (nTopBit > 0) {
+      nNumBits += 2;
+      nTopBit >>= 1;
+   }
+
+   return nNumBits;
+}
+
+/**
+ * Write a value to the output (compressed) buffer using interlaced elias gamma encoding
+ *
+ * @param pOutData pointer to output buffer
+ * @param nOutOffset current write index into output buffer
+ * @param nMaxOutDataSize maximum size of output buffer, in bytes
+ * @param nValue value to write with gamma encoding
+ * @param nIsInverted non-zero to write the data bits flipped (V2 match offset encoding), 0 to write them as they are (V1)
+ * @param nCurBitsOffset index in output buffer of the byte currently being filled with bits
+ * @param nCurBitShift bit shift count
+ * @param nFirstBit byte whose bit 0 receives the first bit of the code, NULL to write all bits out normally
+ *
+ * @return updated write index into output buffer, or -1 in case of an error
+ */
+static int salvador_write_elias_value(unsigned char* pOutData, int nOutOffset, const int nMaxOutDataSize, const int nValue, const int nIsInverted, int* nCurBitsOffset, int* nCurBitShift, unsigned char* nFirstBit) {
+   int nMask = 2;
+
+   /* Position the mask on the smallest power of two (from 2 up) above nValue, then on its top bit */
+   while (nMask <= nValue)
+      nMask <<= 1;
+   nMask >>= 1;
+
+   /* For every bit below the top one: a 0 flag, then the bit itself (flipped if nIsInverted) */
+   for (nMask >>= 1; nMask > 0; nMask >>= 1) {
+      const int nDataBit = (nValue & nMask) ? 1 : 0;
+
+      if (nFirstBit) {
+         /* The very first flag goes into bit 0 of the caller's byte instead of the bit stream */
+         *nFirstBit &= 0xfe;
+         nFirstBit = NULL;
+      }
+      else {
+         nOutOffset = salvador_write_bits(pOutData, nOutOffset, nMaxOutDataSize, 0, 1, nCurBitsOffset, nCurBitShift);
+      }
+      nOutOffset = salvador_write_bits(pOutData, nOutOffset, nMaxOutDataSize, nIsInverted ? (nDataBit ^ 1) : nDataBit, 1, nCurBitsOffset, nCurBitShift);
+   }
+
+   /* A 1 flag terminates the value */
+   if (nFirstBit)
+      *nFirstBit = (*nFirstBit & 0xfe) | 1;
+   else
+      nOutOffset = salvador_write_bits(pOutData, nOutOffset, nMaxOutDataSize, 1, 1, nCurBitsOffset, nCurBitShift);
+
+   return nOutOffset;
+}
+
+/**
+ * Get the number of bits required to represent the length of a run of literals
+ *
+ * @param nLength literals length
+ *
+ * @return number of bits required, 0 if nLength is not positive
+ */
+static inline int salvador_get_literals_varlen_size(const int nLength) {
+   /* An empty run costs nothing; otherwise TOKEN_SIZE plus the gamma size of the length */
+   if (nLength <= 0)
+      return 0;
+   return TOKEN_SIZE + salvador_get_elias_size(nLength);
+}
+
+/**
+ * Write a literals run length to the output (compressed) buffer, as a non-inverted elias gamma value
+ *
+ * @param pOutData pointer to output buffer
+ * @param nOutOffset current write index into output buffer
+ * @param nMaxOutDataSize maximum size of output buffer, in bytes
+ * @param nLength literals length
+ * @param nCurBitsOffset, nCurBitShift state of the byte currently being filled with bits
+ * @return updated write index into output buffer, or -1 in case of an error
+ */
+static inline int salvador_write_literals_varlen(unsigned char *pOutData, int nOutOffset, const int nMaxOutDataSize, int nLength, int *nCurBitsOffset, int *nCurBitShift) {
+   return salvador_write_elias_value(pOutData, nOutOffset, nMaxOutDataSize, nLength, 0, nCurBitsOffset, nCurBitShift, NULL);
+}
+
+/**
+ * Get the number of bits required to represent the length of a match with an offset (non-rep match).
+ * The size is that of the elias gamma encoding of __nLength + 1.
+ *
+ * @param __nLength encoded match length (actual match length - MIN_ENCODED_MATCH_SIZE)
+ *
+ * @return number of bits required
+ */
+#define salvador_get_match_varlen_size_norep(__nLength) salvador_get_elias_size((__nLength) + 1)
+
+/**
+ * Get the number of bits required to represent the length of a rep-match.
+ * The size is that of the elias gamma encoding of __nLength + 2.
+ *
+ * @param __nLength encoded match length (actual match length - MIN_ENCODED_MATCH_SIZE)
+ *
+ * @return number of bits required
+ */
+#define salvador_get_match_varlen_size_rep(__nLength) salvador_get_elias_size((__nLength) + 1 + 1)
+
+/**
+ * Write a match length to the output (compressed) buffer, as a non-inverted elias gamma value. The value
+ * written is nLength + 1 for a match with an offset, and nLength + 2 for a rep-match.
+ * @param pOutData pointer to output buffer
+ * @param nOutOffset current write index into output buffer
+ * @param nMaxOutDataSize maximum size of output buffer, in bytes
+ * @param nLength encoded match length (actual match length - MIN_ENCODED_MATCH_SIZE)
+ * @param nIsRepMatch non-zero if writing the match length for a rep-match, 0 if writing the length for a match with an offset
+ * @param nCurBitsOffset, nCurBitShift state of the byte currently being filled with bits
+ * @param nFirstBit where to store first bit, NULL to write all bits out normally
+ * @return updated write index into output buffer, or -1 in case of an error
+ */
+static inline int salvador_write_match_varlen(unsigned char *pOutData, int nOutOffset, const int nMaxOutDataSize, int nLength, int nIsRepMatch, int* nCurBitsOffset, int* nCurBitShift, unsigned char* nFirstBit) {
+   return salvador_write_elias_value(pOutData, nOutOffset, nMaxOutDataSize, nLength + 1 + (nIsRepMatch ? 1 : 0), 0, nCurBitsOffset, nCurBitShift, nFirstBit);
+}
+
+/**
+ * Insert forward rep candidate: for each arrival at position i that ends in literals, try to add a match at nMatchOffset to the match list of that arrival's rep_pos
+ *
+ * @param pCompressor compression context
+ * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
+ * @param i input data window position whose matches are being considered
+ * @param nMatchOffset match offset to use as rep candidate
+ * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
+ * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
+ * @param nDepth current insertion depth (the function stops recursing once this reaches 9)
+ */
+static void salvador_insert_forward_match(salvador_compressor *pCompressor, const unsigned char *pInWindow, const int i, const int nMatchOffset, const int nStartOffset, const int nEndOffset, int nDepth) {
+   const salvador_arrival *arrival = pCompressor->arrival + ((i - nStartOffset) * NARRIVALS_PER_POSITION); /* arrival slots for position i */
+   const int *rle_len = (int*)pCompressor->intervals /* reuse */;
+   salvador_visited* visited = ((salvador_visited*)pCompressor->pos_data) - nStartOffset /* reuse */; /* rebased so it can be indexed by window position */
+   int j;
+
+   for (j = 0; j < NARRIVALS_PER_POSITION && arrival[j].from_slot; j++) { /* a zero from_slot ends the list of arrivals */
+      if (arrival[j].num_literals) { /* only arrivals that end in literals are considered */
+         int nRepOffset = arrival[j].rep_offset;
+
+         if (nMatchOffset != nRepOffset && nRepOffset) { /* the arrival needs a rep offset, and one that differs from the candidate */
+            int nRepPos = arrival[j].rep_pos;
+
+            if (nRepPos >= nStartOffset &&
+               (nRepPos + 1) < nEndOffset &&
+               visited[nRepPos].outer != nMatchOffset) { /* rep_pos must lie in this block; skip if this offset was the last one tried there */
+
+               visited[nRepPos].outer = nMatchOffset;
+
+               if (visited[nRepPos].inner != nMatchOffset && nRepPos >= nMatchOffset && pCompressor->match[((nRepPos - nStartOffset) << MATCHES_PER_INDEX_SHIFT) + NMATCHES_PER_INDEX - 1].length == 0) { /* the last match slot at rep_pos must still be free */
+                  const unsigned char* pInWindowAtRepOffset = pInWindow + nRepPos;
+
+                  if (pInWindowAtRepOffset[0] == pInWindowAtRepOffset[-nMatchOffset]) { /* at least the first byte has to match */
+                     visited[nRepPos].inner = nMatchOffset;
+
+                     const int nLen0 = rle_len[nRepPos - nMatchOffset]; /* NOTE(review): rle_len presumably holds the byte run length at each position - confirm */
+                     const int nLen1 = rle_len[nRepPos];
+                     int nMinLen = (nLen0 < nLen1) ? nLen0 : nLen1;
+
+                     int nMaxRepLen = nEndOffset - nRepPos;
+                     if (nMaxRepLen > LCP_MAX)
+                        nMaxRepLen = LCP_MAX;
+
+                     if (nMinLen > nMaxRepLen)
+                        nMinLen = nMaxRepLen;
+
+                     const unsigned char* pInWindowMax = pInWindowAtRepOffset + nMaxRepLen;
+                     pInWindowAtRepOffset += nMinLen; /* the first nMinLen bytes are skipped without being compared */
+
+                     while ((pInWindowAtRepOffset + 8) < pInWindowMax && !memcmp(pInWindowAtRepOffset, pInWindowAtRepOffset - nMatchOffset, 8)) /* extend the match 8 bytes at a time */
+                        pInWindowAtRepOffset += 8;
+                     while ((pInWindowAtRepOffset + 4) < pInWindowMax && !memcmp(pInWindowAtRepOffset, pInWindowAtRepOffset - nMatchOffset, 4)) /* then 4 bytes at a time */
+                        pInWindowAtRepOffset += 4;
+                     while (pInWindowAtRepOffset < pInWindowMax && pInWindowAtRepOffset[0] == pInWindowAtRepOffset[-nMatchOffset]) /* then byte by byte */
+                        pInWindowAtRepOffset++;
+
+                     const int nCurRepLen = (int)(pInWindowAtRepOffset - (pInWindow + nRepPos));
+
+                     salvador_match* fwd_match = pCompressor->match + ((nRepPos - nStartOffset) << MATCHES_PER_INDEX_SHIFT);
+                     unsigned short* fwd_depth = pCompressor->match_depth + ((nRepPos - nStartOffset) << MATCHES_PER_INDEX_SHIFT);
+                     int r;
+
+                     for (r = 0; fwd_match[r].length; r++) { /* scan the used match slots at rep_pos for this offset */
+                        if (fwd_match[r].offset == nMatchOffset) {
+                           if ((int)fwd_match[r].length < nCurRepLen && (fwd_depth[r] & 0x3fff) == 0) { /* lengthen the entry only if its depth bits are zero */
+                              fwd_match[r].length = nCurRepLen;
+                              fwd_depth[r] = 0;
+                           }
+                           r = NMATCHES_PER_INDEX; /* marks the offset as already listed, so nothing is appended below */
+                           break;
+                        }
+                     }
+
+                     if (r < NMATCHES_PER_INDEX) { /* offset not listed yet: r is the first empty slot */
+                        fwd_match[r].offset = nMatchOffset;
+                        fwd_match[r].length = nCurRepLen;
+                        fwd_depth[r] = 0;
+
+                        if (nDepth < 9) /* bound the recursion */
+                           salvador_insert_forward_match(pCompressor, pInWindow, nRepPos, nMatchOffset, nStartOffset, nEndOffset, nDepth + 1);
+                     }
+                  }
+               }
+            }
+         }
+      }
+   }
+}
+
+/**
+ * Attempt to pick optimal matches, so as to produce the smallest possible output that decompresses to the same input
+ *
+ * @param pCompressor compression context
+ * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
+ * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
+ * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
+ * @param nInsertForwardReps non-zero to insert forward repmatch candidates, zero to use the previously inserted candidates
+ * @param nCurRepMatchOffset starting rep offset for this block
+ * @param nArrivalsPerPosition number of arrivals to record per input buffer position
+ * @param nBlockFlags bit 0: 1 for first block, 0 otherwise; bit 1: 1 for last block, 0 otherwise
+ */
+static void salvador_optimize_forward(salvador_compressor *pCompressor, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset, const int nInsertForwardReps, const int *nCurRepMatchOffset, const int nArrivalsPerPosition, const int nBlockFlags) {
+   salvador_arrival *arrival = pCompressor->arrival - (nStartOffset * NARRIVALS_PER_POSITION);
+   const int* rle_len = (int*)pCompressor->intervals /* reuse */;
+   salvador_visited* visited = ((salvador_visited*)pCompressor->pos_data) - nStartOffset /* reuse */;
+   const int nModeSwitchPenalty = 0;
+   int i, j, n;
+
+   if ((nEndOffset - nStartOffset) > pCompressor->block_size) return;
+
+   memset(arrival + (nStartOffset * NARRIVALS_PER_POSITION), 0, sizeof(salvador_arrival) * ((nEndOffset - nStartOffset + 1) * NARRIVALS_PER_POSITION));
+
+   arrival[nStartOffset * NARRIVALS_PER_POSITION].from_slot = -1;
+   arrival[nStartOffset * NARRIVALS_PER_POSITION].rep_offset = *nCurRepMatchOffset;
+
+   for (i = (nStartOffset * NARRIVALS_PER_POSITION); i != ((nEndOffset+1) * NARRIVALS_PER_POSITION); i++) {
+      arrival[i].cost = 0x40000000;
+   }
+
+   if (nInsertForwardReps) {
+      memset(visited + nStartOffset, 0, (nEndOffset - nStartOffset) * sizeof(salvador_visited));
+   }
+
+   for (i = nStartOffset; i != nEndOffset; i++) {
+      salvador_arrival *cur_arrival = &arrival[i * NARRIVALS_PER_POSITION];
+      int m;
+      
+      for (j = 0; j < nArrivalsPerPosition && cur_arrival[j].from_slot; j++) {
+         const int nPrevCost = cur_arrival[j].cost & 0x3fffffff;
+         int nCodingChoiceCost = nPrevCost + 8 /* literal */;
+         int nScore = cur_arrival[j].score + 1;
+         int nNumLiterals = cur_arrival[j].num_literals + 1;
+
+         if (nNumLiterals > 1)
+            nCodingChoiceCost -= salvador_get_literals_varlen_size(nNumLiterals - 1);
+         nCodingChoiceCost += salvador_get_literals_varlen_size(nNumLiterals);
+
+         if (nNumLiterals == 1)
+            nCodingChoiceCost += nModeSwitchPenalty;
+
+         salvador_arrival* pDestSlots = &cur_arrival[NARRIVALS_PER_POSITION];
+         if (nCodingChoiceCost < pDestSlots[nArrivalsPerPosition - 1].cost ||
+            (nCodingChoiceCost == pDestSlots[nArrivalsPerPosition - 1].cost && nScore < (pDestSlots[nArrivalsPerPosition - 1].score))) {
+            int nRepOffset = cur_arrival[j].rep_offset;
+            int exists = 0;
+
+            for (n = 0;
+               pDestSlots[n].cost < nCodingChoiceCost;
+               n++) {
+               if (pDestSlots[n].rep_offset == nRepOffset) {
+                  exists = 1;
+                  break;
+               }
+            }
+
+            if (!exists) {
+               for (;
+                  pDestSlots[n].cost == nCodingChoiceCost && nScore >= (pDestSlots[n].score);
+                  n++) {
+                  if (pDestSlots[n].rep_offset == nRepOffset) {
+                     exists = 1;
+                     break;
+                  }
+               }
+
+               if (!exists) {
+                  if (n < nArrivalsPerPosition) {
+                     int nn;
+
+                     for (nn = n;
+                        nn < nArrivalsPerPosition && pDestSlots[nn].cost == nCodingChoiceCost;
+                        nn++) {
+                        if (pDestSlots[nn].rep_offset == nRepOffset) {
+                           exists = 1;
+                           break;
+                        }
+                     }
+
+                     if (!exists) {
+                        int z;
+
+                        for (z = n; z < nArrivalsPerPosition - 1 && pDestSlots[z].from_slot; z++) {
+                           if (pDestSlots[z].rep_offset == nRepOffset)
+                              break;
+                        }
+
+                        memmove(&pDestSlots[n + 1],
+                           &pDestSlots[n],
+                           sizeof(salvador_arrival) * (z - n));
+
+                        salvador_arrival* pDestArrival = &pDestSlots[n];
+                        pDestArrival->cost = nCodingChoiceCost;
+                        pDestArrival->from_pos = i;
+                        pDestArrival->from_slot = j + 1;
+                        pDestArrival->rep_offset = nRepOffset;
+                        pDestArrival->rep_pos = cur_arrival[j].rep_pos;
+                        pDestArrival->match_len = 0;
+                        pDestArrival->num_literals = nNumLiterals;
+                        pDestArrival->score = nScore;
+                     }
+                  }
+               }
+            }
+         }
+      }
+
+      if (i == nStartOffset && (nBlockFlags & 1)) continue;
+
+      const salvador_match *match = pCompressor->match + ((i - nStartOffset) << MATCHES_PER_INDEX_SHIFT);
+      const unsigned short *match_depth = pCompressor->match_depth + ((i - nStartOffset) << MATCHES_PER_INDEX_SHIFT);
+      const int nNumArrivalsForThisPos = j;
+      int nOverallMinRepLen = 0, nOverallMaxRepLen = 0;
+
+      int nRepMatchArrivalIdx[(2 * NARRIVALS_PER_POSITION) + 1];
+      int nNumRepMatchArrivals = 0;
+
+      if (i < nEndOffset) {
+         int nMaxRepLenForPos = nEndOffset - i;
+         if (nMaxRepLenForPos > LCP_MAX)
+            nMaxRepLenForPos = LCP_MAX;
+
+         const unsigned char* pInWindowStart = pInWindow + i;
+         const unsigned char* pInWindowMax = pInWindowStart + nMaxRepLenForPos;
+
+         for (j = 0; j < nNumArrivalsForThisPos; j++) {
+            if (cur_arrival[j].num_literals) {
+               int nRepOffset = cur_arrival[j].rep_offset;
+
+               if (nRepOffset) {
+                  if (i >= nRepOffset) {
+                     if (pInWindow[i] == pInWindow[i - nRepOffset]) {
+                        const unsigned char* pInWindowAtPos;
+
+                        int nLen0 = rle_len[i - nRepOffset];
+                        int nLen1 = rle_len[i];
+                        int nMinLen = (nLen0 < nLen1) ? nLen0 : nLen1;
+
+                        if (nMinLen > nMaxRepLenForPos)
+                           nMinLen = nMaxRepLenForPos;
+                        pInWindowAtPos = pInWindowStart + nMinLen;
+
+                        while ((pInWindowAtPos + 8) < pInWindowMax && !memcmp(pInWindowAtPos - nRepOffset, pInWindowAtPos, 8))
+                           pInWindowAtPos += 8;
+                        while ((pInWindowAtPos + 4) < pInWindowMax && !memcmp(pInWindowAtPos - nRepOffset, pInWindowAtPos, 4))
+                           pInWindowAtPos += 4;
+                        while (pInWindowAtPos < pInWindowMax && pInWindowAtPos[-nRepOffset] == pInWindowAtPos[0])
+                           pInWindowAtPos++;
+                        int nCurRepLen = (int)(pInWindowAtPos - pInWindowStart);
+
+                        if (nOverallMaxRepLen < nCurRepLen)
+                           nOverallMaxRepLen = nCurRepLen;
+                        nRepMatchArrivalIdx[nNumRepMatchArrivals++] = j;
+                        nRepMatchArrivalIdx[nNumRepMatchArrivals++] = nCurRepLen;
+                     }
+                  }
+               }
+            }
+         }
+      }
+      nRepMatchArrivalIdx[nNumRepMatchArrivals] = -1;
+
+      for (m = 0; m < NMATCHES_PER_INDEX && match[m].length; m++) {
+         const int nOrigMatchLen = match[m].length;
+         const int nOrigMatchOffset = match[m].offset;
+         const unsigned int nOrigMatchDepth = match_depth[m] & 0x3fff;
+         const int nScorePenalty = 3 + ((match[m].length & 0x8000) >> 15);
+         unsigned int d;
+
+         for (d = 0; d <= nOrigMatchDepth; d += (nOrigMatchDepth ? nOrigMatchDepth : 1)) {
+            const int nMatchOffset = nOrigMatchOffset - d;
+            int nMatchLen = nOrigMatchLen - d;
+
+            if ((i + nMatchLen) > nEndOffset)
+               nMatchLen = nEndOffset - i;
+
+            if (nInsertForwardReps) {
+               salvador_insert_forward_match(pCompressor, pInWindow, i, nMatchOffset, nStartOffset, nEndOffset, 0);
+            }
+
+            int nNoRepmatchOffsetCost = OFFSET_COST(nMatchOffset);
+            int nNoRepmatchScore, nStartingMatchLen, k;
+
+            int nNonRepMatchArrivalIdx = -1;
+            for (j = 0; j < nNumArrivalsForThisPos; j++) {
+               int nRepOffset = cur_arrival[j].rep_offset;
+
+               if (nMatchOffset != nRepOffset || cur_arrival[j].num_literals == 0) {
+                  const int nPrevCost = cur_arrival[j].cost & 0x3fffffff;
+
+                  nNoRepmatchOffsetCost += nPrevCost /* the actual cost of the literals themselves accumulates up the chain */;
+                  if (!cur_arrival[j].num_literals)
+                     nNoRepmatchOffsetCost += nModeSwitchPenalty;
+
+                  nNoRepmatchScore = cur_arrival[j].score + nScorePenalty;
+                  nNonRepMatchArrivalIdx = j;
+                  break;
+               }
+            }
+
+            if (nMatchLen >= LEAVE_ALONE_MATCH_SIZE) {
+               nStartingMatchLen = nMatchLen;
+            }
+            else {
+               nStartingMatchLen = 1;
+            }
+
+            for (k = nStartingMatchLen; k <= nMatchLen; k++) {
+               salvador_arrival* pDestSlots = &cur_arrival[k * NARRIVALS_PER_POSITION];
+
+               /* Insert non-repmatch candidate */
+
+               if (k >= 2 && nNonRepMatchArrivalIdx >= 0) {
+                  int nMatchLenCost = salvador_get_match_varlen_size_norep(k - MIN_ENCODED_MATCH_SIZE) + TOKEN_SIZE /* token */;
+                  int nCodingChoiceCost = nMatchLenCost + nNoRepmatchOffsetCost;
+
+                  if (nCodingChoiceCost < pDestSlots[nArrivalsPerPosition - 2].cost ||
+                     (nCodingChoiceCost == pDestSlots[nArrivalsPerPosition - 2].cost && nNoRepmatchScore < (pDestSlots[nArrivalsPerPosition - 2].score))) {
+                     int exists = 0;
+
+                     for (n = 0;
+                        pDestSlots[n].cost < nCodingChoiceCost;
+                        n++) {
+                        if (pDestSlots[n].rep_offset == nMatchOffset) {
+                           exists = 1;
+                           break;
+                        }
+                     }
+
+                     if (!exists) {
+                        for (;
+                           pDestSlots[n].cost == nCodingChoiceCost && nNoRepmatchScore >= (pDestSlots[n].score);
+                           n++) {
+                           if (pDestSlots[n].rep_offset == nMatchOffset) {
+                              exists = 1;
+                              break;
+                           }
+                        }
+
+                        if (!exists) {
+                           if (n < nArrivalsPerPosition - 1) {
+                              int nn;
+
+                              for (nn = n;
+                                 nn < nArrivalsPerPosition && pDestSlots[nn].cost == nCodingChoiceCost;
+                                 nn++) {
+                                 if (pDestSlots[nn].rep_offset == nMatchOffset) {
+                                    exists = 1;
+                                    break;
+                                 }
+                              }
+
+                              if (!exists) {
+                                 int z;
+
+                                 for (z = n; z < nArrivalsPerPosition - 1 && pDestSlots[z].from_slot; z++) {
+                                    if (pDestSlots[z].rep_offset == nMatchOffset)
+                                       break;
+                                 }
+
+                                 memmove(&pDestSlots[n + 1],
+                                    &pDestSlots[n],
+                                    sizeof(salvador_arrival) * (z - n));
+
+                                 salvador_arrival* pDestArrival = &pDestSlots[n];
+                                 pDestArrival->cost = nCodingChoiceCost;
+                                 pDestArrival->from_pos = i;
+                                 pDestArrival->from_slot = nNonRepMatchArrivalIdx + 1;
+                                 pDestArrival->match_len = k;
+                                 pDestArrival->num_literals = 0;
+                                 pDestArrival->score = nNoRepmatchScore;
+                                 pDestArrival->rep_offset = nMatchOffset;
+                                 pDestArrival->rep_pos = i;
+                              }
+                           }
+                        }
+                     }
+                  }
+               }
+
+               /* Insert repmatch candidates */
+
+               if (k > nOverallMinRepLen  && k <= nOverallMaxRepLen) {
+                  int nMatchLenCost = salvador_get_match_varlen_size_rep(k - MIN_ENCODED_MATCH_SIZE) + TOKEN_SIZE /* token */;
+                  int nCurRepMatchArrival;
+
+                  if (k <= LEAVE_ALONE_MATCH_SIZE)
+                     nOverallMinRepLen = k;
+                  else if (nOverallMaxRepLen == k)
+                     nOverallMaxRepLen--;
+
+                  for (nCurRepMatchArrival = 0; (j = nRepMatchArrivalIdx[nCurRepMatchArrival]) >= 0; nCurRepMatchArrival += 2) {
+                     if (nRepMatchArrivalIdx[nCurRepMatchArrival + 1] >= k) {
+                        const int nPrevCost = cur_arrival[j].cost & 0x3fffffff;
+                        int nRepCodingChoiceCost = nPrevCost /* the actual cost of the literals themselves accumulates up the chain */ + nMatchLenCost;
+                        int nScore = cur_arrival[j].score + 2;
+
+                        if (nRepCodingChoiceCost < pDestSlots[nArrivalsPerPosition - 1].cost ||
+                           (nRepCodingChoiceCost == pDestSlots[nArrivalsPerPosition - 1].cost && nScore < (pDestSlots[nArrivalsPerPosition - 1].score))) {
+                           int nRepOffset = cur_arrival[j].rep_offset;
+                           int exists = 0;
+
+                           for (n = 0;
+                              pDestSlots[n].cost < nRepCodingChoiceCost;
+                              n++) {
+                              if (pDestSlots[n].rep_offset == nRepOffset) {
+                                 exists = 1;
+                                 break;
+                              }
+                           }
+
+                           if (!exists) {
+                              for (;
+                                 pDestSlots[n].cost == nRepCodingChoiceCost && nScore >= (pDestSlots[n].score);
+                                 n++) {
+                                 if (pDestSlots[n].rep_offset == nRepOffset) {
+                                    exists = 1;
+                                    break;
+                                 }
+                              }
+
+                              if (!exists) {
+                                 if (n < nArrivalsPerPosition) {
+                                    int nn;
+
+                                    for (nn = n;
+                                       nn < nArrivalsPerPosition && pDestSlots[nn].cost == nRepCodingChoiceCost;
+                                       nn++) {
+                                       if (pDestSlots[nn].rep_offset == nRepOffset) {
+                                          exists = 1;
+                                          break;
+                                       }
+                                    }
+
+                                    if (!exists) {
+                                       int z;
+
+                                       for (z = n; z < nArrivalsPerPosition - 1 && pDestSlots[z].from_slot; z++) {
+                                          if (pDestSlots[z].rep_offset == nRepOffset)
+                                             break;
+                                       }
+
+                                       memmove(&pDestSlots[n + 1],
+                                          &pDestSlots[n],
+                                          sizeof(salvador_arrival) * (z - n));
+
+                                       salvador_arrival* pDestArrival = &pDestSlots[n];
+                                       pDestArrival->cost = nRepCodingChoiceCost;
+                                       pDestArrival->from_pos = i;
+                                       pDestArrival->from_slot = j + 1;
+                                       pDestArrival->match_len = k;
+                                       pDestArrival->num_literals = 0;
+                                       pDestArrival->score = nScore;
+                                       pDestArrival->rep_offset = nRepOffset;
+                                       pDestArrival->rep_pos = i;
+                                    }
+                                 }
+                              }
+                           }
+                        }
+                        else {
+                           break;
+                        }
+                     }
+                  }
+               }
+            }
+         }
+
+         if (nOrigMatchLen >= 512)
+            break;
+      }
+   }
+   
+   if (!nInsertForwardReps) {
+      const salvador_arrival* end_arrival = &arrival[(i * NARRIVALS_PER_POSITION) + 0];
+      salvador_final_match* pBestMatch = pCompressor->best_match - nStartOffset;
+
+      while (end_arrival->from_slot > 0 && end_arrival->from_pos >= 0 && (int)end_arrival->from_pos < nEndOffset) {
+         pBestMatch[end_arrival->from_pos].length = end_arrival->match_len;
+         if (end_arrival->match_len)
+            pBestMatch[end_arrival->from_pos].offset = end_arrival->rep_offset;
+         else
+            pBestMatch[end_arrival->from_pos].offset = 0;
+
+         end_arrival = &arrival[(end_arrival->from_pos * NARRIVALS_PER_POSITION) + (end_arrival->from_slot - 1)];
+      }
+   }
+}
+
+/**
+ * Attempt to replace matches by literals when it makes the final bitstream smaller, and merge large matches
+ *
+ * @param pCompressor compression context (not referenced directly in this function)
+ * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
+ * @param pBestMatch optimal matches to evaluate and update, indexed by offset into the input window
+ * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
+ * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
+ * @param nCurRepMatchOffset starting rep offset for this block (only read here, never written)
+ * @param nBlockFlags bit 0: 1 for first block, 0 otherwise; bit 1: 1 for last block, 0 otherwise (only bit 0 is tested here)
+ *
+ * @return non-zero if any command was changed (merged, joined, given another offset, or turned into literals), 0 otherwise
+ */
+static int salvador_reduce_commands(salvador_compressor *pCompressor, const unsigned char *pInWindow, salvador_final_match *pBestMatch, const int nStartOffset, const int nEndOffset, const int *nCurRepMatchOffset, const int nBlockFlags) {
+   int i;
+   int nNumLiterals = (nBlockFlags & 1) ? 1 : 0; /* the loop below skips the first byte of the first block; it is counted as one pending literal */
+   int nRepMatchOffset = *nCurRepMatchOffset; /* offset of the most recent match consumed so far */
+   int nFollowsLiteral = 0; /* set by the literal branch at the bottom of the loop */
+   int nDidReduce = 0;
+   /* A rewrite ends with 'continue' without advancing i, so the rewritten command at i is evaluated again */
+
+   for (i = nStartOffset + ((nBlockFlags & 1) ? 1 : 0); i < nEndOffset; ) {
+      salvador_final_match *pMatch = pBestMatch + i;
+      /* A literal (itself preceded by a literal) followed by a match that is also valid one byte earlier: try to extend the match backwards */
+      if (nFollowsLiteral &&
+         pMatch->length == 0 &&
+         (i + 1) < nEndOffset &&
+         pBestMatch[i + 1].length >= MIN_ENCODED_MATCH_SIZE &&
+         pBestMatch[i + 1].length < MAX_VARLEN &&
+         pBestMatch[i + 1].offset &&
+         i >= pBestMatch[i + 1].offset &&
+         (i + pBestMatch[i + 1].length + 1) <= nEndOffset &&
+         !memcmp(pInWindow + i - (pBestMatch[i + 1].offset), pInWindow + i, pBestMatch[i + 1].length + 1)) {
+         int nCurLenSize, nReducedLenSize;
+
+         if (nRepMatchOffset && pBestMatch[i + 1].offset == nRepMatchOffset) {
+            nCurLenSize = salvador_get_match_varlen_size_rep(pBestMatch[i + 1].length - MIN_ENCODED_MATCH_SIZE);
+            nReducedLenSize = salvador_get_match_varlen_size_rep(pBestMatch[i + 1].length + 1 - MIN_ENCODED_MATCH_SIZE);
+         }
+         else {
+            nCurLenSize = salvador_get_match_varlen_size_norep(pBestMatch[i + 1].length - MIN_ENCODED_MATCH_SIZE);
+            nReducedLenSize = salvador_get_match_varlen_size_norep(pBestMatch[i + 1].length + 1 - MIN_ENCODED_MATCH_SIZE);
+         }
+
+         if ((nReducedLenSize - nCurLenSize) <= 8) { /* the longer length code costs no more than the 8 bits of the literal it absorbs */
+            /* Merge */
+            pBestMatch[i].length = pBestMatch[i + 1].length + 1;
+            pBestMatch[i].offset = pBestMatch[i + 1].offset;
+            pBestMatch[i + 1].length = 0;
+            pBestMatch[i + 1].offset = 0;
+            nDidReduce = 1;
+            nFollowsLiteral = 0;
+            continue;
+         }
+      }
+
+      if (pMatch->length >= MIN_ENCODED_MATCH_SIZE) {
+         if (nFollowsLiteral && (i + pMatch->length) < nEndOffset /* Don't consider the last match in the block, we can only reduce a match inbetween other tokens */) {
+            int nNextIndex = i + pMatch->length;
+            int nNextLiterals = 0;
+
+            while (nNextIndex < nEndOffset && pBestMatch[nNextIndex].length == 0) {
+               nNextLiterals++;
+               nNextIndex++;
+            }
+
+            if (nNextIndex < nEndOffset && pBestMatch[nNextIndex].length >= MIN_ENCODED_MATCH_SIZE) {
+               /* This command is a match, is followed by 'nNextLiterals' literals and then by another match */
+
+               if (nRepMatchOffset && pMatch->offset != nRepMatchOffset && (pBestMatch[nNextIndex].offset != pMatch->offset || pBestMatch[nNextIndex].offset == nRepMatchOffset ||
+                  OFFSET_COST(pMatch->offset) > OFFSET_COST(pBestMatch[nNextIndex].offset))) {
+                  /* Check if we can change the current match's offset to be the same as the previous match's offset, and get an extra repmatch. This will occur when
+                   * matching large regions of identical bytes for instance, where there are too many offsets to be considered by the parser, and when not compressing to favor the
+                   * ratio (the forward arrivals parser already has this covered). */
+                  if (i >= nRepMatchOffset &&
+                     (i - nRepMatchOffset + pMatch->length) <= nEndOffset &&
+                     !memcmp(pInWindow + i - nRepMatchOffset, pInWindow + i - pMatch->offset, pMatch->length)) {
+                     pMatch->offset = nRepMatchOffset;
+                     nDidReduce = 1;
+                  }
+               }
+
+               if (pBestMatch[nNextIndex].offset && pMatch->offset != pBestMatch[nNextIndex].offset && nRepMatchOffset != pBestMatch[nNextIndex].offset && nNextLiterals) {
+                  /* Otherwise, try to gain a match forward as well */
+                  if (i >= pBestMatch[nNextIndex].offset && (i - pBestMatch[nNextIndex].offset + pMatch->length) <= nEndOffset && pMatch->offset != nRepMatchOffset) {
+                     int nMaxLen = 0; /* number of leading bytes of this match that the next match's offset also reproduces */
+                     while (nMaxLen < pMatch->length && pInWindow[i - pBestMatch[nNextIndex].offset + nMaxLen] == pInWindow[i - pMatch->offset + nMaxLen])
+                        nMaxLen++;
+                     if (nMaxLen >= pMatch->length) {
+                        /* Replace */
+                        pMatch->offset = pBestMatch[nNextIndex].offset;
+                        nDidReduce = 1;
+                     }
+                     else if (nMaxLen >= 2) {
+                        int nPartialSizeBefore, nPartialSizeAfter;
+
+                        nPartialSizeBefore = salvador_get_match_varlen_size_norep(pMatch->length - MIN_ENCODED_MATCH_SIZE);
+                        nPartialSizeBefore += OFFSET_COST(pMatch->offset);
+                        nPartialSizeBefore += salvador_get_literals_varlen_size(nNextLiterals);
+
+                        nPartialSizeAfter = salvador_get_match_varlen_size_rep(nMaxLen - MIN_ENCODED_MATCH_SIZE);
+                        nPartialSizeAfter += salvador_get_literals_varlen_size(nNextLiterals + (pMatch->length - nMaxLen)) + ((pMatch->length - nMaxLen) << 3);
+
+                        if (nPartialSizeAfter < nPartialSizeBefore) {
+                           int j;
+
+                           /* We gain a repmatch that is shorter than the original match as this is the best we can do, so it is followed by extra literals, but
+                            * we have calculated that this is shorter */
+                           pMatch->offset = pBestMatch[nNextIndex].offset;
+                           for (j = nMaxLen; j < pMatch->length; j++) {
+                              pBestMatch[i + j].length = 0;
+                           }
+                           pMatch->length = nMaxLen;
+                           nDidReduce = 1;
+                        }
+                     }
+                  }
+               }
+
+               if (pMatch->length < 9 /* Don't waste time considering large matches, they will always win over literals */) {
+                  /* Calculate this command's current cost (excluding 'nNumLiterals' bytes) */
+
+                  int nCurCommandSize = 0;
+                  if (nNumLiterals != 0) {
+                     nCurCommandSize += salvador_get_literals_varlen_size(nNumLiterals);
+                     nCurCommandSize += (nNumLiterals << 3);
+                  }
+                  if (nRepMatchOffset && pMatch->offset == nRepMatchOffset && nNumLiterals != 0) {
+                     /* Rep match */
+                     nCurCommandSize += 1; /* rep-match follows */
+
+                     /* Match length */
+                     nCurCommandSize += salvador_get_match_varlen_size_rep(pMatch->length - MIN_ENCODED_MATCH_SIZE);
+                  }
+                  else {
+                     /* Match with offset */
+                     nCurCommandSize += 1; /* match with offset follows */
+
+                     /* High bits of match offset */
+                     nCurCommandSize += salvador_get_elias_size(((pMatch->offset - 1) >> 7) + 1);
+
+                     /* Low byte of match offset */
+                     nCurCommandSize += 7;
+
+                     /* Match length */
+                     nCurCommandSize += salvador_get_match_varlen_size_norep(pMatch->length - MIN_ENCODED_MATCH_SIZE);
+
+                  }
+
+                  /* Calculate the next command's current cost */
+                  int nNextCommandSize = 0;
+                  if (nNextLiterals != 0) {
+                     nNextCommandSize += salvador_get_literals_varlen_size(nNextLiterals);
+                     nNextCommandSize += (nNextLiterals << 3);
+                  }
+                  if (pMatch->offset && pBestMatch[nNextIndex].offset == pMatch->offset && nNextLiterals != 0) {
+                     /* Rep match */
+                     nNextCommandSize += 1; /* rep-match follows */
+
+                     /* Match length */
+                     nNextCommandSize += salvador_get_match_varlen_size_rep(pBestMatch[nNextIndex].length - MIN_ENCODED_MATCH_SIZE);
+                  }
+                  else {
+                     /* Match with offset */
+                     nNextCommandSize += 1; /* match with offset follows */
+
+                     /* High bits of match offset */
+                     nNextCommandSize += salvador_get_elias_size(((pBestMatch[nNextIndex].offset - 1) >> 7) + 1);
+
+                     /* Low byte of match offset */
+                     nNextCommandSize += 7;
+
+                     /* Match length */
+                     nNextCommandSize += salvador_get_match_varlen_size_norep(pBestMatch[nNextIndex].length - MIN_ENCODED_MATCH_SIZE);
+                  }
+
+                  int nOriginalCombinedCommandSize = nCurCommandSize + nNextCommandSize;
+
+                  /* Calculate the cost of replacing this match command by literals + the next command with the cost of encoding these literals (excluding 'nNumLiterals' bytes) */
+                  int nReducedCommandSize = (pMatch->length << 3);
+                  nReducedCommandSize += salvador_get_literals_varlen_size(nNumLiterals + pMatch->length + nNextLiterals);
+                  nReducedCommandSize += ((nNumLiterals + nNextLiterals) << 3);
+
+                  if (nRepMatchOffset && pBestMatch[nNextIndex].offset == nRepMatchOffset && (nNumLiterals + pMatch->length + nNextLiterals) != 0) {
+                     /* Rep match */
+                     nReducedCommandSize += 1; /* rep-match follows */
+
+                     /* Match length */
+                     nReducedCommandSize += salvador_get_match_varlen_size_rep(pBestMatch[nNextIndex].length - MIN_ENCODED_MATCH_SIZE);
+                  }
+                  else {
+                     /* Match with offset */
+                     nReducedCommandSize += 1; /* match with offset follows */
+
+                     /* High bits of match offset */
+                     nReducedCommandSize += salvador_get_elias_size(((pBestMatch[nNextIndex].offset - 1) >> 7) + 1);
+
+                     /* Low byte of match offset */
+                     nReducedCommandSize += 7;
+
+                     /* Match length */
+                     nReducedCommandSize += salvador_get_match_varlen_size_norep(pBestMatch[nNextIndex].length - MIN_ENCODED_MATCH_SIZE);
+                  }
+
+                  if (nOriginalCombinedCommandSize >= nReducedCommandSize) {
+                     /* Reduce */
+                     int nMatchLen = pMatch->length;
+                     int j;
+
+                     for (j = 0; j < nMatchLen; j++) {
+                        pBestMatch[i + j].length = 0;
+                     }
+
+                     nDidReduce = 1;
+                     nFollowsLiteral = 0;
+                     continue;
+                  }
+               }
+            }
+         }
+
+         if ((i + pMatch->length) < nEndOffset && pMatch->offset > 0 && pMatch->length >= MIN_ENCODED_MATCH_SIZE && /* '<', not '<=': pBestMatch[nEndOffset] must never be read */
+            pBestMatch[i + pMatch->length].offset > 0 &&
+            pBestMatch[i + pMatch->length].length >= MIN_ENCODED_MATCH_SIZE &&
+            (pMatch->length + pBestMatch[i + pMatch->length].length) >= LEAVE_ALONE_MATCH_SIZE &&
+            (pMatch->length + pBestMatch[i + pMatch->length].length) <= MAX_VARLEN &&
+            (i + pMatch->length) > pMatch->offset &&
+            (i + pMatch->length) > pBestMatch[i + pMatch->length].offset &&
+            (i + pMatch->length + pBestMatch[i + pMatch->length].length) <= nEndOffset &&
+            !memcmp(pInWindow + i - pMatch->offset + pMatch->length,
+               pInWindow + i + pMatch->length - pBestMatch[i + pMatch->length].offset,
+               pBestMatch[i + pMatch->length].length)) {
+            /* The bytes covered by the following match can also be copied through this match's offset, so both could be emitted as one longer match */
+            int nNextIndex = i + pMatch->length + pBestMatch[i + pMatch->length].length;
+            int nNextLiterals = 0;
+
+            while (nNextIndex < nEndOffset && pBestMatch[nNextIndex].length == 0) {
+               nNextIndex++;
+               nNextLiterals++;
+            }
+
+            int nCurPartialSize = 0;
+            if (nRepMatchOffset && pMatch->offset == nRepMatchOffset && nNumLiterals != 0) {
+               /* Rep match */
+               nCurPartialSize += 1; /* rep-match follows */
+
+               /* Match length */
+               nCurPartialSize += salvador_get_match_varlen_size_rep(pMatch->length - MIN_ENCODED_MATCH_SIZE);
+            }
+            else {
+               /* Match with offset */
+               nCurPartialSize += 1; /* match with offset follows */
+
+               /* High bits of match offset */
+               nCurPartialSize += salvador_get_elias_size(((pMatch->offset - 1) >> 7) + 1);
+
+               /* Low byte of match offset */
+               nCurPartialSize += 7;
+
+               /* Match length */
+               nCurPartialSize += salvador_get_match_varlen_size_norep(pMatch->length - MIN_ENCODED_MATCH_SIZE);
+            }
+
+            /* Match with offset */
+            nCurPartialSize += 1; /* match with offset */
+
+            /* High bits of match offset */
+            nCurPartialSize += salvador_get_elias_size(((pBestMatch[i + pMatch->length].offset - 1) >> 7) + 1);
+
+            /* Low byte of match offset */
+            nCurPartialSize += 7;
+
+            /* Match length */
+            nCurPartialSize += salvador_get_match_varlen_size_norep(pBestMatch[i + pMatch->length].length - MIN_ENCODED_MATCH_SIZE);
+
+            if (nNextIndex < nEndOffset) {
+               if (pBestMatch[i + pMatch->length].offset && pBestMatch[nNextIndex].offset == pBestMatch[i + pMatch->length].offset && nNextLiterals != 0) {
+                  /* Rep match */
+                  nCurPartialSize += 1; /* rep-match follows */
+
+                  /* Match length */
+                  nCurPartialSize += salvador_get_match_varlen_size_rep(pBestMatch[nNextIndex].length - MIN_ENCODED_MATCH_SIZE);
+               }
+               else {
+                  /* Match with offset */
+                  nCurPartialSize += 1; /* match with offset follows */
+
+                  /* High bits of match offset */
+                  nCurPartialSize += salvador_get_elias_size(((pBestMatch[nNextIndex].offset - 1) >> 7) + 1);
+
+                  /* Low byte of match offset */
+                  nCurPartialSize += 7;
+
+                  /* Match length */
+                  nCurPartialSize += salvador_get_match_varlen_size_norep(pBestMatch[nNextIndex].length - MIN_ENCODED_MATCH_SIZE);
+               }
+            }
+
+            int nReducedPartialSize = 0;
+            if (nRepMatchOffset && pMatch->offset == nRepMatchOffset && nNumLiterals != 0) {
+               /* Rep match */
+               nReducedPartialSize += 1; /* rep-match follows */
+
+               /* Match length */
+               nReducedPartialSize += salvador_get_match_varlen_size_rep(pMatch->length + pBestMatch[i + pMatch->length].length - MIN_ENCODED_MATCH_SIZE);
+            }
+            else {
+               /* Match with offset */
+               nReducedPartialSize += 1; /* match with offset follows */
+
+               /* High bits of match offset */
+               nReducedPartialSize += salvador_get_elias_size(((pMatch->offset - 1) >> 7) + 1);
+
+               /* Low byte of match offset */
+               nReducedPartialSize += 7;
+
+               /* Match length */
+               nReducedPartialSize += salvador_get_match_varlen_size_norep(pMatch->length + pBestMatch[i + pMatch->length].length - MIN_ENCODED_MATCH_SIZE);
+            }
+
+            int nCannotReduce = 0;
+            if (nNextIndex < nEndOffset) {
+               if (pMatch->offset && pBestMatch[nNextIndex].offset == pMatch->offset && nNextLiterals != 0) {
+                  /* Rep match */
+                  nReducedPartialSize += 1; /* rep-match follows */
+
+                  /* Match length */
+                  nReducedPartialSize += salvador_get_match_varlen_size_rep(pBestMatch[nNextIndex].length - MIN_ENCODED_MATCH_SIZE);
+               }
+               else {
+                  if (pBestMatch[nNextIndex].length >= MIN_ENCODED_MATCH_SIZE) {
+                     /* Match with offset */
+                     nReducedPartialSize += 1; /* match with offset follows */
+
+                     /* High bits of match offset */
+                     nReducedPartialSize += salvador_get_elias_size(((pBestMatch[nNextIndex].offset - 1) >> 7) + 1);
+
+                     /* Low byte of match offset */
+                     nReducedPartialSize += 7;
+
+                     /* Match length */
+                     nReducedPartialSize += salvador_get_match_varlen_size_norep(pBestMatch[nNextIndex].length - MIN_ENCODED_MATCH_SIZE);
+                  }
+                  else {
+                     nCannotReduce = 1;
+                  }
+               }
+            }
+
+            if (nCurPartialSize >= nReducedPartialSize && !nCannotReduce) {
+               int nMatchLen = pMatch->length;
+
+               /* Join */
+
+               pMatch->length += pBestMatch[i + nMatchLen].length;
+               pBestMatch[i + nMatchLen].offset = 0;
+               pBestMatch[i + nMatchLen].length = -1; /* absorbed into the joined match starting at i */
+               nDidReduce = 1;
+               nFollowsLiteral = 0;
+               continue;
+            }
+         }
+
+         nRepMatchOffset = pMatch->offset;
+
+         i += pMatch->length;
+         nNumLiterals = 0;
+         nFollowsLiteral = 0;
+      }
+      else if (pMatch->length == 1) {
+         if (nNumLiterals > 0) {
+            int nNextIndex = i + pMatch->length;
+            int nNextLiterals = 0;
+
+            while (nNextIndex < nEndOffset && pBestMatch[nNextIndex].length == 0) {
+               nNextLiterals++;
+               nNextIndex++;
+            }
+
+            if (nNextLiterals > 0) { /* literals on both sides: compare against emitting a single merged run of literals */
+               int nCurPartialSize = salvador_get_literals_varlen_size(nNumLiterals);
+               nCurPartialSize += TOKEN_SIZE + salvador_get_match_varlen_size_rep(pMatch->length - MIN_ENCODED_MATCH_SIZE);
+               nCurPartialSize += salvador_get_literals_varlen_size(nNextLiterals);
+
+               int nReducedPartialSize = salvador_get_literals_varlen_size(nNumLiterals + 1 + nNextLiterals) + 8;
+
+               if (nCurPartialSize >= nReducedPartialSize) {
+                  pMatch->length = 0;
+                  pMatch->offset = 0;
+                  nDidReduce = 1;
+                  continue;
+               }
+            }
+         }
+
+         nNumLiterals = 0;
+         nFollowsLiteral = 0;
+         i++;
+      }
+      else {
+         nFollowsLiteral = 1;
+         nNumLiterals++;
+         i++;
+      }
+   }
+
+   return nDidReduce;
+}
+
+/**
+ * Emit a block of compressed data
+ *
+ * @param pCompressor compression context
+ * @param pBestMatch optimal matches to emit, indexed by offset in the input window
+ * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
+ * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
+ * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
+ * @param pOutData pointer to output buffer
+ * @param nOutOffset starting offset into output buffer
+ * @param nMaxOutDataSize maximum size of output buffer, in bytes
+ * @param nCurBitsOffset write index into output buffer, of current byte being filled with bits
+ * @param nCurBitShift bit shift count
+ * @param nFinalLiterals output number of literals not written after writing this block, that need to be written in the next block
+ * @param nCurRepMatchOffset starting rep offset for this block, updated after the block is compressed successfully
+ * @param nBlockFlags bit 0: 1 for first block, 0 otherwise; bit 1: 1 for last block, 0 otherwise
+ *
+ * @return offset just past the data written to the output buffer, or -1 if the block cannot be encoded (output full, match offset out of range, or first block not starting with a literal)
+ */
+static int salvador_write_block(salvador_compressor* pCompressor, salvador_final_match* pBestMatch, const unsigned char* pInWindow, const int nStartOffset, const int nEndOffset, unsigned char* pOutData, int nOutOffset, const int nMaxOutDataSize, int* nCurBitsOffset, int* nCurBitShift, int* nFinalLiterals, int* nCurRepMatchOffset, const int nBlockFlags) {
+   int nRepMatchOffset = *nCurRepMatchOffset;
+   const int nMaxOffset = pCompressor->max_offset;
+   const int nIsInverted = (pCompressor->flags & FLG_IS_INVERTED) ? 1 : 0;
+   int nNumLiterals = 0; /* length of the literal run that has been seen but not written yet */
+   int nInFirstLiteralOffset = 0; /* window offset where that pending literal run starts */
+   int nIsFirstCommand = (nBlockFlags & 1) ? 1 : 0;
+   int i;
+
+   for (i = nStartOffset; i < nEndOffset; ) {
+      const salvador_final_match* pMatch = pBestMatch + i;
+
+      if (pMatch->length >= 2 || (pMatch->length >= 1 && pMatch->offset == nRepMatchOffset && nNumLiterals != 0)) { /* a 1-byte match is only emitted when it repeats the last offset right after literals */
+         int nMatchOffset = pMatch->offset;
+         int nMatchLen = pMatch->length;
+         int nEncodedMatchLen = nMatchLen - 2;
+
+         if (nMatchOffset < MIN_OFFSET || nMatchOffset > nMaxOffset || nMatchOffset > MAX_OFFSET)
+            return -1;
+
+         if (nIsFirstCommand && nNumLiterals == 0) {
+            /* The first block always starts with a literal */
+            return -1;
+         }
+
+         if (nNumLiterals != 0) {
+            /* Literals: flush the pending run before the match command */
+
+            if (nNumLiterals < pCompressor->stats.min_literals || pCompressor->stats.min_literals == -1)
+               pCompressor->stats.min_literals = nNumLiterals;
+            if (nNumLiterals > pCompressor->stats.max_literals)
+               pCompressor->stats.max_literals = nNumLiterals;
+            pCompressor->stats.total_literals += nNumLiterals;
+            pCompressor->stats.literals_divisor++;
+
+            if (!nIsFirstCommand) {
+               nOutOffset = salvador_write_bits(pOutData, nOutOffset, nMaxOutDataSize, 0 /* literals follow */, 1, nCurBitsOffset, nCurBitShift);
+               if (nOutOffset < 0) return -1;
+            }
+            else {
+               /* The command code for the first literals is omitted */
+               nIsFirstCommand = 0;
+            }
+
+            nOutOffset = salvador_write_literals_varlen(pOutData, nOutOffset, nMaxOutDataSize, nNumLiterals, nCurBitsOffset, nCurBitShift);
+            if (nOutOffset < 0) return -1;
+
+            if ((nOutOffset + nNumLiterals) > nMaxOutDataSize)
+               return -1;
+            memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals);
+            nOutOffset += nNumLiterals;
+         }
+
+         if (nMatchOffset == nRepMatchOffset && nNumLiterals != 0) {
+            /* Rep match: same offset as the previous match, so only the length is written */
+            nOutOffset = salvador_write_bits(pOutData, nOutOffset, nMaxOutDataSize, 0 /* rep match */, 1, nCurBitsOffset, nCurBitShift);
+            if (nOutOffset < 0) return -1;
+
+            /* Write match length */
+            nOutOffset = salvador_write_match_varlen(pOutData, nOutOffset, nMaxOutDataSize, nEncodedMatchLen, 1, nCurBitsOffset, nCurBitShift, NULL);
+            if (nOutOffset < 0) return -1;
+         }
+         else {
+            /* Match with offset */
+            nOutOffset = salvador_write_bits(pOutData, nOutOffset, nMaxOutDataSize, 1 /* match with offset */, 1, nCurBitsOffset, nCurBitShift);
+            if (nOutOffset < 0) return -1;
+
+            /* Write high bits of match offset */
+            nOutOffset = salvador_write_elias_value(pOutData, nOutOffset, nMaxOutDataSize, ((nMatchOffset - 1) >> 7) + 1, nIsInverted, nCurBitsOffset, nCurBitShift, NULL);
+            if (nOutOffset < 0) return -1;
+
+            /* Write low byte of match offset */
+            if (nOutOffset >= nMaxOutDataSize)
+               return -1;
+            unsigned char* pFirstBit = &pOutData[nOutOffset];
+            pOutData[nOutOffset++] = (255 - ((nMatchOffset - 1) & 0x7f)) << 1; /* 7-bit complement of the low 7 bits of (offset - 1) in bits 7..1; bit 0 stays clear and is handed over through pFirstBit (presumably to receive the first length bit) */
+
+            /* Write match length */
+            nOutOffset = salvador_write_match_varlen(pOutData, nOutOffset, nMaxOutDataSize, nEncodedMatchLen, 0, nCurBitsOffset, nCurBitShift, pFirstBit);
+            if (nOutOffset < 0) return -1;
+         }
+
+         nNumLiterals = 0;
+
+         if (nMatchOffset == nRepMatchOffset)
+            pCompressor->stats.num_rep_matches++;
+
+         nRepMatchOffset = nMatchOffset;
+
+         if (nMatchOffset < pCompressor->stats.min_offset || pCompressor->stats.min_offset == -1)
+            pCompressor->stats.min_offset = nMatchOffset;
+         if (nMatchOffset > pCompressor->stats.max_offset)
+            pCompressor->stats.max_offset = nMatchOffset;
+         pCompressor->stats.total_offsets += (long long)nMatchOffset;
+
+         if (nMatchLen < pCompressor->stats.min_match_len || pCompressor->stats.min_match_len == -1)
+            pCompressor->stats.min_match_len = nMatchLen;
+         if (nMatchLen > pCompressor->stats.max_match_len)
+            pCompressor->stats.max_match_len = nMatchLen;
+         pCompressor->stats.total_match_lens += nMatchLen;
+         pCompressor->stats.match_divisor++;
+
+         if (nMatchOffset == 1) {
+            if (nMatchLen < pCompressor->stats.min_rle1_len || pCompressor->stats.min_rle1_len == -1)
+               pCompressor->stats.min_rle1_len = nMatchLen;
+            if (nMatchLen > pCompressor->stats.max_rle1_len)
+               pCompressor->stats.max_rle1_len = nMatchLen;
+            pCompressor->stats.total_rle1_lens += nMatchLen;
+            pCompressor->stats.rle1_divisor++;
+         }
+         else if (nMatchOffset == 2) {
+            if (nMatchLen < pCompressor->stats.min_rle2_len || pCompressor->stats.min_rle2_len == -1)
+               pCompressor->stats.min_rle2_len = nMatchLen;
+            if (nMatchLen > pCompressor->stats.max_rle2_len)
+               pCompressor->stats.max_rle2_len = nMatchLen;
+            pCompressor->stats.total_rle2_lens += nMatchLen;
+            pCompressor->stats.rle2_divisor++;
+         }
+
+         i += nMatchLen;
+
+         int nCurSafeDist = (i - nStartOffset) - nOutOffset; /* input bytes consumed in this block minus the current output offset */
+         if (nCurSafeDist >= 0 && pCompressor->stats.safe_dist < nCurSafeDist)
+            pCompressor->stats.safe_dist = nCurSafeDist;
+
+         pCompressor->stats.commands_divisor++;
+      }
+      else {
+         if (nNumLiterals == 0)
+            nInFirstLiteralOffset = i;
+         nNumLiterals++;
+         i++;
+      }
+   }
+
+   if (nBlockFlags & 2) {
+      if (nNumLiterals < pCompressor->stats.min_literals || pCompressor->stats.min_literals == -1)
+         pCompressor->stats.min_literals = nNumLiterals;
+      if (nNumLiterals > pCompressor->stats.max_literals)
+         pCompressor->stats.max_literals = nNumLiterals;
+      pCompressor->stats.total_literals += nNumLiterals;
+      pCompressor->stats.literals_divisor++;
+
+      *nFinalLiterals = 0; /* last block: nothing is carried over, the trailing literals are written below */
+
+      if (nNumLiterals != 0) {
+         /* Final Literals */
+
+         if (!nIsFirstCommand) {
+            nOutOffset = salvador_write_bits(pOutData, nOutOffset, nMaxOutDataSize, 0 /* literals follow */, 1, nCurBitsOffset, nCurBitShift);
+            if (nOutOffset < 0) return -1;
+         }
+         else {
+            /* The command code for the first literals is omitted. We are writing the final literals, so this must be a fully incompressible block */
+            nIsFirstCommand = 0;
+         }
+
+         nOutOffset = salvador_write_literals_varlen(pOutData, nOutOffset, nMaxOutDataSize, nNumLiterals, nCurBitsOffset, nCurBitShift);
+         if (nOutOffset < 0) return -1;
+
+         if ((nOutOffset + nNumLiterals) > nMaxOutDataSize)
+            return -1;
+         memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals);
+         nOutOffset += nNumLiterals;
+         nNumLiterals = 0;
+      }
+
+      nOutOffset = salvador_write_bits(pOutData, nOutOffset, nMaxOutDataSize, 1 /* match with offset */, 1, nCurBitsOffset, nCurBitShift); /* the end marker is written as a match-with-offset command */
+      if (nOutOffset < 0) return -1;
+
+      nOutOffset = salvador_write_elias_value(pOutData, nOutOffset, nMaxOutDataSize, 256 /* EOD */, nIsInverted, nCurBitsOffset, nCurBitShift, NULL);
+      if (nOutOffset < 0) return -1;
+   }
+   else {
+      *nFinalLiterals = nNumLiterals; /* not the last block: trailing literals are left unwritten and reported to the caller */
+   }
+
+   *nCurRepMatchOffset = nRepMatchOffset;
+   return nOutOffset;
+}
+
+/**
+ * Select the most optimal matches, reduce the token count if possible, and then emit a block of compressed data
+ *
+ * @param pCompressor compression context, with the match candidates for this block already collected
+ * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
+ * @param nPreviousBlockSize number of previously compressed bytes (or 0 for none)
+ * @param nInDataSize number of input bytes to compress
+ * @param pOutData pointer to output buffer
+ * @param nMaxOutDataSize maximum size of output buffer, in bytes
+ * @param nCurBitsOffset write index into output buffer, of current byte being filled with bits
+ * @param nCurBitShift bit shift count
+ * @param nFinalLiterals output number of literals not written after writing this block, that need to be written in the next block
+ * @param nCurRepMatchOffset starting rep offset for this block, updated after the block is compressed successfully
+ * @param nBlockFlags bit 0: 1 for first block, 0 otherwise; bit 1: 1 for last block, 0 otherwise
+ *
+ * @return size of compressed data in output buffer, or -1 if the data is uncompressible
+ */
+static int salvador_optimize_and_write_block(salvador_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize, int *nCurBitsOffset, int *nCurBitShift, int *nFinalLiterals, int *nCurRepMatchOffset, const int nBlockFlags) {
+   int nOutOffset = 0;
+   const int nEndOffset = nPreviousBlockSize + nInDataSize;
+   int *rle_len = (int*)pCompressor->intervals /* reuse the intervals buffer to hold per-position run lengths */;
+   int *first_offset_for_byte = pCompressor->first_offset_for_byte;
+   int *next_offset_for_pos = pCompressor->next_offset_for_pos;
+   int *offset_cache = pCompressor->offset_cache;
+   int i, nPosition;
+
+   memset(pCompressor->best_match, 0, pCompressor->block_size * sizeof(salvador_final_match));
+
+   /* Supplement small matches: link each position to the previous position in this block that starts with the same two bytes */
+
+   memset(first_offset_for_byte, 0xff, sizeof(int) * 65536); /* all-ones bytes: every entry reads as -1, i.e. no position yet */
+   memset(next_offset_for_pos, 0xff, sizeof(int) * nInDataSize);
+
+   for (nPosition = nPreviousBlockSize; nPosition < (nEndOffset - 1); nPosition++) {
+      next_offset_for_pos[nPosition - nPreviousBlockSize] = first_offset_for_byte[((unsigned int)pInWindow[nPosition]) | (((unsigned int)pInWindow[nPosition + 1]) << 8)];
+      first_offset_for_byte[((unsigned int)pInWindow[nPosition]) | (((unsigned int)pInWindow[nPosition + 1]) << 8)] = nPosition;
+   }
+
+   for (nPosition = nPreviousBlockSize + 1; nPosition < (nEndOffset - 1); nPosition++) {
+      salvador_match *match = pCompressor->match + ((nPosition - nPreviousBlockSize) << MATCHES_PER_INDEX_SHIFT);
+      unsigned short *match_depth = pCompressor->match_depth + ((nPosition - nPreviousBlockSize) << MATCHES_PER_INDEX_SHIFT);
+      int m = 0, nInserted = 0;
+      int nMatchPos;
+
+      while (m < 15 && match[m].length) /* step past the candidates that are already stored */
+         m++;
+
+      for (nMatchPos = next_offset_for_pos[nPosition - nPreviousBlockSize]; m < 15 && nMatchPos >= 0; nMatchPos = next_offset_for_pos[nMatchPos - nPreviousBlockSize]) {
+         int nMatchOffset = nPosition - nMatchPos;
+
+         if (nMatchOffset <= pCompressor->max_offset) {
+            int nExistingMatchIdx;
+            int nAlreadyExists = 0;
+
+            for (nExistingMatchIdx = 0; nExistingMatchIdx < m; nExistingMatchIdx++) {
+               if (match[nExistingMatchIdx].offset == nMatchOffset ||
+                  (match[nExistingMatchIdx].offset - (match_depth[nExistingMatchIdx] & 0x3fff)) == nMatchOffset) {
+                  nAlreadyExists = 1;
+                  break;
+               }
+            }
+
+            if (!nAlreadyExists) {
+               int nMatchLen = 2; /* the chain guarantees the first two bytes match; extend 4 bytes at a time, then byte by byte, up to 128 */
+               while (nMatchLen < 128 && (nPosition + nMatchLen + 4) < nEndOffset && !memcmp(pInWindow + nMatchPos + nMatchLen, pInWindow + nPosition + nMatchLen, 4))
+                  nMatchLen += 4;
+               while (nMatchLen < 128 && (nPosition + nMatchLen) < nEndOffset && pInWindow[nMatchPos + nMatchLen] == pInWindow[nPosition + nMatchLen])
+                  nMatchLen++;
+               match[m].length = nMatchLen;
+               match[m].offset = nMatchOffset;
+               match_depth[m] = 0x4000; /* marker value: the second supplement pass below tests for it to re-measure this match */
+               m++;
+               nInserted++;
+               if (nInserted >= 15)
+                  break;
+            }
+         }
+         else {
+            break;
+         }
+      }
+   }
+
+   i = 0; /* fill rle_len: for every window position, the number of identical bytes left in its run, itself included */
+   while (i < nEndOffset) {
+      int nRangeStartIdx = i;
+      unsigned char c = pInWindow[nRangeStartIdx];
+      do {
+         i++;
+      }
+      while (i < nEndOffset && pInWindow[i] == c);
+      while (nRangeStartIdx < i) {
+         rle_len[nRangeStartIdx] = i - nRangeStartIdx;
+         nRangeStartIdx++;
+      }
+   }
+
+   /* Compress and insert additional matches */
+   salvador_optimize_forward(pCompressor, pInWindow, nPreviousBlockSize, nEndOffset, 1 /* nInsertForwardReps */, nCurRepMatchOffset, NARRIVALS_PER_POSITION / 2, nBlockFlags);
+
+   /* Supplement matches further, for positions whose first candidate is shorter than 8 bytes */
+
+   memset(offset_cache, 0xff, sizeof(int) * 2048); /* every entry reads as -1, which is never a valid position */
+
+   for (nPosition = nPreviousBlockSize + 1; nPosition < (nEndOffset - 1); nPosition++) {
+      salvador_match* match = pCompressor->match + ((nPosition - nPreviousBlockSize) << MATCHES_PER_INDEX_SHIFT);
+
+      if (match[0].length < 8) {
+         unsigned short* match_depth = pCompressor->match_depth + ((nPosition - nPreviousBlockSize) << MATCHES_PER_INDEX_SHIFT);
+         int m = 0, nInserted = 0;
+         int nMatchPos;
+         int nMaxForwardPos = nPosition + 2 + 1 + 3;
+
+         if (nMaxForwardPos > (nEndOffset - 2))
+            nMaxForwardPos = nEndOffset - 2;
+
+         while (m < NMATCHES_PER_INDEX && match[m].length) { /* stamp the offsets (mod 2048) this position already has with the current position */
+            offset_cache[match[m].offset & 2047] = nPosition;
+            offset_cache[(match[m].offset - (match_depth[m] & 0x3fff)) & 2047] = nPosition;
+            m++;
+         }
+
+         for (nMatchPos = next_offset_for_pos[nPosition - nPreviousBlockSize]; m < NMATCHES_PER_INDEX && nMatchPos >= 0; nMatchPos = next_offset_for_pos[nMatchPos - nPreviousBlockSize]) {
+            const int nMatchOffset = nPosition - nMatchPos;
+
+            if (nMatchOffset <= pCompressor->max_offset) {
+               int nAlreadyExists = 0;
+
+               if (offset_cache[nMatchOffset & 2047] == nPosition) { /* cache hit means a possible duplicate; the scan below confirms it */
+                  int nExistingMatchIdx;
+
+                  for (nExistingMatchIdx = 0; nExistingMatchIdx < m; nExistingMatchIdx++) {
+                     if (match[nExistingMatchIdx].offset == nMatchOffset ||
+                        (match[nExistingMatchIdx].offset - (match_depth[nExistingMatchIdx] & 0x3fff)) == nMatchOffset) {
+                        nAlreadyExists = 1;
+
+                        if (match_depth[nExistingMatchIdx] == 0x4000) { /* entry added by the first supplement pass: lengthen it if a longer run is found */
+                           int nMatchLen = 2;
+                           while (nMatchLen < 128 && nPosition < (nEndOffset - nMatchLen) && pInWindow[nMatchPos + nMatchLen] == pInWindow[nPosition + nMatchLen])
+                              nMatchLen++;
+                           if (nMatchLen > (int)match[nExistingMatchIdx].length)
+                              match[nExistingMatchIdx].length = nMatchLen;
+                        }
+
+                        break;
+                     }
+                  }
+               }
+
+               if (!nAlreadyExists) {
+                  int nForwardPos = nPosition + 2 + 1;
+
+                  if (nForwardPos >= nMatchOffset) { /* keeps nForwardPos - nMatchOffset from going negative in the probe below */
+                     int nGotMatch = 0;
+
+                     while (nForwardPos < nMaxForwardPos) {
+                        if (pInWindow[nForwardPos] == pInWindow[nForwardPos - nMatchOffset]) {
+                           nGotMatch = 1;
+                           break;
+                        }
+                        nForwardPos++;
+                     }
+
+                     if (nGotMatch) {
+                        int nMatchLen = 2;
+                        while (nMatchLen < 128 && (nPosition + nMatchLen + 4) < nEndOffset && !memcmp(pInWindow + nMatchPos + nMatchLen, pInWindow + nPosition + nMatchLen, 4))
+                           nMatchLen += 4;
+                        while (nMatchLen < 128 && (nPosition + nMatchLen ) < nEndOffset && pInWindow[nMatchPos + nMatchLen] == pInWindow[nPosition + nMatchLen])
+                           nMatchLen++;
+                        match[m].length = nMatchLen;
+                        match[m].offset = nMatchOffset;
+                        match_depth[m] = 0;
+                        m++;
+
+                        salvador_insert_forward_match(pCompressor, pInWindow, nPosition, nMatchOffset, nPreviousBlockSize, nEndOffset, 8);
+
+                        nInserted++;
+                        if (nInserted >= 9 || m >= NMATCHES_PER_INDEX)
+                           break;
+                     }
+                  }
+               }
+            }
+            else {
+               break;
+            }
+         }
+      }
+   }
+
+   /* Pick final matches */
+   salvador_optimize_forward(pCompressor, pInWindow, nPreviousBlockSize, nEndOffset, 0 /* nInsertForwardReps */, nCurRepMatchOffset, NARRIVALS_PER_POSITION, nBlockFlags);
+
+   /* Apply reduction and merge pass, repeated until a pass changes nothing or 20 passes have run */
+   int nDidReduce;
+   int nPasses = 0;
+   do {
+      nDidReduce = salvador_reduce_commands(pCompressor, pInWindow, pCompressor->best_match - nPreviousBlockSize, nPreviousBlockSize, nEndOffset, nCurRepMatchOffset, nBlockFlags);
+      nPasses++;
+   } while (nDidReduce && nPasses < 20);
+
+   /* Write compressed block; best_match is rebased so that it can be indexed by window offset */
+
+   return salvador_write_block(pCompressor, pCompressor->best_match - nPreviousBlockSize, pInWindow, nPreviousBlockSize, nEndOffset, pOutData, nOutOffset, nMaxOutDataSize, nCurBitsOffset, nCurBitShift, nFinalLiterals, nCurRepMatchOffset, nBlockFlags);
+}
+
+/* Forward declaration */
+static void salvador_compressor_destroy(salvador_compressor *pCompressor);
+
+/**
+ * Initialize compression context: reset the statistics and allocate every working buffer
+ *
+ * @param pCompressor compression context to initialize
+ * @param nBlockSize maximum size of input data (bytes to compress only)
+ * @param nMaxWindowSize maximum size of input data window (previously compressed bytes + bytes to compress)
+ * @param nMaxArrivals maximum number of arrivals per position
+ * @param nFlags compression flags
+ *
+ * @return 0 for success, 100 for failure (everything allocated so far is released before returning)
+ */
+static int salvador_compressor_init(salvador_compressor *pCompressor, const int nBlockSize, const int nMaxWindowSize, const int nMaxArrivals, const int nFlags) {
+   int nResult = divsufsort_init(&pCompressor->divsufsort_context);
+
+   pCompressor->intervals = NULL; /* clear every pointer first so the failure path can hand a partially built context to salvador_compressor_destroy() */
+   pCompressor->pos_data = NULL;
+   pCompressor->open_intervals = NULL;
+   pCompressor->match = NULL;
+   pCompressor->match_depth = NULL;
+   pCompressor->best_match = NULL;
+   pCompressor->arrival = NULL;
+   pCompressor->first_offset_for_byte = NULL;
+   pCompressor->next_offset_for_pos = NULL;
+   pCompressor->offset_cache = NULL;
+   pCompressor->flags = nFlags;
+   pCompressor->block_size = nBlockSize;
+
+   memset(&pCompressor->stats, 0, sizeof(pCompressor->stats));
+   pCompressor->stats.min_literals = -1; /* -1 means "no sample yet"; the block writer tests for it, and a 0 start would pin the minimum at 0 */
+   pCompressor->stats.min_match_len = -1;
+   pCompressor->stats.min_offset = -1;
+   pCompressor->stats.min_rle1_len = -1;
+   pCompressor->stats.min_rle2_len = -1;
+
+   if (!nResult) {
+      pCompressor->intervals = (unsigned long long *)malloc(nMaxWindowSize * sizeof(unsigned long long));
+
+      if (pCompressor->intervals) {
+         pCompressor->pos_data = (unsigned long long *)malloc(nMaxWindowSize * sizeof(unsigned long long));
+
+         if (pCompressor->pos_data) {
+            pCompressor->open_intervals = (unsigned long long *)malloc((LCP_AND_TAG_MAX + 1) * sizeof(unsigned long long));
+
+            if (pCompressor->open_intervals) {
+               pCompressor->arrival = (salvador_arrival *)malloc((nBlockSize + 1) * nMaxArrivals * sizeof(salvador_arrival));
+
+               if (pCompressor->arrival) {
+                  pCompressor->best_match = (salvador_final_match *)malloc(nBlockSize * sizeof(salvador_final_match));
+
+                  if (pCompressor->best_match) {
+                     pCompressor->match = (salvador_match *)malloc(nBlockSize * NMATCHES_PER_INDEX * sizeof(salvador_match));
+                     if (pCompressor->match) {
+                        pCompressor->match_depth = (unsigned short *)malloc(nBlockSize * NMATCHES_PER_INDEX * sizeof(unsigned short));
+                        if (pCompressor->match_depth) {
+                           pCompressor->first_offset_for_byte = (int*)malloc(65536 * sizeof(int));
+                           if (pCompressor->first_offset_for_byte) {
+                              pCompressor->next_offset_for_pos = (int*)malloc(nBlockSize * sizeof(int));
+                              if (pCompressor->next_offset_for_pos) {
+                                 if (nMaxArrivals == NARRIVALS_PER_POSITION) { /* NOTE(review): offset_cache stays NULL for any other arrival count - confirm no such caller reaches the code that uses it */
+                                    pCompressor->offset_cache = (int*)malloc(2048 * sizeof(int));
+                                    if (pCompressor->offset_cache) {
+                                       return 0;
+                                    }
+                                 }
+                                 else {
+                                    return 0;
+                                 }
+                              }
+                           }
+                        }
+                     }
+                  }
+               }
+            }
+         }
+      }
+   }
+
+   salvador_compressor_destroy(pCompressor); /* reached only when divsufsort_init() or one of the allocations failed */
+   return 100;
+}
+
+/**
+ * Clean up compression context and free up any associated resources
+ *
+ * Also used by the failure path of salvador_compressor_init(), so some buffers may never have been allocated.
+ *
+ * @param pCompressor compression context to clean up
+ */
+static void salvador_compressor_destroy(salvador_compressor *pCompressor) {
+   divsufsort_destroy(&pCompressor->divsufsort_context);
+
+   /* free() does nothing when handed NULL, so no buffer needs a guard. Each pointer
+    * is cleared once released, so the context keeps no dangling pointers. */
+
+   /* Offset lookup for the second match supplement pass; salvador_compressor_init()
+    * only allocates it for NARRIVALS_PER_POSITION arrivals, so it may still be NULL */
+   free(pCompressor->offset_cache);
+   pCompressor->offset_cache = NULL;
+
+   /* For each block position, the previous position in the block that
+    * starts with the same two bytes */
+   free(pCompressor->next_offset_for_pos);
+   pCompressor->next_offset_for_pos = NULL;
+
+   /* Most recent position seen for each two-byte pair */
+   free(pCompressor->first_offset_for_byte);
+   pCompressor->first_offset_for_byte = NULL;
+
+   /* 16-bit companion value stored for each match candidate */
+   free(pCompressor->match_depth);
+   pCompressor->match_depth = NULL;
+
+   /* Match candidates, NMATCHES_PER_INDEX slots per block position */
+   free(pCompressor->match);
+   pCompressor->match = NULL;
+
+   /* Arrival slots; salvador_compressor_init() allocates nMaxArrivals of
+    * them for each of the nBlockSize + 1 positions */
+   free(pCompressor->arrival);
+   pCompressor->arrival = NULL;
+
+   /* Final match chosen for each block position; this is the array that
+    * salvador_write_block() encodes */
+   free(pCompressor->best_match);
+   pCompressor->best_match = NULL;
+
+   /* Array of LCP_AND_TAG_MAX + 1 64-bit entries */
+   free(pCompressor->open_intervals);
+   pCompressor->open_intervals = NULL;
+
+   /* One 64-bit entry per input window position */
+   free(pCompressor->pos_data);
+   pCompressor->pos_data = NULL;
+
+   /* One 64-bit entry per input window position; the same memory is also
+    * reused by salvador_optimize_and_write_block() for its run lengths */
+   free(pCompressor->intervals);
+   pCompressor->intervals = NULL;
+}
+
+/**
+ * Compress one block of data: index the window, collect match candidates, then optimize and emit the block
+ *
+ * @param pCompressor compression context
+ * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
+ * @param nPreviousBlockSize number of previously compressed bytes at the start of the window (or 0 for none)
+ * @param nInDataSize number of input bytes to compress
+ * @param pOutData pointer to output buffer
+ * @param nMaxOutDataSize maximum size of output buffer, in bytes
+ * @param nCurBitsOffset write index into output buffer, of current byte being filled with bits
+ * @param nCurBitShift bit shift count
+ * @param nFinalLiterals output number of literals not written after writing this block, that need to be written in the next block
+ * @param nCurRepMatchOffset starting rep offset for this block, updated after the block is compressed successfully
+ * @param nBlockFlags bit 0: 1 for first block, 0 otherwise; bit 1: 1 for last block, 0 otherwise
+ *
+ * @return size of compressed data in output buffer, or -1 if the suffix array cannot be built or the data is uncompressible
+ */
+static int salvador_compressor_shrink_block(salvador_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize, int *nCurBitsOffset, int *nCurBitShift, int *nFinalLiterals, int *nCurRepMatchOffset, const int nBlockFlags) {
+   const int nEndOffset = nPreviousBlockSize + nInDataSize;
+
+   /* The suffix array covers the whole window; give up on the block if it cannot be built */
+   if (salvador_build_suffix_array(pCompressor, pInWindow, nEndOffset))
+      return -1;
+
+   /* Bytes in front of the block belong to earlier output: skip over them instead of collecting matches */
+   if (nPreviousBlockSize != 0)
+      salvador_skip_matches(pCompressor, 0, nPreviousBlockSize);
+
+   /* Gather the candidates for the new bytes, then choose among them and emit the block */
+   salvador_find_all_matches(pCompressor, NMATCHES_PER_INDEX, nPreviousBlockSize, nEndOffset, nBlockFlags);
+
+   return salvador_optimize_and_write_block(pCompressor, pInWindow, nPreviousBlockSize, nInDataSize, pOutData, nMaxOutDataSize, nCurBitsOffset, nCurBitShift, nFinalLiterals, nCurRepMatchOffset, nBlockFlags);
+}
+
+/**
+ * Get the worst-case compressed size: the input size plus 128 bytes for every started 64 KB of input
+ *
+ * @param nInputSize input(source) size in bytes
+ *
+ * @return maximum compressed size
+ */
+size_t salvador_get_max_compressed_size(size_t nInputSize) {
+   return nInputSize + 128 * ((nInputSize + 65535) / 65536);
+}
+
+/**
+ * Compress memory
+ *
+ * @param pInputData pointer to input(source) data to compress, starting with the dictionary bytes if there are any
+ * @param pOutBuffer buffer for compressed data
+ * @param nInputSize input(source) size in bytes, dictionary included
+ * @param nMaxOutBufferSize maximum capacity of compression buffer
+ * @param nFlags compression flags (set to 0)
+ * @param nMaxWindowSize maximum match offset to use (0 for the default, MAX_OFFSET)
+ * @param nDictionarySize size of dictionary in front of input data (0 for none); dictionary + first block must fit in two blocks
+ * @param progress progress function, called after compressing each block, or NULL for none
+ * @param pStats pointer to compression stats that are filled if this function is successful, or NULL
+ *
+ * @return actual compressed size, or -1 (as a size_t) for error
+ */
+size_t salvador_compress(const unsigned char *pInputData, unsigned char *pOutBuffer, size_t nInputSize, size_t nMaxOutBufferSize,
+      const unsigned int nFlags, size_t nMaxWindowSize, size_t nDictionarySize, void(*progress)(long long nOriginalSize, long long nCompressedSize), salvador_stats *pStats) {
+   salvador_compressor compressor;
+   size_t nOriginalSize = 0;
+   size_t nCompressedSize = 0L;
+   int nResult;
+   int nMaxArrivals = NARRIVALS_PER_POSITION;
+   int nError = 0;
+   const int nBlockSize = (nInputSize < BLOCK_SIZE) ? ((nInputSize < 1024) ? 1024 : (int)nInputSize) : BLOCK_SIZE;
+   const int nMaxOutBlockSize = (int)salvador_get_max_compressed_size(nBlockSize);
+
+   if (nDictionarySize < nInputSize) {
+      /* The context below is sized for a window of nBlockSize * 2 bytes: refuse a dictionary + first block that cannot fit in it */
+      const size_t nFirstDataSize = ((nInputSize - nDictionarySize) > (size_t)nBlockSize) ? (size_t)nBlockSize : (nInputSize - nDictionarySize);
+      if ((nDictionarySize + nFirstDataSize) > ((size_t)nBlockSize * 2)) return -1;
+   }
+
+   nResult = salvador_compressor_init(&compressor, nBlockSize, nBlockSize * 2, nMaxArrivals, nFlags);
+   if (nResult != 0) {
+      return -1; /* init has already released whatever it allocated */
+   }
+
+   compressor.max_offset = nMaxWindowSize ? (int)nMaxWindowSize : MAX_OFFSET;
+
+   int nPreviousBlockSize = 0;
+   int nNumBlocks = 0;
+   int nCurBitsOffset = INT_MIN, nCurBitShift = 0, nCurFinalLiterals = 0;
+   int nBlockFlags = 1; /* bit 0: the next block is the first one */
+   int nCurRepMatchOffset = 1;
+
+   if (nDictionarySize) {
+      nOriginalSize = (int)nDictionarySize; /* the dictionary is never emitted: it only serves as the window in front of the first block */
+      nPreviousBlockSize = (int)nDictionarySize;
+   }
+
+   while (nOriginalSize < nInputSize && !nError) {
+      int nInDataSize;
+
+      nInDataSize = (int)(nInputSize - nOriginalSize);
+      if (nInDataSize > nBlockSize)
+         nInDataSize = nBlockSize;
+
+      if (nInDataSize > 0) {
+         int nOutDataSize;
+         int nOutDataEnd = (int)(nMaxOutBufferSize - nCompressedSize);
+
+         if (nOutDataEnd > nMaxOutBlockSize)
+            nOutDataEnd = nMaxOutBlockSize;
+
+         if ((nOriginalSize + nInDataSize) >= nInputSize)
+            nBlockFlags |= 2; /* bit 1: this block reaches the end of the input */
+         nOutDataSize = salvador_compressor_shrink_block(&compressor, pInputData + nOriginalSize - nPreviousBlockSize, nPreviousBlockSize, nInDataSize, pOutBuffer + nCompressedSize, nOutDataEnd,
+            &nCurBitsOffset, &nCurBitShift, &nCurFinalLiterals, &nCurRepMatchOffset, nBlockFlags);
+         nBlockFlags &= (~1);
+
+         if (nOutDataSize >= 0 && nCurFinalLiterals >= 0 && nCurFinalLiterals < nInDataSize) {
+            /* Write compressed block */
+
+            if (!nError) {
+               nInDataSize -= nCurFinalLiterals; /* literals left unwritten at the end of the block are compressed again as the start of the next one */
+               nOriginalSize += nInDataSize;
+               nCurFinalLiterals = 0;
+               nCompressedSize += nOutDataSize;
+               if (nCurBitsOffset != INT_MIN)
+                  nCurBitsOffset -= nOutDataSize; /* rebase onto the next block's output pointer, which starts nOutDataSize bytes further on */
+            }
+         }
+         else {
+            nError = -1;
+         }
+
+         nPreviousBlockSize = nInDataSize;
+         nNumBlocks++;
+      }
+
+      if (!nError && nOriginalSize < nInputSize) {
+         if (progress)
+            progress(nOriginalSize, nCompressedSize);
+      }
+   }
+
+   if (progress)
+      progress(nOriginalSize, nCompressedSize);
+   if (pStats)
+      *pStats = compressor.stats;
+
+   salvador_compressor_destroy(&compressor);
+
+   if (nError) {
+      return -1;
+   }
+   else {
+      return nCompressedSize;
+   }
+}
diff --git a/src/shrink.h b/src/shrink.h
new file mode 100644
index 0000000..37c5da3
--- /dev/null
+++ b/src/shrink.h
@@ -0,0 +1,178 @@
+/*
+ * shrink.h - compressor definitions
+ *
+ * Copyright (C) 2021 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Implements the ZX0 encoding designed by Einar Saukas. https://github.com/einar-saukas/ZX0
+ * Also inspired by Charles Bloom's compression blog. http://cbloomrants.blogspot.com/
+ *
+ */
+
+#ifndef _SHRINK_H
+#define _SHRINK_H
+
+#include "divsufsort.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define LCP_BITS 18  /* width, in bits, of the field selected by LCP_MASK */
+#define TAG_BITS 4  /* number of bits subtracted from LCP_BITS when deriving LCP_MAX */
+#define LCP_MAX ((1U<<(LCP_BITS - TAG_BITS)) - 1)  /* 2^14 - 1 = 16383 */
+#define LCP_AND_TAG_MAX ((1U<<LCP_BITS) - 1)  /* 2^18 - 1 = 262143 */
+#define LCP_SHIFT (63-LCP_BITS)  /* 45: lowest bit position of the LCP_MASK field */
+#define LCP_MASK (((1ULL<<LCP_BITS) - 1) << LCP_SHIFT)  /* selects bits 45..62 of a 64-bit word */
+#define POS_MASK ((1ULL<<LCP_SHIFT) - 1)  /* selects bits 0..44 of a 64-bit word */
+#define VISITED_FLAG 0x8000000000000000ULL  /* bit 63 only */
+#define EXCL_VISITED_MASK  0x7fffffffffffffffULL  /* every bit except bit 63 (bitwise complement of VISITED_FLAG) */
+
+#define NARRIVALS_PER_POSITION 80  /* NOTE(review): presumably the number of salvador_arrival slots kept per position -- confirm in the .c files */
+#define NMATCHES_PER_INDEX 64  /* equals 1 << MATCHES_PER_INDEX_SHIFT */
+#define MATCHES_PER_INDEX_SHIFT 6  /* log2 of NMATCHES_PER_INDEX; keep the two in sync */
+
+#define LEAVE_ALONE_MATCH_SIZE 300  /* NOTE(review): use is not visible in this header; presumably a match length threshold -- confirm */
+
+/** One match option: a length/offset pair stored in two bit-fields totalling 31 bits */
+typedef struct _salvador_match {
+   unsigned int length:14;  /* 14-bit field, values 0..16383 (16383 is also the value of LCP_MAX) */
+   unsigned int offset:17;  /* 17-bit field, values 0..131071 */
+} salvador_match;
+
+/** One finalized match: a length/offset pair held in plain ints rather than the bit-fields of salvador_match */
+typedef struct _salvador_final_match {
+   int length;  /* match length */
+   int offset;  /* match offset */
+} salvador_final_match;
+
+/** Forward arrival slot (NOTE(review): presumably one candidate way of reaching a position during parsing -- confirm in the .c files) */
+typedef struct {
+   int cost;  /* NOTE(review): presumably the accumulated cost of this arrival -- confirm */
+
+   unsigned int from_pos:17;  /* 17-bit field, values 0..131071 */
+   int from_slot:8;  /* 8-bit bit-field of plain int: whether it is signed is implementation-defined in C */
+
+   unsigned int rep_offset:17;  /* 17-bit field, same width as salvador_match.offset */
+
+   unsigned int rep_pos:17;  /* 17-bit field, values 0..131071 */
+   unsigned int match_len:14;  /* 14-bit field, same width as salvador_match.length */
+
+   int num_literals;  /* NOTE(review): presumably the count of pending literals for this arrival -- confirm */
+   int score;  /* NOTE(review): meaning not visible in this header -- confirm */
+} salvador_arrival;
+
+/** Visited match: a pair of ints (NOTE(review): what outer and inner hold is not visible in this header -- confirm in the .c files) */
+typedef struct {
+   int outer;
+   int inner;
+} salvador_visited;
+
+/** Compression statistics; salvador_compress() takes an optional pointer to one of these as its pStats argument */
+typedef struct _salvador_stats {
+   int num_literals;  /* NOTE(review): the num_* fields look like per-kind counters -- confirm where they are updated */
+   int num_4bit_matches;
+   int num_7bit_matches;
+   int num_variable_matches;
+   int num_rep_matches;
+   int num_eod;
+
+   int safe_dist;  /* NOTE(review): meaning not visible in this header -- confirm */
+
+   int min_literals;  /* min/max/total triplets follow for literals, offsets, match lengths, rle1 lengths and rle2 lengths */
+   int max_literals;
+   int total_literals;
+
+   int min_offset;
+   int max_offset;
+   long long total_offsets;  /* the only total declared as long long; the other totals are int */
+
+   int min_match_len;
+   int max_match_len;
+   int total_match_lens;
+
+   int min_rle1_len;
+   int max_rle1_len;
+   int total_rle1_lens;
+
+   int min_rle2_len;
+   int max_rle2_len;
+   int total_rle2_lens;
+
+   int commands_divisor;  /* NOTE(review): the *_divisor fields are presumably denominators for averaging the totals -- confirm */
+   int literals_divisor;
+   int match_divisor;
+   int rle1_divisor;
+   int rle2_divisor;
+} salvador_stats;
+
+/** Compression context: suffix-sort context, working arrays, settings and statistics */
+typedef struct _salvador_compressor {
+   divsufsort_ctx_t divsufsort_context;  /* type presumably declared by the included divsufsort.h */
+   unsigned long long *intervals;  /* NOTE(review): the pointer members are presumably buffers owned by this context -- confirm who allocates and frees them */
+   unsigned long long *pos_data;
+   unsigned long long *open_intervals;
+   salvador_match *match;  /* array of match options (see salvador_match) */
+   unsigned short *match_depth;
+   salvador_final_match *best_match;  /* array of finalized matches (see salvador_final_match) */
+   salvador_arrival *arrival;  /* array of arrival slots (see salvador_arrival) */
+   int *first_offset_for_byte;
+   int *next_offset_for_pos;
+   int *offset_cache;
+   int flags;  /* NOTE(review): presumably the nFlags value given to salvador_compress() -- confirm */
+   int block_size;
+   int max_offset;
+   salvador_stats stats;  /* statistics gathered for this context (see salvador_stats) */
+} salvador_compressor;
+
+/**
+ * Get maximum compressed size of input(source) data
+ *
+ * @param nInputSize input(source) size in bytes
+ *
+ * @return maximum compressed size (NOTE(review): presumably in bytes and usable as nMaxOutBufferSize for salvador_compress() -- confirm)
+ */
+size_t salvador_get_max_compressed_size(size_t nInputSize);
+
+/**
+ * Compress memory
+ *
+ * @param pInputData pointer to input(source) data to compress
+ * @param pOutBuffer buffer that receives the compressed data
+ * @param nInputSize input(source) size in bytes
+ * @param nMaxOutBufferSize maximum capacity of compression buffer
+ * @param nFlags compression flags (set to 0)
+ * @param nMaxWindowSize maximum window size to use (0 for default)
+ * @param nDictionarySize size of dictionary in front of input data (0 for none)
+ * @param progress progress function, called with (nOriginalSize, nCompressedSize) after compressing each block, or NULL for none
+ * @param pStats pointer to compression stats to fill, or NULL (NOTE(review): confirm in the implementation whether stats are also written when compression fails)
+ *
+ * @return actual compressed size, or -1 converted to size_t, i.e. (size_t)-1, for error; the return type is unsigned, so test for == (size_t)-1 rather than < 0
+ */
+size_t salvador_compress(const unsigned char *pInputData, unsigned char *pOutBuffer, size_t nInputSize, size_t nMaxOutBufferSize,
+   const unsigned int nFlags, size_t nMaxWindowSize, size_t nDictionarySize, void(*progress)(long long nOriginalSize, long long nCompressedSize), salvador_stats *pStats);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SHRINK_H */