Skip to content

Commit

Permalink
added CopticLS parameter to converter
Browse files Browse the repository at this point in the history
added CopticLS parameter to converter. use with David Brakke's ASCII
files
  • Loading branch information
ctschroeder committed Apr 25, 2015
1 parent 242aa22 commit 8480b20
Show file tree
Hide file tree
Showing 4 changed files with 334 additions and 2 deletions.
7 changes: 5 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
This repository holds character encoding converters to convert ASCII characters in legacy fonts into Unicode Coptic characters and a processing perl script to convert a diplomatic, Coptic text transcription into SGML.
________________________________________
Simple Perl Converter:
recode_coptic.pl Version 0.9.1
recode_coptic.pl Version 1.1.0

This assumes a UTF-8 file in one word per line format and automatically converts Coptic encodings

Usage:
recode_coptic.pl file
Read a file in Coptic font encoding and output standard Unicode as UTF-8
recode_coptic.pl in_Coptic.txt > out_utf8.txt
See help (-h) for options

Copyright 2013-14 Amir Zeldes, Caroline T. Schroeder. The perl program is free software. You may copy or redistribute the script under the same terms as Perl itself.

Copyright 2013-15 Amir Zeldes, Caroline T. Schroeder. The perl program is free software. You may copy or redistribute the script under the same terms as Perl itself.
_________________________________________
About CopticVDWtoUTFConverter

Expand Down
File renamed without changes.
File renamed without changes.
329 changes: 329 additions & 0 deletions recode_coptic.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,329 @@
#!/usr/bin/perl -w

# recode_coptic.pl Version 1.0.0

# this assumes a UTF-8 file in one word per line format and
# automatically converts Coptic encodings
# usage:
# recode_coptic.pl file
# See help (-h) for options

use Getopt::Std;
use utf8;
# Encode everything printed to STDOUT as UTF-8.
binmode(STDOUT, ":utf8");
# Put STDIN into raw (binary) mode. The visible script reads its input from
# the file named on the command line, not from STDIN.
binmode STDIN;

# Help text: printed for -h or when no input file is given, and used as the
# die message when option parsing fails.
my $usage = <<'_USAGE_';
This script converts characters from one Coptic encoding to another.
Notes and assumptions:
- Supralinear strokes in Coptic font are supported
- Other decorations not yet supported
Usage: recode_coptic.pl [options] <FILE>
Options and argument:
-h print this message and quit
-f <format> Specify the input format. Default is Coptic font, other options are "CopticLS", "CMCL", "avva_shenouda", "low" (converts all upper case utf8 Coptic to all lower case)
<FILE> A text file with Coptic text in a supported encoding
Examples:
Read a file in Coptic font encoding and output standard Unicode as UTF-8:
recode_coptic.pl in_Coptic.txt > out_utf8.txt
Read a file in CMCL encoding and output standard Unicode as UTF-8:
recode_coptic.pl -f CMCL in_Coptic.txt > out_utf8.txt
Copyright 2013-2015, Amir Zeldes, Caroline T. Schroeder
This program is free software. You may copy or redistribute it under
the same terms as Perl itself.
_USAGE_

### OPTIONS BEGIN ###
# Parse the command line. getopts() removes the recognised switches from
# @ARGV, so whatever remains afterwards is the input file argument.
my %opts = ();
getopts('hf:',\%opts) or die $usage;

#help: requested explicitly, or no input file left on the command line
if ($opts{h} || (@ARGV == 0)) {
print $usage;
exit;
}

#format: -f selects the input encoding; absent (or false) means "Coptic"
my %is_known_format = map { $_ => 1 } qw(Coptic CopticLS CMCL low avva_shenouda);
my $format = $opts{f} || "Coptic";
if (!$is_known_format{$format}) {
# An unrecognised name still falls back to the default, as before, but the
# user is now told so instead of silently getting the wrong conversion.
warn "unknown format '$format', falling back to Coptic\n";
$format = "Coptic";
}
### OPTIONS END ###

# Open the input file (first remaining command-line argument), decoding it
# as UTF-8. The bareword handle FILE is what the main read loop iterates
# over, so its name is kept. On failure, report the file name and the
# system error rather than a fixed "could not find" message.
my $input_path = shift;
open FILE,"<:encoding(UTF-8)",$input_path or die "could not open input document '$input_path': $!\n";

while (<FILE>) {

chomp;
$line = $_;

if ($format eq "Coptic")
{
# Legacy "Coptic" font: every ASCII key stands for one Coptic letter.
# The lower-case targets are the same letters that the stroked capitals
# further down map to, minus the combining macron. All targets are
# non-ASCII, so no rule can re-match the output of an earlier rule.
$line =~ s/a/ⲁ/g;
$line =~ s/b/ⲃ/g;
$line =~ s/c/ⲑ/g;
$line =~ s/d/ⲇ/g;
$line =~ s/e/ⲉ/g;
$line =~ s/f/ϥ/g;
$line =~ s/g/ⲅ/g;
$line =~ s/h/ⲏ/g;
$line =~ s/i/ⲓ/g;
$line =~ s/j/ϫ/g;
$line =~ s/k/ⲕ/g;
$line =~ s/l/ⲗ/g;
$line =~ s/m/ⲙ/g;
$line =~ s/n/ⲛ/g;
$line =~ s/o/ⲟ/g;
$line =~ s/p/ⲡ/g;
$line =~ s/q/ϭ/g;
$line =~ s/r/ⲣ/g;
$line =~ s/s/ⲥ/g;
$line =~ s/t/ⲧ/g;
$line =~ s/u/ⲩ/g;
$line =~ s/v/ⲫ/g;
$line =~ s/w/ⲱ/g;
$line =~ s/x/ϩ/g;
$line =~ s/y/ⲭ/g;
$line =~ s/z/ⲍ/g;
$line =~ s/¥/ϣ/g;
$line =~ s/\+/ϯ/g;

#NB psi and xi in Coptic font are 2, 3
#these numbers will be replaced
# NOTE(review): assumes "2, 3" above means psi, xi respectively - confirm
# against the font chart.
$line =~ s/2/ⲯ/g;
$line =~ s/3/ⲝ/g;

#capitals express supralinear stroke in Coptic font
$line =~ s/A/ⲁ̄/g;
$line =~ s/B/ⲃ̄/g;
$line =~ s/C/ⲑ̄/g;
$line =~ s/D/ⲇ̄/g;
$line =~ s/E/ⲉ̄/g;
$line =~ s/F/ϥ̄/g;
$line =~ s/G/ⲅ̄/g;
$line =~ s/H/ⲏ̄/g;
$line =~ s/I/ⲓ̄/g;
$line =~ s/J/ϫ̄/g;
$line =~ s/K/ⲕ̄/g;
$line =~ s/L/ⲗ̄/g;
$line =~ s/M/ⲙ̄/g;
$line =~ s/N/ⲛ̄/g;
$line =~ s/O/ⲟ̄/g;
$line =~ s/P/ⲡ̄/g;
$line =~ s/Q/ϭ̄/g;
$line =~ s/R/ⲣ̄/g;
$line =~ s/S/ⲥ̄/g;
$line =~ s/T/ⲧ̄/g;
$line =~ s/U/ⲩ̄/g;
$line =~ s/V/ⲫ̄/g;
$line =~ s/W/ⲱ̄/g;
$line =~ s/X/ϩ̄/g;
$line =~ s/Y/ⲭ̄/g;
$line =~ s/Z/ⲍ̄/g;
# Stroked shai and ti are not mapped (no key known for them here).
#$line =~ s/¥/ϣ̄/g;
#$line =~ s/\+/ϯ̄/g;
}
elsif ($format eq "CopticLS")
#need to add c±s
# CopticLS input: rewrites $line in place, one substitution per key, in the
# order written (letters, then stroke/diacritic markers, then punctuation).
# NOTE(review): many rules below have an EMPTY replacement in this copy, so
# as written they delete the key instead of mapping it. Presumably the target
# Coptic letter was lost from the file - confirm against the upstream script
# before relying on this branch.
{
# lower-case keys
$line =~ s/a//g;
$line =~ s/b//g;
$line =~ s/c//g;
$line =~ s/d//g;
$line =~ s/e//g;
$line =~ s/f//g;
$line =~ s/g//g;
$line =~ s/h//g;
$line =~ s/i//g;
$line =~ s/j/ϫ/g;
$line =~ s/k//g;
$line =~ s/l//g;
$line =~ s/m//g;
$line =~ s/n//g;
$line =~ s/o//g;
$line =~ s/p//g;
$line =~ s/q/ϭ/g;
$line =~ s/r//g;
$line =~ s/s//g;
$line =~ s/t//g;
$line =~ s/u//g;
$line =~ s/v/ϣ/g;
$line =~ s/w//g;
$line =~ s/x//g;
$line =~ s/y//g;
$line =~ s/z//g;
# NOTE(review): exact repeat of the v rule above; no v is left to match here.
$line =~ s/v/ϣ/g;
# upper-case keys that stand for separate letters
$line =~ s/C/ϭ/g;
$line =~ s/D//g;
$line =~ s/F/ϥ/g;
$line =~ s/G//g;
$line =~ s/H/ϩ/g;
$line =~ s/J/ϧ/g;
$line =~ s/Q//g;
$line =~ s/R//g;
$line =~ s/T/ϯ/g;
$line =~ s/Z//g;

#diacritics and strokes
# These run after the letter rules, so "." here matches already-converted text.
$line =~ s/(.)±(.)=±(.)/$1$2$3/g;
$line =~ s/(.)±(.)/$1$2/g; #place binding supralinear strokes after each character
$line =~ s/=/̄/g; #equals sign after letter is a supralinear stroke
$line =~ s/O//g;
$line =~ s/P/̂/g;
$line =~ s/\//̣/g;
$line =~ s/Ú/̈/g;
# capitals that expand to a letter plus a combining mark
$line =~ s/A/ⲏ̂/g;
$line =~ s/E/ⲉ̄/g;
$line =~ s/I/ⲓ̄/g;
$line =~ s/M/ⲛ̀/g;
$line =~ s/N/ⲛ̄/g;
$line =~ s/S/ⲏ⳰/g;
$line =~ s/U/ⲩ̄/g;

# Placeholders: each rule replaces the key with itself, i.e. changes nothing.
$line =~ s/K/K/g; #couldn't find keyboard character
$line =~ s/L/L/g; #couldn't find keyboard character
$line =~ s/V/V/g; #couldn't find keyboard character
$line =~ s/W/W/g; #couldn't find keyboard character
$line =~ s/X/X/g; #couldn't find keyboard character
$line =~ s/Y/Y/g; #couldn't find keyboard character

#punctuation
$line =~ s/>/·/g;
$line =~ s/B//g;
$line =~ s/ı//g;
$line =~ s/:/:/g;
# NOTE(review): the pattern below is empty in this copy. Per perlop, an empty
# pattern re-uses the last successfully matched pattern, so the effect depends
# on earlier matches - presumably a source character was lost; confirm.
$line =~ s//;/g;
$line =~ s/</⳿/g;



}
elsif ($format eq "CMCL")
{
# CMCL transliteration: one ASCII key per Coptic letter, case-sensitive
# (H/h, P/p, C/c, Y/y and T/t are different letters). All keys are distinct
# and all targets are non-ASCII, so the rules cannot interfere.
# NOTE(review): the targets that are not f, j, c, h, y, Y were restored by
# position - the rules are listed in the same letter order as the Coptic-font
# table (alpha, beta, theta, delta, ... shai, ti, psi, xi). Confirm against
# the CMCL key chart, in particular T (psi) and x (xi).
$line =~ s/a/ⲁ/g;
$line =~ s/b/ⲃ/g;
$line =~ s/q/ⲑ/g;
$line =~ s/d/ⲇ/g;
$line =~ s/e/ⲉ/g;
$line =~ s/f/ϥ/g;
$line =~ s/g/ⲅ/g;
$line =~ s/H/ⲏ/g;
$line =~ s/i/ⲓ/g;
$line =~ s/j/ϫ/g;
$line =~ s/k/ⲕ/g;
$line =~ s/l/ⲗ/g;
$line =~ s/m/ⲙ/g;
$line =~ s/n/ⲛ/g;
$line =~ s/o/ⲟ/g;
$line =~ s/p/ⲡ/g;
$line =~ s/c/ϭ/g;
$line =~ s/r/ⲣ/g;
$line =~ s/s/ⲥ/g;
$line =~ s/t/ⲧ/g;
$line =~ s/u/ⲩ/g;
$line =~ s/P/ⲫ/g;
$line =~ s/w/ⲱ/g;
$line =~ s/h/ϩ/g;
$line =~ s/C/ⲭ/g;
$line =~ s/z/ⲍ/g;
$line =~ s/y/ϣ/g;
$line =~ s/Y/ϯ/g;
$line =~ s/T/ⲯ/g;
$line =~ s/x/ⲝ/g;

}
elsif ($format eq "low")
{
# Lower-case Unicode Coptic: map each upper-case letter to its lower-case
# form. Only these thirty letters are touched; any other text (including
# Latin or Greek capitals) passes through unchanged.
# NOTE(review): the pairs other than fei, janja, chima, hori, shai and ti
# were restored by position (same letter order as the Coptic-font table:
# alpha, beta, theta, delta, ... shai, ti, psi, xi) - confirm.
$line =~ s/Ⲁ/ⲁ/g;
$line =~ s/Ⲃ/ⲃ/g;
$line =~ s/Ⲑ/ⲑ/g;
$line =~ s/Ⲇ/ⲇ/g;
$line =~ s/Ⲉ/ⲉ/g;
$line =~ s/Ϥ/ϥ/g;
$line =~ s/Ⲅ/ⲅ/g;
$line =~ s/Ⲏ/ⲏ/g;
$line =~ s/Ⲓ/ⲓ/g;
$line =~ s/Ϫ/ϫ/g;
$line =~ s/Ⲕ/ⲕ/g;
$line =~ s/Ⲗ/ⲗ/g;
$line =~ s/Ⲙ/ⲙ/g;
$line =~ s/Ⲛ/ⲛ/g;
$line =~ s/Ⲟ/ⲟ/g;
$line =~ s/Ⲡ/ⲡ/g;
$line =~ s/Ϭ/ϭ/g;
$line =~ s/Ⲣ/ⲣ/g;
$line =~ s/Ⲥ/ⲥ/g;
$line =~ s/Ⲧ/ⲧ/g;
$line =~ s/Ⲩ/ⲩ/g;
$line =~ s/Ⲫ/ⲫ/g;
$line =~ s/Ⲱ/ⲱ/g;
$line =~ s/Ϩ/ϩ/g;
$line =~ s/Ⲭ/ⲭ/g;
$line =~ s/Ⲍ/ⲍ/g;
$line =~ s/Ϣ/ϣ/g;
$line =~ s/Ϯ/ϯ/g;
$line =~ s/Ⲯ/ⲯ/g;
$line =~ s/Ⲝ/ⲝ/g;

}
elsif ($format eq "avva_shenouda")
{
# Avva Shenouda font: one key per Coptic letter, with "=" typed BEFORE a
# letter to give it a supralinear stroke. The stroke is moved first, while
# the text is still in its original keys; the letter rules then convert the
# character in front of the combining mark.
# NOTE(review): targets without an inline comment (other than f and j) were
# restored by position, i.e. the same letter order as the Coptic-font table -
# confirm against the Avva Shenouda key chart.
$line =~ s/=(.)/$1̄/g; #place supralinear stroke after character, not before as in avva shenouda
$line =~ s/a/ⲁ/g;
$line =~ s/b/ⲃ/g;
$line =~ s/c/ⲥ/g; #c is sigma
$line =~ s/d/ⲇ/g;
$line =~ s/e/ⲉ/g;
$line =~ s/f/ϥ/g;
$line =~ s/g/ⲅ/g;
$line =~ s/y/ⲏ/g; #y is eta
$line =~ s/i/ⲓ/g;
$line =~ s/j/ϫ/g;
$line =~ s/k/ⲕ/g;
$line =~ s/l/ⲗ/g;
$line =~ s/m/ⲙ/g;
$line =~ s/n/ⲛ/g;
$line =~ s/o/ⲟ/g;
$line =~ s/p/ⲡ/g;
$line =~ s/\[/ϭ/g; #chima is an opening square bracket
$line =~ s/r/ⲣ/g;
$line =~ s/\;/ⲑ/g; #semi colon is theta
$line =~ s/t/ⲧ/g;
$line =~ s/u/ⲩ/g;
$line =~ s/v/ⲫ/g;
$line =~ s/w/ⲱ/g;
$line =~ s/h/ϩ/g; #h is hori
# The original had a second rule on key y in this (chi) slot. Every y has
# already been turned into eta above, so that rule could never fire.
# TODO: find the real key for chi.
#$line =~ s/y/ⲭ/g;
$line =~ s/z/ⲍ/g;
$line =~ s/s/ϣ/g; #s is shai
$line =~ s/\]/ϯ/g; #right square bracket is ti
# Likewise a second rule on key v sat in this (psi) slot; v is already
# consumed above. TODO: find the real key for psi.
#$line =~ s/v/ⲯ/g;
$line =~ s/x/ⲝ/g;

}

print $line ."\n";
}

0 comments on commit 8480b20

Please sign in to comment.