Commit 7451594 (0 parents)
Crawford Collins committed Jan 4, 2022
Showing 12 changed files with 978 additions and 0 deletions.
@@ -0,0 +1 @@
@@ -0,0 +1,32 @@
{
    "type": "application",
    "source-directories": [
        "src"
    ],
    "elm-version": "0.19.1",
    "dependencies": {
        "direct": {
            "NoRedInk/elm-json-decode-pipeline": "1.0.0",
            "elm/browser": "1.0.2",
            "elm/core": "1.0.5",
            "elm/html": "1.0.0",
            "elm/http": "2.0.0",
            "elm/json": "1.1.3",
            "elm/url": "1.0.0",
            "elm-community/list-extra": "8.5.1",
            "krisajenkins/remotedata": "6.0.1",
            "mdgriffith/elm-ui": "1.1.8",
            "ohanhi/remotedata-http": "4.0.0"
        },
        "indirect": {
            "elm/bytes": "1.0.8",
            "elm/file": "1.0.5",
            "elm/time": "1.0.0",
            "elm/virtual-dom": "1.0.2"
        }
    },
    "test-dependencies": {
        "direct": {},
        "indirect": {}
    }
}
@@ -0,0 +1,166 @@
import glob
import json
import re
import sqlite3


def create_scrubbed_lines():
    """Create the fts5 virtual table that holds accent-stripped lines for full-text search."""
    sql = sqlite3.connect("db.sqlite3")
    try:
        sql.execute("DROP TABLE scrubbedLines")
    except sqlite3.OperationalError:
        pass
    sql.execute(
        """CREATE VIRTUAL TABLE scrubbedLines USING fts5(
        line, lineNumber UNINDEXED, bookId UNINDEXED, chapter UNINDEXED, subsection UNINDEXED
        );""")
    sql.close()
    return 1


def add_scrubbed_lines_to_db(chapter, book, subsection):
    """Strip breathings, accents and iota subscripts from one chapter and insert each line into scrubbedLines."""
    file = f"fulltexts/{book}/{book}{chapter:02d}.txt"
    with open(file) as f:
        # Collapse every accented form of a vowel onto its plain base letter.
        text = re.sub(pattern=r"(Ἀ|Ἁ|Ἂ|Ἃ|Ἄ|Ἅ|Ἆ|Ἇ|Ὰ|Ά|Ᾰ|Ᾱ|ᾼ|ᾈ|ᾉ|ᾊ|ᾋ|ᾌ|ᾍ|ᾎ|ᾏ)", repl="Α", string=f.read(), count=100_000)
        text = re.sub(pattern=r"(Ἐ|Ἑ|Ἒ|Ἓ|Ἔ|Ἕ|Ὲ|Έ)", repl="Ε", string=text, count=100_000)
        text = re.sub(pattern=r"(Ἠ|Ἡ|Ἢ|Ἣ|Ἤ|Ἥ|Ἦ|Ἧ|Ὴ|Ή|ῌ|ᾘ|ᾙ|ᾚ|ᾛ|ᾜ|ᾝ|ᾞ|ᾟ)", repl="Η", string=text, count=100_000)
        text = re.sub(pattern=r"(Ἰ|Ἱ|Ἲ|Ἳ|Ἴ|Ἵ|Ἶ|Ἷ|Ὶ|Ί|Ῐ|Ῑ)", repl="Ι", string=text, count=100_000)
        text = re.sub(pattern=r"(Ὑ|Ὓ|Ὕ|Ὗ|Ὺ|Ύ|Ῠ|Ῡ)", repl="Υ", string=text, count=100_000)
        text = re.sub(pattern=r"(ἀ|ἁ|ἂ|ἃ|ἄ|ἅ|ἆ|ἇ|ὰ|ά|ᾰ|ᾱ|ᾶ|ᾳ|ᾲ|ᾴ|ᾀ|ᾁ|ᾂ|ᾃ|ᾄ|ᾅ|ᾆ|ᾇ|ᾷ|ά)", repl="α", string=text,
                      count=100_000)
        text = re.sub(pattern=r"(ἐ|ἑ|ἒ|ἓ|ἔ|ἕ|ὲ|έ|έ)", repl="ε", string=text, count=100_000)
        text = re.sub(pattern=r"(ἠ|ἡ|ἢ|ἣ|ἤ|ἥ|ἦ|ἧ|ὴ|ή|ῆ|ῃ|ῂ|ῄ|ᾐ|ᾑ|ᾒ|ᾓ|ᾔ|ᾕ|ᾖ|ᾗ|ῇ|ή)", repl="η", string=text,
                      count=100_000)
        text = re.sub(pattern=r"(ἰ|ἱ|ἲ|ἳ|ἴ|ἵ|ἶ|ἷ|ὶ|ί|ῐ|ῑ|ῖ|ῒ|ΐ|ῗ|ί|ΐ)", repl="ι", string=text, count=100_000)
        text = re.sub(pattern=r"(ὀ|ὁ|ὂ|ὃ|ὄ|ὅ|ὸ|ό|ό)", repl="ο", string=text, count=100_000)
        text = re.sub(pattern=r"(ὑ|ὓ|ὕ|ὗ|ὺ|ύ|ῠ|ῡ|ὐ|ὒ|ὔ|ὖ|ῦ|ῢ|ΰ|ῧ|ύ)", repl="υ", string=text, count=100_000)
        text = re.sub(pattern=r"(ὠ|ὡ|ὢ|ὣ|ὤ|ὥ|ὦ|ὧ|ὼ|ώ|ῶ|ῳ|ῲ|ῴ|ᾠ|ᾡ|ᾢ|ᾣ|ᾤ|ᾥ|ᾦ|ᾧ|ῷ|ώ)", repl="ω", string=text,
                      count=100_000)
        # Move a closing quotation mark that starts a line back to the end of the previous line.
        text = re.sub(pattern=r'\n’', repl='’\n', string=text, count=100_000)
        text = text.split("\n")
        # Drop embedded line numbers and lines that are empty or punctuation only.
        text = [t.strip("0123456789") for t in text]
        text = [i for i in text if i not in ["\n", "", '’', '’ ’', "’’", "‘"]]

    for index, line in enumerate(text, start=1):
        with sqlite3.connect("db.sqlite3") as conn:
            book_id = conn.execute("SELECT id FROM books WHERE title=?", (book,)).fetchone()[0]
            conn.execute(
                "INSERT INTO scrubbedLines VALUES (?,?,?,?,?)", (line, index, book_id, chapter, subsection))
    return 1


def create_full_lines():
    """Create the fullLines table that stores the original, accented text one row per line."""
    sql = sqlite3.connect("db.sqlite3")
    try:
        sql.execute("DROP TABLE fullLines")
    except sqlite3.OperationalError:
        pass
    sql.execute("""
    CREATE TABLE fullLines(
        line TEXT NOT NULL,
        lineNumber INT NOT NULL,
        bookId INT NOT NULL,
        chapter INT NOT NULL,
        subsection INT,
        FOREIGN KEY (bookId) REFERENCES books(id)
    );""")
    sql.execute("CREATE UNIQUE INDEX textLocation on fullLines(bookId,chapter,lineNumber)")
    sql.close()
    return 1


def add_full_lines_to_db(chapter, book, subsection):
    """Insert the original text of one chapter into fullLines, one row per line."""
    file = f"fulltexts/{book}/{book}{chapter:02d}.txt"
    with open(file) as f:
        # Same line clean-up as add_scrubbed_lines_to_db, but the diacritics are left untouched.
        text = re.sub(pattern=r'\n’', repl='’\n', string=f.read(), count=100_000)
        text = text.split("\n")
        text = [t.strip("0123456789") for t in text]
        text = [i for i in text if i not in ["\n", "", '’', '’ ’', "’’", "‘"]]

    for index, line in enumerate(text, start=1):
        with sqlite3.connect("db.sqlite3") as conn:
            book_id = conn.execute("SELECT id FROM books WHERE title=?", (book,)).fetchone()[0]
            conn.execute(
                "INSERT INTO fullLines VALUES (?,?,?,?,?)", (line, index, book_id, chapter, subsection)
            )
    return 1


def create_authors():
    sql = sqlite3.connect("db.sqlite3")
    try:
        sql.execute("DROP TABLE authors")
    except sqlite3.OperationalError:
        pass
    sql.execute("CREATE TABLE authors (id INTEGER PRIMARY KEY AUTOINCREMENT, name TEXT UNIQUE NOT NULL);")
    sql.close()
    return 1


def create_books():
    sql = sqlite3.connect("db.sqlite3")
    try:
        sql.execute("DROP TABLE books")
    except sqlite3.OperationalError:
        pass
    sql.execute(
        "CREATE TABLE books(id INTEGER PRIMARY KEY AUTOINCREMENT, title TEXT, authorId INTEGER, FOREIGN KEY (authorId) REFERENCES authors(id));")
    sql.close()
    return 1


def create_commentary():
    """Rebuild the commentary table and load it from fulltexts/commentary/commentary.py."""
    with sqlite3.connect("db.sqlite3") as sql:
        try:
            sql.execute("DROP TABLE commentary")
        except sqlite3.OperationalError:
            pass
        sql.execute(
            """CREATE TABLE commentary(
            text TEXT,
            commentaryAuthorId INT,
            source TEXT,
            bookId INT,
            chapter INT,
            lineNumber INT,
            FOREIGN KEY (bookId) REFERENCES books(id)
            );""")
        from fulltexts.commentary.commentary import commentary
        # One commentary entry can cover several lines; insert one row per line number.
        for i in commentary:
            for line in i["lineNumber"]:
                confirm = sql.execute(
                    """INSERT INTO commentary(text,commentaryAuthorId,source,bookId,chapter,lineNumber)
                    VALUES(?,?,?,?,?,?)""",
                    (i["text"], i["commentaryAuthorId"], i["source"], i["bookId"], i["chapter"], line)).lastrowid
                if confirm is None:
                    print(f"Error adding commentary {i['source']}, {i['bookId']}, {i['chapter']}, {line}")


def main(metadata):
    create_authors()
    create_books()
    create_full_lines()
    create_scrubbed_lines()
    create_commentary()
    for i in metadata:
        # Insert the author; ignore duplicates when one author has several books.
        try:
            with sqlite3.connect("db.sqlite3") as conn:
                conn.execute("INSERT INTO authors(name) VALUES (?)", (i["author"],))
        except sqlite3.IntegrityError:
            pass
        # Insert the book, then load every chapter file found under its path.
        with sqlite3.connect("db.sqlite3") as conn:
            _, author_id = conn.execute("SELECT name,id FROM authors WHERE name=?", (i["author"],)).fetchone()
            conn.execute("INSERT INTO books(title,authorId) VALUES (?,?)", (i["book"], author_id))
        for j in glob.glob(f'{i["path"]}*'):
            print(j)
            chapter = int(j.replace(i["path"], "").replace(i["book"], "").replace(".txt", ""))
            add_scrubbed_lines_to_db(chapter, i["book"], None)
            add_full_lines_to_db(chapter, i["book"], None)


if __name__ == "__main__":
    # metadata = [
    #     {"author": "homer", "book": "iliad", "path": "fulltexts/iliad/"},
    #     {"author": "homer", "book": "odyssey", "path": "fulltexts/odyssey/"}
    # ]
    # main(metadata)
    create_commentary()
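The point of the scrubbedLines fts5 table is that the accent-stripped text can be searched with SQLite's MATCH operator. Below is a minimal sketch of what a lookup against the populated db.sqlite3 could look like; the search_lines helper and the example search term are illustrative and not part of this commit.

import sqlite3

def search_lines(term):
    """Hypothetical helper: full-text search over the accent-stripped lines."""
    with sqlite3.connect("db.sqlite3") as conn:
        # MATCH searches the indexed `line` column; the UNINDEXED columns are
        # returned as payload so the hit can be located in the source text.
        return conn.execute(
            "SELECT bookId, chapter, lineNumber, line FROM scrubbedLines WHERE scrubbedLines MATCH ?",
            (term,),
        ).fetchall()

# Example: search_lines("μηνιν") finds lines containing the unaccented form of μῆνιν.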
@@ -0,0 +1,2 @@
import Distribution.Simple
main = defaultMain
@@ -0,0 +1,143 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE DeriveGeneric #-}

module Main where

import Control.Concurrent
import Control.Exception (bracket)
import Control.Monad.IO.Class
import Database.SQLite.Simple
import Network.Wai.Handler.Warp
import Servant
import Network.Wai.Middleware.Cors
import GHC.Generics
import Data.Aeson
import Data.Text

-- One page of a book: the lines around the current position plus the list of all books for navigation.
data BookPage = BookPage
  { title :: Text
  , linesOfText :: ZipListLine
  , chapter :: Int
  , allBooks :: [Book]
  } deriving (Generic)

instance ToJSON BookPage

-- A book page focused on one line, with the commentary attached to that line.
data BookPageCommentary = BookPageCommentary
  { bpcTitle :: Text
  , bpcLinesOfText :: ZipListLine
  , bpcChapter :: Int
  , bpcAllBooks :: [Book]
  , bpcCommentary :: [Commentary]
  , bpcLineNumber :: Int
  } deriving (Generic)

instance ToJSON BookPageCommentary

data Line = Line
  { lineText :: Text
  , lineLineNumber :: Int
  } deriving (Show, Generic)
instance FromRow Line where
  fromRow = Line <$> field <*> field
instance ToJSON Line

-- Lines before (p1), at (p2) and after (p3) the selected line number.
data ZipListLine = ZipListLine
  { p1 :: [Line]
  , p2 :: [Line]
  , p3 :: [Line]
  } deriving (Show, Generic)
instance ToJSON ZipListLine

data Book = Book
  { bookTitle :: Text
  , bookId :: Int
  , numberOfChapters :: Int
  } deriving (Show, Generic)

instance FromRow Book where
  fromRow = Book <$> field <*> field <*> field

instance ToJSON Book

data Commentary = Commentary
  { commentaryText :: Text
  , commentaryAuthorId :: Text
  , commentarySource :: Text
  } deriving (Generic)
instance ToJSON Commentary
instance FromRow Commentary where
  fromRow = Commentary <$> field <*> field <*> field


type API = Get '[JSON] [Book]
      :<|> "books" :> Capture "title" Text :> Capture "chapter" Int :> Get '[JSON] BookPage
      :<|> "books" :> Capture "title" Text :> Capture "chapter" Int :> Capture "lineNumber" Int :> Get '[JSON] BookPageCommentary

allBooksQuery :: Query
allBooksQuery = "SELECT title, bookId, Count(DISTINCT chapter) FROM fullLines INNER JOIN books ON books.id=fullLines.bookId GROUP BY bookId"

queryAllBooks :: FilePath -> IO [Book]
queryAllBooks dbfile = withConnection dbfile $ \conn -> query_ conn allBooksQuery

bookChapterCommentaryQuery :: Query
bookChapterCommentaryQuery = "SELECT text, commentaryAuthorId, source FROM commentary INNER JOIN books ON books.id = commentary.bookId WHERE lineNumber = :lineNumber AND books.title = :title AND chapter = :chapter"

queryBookChapterCommentary :: Text -> Int -> Int -> FilePath -> IO [Commentary]
queryBookChapterCommentary title chapter lineNumber dbfile = withConnection dbfile $ \conn -> queryNamed conn bookChapterCommentaryQuery [":title" := title, ":chapter" := chapter, ":lineNumber" := lineNumber]

zipListLineQuery1 :: Query
zipListLineQuery1 = "SELECT line,lineNumber FROM fullLines INNER JOIN books ON fullLines.bookId = books.id WHERE books.title = :title AND fullLines.chapter = :chapter AND lineNumber < :lineNumber ORDER BY lineNumber ASC"

zipListLineQuery2 :: Query
zipListLineQuery2 = "SELECT line,lineNumber FROM fullLines INNER JOIN books ON fullLines.bookId = books.id WHERE books.title = :title AND fullLines.chapter = :chapter AND lineNumber = :lineNumber ORDER BY lineNumber ASC"

zipListLineQuery3 :: Query
zipListLineQuery3 = "SELECT line,lineNumber FROM fullLines INNER JOIN books ON fullLines.bookId = books.id WHERE books.title = :title AND fullLines.chapter = :chapter AND lineNumber > :lineNumber ORDER BY lineNumber ASC"

-- Run the three queries above so the caller gets the lines before, at and after the requested line.
zipListLineQuery :: Text -> Int -> Int -> FilePath -> IO ZipListLine
zipListLineQuery title chapter lineNumber dbfile = do
  p1 <- withConnection dbfile $ \conn -> queryNamed conn zipListLineQuery1 [":title" := title, ":chapter" := chapter, ":lineNumber" := lineNumber]
  p2 <- withConnection dbfile $ \conn -> queryNamed conn zipListLineQuery2 [":title" := title, ":chapter" := chapter, ":lineNumber" := lineNumber]
  p3 <- withConnection dbfile $ \conn -> queryNamed conn zipListLineQuery3 [":title" := title, ":chapter" := chapter, ":lineNumber" := lineNumber]
  return ZipListLine {p1 = p1, p2 = p2, p3 = p3}

bookChapterQuery :: Query
bookChapterQuery = "SELECT line,lineNumber FROM fullLines INNER JOIN books ON fullLines.bookId = books.id WHERE books.title = :book AND fullLines.chapter = :chapter ORDER BY lineNumber ASC"

queryLines :: Text -> Int -> FilePath -> IO [Line]
queryLines title chapter dbfile = withConnection dbfile $ \conn -> queryNamed conn bookChapterQuery [":book" := title, ":chapter" := chapter]

api :: Proxy API
api = Proxy

server :: FilePath -> Server API
server dbfile = listAllBooks
           :<|> getBookPageWithChapter
           :<|> getBookPageWithChapterWithCommentary
  where
    listAllBooks :: Handler [Book]
    listAllBooks = liftIO (queryAllBooks dbfile)

    getBookPageWithChapter :: Text -> Int -> Handler BookPage
    getBookPageWithChapter title chapter = do
      lines <- liftIO (zipListLineQuery title chapter 1 dbfile)
      allBooks <- liftIO (queryAllBooks dbfile)
      return BookPage {title = title, chapter = chapter, linesOfText = lines, allBooks = allBooks}

    getBookPageWithChapterWithCommentary :: Text -> Int -> Int -> Handler BookPageCommentary
    getBookPageWithChapterWithCommentary title chapter lineNumber = do
      lines <- liftIO (zipListLineQuery title chapter lineNumber dbfile)
      allBooks <- liftIO (queryAllBooks dbfile)
      commentary <- liftIO (queryBookChapterCommentary title chapter lineNumber dbfile)
      return BookPageCommentary {bpcTitle = title, bpcChapter = chapter, bpcLinesOfText = lines, bpcAllBooks = allBooks, bpcCommentary = commentary, bpcLineNumber = lineNumber}


runApp :: FilePath -> IO ()
runApp dbfile = run 8080 (simpleCors $ serve api $ server dbfile)

main :: IO ()
main = do
  -- you could read this from some configuration file,
  -- environment variable or somewhere else instead.
  let dbfile = "../db.sqlite3"
  runApp dbfile
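Once the Warp server above is running, the three routes declared in the API type can be exercised with any HTTP client on port 8080. Below is a minimal sketch using Python's standard library; the localhost base URL, the get helper, and the book title "iliad" (taken from the commented-out metadata in the loader script) are assumptions for illustration only.

import json
import urllib.request

BASE = "http://localhost:8080"  # assumed local host; the port comes from runApp above

def get(path):
    # Illustrative helper: fetch a route and decode the JSON body.
    with urllib.request.urlopen(BASE + path) as resp:
        return json.load(resp)

books = get("/")                  # Get '[JSON] [Book]: list of {bookTitle, bookId, numberOfChapters}
page = get("/books/iliad/1")      # BookPage for chapter 1 of "iliad"
focus = get("/books/iliad/1/10")  # BookPageCommentary focused on line 10

print([b["bookTitle"] for b in books])
print(focus["bpcCommentary"])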