Skip to content

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
Crawford Collins committed Jan 4, 2022
0 parents commit 7451594
Show file tree
Hide file tree
Showing 12 changed files with 978 additions and 0 deletions.
1 change: 1 addition & 0 deletions db_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

32 changes: 32 additions & 0 deletions elm.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
{
"type": "application",
"source-directories": [
"src"
],
"elm-version": "0.19.1",
"dependencies": {
"direct": {
"NoRedInk/elm-json-decode-pipeline": "1.0.0",
"elm/browser": "1.0.2",
"elm/core": "1.0.5",
"elm/html": "1.0.0",
"elm/http": "2.0.0",
"elm/json": "1.1.3",
"elm/url": "1.0.0",
"elm-community/list-extra": "8.5.1",
"krisajenkins/remotedata": "6.0.1",
"mdgriffith/elm-ui": "1.1.8",
"ohanhi/remotedata-http": "4.0.0"
},
"indirect": {
"elm/bytes": "1.0.8",
"elm/file": "1.0.5",
"elm/time": "1.0.0",
"elm/virtual-dom": "1.0.2"
}
},
"test-dependencies": {
"direct": {},
"indirect": {}
}
}
166 changes: 166 additions & 0 deletions make_db.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
import glob
import json
import re
import sqlite3


def create_scrubbed_lines():
    """(Re)create the ``scrubbedLines`` FTS5 virtual table in ``db.sqlite3``.

    Only the ``line`` column is indexed for full-text search; ``lineNumber``,
    ``bookId``, ``chapter`` and ``subsection`` are stored ``UNINDEXED``.
    Any existing table of the same name is dropped first.

    Returns:
        1 on success.

    Raises:
        sqlite3.Error: if the table cannot be dropped or created.
    """
    conn = sqlite3.connect("db.sqlite3")
    try:
        # IF EXISTS covers the "first run" case without hiding real failures
        # (locked database, missing FTS5 support, ...) behind a bare except.
        conn.execute("DROP TABLE IF EXISTS scrubbedLines")
        conn.execute(
            """CREATE VIRTUAL TABLE scrubbedLines USING fts5(
            line, lineNumber UNINDEXED, bookId UNINDEXED, chapter UNINDEXED, subsection UNINDEXED,
            );""")
        conn.commit()
    finally:
        # Release the database handle even when the DDL above raises.
        conn.close()
    return 1


def add_scrubbed_lines_to_db(chapter, book, subsection):
    """Load one chapter into ``scrubbedLines`` with accented vowels folded.

    Reads ``fulltexts/{book}/{book}{chapter:02d}.txt``, replaces every listed
    accented/breathed Greek vowel with its bare letter, splits the text into
    lines, strips leading and trailing digits from each line, drops lines
    that are empty or contain only quotation marks, and inserts the rest
    numbered from 1.

    Args:
        chapter: chapter number; formatted as two digits in the file name.
        book: book title; used for the directory, the file name and the
            lookup in the ``books`` table.
        subsection: value stored in the ``subsection`` column (may be None).

    Returns:
        1 on success.

    Raises:
        LookupError: if ``book`` has no row in the ``books`` table.
        OSError: if the chapter file cannot be read.
        sqlite3.Error: on database failures; no line of the chapter is
            inserted in that case.
    """
    # (pattern, bare letter) pairs, applied to the whole text with no
    # replacement cap. Note there is no pair for capital omicron or omega.
    folds = (
        (r"(Ἀ|Ἁ|Ἂ|Ἃ|Ἄ|Ἅ|Ἆ|Ἇ|Ὰ|Ά|Ᾰ|Ᾱ|ᾼ|ᾈ|ᾉ|ᾊ|ᾋ|ᾌ|ᾍ|ᾎ|ᾏ)", "Α"),
        (r"(Ἐ|Ἑ|Ἒ|Ἓ|Ἔ|Ἕ|Ὲ|Έ)", "Ε"),
        (r"(Ἠ|Ἡ|Ἢ|Ἣ|Ἤ|Ἥ|Ἦ|Ἧ|Ὴ|Ή|ῌ|ᾘ|ᾙ|ᾚ|ᾛ|ᾜ|ᾝ|ᾞ|ᾟ)", "Η"),
        (r"(Ἰ|Ἱ|Ἲ|Ἳ|Ἴ|Ἵ|Ἶ|Ἷ|Ὶ|Ί|Ῐ|Ῑ)", "Ι"),
        (r"(Ὑ|Ὓ|Ὕ|Ὗ|Ὺ|Ύ|Ῠ|Ῡ)", "Υ"),
        (r"(ἀ|ἁ|ἂ|ἃ|ἄ|ἅ|ἆ|ἇ|ὰ|ά|ᾰ|ᾱ|ᾶ|ᾳ|ᾲ|ᾴ|ᾀ|ᾁ|ᾂ|ᾃ|ᾄ|ᾅ|ᾆ|ᾇ|ᾷ|ά)", "α"),
        (r"(ἐ|ἑ|ἒ|ἓ|ἔ|ἕ|ὲ|έ|έ)", "ε"),
        (r"(ἠ|ἡ|ἢ|ἣ|ἤ|ἥ|ἦ|ἧ|ὴ|ή|ῆ|ῃ|ῂ|ῄ|ᾐ|ᾑ|ᾒ|ᾓ|ᾔ|ᾕ|ᾖ|ᾗ|ῇ|ή)", "η"),
        (r"(ἰ|ἱ|ἲ|ἳ|ἴ|ἵ|ἶ|ἷ|ὶ|ί|ῐ|ῑ|ῖ|ῒ|ΐ|ῗ|ί|ΐ)", "ι"),
        (r"(ὀ|ὁ|ὂ|ὃ|ὄ|ὅ|ὸ|ό|ό)", "ο"),
        (r"(ὑ|ὓ|ὕ|ὗ|ὺ|ύ|ῠ|ῡ|ὐ|ὒ|ὔ|ὖ|ῦ|ῢ|ΰ|ῧ|ύ)", "υ"),
        (r"(ὠ|ὡ|ὢ|ὣ|ὤ|ὥ|ὦ|ὧ|ὼ|ώ|ῶ|ῳ|ῲ|ῴ|ᾠ|ᾡ|ᾢ|ᾣ|ᾤ|ᾥ|ᾦ|ᾧ|ῷ|ώ)", "ω"),
    )

    file = f"fulltexts/{book}/{book}{chapter:02d}.txt"
    # NOTE(review): the source files are assumed to be UTF-8; the explicit
    # encoding keeps the result independent of the platform default.
    with open(file, encoding="utf-8") as f:
        text = f.read()
    for pattern, bare in folds:
        text = re.sub(pattern, bare, text)
    # A closing quote that begins a line is moved to the end of the line
    # before it.
    text = re.sub(r'\n’', '’\n', text)

    lines = [t.strip("0123456789") for t in text.split("\n")]
    lines = [t for t in lines if t not in ["\n", "", '’', '’ ’', "’’", "‘"]]

    conn = sqlite3.connect("db.sqlite3")
    try:
        # The book id is the same for every line, so look it up once.
        row = conn.execute("SELECT id FROM books WHERE title=?", (book,)).fetchone()
        if row is None:
            raise LookupError(f"book {book!r} is not present in the books table")
        book_id = row[0]
        # One transaction for the whole chapter: all lines or none.
        with conn:
            conn.executemany(
                "INSERT INTO scrubbedLines VALUES (?,?,?,?,?)",
                ((line, index, book_id, chapter, subsection)
                 for index, line in enumerate(lines, start=1)))
    finally:
        conn.close()
    return 1


def create_full_lines():
    """(Re)create the ``fullLines`` table and its unique location index.

    ``fullLines`` stores the unmodified text of each line together with its
    line number, book id, chapter and optional subsection. The
    ``textLocation`` index makes (bookId, chapter, lineNumber) unique.
    Any existing table of the same name is dropped first.

    Returns:
        1 on success.

    Raises:
        sqlite3.Error: if the table or index cannot be (re)created.
    """
    conn = sqlite3.connect("db.sqlite3")
    try:
        # IF EXISTS handles the first run without swallowing real errors.
        conn.execute("DROP TABLE IF EXISTS fullLines")
        conn.execute("""
            CREATE TABLE fullLines(
            line TEXT NOT NULL,
            lineNumber INT NOT NULL,
            bookId INT NOT NULL,
            chapter INT NOT NULL,
            subsection INT,
            FOREIGN KEY (bookId) REFERENCES books(id)
            );""")
        conn.execute("CREATE UNIQUE INDEX textLocation on fullLines(bookId,chapter,lineNumber)")
        conn.commit()
    finally:
        # Release the database handle even when the DDL above raises.
        conn.close()
    return 1


def add_full_lines_to_db(chapter, book, subsection):
    """Load one chapter, with its original accents, into ``fullLines``.

    Reads ``fulltexts/{book}/{book}{chapter:02d}.txt``, splits it into lines,
    strips leading and trailing digits from each line, drops lines that are
    empty or contain only quotation marks, and inserts the rest numbered
    from 1.

    Args:
        chapter: chapter number; formatted as two digits in the file name.
        book: book title; used for the directory, the file name and the
            lookup in the ``books`` table.
        subsection: value stored in the ``subsection`` column (may be None).

    Returns:
        1 on success.

    Raises:
        LookupError: if ``book`` has no row in the ``books`` table.
        OSError: if the chapter file cannot be read.
        sqlite3.Error: on database failures; no line of the chapter is
            inserted in that case.
    """
    file = f"fulltexts/{book}/{book}{chapter:02d}.txt"
    # NOTE(review): the source files are assumed to be UTF-8; the explicit
    # encoding keeps the result independent of the platform default.
    with open(file, encoding="utf-8") as f:
        # A closing quote that begins a line is moved to the end of the
        # line before it.
        text = re.sub(r'\n’', '’\n', f.read())

    lines = [t.strip("0123456789") for t in text.split("\n")]
    lines = [t for t in lines if t not in ["\n", "", '’', '’ ’', "’’", "‘"]]

    conn = sqlite3.connect("db.sqlite3")
    try:
        # The book id is the same for every line, so look it up once.
        row = conn.execute("SELECT id FROM books WHERE title=?", (book,)).fetchone()
        if row is None:
            raise LookupError(f"book {book!r} is not present in the books table")
        book_id = row[0]
        # One transaction for the whole chapter: all lines or none.
        with conn:
            conn.executemany(
                "INSERT INTO fullLines VALUES (?,?,?,?,?)",
                ((line, index, book_id, chapter, subsection)
                 for index, line in enumerate(lines, start=1)))
    finally:
        conn.close()
    return 1


def create_authors():
    """(Re)create the ``authors`` table in ``db.sqlite3``.

    The table has an auto-incrementing integer ``id`` and a unique,
    non-null ``name``. Any existing table of the same name is dropped first.

    Returns:
        1 on success.

    Raises:
        sqlite3.Error: if the table cannot be dropped or created.
    """
    conn = sqlite3.connect("db.sqlite3")
    try:
        # IF EXISTS handles the first run without swallowing real errors.
        conn.execute("DROP TABLE IF EXISTS authors")
        conn.execute("CREATE TABLE authors (id INTEGER PRIMARY KEY AUTOINCREMENT, name TEXT UNIQUE NOT NULL);")
        conn.commit()
    finally:
        # Release the database handle even when the DDL above raises.
        conn.close()
    return 1


def create_books():
    """(Re)create the ``books`` table in ``db.sqlite3``.

    Each book has an auto-incrementing ``id``, a ``title`` and an
    ``authorId`` that refers to ``authors(id)``. Any existing table of the
    same name is dropped first.

    Returns:
        1 on success.

    Raises:
        sqlite3.Error: if the table cannot be dropped or created.
    """
    conn = sqlite3.connect("db.sqlite3")
    try:
        # IF EXISTS handles the first run without swallowing real errors.
        conn.execute("DROP TABLE IF EXISTS books")
        # The foreign key must name the real parent table, ``authors``;
        # a reference to a missing table makes inserts fail as soon as
        # foreign-key enforcement is switched on.
        conn.execute(
            "CREATE TABLE books(id INTEGER PRIMARY KEY AUTOINCREMENT, title TEXT, authorId INTEGER, FOREIGN KEY (authorId) REFERENCES authors(id));")
        conn.commit()
    finally:
        # Release the database handle even when the DDL above raises.
        conn.close()
    return 1

def create_commentary():
    """(Re)create the ``commentary`` table and fill it from the project data.

    The rows come from the ``commentary`` list in
    ``fulltexts.commentary.commentary``. Each entry is a mapping with the
    keys ``text``, ``commentaryAuthorId``, ``source``, ``bookId``,
    ``chapter`` and ``lineNumber``; ``lineNumber`` is iterated, and one row
    is inserted per line number it yields.

    Raises:
        sqlite3.Error: if the table cannot be (re)created or a row cannot
            be inserted; nothing is committed in that case.
    """
    # Imported here, as in the original, so that merely importing this
    # module does not require the commentary data to be present.
    from fulltexts.commentary.commentary import commentary

    conn = sqlite3.connect("db.sqlite3")
    try:
        with conn:  # commit on success, roll back on error
            # IF EXISTS handles the first run without swallowing real errors.
            conn.execute("DROP TABLE IF EXISTS commentary")
            conn.execute(
                """CREATE TABLE commentary(
                text TEXT,
                commentaryAuthorId INT,
                source TEXT,
                bookId INT,
                chapter INT,
                lineNumber INT,
                FOREIGN KEY (bookId) REFERENCES books(id)
                );""")
            for i in commentary:
                for line in i["lineNumber"]:
                    confirm = conn.execute(
                        """INSERT INTO commentary(text,commentaryAuthorId,source,bookId,chapter,lineNumber)
                        VALUES(?,?,?,?,?,?)""",
                        (i["text"], i["commentaryAuthorId"], i["source"], i["bookId"], i["chapter"], line)).lastrowid
                    if confirm is None:
                        print(f"Error adding commentary {i['source']}, {i['bookId']}, {i['chapter']}, {line}")
    finally:
        # ``with conn`` only ends the transaction; close the handle as well.
        conn.close()

def main(metadata):
    """Rebuild every table and load all chapters described by ``metadata``.

    Args:
        metadata: iterable of mappings with the keys ``author``, ``book``
            and ``path``. ``path`` is a directory prefix; every file
            matching ``{path}*`` is loaded, and its chapter number is what
            remains of the file name after removing ``path``, ``book`` and
            ``.txt``.

    Raises:
        ValueError: if a matched file name does not reduce to an integer.
    """
    create_authors()
    create_books()
    create_full_lines()
    create_scrubbed_lines()
    create_commentary()
    for entry in metadata:
        conn = sqlite3.connect("db.sqlite3")
        try:
            # Insert Author; the name is UNIQUE, so an author shared by
            # several books is simply kept from the first insert.
            try:
                with conn:
                    conn.execute("INSERT INTO authors(name) VALUES (?)", (entry["author"],))
            except sqlite3.IntegrityError:
                pass
            # Insert Book. The transaction is committed before the line
            # loaders run, because they look the book up on their own
            # connections.
            with conn:
                _, author_id = conn.execute(
                    "SELECT name,id FROM authors WHERE name=?", (entry["author"],)).fetchone()
                conn.execute("INSERT INTO books(title,authorId) VALUES (?,?)", (entry["book"], author_id))
        finally:
            # ``with conn`` only ends the transaction; close the handle too.
            conn.close()
        # Insert Lines
        for j in glob.glob(f'{entry["path"]}*'):
            print(j)
            chapter = int(j.replace(entry["path"], "").replace(entry["book"], "").replace(".txt", ""))
            add_scrubbed_lines_to_db(chapter, entry["book"], None)
            add_full_lines_to_db(chapter, entry["book"], None)


if __name__ == "__main__":
    # Script entry point. As committed, only the commentary table is
    # rebuilt; the full rebuild via main() is kept below for reference.
    #
    # metadata = [
    #     {"author": "homer", "book": "iliad", "path": "fulltexts/iliad/"},
    #     {"author": "homer", "book": "odyssey", "path": "fulltexts/odyssey/"}
    # ]
    # main(metadata)
    create_commentary()
2 changes: 2 additions & 0 deletions server/Setup.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
import Distribution.Simple
-- | Cabal setup script entry point; defers entirely to 'defaultMain'.
main :: IO ()
main = defaultMain
143 changes: 143 additions & 0 deletions server/app/Main.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE DeriveGeneric #-}

module Main where

import Control.Concurrent
import Control.Exception (bracket)
import Control.Monad.IO.Class
import Database.SQLite.Simple
import Network.Wai.Handler.Warp
import Servant
import Network.Wai.Middleware.Cors
import GHC.Generics
import Data.Aeson
import Data.Text

-- | Response body for a chapter page requested without a line number.
data BookPage = BookPage
  { title :: Text
    -- ^ Book title, as given in the request path.
  , linesOfText :: ZipListLine
    -- ^ Lines of the chapter, split around a focus line.
  , chapter :: Int
    -- ^ Chapter number, as given in the request path.
  , allBooks :: [Book]
    -- ^ Every book known to the database.
  }
  deriving (Generic)

-- | Encoded through the 'Generic' default.
instance ToJSON BookPage

-- | Response body for a chapter page focused on one line, together with
-- the commentary attached to that line.
data BookPageCommentary = BookPageCommentary
  { bpcTitle :: Text
    -- ^ Book title, as given in the request path.
  , bpcLinesOfText :: ZipListLine
    -- ^ Lines of the chapter, split around the focus line.
  , bpcChapter :: Int
    -- ^ Chapter number, as given in the request path.
  , bpcAllBooks :: [Book]
    -- ^ Every book known to the database.
  , bpcCommentary :: [Commentary]
    -- ^ Commentary rows found for the focus line.
  , bpcLineNumber :: Int
    -- ^ The focus line number, as given in the request path.
  }
  deriving (Generic)

-- | Encoded through the 'Generic' default.
instance ToJSON BookPageCommentary

-- | One line of text together with its line number.
data Line = Line
  { lineText :: Text
  , lineLineNumber :: Int
  }
  deriving (Show, Generic)

-- | Reads two columns, in order: the text, then the line number.
instance FromRow Line where
  fromRow = Line <$> field <*> field

-- | Encoded through the 'Generic' default.
instance ToJSON Line

-- | A chapter split around a focus line: 'zipListLineQuery' fills 'p1'
-- with the lines before it, 'p2' with the lines at it, and 'p3' with the
-- lines after it.
data ZipListLine = ZipListLine
  { p1 :: [Line]
  , p2 :: [Line]
  , p3 :: [Line]
  }
  deriving (Show, Generic)

-- | Encoded through the 'Generic' default.
instance ToJSON ZipListLine

-- | Summary of one book as listed by 'queryAllBooks'.
data Book = Book
  { bookTitle :: Text
  , bookId :: Int
  , numberOfChapters :: Int
  }
  deriving (Show, Generic)

-- | Reads three columns, in order: title, book id, chapter count.
instance FromRow Book where
  fromRow = Book <$> field <*> field <*> field

-- | Encoded through the 'Generic' default.
instance ToJSON Book

-- | One commentary entry attached to a line.
--
-- NOTE(review): 'commentaryAuthorId' is decoded as 'Text', but the loader
-- script declares that column as @INT@. Confirm what is actually stored,
-- since a type mismatch would presumably fail when the row is decoded.
data Commentary = Commentary
  { commentaryText :: Text
  , commentaryAuthorId :: Text
  , commentarySource :: Text
  }
  deriving (Generic)

-- | Encoded through the 'Generic' default.
instance ToJSON Commentary

-- | Reads three columns, in order: text, author id, source.
instance FromRow Commentary where
  fromRow = Commentary <$> field <*> field <*> field


-- | The HTTP interface.
--
-- * @GET /@ lists every book.
-- * @GET /books/:title/:chapter@ returns a chapter page.
-- * @GET /books/:title/:chapter/:lineNumber@ returns a chapter page
--   focused on one line, with that line's commentary.
type API =
  Get '[JSON] [Book]
    :<|> "books"
      :> Capture "title" Text
      :> Capture "chapter" Int
      :> Get '[JSON] BookPage
    :<|> "books"
      :> Capture "title" Text
      :> Capture "chapter" Int
      :> Capture "lineNumber" Int
      :> Get '[JSON] BookPageCommentary

-- | Lists each book that has at least one row in @fullLines@, with its
-- title, id and number of distinct chapters.
allBooksQuery :: Query
allBooksQuery =
  "SELECT title, bookId, Count(DISTINCT chapter) FROM fullLines Inner JOIN books on books.id=fullLines.bookId GROUP BY bookId"

-- | Runs 'allBooksQuery' against the database file at the given path.
queryAllBooks :: FilePath -> IO [Book]
queryAllBooks dbfile = withConnection dbfile (`query_` allBooksQuery)

-- | Selects the commentary rows for one line of one chapter of the book
-- with the given title.
bookChapterCommentaryQuery :: Query
bookChapterCommentaryQuery =
  "SELECT text, commentaryAuthorId, source FROM commentary INNER JOIN books ON books.id = commentary.bookId WHERE lineNumber = :lineNumber AND books.title = :title AND chapter = :chapter"

-- | Runs 'bookChapterCommentaryQuery' for a title, chapter and line
-- number against the database file at the given path.
queryBookChapterCommentary :: Text -> Int -> Int -> FilePath -> IO [Commentary]
queryBookChapterCommentary bookName chapterNo lineNo dbfile =
  withConnection dbfile runQuery
  where
    runQuery conn = queryNamed conn bookChapterCommentaryQuery bindings
    bindings =
      [ ":title" := bookName
      , ":chapter" := chapterNo
      , ":lineNumber" := lineNo
      ]

-- | Lines of a chapter strictly before the focus line, in ascending order.
zipListLineQuery1 :: Query
zipListLineQuery1 =
  "Select line,lineNumber FROM fullLines INNER JOIN books ON fullLines.bookId = books.id WHERE books.title = :title AND fullLines.chapter = :chapter AND lineNumber < :lineNumber ORDER BY lineNumber ASC"

-- | Lines of a chapter whose number equals the focus line.
zipListLineQuery2 :: Query
zipListLineQuery2 =
  "Select line,lineNumber FROM fullLines INNER JOIN books ON fullLines.bookId = books.id WHERE books.title = :title AND fullLines.chapter = :chapter AND lineNumber = :lineNumber ORDER BY lineNumber ASC"

-- | Lines of a chapter strictly after the focus line, in ascending order.
zipListLineQuery3 :: Query
zipListLineQuery3 =
  "Select line,lineNumber FROM fullLines INNER JOIN books ON fullLines.bookId = books.id WHERE books.title = :title AND fullLines.chapter = :chapter AND lineNumber > :lineNumber ORDER BY lineNumber ASC"

-- | Fetches a chapter split around a focus line.
--
-- Arguments are the book title, the chapter number, the focus line number
-- and the path of the database file. All three parts are read through a
-- single connection, which 'withConnection' closes afterwards, instead of
-- opening the database once per part.
zipListLineQuery :: Text -> Int -> Int -> FilePath -> IO ZipListLine
zipListLineQuery title chapter lineNumber dbfile =
  withConnection dbfile $ \conn -> do
    let bindings =
          [ ":title" := title
          , ":chapter" := chapter
          , ":lineNumber" := lineNumber
          ]
        -- Every part takes the same three named parameters.
        part sql = queryNamed conn sql bindings
    before <- part zipListLineQuery1
    focus <- part zipListLineQuery2
    after <- part zipListLineQuery3
    return ZipListLine {p1 = before, p2 = focus, p3 = after}

-- | Selects every line of one chapter of the book with the given title,
-- in ascending line order.
bookChapterQuery :: Query
bookChapterQuery =
  "Select line,lineNumber FROM fullLines INNER JOIN books ON fullLines.bookId = books.id WHERE books.title = :book AND fullLines.chapter = :chapter ORDER BY lineNumber ASC"

-- | Runs 'bookChapterQuery' for a title and chapter against the database
-- file at the given path.
queryLines :: Text -> Int -> FilePath -> IO [Line]
queryLines bookName chapterNo dbfile =
  withConnection dbfile runQuery
  where
    runQuery conn =
      queryNamed conn bookChapterQuery [":book" := bookName, ":chapter" := chapterNo]

-- | Type-level handle on 'API', passed to 'serve'.
api :: Proxy API
api = Proxy

-- | Request handlers for 'API', in the order the routes are declared.
-- Every handler reads from the SQLite file at the given path.
server :: FilePath -> Server API
server dbfile =
  listAllBooks
    :<|> getBookPageWithChapter
    :<|> getBookPageWithChapterWithCommentary
  where
    -- Local names deliberately differ from the record selectors
    -- (title, chapter, allBooks) and from 'lines' to avoid shadowing.

    -- Every book known to the database.
    listAllBooks :: Handler [Book]
    listAllBooks = liftIO (queryAllBooks dbfile)

    -- Chapter page with no line chosen: the split is taken around line 1.
    getBookPageWithChapter :: Text -> Int -> Handler BookPage
    getBookPageWithChapter bookName chapterNo = liftIO $ do
      pageLines <- zipListLineQuery bookName chapterNo 1 dbfile
      books <- queryAllBooks dbfile
      return
        BookPage
          { title = bookName
          , chapter = chapterNo
          , linesOfText = pageLines
          , allBooks = books
          }

    -- Chapter page split around the requested line, plus its commentary.
    getBookPageWithChapterWithCommentary :: Text -> Int -> Int -> Handler BookPageCommentary
    getBookPageWithChapterWithCommentary bookName chapterNo lineNo = liftIO $ do
      pageLines <- zipListLineQuery bookName chapterNo lineNo dbfile
      books <- queryAllBooks dbfile
      notes <- queryBookChapterCommentary bookName chapterNo lineNo dbfile
      return
        BookPageCommentary
          { bpcTitle = bookName
          , bpcChapter = chapterNo
          , bpcLinesOfText = pageLines
          , bpcAllBooks = books
          , bpcCommentary = notes
          , bpcLineNumber = lineNo
          }




-- | Serves 'api' on port 8080, wrapped in the 'simpleCors' middleware,
-- reading from the database file at the given path.
runApp :: FilePath -> IO ()
runApp dbfile = run 8080 application
  where
    application = simpleCors (serve api (server dbfile))


-- | Program entry point: starts the server against @../db.sqlite3@.
--
-- The path is fixed here; it could instead be read from a configuration
-- file or an environment variable.
main :: IO ()
main = runApp "../db.sqlite3"
Loading

0 comments on commit 7451594

Please sign in to comment.