# makelatex

#!/usr/bin/env python
import sys
import urllib2
from bs4 import BeautifulSoup

# Validate the command line: exactly one argument, the homework number.
# sys.exit(<string>) prints the message to stderr and exits with status 1,
# so a shell or makefile can tell a usage error apart from a successful run.
if len(sys.argv) != 2:
    sys.exit("Usage: makelatex <hw number>")

# The argument is spliced into the download URL and the output filename
# further down, so reject anything int() cannot parse.
try:
    int(sys.argv[1])
except ValueError:
    sys.exit("Usage: makelatex <hw number> "
             "(where hw number is actually a number)")

# Build the URL of the requested homework page and download it.
url = 'https://www.usna.edu/Users/cs/aviv/classes/ic221/s18/hw/'
url += sys.argv[1]+'/hw.html'
try:
    rawhtml = urllib2.urlopen(url)
except urllib2.URLError as err:
    # HTTPError (e.g. a 404 for a homework that does not exist) is a
    # subclass of URLError, so this covers network and HTTP failures alike.
    sys.exit("Could not fetch " + url + ": " + str(err))

# Parse the page, then release the connection. BeautifulSoup reads the whole
# response inside its constructor, so the handle is not needed afterwards.
try:
    html = BeautifulSoup(rawhtml, 'html.parser')
finally:
    rawhtml.close()

# The questions are taken from the first <ol> on the page; without one there
# is nothing to convert, so stop with a clear message instead of crashing on
# None below.
html = html.find("ol")
if html is None:
    sys.exit("No <ol> question list found at " + url)

# get all list items (questions and nested sub-questions) & code snippets
question_list = html.find_all("li")
code_snippits = html.find_all("pre")

# Open the output file (hw<N>.tex) and emit the LaTeX preamble.
latexfile = open("hw" + sys.argv[1] + ".tex", "w")

# One list entry per line of LaTeX output. Raw strings keep every backslash
# literal; the trailing empty entries produce the blank lines that follow
# \begin{enumerate}.
l_header = "\n".join([
    r"\documentclass{article}[9pt]",
    r"\usepackage{listings}",
    r"\usepackage{fullpage}",
    r"\usepackage{textcomp}",
    r"\usepackage{mdframed}",
    r"\lstset{ %",
    r"basicstyle=\ttfamily\scriptsize,",
    r"commentstyle=\ttfamily\scriptsize\emph,",
    "upquote=true,",
    "framerule=1.25pt,",
    "breaklines=true,",
    "showstringspaces=false,",
    "escapeinside={(*@}{@*)},",
    "belowskip=2em,",
    "aboveskip=1em,",
    r"}\newenvironment{answerfont}{\fontfamily{qhv}\selectfont}{\par}",
    "",
    (r"\newenvironment{myanswer}{\begin{mdframed}"
     r"\begin{answerfont}}{\end{answerfont}\end{mdframed}}"),
    r"\title{HW " + sys.argv[1] + "}",
    r"\author{NAME GOES HERE}",
    r"\date{DATE GOES HERE}",
    r"\begin{document}\maketitle",
    r"\section*{Questions}",
    r"\label{sec:orgfce6862}",
    r"\begin{enumerate}",
    "",
    "",
    "",
])

latexfile.write(l_header)

i = 0  # next free index in questions / sub_questions
questions = {}  # index -> question text with its sub-question text removed
sub_questions = {}  # index -> list of nested <li> tags for that question
last = "notinanything"  # text of the most recently kept question

# Walk every <li>. find_all also returned the nested sub-question items, so
# keep only the enclosing questions and peel their sub-questions off.
for question in question_list:
    # A nested item's text is contained in the text of the question that
    # encloses it; that containment is how sub-questions are recognised and
    # skipped here. `last` is only updated for kept questions, so every
    # nested item of the same question is compared against its parent.
    if question.text in last:
        continue

    # add question in string format to questions array
    questions[i] = question.text

    # Inspect the tag tree rather than the visible text: .text never
    # contains markup, so searching it for the letters "ol"/"li" would only
    # find sub-questions by coincidence of wording.
    parts = question.find_all("li")
    if parts:
        sub_questions[i] = parts
        # remove all sub-question text from the parent question
        for j in parts:
            questions[i] = questions[i].replace(j.text, "")

    last = question.text  # lets the next items be checked against this one
    i += 1

# Escape each question for LaTeX and wrap its code snippets in lstlisting.
for i in range(len(questions)):
    # Escape the backslash first so the backslashes introduced for "_" and
    # "&" are not doubled themselves.
    questions[i] = (questions[i].replace("\\", "\\\\")
                                .replace("_", "\\_")
                                .replace("&", "\\&"))

    # Escaped snippet texts already wrapped in this question. str.replace
    # wraps every occurrence at once, so an identical snippet met later must
    # not be wrapped a second time (it would nest one listing in another).
    wrapped = set()

    # Iterate over a snapshot: matched snippets are removed from
    # code_snippits below, and removing from the list being iterated would
    # silently skip the snippet that follows each match.
    for snip in list(code_snippits):
        # snippets shorter than 15 characters are left as they are
        if len(snip.text) < 15:
            continue

        # The question text was escaped above, so search for the snippet in
        # its escaped form, but put the raw text into the listing.
        test = (snip.text.replace("\\", "\\\\")
                         .replace("_", "\\_")
                         .replace("&", "\\&"))
        if test in wrapped:
            continue
        if test in questions[i]:
            newval = "\n\\begin{lstlisting}\n"+snip.text+"\n\\end{lstlisting}\n"
            questions[i] = questions[i].replace(test, newval)
            wrapped.add(test)
            code_snippits.remove(snip)

# Write every question, followed either by its sub-questions (each with its
# own answer box) or by a single answer box, then the document footer.
# try/finally guarantees the output file is flushed and closed even if a
# write or an encode fails part-way through.
try:
    for i in range(len(questions)):
        # write question to the file
        latexfile.write("\\item "+questions[i].encode("utf8")+"\n")

        # A missing entry and an empty list both mean "no sub-questions".
        parts = sub_questions.get(i)
        if parts:
            # write the sub-questions as a nested list
            latexfile.write("\\begin{enumerate}\n")
            for j in parts:
                # NOTE(review): only "_" and "&" are escaped here, whereas
                # the top-level question text also has "\" escaped.
                sub_text = j.text.replace("_", "\\_").replace("&", "\\&")
                latexfile.write("\\item "+sub_text.encode("utf8")+"\n")
                latexfile.write(
                    "\\begin{myanswer}\nANSWER\n\\end{myanswer}\n\n")
            latexfile.write("\\end{enumerate}\n")
        elif "point" in questions[i]:
            # "point" also matches "points", so one test covers both the
            # usual wording and the singular typo seen on some pages.
            latexfile.write("\\begin{myanswer}\nANSWER\n\\end{myanswer}\n\n")

    # write file footer (close enumerate and document)
    latexfile.write("\\end{enumerate}\n\\end{document}\n" +
                    "\n%%% mode: latex\n% TeX-master: t\n%%% End:")
finally:
    latexfile.close()

print("Output written to hw"+sys.argv[1]+".tex")