This repository has been archived by the owner on Sep 25, 2019. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit cb00bad
Showing
37 changed files
with
263 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
exiftool -X "%fileFullName%" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
ffprobe -i "%fileFullName%" -show_data -show_format -show_error -show_streams -show_chapters -show_private_data -show_versions -print_format xml |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
ng edu.harvard.hul.ois.fits.Fits -i "%relativeLocation%"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
fiwalk -x "%relativeLocation%" -c /usr/lib/archivematica/archivematicaCommon/externals/fiwalk_plugins/ficonfig.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
mediainfo --Language=Raw -f --Output=XML "%fileFullName%" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
echo program=\"7z\"\; version=\"`7z | grep Version`\" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
echo program=\"convert\"\; version=\"`convert -version | grep Version:`\" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
echo program=\"ffmpeg\"\; version=\"`ffmpeg 2>&1 | grep --ignore-case "FFmpeg version"`\" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
echo program=\"Ghostscript\"\; version=\"`gs --version`\" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
echo program=\"inkscape\"\; version=\"`inkscape -V`\" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
echo "/usr/lib/archivematica/transcoder/transcoderScripts/" "%fileFullName%" "%outputDirectory%%prefix%%fileName%%postfix%.mbox" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
echo program=\"ps2pdf\"\; program=\"Ghostscript\"\; version=\"`gs --version`\" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
echo program=\"readpst\"\; version=\"`readpst -V`\" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
echo program=\"unrar-nonfree\"\; version=\"`unrar-nonfree | grep 'UNRAR'`\" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
7z x -bd -o"%outputDirectory%" "%inputFile%" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
import os, subprocess, sys | ||
|
||
def main(output_directory, compressed_file):
    """Extract ``compressed_file`` into ``output_directory`` using ``unrar``.

    Returns the exit status of ``unrar`` (0 on success), or the exception
    instance if changing directory or launching the process failed. Either
    value can be passed straight to ``sys.exit()``.
    """
    # Note that unrar-free only extracts into the current working directory,
    # hence the os.chdir() here
    try:
        os.chdir(output_directory)
        args = ['unrar', '-x', compressed_file]
        # Propagate unrar's exit status so a failed extraction is not
        # reported to the caller as success.
        return subprocess.call(args)
    except Exception as e:
        return e
|
||
if __name__ == '__main__':
    output_directory = sys.argv[1]
    compressed_file = sys.argv[2]
    # sys.exit() is always available, whereas the bare exit() helper is only
    # installed by the site module.
    sys.exit(main(output_directory, compressed_file))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
from __future__ import print_function | ||
import re | ||
import subprocess | ||
import sys | ||
|
||
def extract(package, outdir):
    """Recover the allocated files from ``package`` into ``outdir``.

    Runs ``tsk_recover`` and inspects the first line of its output for the
    ``Files Recovered: N`` summary. Returns 0 on success; on failure returns
    the exception instance describing the problem, so the result can be
    handed directly to ``sys.exit()``.
    """
    # -a extracts only allocated files; we're not capturing unallocated files
    try:
        process = subprocess.Popen(
            ['tsk_recover', package, '-a', outdir],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            stdin=subprocess.PIPE,
            # Decode the pipes as text so the str regex below also works on
            # interpreters where pipes otherwise yield bytes.
            universal_newlines=True)
        stdout, stderr = process.communicate()

        lines = stdout.splitlines()
        if not lines:
            # Without this guard an empty stdout surfaced as a bare
            # "list index out of range" error with no diagnostic value.
            raise Exception('tsk_recover produced no output; stderr: {}'.format(stderr))

        match = re.match(r'Files Recovered: (\d+)', lines[0])
        if match:
            if match.group(1) == '0':
                raise Exception('tsk_recover failed to extract any files with the message: {}'.format(stdout))
            else:
                print(stdout)
    except Exception as e:
        return e

    return 0
|
||
def main(package, outdir):
    """Extract ``package`` into ``outdir`` and return the resulting status."""
    status = extract(package, outdir)
    return status
|
||
if __name__ == '__main__':
    # Positional arguments: the package to read, then the output directory.
    package, outdir = sys.argv[1], sys.argv[2]
    sys.exit(main(package, outdir))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
import os.path | ||
import re | ||
import subprocess | ||
import sys | ||
|
||
def file_tool(path):
    """Run the ``file`` utility on ``path`` and return its stripped output."""
    raw_output = subprocess.check_output(['file', path])
    return raw_output.strip()
|
||
class FidoFailed(Exception):
    """Error whose message records fido's exit code, stdout and stderr."""

    def __init__(self, stdout, stderr, retcode):
        template = """
Fido exited {retcode} and no format was found.
stdout: {stdout}
---
stderr: {stderr}
"""
        details = template.format(retcode=retcode, stdout=stdout, stderr=stderr)
        super(FidoFailed, self).__init__(details)
|
||
def identify(file_):
    """Identify ``file_`` with fido and return its format identifier.

    Returns the third comma-separated field of fido's first output line when
    it matches ``fmt/<digits>`` (optionally prefixed, e.g. ``x-fmt/``).
    Otherwise the value is reported on stderr and an empty string is
    returned.

    Raises FidoFailed when fido exits non-zero, reports ``"fail"``, or
    prints a first line with too few fields to hold an identifier.
    """
    # The default buffer size fido uses, 256KB, is too small to be able to detect certain formats
    # Formats like office documents and Adobe Illustrator .ai files will be identified as other, less-specific formats
    # This larger buffer size is a bit slower and consumes more RAM, so some users may wish to customize this to reduce the buffer size
    # See: https://projects.artefactual.com/issues/5941, https://projects.artefactual.com/issues/5731
    cmd = ['fido', '-bufsize', '1048576',
           '-loadformats', '/usr/lib/archivematica/archivematicaCommon/externals/fido/archivematica_format_extensions.xml',
           os.path.abspath(file_)]
    # universal_newlines makes the pipes yield text, so the string handling
    # below does not depend on the interpreter's default pipe type.
    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                               stdin=subprocess.PIPE, universal_newlines=True)
    stdout, stderr = process.communicate()

    try:
        results = stdout.split('\n')[0].split(',')
    except Exception:
        # Narrower than a bare except: lets KeyboardInterrupt/SystemExit through.
        raise FidoFailed(stdout, stderr, process.returncode)

    if process.returncode != 0 or results[-1] == '"fail"':
        raise FidoFailed(stdout, stderr, process.returncode)

    if len(results) < 3:
        # A short or empty line has no identifier field; report it as a fido
        # failure rather than letting an IndexError escape.
        raise FidoFailed(stdout, stderr, process.returncode)

    puid = results[2]
    if re.match(r'(.+)?fmt\/\d+', puid):
        return puid

    sys.stderr.write("File identified as non-standard Fido code: {id}\n".format(id=puid))
    return ""
|
||
def main(argv):
    """Print the format identifier for the file named by ``argv[1]``.

    Returns 0 on success, or the exception instance on failure so that the
    result can be passed to ``exit()``/``sys.exit()``.
    """
    try:
        # Parenthesised single-argument print behaves the same as a print
        # statement and as a call to the print function.
        print(identify(argv[1]))
        return 0
    except FidoFailed as e:
        file_output = file_tool(argv[1])
        # FIDO can't currently identify text files with no extension, and this
        # is a common enough usecase to special-case it
        if 'text' in file_output:
            print('x-fmt/111')
            # Explicit success status instead of falling off the end.
            return 0
        return e
    except Exception as e:
        return e
|
||
if __name__ == '__main__':
    # sys.exit() is always available, whereas the bare exit() helper is only
    # installed by the site module.
    sys.exit(main(sys.argv))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
import os.path | ||
import subprocess | ||
import sys | ||
|
||
def file_tool(path):
    """Return what the ``file`` command prints for ``path``, whitespace-stripped."""
    description = subprocess.check_output(['file', path])
    return description.strip()
|
||
# Print the lower-cased extension of the file named by the first argument.
# Parenthesised single-argument print is valid both as a print statement and
# as a call to the print function.
(_, extension) = os.path.splitext(sys.argv[1])
if extension:
    print(extension.lower())
else:
    # Plaintext files frequently have no extension, but are common to identify.
    # file is pretty smart at figuring these out.
    file_output = file_tool(sys.argv[1])
    if 'text' in file_output:
        print('.txt')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
ffmpeg -i "%fileFullName%" -vcodec libx264 -pix_fmt yuv420p -preset medium -crf 18 "%outputDirectory%%prefix%%fileName%%postfix%.mp4" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
convert "%fileFullName%" -sampling-factor 4:4:4 -quality 60 -layers merge "%outputDirectory%%prefix%%fileName%%postfix%.jpg"
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
ffmpeg -i "%fileFullName%" -ac 2 -ab 192000 "%outputDirectory%%prefix%%fileName%%postfix%.mp3" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
cp -R "%inputFile%" "%outputDirectory%%prefix%%fileName%%postfix%%fileExtensionWithDot%" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
cp -R "/var/archivematica/sharedDirectory/sharedMicroServiceTasksConfigs/transcoder/defaultIcons/default.jpg" "%outputDirectory%%postfix%.jpg" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
convert "%fileFullName%" -thumbnail 100x100 -layers merge "%outputDirectory%%postfix%.jpg"
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
"/usr/lib/archivematica/MCPClient/clientScripts/archivematicaMaildirToMbox.py" "%fileFullName%" "%outputDirectory%%prefix%%fileName%%postfix%.mbox" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
#!/bin/bash

# Transcode the input file to an .mkv using the ffv1 video codec (level 3)
# and the pcm_s16le audio codec.
inputFile="%fileFullName%"
outputFile="%outputDirectory%%prefix%%fileName%%postfix%.mkv"
audioCodec="pcm_s16le"
# An array keeps the codec name and its option as separate arguments without
# relying on word splitting.
videoCodec=(ffv1 -level 3)

# Build the argument vector as an array and execute it directly. The earlier
# string + eval form left the output path unquoted, so paths containing
# spaces or shell metacharacters were split or re-interpreted.
command=(ffmpeg -vsync passthrough -i "${inputFile}"
         -vcodec "${videoCodec[@]}" -g 1
         -acodec "${audioCodec}"
         "${outputFile}")

echo "${command[@]}"
"${command[@]}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
gs -dPDFA -dBATCH -dNOPAUSE -sDEVICE=pdfwrite -dPDFACompatibilityPolicy=1 -sOutputFile="%outputDirectory%%prefix%%fileName%%postfix%.pdf" "%fileFullName%" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
inkscape -z "%fileFullName%" --export-pdf="%outputDirectory%%prefix%%fileName%%postfix%.pdf" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
ps2pdf -dEPSCrop -dPDFA "%fileFullName%" "%outputDirectory%%prefix%%fileName%%postfix%.pdf" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
sudo /usr/bin/inkscape "%fileFullName%" --export-plain-svg="%outputDirectory%%prefix%%fileName%%postfix%.svg" | ||
sudo chmod 777 "%outputDirectory%%prefix%%fileName%%postfix%.svg" | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
convert "%fileFullName%" +compress "%outputDirectory%%prefix%%fileName%%postfix%.tif" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
ffmpeg -i "%fileFullName%" "%outputDirectory%%prefix%%fileName%%postfix%.wav" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
# OCR output is written under the SIP's metadata/OCRfiles directory.
ocrfiles="%SIPObjectsDirectory%metadata/OCRfiles"
test -d "$ocrfiles" || mkdir -p "$ocrfiles"

# Quote the input path so a filename containing spaces stays one argument.
tesseract "%fileFullName%" "$ocrfiles/%fileName%"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
import json | ||
import subprocess | ||
import sys | ||
|
||
from lxml import etree | ||
|
||
class JhoveException(Exception):
    """Error raised when running JHOVE or reading its report fails."""
|
||
def parse_jhove_data(target):
    """Run ``jhove -h xml`` on ``target`` and return the parsed XML root.

    Raises JhoveException if jhove exits non-zero or cannot be started.
    """
    args = ['jhove', '-h', 'xml', target]
    try:
        output = subprocess.check_output(args)
    except (subprocess.CalledProcessError, OSError):
        # OSError covers a missing or non-executable jhove binary, which
        # otherwise escapes the JhoveException handling in main().
        raise JhoveException("Jhove failed when running: " + ' '.join(args))

    return etree.fromstring(output)
|
||
def get_status(doc):
    """Return the text of the ``repInfo/status`` element of a JHOVE report.

    Raises JhoveException when the element is absent.
    """
    status_element = doc.find('.{http://hul.harvard.edu/ois/xml/ns/jhove}repInfo/{http://hul.harvard.edu/ois/xml/ns/jhove}status')
    if status_element is not None:
        return status_element.text

    raise JhoveException("Unable to find status!")
|
||
def get_outcome(status, format=None):
    """Translate a JHOVE status (and optional format) into an outcome string.

    Returns "pass", "partial pass" or "fail".
    """
    # JHOVE returns "bytestream" for unrecognized file formats.
    # That can include unrecognized or malformed PDFs, JPEG2000s, etc.
    # Since we're whitelisting the formats we're passing in,
    # "bytestream" indicates that the format is not in fact well-formed
    # regardless of what the status reads.
    if format == "bytestream":
        return "fail"

    # Any status not listed here counts as a failure.
    outcomes = {
        "Well-Formed and valid": "pass",
        "Well-Formed, but not valid": "partial pass",
    }
    return outcomes.get(status, "fail")
|
||
def get_format(doc):
    """Return ``(format, version)`` read from ``repInfo`` in a JHOVE report.

    The format is "Not detected" when its element is absent; the version is
    None when its element is absent.
    """
    format_element = doc.find('.{http://hul.harvard.edu/ois/xml/ns/jhove}repInfo/{http://hul.harvard.edu/ois/xml/ns/jhove}format')
    version_element = doc.find('.{http://hul.harvard.edu/ois/xml/ns/jhove}repInfo/{http://hul.harvard.edu/ois/xml/ns/jhove}version')

    format_name = "Not detected" if format_element is None else format_element.text
    version_text = None if version_element is None else version_element.text

    return (format_name, version_text)
|
||
def format_event_outcome_detail_note(format, version, result):
    """Build the ``format="..."; version="..."; result="..."`` detail note.

    The version field is left out when ``version`` is None.
    """
    fields = ['format="{}";'.format(format)]
    if version is not None:
        fields.append('version="{}";'.format(version))
    fields.append('result="{}"'.format(result))

    return ' '.join(fields)
|
||
def main(target):
    """Validate ``target`` with JHOVE and print the outcome as JSON.

    Returns 0 on success, or the JhoveException instance on failure so that
    the result can be passed to ``sys.exit()``.
    """
    try:
        doc = parse_jhove_data(target)
        status = get_status(doc)
        format, version = get_format(doc)
        outcome = get_outcome(status, format)
        # The note's result field carries the raw JHOVE status, not the
        # derived outcome.
        note = format_event_outcome_detail_note(format, version, status)

        out = {
            "eventOutcomeInformation": outcome,
            "eventOutcomeDetailNote": note
        }
        # Parenthesised single-argument print is valid both as a print
        # statement and as a call to the print function.
        print(json.dumps(out))

        return 0
    except JhoveException as e:
        return e
|
||
if __name__ == '__main__':
    # The only argument is the path of the file to validate.
    sys.exit(main(sys.argv[1]))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
test -f "%outputLocation%" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
test -s "%outputLocation%" |