frank

#!/usr/bin/env bash
# Extra curl arguments identifying this client. Note that the value contains
# literal double quotes around the header.
curlUserAgent="-H \"User-Agent: Frank\""
# First check the bash version: it needs to be 4+ to support associative arrays
# (used for keeping track of prefixes). BASH_VERSINFO[0] holds the numeric major
# version; looking at only the first character of BASH_VERSION would misread a
# two-digit major version such as "10" as "1".
associativeSupported=false
if [ "${BASH_VERSINFO[0]:-0}" -gt 3 ]; then
    associativeSupported=true
fi
# Every network call in this script goes through curl, so bail out early when it is missing.
command -v curl >/dev/null 2>&1 || { echo >&2 "Please install curl first"; exit 1; }
# Base URL queried (page by page) by fetchStatementsForDoc when statements are filtered
ldfApi="http://ldf.lodlaundromat.org"
# SPARQL endpoint used by fetchDocs and fetchMeta
endpoint="http://sparql.backend.lodlaundromat.org/"
# Base URL of the gzipped dumps; "<downloadUrl>/<md5>" is fetched and piped through zcat
downloadUrl="http://download.lodlaundromat.org"
# Base URL of the document resources; "<resourceUrl>/<md5>" is printed as the resource URI
resourceUrl="http://lodlaundromat.org/resource"
# Index queried with a subject/predicate/object IRI appended (see fetchDocsForResources)
r2dIndex="http://index.lodlaundromat.org/r2d/"
# Index queried with a namespace IRI appended (see fetchNamespaces)
ns2dIndex="http://index.lodlaundromat.org/ns2d/"
#limit how many values clauses we include in sparql. setting too high will result in gigantic and slow sparql queries
sparqlMaxDocsAsValues=50000
# Print all arguments, joined by single spaces, to stderr in bold red.
# Backslash escapes in the message are interpreted (echo -e).
echoerr() {
    local message="$*"
    echo -e "\e[01;31m${message}\e[0m" 1>&2
}
# Copy stdin to stderr verbatim (used to print the here-doc help texts).
# Using cat instead of a `read`/`echo` loop keeps backslashes intact, does not
# swallow lines that look like echo options (e.g. "-n"), and does not drop a
# final line that lacks a trailing newline.
caterr() {
    cat 1>&2
}


##
## All documentation
##
# Print the top-level usage text (list of sub-commands) to stderr.
show_main_help() {
    # name under which the script was invoked, without its directory part
    local prog="${0##*/}"
    caterr << EOF
Usage: $prog [-h] <command> [<args>]

The three main frank commands are:
    statements    Fetch statements from the LOD Cloud, gathered by the LOD Laundromat (add -h for help)
    documents     Fetch LOD Laundromat document references (add -h for help)
    meta          Fetch meta-data for a given document (add -h for help)
EOF
}


# Print the usage text of the "statements" command to stderr.
# Reads the global $mode for the command name shown in the usage line.
# The text documents that several document references may be given, either as
# arguments or on stdin, which is what the main dispatch code accepts.
show_statements_help() {
caterr << EOF
Usage: ${0##*/} $mode [<args>] [<documentUri>...]

Fetch statements from the LOD Cloud, using data hosted and cleaned by the LOD Laundromat.
When one or more LOD Laundromat document references (<documentUri>) are passed as parameters
(or piped in on stdin, one per line), statements are fetched from those documents.
Otherwise, statements are fetched from the complete LOD Cloud.

    -h
        display this help and exit
    -s <subject>
    --subject <subject>
        Filter statements by <subject>
    -p <predicate>
    --predicate <predicate>
        Filter statements by <predicate>
    -o <object>
    --object <object>
        Filter statements by <object>
    -g
    --showGraph
        Return quads, where the named graph is the reference to the LOD Laundromat resource
        of this particular dataset

EOF
}

# Print the usage text of the "documents" command to stderr.
# Reads the global $mode for the command name shown in the usage line.
show_documents_help() {
caterr << EOF
Usage: ${0##*/} $mode [<args>]

Fetch LOD Laundromat document references

    -h
        display this help and exit
    -b <buffer>
    --buffer <buffer>
        Buffer <buffer> document references in memory. Default: 50
    --minTriples <minTriples>
        Only fetch documents with at least this amount of triples
    --maxTriples <maxTriples>
        Only fetch documents with at most this amount of triples
    --minAvgOutDegree <minAvgOutDegree>
        Only fetch documents with an average out degree of at least this amount
    --maxAvgOutDegree <maxAvgOutDegree>
        Only fetch documents with an average out degree of maximum this amount
    --minAvgInDegree <minAvgInDegree>
        Only fetch documents with an average in degree of at least this amount
    --maxAvgInDegree <maxAvgInDegree>
        Only fetch documents with an average in degree of maximum this amount
    --minAvgDegree <minAvgDegree>
        Only fetch documents with an average degree of at least this amount
    --maxAvgDegree <maxAvgDegree>
        Only fetch documents with an average degree of maximum this amount
    --namespace <namespace>
        Filter document for this namespace. Provide this flag several times to filter
        for multiple namespaces ('OR' operation)
    --sparql <sparql-bgp>
        Use your own SPARQL bgp to filter the results. Use ?doc to refer
        to the current document. For more information on available properties,
        check schema, or any resource
        (e.g. http://lodlaundromat.org/resource/e439f40f187906eb8e6223d57a77d24d)
        Example:
            {?doc llm:metrics/llm:IRILength/llm:std ?std .
            FILTER(?std >  50)}
        Available prefixes:
            ll: <http://lodlaundromat.org/resource/>
            llo: <http://lodlaundromat.org/ontology/>
            llm: <http://lodlaundromat.org/metrics/ontology/>
    -d
    --downloadUri
        Only print the download URI
    -r
    --resourceUri
        Only print the LOD Laundromat document resource URI
EOF
}

# Print the usage text of the "meta" command to stderr.
# Reads the global $mode for the command name shown in the usage line.
# Mentions stdin because the main dispatch code also reads document references from a pipe.
show_meta_help() {
caterr << EOF
Usage: ${0##*/} $mode [<args>] <documentUri>...

Fetch LOD Laundromat meta-data for each LOD Laundromat document reference (<documentUri>)
passed as argument or piped in on stdin (one per line).

    -h
        display this help and exit
EOF
}

# Defaults for the "statements" command: subject/predicate/object filters and quad output
sub=""
pred=""
obj=""
showGraph=false

# Defaults for the "documents" command
limit=50          # page size of the document queries (-b/--buffer)
download=true     # print the download URI
resource=true     # print the resource URI
minTriples=1
maxTriples=""
minAvgOutDegree=""
maxAvgOutDegree=""
minAvgInDegree=""
maxAvgInDegree=""
minAvgDegree=""
maxAvgDegree=""
bgp=""
namespaces=()
verbose=false

# Without any argument: show the general help and stop
if [ "$#" -eq 0 ] && show_main_help; then
    exit
fi

# The first argument selects the command; everything after it is left for option parsing
mode=$1
shift
case "$mode" in
    statements|documents|meta)
        ;;
    *)
        show_main_help
        exit 1
        ;;
esac

# Describes the value-taking options of the current $mode.
# Arguments: $1 - option name as typed, without any "=value" part (e.g. "-s", "--buffer")
# Outputs:   "<variable> <long-name> <placeholder>" on stdout
# Returns:   0 when $1 is a value-taking option of $mode, 1 otherwise
lookupValueOption() {
    case "$mode:$1" in
        statements:-s|statements:--subject)
            echo "sub subject subject" ;;
        statements:-p|statements:--predicate)
            echo "pred predicate predicate" ;;
        statements:-o|statements:--object)
            echo "obj object object" ;;
        documents:-b|documents:--buffer)
            echo "limit buffer buffer" ;;
        documents:--sparql)
            echo "bgp sparql sparql-bgp" ;;
        documents:--namespace)
            echo "namespaces namespace namespace" ;;
        documents:--minTriples|documents:--maxTriples)
            echo "${1#--} ${1#--} ${1#--}" ;;
        documents:--minAvgOutDegree|documents:--maxAvgOutDegree)
            echo "${1#--} ${1#--} ${1#--}" ;;
        documents:--minAvgInDegree|documents:--maxAvgInDegree)
            echo "${1#--} ${1#--} ${1#--}" ;;
        documents:--minAvgDegree|documents:--maxAvgDegree)
            echo "${1#--} ${1#--} ${1#--}" ;;
        *)
            return 1 ;;
    esac
}

# Parses the options of the current $mode.
# Globals:   reads $mode; writes the option variables (sub, pred, obj, showGraph,
#            limit, minTriples, ..., bgp, namespaces, download, resource, verbose)
#            and stores the arguments left after option parsing in remainingArgs.
# Arguments: the command line after the command name
# Exits:     after printing help for -h/-?/--help; with status 1 when an option
#            is missing its value
# Value options are accepted as "--name value", "--name=value" and, where a
# short form exists, "-x value". Values are always stored as a single word,
# also for --namespace=<value> (no word splitting or glob expansion).
parseArgs() {
    local arg name spec target long placeholder value
    while [ "$#" -gt 0 ]; do
        arg=$1
        case $arg in
            -h|-\?|--help)
                case "$mode" in
                    statements) show_statements_help ;;
                    documents) show_documents_help ;;
                    meta) show_meta_help ;;
                esac
                exit
                ;;
            -v|--verbose)
                verbose=true
                shift
                continue
                ;;
            --)              # End of all options.
                shift
                break
                ;;
            -?*)             # Some option: handled below.
                ;;
            *)               # Default case: If no more options then break out of the loop.
                break
                ;;
        esac

        #flags without a value
        case "$mode:$arg" in
            statements:-g|statements:--showGraph)
                showGraph=true; shift; continue ;;
            documents:-d|documents:--downloadUri)
                resource=false; shift; continue ;;
            documents:-r|documents:--resourceUri)
                download=false; shift; continue ;;
        esac

        #options with a value. The "=value" form only exists for long options
        name=${arg%%=*}
        if spec=$(lookupValueOption "$name") && { [ "$arg" = "$name" ] || [ "${name:0:2}" = "--" ]; }; then
            read -r target long placeholder <<< "$spec"
            if [ "$arg" = "$name" ]; then
                #value is passed as the next argument
                if [ "$#" -gt 1 ]; then
                    value=$2
                    shift 2
                else
                    echoerr "ERROR: Must specify a non-empty \"--$long <$placeholder>\" argument."
                    exit 1
                fi
            else
                value=${arg#*=} # Delete everything up to "=" and keep the remainder.
                if [ -z "$value" ]; then
                    echoerr "ERROR: Must specify a non-empty \"--$long <$placeholder>\" argument."
                    exit 1
                fi
                shift
            fi
            if [ "$target" = "namespaces" ]; then
                namespaces+=("$value")
            else
                printf -v "$target" '%s' "$value"
            fi
            continue
        fi

        printf 'WARN: Unknown option (ignored): %s\n' "$arg" >&2
        shift
    done
    remainingArgs=("$@")
}

remainingArgs=()
if [[ "$mode" == "statements" || "$mode" == "documents" || "$mode" == "meta" ]]; then
    parseArgs "$@"
    #only the non-option arguments (document references) stay in the positional parameters
    set -- "${remainingArgs[@]}"
fi


prefixes=false
if $associativeSupported; then
    #top25 of prefix.cc prefixes
    declare -A prefixes=(["yago"]="http://yago-knowledge.org/resource/" ["rdf"]="http://www.w3.org/1999/02/22-rdf-syntax-ns#" ["foaf"]="http://xmlns.com/foaf/0.1/" ["dbp"]="http://dbpedia.org/property/" ["dc"]="http://purl.org/dc/elements/1.1/" ["owl"]="http://www.w3.org/2002/07/owl#" ["rdfs"]="http://www.w3.org/2000/01/rdf-schema#" ["dbo"]="http://dbpedia.org/ontology/" ["ont"]="http://purl.org/net/ns/ontology-annot#" ["onto"]="http://www.ontotext.com/" ["skos"]="http://www.w3.org/2004/02/skos/core#" ["geo"]="http://www.w3.org/2003/01/geo/wgs84_pos#" ["rss"]="http://purl.org/rss/1.0/" ["gldp"]="http://www.w3.org/ns/people#" ["sioc"]="http://rdfs.org/sioc/ns#" ["fb"]="http://rdf.freebase.com/ns/" ["sc"]="http://purl.org/science/owl/sciencecommons/" ["geonames"]="http://www.geonames.org/ontology#" ["xsd"]="http://www.w3.org/2001/XMLSchema#" ["gr"]="http://purl.org/goodrelations/v1#" ["dcterms"]="http://purl.org/dc/terms/" ["dct"]="http://purl.org/dc/terms/" ["org"]="http://www.w3.org/ns/org#" ["dbr"]="http://dbpedia.org/resource/" ["qb"]="http://purl.org/linked-data/cube#" ["void"]="http://rdfs.org/ns/void#")
fi

# Expands "<prefix>:<local>" to a full IRI when <prefix> is a key of the
# prefixes array; any other value is printed unchanged.
# Arguments: $1 - subject/predicate/object filter value
# Outputs:   the (possibly expanded) value on stdout
# Only the first colon separates prefix and local name, so local names that
# contain a colon are kept intact, and a value without a colon is never expanded.
expandPrefixedName() {
    local term=$1 ns
    ns=${term%%:*}
    if [[ "$term" == *:* && -n "$ns" && -n "${prefixes[$ns]}" ]]; then
        printf '%s\n' "${prefixes[$ns]}${term#*:}"
    else
        printf '%s\n' "$term"
    fi
}

# Replaces every entry of the global namespaces array (used as filter) that is
# a known prefix name by its IRI. Entries starting with "http" are used as-is;
# unknown names are kept as typed and reported on stderr.
expandNamespaces() {
    local ns
    local -a expanded=()
    [ "${#namespaces[@]}" -gt 0 ] || return 0
    for ns in "${namespaces[@]}"; do
        if [ "${ns:0:4}" == "http" ]; then
            #use as-is
            expanded+=("$ns")
        elif [ -n "$ns" ] && [ -n "${prefixes[$ns]}" ]; then
            #found one in our list. add that value
            expanded+=("${prefixes[$ns]}")
        else
            echoerr "Could not find prefix IRI for namespace name \"$ns\". Instead, filtering for documents that contain IRIs that start with \"<$ns\""
            expanded+=("$ns")
        fi
    done
    #copy the whole array back (a plain scalar assignment would only replace element 0)
    namespaces=("${expanded[@]}")
}

# Warns on stderr when $1 still looks like "<prefix>:<local>", i.e. when the
# prefix could not be replaced by a full URI.
prefixRegex='^[[:alnum:]_]+:[[:alnum:]_]+$'
warnUnresolvedPrefix() {
    if [[ -n "$1" && "$1" =~ $prefixRegex ]]; then
        if ! $associativeSupported; then
            echoerr "Prefixes are only supported by bash version 4. Your version does not meet these requirements: $BASH_VERSION"
        else
            echoerr "Could not find prefixed $1. Use the full URI instead";
        fi
    fi
}

if $associativeSupported; then
    #first try to substitute namespace for sub,pred,obj filters
    if [ -n "$sub" ]; then sub=$(expandPrefixedName "$sub"); fi
    if [ -n "$pred" ]; then pred=$(expandPrefixedName "$pred"); fi
    if [ -n "$obj" ]; then obj=$(expandPrefixedName "$obj"); fi

    #now try to substitute namespaces in namespaces array (used as filter)
    expandNamespaces
fi

#check whether we've found the full URI for any prefixed sub/pred/obj
warnUnresolvedPrefix "$sub"
warnUnresolvedPrefix "$pred"
warnUnresolvedPrefix "$obj"


# Prints the statements of one LOD Laundromat document on stdout.
# Globals:   reads sub, pred, obj, showGraph, filterForDocs, filterForDocsFailed,
#            curlUserAgent, downloadUrl, ldfApi
# Arguments: $1 - document reference: a URL whose last "/<32 lowercase
#                 alphanumerics>" segment is the document md5, or a bare md5
# Outputs:   without any filter and without showGraph the unzipped dump of the
#            document; otherwise the matching statements, fetched page by page.
#            With showGraph the document reference ($1) is appended as graph.
# Exits:     with status 1 (message on stderr) when $1 contains no md5
fetchStatementsForDoc() {
    local md5 md5Regex quadReplace url page hasNext response quad
    local -a agentArgs

    #start with a sanity check (simple non-empty check)
    if [ -z "$1" ]; then return; fi

    #extract md5 from resource url
    md5Regex='^(.*/)?([a-z0-9]{32})'
    if [[ "$1" =~ $md5Regex ]]; then
        md5=${BASH_REMATCH[2]}
    else
        echoerr "Not a valid LOD Laundromat document identifier: $1"
        exit 1
    fi

    #curlUserAgent is a script-internal constant holding shell-quoted arguments.
    #Re-parse it into an array so that the header stays one single argument
    #(plain unquoted expansion would split it at the space)
    eval "agentArgs=($curlUserAgent)"

    if [ -z "$sub" ] && [ -z "$pred" ] && [ -z "$obj" ] && ! $showGraph; then
        #just fetch gzip file. don't need to filter
        curl -Gs "${agentArgs[@]}" "$downloadUrl/$md5" | zcat
        return
    fi

    if [ -n "$sub" ] || [ -n "$pred" ] || [ -n "$obj" ]; then
        #Ah, in this case we've already looked up which documents to process. Check whether this md5 is one of them..
        if ! $filterForDocsFailed && ! grep -qF -- "$md5" <<< "$filterForDocs"; then
            return
        fi
    fi

    #escape the characters that are special in a sed replacement. Generate it once for this particular doc
    quadReplace=$(sed -e 's/[\/&]/\\&/g' <<< "$1")

    url="$ldfApi/$md5"
    page=1
    hasNext=true
    #while we have something to send request to, i.e. pagination has not finished yet for this document
    while $hasNext; do
        hasNext=false
        response=$(curl "${agentArgs[@]}" -Gs "$url" -H "Accept: application/n-quads" --data-urlencode "subject=$sub" --data-urlencode "predicate=$pred" --data-urlencode "object=$obj" --data-urlencode "page=$page")
        while read -r quad; do
            #skip blank lines
            if [ -z "$quad" ]; then continue; fi
            if grep -qE '#metadata>\s*\.' <<< "$quad"; then
                #metadata quad: never printed, only inspected for a link to the next page.
                #Advance at most one page per response, however many quads mention it
                if ! $hasNext && grep -q 'nextPage' <<< "$quad"; then
                    hasNext=true
                    page=$((page + 1))
                fi
            elif $showGraph; then
                sed "s/\(.*\)\.$/\1 <$quadReplace>./" <<< "$quad"
            else
                #print verbatim: no word splitting or glob expansion of the statement
                printf '%s\n' "$quad"
            fi
        done <<< "$response"
    done
}

# Prints document references (download URI and/or resource URI, one document per line).
# Globals:   reads limit, download, resource, minTriples, maxTriples, the
#            min/max Avg(In|Out)Degree filters, bgp, namespaces, namespaceDocs,
#            sparqlMaxDocsAsValues, endpoint, downloadUrl, resourceUrl, curlUserAgent
# Outputs:   "<downloadUri> <resourceUri>", or only one of the two, depending
#            on $download and $resource
# Returns:   0 once the endpoint returns an empty page
# The SPARQL endpoint is queried in pages of $limit documents unless the only
# active filter is the namespace filter; in that case the documents found via
# the namespace index (namespaceDocs) are printed directly.
fetchDocs() {
    local useSparql=false
    local sparqlPrefixes="PREFIX llm: <http://lodlaundromat.org/metrics/ontology/> PREFIX llo: <http://lodlaundromat.org/ontology/> PREFIX ll: <http://lodlaundromat.org/resource/>"
    local offset=0
    local docTpfs="?doc llo:triples ?triples ; llo:md5 ?md5 ."
    local filters="FILTER(?triples >= $minTriples) "
    local values docsLength numDocs limitOffset query result line println md5 r
    local -a agentArgs

    #curlUserAgent is a script-internal constant holding shell-quoted arguments.
    #Re-parse it into an array so that the header stays one single argument
    eval "agentArgs=($curlUserAgent)"

    if [ -n "$maxTriples" ]; then
        useSparql=true
        filters="$filters FILTER(?triples <= $maxTriples) "
    fi
    #the default is 1; every higher minimum has to be checked by the endpoint
    if [ "$minTriples" -gt 1 ]; then
        useSparql=true
    fi
    if [ -n "$minAvgInDegree" ]; then
        useSparql=true
        filters="$filters ?doc llm:metrics/llm:inDegree/llm:mean ?avgInDegree. FILTER(?avgInDegree >= $minAvgInDegree) "
    fi
    if [ -n "$maxAvgInDegree" ]; then
        useSparql=true
        filters="$filters ?doc llm:metrics/llm:inDegree/llm:mean ?avgInDegree. FILTER(?avgInDegree <= $maxAvgInDegree) "
    fi
    if [ -n "$minAvgOutDegree" ]; then
        useSparql=true
        filters="$filters ?doc llm:metrics/llm:outDegree/llm:mean ?avgOutDegree. FILTER(?avgOutDegree >= $minAvgOutDegree) "
    fi
    if [ -n "$maxAvgOutDegree" ]; then
        useSparql=true
        filters="$filters ?doc llm:metrics/llm:outDegree/llm:mean ?avgOutDegree. FILTER(?avgOutDegree <= $maxAvgOutDegree) "
    fi
    if [ -n "$minAvgDegree" ]; then
        useSparql=true
        filters="$filters ?doc llm:metrics/llm:degree/llm:mean ?avgDegree. FILTER(?avgDegree >= $minAvgDegree) "
    fi
    if [ -n "$maxAvgDegree" ]; then
        useSparql=true
        filters="$filters ?doc llm:metrics/llm:degree/llm:mean ?avgDegree. FILTER(?avgDegree <= $maxAvgDegree) "
    fi
    if [ -z "$namespaces" ] || [ "${#namespaces[@]}" -eq 0 ]; then
        #Not filtering by namespace. I.e., we might be filtering on nothing, and want all documents.
        #so, use sparql
        useSparql=true
    fi

    if ! $useSparql; then
        #no need to use sparql. we're only filtering by namespace. Just return those documents
        while read -r md5; do
            if [ -z "$md5" ]; then
                #just whitespace string
                continue
            fi
            println=
            if $download; then println="$downloadUrl/$md5"; fi
            if $resource; then println="${println:+$println }$resourceUrl/$md5"; fi
            if [ -n "$println" ]; then printf '%s\n' "$println"; fi
        done <<< "$namespaceDocs"
        return
    fi

    values=
    if [ -n "$namespaceDocs" ]; then
        #first check whether we really want to include the ns docs
        #having too many might make this sparql query waaaaay too long (~25MB)
        #every document takes 33 characters (32 for the md5 plus the newline)
        docsLength=${#namespaceDocs}
        numDocs=$(( (docsLength + 1) / 33 ))
        if [ "$numDocs" -lt "$sparqlMaxDocsAsValues" ]; then
            values="VALUES (?doc) {"
            while read -r md5; do
                values="$values (ll:$md5)"
            done <<< "$namespaceDocs"
            values="$values }"
        fi
    fi

    #Start fetching docs
    while true; do
        limitOffset="LIMIT $limit OFFSET $offset"
        query="$sparqlPrefixes SELECT ?doc ?md5 WHERE {$docTpfs $filters $bgp $values} $limitOffset"

        #nicest approach would be to stream through results. For now, just take this quick approach
        #(sed drops the csv header line)
        result=$(curl "${agentArgs[@]}" -X POST -s "$endpoint" --data-urlencode "query=$query" -H 'Accept: text/csv' | sed '1d')
        if [ -z "$result" ]; then
            #no results left, we are done!
            return 0
        fi

        while read -r line; do
            if [ -z "$line" ]; then
                #just whitespace string
                continue
            fi
            #do some sed parsing. Easy, because we know there will be no quotes in the resultset (won't get literals)
            md5=$(printf '%s\n' "$line" | sed 's/.*\"\([^\"]*\)\"$/\1/')

            #we're filtering by namespace, but the namespace documents were too many to
            #send along as VALUES. Skip documents that were not found via the namespace index
            if [ -n "$namespaces" ] && [ -z "$values" ] && ! grep -qF -- "$md5" <<< "$namespaceDocs"; then
                continue
            fi

            println=
            if $download; then
                println="$downloadUrl/$md5"
            fi
            if $resource; then
                r=$(printf '%s\n' "$line" | sed 's/^\"\([^\"]*\)\".*/\1/')
                println="${println:+$println }$r"
            fi
            printf '%s\n' "$println"
        done <<< "$result"

        #up offset for next query
        offset=$((offset + limit))
    done
}


# Prints the meta-data of one document as returned by the SPARQL endpoint.
# Globals:   reads endpoint and curlUserAgent
# Arguments: $1 - document resource URI; it is inserted as <$1> into the query
# Outputs:   the response of a CONSTRUCT query (requested as text/plain) on
#            stdout; nothing when $1 is empty or when the first line of the
#            response contains "Empty NT"
fetchMeta() {
    local mainTPatterns optionalTPattern query response
    local -a agentArgs

    #start with a sanity check (simple non-empty check)
    if [ -z "$1" ]; then return; fi

    #curlUserAgent is a script-internal constant holding shell-quoted arguments.
    #Re-parse it into an array so that the header stays one single argument
    #(plain unquoted expansion would split it at the space)
    eval "agentArgs=($curlUserAgent)"

    mainTPatterns="<$1> llm:metrics ?metricDoc ; ?pred ?obj ."
    optionalTPattern="?obj ?pred2 ?obj2 ."
    query="PREFIX llm: <http://lodlaundromat.org/metrics/ontology/> CONSTRUCT { $mainTPatterns $optionalTPattern } WHERE { $mainTPatterns OPTIONAL{$optionalTPattern}}"
    response=$(curl "${agentArgs[@]}" -Gs "$endpoint" --data-urlencode "query=$query" -H "Accept: text/plain")
    #the response may return '# Empty NT'
    if printf '%s\n' "$response" | head -n 1 | grep -q "Empty NT"; then
        return
    fi
    printf '%s\n' "$response"
}


#documents (md5s, one per line) that contain all of the given sub/pred/obj filters
filterForDocs=
#true when an index lookup failed, i.e. when filterForDocs cannot be trusted
filterForDocsFailed=false
#true when at least one of sub/pred/obj is set
hasStatementFilter=false

# Looks up, for every sub/pred/obj filter that is set, which documents contain
# that resource, and stores the intersection of these lists in filterForDocs.
# Globals:   reads sub, pred, obj, r2dIndex, curlUserAgent;
#            writes filterForDocs, filterForDocsFailed, hasStatementFilter
# On a failed lookup the curl command is reported on stderr,
# filterForDocsFailed is set and the remaining lookups are skipped.
# curl is called directly (no eval), so characters such as '&' or ';' inside an
# IRI are passed on literally instead of being interpreted by the shell.
fetchDocsForResources() {
    local term url docs curlStatus haveList=false
    local -a agentArgs

    #curlUserAgent is a script-internal constant holding shell-quoted arguments.
    #Re-parse it into an array so that the header stays one single argument
    eval "agentArgs=($curlUserAgent)"

    #if sub, pred or obj filter is set, check whether we can prune docs using our index
    for term in "$sub" "$pred" "$obj"; do
        if [ -z "$term" ]; then continue; fi
        hasStatementFilter=true

        url="$r2dIndex$term"
        #split the response at the commas, drop the first four fields and the
        #last one, and strip the quotes. The exit status is the one of curl
        docs=$(
            curl "${agentArgs[@]}" --fail -Gs "$url" | tr "," "\n" | tail -n+5 | sed '$ d' | tr -d '"'
            exit "${PIPESTATUS[0]}"
        )
        curlStatus=$?
        if [ "$curlStatus" -gt 0 ]; then
            echoerr "Failed fetching resources from index. Command: "
            echoerr "curl $curlUserAgent --fail -Gs $url"
            filterForDocsFailed=true
            return
        fi

        if $haveList; then
            #we've already got a doc list: keep the documents that occur in both.
            #comm needs sorted input, and an empty list has to stay empty
            filterForDocs=$(LC_ALL=C comm -12 <(LC_ALL=C sort -u <<< "$filterForDocs") <(LC_ALL=C sort -u <<< "$docs"))
        else
            filterForDocs="$docs"
            haveList=true
        fi
    done
}

#documents (md5s, one per line) that use at least one of the requested namespaces
namespaceDocs=

# Looks up, for every entry of the namespaces array, which documents use that
# namespace, and collects them in namespaceDocs.
# Globals:   reads namespaces, ns2dIndex, curlUserAgent; writes namespaceDocs
# Several namespaces are combined as a union ('OR'), as documented in the help
# of the documents command. A failed lookup is reported on stderr and skipped.
# curl is called directly (no eval), so characters such as '&' or ';' inside a
# namespace IRI are passed on literally instead of being interpreted by the shell.
fetchNamespaces() {
    local namespace url docs curlStatus
    local -a agentArgs

    if [ "${#namespaces[@]}" -eq 0 ]; then return; fi

    #curlUserAgent is a script-internal constant holding shell-quoted arguments.
    #Re-parse it into an array so that the header stays one single argument
    eval "agentArgs=($curlUserAgent)"

    for namespace in "${namespaces[@]}"; do
        if [ -z "$namespace" ]; then continue; fi
        url="$ns2dIndex$namespace"
        #split the response at the commas, drop the first four fields and the
        #last one, and strip the quotes. The exit status is the one of curl
        docs=$(
            curl "${agentArgs[@]}" --fail -Gs "$url" | tr "," "\n" | tail -n+5 | sed '$ d' | tr -d '"'
            exit "${PIPESTATUS[0]}"
        )
        curlStatus=$?
        if [ "$curlStatus" -gt 0 ]; then
            echoerr "Failed fetching namespaces from index. Command: "
            echoerr "curl $curlUserAgent --fail -Gs $url"
            continue
        fi
        #if we've already got a doc list, then merge both
        if [ -n "$namespaceDocs" ]; then
            namespaceDocs=$(printf '%s\n%s\n' "$namespaceDocs" "$docs" | LC_ALL=C sort -u | sed '/^$/d')
        else
            namespaceDocs="$docs"
        fi
    done
}


##
## This is where we actually start doing stuff
##
# Dispatch on the selected command. Document references are taken from the
# remaining arguments and, when stdin is not a terminal, also from stdin.
case "$mode" in
    statements)
        fetchDocsForResources
        runForDocs=true
        if [ "$#" -gt 0 ]; then
            #there are graphs passed as arguments to the script
            for res in "$@"; do
                fetchStatementsForDoc "$res"
            done
            runForDocs=false
        fi
        if [ ! -t 0 ]; then
            #something is piped to this program
            while read res; do
                fetchStatementsForDoc "$res"
            done
            runForDocs=false
        fi
        if $runForDocs; then
            #no arguments and nothing piped.
            if $hasStatementFilter && ! $filterForDocsFailed && [ -z "$filterForDocs" ]; then
                #we're filtering statements, but the index knows no related document:
                #nothing to return!
                echoerr "No results found"
                exit 0
            fi

            if ! $filterForDocsFailed && [ -n "$filterForDocs" ]; then
                #filtering by sub/pred/obj and the index told us which documents to visit
                while read -r md5; do
                    fetchStatementsForDoc "http://lodlaundromat.org/resource/$md5"
                done <<< "$filterForDocs"
            else
                #Just use the whole lod cloud: let the documents function print
                #only the resource URIs and fetch the statements of each of them
                download=false
                resource=true
                while read res; do
                    fetchStatementsForDoc "$res"
                done < <( fetchDocs )
            fi
        fi
        ;;
    documents)
        fetchNamespaces
        fetchDocs
        ;;
    meta)
        showHelp=true
        if [ "$#" -gt 0 ]; then
            #there are graphs passed as arguments to the script
            for res in "$@"; do
                fetchMeta "$res"
            done
            showHelp=false
        fi
        if [ ! -t 0 ]; then
            #something is piped to this program
            while read res; do
                fetchMeta "$res"
            done
            showHelp=false
        fi
        if $showHelp; then
            show_meta_help
        fi
        ;;
esac