Skip to content

Commit

Permalink
Reimplements XML result rendering (proper whitespace handling)
Browse files Browse the repository at this point in the history
  • Loading branch information
gremid committed Jul 17, 2024
1 parent 5f7ba0f commit af6aa35
Showing 1 changed file with 91 additions and 37 deletions.
128 changes: 91 additions & 37 deletions src/ddc/fcs.clj
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
[taoensso.timbre :as log])
(:import
(eu.clarin.sru.server SRUConstants SRUException SRUQueryParserRegistry$Builder SRUResultCountPrecision SRUSearchResultSet SRUServer SRUServerConfig)
(eu.clarin.sru.server.fcs AdvancedDataViewWriter AdvancedDataViewWriter$Unit DataView DataView$DeliveryPolicy Layer Layer$ContentEncoding ResourceInfo SimpleEndpointSearchEngineBase XMLStreamWriterHelper)
(eu.clarin.sru.server.fcs DataView DataView$DeliveryPolicy Layer Layer$ContentEncoding ResourceInfo SimpleEndpointSearchEngineBase XMLStreamWriterHelper)
(eu.clarin.sru.server.fcs.utils SimpleEndpointDescription)
(java.net URI)
(javax.servlet.http HttpServlet)
Expand All @@ -20,7 +20,8 @@
(log/handle-uncaught-jvm-exceptions!)
(log/merge-config!
{:min-level [["eu.clarin.sru.server.SRUServer" :warn]
["*"(if env/debug? :debug :info)]]
["ddc.fcs" (if env/debug? :debug :info)]
["*" :info]]
:appenders {:println (log/println-appender {:stream :std-err})}})

(require '[ddc.client :as client])
Expand Down Expand Up @@ -73,12 +74,13 @@

(def data-layer->uri
(into {}
(map #(vector % (URI. (str "http://dwds.de/ns/fcs/layer/" %))))
(map (juxt identity #(str "http://dwds.de/ns/fcs/layer/" %)))
(keys data-layer->index)))

(defn data-layer
[id]
(Layer. id (data-layer->uri id) id Layer$ContentEncoding/EMPTY nil nil nil))
(Layer. id (URI. (data-layer->uri id)) id Layer$ContentEncoding/EMPTY
nil nil nil))

(def all-data-layers
(into [] (map data-layer) (keys data-layer->index)))
Expand Down Expand Up @@ -144,21 +146,82 @@
(let [tokens (map #(into {} (map vector indices %)) tokens)]
(into [] (map assoc-space-after) (partition-all 2 1 tokens))))

(defn result->writer
[result]
(def hits-ns
"http://clarin.eu/fcs/dataview/hits")

(def hits-mime-type
"application/x-clarin-fcs-hits+xml")

(def adv-ns
"http://clarin.eu/fcs/dataview/advanced")

(def adv-mime-type
"application/x-clarin-fcs-adv+xml")

(defn result->xml
[query-type pid frag-ref result writer]
(let [indices (vec (cons "hl" (get-in result ["meta_" "indices_"])))
tokens (parse-tokens indices (second (get result "ctx_")))
writer (AdvancedDataViewWriter. AdvancedDataViewWriter$Unit/ITEM)]
(loop [n 1 offset 0 tokens tokens]
(when-let [{hl "hl" w "w" ws "ws" :as token} (first tokens)]
(let [ws? (not= ws "0")
end (+ offset (count w) (if ws? 1 0))]
(doseq [[layer index] data-layer->index
:let [v (token index)] :when v
:let [v (cond-> v (and (= index "w") ws?) (str " "))]]
(. writer addSpan (data-layer->uri layer) (inc offset) end v hl))
(recur (inc n) end (rest tokens)))))
writer))
tokens (parse-tokens indices (second (get result "ctx_")))]
(doto writer
(XMLStreamWriterHelper/writeStartResource pid nil)
(XMLStreamWriterHelper/writeStartResourceFragment nil frag-ref)
(XMLStreamWriterHelper/writeStartDataView hits-mime-type)
(. (setPrefix "hits" hits-ns))
(. (writeStartElement hits-ns "Result"))
(. (writeNamespace "hits" hits-ns)))
(doseq [{hl "hl" w "w" ws "ws" :as _token} tokens]
(if (= hl 1)
(doto writer
(. (writeStartElement hits-ns "Hit"))
(. (writeCharacters w))
(. (writeEndElement)))
(. writer (writeCharacters w)))
(when (not= ws "0")
(. writer (writeCharacters " "))))
(doto writer
(. (writeEndElement))
(XMLStreamWriterHelper/writeEndDataView))
(when (= query-type "fcs")
(doto writer
(XMLStreamWriterHelper/writeStartDataView adv-mime-type)
(. (setPrefix "adv" adv-ns))
(. (writeStartElement adv-ns "Advanced"))
(. (writeNamespace "adv" adv-ns))
(. (writeAttribute "unit" "item")))
(. writer (writeStartElement adv-ns "Segments"))
(loop [n 1 start 1 tokens tokens]
(when-let [{w "w" ws "ws"} (first tokens)]
(let [end (dec (+ start (count w)))]
(doto writer
(. (writeEmptyElement adv-ns "Segment"))
(. (writeAttribute "id" (str "s" n)))
(. (writeAttribute "start" (str start)))
(. (writeAttribute "end" (str end))))
(recur (inc n)
(cond-> (inc end) (not= ws "0") inc)
(rest tokens)))))
(. writer (writeEndElement))
(. writer (writeStartElement adv-ns "Layers"))
(doseq [[layer index] data-layer->index
:when (-> tokens first (get index))]
(. writer (writeStartElement adv-ns "Layer"))
(. writer (writeAttribute "id" (str (data-layer->uri layer))))
(doseq [[i {hl "hl" v index}] (map-indexed list tokens)]
(. writer (writeStartElement adv-ns "Span"))
(. writer (writeAttribute "ref" (str "s" (inc i))))
(when (= hl 1)
(. writer (writeAttribute "highlight" "h1")))
(. writer (writeCharacters v))
(. writer (writeEndElement)))
(. writer (writeEndElement)))
(. writer (writeEndElement))
(doto writer
(. (writeEndElement))
(XMLStreamWriterHelper/writeEndDataView)))
(doto writer
(XMLStreamWriterHelper/writeEndResourceFragment)
(XMLStreamWriterHelper/writeEndResource))
nil))

(defn result-link
[corpus query]
Expand All @@ -171,37 +234,28 @@
(let [endpoint (client/endpoints corpus)
metadata (client/metadata corpus)
pid (metadata :pid)
query-type (. request (getQueryType))
query (query/->ddc (. request (getQuery)))
frag-ref (result-link corpus query)
frag-ref (result-link corpus query)
query (str query " !#has[avail,OR0W] #separate")
start-record (. request (getStartRecord))
num-records (min (. request (getMaximumRecords)) maximum-records)
results (into []
(take num-records)
(client/query endpoint query
:offset (dec start-record)
:page-size (min num-records 1000)))
results (->> (client/query endpoint query
:offset (dec start-record)
:page-size (min num-records 1000))
(into [] (take num-records)))
total (or (some-> results first meta :total) 0)
num-results (count results)
result-idx (volatile! 0)]
result->xml (partial result->xml query-type pid frag-ref)
idx (volatile! -1)]
(proxy [SRUSearchResultSet] [diagnostics]
(getTotalRecordCount [] (min maximum-records total))
(getRecordCount [] (count results))
(getResultCountPrecision [] SRUResultCountPrecision/EXACT)
(getRecordSchemaIdentifier [] record-schema-identifier)
(nextRecord [] (< @result-idx num-results))
(nextRecord [] (-> idx deref inc (< num-results)))
(getRecordIdentifier [] nil)
(writeRecord [writer]
(let [result (results @result-idx)
data-writer (result->writer result)]
(XMLStreamWriterHelper/writeStartResource writer pid nil)
(XMLStreamWriterHelper/writeStartResourceFragment writer nil frag-ref)
(. data-writer (writeHitsDataView writer (data-layer->uri "text")))
(. data-writer (writeAdvancedDataView writer))
(XMLStreamWriterHelper/writeEndResourceFragment writer)
(XMLStreamWriterHelper/writeEndResource writer)
(vswap! result-idx inc)
nil))
(writeRecord [writer] (result->xml (results (vswap! idx inc)) writer))
(hasExtraRecordData [] false)
(writeExtraRecordData [writer]))))

Expand Down

0 comments on commit af6aa35

Please sign in to comment.