diff --git a/data.lisp b/data.lisp index 97e34b6..c64325f 100644 --- a/data.lisp +++ b/data.lisp @@ -60,10 +60,16 @@ :accessor sentence-mtokens))) +(defmethod print-object ((obj sentence) out) + (print-unreadable-object (obj out :type t) + (format out "~a at ~a" + (sentence-meta-value obj "sent_id") + (slot-value obj 'start)))) + (defmethod print-object ((obj token) out) (print-unreadable-object (obj out :type t) - (format out "~a #~a-~a-~a" - (slot-value obj 'form) ; (slot-value obj 'upostag) + (format out "~a/~a #~a-~a-~a" + (slot-value obj 'form) (slot-value obj 'upostag) (slot-value obj 'id) (slot-value obj 'deprel) (slot-value obj 'head)))) diff --git a/experimental.lisp b/experimental.lisp index 239300d..a9801ce 100644 --- a/experimental.lisp +++ b/experimental.lisp @@ -31,17 +31,17 @@ ;;; getting substrings a the sentence (flat, compounds etc) (defun linearize (sentence token &key (filter nil) (direction :both)) - (let ((childs (token-childs token sentence :fn-filter filter)) + (let ((children (token-children token sentence :fn-filter filter)) pre pos) - (dolist (c childs) + (dolist (c children) (if (< (token-id c) (token-id token)) (push c pre) (push c pos))) (let ((lhs (mappend (lambda (tk) - (linerialize sentence tk filter :direction :both)) + (linearize sentence tk :filter filter :direction :both)) (reverse pre))) (rhs (mappend (lambda (tk) - (linerialize sentence tk filter :direction :both)) + (linearize sentence tk :filter filter :direction :both)) (reverse pos)))) (case direction (:both (append lhs (list token) rhs)) @@ -57,7 +57,7 @@ (if (member (token-deprel (car alist)) deps :test #'equal) (aux (cdr alist)) alist)))) - (aux a-list-tokens))) + (reverse (aux (reverse (aux a-list-tokens)))))) (defun get-groups (deprel sent &optional (out *standard-output*)) @@ -67,11 +67,20 @@ (format out "~{~a~^ ~}~%" (mapcar #'token-form alist))) (remove-duplicates (mapcar (lambda (tk) (list-trim '("case" "advmod" "punct" "cc" "det") - (linerialize sent (token-parent tk sent) nil))) + (linearize sent (token-parent tk sent)))) leaves) :test #'equal)))) -;; Execute (get-groups "compound" a-sentence) +(defun get-groups (deprel sent) + (let ((leaves (remove-if-not (lambda (tk) (equal deprel (token-deprel tk))) + (sentence-tokens sent)))) + (mapcar (lambda (alist) + (format nil "~{~a~^ ~}" (mapcar #'token-form alist))) + (remove-duplicates (mapcar (lambda (tk) + (list-trim '("case" "advmod" "punct" "cc" "det") + (linearize sent (token-parent tk sent)))) + leaves) :test #'equal)))) +;; Execute (get-groups "compound" a-sentence) ;;; problems in the misc field diff --git a/packages.lisp b/packages.lisp index e3bd218..0074246 100644 --- a/packages.lisp +++ b/packages.lisp @@ -31,8 +31,9 @@ #:write-conllu-to-stream #:levenshtein #:diff - #:insert-token - #:remove-token + #:sentence-binary-tree + ; #:insert-token + ; #:remove-token #:adjust-sentence #:non-projective? diff --git a/rules.lisp b/rules.lisp index 9289a3a..f42de66 100644 --- a/rules.lisp +++ b/rules.lisp @@ -35,11 +35,9 @@ ;;acessores e leitor de regras - (defun rls (rule) (cadr rule)) - (defun rhs (rule) (caddr rule)) @@ -106,12 +104,12 @@ (caddr condition)))) pattern))) -;; - (defun apply-rules (sentences rules recursive) (dolist (sentence sentences) - (apply-rules-in-sentence sentence rules (cdr (assoc "sent_id" (sentence-meta sentence) :test #'equalp)) recursive))) + (apply-rules-in-sentence sentence rules + (cdr (assoc "sent_id" (sentence-meta sentence) :test #'equalp)) + recursive))) (defun apply-rules-in-sentence (a-sentence rules sent-id recursive &optional old-rules-applied)