Skip to content

Latest commit

 

History

History
101 lines (91 loc) · 1.79 KB

vectorized_pos.md

File metadata and controls

101 lines (91 loc) · 1.79 KB

vectorized_pos(text)

This method returns a vectorized form of the given text. Each scalar maps to an index derived from the dictionary used in the package.

POS_type = 
    {
        "noun" : 1,
        "pronoun" : 2,
        "adjective" : 3,
        "verb" : 4,
        "adverb" : 5,
        "puncuation": 6
    }
noun_response = [ POS_type, prefix_id, suffix_id]
pronoun_response = [ POS_type, pronoun_id, pronoun_tag, number_tag, honorificity, case, proximity]
adjective_response = [ POS_type, prefix_id, suffix_id]
verb_response = [ POS_type, verb_id, tense_id, person_id, negation_id]
adverb_response = [ POS_type, verb_id, tense_id, person_id, negation_id]
puncuation_response = [ POS_type, prefix_id, suffix_id]
tense_map = 
    {   
        "sb": 0,
        "gb": 1,
        "pb": 2,
        "bo": 3,
        "so": 4,
        "no": 5,
        "go": 6,
        "po": 7,
        "sv": 8,
        "gv": 9,
        "vo": 10
    }
person_map = 
    {
        "am": 0,
        "ap": 1,
        "tm": 2,
        "tu": 3,
        "ae": 4,
        "in": 5,
        "er": 6,
        "eR": 7,
    }
pronoun_map = 
    {
        "pronoun_tag": {
            "Pro.Het": 0,
            "Pro.Pers2": 1,
            "Pro.Pers1": 2,
            "Pro.Pers3": 3,
            "Pro.Dem": 4,
            "Pro.Indef": 5,
            "Pro.Inter": 6,
            "Pro.Rel": 7,
            "Pro.Ref": 8,
            "Pro.Rec": 9,
            "Pro.CoRel": 10,
            "Pro.Rel.CoRel": 11,
            "Pro.Inc": 12,
            "Pro.Pers3.CoRel": 13
        },
        "number_tag": {"Sing": 0, "Plu": 1},
        "honorificity": {"intimate": 0, "informal": 1, "formal": 2},
        "case": {"genitive": 0, "direct": 1, "objective": 2},
        "proximity": {"proximal": 0, "medial": 1, "distal": 2}
    }