-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathinstantiate_objects_in_xml.py
70 lines (50 loc) · 2.64 KB
/
instantiate_objects_in_xml.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# Official code for creating Sentence objects.
# Works as intended.
# Builds Sentence objects from a given XML file and prints them to the destination file.
import xml.etree.ElementTree as ET
import collections
from itertools import chain
import os
import string
import Sentence as Sentence
def _write_sentence_report(new_sentence, f):
    """Append the plain-text report of one sentence object to the open file *f*."""
    print("This sentence is in the file: ", new_sentence.xml, file=f)
    print("Id sentence: ", new_sentence.id_sentence, file=f)
    print("Sentiment value of the sentence: ", new_sentence.sentiment_value, file=f)
    print("Sentiment of the sentence: ", new_sentence.sentiment, file=f)
    print("Aspects: ", file=f)
    for aspect in new_sentence.aspects:
        print(aspect, file=f)
    print("\nNumber of tokens in this sentence: ", new_sentence.number_of_tokens, file=f)
    print("Tokens in this sentence: ", file=f)
    print(str(new_sentence), file=f)
    print("\n ¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨¨\n", file=f)


def sentence_extractor(xml_name, aspects, file_destiny):
    """Create a Sentence object per <sentence> element and append its report to a file.

    Args:
        xml_name: Path of the XML file to parse.
        aspects: Dict of aspects already extracted; only its keys are used.
        file_destiny: Path of the text file the reports are appended to.

    Raises:
        KeyError: If a <sentence> element lacks the "id", "sentimentValue"
            or "sentiment" attribute.
    """
    # Sets give O(1) membership tests inside the per-word loop.
    aspect_terms = set(aspects)
    punctuation = set(string.punctuation)

    root = ET.parse(xml_name).getroot()
    # ".//sentence" matches <sentence> elements at any depth below the root.
    sentences = root.findall(".//sentence")
    if not sentences:
        # Nothing to report; leave the destination file untouched.
        return

    # Open the destination once instead of reopening it for every sentence.
    with open(file_destiny, "a", encoding="utf-8") as f:
        for sentence in sentences:
            new_sentence = Sentence.Sentence(
                sentence.attrib["id"],
                sentence.attrib["sentimentValue"],
                sentence.attrib["sentiment"],
                xml_name,
            )

            word_counter = 0
            # <word> elements exactly three levels below the sentence
            # (container -> token -> word), visited in document order.
            for word in sentence.findall("./*/*/word"):
                text = word.text
                new_sentence.add_token(text)
                # Matching is case-sensitive on the raw text; the aspect is
                # stored lower-cased.
                if text in aspect_terms:
                    new_sentence.add_aspect(text.lower())
                # Single punctuation characters are not counted as tokens.
                if text not in punctuation:
                    word_counter += 1
            new_sentence.number_of_tokens = word_counter

            _write_sentence_report(new_sentence, f)
if __name__ == "__main__":
    # Manual smoke run: extract the sentences of a sample XML file using a
    # small hand-written aspect dictionary and append the report to a text file.
    sample_aspects = {
        "morality": 0,
        "film": 1,
        "dog": 2,
    }
    sentence_extractor(
        xml_name="x2145_1.xml",
        aspects=sample_aspects,
        file_destiny="generated_xmls.txt",
    )