-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtext_cleanup.js
72 lines (67 loc) · 2.14 KB
/
text_cleanup.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
//functions for text cleaning!
//removes all special characters (this does not include words, whitespaces or any of the following characters: (),’'"!?.)
// function removeSpecialSign(el){
// return el.replace(/[^\s\w(),’'"&!?.ïöéëüóòèìíáà][\W]*/g, "");
// }
// trims every run of more than 3 consecutive special characters down to its first 3. Special characters include: (),’'"!?.
// Shortens every run of consecutive "signs" to at most three characters.
// A sign is any character that is neither whitespace nor a regex word
// character (\w: ASCII letters, digits and underscore), so accented letters
// and emoji count as signs too.
// A surrogate pair (e.g. an emoji) is counted as ONE character, so the cut
// never lands in the middle of a pair and leaves a lone surrogate behind.
// el: the string to process.
// returns: a new string with long sign runs shortened; everything else unchanged.
function removeMultipleSign(el){
  var MAX_SIGNS = 3;
  return el.replace(/[^\s\w]{1,}/g, function (run) {
    var kept = 0; // characters kept so far
    var end = 0;  // cut position, in UTF-16 code units
    while (end < run.length && kept < MAX_SIGNS) {
      var unit = run.charCodeAt(end);
      // charCodeAt past the end yields NaN, which fails both range checks.
      var next = run.charCodeAt(end + 1);
      var isSurrogatePair = unit >= 0xD800 && unit <= 0xDBFF &&
        next >= 0xDC00 && next <= 0xDFFF;
      end += isSurrogatePair ? 2 : 1;
      kept += 1;
    }
    return run.slice(0, end);
  });
}
//meant to insert a whitespace after a special character, but the replacement only prepends an empty string, so no space is actually added. Special characters include: (),’'"!?.
// Visits every run of signs (characters that are neither whitespace nor \w)
// that is followed by at least one more non-word character, and re-emits the
// run with an empty prefix, so the text comes back unchanged.
// The run is returned from a replacer function and is therefore inserted
// literally: "$$", "$&", "$`" and "$'" inside the text are no longer expanded
// as replacement patterns (previously "$$ " was silently turned into "$ ").
// NOTE(review): the function name suggests a space should be inserted here,
// but the prefix has always been the empty string, so none ever was. Confirm
// the intended behaviour before changing the prefix.
// el: the string to process.
// returns: the processed string.
function spaceAfterSign(el){
  return el.replace(/[^\s\w]{1,}[\W]{1,}/g, function (signRun) {
    return "" + signRun;
  });
}
//collapses every run of 2 or more whitespace characters into a single space
// Replaces each run of two or more whitespace characters (spaces, tabs,
// line breaks, ...) with one space. A single whitespace character is kept
// as it is.
// el: the string to process.
// returns: the string with whitespace runs collapsed.
function multipleSpaces(el){
  var whitespaceRun = /\s{2,}/g;
  var singleSpace = " ";
  return el.replace(whitespaceRun, singleSpace);
}
// Ensures the text is terminated: when the last character is neither "." nor
// a space, ". " is appended; otherwise the text is returned as it is.
// An empty string has no last character and therefore also gets ". ".
// The suffix is added by plain concatenation, so "$" sequences in the text
// (e.g. "$$" or "$&") are kept verbatim instead of being interpreted as
// replacement patterns.
// el: the string to process.
// returns: the text, guaranteed to end in "." or a space.
function placeDotEndSentence(el){
  var lastChar = el.slice(-1);
  if (lastChar === "." || lastChar === " ") {
    return el;
  }
  return el + ". ";
}
// function applySentenceCase(str) {
// return str.replace(/.+?[\.\?\!](\s|$)/g, function (txt) {
// return txt.charAt(0).toUpperCase() + txt.substr(1).toLowerCase();
// });
// }
// Deletes every "." from the text and converts the result to lower case.
// str: the string to process.
// returns: the lower-cased text without dots.
function removeDots (str){
  var withoutDots = str.replace(/\./g, '');
  return withoutDots.toLowerCase();
}
// For very short input (length below 4) strips every character that is
// neither whitespace nor a word character (\w); longer input is returned
// untouched.
// str: the string to process.
// returns: the stripped short text, or the original text when it is longer.
function removeSignsOnly(str){
  return str.length < 4 ? str.replace(/[^\s\w]/g, '') : str;
}
//runs all the cleanup functions and returns the cleaned-up text
// Runs the clean-up steps over the text, one after another, and returns the
// final result. Order matters: each step receives the previous step's output.
// (The removeSpecialSign and applySentenceCase steps are disabled and are not
// part of the pipeline.)
// value: the raw text.
// returns: the cleaned-up text.
function cleanUp(value){
  var steps = [
    removeMultipleSign,
    multipleSpaces,
    spaceAfterSign,
    placeDotEndSentence,
    removeDots,
    removeSignsOnly
  ];
  var textTemp = value;
  for (var i = 0; i < steps.length; i++) {
    textTemp = steps[i](textTemp);
  }
  return textTemp;
}
// sentiment = new Sentimood();
// var analyze = sentiment.analyze(),
// positivity = sentiment.positivity(),
// negativity = sentiment.negativity();