-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathwordcloud.lua
222 lines (199 loc) · 6.72 KB
/
wordcloud.lua
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
--[[
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% wordcloud %%
%% drawing wordclouds %%
%% with METAPOST and Lua %%
%% [email protected] %%
%% Version 0.2 (septembre 2023) %%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% This work may be distributed and/or modified under the conditions of
% the LaTeX Project Public License, either version 1.3c of this license
% or (at your option) any later version. The latest version of this
% license is in http://www.latex-project.org/lppl.txt
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
]]
-- Read a whole file into a string.
-- file: path of the file to read; it is opened in binary mode.
-- Returns the complete content of the file.
-- Raises an error (through assert) when the file cannot be opened.
function wc_file2string(file)
  local handle = assert(io.open(file, "rb"))
  local data = handle:read("*all")
  handle:close()
  return data
end
-- Append every element of t2 to the end of t1, in order.
-- t1 is modified in place and is also the return value; t2 is only read.
function wc_table_concat(t1,t2)
  local count = #t2 -- evaluated once, before anything is appended
  local idx = 1
  while idx <= count do
    t1[#t1 + 1] = t2[idx]
    idx = idx + 1
  end
  return t1
end
-- French words that are ignored by default.
-- Returns a new array on every call.
function wc_ignor_fr()
  return {
    "et", "donc", "alors", "les", "ainsi", "des", "que", "qui",
    "sont", "par", "dans", "est", "pour", "une", "peut", "avec",
    "sans", "Les", "son", "sur", "ces", "ses", "tandis", "quand",
  }
end
-- English words that are ignored by default.
-- Returns a new array on every call. "the" and "with" appear twice in
-- the list; the entries are kept exactly as they are.
function wc_ignor_en()
  return {
    "and", "so", "the", "the", "hence", "some", "that", "are",
    "is", "have", "has", "in", "on", "could", "should", "with",
    "be", "this", "these", "where", "when", "not", "with", "can",
  }
end
-- Global list of the words that are left out of the word clouds.
-- It starts with the default French words followed by the English ones.
wordcloud_ignor_words = {}
for _, defaults in ipairs({wc_ignor_fr(), wc_ignor_en()}) do
  wc_table_concat(wordcloud_ignor_words, defaults)
end
-- Extend the global list of ignored words.
-- tab: array of words; its elements are appended, in order, to
--      wordcloud_ignor_words.
function wc_add_ignored_words(tab)
  local ignored = wordcloud_ignor_words
  wc_table_concat(ignored, tab)
end
-- Build an array of words from a string of the form
-- word1,word2,word3,etc.
-- Every run of whitespace is removed first, then the string is split at
-- the commas with string.explode (an extension provided by LuaTeX).
function wc_string_to_tab(strWord)
  local compact = string.gsub(strWord, "%s+", "") -- keep only the first result
  local pieces = string.explode(compact, ",")
  return pieces
end
-- Turn a text into the list of words that may appear in the word cloud.
-- mystr: the text to analyse (UTF-8 is assumed for the multi-byte
--        separators handled below).
-- Returns an array of words in their order of appearance, repetitions
-- included. Words listed in the global wordcloud_ignor_words and words of
-- at most two bytes are left out; the words "LaTeX" and "TeX" are returned
-- as "\LaTeX" and "\TeX".
function wc_build_word_table(mystr)
  -- Multi-byte separators that the ASCII classes %p and %c do not cover.
  -- Each one is replaced as a whole byte sequence: inside a [...] class a
  -- Lua pattern would match its bytes one by one and damage every other
  -- UTF-8 letter sharing one of those bytes (for instance "û" and "»").
  local separators = {
    "’", "‘", "“", "”", "…", "«", "»", "–", "—", "€",
    "\194\160",     -- U+00A0 no-break space
    "\226\128\175", -- U+202F narrow no-break space
  }
  -- ASCII punctuation and control characters (newlines, tabs) become spaces
  local text = mystr:gsub("[%p%c]+", " ")
  for i = 1, #separators do
    text = text:gsub(separators[i], " ")
  end

  -- Set of ignored words: a whole-word lookup also catches words at the
  -- very start or end of the text and consecutive repetitions, and never
  -- interprets a user-supplied word as a pattern.
  local ignored = {}
  for i = 1, #wordcloud_ignor_words do
    ignored[wordcloud_ignor_words[i]] = true
  end

  local words = {}
  for token in text:gmatch("%S+") do
    if not ignored[token] then
      -- the backslash was removed with the punctuation: put the macro back
      if token == "LaTeX" or token == "TeX" then
        token = "\\" .. token
      end
      -- delete 2 size words (length counted in bytes)
      if #token > 2 then
        words[#words + 1] = token
      end
    end
  end
  return words
end
-- Count how many times each word occurs in a list.
-- t: array of words.
-- Returns a table mapping every distinct word to its number of occurrences.
function wc_build_table_weight(t)
  local counts = {}
  for _, word in ipairs(t) do
    local seen = counts[word]
    if seen then
      counts[word] = seen + 1
    else
      counts[word] = 1
    end
  end
  return counts
end
-- Convert a word -> weight table into a sorted array of normalised weights.
-- table_weight: table mapping each word to its weight (a number, or a
--               string that Lua can coerce to a number).
-- Returns an array of {word, weight / sum of all weights} pairs, sorted by
-- decreasing weight. Words with equal weights are ordered by their name so
-- that the result does not depend on the traversal order of pairs().
function wc_table_to_tabular(table_weight)
  local total_occ = 0
  for _, occ in pairs(table_weight) do
    total_occ = total_occ + occ
  end
  local tabular_weight = {}
  for word, occ in pairs(table_weight) do
    tabular_weight[#tabular_weight + 1] = {word, occ / total_occ}
  end
  table.sort(tabular_weight, function (a, b)
    if a[2] ~= b[2] then
      return a[2] > b[2]
    end
    -- tie-break; strict ordering as required by table.sort
    return tostring(a[1]) < tostring(b[1])
  end)
  return tabular_weight
end
-- Build the MetaPost code that declares the words and their weights and
-- calls draw_wordcloud.
-- table_weight: table mapping each word to its weight.
-- maximum: largest number of words to emit (default 50); a numeric string
--          is accepted as well.
-- rotation: value passed as third argument of draw_wordcloud (default 0).
-- Returns the MetaPost code as a string.
function wc_build_mp_code(table_weight,maximum,rotation)
  -- optional arguments; tonumber lets a numeric string take part in the
  -- numeric comparison of the loop below instead of raising an error
  maximum = tonumber(maximum) or 50
  rotation = rotation or 0
  local tabular_weight = wc_table_to_tabular(table_weight)
  local header = [[
string words[];
numeric weights[];
]]
  -- pieces are collected and joined once instead of growing a string
  local pieces = {header}
  for i = 1, #tabular_weight do
    local row = tabular_weight[i]
    local weight = tostring(row[2])
    -- Very small weights are printed by Lua in exponent notation (1e-05),
    -- which MetaPost's default scaled number system does not read as a
    -- numeric token: fall back to a fixed-point representation.
    if weight:find("[eE]") then
      weight = string.format("%.10f", row[2])
    end
    pieces[#pieces + 1] = "words[" .. i .. "]:=\"" .. row[1] .. "\";"
    pieces[#pieces + 1] = "weights[" .. i .. "]:=" .. weight .. ";"
    if i >= maximum then
      break
    end
  end
  pieces[#pieces + 1] = "draw_wordcloud(words,weights," .. rotation .. ","
    .. math.min(maximum, #tabular_weight) .. ");"
  return table.concat(pieces)
end
-- Parse a list of words and weights into a word -> weight table.
-- list: string of the form (word1,weight1);(word2,weight2); etc.
-- Returns a table mapping each word to its weight; the weight is kept as
-- the string found in the list.
-- Items that do not have the form (word,weight) are skipped, including
-- the empty item that follows a trailing ";".
function wc_list_to_table(list)
  local table_weight = {}
  for item in list:gmatch("[^;]+") do
    -- greedy captures: the item is split at its last comma
    local word, weight = item:match("%((.+),(.+)%)")
    if word then
      table_weight[word] = weight
    end
  end
  return table_weight
end
-- Count the entries of a table, whatever their keys are.
-- table: the table to measure (this parameter hides the standard table
--        library inside the function).
-- Returns the number of key/value pairs visited by pairs().
function wc_size_of_table(table)
  local entries = 0
  for _ in pairs(table) do
    entries = entries + 1
  end
  return entries
end
-- Build the MetaPost assignments that define the colors of the word cloud.
-- colors: list of colors separated with semicolons; a trailing semicolon
--         is allowed but not required.
-- Returns a string that assigns each color to wordcloud_colors[i] and the
-- number of colors to wordcloud_colors_number.
function wc_build_color_list(colors)
  local pieces = {}
  local count = 0
  -- splitting on runs of ";" ignores empty items, so the last color is
  -- never truncated when the list has no trailing semicolon
  for color in colors:gmatch("[^;]+") do
    count = count + 1
    pieces[#pieces + 1] = "wordcloud_colors[" .. count .. "]:=" .. color .. ";"
  end
  pieces[#pieces + 1] = "wordcloud_colors_number:=" .. count .. ";"
  return table.concat(pieces)
end
-- build mp code for the wordcloud of a list given in LaTeX command
-- str: list of words and weights, (word1,weight1);(word2,weight2); etc.
-- rotation: value forwarded to wc_build_mp_code.
-- scale: argument written into set_wordcloud_scale(...).
-- margin: argument written into set_box_margin(...).
-- usecolor: colors are enabled when this is the string "true".
-- colors: list of colors handed to wc_build_color_list; "" means none.
-- The generated mplibcode environment is sent to TeX with tex.sprint;
-- nothing is returned. Every word of the list is drawn.
function wc_build_wordcloud(str,rotation,scale,margin,usecolor,colors)
  -- no global is written here: the number of words to draw is the size of
  -- the list itself
  local weights = wc_list_to_table(str)
  local word_count = wc_size_of_table(weights)
  local output = [[\begin{mplibcode}[wordcloud]
input wordcloud
beginfig(0);
]]
  if usecolor == "true" then
    output = output .. "wordcloud_use_color(true);"
    if colors ~= "" then
      output = output .. wc_build_color_list(colors)
    end
  end
  output = output .. "set_wordcloud_scale(" .. scale .. ");"
  output = output .. "set_box_margin(" .. margin .. ");"
  output = output .. wc_build_mp_code(weights, word_count, rotation)
  output = output .. "endfig;\\end{mplibcode}"
  tex.sprint(output)
end
-- build mp code for the wordcloud of a file given in LaTeX command
-- file: path of the text file whose words are counted.
-- number: maximum number of words, forwarded to wc_build_mp_code.
-- rotation: value forwarded to wc_build_mp_code.
-- scale: argument written into set_wordcloud_scale(...).
-- margin: argument written into set_box_margin(...).
-- usecolor: colors are enabled when this is the string "true".
-- colors: list of colors handed to wc_build_color_list; "" means none.
-- The generated mplibcode environment is sent to TeX with tex.sprint;
-- nothing is returned.
function wc_build_wordcloud_file(file,number,rotation,scale,margin,usecolor,colors)
  local weights = wc_build_table_weight(wc_build_word_table(wc_file2string(file)))
  local header = [[\begin{mplibcode}[wordcloud]
input wordcloud
beginfig(0);
]]
  local parts = {header}
  local function emit(chunk)
    parts[#parts + 1] = chunk
  end
  if usecolor == "true" then
    emit("wordcloud_use_color(true);")
    if colors ~= "" then
      emit(wc_build_color_list(colors))
    end
  end
  emit("set_wordcloud_scale(" .. scale .. ");")
  emit("set_box_margin(" .. margin .. ");")
  emit(wc_build_mp_code(weights, number, rotation))
  emit("endfig;\\end{mplibcode}")
  tex.sprint(table.concat(parts))
end