-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcoll.analysis.r
488 lines (385 loc) · 46.6 KB
/
coll.analysis.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
# Coll.analysis V 3.2a
# Collostructional analysis: Computing the degree of association between words and words/constructions
# Copyright (C) 2007 Stefan Th. Gries (Latest changes in this version: 03/28/2010)
# NOTE(review): when this file is sourced at top level, the next line removes every
# object from the calling workspace before the functions below are defined.
rm(list=ls()) # cleanup
# Interactive entry point for the family of collostructional analyses.
# Prints the program banner (author, references, licence, citation request),
# pausing twice via pause(), then asks which analysis to run and dispatches to
# collostructions(), dist.collexemes() or covar.collexemes().
# Takes no arguments; the result is whatever the selected analysis function
# returns (NULL if the menu is left with 0).
coll.analysis<-function() { # FUNCTION FOR THE FAMILY OF COLLOSTRUCTIONAL ANALYSES
  cat("\nColl.analysis 3.2a was written by Stefan Th. Gries (<http://www.linguistics.ucsb.edu/faculty/stgries/>).\n")
  cat("It computes all methods belonging to the family of collostructional analysis as developed by\nAnatol Stefanowitsch and Stefan Th. Gries. Thus, it can also be used to compute general\ncollocational strengths of word pairs or distinctive collocates.\n\n")
  cat("Papers involving collostructional analysis by these authors include:\nStefanowitsch, Anatol & Stefan Th. Gries. 2003. Collostructions: Investigating the interaction\n between words and constructions. International Journal of Corpus Linguistics 8.2:209-43.\n")
  # Citation title corrected: "perspective", not "perspectives".
  cat("Gries, Stefan Th. & Anatol Stefanowitsch. 2004a. Extending collostructional analysis:\n A corpus-based perspective on 'alternations'. International Journal of Corpus Linguistics 9.1:97-129.\nGries, Stefan Th. & Anatol Stefanowitsch. 2004b. Co-varying collexemes in the into-causative.\n")
  cat(" In: Achard, Michel & Suzanne Kemmer (eds.). Language, Culture, and Mind. Stanford, CA: CSLI, p. 225-36.\nGries, Stefan Th. 2005. Syntactic priming: A corpus-based approach. Journal of Psycholinguistic Research 34.4:365-99.\nGries, Stefan Th. & Stefanie Wulff. 2005. Do foreign language learners also have constructions?\n")
  cat(" Evidence from priming, sorting, and corpora. Annual Review of Cognitive Linguistics 3:182-200.\nStefanowitsch, Anatol & Stefan Th. Gries. 2005. Co-varying collexemes. Corpus Linguistics\n and Linguistic Theory 1.1:1-43.\nGries, Stefan Th. & Anatol Stefanowitsch. 2010. Cluster analysis and the identification of collexeme\n classes.")
  # Leading space added so the output no longer reads "classes.In: Newman".
  cat(" In: Newman, John & Sally Rice (eds.). Empirical and Experimental Methods in Cognitive/Functional\n Research. Stanford, CA: CSLI, p. 73-90.\n\nFor papers that document the predictive superiority of collostructional analysis over raw frequency counts, cf:\nGries, Stefan Th., Beate Hampe, & Doris Schönefeld. 2005. Converging evidence: [...].")
  cat(" Cognitive Linguistics 16.4:635-76.\nGries, Stefan Th., Beate Hampe, & Doris Schönefeld. 2010. Converging evidence II: [...]. In: Newman, John &\n Sally Rice (eds.). Experimental and Empirical Methods in Cognitive/Functional Research. Stanford, CA: CSLI, p. 59-72.\n\n")
  cat("You can obtain all these papers (and many more) from my website.\n\n----------------------------\nThis program is free software; you can redistribute it and/or modify it under the terms of the\nGNU General Public License as published by the Free Software Foundation; either version 2 of\nthe License, or (at your option) any later version.\n")
  cat(" Because the program is licensed free of charge, there is no warranty for the program, to the\nextent permitted by applicable law. Except when otherwise stated in writing the copyright holders\nand/or other parties provide the program 'as is' without warranty of any kind, either expressed\nor implied, including, but not limited to, the implied warranties of merchantability and fitness\nfor a particular purpose.")
  cat(" The entire risk as to the quality and performance of the program is\nwith you. Should the program prove defective, you assume the cost of all necessary servicing,\nrepair or correction.\n In no event unless required by applicable law or agreed to in writing will any copyright holder,\nor any other party who may modify and/or redistribute the program as permitted above, be liable\nto you for damages, including any general, special, incidental or consequential damages arising")
  cat("\nout of the use or inability to use the program (including but not limited to loss of data or\ndata being rendered inaccurate or losses sustained by you or third parties or a failure of the\nprogram to operate with any other programs), even if such holder or other party has been advised\nof the possibility of such damages.\n\nAcknowledgments: I thank Gaëtanelle Gilquin and Stefanie Wulff for pointing out small bugs to me, which have been fixed in this version.\nLatest changes in this version: 03/28/2010")
  cat("\n----------------------------\n\nYou should have received this program with a collection of example files and a readme file;\nI recommend that you have a look at them before you execute this program for the first time ...\n\n"); pause()
  cat("\nIf you use the program, PLEASE QUOTE IT as follows:\nGries, Stefan Th. 2007. Coll.analysis 3.2a. A program for R for Windows 2.x.\n\n"); pause()
  # 'choices' is spelled out instead of relying on partial argument matching.
  which.analysis<-menu(choices=c("collocational/ collostructional strength, i.e. collexeme analysis (cf. <1*.txt> for an example)",
    "(multiple) distinctive collocates or distinctive collexeme analysis (cf. <2*.txt> for an example)",
    "co-varying collexeme analysis (cf. <3*.txt> for an example)"), title="\nWhich kind of analysis do you want to perform?")
  # switch() evaluates only the selected call; a selection of 0 runs nothing.
  switch(which.analysis, collostructions(), dist.collexemes(), covar.collexemes())
} # END OF FUNCTION FOR THE FAMILY OF COLLOSTRUCTIONAL ANALYSES
# Interactive collexeme / collocational-strength analysis.
#
# Prompts for: the name of the word/construction, the corpus size, the frequency
# of the word/construction, the association measure, the sort order and the
# number of decimals. It then reads a tab-separated table with a header row
# (column 1: word, column 2: frequency in the corpus, column 3: frequency
# with/in the word/construction).
#
# For every row a 2-by-2 table is built, with
#   a = word with/in the construction,  b = construction without the word,
#   c = word elsewhere,                 d = everything else.
# From it the function derives the expected frequency, "faith" (a / word
# frequency), both delta-p values, the chosen association measure and the
# direction of the relation (attraction / repulsion / chance).
#
# A second table gives the same measures for hypothetical unattested words
# (a = 0) whose corpus frequencies are 10^1, 10^2, ... up to the order of
# magnitude of the corpus size.
#
# Output is appended to a user-chosen text file or printed to the terminal.
# Relies on pause(), fye() and llr(), which are defined outside this block.
collostructions<-function() { # FUNCTION FOR COLLEXEME ANALYSIS
  cat("\nC o l l o c a t i o n a l / c o l l e x e m e a n a l y s i s . . .\n")

  # introduction
  cat("\nThis kind of analysis computes the degree of attraction and repulsion between\none word or construction and many other words using a user-defined statistic;\nall these statistics are based on 2-by-2 tables, and attraction and repulsion\nare indicated in a separate column in the output.\n")

  # input of parameters
  cat("\nWhat is the word W / the name of the construction C you investigate (without spaces)?\n")
  construction.name<-scan(nmax=1, what="char", quiet=TRUE)
  if (length(construction.name)==0) construction.name<-"some_W_or_C"
  cat("\nEnter the size of the corpus (in constructions or words) without digit grouping symbols!\n")
  corpus<-scan(nmax=1, quiet=TRUE)
  while (corpus<=0) { cat("\nWith a value of 0 or smaller, no such tests can be computed - enter the correct corpus size!\n"); corpus<-scan(nmax=1, quiet=TRUE) }
  cat("\nEnter the frequency of", construction.name, "in the corpus you investigate (without digit grouping symbols)\n")
  construction.freq<-scan(nmax=1, quiet=TRUE)
  while (construction.freq<=0) { cat("\nWith a value of 0 or smaller, no such tests can be computed - enter the correct word/construction frequency!\n"); construction.freq<-scan(nmax=1, quiet=TRUE) }
  which.index<-menu(choices=c("-log10 (Fisher-Yates exact, one-tailed) (= default)", "log-likelihood", "Mutual Information", "Chi-square", "log10 of odds ratio (adds 0.5 to each cell)"), title="\nWhich index of association strength do you want to compute?")
  which.sort<-menu(choices=c("alphabetically", "co-occurrence frequency", "faith", "collostruction strength"), title="\nHow do you want to sort the output?")
  cat("\nEnter the number of decimals you'd like to see in the results (and '99', when you want the default output)!\n")
  which.accuracy<-scan(nmax=1, quiet=TRUE); cat("\n")
  while (which.accuracy<=0) { cat("\nWith a value of 0 or smaller, the output might not be very meaningful - enter the correct number of decimals!\n"); which.accuracy<-scan(nmax=1, quiet=TRUE) }
  cat("\nTo compute the collocational strength of one word W to many other words <A, B, ..., ?>,\nyou need a text file with the following kind of table (with column names!):\n\nWord\tFreq_A-?_in_Corpus\tFreq_A-?_&_W\nA\t...\t\t\t...\nB\t...\t\t\t...\n...\t...\t\t\t...\n\nTo compute the collostructional strength of one construction C to the words <A, B, ..., ?>,\nyou need a text file with the following kind of table (with column names!):\n\nWord\tFreq_A-?_in_Corpus\tFreq_A-?_in_C\nA\t...\t\t\t...\nB\t...\t\t\t...\n...\t...\t\t\t...\n\n")
  cat("Your table must not have decimal points/separators and ideally has no spaces (for the latter, use '_' instead)!\nAlso, don't forget that R's treatment of alphanumeric characters is case-sensitive!\n\nChoose this text file with the raw data!\t"); pause()
  data<-read.table(file.choose(), header=TRUE, fileEncoding = "UTF-8", sep="\t", quote="", comment.char=""); cases<-nrow(data); cat("\n")
  which.output<-menu(choices=c("text file (= default)", "terminal"), title="Where do you want the output ('text file' will append to already existing file with the same name)?")

  # Association measure for one 2-by-2 table, rounded to the requested number
  # of decimals. o.* are observed cells, e.* expected cells, w.freq is the
  # corpus frequency of the word the table belongs to. One helper serves both
  # loops below so that the real and the hypothetical words cannot drift apart.
  # The chi-square arm is N*(ad-bc)^2 / ((a+b)*(a+c)*(b+d)*(c+d)) with
  # a+b = construction.freq and a+c = w.freq.
  assoc.strength<-function(o.a, o.b, o.c, o.d, e.a, e.b, e.c, e.d, w.freq) {
    round(switch(which.index,
      fye(o.a, e.a, construction.freq, corpus, w.freq),
      llr(o.a, o.b, o.c, o.d, e.a, e.b, e.c, e.d),
      log((o.a/e.a), 2),
      (corpus*((o.a*o.d)-(o.b*o.c))^2)/(construction.freq*w.freq*(o.b+o.d)*(o.c+o.d)),
      log(((o.a+0.5)/(o.b+0.5))/((o.c+0.5)/(o.d+0.5)), 10)), which.accuracy)
  }

  # computation
  words<-data[,1]; word.freq<-data[,2]; obs.freq<-data[,3]
  exp.freq<-faith<-delta.p.constr.to.word<-delta.p.word.to.constr<-relation<-coll.strength<-c(rep(0, cases))
  for (i in seq_len(cases)) { # seq_len(): an empty table yields no iterations
    obs.freq.a<-obs.freq[i]
    obs.freq.b<-construction.freq-obs.freq.a
    obs.freq.c<-word.freq[i]-obs.freq.a
    obs.freq.d<-corpus-(obs.freq.a+obs.freq.b+obs.freq.c)
    exp.freq.a<-construction.freq*word.freq[i]/corpus; exp.freq[i]<-round(exp.freq.a, which.accuracy)
    exp.freq.b<-construction.freq*(corpus-word.freq[i])/corpus
    exp.freq.c<-(corpus-construction.freq)*word.freq[i]/corpus
    exp.freq.d<-(corpus-construction.freq)*(corpus-word.freq[i])/corpus
    faith[i]<-round((obs.freq.a/word.freq[i]), which.accuracy)
    delta.p.constr.to.word[i]<-round((obs.freq.a/(obs.freq.a+obs.freq.b))-(obs.freq.c/(obs.freq.c+obs.freq.d)), which.accuracy)
    delta.p.word.to.constr[i]<-round((obs.freq.a/(obs.freq.a+obs.freq.c))-(obs.freq.b/(obs.freq.b+obs.freq.d)), which.accuracy)
    coll.strength[i]<-assoc.strength(obs.freq.a, obs.freq.b, obs.freq.c, obs.freq.d, exp.freq.a, exp.freq.b, exp.freq.c, exp.freq.d, word.freq[i])
    if (obs.freq.a>exp.freq.a) {
      relation[i]<-"attraction"
    } else if (obs.freq.a<exp.freq.a) {
      relation[i]<-"repulsion"
    } else {
      relation[i]<-"chance"
    }
  }
  output.table<-data.frame(words, word.freq, obs.freq, exp.freq, relation, faith, delta.p.constr.to.word, delta.p.word.to.constr, coll.strength)
  sort.index<-switch(which.sort, order(words), order(-obs.freq, words), order(-faith, words), order(relation, -coll.strength))
  output.table<-output.table[sort.index,]

  # hypothetical repulsion strength of unattested verbs
  corp.size<-as.integer(log(corpus, 10)) # number of whole orders of magnitude in the corpus size
  absents.words<-absents.obs.freqs<-absents.exp.freqs<-absents.delta.p.constr.to.word<-absents.delta.p.word.to.constr<-absents.collstrengths<-c(rep(0, corp.size))
  for (i in seq_len(corp.size)) {
    # Corpus frequency of the hypothetical word. The association measure must
    # use this value, not word.freq[i] of an unrelated row of the input table.
    hyp.freq<-10^i
    absents.words[i]<-letters[i]
    absents.obs.freqs[i]<-hyp.freq
    obs.freq.a<-0
    obs.freq.b<-construction.freq
    obs.freq.c<-hyp.freq
    obs.freq.d<-corpus-(construction.freq+hyp.freq)
    exp.freq.a<-construction.freq*hyp.freq/corpus; absents.exp.freqs[i]<-round(exp.freq.a, which.accuracy)
    exp.freq.b<-construction.freq*(corpus-hyp.freq)/corpus
    exp.freq.c<-(corpus-construction.freq)*hyp.freq/corpus
    exp.freq.d<-(corpus-construction.freq)*(corpus-hyp.freq)/corpus
    absents.delta.p.constr.to.word[i]<-round((obs.freq.a/(obs.freq.a+obs.freq.b))-(obs.freq.c/(obs.freq.c+obs.freq.d)), which.accuracy)
    absents.delta.p.word.to.constr[i]<-round((obs.freq.a/(obs.freq.a+obs.freq.c))-(obs.freq.b/(obs.freq.b+obs.freq.d)), which.accuracy)
    absents.collstrengths[i]<-assoc.strength(obs.freq.a, obs.freq.b, obs.freq.c, obs.freq.d, exp.freq.a, exp.freq.b, exp.freq.c, exp.freq.d, hyp.freq)
  }
  # rep() keeps the column length equal to the others even when corp.size is 0
  output.table.hyp<-data.frame(absents.words, absents.obs.freqs, absents.exp.freqs, rep("repulsion", corp.size), absents.delta.p.constr.to.word, absents.delta.p.word.to.constr, absents.collstrengths)
  colnames(output.table.hyp)<-c("absents.words", "absents.obs.freqs", "absents.exp.freqs", "relation", "absents.delta.p.constr.to.word", "absents.delta.p.word.to.constr", "absents.collstrengths")
  cat("\a") # progress beep

  # output
  index.name<-switch(which.index, "-log10 (Fisher-Yates exact, one-tailed)", "log-likelihood", "Mutual Information", "Chi-square", "log10 of odds ratio (adds 0.5 to each cell)")
  if (which.output==1) {
    cat("\nWhich text file do you want to store the result in?\n(Note: if you choose a file that already exists, the current output will be appended to this file.)\t"); pause()
    output.file<-file.choose(); output<-file(output.file, open="at")
    on.exit(close(output), add=TRUE) # close the connection even if writing fails
    cat("|---------------------------------------------------------------------|\n| This output is provided without any warranty on an as-is basis by |\n| Stefan Th. Gries <http://www.linguistics.ucsb.edu/faculty/stgries/> |\n| Please cite the program as mentioned in <readme.txt>. Thanks a lot! |\n|---------------------------------------------------------------------|\n\n", date(), "\n\nword.freq: frequency of the word in the corpus\nobs.freq: observed frequency of the word with/in ", construction.name, file=output)
    # Legend fixed: the second delta-p entry is delta.p.word.to.constr.
    cat("\nexp.freq: expected frequency of the word with/in ", construction.name, "\nfaith: percentage of how many instances of the word occur with/in ", construction.name, "\nrelation: relation of the word to ", construction.name, "\ndelta.p.constr.to.word: delta p: how much does the word/construction help guess the word?\ndelta.p.word.to.constr: delta p: how much does the construction help guess the word/construction?\ncoll.strength: index of collocational/collostructional strength: ", index.name, ", the higher, the stronger\n\n", sep="", file=output)
    write.table(output.table, file=output, quote=FALSE, row.names=FALSE, sep="\t", eol="\n")
    # Space added after "with/in" (sep="" would otherwise glue the name to it).
    cat("\nIn order to determine the degree of repulsion of verbs that are not attested with/in ", construction.name, ",\nthe following table gives the collocational/collostructional strength for all verb frequencies\nin orders of magnitude the corpus size allows for.\n\n\n", sep="", file=output)
    write.table(output.table.hyp, file=output, quote=FALSE, row.names=FALSE, sep="\t", eol="\n")
    cat("\n\nIf your collostruction strength is based on p-values, it can be interpreted as follows:\nColl.strength>3 => p<0.001; coll.strength>2 => p<0.01; coll.strength>1.30103 => p<0.05.\nI'd be happy if you provided me with feedback and acknowledged the use of Coll.analysis 3.2a.\n", file=output)
  } else {
    cat("|---------------------------------------------------------------------|\n| This output is provided without any warranty on an as-is basis by |\n| Stefan Th. Gries <http://www.linguistics.ucsb.edu/faculty/stgries/> |\n| Please cite the program as mentioned in <readme.txt>. Thanks a lot! |\n|---------------------------------------------------------------------|\n\n", date(), "\n\nword.freq: frequency of the word in the corpus\nobs.freq: observed frequency of the word with/in ", construction.name)
    cat("\nexp.freq: expected frequency of the word with/in ", construction.name, "\nfaith: percentage of how many instances of the word occur with/in ", construction.name, "\nrelation: relation of the word to ", construction.name, "\ncoll.strength: index of collocational/collostructional strength: ", index.name, ", the higher, the stronger\n\n", sep="")
    print(output.table)
    cat("\nIn order to determine the degree of repulsion of words that are not attested with/in ", construction.name, ",\nthe following table gives the collocational/collostructional strength for all verb frequencies\nin orders of magnitude the corpus size allows for.\n\n", sep="")
    print(output.table.hyp)
    cat("\nIf your collostruction strength is based on p-values, it can be interpreted as follows:\nColl.strength>3 => p<0.001; coll.strength>2 => p<0.01; coll.strength>1.30103 => p<0.05.\nI'd be happy if you provided me with feedback and acknowledged the use of Coll.analysis 3.2a.\n")
  }
} # END OF FUNCTION FOR COLLOSTRUCTIONAL ANALYSIS
dist.collexemes<-function() { # FUNCTION FOR DISTINCTIVE COLLEXEME ANALYSIS
cat("\nD i s t i n c t i v e c o l l o c a t e / c o l l e x e m e a n a l y s i s . . .\n")
# introduction and first input
cat("\nThis kind of analysis compares 2+ words or constructions with respect to n words they co-occur with differently frequently.\nYou must first enter whether you have two distinctive categories (e.g., when you look at English ditransitive\nvs. prep. dative) or more (e.g., when you compare English active vs. be-passive vs. get-passive)?\n")
dists<-menu(choice=c(" 2 alternatives", " 3+ alternatives"), title="How many distinctive categories do you have?")
cat("\nEnter the number of decimals you'd like to see in the results (and '99', when you want the default output)!\n")
which.accuracy<-scan(nmax=1, quiet=T); cat("\n")
while (which.accuracy<=0) { cat("\nWith a value of 0 or smaller, the output might not be very meaningful - enter the correct number of decimals!\n"); which.accuracy<-scan(nmax=1, quiet=T) }
if (dists==1) {
# introduction
cat("\nIn this case, distinctive collexeme analysis uses the log-transformed\np-value from the one-tailed Fisher-Yates exact test or the log-likelihood ratio\nand indicates preferences in a separate column.\n")
# input of parameters
which.index<-menu(choice=c("-log10 (Fisher-Yates exact, one-tailed) (= default)", "log-likelihood"), title="Which index of association strength do you want to compute?")
which.sort<-menu(choice=c("alphabetically", "frequency with W1 / in C1", "frequency with W2 / in C2", "collostruction strength"), title="How do you want to sort the output?")
cat("\nColl.analysis 3.2a accepts two kinds of input for such an analysis of distinctive collexemes:\nOn the one hand, you can use as input a file with a table of all tokens. That is, the first column\ncontains for each co-occurrence item the code for one of the two words/constructions W1/C1 and\nW2/C2 you want to investigate; the second column contains the word co-occurring with W1/C1 and W2/C2\nas listed in the first column.")
cat("\n\nW/C\tColl_Word\nA\tX\nB\tY\n...\t...\n\nOn the other hand, if you have already down more work, you can also use a text file\nwith the following kind of table (with informative column names!), where the columns 2 and 3\ncontain the co-occurrence frequencies of each word listed in column 1 with/in W/C1 and W/C2.\n\nColl_Word\tFreq_CollWord_&_W/C1\tFreq_CollWord_&_W/C2\nA\t\t...\t\t\t...\nB\t\t...\t\t\t...\n...\t\t...\t\t\t...")
cat("\n\nWhichever input format you choose, your file must not have decimal points/separators and ideally has no spaces (for the latter, use '_' instead)!\nAlso, don't forget that R's treatment of alphanumeric characters is case-sensitive!\n\n")
input.dc<-menu(choice=c("Raw list of all tokens", "Edited list with frequencies"), title="Which input format do you want to use?")
cat("\nChoose the text file with the input data!\n"); pause()
data<-read.table(file.choose(), header=T, sep="\t", quote="", comment.char="")
if (input.dc==1) {
interim<-t(table(data))
data<-data.frame(as.vector(rownames(interim)), as.vector(interim[,1]), as.vector(interim[,2]))
names(data)<-c("WORD", colnames(interim))
}
construction1.name<-colnames(data)[2]
construction2.name<-colnames(data)[3]
cat("\nEnter the overall frequency of", construction1.name, "in the corpus you investigate without digit grouping symbols (probably, this is", sum(data[,2]), "i.e., the number of occurrences of this construction in your data file)!\n")
construction1.freq<-scan(nmax=1, quiet=T)
while (construction1.freq<=0) {
cat("\nWith a value of 0 or smaller, no such tests can be computed - enter the correct word/construction frequency!\n"); construction1.freq<-scan(nmax=1, quiet=T)
}
cat("\nEnter the overall frequency of", construction2.name, "in the corpus you investigate without digit grouping symbols (probably, this is", sum(data[,3]), "i.e., the number of occurrences of this construction in your data file)!\n")
construction2.freq<-scan(nmax=1, quiet=T)
while (construction2.freq<=0) {
cat("\nWith a value of 0 or smaller, no such tests can be computed - enter the correct word/construction frequency!\n"); construction2.freq<-scan(nmax=1, quiet=T)
}
which.output<-menu(choice=c("text file", "terminal"), title="Where do you want the output ('text file' will append to already existing file with the same name)?")
# computation
cases<-length(data[,1]); words<-data[,1]; obs.freq.1<-data[,2]; obs.freq.2<-data[,3]; exp.freq.1<-exp.freq.2<-pref.occur<-delta.p.constr.to.word<-delta.p.word.to.constr<-coll.strength<-c(rep(0, cases)); overlap<-0
for (i in 1:cases) {
obs.freq.a<-obs.freq.1[i]
obs.freq.b<-construction1.freq-obs.freq.a
obs.freq.c<-obs.freq.2[i]
obs.freq.d<-construction2.freq-obs.freq.c
exp.freq.a<-(data[i,2]+data[i,3])*construction1.freq/(construction1.freq+construction2.freq); exp.freq.1[i]<-round(exp.freq.a, which.accuracy)
exp.freq.b<-construction1.freq-exp.freq.a
exp.freq.c<-(data[i,2]+data[i,3])*construction2.freq/(construction1.freq+construction2.freq); exp.freq.2[i]<-round(exp.freq.c, which.accuracy)
exp.freq.d<-construction2.freq-exp.freq.c
coll.strength[i]<-round(switch(which.index,
fye(obs.freq.a, exp.freq.a, construction1.freq, sum(construction1.freq, construction2.freq), sum(obs.freq.a, obs.freq.c)),
llr(obs.freq.a, obs.freq.b, obs.freq.c, obs.freq.d, exp.freq.a, exp.freq.b, exp.freq.c, exp.freq.d),
log(((obs.freq.a+0.5)/(obs.freq.b+0.5))/((obs.freq.c+0.5)/(obs.freq.d+0.5)), 10)), which.accuracy)
if (obs.freq.a>exp.freq.a) {
pref.occur[i]<-as.character(construction1.name)
} else if (obs.freq.a<exp.freq.a) {
pref.occur[i]<-as.character(construction2.name)
} else {
pref.occur[i]<-"no_preference"
}
delta.p.constr.to.word[i]<-round((obs.freq.a/(obs.freq.a+obs.freq.b))-(obs.freq.c/(obs.freq.c+obs.freq.d)), which.accuracy)
delta.p.word.to.constr[i]<-round((obs.freq.a/(obs.freq.a+obs.freq.c))-(obs.freq.b/(obs.freq.b+obs.freq.d)), which.accuracy)
overlap<-ifelse(all(obs.freq.a>0, obs.freq.c>0), overlap<-overlap+1, overlap)
}
output.table<-data.frame(words, obs.freq.1, obs.freq.2, exp.freq.1, exp.freq.2, pref.occur, delta.p.constr.to.word, delta.p.word.to.constr, coll.strength)
sort.index<-switch(which.sort, order(words), order(-obs.freq.1, words),order(-obs.freq.2, words), order(pref.occur, -coll.strength))
output.table<-as.data.frame(output.table[sort.index,])
cat("\a") # progress beep
# output
which.index<-switch(which.index, "-log10(Fisher-Yates exact, one-tailed)", "log-likelihood")
cat("\n")
if (which.output==1) {
cat("\nWhich text file do you want to store the result in?\n(Note: if you choose a file that already exists, the current output will be appended to this file.)\t"); pause()
output.file<-file.choose(); output<-file(output.file, open="at")
cat("|---------------------------------------------------------------------|\n| This output is provided without any warranty on an as-is basis by |\n| Stefan Th. Gries <http://www.linguistics.ucsb.edu/faculty/stgries/> |\n| Please cite the program as mentioned in <readme.txt>. Thanks a lot! |\n|---------------------------------------------------------------------|\n\n", date(), file=output)
cat("\n\nDistinctive collocate/collexeme analysis for: ", as.character(construction1.name), " vs. ", as.character(construction2.name), "\n\nobs.freq.1: observed frequency of the word A-? in/with ", as.character(construction1.name), "\nobs.freq.2: observed frequency of the word A-? in/with ", as.character(construction2.name), "\nexp.freq.1: expected frequency of the word A-? in/with ", sep="", file=output)
cat(as.character(construction1.name), "\nexp.freq.2: expected frequency of the word A-? in/with ", as.character(construction2.name), "\npref.occur: the word/construction to which the word A-? is attracted\ndelta.p.constr.to.word: delta p: how much does the word/construction help guess the word?\ndelta.p.word.to.constr: delta p: how much does the construction help guess the word/construction?\ncoll.strength: index of distinctive collostructional strength:", which.index, ", the higher, the more distinctive\n\n", sep="", file=output)
write.table(output.table, file=output, quote=F, row.names=F, sep="\t", eol="\n")
cat("\nIf your collostruction strength is based on p-values, it can be interpreted as follows:\nColl.strength>3 => p<0.001; coll.strength>2 => p<0.01; coll.strength>1.30103 => p<0.05.\nOut of the ", cases, " investigated, ", overlap," collocates/collexemes are shared by both words/constructions; i.e. ", (overlap/cases*100), "%\n\n\nI'd be happy if you provided me with feedback and acknowledged the use of Coll.analysis 3.2a.\n", sep="", file=output)
close(output)
} else {
cat("|---------------------------------------------------------------------|\n| This output is provided without any warranty on an as-is basis by |\n| Stefan Th. Gries <http://www.linguistics.ucsb.edu/faculty/stgries/> |\n| Please cite the program as mentioned in <readme.txt>. Thanks a lot! |\n|---------------------------------------------------------------------|\n\n", date(), "\n\nDistinctive collocate/collexeme analysis for: ")
cat(as.character(construction1.name), " vs. ", as.character(construction2.name), "\n\nobs.freq.1: observed frequency of the word A-? in/with ", as.character(construction1.name), "\nobs.freq.2: observed frequency of the word A-? in/with ", as.character(construction2.name), "\nexp.freq.1: expected frequency of the word A-? in/with ", as.character(construction1.name), "\nexp.freq.2: expected frequency of the word A-? in/with ")
cat(as.character(construction2.name), "\npref.occur: the word/construction to which the word A-? is attracted\ncoll.strength: index of distinctive collostructional strength: ", which.index, ", the higher, the more distinctive\n\n", sep="")
options(width=7500); print(output.table)
cat("\nIf your collostruction strength is based on p-values, it can be interpreted as follows:\nColl.strength>3 => p<0.001; coll.strength>2 => p<0.01; coll.strength>1.30103 => p<0.05.\nOut of the ", cases, " investigated, ", overlap," collocates/collexemes are shared by both words/constructions; i.e. ", (overlap/cases*100), "%\n\n\nI'd be happy if you provided me with feedback and acknowledged the use of Coll.analysis 3.2a.\n", sep="")
}
} else {
# introduction
cat("\nIn this case of multiple distinctive collexeme analysis, a more detailed introduction is necessary.\nIn regular collexeme analysis as well as distinctive collexeme analysis, we have always used the\none-tailed Fisher Yates exact test to compute the association strength between elements. As the name indicates,\nthis is an exact tests which is applied to 2-by-2 table and based on the hypergeometric distribution,")
cat("\ni.e., on sampling without replacement. If you want to perform a distinctive collexeme analysis with more\nthan two alternatives, e.g. English active vs. be-passive vs. get-passive, however,\n\nVOICE\t\tVERB\nactive\t\tthink\nbe-passive\ttell\nget-passive\tkill\n...\t\t...\n\nthen the Fisher-Yates exact test cannot be used anymore. ")
cat("The equivalent test for more than two alternatives\nis the so-called multinomial test, an exact test with sampling without replacement for 2+ alternatives.\nHowever, given the present purposes this test has two weaknesses:\n(i) it is computationally so expensive that sample sizes of several thousand items already exceed the capabilities of\nstate-of-the-art desktop computers in fall 2004, and ")
cat("(ii) the multinomial test only gives you a single\np-value and, thus, doesn't tell you where some deviation actually comes from: is an\noverall large deviation due to the low frequency for, say, _think_ in actives, or, say, the high frequency of,\nsay, _kill_ in get-passives? That is, even if the test was possible computationally,\nit would not yet answer the interesting questions.\n ")
cat("Thus, this script uses an approximation to the multinomial test, namely the one-tailed exact binomial test.\nThis test is still an exact test, i.e., it is not sensitive to low frequencies. To use the above example,\nthe present implementation of the exact binomial test computes one p-value for each word in with\neach other word / in each construction (as in configural frequency analysis) and ")
cat("log-transforms it such that\nhighly positive and highly negative values indicate a large degree of attraction and repulsion respectively\nwhile 0 indicates random co-occurrence.\n Then, to make the results more accessible, the script also outputs columns called SumAbsDev and LargestDev.\nAgain, using the above example, the former tells you for each verb the sum of all ")
cat("absolute log-transformed p-values, i.e.,\nhow strongly each verb's observed frequencies across all voices differ from the expected ones.\nThe latter tell you for each verb the single voice with the largest deviations from the expected frequencies.\n")
# input of parameters
cat("\nFor such a multiple distinctive collexeme analysis, Coll.analysis 3.2a expects as input\na file with a table of all tokens. That is, the first column contains for\neach co-occurrence item the code for one of the X words/constructions W/C\nyou want to investigate; the second column contains the word co-occurring with W/C")
cat("\nas listed in the first column.\n\nW/C\tColl_Word\nA\tX\nB\tY\nC\tZ\n...\t...\n\nYour file ideally has no spaces (use '_' instead) and don't forget that R's treatment of alphanumeric characters\nis case-sensitive! The computation of this analysis can require several minutes or even more time ...\n\nChoose the text file with the input data!\t"); pause()
mdca.data<-read.table(file.choose(), header=T, sep="\t", quote="", comment.char="")
names(mdca.data)<-c("W_C", "Coll_Word")
which.sort<-menu(choice=c("alphabetically (W_C)", "sum of absolute deviations per W_C", "W_Cs' largest deviation"), title="\nHow do you want to sort the output?")
# determine column frequencies
tab.mca.data<-table(mdca.data$Coll_Word, mdca.data$W_C) # generate table for multiple dca
colfreq<-table(mdca.data$W_C)
verb<-rownames(tab.mca.data); constr<-colnames(tab.mca.data)
n.verb<-length(verb); n.constr<-length(constr)
result.table<-data.frame(matrix(nrow=n.verb, ncol=(n.constr*3)+3))
colnames(result.table)<-c("Coll_Word", as.character(constr), paste("exp", as.character(constr), sep="_"), paste("pbin", as.character(constr), sep="_"), "SumAbsDev", "LargestDev")
result.table[,1]<-rownames(tab.mca.data)
result.table[,2:(n.constr+1)]<-tab.mca.data[,1:n.constr]
for (f in 1:n.verb) {
cur.obs<-tab.mca.data[f,]
cur.exp<-sum(cur.obs)*(colfreq/sum(colfreq))
result.table[f,(n.constr+2):(n.constr+n.constr+1)]<-round(cur.exp, which.accuracy)
counter<-0
for (g in (n.constr*2+2):(length(result.table)-2)) {
counter<-counter+1
if (cur.obs[counter]>=cur.exp[counter]) {
result.table[f,g]<-round(-log(sum(dbinom(cur.obs[counter]:sum(cur.obs), sum(cur.obs), (cur.exp[counter]/sum(cur.obs)))), 10), which.accuracy)
} else {
result.table[f,g]<-round(log(sum(dbinom(0:cur.obs[counter], sum(cur.obs), (cur.exp[counter]/sum(cur.obs)))), 10), which.accuracy)
}
}
result.table[f,length(result.table)-1]<-round(sum(abs(result.table[f,(length(names(result.table))-n.constr-1):(length(names(result.table))-2)])), which.accuracy)
largest.value<-round(max(abs(result.table[f,(length(result.table)-n.constr-1):(length(result.table)-2)])), which.accuracy)
largest.word<-as.character(constr[which(abs(result.table[f,(length(result.table)-n.constr-1):(length(result.table)-2)])==largest.value)])
if (length(largest.word)>1) { largest.word<-paste(largest.word, collapse="_&_") }
result.table[f,length(result.table)]<-largest.word
}
attach(result.table)
cat("\a") # progress beep
# output
which.output<-menu(choice=c("text file", "terminal"), title="Where do you want the output ('text file' will append to already existing file with the same name)?")
sort.index<-switch(which.sort, order(Coll_Word), order(-SumAbsDev, Coll_Word), order(LargestDev, -SumAbsDev))
result.table<-as.data.frame(result.table[sort.index,])
if (which.output==1) {
cat("\nWhich text file do you want to store the result in?\n(Note: if you choose a file that already exists, the current output will be appended to this file.)\t"); pause()
output.file<-file.choose(); output<-file(output.file, open="at")
cat("|---------------------------------------------------------------------|\n| This output is provided without any warranty on an as-is basis by |\n| Stefan Th. Gries <http://www.linguistics.ucsb.edu/faculty/stgries/> |\n| Please cite the program as mentioned in <readme.txt>. Thanks a lot! |\n|---------------------------------------------------------------------|\n\n", date(), file=output)
cat("\n\nMultiple distinctive collocate/collexeme analysis for:", paste(as.character(constr), collapse=" "), "\n\nColl_Word: collocate of the words/constructions to be contrasted\nThe next ", paste(as.character(constr), collapse=" "), " columns are the words/constructions to be contrasted and their observed co-occurrence frequencies\nThe next ", paste(as.character(constr), collapse=" "), file=output)
cat(" columns are the words/constructions to be contrasted and their expected co-occurrence frequencies\nThe next ", paste(as.character(constr), collapse=" "), " columns are the log-transformed p-values of the words/constructions to be contrasted (+ = attraction, - = repulsion)\nSumAbsDev: the sum of the absolute values of the preceding ", n.constr, " columns: the larger, the stronger the deviation", file=output)
cat("\nLargestDev: the word/construction where the strongest deviation from observed to expected is found\n\n", sep="", file=output)
write.table(result.table, file=output, quote=F, row.names=F, sep="\t", eol="\n")
cat("\n\nSorting according to the 'pbin' columns will yield the most relevant outcomes for each word/construction.\npbin_*>3 => p<0.001; pbin_*>2 => p<0.01; pbin_*>1.30103 => p<0.05.\nI'd be happy if you provided me with feedback and acknowledged the use of Coll.analysis 3.2a.\n", file=output)
close(output)
} else {
cat("\n|---------------------------------------------------------------------|\n| This output is provided without any warranty on an as-is basis by |\n| Stefan Th. Gries <http://www.linguistics.ucsb.edu/faculty/stgries/> |\n| Please cite the program as mentioned in <readme.txt>. Thanks a lot! |\n|---------------------------------------------------------------------|\n\n", date())
cat("\n\nMultiple distinctive collocate/collexeme analysis for: ", paste(as.character(constr), collapse=" "), "\n\nColl_Word: collocate of the words/constructions to be contrasted\nThe next", as.character(constr), "columns are the words/constructions to be contrasted and their observed co-occurrence frequencies\nThe next ", paste(as.character(constr), collapse=" "))
cat(" columns are the words/constructions to be contrasted and their expected co-co-occurrence frequencies\nThe next ", paste(as.character(constr), collapse=" "), " columns are the log-transformed p-values of the words/constructions to be contrasted (+ = attraction, - = repulsion)\nSumAbsDev: the sum of the absolute values of the preceding ", n.constr)
cat(" columns: the larger, the stronger the deviation\nLargestDev: the word/construction where the strongest deviation from observed to expected is found\n\n")
options(width=7500); print(result.table)
cat("\npbin_*>3 => p<0.001; pbin_*>2 => p<0.01; pbin_*>1.30103 => p<0.05.\nI'd be happy if you provided me with feedback and acknowledged the use of Coll.analysis 3.2a.\n")
}
}
} # END OF FUNCTION FOR DISTINCTIVE COLLEXEME ANALYSIS
covar.collexemes<-function() { # FUNCTION FOR CO-VARYING COLLEXEME ANALYSIS
   # Interactive, item-based co-varying collexeme analysis.
   #
   # Prompts for the construction name, the combinations to include, the association
   # measure, the sort order, the number of decimals and a tab-separated two-column
   # input file (one row per construction token: word in slot 1, word in slot 2).
   # For every word pair it builds a 2x2 table against all construction tokens and
   # computes the expected frequency, the direction of association, both delta-P
   # values and the chosen association strength (fye(), llr() or a log10 odds ratio).
   # The sorted result is appended to a text file or printed to the terminal.
   #
   # Takes no arguments; uses pause(), fye() and llr() from this file.

   cat("\nC o v a r y i n g c o l l e x e m e a n a l y s i s . . .\n")

   # introduction
   cat("\n\nThis kind of analysis investigated dependencies within two slots of a single construction.\nThis script so far only implements the so-called item-based analysis since comparative studies\n have shown that the system-based correction may require many days computational time with only")
   cat("\nminor differences in the results (cf. Stefanowitsch and Gries 2005). However, somewhere down the road I may find \ntime to work on an implementation of this technique so that arbitrarily many additional variables\n(e.g. register, corpora etc.) can be included.\n")

   # input of parameters
   cat("\nColl.analysis 3.2a requires as input for the item-based co-varying collexeme analysis:\na file with a table of all token instances of the construction C with\nthe two words W1 and W2 occurring in the slots of each instance of C.\n")
   cat("That is, you need the following kind of input file (with column names!)),\nwhere the number of rows corresponds to the number of construction tokens you have.\n\nWord_Slot1\tWord_Slot2\nA\t\tX\nB\t\tX\n...\t...\n\n")
   cat("Your file must not have decimal points/separators and ideally has no spaces (for the latter, use '_' instead)!\nAlso, don't forget that R's treatment of alphanumeric characters is case-sensitive!\n\n")

   cat("\nWhat is the name of the construction C you investigate (without spaces)?\t")
   construction.name<-scan(nmax=1, what="character", quiet=TRUE)
   if (length(construction.name)==0) construction.name<-"some_C"

   which.combos<-menu(choices=c("all possible combinations (can be memory-intensive)", "only attested combinations (not memory-intensive at all)"), title="\nWhich combinations do you want to include?")
   which.index<-menu(choices=c("-log10 (Fisher-Yates exact, one-tailed) (= default)", "log-likelihood", "log10 of odds ratio (adds 0.5 to each cell)"), title="\nWhich index of association strength do you want to compute?")
   which.sort<-menu(choices=c("alphabetically (W1)", "alphabetically (W2)", "frequency (W1)", "frequency (w2)", "collostruction strength"), title="How do you want to sort the output?")

   cat("\nEnter the number of decimals you'd like to see in the results (and '99', when you want the default output)!\t")
   which.accuracy<-scan(nmax=1, quiet=TRUE); cat("\n")
   # Re-prompt on empty input as well: scan() then returns a zero-length vector,
   # which would otherwise make the comparison below fail.
   while (length(which.accuracy)!=1 || is.na(which.accuracy) || which.accuracy<=0) {
      cat("\nWith a value of 0 or smaller, the output might not be very meaningful - enter the correct number of decimals!\n")
      which.accuracy<-scan(nmax=1, quiet=TRUE)
   }

   cat("\nChoose the text file with the raw data!\t"); pause()
   data<-read.table(file.choose(), header=TRUE, sep="\t", colClasses=c("character", "character"), quote="", comment.char="")

   # Expand the token list into one row per (slot-1 type, slot-2 type) combination.
   types.in.1<-sort(unique(data[,1])); ntypes.in.1<-length(types.in.1)
   types.in.2<-sort(unique(data[,2])); ntypes.in.2<-length(types.in.2)
   construction.freq<-length(data[,1]) # number of construction tokens
   x<-table(data)
   W1_C<-rep(types.in.1, each=ntypes.in.2)
   W2_C<-rep(types.in.2, ntypes.in.1)
   Freq_W1_C<-rep(as.vector(rowSums(x)), each=ntypes.in.2)
   Freq_W2_C<-rep(as.vector(colSums(x)), ntypes.in.1)
   W1_W2_in_C<-as.vector(t(x)) # transposed so that the cell order matches W1_C/W2_C
   data<-data.frame(W1_C, W2_C, Freq_W1_C, Freq_W2_C, W1_W2_in_C)
   if (which.combos==2) {
      data<-subset(data, data[,5]!=0) # keep attested combinations only
   }

   # computation
   cases<-length(data[,1])
   words1<-data[,1]; words2<-data[,2]; freq.w1<-data[,3]; freq.w2<-data[,4]; obs.w1_2.in_c<-data[,5]
   exp.w1_2.in_c<-c(rep(0, cases)); relation<-c(rep(0, cases)); delta.p.constr.to.word<-delta.p.word.to.constr<-coll.strength<-c(rep(0, cases))

   # seq_len() instead of 1:cases so that an empty table skips the loop
   # rather than iterating over c(1, 0).
   for (i in seq_len(cases)) {
      # observed 2x2 table: a = both words, b/c = only one of them, d = neither
      obs.freq.a<-obs.w1_2.in_c[i]
      obs.freq.b<-freq.w1[i]-obs.freq.a
      obs.freq.c<-freq.w2[i]-obs.freq.a
      obs.freq.d<-construction.freq-(obs.freq.a+obs.freq.b+obs.freq.c)

      # expected 2x2 table from the marginal totals
      exp.freq.a<-freq.w1[i]*freq.w2[i]/construction.freq; exp.w1_2.in_c[i]<-round(exp.freq.a, which.accuracy)
      exp.freq.b<-freq.w1[i]-exp.freq.a
      exp.freq.c<-freq.w2[i]-exp.freq.a
      exp.freq.d<-construction.freq-(exp.freq.a+exp.freq.b+exp.freq.c)

      coll.strength[i]<-round(switch(which.index,
         fye(obs.freq.a, exp.freq.a, freq.w1[i], construction.freq, freq.w2[i]),
         llr(obs.freq.a, obs.freq.b, obs.freq.c, obs.freq.d, exp.freq.a, exp.freq.b, exp.freq.c, exp.freq.d),
         log(((obs.freq.a+0.5)/(obs.freq.b+0.5))/((obs.freq.c+0.5)/(obs.freq.d+0.5)), 10)), which.accuracy)

      if (obs.freq.a>exp.freq.a) {
         relation[i]<-"attraction"
      } else if (obs.freq.a<exp.freq.a) {
         relation[i]<-"repulsion"
      } else {
         relation[i]<-"chance"
      }

      delta.p.constr.to.word[i]<-round((obs.freq.a/(obs.freq.a+obs.freq.b))-(obs.freq.c/(obs.freq.c+obs.freq.d)), which.accuracy)
      delta.p.word.to.constr[i]<-round((obs.freq.a/(obs.freq.a+obs.freq.c))-(obs.freq.b/(obs.freq.b+obs.freq.d)), which.accuracy)
   }

   # From here on which.index holds the label of the chosen measure (used in the legend).
   which.index<-switch(which.index, "-log10 (Fisher-Yates exact, one-tailed)", "log-likelihood", "log10 of odds ratio (adds 0.5 to each cell)")
   # Expected frequencies are reported with at most 2 decimals, whatever which.accuracy is.
   exp.w1_2.in_c<-round(exp.w1_2.in_c, 2); output.table<-data.frame(words1, words2, freq.w1, freq.w2, obs.w1_2.in_c, exp.w1_2.in_c, relation, delta.p.constr.to.word, delta.p.word.to.constr, coll.strength)
   cat("\a") # progress beep
   which.output<-menu(choices=c("text file", "terminal"), title="\nWhere do you want the output ('text file' will append to already existing file with the same name)?")

   # output
   sort.index<-switch(which.sort, order(words1, relation, -coll.strength), order(words2, relation, -coll.strength), order(-freq.w1, relation, -coll.strength), order(-freq.w2, relation, -coll.strength), order(relation, -coll.strength))
   output.table<-output.table[sort.index,]

   if (which.output==1) {
      cat("\nWhich text file do you want to store the result in?\n(Note: if you choose a file that already exists, the current output will be appended to this file.)\t"); pause()
      output.file<-file.choose(); output<-file(output.file, open="at")
      # Close the connection when the function exits, even if writing fails.
      on.exit(close(output), add=TRUE)
      cat("|---------------------------------------------------------------------|\n| This output is provided without any warranty on an as-is basis by |\n| Stefan Th. Gries <http://www.linguistics.ucsb.edu/faculty/stgries/> |\n| Please cite the program as mentioned in <readme.txt>. Thanks a lot! |\n|---------------------------------------------------------------------|\n\n", date(), file=output)
      cat("\n\nCo-varying collexeme analysis for: ", construction.name, "\n\nwords1: words in the 1st slot of ", construction.name, "\nwords2: words in the 2nd slot of ", construction.name, "\nfreq.w1: frequency of word1 in ", construction.name, "\nfreq.w2: frequency of word2 in ", construction.name, "\nobs.w1_2.in_c: observed frequency of both words in both slots in ", construction.name, file=output)
      cat("\nexp.w1_2.in_c: expected frequency of both words in both slots in ", construction.name, "\nrelation: relation between observed and expected frequency\ncoll.strength: index of co-varying collexeme strength: ", which.index, ", the higher, the stronger\n\n", sep="", file=output)
      write.table(output.table, file=output, quote=FALSE, row.names=FALSE, sep="\t", eol="\n")
      cat("\nIf your collostruction strength is based on p-values, it can be interpreted as follows:\nColl.strength>3 => p<0.001; coll.strength>2 => p<0.01; coll.strength>1.30103 => p<0.05.\nI'd be happy if you provided me with feedback and acknowledged the use of Coll.analysis 3.2a.\n", file=output)
   } else {
      cat("|---------------------------------------------------------------------|\n| This output is provided without any warranty on an as-is basis by |\n| Stefan Th. Gries <http://www.linguistics.ucsb.edu/faculty/stgries/> |\n| Please cite the program as mentioned in <readme.txt>. Thanks a lot! |\n|---------------------------------------------------------------------|\n\n", date())
      cat("\n\nCo-varying collexeme analysis for: ", construction.name, "\n\nwords1: words in the 1st slot of ", construction.name, "\nwords2: words in the 2nd slot of ", construction.name, "\nfreq.w1: frequency of word1 in ", construction.name, "\nfreq.w2: frequency of word2 in ", construction.name, "\nobs.w1_2.in_c: observed frequency of both words in both slots in ", construction.name)
      cat("\nexp.w1_2.in_c: expected frequency of both words in both slots in ", construction.name, "\nrelation: relation between observed and expected frequency\ncoll.strength: index of co-varying collexeme strength: ", which.index, ", the higher, the stronger\n\n", sep="")
      # Widen the console only for this print and restore the user's setting afterwards.
      old.options<-options(width=7500)
      on.exit(options(old.options), add=TRUE)
      print(output.table)
      cat("\nIf your collostruction strength is based on p-values, it can be interpreted as follows:\nColl.strength>3 => p<0.001; coll.strength>2 => p<0.01; coll.strength>1.30103 => p<0.05.\nI'd be happy if you provided me with feedback and acknowledged the use of Coll.analysis 3.2a.\n")
   }
} # END OF FUNCTION FOR CO-VARYING COLLEXEME ANALYSIS
pause<-function() {
   # Print a "press Enter" prompt, read one line from the console and discard it.
   # Returns NULL invisibly.
   prompt.text<-"Press <Enter> to continue ... "
   cat(prompt.text)
   readline()
   invisible(NULL)
}
fye<-function(oa, ea, cf, cs, wf) {
   # One-tailed Fisher-Yates exact test, returned as -log10(p).
   #
   # oa: observed co-occurrence frequency
   # ea: expected co-occurrence frequency (only decides which tail is tested)
   # cf, cs, wf: parameters of the hypergeometric distribution:
   #    cf successes and cs-cf failures in the population, wf draws
   #
   # If oa > ea the upper tail P(X >= oa) is used, otherwise the lower tail
   # P(X <= oa). The tail probability is taken from phyper() on the log scale,
   # so very small p-values no longer underflow to 0 (which gave Inf), and
   # no oa:cf vector has to be allocated.
   if (oa>ea) {
      log.p<-phyper(oa-1, cf, cs-cf, wf, lower.tail=FALSE, log.p=TRUE) # P(X >= oa)
   } else {
      log.p<-phyper(oa, cf, cs-cf, wf, lower.tail=TRUE, log.p=TRUE) # P(X <= oa)
   }
   -log.p/log(10) # natural log -> -log10
}
llr<-function(oa, ob, oc, od, ea, eb, ec, ed) {
   # Log-likelihood ratio: 2 * sum(O * ln(O/E)) over the four cells of a 2x2 table.
   #
   # oa, ob, oc, od: observed cell frequencies
   # ea, eb, ec, ed: expected cell frequencies
   #
   # Cells whose term is NaN (e.g. an observed frequency of 0, where 0*log(0)
   # is NaN) contribute 0. NaN is detected with is.nan() rather than by
   # comparing a number with the string "NaN".
   observed<-c(oa, ob, oc, od)
   expected<-c(ea, eb, ec, ed)
   cell.terms<-log(observed/expected)*observed
   cell.terms[is.nan(cell.terms)]<-0
   2*sum(cell.terms)
}
# Script entry point: calls coll.analysis() whenever this file is sourced.
# NOTE(review): coll.analysis is not defined in this part of the file; presumably defined earlier - confirm.
coll.analysis()