-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathtextcat.h
75 lines (71 loc) · 3 KB
/
textcat.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#ifndef _TEXTCAT_H_
#define _TEXTCAT_H_

#include <stddef.h> /* size_t, used by textcat_Classify() */
/*
* textcat.h -- routines for categorizing text
*
* Copyright (C) 2003 WiseGuys Internet B.V.
*
* THE BSD LICENSE
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the
* distribution.
*
* - Neither the name of the WiseGuys Internet B.V. nor the names of
* its contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
 * textcat_Init() - Create a text classifier from a configuration file.
 *
 * @conffile: text file listing, for every category, a fingerprint
 *            filename and an identification string.  The fingerprint
 *            filenames must be reachable from the current working
 *            directory; the identification strings are what is used
 *            in the classification output.
 *
 * Returns: a handle on success, or NULL on error.  (At the moment the
 * only ways an error can occur are the library being unable to read
 * conffile, or one of the fingerprint files listed in it.)
 */
void *textcat_Init(const char *conffile);
/**
 * textcat_Done() - Release the resources held for a handle.
 *
 * @handle: the handle whose resources are to be freed (presumably one
 *          obtained from textcat_Init() -- confirm against the
 *          implementation).
 */
void textcat_Done(void *handle);
/**
 * textcat_Classify() - Give the most likely categories for a buffer.
 *
 * @handle: classifier handle.
 * @buffer: the text to classify.
 * @size:   length of buffer.
 *
 * Returns: a string containing a list of category ids, each one
 * between square brackets; "UNKNOWN" when the text is not recognized;
 * "SHORT" if the document was too short to make a reliable assessment.
 *
 * NOTE(review): this header does not state who owns the returned
 * string or how long it remains valid -- confirm against the
 * implementation before freeing or retaining it.
 *
 * Performance note: longer buffers take longer to process.  However,
 * for many uses it is not necessary to categorize the whole buffer.
 * For language classification, a few hundred bytes will suffice.
 */
char *textcat_Classify(void *handle, const char *buffer, size_t size);
/**
 * textcat_Version() - Returns a string describing the version of this
 * classifier.
 *
 * Takes no arguments.  The parameter list is spelled (void) so that
 * this declaration is a true prototype: before C23 an empty "()"
 * leaves the parameters unspecified, and calls passing stray arguments
 * would compile without any diagnostic.
 */
char *textcat_Version(void);
#endif