forked from 20020001-UET/dsa-decision-tree
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSplitData.h
94 lines (72 loc) · 2.48 KB
/
SplitData.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
/*
* This file is part of dsa-decision-tree
*
* Developed for the DSA UET course.
* This project was developed by Ba Luong and Gia Linh.
*/
#pragma once
#ifndef SPLIT_DATA_H
#define SPLIT_DATA_H
#include "Data.h"
#include "CostCalc.h"
#include "BitMask.h"
#include <math.h>
using namespace std;
namespace SplitData
{
// Tag describing which splitting strategy a split uses.
// The numeric values are spelled out explicitly: NONE is the only negative
// value, the real strategies count up from zero, and SPLIT_TOTAL ends up
// equal to the number of real strategies.
enum SPLIT_VAL
{
    NONE = -1,       // no strategy selected
    ATTRIBUTE = 0,   // first strategy
    COMPARISON = 1,  // second strategy
    COMBINATION = 2, // third strategy
    SPLIT_TOTAL = 3  // number of strategies above (not a strategy itself)
};
// Converts an integer index into a SPLIT_VAL.
// NOTE(review): the exact mapping, and what is returned for an index outside
// the enumerator range, is defined in the implementation file - confirm there.
SPLIT_VAL getSplitValue(int index);
// Returns a SPLIT_VAL selected using the bounds rMin and rMax.
// When called without arguments the bounds are ATTRIBUTE and COMBINATION.
// NOTE(review): presumably picks a random strategy within [rMin, rMax];
// whether rMax is inclusive is not visible from this header - confirm in the
// implementation.
SPLIT_VAL randSplit(int rMin = ATTRIBUTE, int rMax = COMBINATION);
// Describes one candidate split: the resulting group of datasets together with
// its cost and the parameters (attribute, compare value, method) that
// produced it. This is the type returned by the getSplit() functions.
//
// NOTE(review): a destructor is declared but no copy constructor or copy
// assignment operator. If the destructor releases `group`, copying this
// struct (it is returned by value from getSplit) could free the same pointer
// twice - confirm ownership of `group` in the implementation.
struct GroupSplitData
{
// Group of datasets associated with this split; may be NULL (the
// constructor's default argument).
GroupDataSet *group;
// Cost assigned to this split. The constructor's default is 2.0,
// presumably a "worse than any real cost" sentinel - TODO confirm.
double costIndex;
// Index of the attribute the split is based on; constructor default is -1.
int attribute;
// Value the attribute is compared against; constructor default is -1.
// NOTE(review): for the Combination method this is presumably a bit mask.
int compareValue;
// Strategy that produced this split; constructor default is NONE.
SPLIT_VAL method;
// Builds a record from the given values. Every parameter has a default, so
// this also serves as the default constructor.
GroupSplitData(double cost = 2.0, int atr = -1, int com = -1, SPLIT_VAL met = NONE, GroupDataSet *groupData = NULL);
// Defined in the implementation file; see the ownership note above.
~GroupSplitData();
};
// Split strategy that tests an attribute for equality with a value.
namespace Attribute
{
// Predicate used by this strategy: tests attribute `atr` of `data`
// against `value`.
// NOTE(review): presumably true when the attribute equals `value` - confirm.
bool compare(Data *data, int atr, int value);
// Splits a dataset on attribute `atr` using `value` as the split point
// (records are separated by whether the attribute equals `value`).
// NOTE(review): ownership of the returned pointer is not visible here.
GroupDataSet *split(DataSet *data, int atr, int value);
// Splits a dataset on attribute `atr` into a group of two new datasets,
// selecting the best split point for that attribute.
GroupSplitData getSplit(DataSet *data, int atr);
// Returns a list of integer indices for the split of `data` on `atr`/`value`.
// NOTE(review): the name suggests these identify records the split gets
// wrong - confirm against the implementation.
vector<int> getErrorIndex(DataSet *data, int atr, int value);
}
// Split strategy that tests an attribute with an ordering comparison
// against a value.
namespace Comparison
{
// Predicate used by this strategy: tests attribute `atr` of `data`
// against `value`.
// NOTE(review): presumably true when the attribute is less than `value`;
// whether the comparison is strict is not visible here - confirm.
bool compare(Data *data, int atr, int value);
// Splits a dataset on attribute `atr` using `value` as the split point
// (records are separated by whether the attribute is less than `value`).
// NOTE(review): ownership of the returned pointer is not visible here.
GroupDataSet *split(DataSet *data, int atr, int value);
// Splits a dataset by comparing attribute `atr` into a group of two new
// datasets, selecting the best split point for that attribute.
GroupSplitData getSplit(DataSet *data, int atr);
// Returns a list of integer indices for the split of `data` on `atr`/`value`.
// NOTE(review): the name suggests these identify records the split gets
// wrong - confirm against the implementation.
vector<int> getErrorIndex(DataSet *data, int atr, int value);
}
// Split strategy that tests an attribute against a combination (set) of
// values encoded as a bit mask.
namespace Combination
{
// Predicate used by this strategy: tests attribute `atr` of `data`
// against the value combination encoded in `mask`.
// NOTE(review): presumably true when the attribute's value is one of the
// values selected by `mask` - confirm.
bool compare(Data *data, int atr, int mask);
// Splits a dataset based on a combination of values of attribute `atr`.
// `mask` is the bit mask encoding which values belong to the combination.
// NOTE(review): ownership of the returned pointer is not visible here.
GroupDataSet *split(DataSet *data, int atr, int mask);
// Splits a dataset based on a combination of values of attribute `atr`
// into a group of two new datasets, selecting the best split point.
GroupSplitData getSplit(DataSet *data, int atr);
// Returns a list of integer indices for the split of `data` on `atr`/`value`.
// NOTE(review): the third parameter is named `value` here but `mask` in
// compare() and split() above; it is presumably also a bit mask. The name
// also suggests the indices identify records the split gets wrong.
// Confirm both against the implementation.
vector<int> getErrorIndex(DataSet *data, int atr, int value);
}
}
#endif