Skip to content

Commit

Permalink
增加了分箱内最小iv的限制
Browse files (browse the repository at this point in the history)
  • Loading branch information
zhaoxingfeng committed Nov 29, 2018
1 parent 9ce8005 commit 526fe5d
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 85 deletions.
133 changes: 55 additions & 78 deletions result/woe_rule.csv
Original file line number Diff line number Diff line change
Expand Up @@ -19,67 +19,56 @@ PAY_6,,-inf,-2.0,4895,981,3914,-0.1251,0.0025,0.2852
PAY_6,,-2.0,-1.0,5740,975,4765,-0.3279,0.0187,0.2852
PAY_6,,-1.0,0.0,16286,3069,13217,-0.2015,0.0208,0.2852
PAY_6,,0.0,inf,3079,1611,1468,1.3516,0.2432,0.2852
PAY_AMT1,,-inf,17.0,5404,1938,3466,0.6773,0.0973,0.1796
PAY_AMT1,,17.0,1450.0,4688,1024,3664,-0.0162,0.0000,0.1796
PAY_AMT1,,1450.0,2000.0,4323,1010,3313,0.0708,0.0007,0.1796
PAY_AMT1,,2000.0,3006.92,3755,788,2967,-0.0671,0.0006,0.1796
PAY_AMT1,,3006.92,4878.2,3467,719,2748,-0.0821,0.0008,0.1796
PAY_AMT1,,4878.2,9500.0,4613,699,3914,-0.4640,0.0289,0.1796
PAY_AMT1,,9500.0,inf,3750,458,3292,-0.7137,0.0513,0.1796
LIMIT_BAL,,-inf,40000.0,4311,1555,2756,0.6864,0.0799,0.1781
LIMIT_BAL,,40000.0,70000.0,4921,1328,3593,0.2634,0.0122,0.1781
LIMIT_BAL,,70000.0,140000.0,6058,1439,4619,0.0925,0.0018,0.1781
LIMIT_BAL,,140000.0,200000.0,5088,897,4191,-0.2830,0.0125,0.1781
LIMIT_BAL,,200000.0,260000.0,3426,567,2859,-0.3592,0.0133,0.1781
LIMIT_BAL,,260000.0,inf,6196,850,5346,-0.5802,0.0584,0.1781
PAY_AMT2,,-inf,100.0,5696,1895,3801,0.5626,0.0691,0.1550
PAY_AMT2,,100.0,1157.0,3272,743,2529,0.0338,0.0001,0.1550
PAY_AMT2,,1157.0,1600.0,3140,791,2349,0.1702,0.0032,0.1550
PAY_AMT2,,1600.0,2100.0,3347,723,2624,-0.0304,0.0001,0.1550
PAY_AMT2,,2100.0,3049.88,3351,691,2660,-0.0893,0.0009,0.1550
PAY_AMT2,,3049.88,4819.81,3094,662,2432,-0.0425,0.0002,0.1550
PAY_AMT2,,4819.81,9676.15,4455,720,3735,-0.3876,0.0199,0.1550
PAY_AMT2,,9676.15,inf,3645,411,3234,-0.8042,0.0615,0.1550
PAY_AMT3,,-inf,0.0,5968,1931,4037,0.5212,0.0616,0.1290
PAY_AMT3,,0.0,1074.0,5227,1192,4035,0.0393,0.0003,0.1290
PAY_AMT3,,1074.0,1600.0,3060,732,2328,0.1017,0.0011,0.1290
PAY_AMT3,,1600.0,2906.18,4647,1005,3642,-0.0289,0.0001,0.1290
PAY_AMT3,,2906.18,4574.95,3648,698,2950,-0.1827,0.0039,0.1290
PAY_AMT3,,4574.95,9000.0,3995,646,3349,-0.3869,0.0178,0.1290
PAY_AMT3,,9000.0,inf,3455,432,3023,-0.6869,0.0442,0.1290
PAY_AMT4,,-inf,0.0,6408,1993,4415,0.4633,0.0516,0.1094
PAY_AMT4,,0.0,640.26,3276,709,2567,-0.0280,0.0001,0.1094
PAY_AMT4,,640.26,1000.0,3098,756,2342,0.1280,0.0018,0.1094
PAY_AMT4,,1000.0,2005.0,4921,1130,3791,0.0483,0.0004,0.1094
PAY_AMT4,,2005.0,4000.0,4761,913,3848,-0.1799,0.0049,0.1094
PAY_AMT4,,4000.0,9509.0,4522,727,3795,-0.3938,0.0208,0.1094
PAY_AMT4,,9509.0,inf,3014,408,2606,-0.5956,0.0298,0.1094
PAY_AMT6,,-inf,0.0,7173,2078,5095,0.3618,0.0344,0.0975
PAY_AMT6,,0.0,900.0,4243,1051,3192,0.1478,0.0032,0.0975
PAY_AMT6,,900.0,1390.62,3106,672,2434,-0.0283,0.0001,0.0975
PAY_AMT6,,1390.62,2000.0,3233,777,2456,0.1078,0.0013,0.0975
PAY_AMT6,,2000.0,4000.0,4762,947,3815,-0.1347,0.0028,0.0975
PAY_AMT6,,4000.0,8000.0,4004,660,3344,-0.3640,0.0159,0.0975
PAY_AMT6,,8000.0,inf,3479,451,3028,-0.6455,0.0398,0.0975
PAY_AMT5,,-inf,0.0,6703,1970,4733,0.3822,0.0360,0.0905
PAY_AMT5,,0.0,700.0,3437,811,2626,0.0837,0.0008,0.0905
PAY_AMT5,,700.0,1297.0,3866,875,2991,0.0295,0.0001,0.0905
PAY_AMT5,,1297.0,2000.0,3595,853,2742,0.0910,0.0010,0.0905
PAY_AMT5,,2000.0,4322.44,5208,1029,4179,-0.1428,0.0034,0.0905
PAY_AMT5,,4322.44,9038.0,4092,700,3392,-0.3194,0.0127,0.0905
PAY_AMT5,,9038.0,inf,3099,398,2701,-0.6562,0.0365,0.0905
PAY_AMT1,,-inf,17.0,5404,1938,3466,0.6773,0.0973,0.1724
PAY_AMT1,,17.0,5000.0,17032,3658,13374,-0.0377,0.0008,0.1724
PAY_AMT1,,5000.0,9666.0,3859,588,3271,-0.4574,0.0235,0.1724
PAY_AMT1,,9666.0,inf,3705,452,3253,-0.7150,0.0508,0.1724
PAY_AMT2,,-inf,78.68,5651,1886,3765,0.5674,0.0698,0.1539
PAY_AMT2,,78.68,1500.0,5882,1405,4477,0.0998,0.0020,0.1539
PAY_AMT2,,1500.0,4819.81,10367,2214,8153,-0.0449,0.0007,0.1539
PAY_AMT2,,4819.81,9676.15,4455,720,3735,-0.3876,0.0199,0.1539
PAY_AMT2,,9676.15,inf,3645,411,3234,-0.8042,0.0615,0.1539
PAY_AMT3,,-inf,0.0,5968,1931,4037,0.5212,0.0616,0.1257
PAY_AMT3,,0.0,2436.53,11940,2713,9227,0.0346,0.0005,0.1257
PAY_AMT3,,2436.53,4500.0,4587,906,3681,-0.1432,0.0030,0.1257
PAY_AMT3,,4500.0,7000.0,3047,495,2552,-0.3814,0.0132,0.1257
PAY_AMT3,,7000.0,inf,4458,591,3867,-0.6197,0.0474,0.1257
PAY_AMT4,,-inf,0.0,6408,1993,4415,0.4633,0.0516,0.1081
PAY_AMT4,,0.0,1004.58,6438,1476,4962,0.0462,0.0005,0.1081
PAY_AMT4,,1004.58,2005.0,4857,1119,3738,0.0526,0.0005,0.1081
PAY_AMT4,,2005.0,4000.0,4761,913,3848,-0.1799,0.0049,0.1081
PAY_AMT4,,4000.0,9509.0,4522,727,3795,-0.3938,0.0208,0.1081
PAY_AMT4,,9509.0,inf,3014,408,2606,-0.5956,0.0298,0.1081
PAY_AMT6,,-inf,0.0,7173,2078,5095,0.3618,0.0344,0.0963
PAY_AMT6,,0.0,844.78,4021,991,3030,0.1411,0.0028,0.0963
PAY_AMT6,,844.78,2000.0,6561,1509,5052,0.0504,0.0006,0.0963
PAY_AMT6,,2000.0,4000.0,4762,947,3815,-0.1347,0.0028,0.0963
PAY_AMT6,,4000.0,8000.0,4004,660,3344,-0.3640,0.0159,0.0963
PAY_AMT6,,8000.0,inf,3479,451,3028,-0.6455,0.0398,0.0963
PAY_AMT5,,-inf,0.0,6703,1970,4733,0.3822,0.0360,0.0904
PAY_AMT5,,0.0,1297.0,7303,1686,5617,0.0552,0.0008,0.0904
PAY_AMT5,,1297.0,2000.0,3595,853,2742,0.0910,0.0010,0.0904
PAY_AMT5,,2000.0,4322.44,5208,1029,4179,-0.1428,0.0034,0.0904
PAY_AMT5,,4322.44,9038.0,4092,700,3392,-0.3194,0.0127,0.0904
PAY_AMT5,,9038.0,inf,3099,398,2701,-0.6562,0.0365,0.0904
PAY_5,,-inf,-2.0,4546,895,3651,-0.1472,0.0032,0.0382
PAY_5,,-2.0,-1.0,5539,897,4642,-0.3852,0.0245,0.0382
PAY_5,,-1.0,inf,19915,4844,15071,0.1237,0.0105,0.0382
EDUCATION,"[0, 4, 5, 6, 1]",,,11053,2069,8984,-0.2097,0.0153,0.0242
EDUCATION,[2],,,14030,3330,10700,0.0914,0.0040,0.0242
EDUCATION,[3],,,4917,1237,3680,0.1685,0.0049,0.0242
AGE,,-inf,25.0,3871,1032,2839,0.2467,0.0084,0.0217
AGE,,25.0,28.0,4142,852,3290,-0.0924,0.0011,0.0217
AGE,,28.0,32.0,5375,1041,4334,-0.1676,0.0048,0.0217
AGE,,32.0,35.0,3421,672,2749,-0.1500,0.0025,0.0217
AGE,,35.0,45.0,8522,1861,6661,-0.0165,0.0001,0.0217
AGE,,45.0,inf,4669,1178,3491,0.1723,0.0048,0.0217
AGE,,-inf,25.0,3871,1032,2839,0.2467,0.0084,0.0212
AGE,,25.0,28.0,4142,852,3290,-0.0924,0.0011,0.0212
AGE,,28.0,30.0,3000,587,2413,-0.1549,0.0023,0.0212
AGE,,30.0,33.0,3521,669,2852,-0.1913,0.0041,0.0212
AGE,,33.0,45.0,10797,2318,8479,-0.0382,0.0005,0.0212
AGE,,45.0,inf,4669,1178,3491,0.1723,0.0048,0.0212
BILL_AMT5,,-inf,390.0,5190,1251,3939,0.1117,0.0022,0.0200
BILL_AMT5,,390.0,2500.0,3065,631,2434,-0.0913,0.0008,0.0200
BILL_AMT5,,2500.0,12429.0,4742,884,3858,-0.2148,0.0069,0.0200
Expand All @@ -94,36 +83,24 @@ BILL_AMT6,,12252.2,22640.84,3856,994,2862,0.2011,0.0055,0.0187
BILL_AMT6,,22640.84,51296.73,5549,1344,4205,0.1181,0.0027,0.0187
BILL_AMT6,,51296.73,102998.04,3689,770,2919,-0.0739,0.0007,0.0187
BILL_AMT6,,102998.04,inf,3406,690,2716,-0.1115,0.0014,0.0187
BILL_AMT4,,-inf,671.8,5381,1274,4107,0.0882,0.0014,0.0170
BILL_AMT4,,671.8,7660.0,5006,952,4054,-0.1902,0.0057,0.0170
BILL_AMT4,,7660.0,15516.0,3066,664,2402,-0.0271,0.0001,0.0170
BILL_AMT4,,15516.0,22985.0,3038,796,2242,0.2232,0.0054,0.0170
BILL_AMT4,,22985.0,35322.65,3038,742,2296,0.1291,0.0017,0.0170
BILL_AMT4,,35322.65,57583.0,3272,732,2540,0.0145,0.0000,0.0170
BILL_AMT4,,57583.0,101181.64,3312,708,2604,-0.0437,0.0002,0.0170
BILL_AMT4,,101181.64,inf,3887,768,3119,-0.1428,0.0025,0.0170
BILL_AMT1,,-inf,780.0,4469,1104,3365,0.1442,0.0032,0.0158
BILL_AMT1,,780.0,5121.3,4001,802,3199,-0.1248,0.0020,0.0158
BILL_AMT1,,5121.3,12545.2,3273,708,2565,-0.0286,0.0001,0.0158
BILL_AMT1,,12545.2,34102.56,5832,1461,4371,0.1628,0.0054,0.0158
BILL_AMT1,,34102.56,52204.9,3425,750,2675,-0.0129,0.0000,0.0158
BILL_AMT1,,52204.9,90884.88,3510,746,2764,-0.0510,0.0003,0.0158
BILL_AMT1,,90884.88,inf,5490,1065,4425,-0.1656,0.0048,0.0158
BILL_AMT2,,-inf,0.0,3175,744,2431,0.0747,0.0006,0.0139
BILL_AMT2,,0.0,2530.0,3973,895,3078,0.0235,0.0001,0.0139
BILL_AMT2,,2530.0,8026.77,3061,579,2482,-0.1968,0.0037,0.0139
BILL_AMT2,,8026.77,16828.8,3115,737,2378,0.0873,0.0008,0.0139
BILL_AMT2,,16828.8,34763.0,4673,1176,3497,0.1689,0.0047,0.0139
BILL_AMT2,,34763.0,51046.6,3106,689,2417,0.0036,0.0000,0.0139
BILL_AMT2,,51046.6,92998.55,3797,817,2980,-0.0354,0.0002,0.0139
BILL_AMT2,,92998.55,inf,5100,999,4101,-0.1535,0.0038,0.0139
BILL_AMT3,,-inf,390.0,4583,1080,3503,0.0820,0.0011,0.0129
BILL_AMT3,,390.0,7543.16,5497,1088,4409,-0.1406,0.0035,0.0129
BILL_AMT3,,7543.16,18886.0,4316,1012,3304,0.0755,0.0008,0.0129
BILL_AMT3,,18886.0,35891.02,4314,1088,3226,0.1718,0.0044,0.0129
BILL_AMT3,,35891.02,76777.4,5290,1168,4122,-0.0024,0.0000,0.0129
BILL_AMT3,,76777.4,inf,6000,1200,4800,-0.1276,0.0031,0.0129
BILL_AMT4,,-inf,671.8,5381,1274,4107,0.0882,0.0014,0.0154
BILL_AMT4,,671.8,5711.63,4117,781,3336,-0.1933,0.0048,0.0154
BILL_AMT4,,5711.63,15516.0,3955,835,3120,-0.0595,0.0005,0.0154
BILL_AMT4,,15516.0,23176.59,3085,808,2277,0.2226,0.0054,0.0154
BILL_AMT4,,23176.59,57583.0,6263,1462,4801,0.0697,0.0010,0.0154
BILL_AMT4,,57583.0,104354.16,3456,725,2731,-0.0676,0.0005,0.0154
BILL_AMT4,,104354.16,inf,3743,751,2992,-0.1236,0.0018,0.0154
BILL_AMT1,,-inf,440.0,3785,932,2853,0.1399,0.0026,0.0132
BILL_AMT1,,440.0,12371.52,7919,1669,6250,-0.0617,0.0010,0.0132
BILL_AMT1,,12371.52,21720.9,3161,814,2347,0.1998,0.0044,0.0132
BILL_AMT1,,21720.9,52204.9,6135,1410,4725,0.0494,0.0005,0.0132
BILL_AMT1,,52204.9,92004.2,3600,756,2844,-0.0662,0.0005,0.0132
BILL_AMT1,,92004.2,inf,5400,1055,4345,-0.1568,0.0042,0.0132
SEX,[2],,,18112,3763,14349,-0.0798,0.0038,0.0092
SEX,[1],,,11888,2873,9015,0.1152,0.0054,0.0092
MARRIAGE,"[0, 2]",,,16018,3346,12672,-0.0729,0.0028,0.0059
MARRIAGE,"[1, 3]",,,13982,3290,10692,0.0801,0.0031,0.0059
BILL_AMT2,,-inf,92998.55,24900,5637,19263,0.0299,0.0007,0.0045
BILL_AMT2,,92998.55,inf,5100,999,4101,-0.1535,0.0038,0.0045
BILL_AMT3,,-inf,76777.4,24000,5436,18564,0.0305,0.0008,0.0039
BILL_AMT3,,76777.4,inf,6000,1200,4800,-0.1276,0.0031,0.0039
16 changes: 9 additions & 7 deletions woe.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def format_tree(self, tree, woe_iv_list, split_value_list):


class WoeFeatureProcess(object):
def __init__(self, path_conf, path_woe_rule, min_sample_rate=0.1):
def __init__(self, path_conf, path_woe_rule, min_sample_rate=0.1, min_iv=0.0005):
"""
:param path_conf: 描述每个特征的情况
is_continous: 1为连续型变量,0为离散型变量
Expand All @@ -81,6 +81,7 @@ def __init__(self, path_conf, path_woe_rule, min_sample_rate=0.1):
var_name: 特征名
:param path_woe_rule: 存储csv格式特征分箱
:param min_sample_rate: 每个分箱最小样本比例(*总体样本)
:param min_iv: 每个分箱最小iv,如果小于给定值则该箱被合并
"""
self.dataset = None
self.conf = pd.read_csv(path_conf)
Expand All @@ -90,9 +91,10 @@ def __init__(self, path_conf, path_woe_rule, min_sample_rate=0.1):
self.woe_rule_df = pd.DataFrame()
self.path_woe_rule = path_woe_rule
self.min_sample_rate = min_sample_rate
self.total_bad_cnt = 0
self.total_good_cnt = 0
self.min_sample = 0
self.total_bad_cnt = 1
self.total_good_cnt = 1
self.min_sample = 1
self.min_iv = min_iv

def fit(self, dataset):
self.dataset = dataset
Expand Down Expand Up @@ -281,7 +283,7 @@ def choose_best_split(self, dataset, var):
woe_left, iv_left = self.calculate_woe_iv(dataset_left)
woe_right, iv_right = self.calculate_woe_iv(dataset_right)

if iv_left + iv_right > best_split_iv:
if iv_left + iv_right > best_split_iv and iv_left >= self.min_iv and iv_right >= self.min_iv:
best_split_value = split_value
best_split_iv = iv_left + iv_right
best_dataset_left = dataset_left
Expand Down Expand Up @@ -329,10 +331,10 @@ def _transform_discrete(sub_woe_rule, value):

if __name__ == '__main__':
df = pd.read_csv("source/credit_card.csv")
df_conf = pd.read_csv("f_conf/credit_card.conf")
woe = WoeFeatureProcess(path_conf="f_conf/credit_card.conf",
path_woe_rule="result/woe_rule.csv",
min_sample_rate=0.1)
min_sample_rate=0.1,
min_iv=0.0005)
woe.fit(df)
print(woe.woe_rule_df)
woe.plot_woe_structure()
Expand Down

0 comments on commit 526fe5d

Please sign in to comment.