-
Notifications
You must be signed in to change notification settings - Fork 14
/
Copy pathexample.py
41 lines (33 loc) · 1.52 KB
/
example.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
"""Run k-means++ on test data and save output to image in same directory."""
from __future__ import division, print_function
from datetime import datetime
import numpy as np
import wkmeans as wkm
# If you base your code on this and put wkmeans in its own directory (called
# weighted_k_means, as would happen if you clone this repo) use the following
# to import instead:
# import weighted_k_means.wkmeans as wkm
# Start the wall clock so the total run time can be reported at the end.
start_time = datetime.now()

# How many data points to generate.
N = 5000

# Each data point carries a count: points are not unique and may even be
# "empty", i.e. have a count of zero. Floats are used for the counts here;
# for integer counts use instead:
# point_counts = np.random.randint(100, size=(N))
point_counts = np.random.random_sample(size=(N,)) * 100

# Initialise the class with some default values. If you have your own data,
# pass it in instead of N:
# clusterer = wkm.KPlusPlus(3, X=my_data, c=my_counts, alpha=3, beta=0.9)
clusterer = wkm.KPlusPlus(3, N=N, c=point_counts, alpha=3, beta=0.9)

# Initialise the centroids using k-means++ ...
clusterer.init_centers()
# ... and then run to find the clusters.
clusterer.find_centers(method='++')

# Plot the result; the plot method's own `calls` attribute is handed to it
# as its argument.
plot_call_count = clusterer.plot_clusters.calls
clusterer.plot_clusters(plot_call_count)

# All done: report some useful information about the run.
print('The End!')
elapsed = datetime.now() - start_time
print('\tRun time: ', elapsed)
total_runs = clusterer._cluster_points.calls
print('\tTotal runs: ', total_runs)
unique_items_per_cluster = list(map(len, clusterer.clusters))
print('\tNumber of unique items per cluster: ', unique_items_per_cluster)
print('\tNumber of items per cluster: ', clusterer.counts_per_cluster)