-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathGeneratorProfKesden.py
More file actions
105 lines (89 loc) · 2.92 KB
/
GeneratorProfKesden.py
File metadata and controls
105 lines (89 loc) · 2.92 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import sys
import csv
import numpy
import getopt
import math
def usage():
    '''
    Prints the command-line help text to standard output.

    The text lists the -c, -p and -o options (shown with <> as required)
    and the -v option (shown with [] as optional). Takes no arguments and
    returns None.
    '''
    # A single parenthesized argument is valid both as a Python 2 print
    # statement and as a Python 3 print() call, and emits identical output.
    print('$> python generaterawdata.py <required args> [optional args]\n' +
          '\t-c <#>\t\tNumber of clusters to generate\n' +
          '\t-p <#>\t\tNumber of points per cluster\n' +
          '\t-o <file>\tFilename for the output of the raw data\n' +
          '\t-v [#]\t\tMaximum coordinate value for points\n')
def euclideanDistance(p1, p2):
    '''
    Takes two 2-D points and computes the Euclidean distance between them.

    Only the coordinates at index 0 and index 1 of each point are read.
    Returns the distance as a float.
    '''
    # math.hypot computes sqrt(dx*dx + dy*dy) without the OverflowError
    # that math.pow(delta, 2) raises when a coordinate difference is huge.
    return math.hypot(p2[0] - p1[0], p2[1] - p1[1])
def tooClose(point, points, minDist):
    '''
    Reports whether the point is nearer than minDist to any entry of the
    points list.

    Returns True as soon as one entry is found whose euclidean distance
    to the point is strictly less than minDist, and False when no entry
    is that close (which includes the case of an empty list).
    '''
    return any(euclideanDistance(point, other) < minDist
               for other in points)
def handleArgs(args):
    '''
    Parses the command-line arguments of the raw data generator.

    args is the full argument vector (for example sys.argv); its first
    element is skipped and the remainder is parsed with getopt using the
    options -c, -p, -o and -v.

    Returns the tuple (numClusters, numPoints, output, maxValue):
    numClusters and numPoints are ints, output is the filename string,
    and maxValue is a float when -v was given (otherwise the default 10).

    Prints the usage text and exits with status 2 when an option is
    unknown, a numeric value cannot be parsed, -c / -p / -o is missing,
    -c or -p is negative, or the maximum value is below 1.
    '''
    # set up return values; -1 and None mean "not supplied"
    numClusters = -1
    numPoints = -1
    output = None
    maxValue = 10

    try:
        optlist, _ = getopt.getopt(args[1:], 'c:p:v:o:')
    except getopt.GetoptError as err:
        print(str(err))
        usage()
        sys.exit(2)

    try:
        for key, val in optlist:
            # first, the required arguments
            if key == '-c':
                numClusters = int(val)
            elif key == '-p':
                numPoints = int(val)
            elif key == '-o':
                output = val
            # now, the optional argument
            elif key == '-v':
                maxValue = float(val)
    except ValueError as err:
        # a non-numeric value is a usage error, not a crash with traceback
        print(str(err))
        usage()
        sys.exit(2)

    # check required arguments were inputted
    if numClusters < 0 or numPoints < 0 or \
            maxValue < 1 or \
            output is None:
        usage()
        # non-zero status so callers can tell the run did not happen
        sys.exit(2)

    return (numClusters, numPoints, output, maxValue)
def drawOrigin(maxValue):
    '''
    Draws a random 2-D point.

    Both coordinates are sampled with numpy.random.uniform using the
    bounds 0 and maxValue; the pair is returned as a numpy array of
    length 2.
    '''
    lowerBound = 0
    coordinateCount = 2
    return numpy.random.uniform(lowerBound, maxValue, coordinateCount)
# start by reading the command line
numClusters, numPoints, output, maxValue = handleArgs(sys.argv)

# step 1: generate each 2D centroid
centroids_radii = []
# minimum allowed distance between two centroids; with 0 no candidate
# is ever rejected by tooClose
minDistance = 0
for i in range(numClusters):
    centroid_radius = drawOrigin(maxValue)
    # is it far enough from the others? if not, redraw until it is
    while tooClose(centroid_radius, centroids_radii, minDistance):
        centroid_radius = drawOrigin(maxValue)
    centroids_radii.append(centroid_radius)

# step 2: generate the points for each centroid
minClusterVar = 0
maxClusterVar = 0.5
# the with-block guarantees the output file is flushed and closed, even
# if generating or writing a point raises
with open(output, "w") as outputFile:
    writer = csv.writer(outputFile)
    for cluster in centroids_radii:
        # draw the spread for this cluster
        # NOTE: numpy.random.normal receives this value as its `scale`
        # argument, i.e. it is used as a standard deviation
        variance = numpy.random.uniform(minClusterVar, maxClusterVar)
        for j in range(numPoints):
            # generate a 2D point normally-distributed around the centroid
            x, y = numpy.random.normal(cluster, variance)
            # write the point out as one CSV row
            writer.writerow([x, y])