assignment-programming-with-python/function.py at master · Poatan222/assignment-programming-with-python · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
import pandas as pd
from sqlalchemy import create_engine

class FunctionManager:
    """
    Holds the Function objects parsed from one csv file.
    Iterating over the manager yields the same Function objects as the .functions property.
    """

    def __init__(self, path_of_csv):
        """
        Parses a local .csv into a list of Functions. On iterating the object, it returns a Function.
        The functions can also be retrieved with the .functions property
        The csv needs a column named "x" holding the x-values; every other column is treated as y-values
        :param path_of_csv: local path of the csv
        :raises FileNotFoundError: if the csv does not exist (a hint is printed before re-raising)
        :raises KeyError: if the csv has no column named "x"
        """
        self._functions = []

        # The csv is read by pandas and turned into a dataframe
        try:
            self._function_data = pd.read_csv(path_of_csv)
        except FileNotFoundError:
            print("Issue while reading file {}".format(path_of_csv))
            raise

        # The x values are stored and later on fed into each Function
        x_values = self._function_data["x"]

        # Iterate over each column of the dataframe and create a new Function object from it.
        # DataFrame.iteritems() was removed in pandas 2.0; items() is the equivalent call.
        for name_of_column, data_of_column in self._function_data.items():
            # Only the x column itself is skipped. A substring test ("x" in name) would also
            # drop unrelated columns whose name merely contains the letter x.
            if name_of_column == "x":
                continue
            # We already stored the x column, so pair it with the current y column
            subset = pd.concat([x_values, data_of_column], axis=1)
            function = Function.from_dataframe(name_of_column, subset)
            self._functions.append(function)

    def to_sql(self, file_name, suffix):
        """
        Writes the data to a local sqlite db using pandas to_sql() method.
        The db file is "<file_name>.db" and the table inside it is also called file_name.
        If the table already exists, it will be replaced
        :param file_name: the name the db file (without extension) and the table get
        :param suffix: to comply to the assignment the headers require a specific suffix to the original column name
        """
        # Using SQLAlchemy an "engine" is created. It handles the creation of the db for us if it does not exist
        engine = create_engine('sqlite:///{}.db'.format(file_name), echo=False)

        try:
            # pandas does the actual writing; it only needs the "engine" object from sqlalchemy.
            # Work on a copy so the renamed headers do not leak into the data the Functions were built from.
            copy_of_function_data = self._function_data.copy()
            copy_of_function_data.columns = [
                name.capitalize() + suffix for name in copy_of_function_data.columns
            ]
            # The first column (the x values) becomes the index and is written as such
            copy_of_function_data.set_index(copy_of_function_data.columns[0], inplace=True)

            copy_of_function_data.to_sql(
                file_name,
                engine,
                if_exists="replace",
                index=True,
            )
        finally:
            # Release the pooled connection so the sqlite file handle is not kept open
            engine.dispose()

    @property
    def functions(self):
        """
        Returns a list with all the functions. The user can also just iterate over the object itself.
        :rtype: list
        """
        return self._functions

    def __iter__(self):
        # this makes the object iterable
        return FunctionManagerIterator(self)

    def __repr__(self):
        return "Contains {} number of functions".format(len(self.functions))


class FunctionManagerIterator():

    def __init__(self, function_manager):
        """
        Used for the iteration of a FunctionManager
        :param function_manager: the object whose .functions list is walked from start to end
        """
        # Position of the next function to hand out
        self._index = 0
        self._function_manager = function_manager

    def __iter__(self):
        # An iterator has to return itself here, otherwise iter(iterator) and
        # "for ... in iterator" raise a TypeError
        return self

    def __next__(self):
        """
        returns a function object as it iterates over the list of functions
        :rtype: function
        :raises StopIteration: once every function has been returned
        """
        functions = self._function_manager.functions
        if self._index >= len(functions):
            raise StopIteration
        value_requested = functions[self._index]
        self._index += 1
        return value_requested


class Function:

    def __init__(self, name):
        """
        Contains the X and Y values of a function. Underneath it uses a pandas dataframe.
        It has some convenient methods that makes calculating regressions easy.
        1) you can give it a name that can be retrieved later
        2) it is iterable and returns a point represented as dict
        3) you can retrieve a Y-Value by providing an X-Value
        4) you can subtract two functions and get a resulting dataframe with the deviation
        :param name: the name the function should have
        """
        self._name = name
        # Starts empty; from_dataframe() fills it with an "x" and a "y" column
        self.dataframe = pd.DataFrame()

    def locate_y_based_on_x(self, x):
        """
        retrieves a Y-Value
        :param x: the X-Value
        :return: the Y-Value of the first row whose x equals the given value
        :raises IndexError: if no row has the given X-Value
        """
        # Boolean mask selecting the rows with a matching x
        search_key = self.dataframe["x"] == x
        matches = self.dataframe.loc[search_key]
        if matches.empty:
            # Same exception type as before, but now it says which x was missing
            raise IndexError("No y value found for x = {}".format(x))
        # First matching row, second column (the y value)
        return matches.iat[0, 1]

    @property
    def name(self):
        """
        The name of the function
        :return: name as str
        """
        return self._name

    def __iter__(self):
        return FunctionIterator(self)

    def __sub__(self, other):
        """
        Subtracts the dataframes of two functions element-wise and returns the resulting dataframe.
        Note that the x column is subtracted as well, only the y column carries the deviation.
        :rtype: pandas dataframe
        """
        diff = self.dataframe - other.dataframe
        return diff

    @classmethod
    def from_dataframe(cls, name, dataframe):
        """
        Immediately create a function by providing a dataframe with exactly two columns.
        The function stores a copy whose columns are named "x" and "y";
        the dataframe passed in is left untouched.
        :param name: the name the function should have
        :param dataframe: two-column dataframe, x-values first and y-values second
        :rtype: a Function
        """
        function = cls(name)
        # Copy first, so renaming the columns does not modify the caller's dataframe
        function.dataframe = dataframe.copy()
        function.dataframe.columns = ["x", "y"]
        return function

    def __repr__(self):
        return "Function for {}".format(self.name)

class IdealFunction(Function):
    def __init__(self, function, training_function, error):
        """
        An ideal function stores the predicting function, training data and the regression.
        Make sure to provide a tolerance_factor if for classification purpose tolerance is allowed
        Otherwise it will default to the maximum deviation between ideal and train function
        :param function: the ideal function
        :param training_function: the training data the classifying data is based upon
        :param error: the beforehand calculated regression
        """
        super().__init__(function.name)
        # The dataframe is shared with the function passed in, it is not copied
        self.dataframe = function.dataframe

        self.training_function = training_function
        self.error = error
        self._tolerance_value = 1
        self._tolerance = 1
        # Becomes True once a tolerance was assigned directly; from then on it is no longer recomputed
        self._tolerance_is_explicit = False

    def _determine_largest_deviation(self, ideal_function, train_function):
        # Subtracts the two functions and returns the largest absolute difference of the y values.
        # Series.max() skips NaN values, which the builtin max() did not handle reliably.
        distances = train_function - ideal_function
        return distances["y"].abs().max()

    @property
    def tolerance(self):
        """
        This property describes the accepted tolerance towards the regression in order to still count as classification.
        By default it is tolerance_factor * largest_deviation and is recomputed on every access.
        Although you can set a tolerance directly (good for unit testing) this is not recommended. Instead provide
        a tolerance_factor. A directly set tolerance takes precedence over the computed one.
        :return: the tolerance
        """
        if not self._tolerance_is_explicit:
            self._tolerance = self.tolerance_factor * self.largest_deviation
        return self._tolerance

    @tolerance.setter
    def tolerance(self, value):
        # Remember that the value was given explicitly, otherwise the getter would overwrite it again
        self._tolerance = value
        self._tolerance_is_explicit = True

    @property
    def tolerance_factor(self):
        """
        Set the factor of the largest_deviation to determine the tolerance
        :return: the factor, 1 unless changed
        """
        return self._tolerance_value

    @tolerance_factor.setter
    def tolerance_factor(self, value):
        self._tolerance_value = value

    @property
    def largest_deviation(self):
        """
        Retrieves the largest deviation between classifying function and the training function it is based upon
        :return: the largest deviation
        """
        largest_deviation = self._determine_largest_deviation(self, self.training_function)
        return largest_deviation


class FunctionIterator:

    def __init__(self, function):
        """
        Used for the iteration of a Function
        :param function: the object whose .dataframe is walked row by row
        """
        self._function = function
        # Position of the next row to hand out
        self._index = 0

    def __iter__(self):
        # An iterator has to return itself here, otherwise iter(iterator) and
        # "for ... in iterator" raise a TypeError
        return self

    def __next__(self):
        """
        On iterating over a function it returns a dict that describes the point
        :return: dict with the keys "x" and "y" of the current row
        :raises StopIteration: once every row has been returned
        """
        dataframe = self._function.dataframe
        if self._index >= len(dataframe):
            raise StopIteration
        row = dataframe.iloc[self._index]
        self._index += 1
        return {"x": row["x"], "y": row["y"]}