Title | Kmeans ia |
---|---|
Course | Intel·ligència artificial |
Institution | Universitat Autònoma de Barcelona |
Pages | 16 |
File Size | 73.7 KB |
File Type | |
Total Downloads | 68 |
Total Views | 186 |
"""@author: ramon, bojana """ import numpy as npdef NIUs(): return 1491382, 1390204, 1493402def distance(X,C): """@brief Calculates the distance between each pixcel and each centroid@param X numpy array PxD 1st set of d...
"""
@author: ramon, bojana """ import numpy as np
def NIUs():
    """Return the tuple of three NIU numbers hard-coded for this module."""
    nius = (1491382, 1390204, 1493402)
    return nius
def distance(X, C):
    """@brief Calculates the Euclidean distance between each point and each centroid

    Both inputs are converted to floating point before subtracting, so
    unsigned-integer data (for example raw uint8 pixel values) cannot wrap
    around and yield wrong distances, and plain nested lists are accepted.

    @param X array-like PxD 1st set of data points (usually data points)
    @param C array-like KxD 2nd set of data points (usually cluster centroids points)

    @return dist: PxK numpy array of floats; position ij is the distance between
            the i-th point of the first set and the j-th point of the second set
    """
    points = np.asarray(X, dtype=float)
    centres = np.asarray(C, dtype=float)

    dist = np.empty((points.shape[0], centres.shape[0]))
    # One centroid per pass keeps the temporaries at PxD instead of PxKxD.
    for j, centre in enumerate(centres):
        dist[:, j] = np.sqrt(np.sum((points - centre) ** 2, axis=1))
    return dist
class KMeans():
def __init__(self, X, K, options=None): """@brief Constructor of KMeans class
@param X LIST input data @param K INT
number of centroids
@param options DICT dctionary with options """ self._init_X(X) self._init_options(options) self._init_rest(K)
# LIST data coordinates # DICT options # Initializes de rest of the object
############################################################# ## THIS FUNCTION CAN BE MODIFIED FROM THIS POINT, if needed #############################################################
def _init_X(self, X): """@brief Initialization of all pixels
@param X LIST list of all pixel values. Usually it will be a numpy array containing an image NxMx3
sets X an as an array of data in vector form (PxD where P=N*M and D=3 in the above example) """ ####################################################### ## YOU MUST REMOVE THE REST OF THE CODE OF THIS FUNCTION ## AND CHANGE FOR YOUR OWN CODE ####################################################### if len(X.shape)!=2: self.X = 1.0*np.reshape(X, (-1, X.shape[2])) else: self.X = np.copy(1.0*X)
def _init_options(self, options): """@brief Initialization of options in case some fields are left undefined
@param options DICT dctionary with options
sets de options parameters """ if options == None: options = {} if not 'km_init' in options: options['km_init'] = 'first' if not 'verbose' in options: options['verbose'] = False if not 'tolerance' in options:
options['tolerance'] = 0 if not 'max_iter' in options: options['max_iter'] = np.inf if not 'fitting' in options: options['fitting'] = 'Fisher' self.options = options
############################################################# ## THIS FUNCTION CAN BE MODIFIED FROM THIS POINT, if needed #############################################################
def _init_rest(self, K): """@brief Initialization of the remainig data in the class.
@param options DICT dctionary with options """ self.K = K
# INT number of clusters
if self.K>0: self._init_centroids()
# LIST centroids coordinates
self.old_centroids = np.empty_like(self.centroids) # LIST coordinates of centroids from previous iteration self.clusters = np.empty(len(self.X),dtype=int) # LIST list that assignes each element of X into a cluster self._cluster_points()
# sets the first cluster assignation
if self.options['km_init']=='optimum': self.distanceCentroids= np.zeros_like(self.centroids) self.num_iter = 0
# INT current iteration
if self.options['km_init']=='optimum': self.optimum=0 ############################################################# ## THIS FUNCTION CAN BE MODIFIED FROM THIS POINT, if needed #############################################################
def _init_centroids(self): """@brief Initialization of centroids depends on self.options['km_init'] """ ####################################################### ## YOU MUST REMOVE THE REST OF THE CODE OF THIS FUNCTION ## AND CHANGE FOR YOUR OWN CODE ####################################################### if self.options['km_init'].lower() == 'custom': self.centroids=np.empty((self.K,self.X.shape[1])) for k in range (self.K): self.centroids[k,:]=k*255/(self.K-1)
elif self.options['km_init'].lower() == 'first': #Inicializacion por defecto con ceros para poder aceder despues centroids[]
self.centroids=np.zeros([self.K,self.X.shape[1]]) arrayDeZeros=np.zeros([self.X.shape[-1]]) pos_X=0 pos_C_A_Afegir=0
while pos_C_A_Afegir < self.K and pos_X < len(self.X): isInCentroids=0 #Mirem si el valor d'X[i] ja esta afegit a la llista de centroids if (pos_C_A_Afegir == 0) or (np.array_equal(arrayDeZeros,self.X[pos_X])): pass else: for value in self.centroids: if np.array_equal(value,self.X[pos_X]): isInCentroids=1 break
if isInCentroids==0: self.centroids[pos_C_A_Afegir] = np.copy(self.X[pos_X]) pos_C_A_Afegir+=1
pos_X+=1
elif self.options['km_init'].lower() == 'optimum': self.centroids=np.zeros([self.K,self.X.shape[1]]) #for k in range (self.K-1): self.centroids[k,:]=k*255/(self.K-1) #self.centroids[-1]=(np.mean(self.X,axis=0))/self.X.shape[0]
MAX_VALUE=np.max(self.X,axis=0) MIN_VALUE=np.min(self.X,axis=0) ''' if self.options['colorspace'].lower() == 'RGB'.lower(): MAX_VALUE=255.0
MIN_VALUE=0.0
elif self.options['colorspace'].lower() =='Lab'.lower() : MAX_VALUE=100.0 MIN_VALUE=-100.0
elif self.options['colorspace'].lower() == 'ColorNaming'.lower(): MAX_VALUE=1.0 MIN_VALUE=0.0
elif self.options['colorspace'].lower()=='HSV'.lower(): MAX_VALUE=1.0 MIN_VALUE=0.0 ''' if self.K%2==0:#Parell paritat=0
else: paritat=1 self.centroids[int((self.K-1)/2)] =np.mean(self.X,axis=0)
for i in range(self.X.shape[-1]): self.centroids[:int((self.K-paritat)/2),i]=np.linspace(MIN_VALUE[i], self.centroids[int((self.K-1)/2)][i], num=int((self.K-paritat)/2), endpoint=False)
for i in range(self.X.shape[-1]): self.centroids[int((self.K-paritat)/2+paritat):, i]=np.linspace(MAX_VALUE[i], self.centroids[int((self.K-paritat)/2)][i], num=int((self.K-paritat)/2),endpoint=False)
#self.centroids=self.centroids*1.0 else: self.centroids = np.random.rand(self.K,self.X.shape[1])
def _cluster_points(self): """@brief Calculates the closest centroid of all points in X """ ####################################################### ## YOU MUST REMOVE THE REST OF THE CODE OF THIS FUNCTION ## AND CHANGE FOR YOUR OWN CODE #######################################################
valorsDelCluster = np.empty([self.K,len(self.X)]) for pos in range(self.K): valorsDelCluster[pos]=np.sqrt(np.sum((self.X-self.centroids[pos])**2, axis=1))
self.clusters=np.argmin(valorsDelCluster,axis=0)
def _get_centroids(self): """@brief Calculates coordinates of centroids based on the coordinates of all the points assigned to the centroid """ ####################################################### ## YOU MUST REMOVE THE REST OF THE CODE OF THIS FUNCTION ## AND CHANGE FOR YOUR OWN CODE #######################################################
self.old_centroids=np.copy(self.centroids)
sumador=np.zeros([self.K,self.X.shape[-1]]) for pos in range(len(self.X)): sumador[self.clusters[pos]]+=self.X[pos]
#La vaiable contador la creo com a self ja que aixi no la he de inicialitzar totes les iteracions cosa #que seria estupida ja que els valors no depenen del anteriors i es sobrescriuen i fa el programa mes lent for k in range(self.K): if np.sum(self.clusters==k)!=0: self.centroids[k]=sumador[k]/np.sum(self.clusters==k) else: self.centroids[k]= np.copy(self.centroids[k-1])
''' sumador2=np.zeros([self.K,self.X.shape[-1]],dtype=float) for k in range(self.K): for i in range(len(self.X[self.clusters==k,:])): sumador2[k]+=self.X[self.clusters==k,:][i,:] '''
def _converges(self): """@brief Checks if there is a difference between current and old centroids
""" ####################################################### ## YOU MUST REMOVE THE REST OF THE CODE OF THIS FUNCTION ## AND CHANGE FOR YOUR OWN CODE ####################################################### if np.amax(np.absolute(self.centroids-self.old_centroids))>self.options['tolerance']: ''' if self.options['km_init']=='optimum': if np.amax(np.absolute(np.absolute(self.centroidsself.old_centroids),self.distanceCentroids))...