Kmeans ia PDF

Title Kmeans ia
Course Intel·ligència artificial
Institution Universitat Autònoma de Barcelona
Pages 16
File Size 73.7 KB
File Type PDF
Total Downloads 68
Total Views 186

Summary

"""@author: ramon, bojana """ import numpy as npdef NIUs(): return 1491382, 1390204, 1493402def distance(X,C): """@brief Calculates the distance between each pixcel and each centroid@param X numpy array PxD 1st set of d...


Description

"""

@author: ramon, bojana
"""
import numpy as np

def NIUs():
    """@brief   Returns the three NIU numbers hard-coded for this file

    @return tuple with three INT values
    """
    identifiers = (1491382, 1390204, 1493402)
    return identifiers

def distance(X, C):
    """@brief   Calculates the distance between each pixel and each centroid

    @param  X   numpy array (or nested list) PxD, 1st set of data points
                (usually data points)
    @param  C   numpy array (or nested list) KxD, 2nd set of data points
                (usually cluster centroids points)

    @return dist: PxK numpy array of floats; position ij is the Euclidean
            distance between the i-th point of the first set and the j-th
            point of the second set
    """
    # Work in floating point: with unsigned integer inputs (e.g. uint8 pixels)
    # the subtraction would wrap around and the square would overflow.
    points = np.asarray(X, dtype=float)
    centroids = np.asarray(C, dtype=float)

    dist = np.empty((points.shape[0], centroids.shape[0]))
    # One pass per centroid keeps the temporaries at PxD instead of PxKxD.
    for j, centroid in enumerate(centroids):
        dist[:, j] = np.sqrt(np.sum((points - centroid) ** 2, axis=1))
    return dist

class KMeans:
    """K-means clustering state for a set of points (usually image pixels).

    Attributes set by the constructor:
        X               PxD float array with the data points
        options         DICT with the run options (see _init_options)
        K               INT number of clusters
        num_iter        INT current iteration
    and, only when K > 0:
        centroids       KxD array with the current centroids
        old_centroids   KxD array with the centroids of the previous iteration
        clusters        length-P integer array; index of the closest centroid
                        of each point of X
    """

    def __init__(self, X, K, options=None):
        """@brief   Constructor of KMeans class

        @param  X        LIST    input data
        @param  K        INT     number of centroids
        @param  options  DICT    dictionary with options
        """
        self._init_X(X)                 # LIST data coordinates
        self._init_options(options)     # DICT options
        self._init_rest(K)              # Initializes the rest of the object

    def _init_X(self, X):
        """@brief   Initialization of all pixels

        @param  X   LIST or numpy array with all pixel values. Usually it will
                    be a numpy array containing an image NxMx3.

        Sets self.X as a new floating point array in vector form (PxD, where
        P=N*M and D=3 in the above example). A 2-D input is taken as PxD
        already; inputs with more dimensions are flattened keeping the last
        axis as D.

        @raise ValueError if X has fewer than two dimensions
        """
        # The multiplication always builds a new float array, so the caller's
        # data is never aliased, and np.asarray lets plain lists through.
        data = 1.0 * np.asarray(X)
        if data.ndim < 2:
            raise ValueError("X must have at least two dimensions (PxD or NxMxD)")
        if data.ndim > 2:
            data = data.reshape(-1, data.shape[-1])
        self.X = data

    def _init_options(self, options):
        """@brief   Initialization of options in case some fields are left undefined

        @param  options DICT    dictionary with options (or None)

        Sets self.options to a new dictionary holding the given options plus
        the default value of every missing field. The dictionary passed by the
        caller is not modified.
        """
        defaults = {
            'km_init': 'first',
            'verbose': False,
            'tolerance': 0,
            'max_iter': np.inf,
            'fitting': 'Fisher',
        }
        merged = dict(defaults)
        if options is not None:
            merged.update(options)
        self.options = merged

    def _init_rest(self, K):
        """@brief   Initialization of the remaining data in the class.

        @param  K   INT     number of clusters

        When K > 0 the centroids are initialised and every point gets its
        first cluster assignment; with K <= 0 only K and num_iter are set.
        """
        self.K = K                                                  # INT number of clusters
        # Compared case-insensitively, like _init_centroids does.
        optimum_init = self.options['km_init'].lower() == 'optimum'

        if self.K > 0:
            self._init_centroids()                                  # LIST centroids coordinates
            self.old_centroids = np.empty_like(self.centroids)      # LIST coordinates of centroids from previous iteration
            self.clusters = np.empty(len(self.X), dtype=int)        # LIST assigns each element of X to a cluster
            self._cluster_points()                                  # sets the first cluster assignment
            if optimum_init:
                self.distanceCentroids = np.zeros_like(self.centroids)

        self.num_iter = 0                                           # INT current iteration
        if optimum_init:
            self.optimum = 0

    def _init_centroids(self):
        """@brief   Initialization of centroids

        Depends on self.options['km_init'] (case-insensitive):
            'custom'    K grey levels evenly spaced between 0 and 255
            'first'     the first K distinct points of X
            'optimum'   points spread between the per-dimension minimum, mean
                        and maximum of X
            otherwise   uniform random values in [0, 1)
        """
        mode = self.options['km_init'].lower()
        if mode == 'custom':
            self.centroids = self._custom_centroids()
        elif mode == 'first':
            self.centroids = self._first_distinct_centroids()
        elif mode == 'optimum':
            self.centroids = self._spread_centroids()
        else:
            self.centroids = np.random.rand(self.K, self.X.shape[1])

    def _custom_centroids(self):
        """@brief   K centroids with all coordinates equal to k*255/(K-1)

        @return KxD numpy array. For K == 1 the single centroid is the origin
                (the formula would divide by zero).
        """
        n_dims = self.X.shape[1]
        if self.K == 1:
            return np.zeros((1, n_dims))
        levels = np.arange(self.K) * 255 / (self.K - 1)
        return np.repeat(levels[:, np.newaxis], n_dims, axis=1)

    def _first_distinct_centroids(self):
        """@brief   Takes as centroids the first K distinct points of X

        @return KxD numpy array. If X holds fewer than K distinct points the
                remaining rows are left at zero.
        """
        centroids = np.zeros((self.K, self.X.shape[1]))
        found = 0
        for point in self.X:
            if found == self.K:
                break
            # Compare only against the rows filled so far: the unfilled rows
            # are zeros and must not be mistaken for an all-zero point.
            already_chosen = (centroids[:found] == point).all(axis=1).any()
            if not already_chosen:
                centroids[found] = point
                found += 1
        return centroids

    def _spread_centroids(self):
        """@brief   Spreads the centroids between the extremes and the mean of X

        The first K//2 centroids go from the per-dimension minimum towards the
        mean and the last K//2 from the per-dimension maximum towards the mean
        (the mean itself excluded in both cases). With an odd K the middle
        centroid is the mean.

        @return KxD numpy array
        """
        centroids = np.zeros((self.K, self.X.shape[1]))
        lowest = np.min(self.X, axis=0)
        highest = np.max(self.X, axis=0)
        mean = np.mean(self.X, axis=0)

        half = self.K // 2
        if self.K % 2 == 1:
            centroids[half] = mean
        if half > 0:
            # Both halves are anchored on the mean itself, so an even K no
            # longer reads a centroid row that has not been filled yet.
            centroids[:half] = np.linspace(lowest, mean, num=half, endpoint=False)
            centroids[self.K - half:] = np.linspace(highest, mean, num=half, endpoint=False)
        return centroids

    def _cluster_points(self):
        """@brief   Calculates the closest centroid of all points in X

        Sets self.clusters to a length-P array with, for every point, the
        index of the centroid at the smallest Euclidean distance (the lowest
        index wins a tie).
        """
        dist_to_centroid = np.empty((len(self.X), self.K))
        for k in range(self.K):
            dist_to_centroid[:, k] = np.sqrt(np.sum((self.X - self.centroids[k]) ** 2, axis=1))
        self.clusters = np.argmin(dist_to_centroid, axis=1)

    def _get_centroids(self):
        """@brief   Calculates coordinates of centroids based on the coordinates
                    of all the points assigned to the centroid

        Saves the current centroids in self.old_centroids and moves every
        centroid to the mean of the points assigned to it. A centroid with no
        points assigned stays where it was, so no two centroids are forced to
        coincide.
        """
        self.old_centroids = np.copy(self.centroids)

        labels = np.asarray(self.clusters, dtype=np.intp)
        sums = np.zeros((self.K, self.X.shape[-1]))
        # Unbuffered accumulation: adds every point to the row of its cluster,
        # in the same order as an explicit loop over the points would.
        np.add.at(sums, labels, self.X)
        counts = np.bincount(labels, minlength=self.K)

        occupied = counts > 0
        self.centroids[occupied] = sums[occupied] / counts[occupied, np.newaxis]

def _converges(self): """@brief Checks if there is a difference between current and old centroids

""" ####################################################### ## YOU MUST REMOVE THE REST OF THE CODE OF THIS FUNCTION ## AND CHANGE FOR YOUR OWN CODE ####################################################### if np.amax(np.absolute(self.centroids-self.old_centroids))>self.options['tolerance']: ''' if self.options['km_init']=='optimum': if np.amax(np.absolute(np.absolute(self.centroidsself.old_centroids),self.distanceCentroids))...


Similar Free PDFs