Assignment 05 - blatt5 PDF

Title Assignment 05 - blatt5
Course Grundlagen Bioinformatik
Institution Eberhard Karls Universität Tübingen
Pages 11
File Size 253.4 KB
File Type PDF
Total Downloads 88
Total Views 166

Summary

blatt5...


Description

Jiahui An 4151638

Assignment 05

1. HMM and Viterbi algorithm

2.Implementation of HMM generator

import java.io.*;

public class Generator { public static void main(String[] args)throws Exception{ Reader r= new FileReader("C:\\Users\\jiahu\\Desktop\\casino.hmm"); //read in the casino.hmm file //create a new HMM object HMM hmm = new HMM(r);

pg. 1

Jiahui An 4151638 r.close(); System.out.println(hmm. generateSequence()); //print the generated sequence System.out.println(hmm.getStates()); //print the path of the generated sequence } }

3.Implementation of HMM Viterbi decoder

import java.io.*; public class Decoder { public static void main(String[] args) throws Exception { Reader r= new FileReader("C:\\Users\\jiahu\\Desktop\\casino.hmm"); //read in the casino.hmm file HMM hmm = new HMM(r); //create a new HMM object r.close(); System.out.println(hmm.runViterbi("66653421521342343656666")); //find and print the probable path of a given sequence //using the Viterbi algorithm }

4.Evaluation of the Viterbi decoder

import java.io.*; public class EvaluateDecoder { public static void main(String[] args) throws Exception { Reader r= new FileReader("C:\\Users\\jiahu\\Desktop\\casino.hmm" ); //read in the casino.hmm file HMM hmm = new HMM(r); r.close(); int CorrectState =0; int IncorrectState =0; for(int i=1; i 0)

pg. 4

Jiahui An 4151638 numStates = (int) st.nval; else throw new Exception("Parse Error in line: " + st.lineno()); // read the names of the states: stateNames = new char[numStates]; for (int i = 0; i < numStates; i++) { if (st.nextToken() != StreamTokenizer.TT_EOF) { if (st.ttype == StreamTokenizer.TT_WORD) stateNames[i] = st.sval.charAt(0); else if (st.ttype == StreamTokenizer.TT_NUMBER) stateNames[i] = (new Double(st.nval)).toString().charAt(0); } else throw new Exception("Parse Error in line: " + st.lineno()); } // read number of symbols if (st.nextToken() != StreamTokenizer.TT_EOF && st.ttype == StreamTokenizer.TT_NUMBER && st.nval > 0) numSymbols = (int) st.nval; else throw new Exception("Parse Error in line: " + st.lineno()); // read the names of the symbols: symbolNames = new char[numSymbols]; for (int i = 0; i < numSymbols; i++) { if (st.nextToken() != StreamTokenizer.TT_EOF) { if (st.ttype == StreamTokenizer.TT_WORD) symbolNames[i] = st.sval.charAt(0); else if (st.ttype == StreamTokenizer.TT_NUMBER) symbolNames[i] = (new Double(st.nval)).toString().charAt(0); } else throw new Exception("Parse Error in line: " + st.lineno()); } // read transition matrix: transMatrix = new double[numStates][numStates]; for (int i = 0; i < numStates; i++) { for (int j = 0; j < numStates; j++) { if (st.nextToken() != StreamTokenizer.TT_EOF && st.ttype == StreamTokenizer.TT_NUMBER) transMatrix[i][j] = st.nval; else throw new Exception("Parse Error in line: " + st.lineno()); } } // read emission probabilities: emissionProb = new double[numStates][numSymbols]; for (int i = 0; i < numStates; i++) { for (int j = 0; j < numSymbols; j++) { if (st.nextToken() != StreamTokenizer.TT_EOF && st.ttype == StreamTokenizer.TT_NUMBER) emissionProb[i][j] = st.nval;

pg. 5

Jiahui An 4151638 else throw new Exception("Parse Error in line: " + st.lineno()); } } checkValid(); } catch (java.io.IOException ex) { throw new IOException("Parse error in line " + st.lineno() + ": " + ex.getMessage()); } } // II. Input and output of HMMs: /** * Read an HMM from a string. * * @param str the string containing a description of the HMM */ public static HMM valueOf(String str) throws Exception { return new HMM(new StringReader(str)); } /** * Print a description of the HMM to a string. * * @return a description of the HMM */ public String toString() { StringBuilder str = new StringBuilder(); str.append("#####\n"); str.append("# Number of states:\n ").append(getNumStates()).append("\n"); str.append("# States:\n"); for (int i = 0; i < getNumStates(); i++) str.append(" ").append(stateNames[i]); str.append("\n"); str.append("# Number of symbols:\n ").append(getNumSymbols()).append("\n"); str.append("# Symbols:\n"); for (int i = 0; i < getNumSymbols(); i++) str.append(" ").append(symbolNames[i]); str.append("\n"); str.append("# Transition matrix:\n"); for (int i = 0; i < getNumStates(); i++) { for (int j = 0; j < getNumStates(); j++) str.append(" ").append(getTransMatrix(i, j)); str.append("\n"); } str.append("# Emission probabilities:\n"); for (int i = 0; i < getNumStates(); i++) { for (int j = 0; j < getNumSymbols(); j++) str.append(" ").append(getEmissionProb(i, j)); str.append("\n"); } str.append("#####\n"); return str.toString(); } // III. Methods for querying the HMM (is and get functions):

pg. 6

Jiahui An 4151638 /** * Check that the HMM contains valid and consistent data, throw an * exception, if this is not the case */ public void checkValid() throws Exception { if (numStates < 0 || numSymbols < 0) throw new Exception("Invalid data, numStates=" + numStates + ", numSymbols=" + numSymbols); // Test transMatrix: for (int i = 0; i < numStates; i++) { double sum = 0; for (int j = 0; j < numStates; j++) { if (transMatrix[i][j] < 0) throw new Exception("Invalid data, transMatrix[" + i + "][" + j + "]=" + transMatrix[i][j]); sum += transMatrix[i][j]; } if (Math.abs(1.0 - sum) > 0.000001) throw new Exception("Invalid data, transMatrix(row=" + i + "): " + sum); } // (No need to check begin and end state...) } /** * Get the * * @return */ public int return }

number of states number of states getNumStates() { numStates;

/** * Get the name of the i-th state * * @param i the rank of the state * @return the name of the i-th state */ public char getStateName(int i) { return stateNames[i]; } /** * Get the * * @return */ public int return }

number of symbols number of symbols getNumSymbols() { numSymbols;

/** * Get the name of the i-th symbol * * @param i the rank of the symbol * @return the name of the i-th symbol */

public char getSymbolName(int i) {

pg. 7

Jiahui An 4151638 // Careful here: emitSymbol will return -1, if called while // in the begin/end state 0. Need to make sure you don't call // getSymbolName then! return symbolNames[i]; } /** * Get the rank i of a symbol s_i * * @param s the symbol * @return the rank 0..numSymbols-1 of the symbol */ public int getSymbolRank(char s) throws Exception { for (int i = 0; i < getNumSymbols(); i++) if (s == getSymbolName(i)) return i; throw new Exception("Unrecognized symbol: " + s); } /** * Get the transition probability for s to t * * @param s the source state * @param t the target state * @return transition probability s to t */ public double getTransMatrix(int s, int t) { return transMatrix[s][t]; } /** * Get the emission probability for state k and symbol s * * @param k the state * @param s the symbol state * @return emission probability for state k and symbol s */ public double getEmissionProb(int k, int s) { return emissionProb[k][s]; } /* IV. Methods for setting data: */ /** * Set the i-th symbol * * @param i the rank of the symbol * @param s the symbol */ public void setSymbol(int i, char s) { symbolNames[i] = s; } // The preceding is just an example of a set method. This should // be followed by more set methods, which we haven't got here because we // don't need them yet... // V. Computational methods and algorithms:

pg. 8

Jiahui An 4151638 /** * Chooses the next state using the transition matrix * * @param k current state * @return next state */ public int chooseNextState(int k) { // todo: Please implement //generate a random double randomNumber = Math.random(); number and store it double upperLimit = 0; for(int i=0; i< getNumStates(); i++){ upperLimit += getTransMatrix(k,i); //determine the cumulative upper limit with the probability of variable if(upperLimit > randomNumber) //if the random number is located in the current interval, return i; //then return the current state } return -1; } /** * Chooses a symbol to emit * * @param k the current state * @return the number of the emitted symbol */ public int emitSymbol(int k) { // todo: Please implement double randomNumber = Math.random(); number and store it double upperLimit = 0;

//generate a random

for(int i=0; i< getNumSymbols(); i++){ upperLimit += getEmissionProb(k,i); //determine the cumulative upper limit with the probability of variable //if the random number is if(upperLimit > randomNumber) located in the current interval, //then return the current return i; symbol } return -1; } /** * Randomly generate a sequence of symbols using the HMM, * the state path used to generate the symbols can be accessed using * getStates() * * @return the sequence of emitted symbols */ public String generateSequence() { StringBuilder symbols = new StringBuilder(); // the emitted symbols StringBuilder path = new StringBuilder(); // the state path used // todo: Please implement int state =0; state = chooseNextState(state);

pg. 9

//get the first state

Jiahui An 4151638

while(state != 0){ //if the current state is not the 'end state' path.append(getStateName(state)); //add the current state to the path //add the symbols.append(getSymbolName(emitSymbol(state))); current symbol to the sequence //choose state = chooseNextState(state); the next state } generatingPath = path.toString(); // access via getGeneratingPath return symbols.toString(); } /** * Get the state path used to generate the sequence of symbols returned * by generateSequence() * * @return the state path used */ public String getStates() { return generatingPath; } /** * The Viterbi algorithm is run to decode the given sequence of * symbols. * * @param seq the sequence of observed symbols * @return the most probable sequence of states */ public String runViterbi(String seq) throws Exception { // *** Algorithm as described in the lecture and in DEKM98, page. 56: // todo: Please implement // setup arrays: int L = seq.length(); double v[][] = new double[numStates][L + 1]; // Viterbi variable int ptr[][] = new int[L + 1][numStates]; // traceback pointers int pi[] = new int[L + 1]; // most probable path // Initialization: v[0][0] = 1; for (int k = 1; k < numStates; k++) v[k][0] = 0; // Recursion: for (int i = 1; i l) for (int k = 0; k < numStates ;k++){ if( v[k][i-1]*getTransMatrix(k,l) > max){ max = v[k][i-1]*getTransMatrix(k,l); //update the maximum

pg. 10

Jiahui An 4151638 ptr[i][l] = k; //store the state(k) in which the maximum of v[k][i-1]*transition-probability(k->l) occurs } v[l][i] = getEmissionProb(l,xi)* max ;

//store the

viterbi variable } } }

i++){

/* for(int i= 0 ; i< numStates; /////////////////////check for(int j=0; j= max){ max = v[k][L]; //update the maximum of Viterbi variable //the maximal maxValue = max*getTransMatrix(k,0); probability is the maximum of Viterbi variable times transitionprobability(k->0) //store the state(k), in which the maxPtr = k; maximum of v[k][i-1]*transition-probability(k->l) occurs } } double prob = maxValue; // probability of most probable path pi[L] = maxPtr; // Traceback: for (int i = L; i >= 1; i--) pi[i - 1] = ptr[i][pi[i]]; // *** End of algorithm // Generate string of state names: StringBuilder sb = new StringBuilder(); for (int i = 1; i...


Similar Free PDFs