Title | Assignment 05 - blatt5 |
---|---|
Course | Grundlagen Bioinformatik |
Institution | Eberhard Karls Universität Tübingen |
Pages | 11 |
File Size | 253.4 KB |
File Type | |
Total Downloads | 88 |
Total Views | 166 |
blatt5...
Jiahui An 4151638
Assignment 05
1. HMM and Viterbi algorithm
2.Implementation of HMM generator
import java.io.*;
public class Generator { public static void main(String[] args)throws Exception{ Reader r= new FileReader("C:\\Users\\jiahu\\Desktop\\casino.hmm"); //read in the casino.hmm file //create a new HMM object HMM hmm = new HMM(r);
pg. 1
Jiahui An 4151638 r.close(); System.out.println(hmm. generateSequence()); //print the generated sequence System.out.println(hmm.getStates()); //print the path of the generated sequence } }
3.Implementation of HMM Viterbi decoder
import java.io.*; public class Decoder { public static void main(String[] args) throws Exception { Reader r= new FileReader("C:\\Users\\jiahu\\Desktop\\casino.hmm"); //read in the casino.hmm file HMM hmm = new HMM(r); //create a new HMM object r.close(); System.out.println(hmm.runViterbi("66653421521342343656666")); //find and print the probable path of a given sequence //using the Viterbi algorithm }
4.Evaluation of the Viterbi decoder
import java.io.*; public class EvaluateDecoder { public static void main(String[] args) throws Exception { Reader r= new FileReader("C:\\Users\\jiahu\\Desktop\\casino.hmm" ); //read in the casino.hmm file HMM hmm = new HMM(r); r.close(); int CorrectState =0; int IncorrectState =0; for(int i=1; i 0)
pg. 4
Jiahui An 4151638 numStates = (int) st.nval; else throw new Exception("Parse Error in line: " + st.lineno()); // read the names of the states: stateNames = new char[numStates]; for (int i = 0; i < numStates; i++) { if (st.nextToken() != StreamTokenizer.TT_EOF) { if (st.ttype == StreamTokenizer.TT_WORD) stateNames[i] = st.sval.charAt(0); else if (st.ttype == StreamTokenizer.TT_NUMBER) stateNames[i] = (new Double(st.nval)).toString().charAt(0); } else throw new Exception("Parse Error in line: " + st.lineno()); } // read number of symbols if (st.nextToken() != StreamTokenizer.TT_EOF && st.ttype == StreamTokenizer.TT_NUMBER && st.nval > 0) numSymbols = (int) st.nval; else throw new Exception("Parse Error in line: " + st.lineno()); // read the names of the symbols: symbolNames = new char[numSymbols]; for (int i = 0; i < numSymbols; i++) { if (st.nextToken() != StreamTokenizer.TT_EOF) { if (st.ttype == StreamTokenizer.TT_WORD) symbolNames[i] = st.sval.charAt(0); else if (st.ttype == StreamTokenizer.TT_NUMBER) symbolNames[i] = (new Double(st.nval)).toString().charAt(0); } else throw new Exception("Parse Error in line: " + st.lineno()); } // read transition matrix: transMatrix = new double[numStates][numStates]; for (int i = 0; i < numStates; i++) { for (int j = 0; j < numStates; j++) { if (st.nextToken() != StreamTokenizer.TT_EOF && st.ttype == StreamTokenizer.TT_NUMBER) transMatrix[i][j] = st.nval; else throw new Exception("Parse Error in line: " + st.lineno()); } } // read emission probabilities: emissionProb = new double[numStates][numSymbols]; for (int i = 0; i < numStates; i++) { for (int j = 0; j < numSymbols; j++) { if (st.nextToken() != StreamTokenizer.TT_EOF && st.ttype == StreamTokenizer.TT_NUMBER) emissionProb[i][j] = st.nval;
pg. 5
Jiahui An 4151638 else throw new Exception("Parse Error in line: " + st.lineno()); } } checkValid(); } catch (java.io.IOException ex) { throw new IOException("Parse error in line " + st.lineno() + ": " + ex.getMessage()); } } // II. Input and output of HMMs: /** * Read an HMM from a string. * * @param str the string containing a description of the HMM */ public static HMM valueOf(String str) throws Exception { return new HMM(new StringReader(str)); } /** * Print a description of the HMM to a string. * * @return a description of the HMM */ public String toString() { StringBuilder str = new StringBuilder(); str.append("#####\n"); str.append("# Number of states:\n ").append(getNumStates()).append("\n"); str.append("# States:\n"); for (int i = 0; i < getNumStates(); i++) str.append(" ").append(stateNames[i]); str.append("\n"); str.append("# Number of symbols:\n ").append(getNumSymbols()).append("\n"); str.append("# Symbols:\n"); for (int i = 0; i < getNumSymbols(); i++) str.append(" ").append(symbolNames[i]); str.append("\n"); str.append("# Transition matrix:\n"); for (int i = 0; i < getNumStates(); i++) { for (int j = 0; j < getNumStates(); j++) str.append(" ").append(getTransMatrix(i, j)); str.append("\n"); } str.append("# Emission probabilities:\n"); for (int i = 0; i < getNumStates(); i++) { for (int j = 0; j < getNumSymbols(); j++) str.append(" ").append(getEmissionProb(i, j)); str.append("\n"); } str.append("#####\n"); return str.toString(); } // III. Methods for querying the HMM (is and get functions):
pg. 6
Jiahui An 4151638 /** * Check that the HMM contains valid and consistent data, throw an * exception, if this is not the case */ public void checkValid() throws Exception { if (numStates < 0 || numSymbols < 0) throw new Exception("Invalid data, numStates=" + numStates + ", numSymbols=" + numSymbols); // Test transMatrix: for (int i = 0; i < numStates; i++) { double sum = 0; for (int j = 0; j < numStates; j++) { if (transMatrix[i][j] < 0) throw new Exception("Invalid data, transMatrix[" + i + "][" + j + "]=" + transMatrix[i][j]); sum += transMatrix[i][j]; } if (Math.abs(1.0 - sum) > 0.000001) throw new Exception("Invalid data, transMatrix(row=" + i + "): " + sum); } // (No need to check begin and end state...) } /** * Get the * * @return */ public int return }
number of states number of states getNumStates() { numStates;
/** * Get the name of the i-th state * * @param i the rank of the state * @return the name of the i-th state */ public char getStateName(int i) { return stateNames[i]; } /** * Get the * * @return */ public int return }
number of symbols number of symbols getNumSymbols() { numSymbols;
/** * Get the name of the i-th symbol * * @param i the rank of the symbol * @return the name of the i-th symbol */
public char getSymbolName(int i) {
pg. 7
Jiahui An 4151638 // Careful here: emitSymbol will return -1, if called while // in the begin/end state 0. Need to make sure you don't call // getSymbolName then! return symbolNames[i]; } /** * Get the rank i of a symbol s_i * * @param s the symbol * @return the rank 0..numSymbols-1 of the symbol */ public int getSymbolRank(char s) throws Exception { for (int i = 0; i < getNumSymbols(); i++) if (s == getSymbolName(i)) return i; throw new Exception("Unrecognized symbol: " + s); } /** * Get the transition probability for s to t * * @param s the source state * @param t the target state * @return transition probability s to t */ public double getTransMatrix(int s, int t) { return transMatrix[s][t]; } /** * Get the emission probability for state k and symbol s * * @param k the state * @param s the symbol state * @return emission probability for state k and symbol s */ public double getEmissionProb(int k, int s) { return emissionProb[k][s]; } /* IV. Methods for setting data: */ /** * Set the i-th symbol * * @param i the rank of the symbol * @param s the symbol */ public void setSymbol(int i, char s) { symbolNames[i] = s; } // The preceding is just an example of a set method. This should // be followed by more set methods, which we haven't got here because we // don't need them yet... // V. Computational methods and algorithms:
pg. 8
Jiahui An 4151638 /** * Chooses the next state using the transition matrix * * @param k current state * @return next state */ public int chooseNextState(int k) { // todo: Please implement //generate a random double randomNumber = Math.random(); number and store it double upperLimit = 0; for(int i=0; i< getNumStates(); i++){ upperLimit += getTransMatrix(k,i); //determine the cumulative upper limit with the probability of variable if(upperLimit > randomNumber) //if the random number is located in the current interval, return i; //then return the current state } return -1; } /** * Chooses a symbol to emit * * @param k the current state * @return the number of the emitted symbol */ public int emitSymbol(int k) { // todo: Please implement double randomNumber = Math.random(); number and store it double upperLimit = 0;
//generate a random
for(int i=0; i< getNumSymbols(); i++){ upperLimit += getEmissionProb(k,i); //determine the cumulative upper limit with the probability of variable //if the random number is if(upperLimit > randomNumber) located in the current interval, //then return the current return i; symbol } return -1; } /** * Randomly generate a sequence of symbols using the HMM, * the state path used to generate the symbols can be accessed using * getStates() * * @return the sequence of emitted symbols */ public String generateSequence() { StringBuilder symbols = new StringBuilder(); // the emitted symbols StringBuilder path = new StringBuilder(); // the state path used // todo: Please implement int state =0; state = chooseNextState(state);
pg. 9
//get the first state
Jiahui An 4151638
while(state != 0){ //if the current state is not the 'end state' path.append(getStateName(state)); //add the current state to the path //add the symbols.append(getSymbolName(emitSymbol(state))); current symbol to the sequence //choose state = chooseNextState(state); the next state } generatingPath = path.toString(); // access via getGeneratingPath return symbols.toString(); } /** * Get the state path used to generate the sequence of symbols returned * by generateSequence() * * @return the state path used */ public String getStates() { return generatingPath; } /** * The Viterbi algorithm is run to decode the given sequence of * symbols. * * @param seq the sequence of observed symbols * @return the most probable sequence of states */ public String runViterbi(String seq) throws Exception { // *** Algorithm as described in the lecture and in DEKM98, page. 56: // todo: Please implement // setup arrays: int L = seq.length(); double v[][] = new double[numStates][L + 1]; // Viterbi variable int ptr[][] = new int[L + 1][numStates]; // traceback pointers int pi[] = new int[L + 1]; // most probable path // Initialization: v[0][0] = 1; for (int k = 1; k < numStates; k++) v[k][0] = 0; // Recursion: for (int i = 1; i l) for (int k = 0; k < numStates ;k++){ if( v[k][i-1]*getTransMatrix(k,l) > max){ max = v[k][i-1]*getTransMatrix(k,l); //update the maximum
pg. 10
Jiahui An 4151638 ptr[i][l] = k; //store the state(k) in which the maximum of v[k][i-1]*transition-probability(k->l) occurs } v[l][i] = getEmissionProb(l,xi)* max ;
//store the
viterbi variable } } }
i++){
/* for(int i= 0 ; i< numStates; /////////////////////check for(int j=0; j= max){ max = v[k][L]; //update the maximum of Viterbi variable //the maximal maxValue = max*getTransMatrix(k,0); probability is the maximum of Viterbi variable times transitionprobability(k->0) //store the state(k), in which the maxPtr = k; maximum of v[k][i-1]*transition-probability(k->l) occurs } } double prob = maxValue; // probability of most probable path pi[L] = maxPtr; // Traceback: for (int i = L; i >= 1; i--) pi[i - 1] = ptr[i][pi[i]]; // *** End of algorithm // Generate string of state names: StringBuilder sb = new StringBuilder(); for (int i = 1; i...