@spiritnotes
2016-03-26T08:58:54.000000Z
字数 5107
阅读 1604
机器学习
算法
任何一次观测都和前面的所有观测有关,则有
对其进行简化,假设其每次观测只与前面一次或者两次有关,则有
一阶马尔科夫模型
接受观测具有K个状态,则有 有 K-1 个参数, 可为K个值,则有 K(K-1) 个参数。M 阶参数有 个。
是关于时序的概率模型,描述由一个隐藏的马尔科夫链随机生成不可观测的状态随机序列,再由各个状态生成一个观测而产生观测随机序列的过程。
基本假设:
给定模型和观测序列,计算在模型下观测序列O出现的概率
scala代码如下
def calcProbDirect(output: Array[Int]): Double = {
val len = output.length
val stateLen = stateChange.length
def rescCalc(outputIndex: Int, lastState: Int, currProb:Double):scala.collection.immutable.IndexedSeq[Double] ={
if (outputIndex == 0){
for (state <- 0 until stateLen;
i <- rescCalc(outputIndex+1,state, pi(state)*ouputPorbs(state)(output(outputIndex))*currProb))
yield i
}
else if (outputIndex == len-1) {
for (state <- 0 until stateLen) yield currProb*stateChange(lastState)(state)*ouputPorbs(state)(output(outputIndex))
}
else {
for (state <- 0 until stateLen;
i <- rescCalc(outputIndex+1,state, currProb*stateChange(lastState)(state)*ouputPorbs(state)(output(outputIndex))))
yield i
}
}
rescCalc(0, 0, 1).sum
}
def calcProbForward(output: Array[Int]): Double = {
val len = output.length
val stateLen = stateChange.length
def clacOutIndexProbForState(outputIndex:Int, state:Int) ={
ouputPorbs(state)(output(outputIndex))
}
def forward(outputIndex: Int): Seq[(Int, Double)] ={
if (outputIndex == 0) {
(0 until stateLen zip pi).map{case (state, prob) => (state, prob*clacOutIndexProbForState(outputIndex, state))}
}
else {
forward(outputIndex-1).flatMap {
case (state, prob) => {
((0 until stateLen) zip stateChange(state)).map {
case (nextState, changeRatio) => {
(nextState, prob * changeRatio * clacOutIndexProbForState(outputIndex, nextState))
}
}
}
}.groupBy(_._1).map(line =>{(line._1, line._2.map(_._2).sum)}).toList
}
}
forward(len-1).map(_._2).sum
}
def calcProbBackWord(output: Array[Int]): Double = {
val len = output.length
val stateLen = stateChange.length
def clacOutIndexProbForState(outputIndex:Int, state:Int) ={
ouputPorbs(state)(output(outputIndex))
}
def backward(outputIndex: Int): Seq[(Int, Double)] ={
if (outputIndex == len-1) {
0 until stateLen map ((_, 1.0))
}
else if (outputIndex == -1){
backward(outputIndex+1).map{case(state, prob) => {(state, prob*pi(state)*clacOutIndexProbForState(outputIndex+1, state))}}
}
else {
backward(outputIndex+1).flatMap {
case (state, prob) => {
val stateprobs = stateChange.map(line=> line(state))
((0 until stateLen) zip stateprobs).map {
case (preState, changeRatio) => {
//println(preState, state, prob, changeRatio, clacOutIndexProbForState(outputIndex+1, state), prob * changeRatio * clacOutIndexProbForState(outputIndex+1, state), outputIndex)
(preState, prob * changeRatio * clacOutIndexProbForState(outputIndex+1, state))
}
}
}
}.groupBy(_._1).map(line =>{(line._1, line._2.map(_._2).sum)}).toList
}
}
backward(-1).map(_._2).sum
}
利用前向算法与后向算法合并可得观测概率
给定模型和观测,在t时刻处于状态的概率
给定模型,在时刻t处于以及在t+1处于的概率
已知观测序列,估计模型参数,在该模型下观测序列概率最大
已知训练集中包含S个长度相同的观测序列和对应的状态序列,那么如下估计参数
在语音识别中用户实际的本意可以表达为 而我们实际识别到的音轨可以表示为 ,于是需要判断其实际的本意则可以通过如下的概率表示
自动纠错与语音识别差不多,区别只在于