viterbipy.py 1.8 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758
  1. # -*- coding: UTF-8 -*-
  2. """
  3. 此脚本用于实现viterbi算法
  4. """
  5. import numpy as np
  6. from sklearn.utils.extmath import safe_sparse_dot
  7. def viterbi(obs, init_prob, trans_prob, emit_prob):
  8. """
  9. viterbi算法
  10. 参数
  11. ----
  12. obs : {np.array 或者scipy.sparse.csr_matrix},维度为(样本数,特征数),
  13. 数据的特征矩阵
  14. initProb : {np.array或者scipy.sparse.csr_matrix},维度为(状态数),
  15. 表示各状态的初始分布
  16. transProb : {np.array或者scipy.sparse.csr_matrix},维度为(状态数,状态数),
  17. 状态间的转移矩阵
  18. emitProb : {np.array或者scipy.sparse.csr_matrix},维度为(状态数,特征数),
  19. 各状态下,特征的条件概率
  20. 返回
  21. ----
  22. score : {np.array},维度为(样本数,状态数),viterbi算法中间概率
  23. path : {np.array},维度为(样本数),最终结果表示每个样本的隐藏状态
  24. """
  25. sample_num, state_num = obs.shape[0], init_prob.shape[0]
  26. backp = np.empty((sample_num, state_num), dtype=np.intp)
  27. score = safe_sparse_dot(obs, emit_prob.T)
  28. for i in range(state_num):
  29. score[0, i] += init_prob[i]
  30. for i in range(1, sample_num):
  31. for j in range(state_num):
  32. max_ind = 0
  33. max_val = -np.inf
  34. for k in range(state_num):
  35. candidate = score[i - 1, k] + trans_prob[k, j] + score[i, j]
  36. if candidate > max_val:
  37. max_ind = k
  38. max_val = candidate
  39. score[i, j] = max_val
  40. backp[i, j] = max_ind
  41. path = np.empty(sample_num, dtype=np.intp)
  42. path[sample_num - 1] = score[sample_num - 1, :].argmax()
  43. for i in range(sample_num - 2, -1, -1):
  44. path[i] = backp[i + 1, path[i + 1]]
  45. return score, path