|
| 1 | +// bin/compile-graph.cc |
| 2 | + |
| 3 | +// Copyright 2018 Johns Hopkins University (Author: Daniel Povey) |
| 4 | + |
| 5 | +// See ../../COPYING for clarification regarding multiple authors |
| 6 | +// |
| 7 | +// Licensed under the Apache License, Version 2.0 (the "License"); |
| 8 | +// you may not use this file except in compliance with the License. |
| 9 | +// You may obtain a copy of the License at |
| 10 | +// |
| 11 | +// http://www.apache.org/licenses/LICENSE-2.0 |
| 12 | +// |
| 13 | +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| 14 | +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED |
| 15 | +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, |
| 16 | +// MERCHANTABLITY OR NON-INFRINGEMENT. |
| 17 | +// See the Apache 2 License for the specific language governing permissions and |
| 18 | +// limitations under the License. |
| 19 | + |
| 20 | +#include "base/kaldi-common.h" |
| 21 | +#include "util/common-utils.h" |
| 22 | +#include "tree/context-dep.h" |
| 23 | +#include "hmm/transition-model.h" |
| 24 | +#include "hmm/hmm-utils.h" |
| 25 | +#include "fstext/fstext-lib.h" |
| 26 | +#include "fstext/push-special.h" |
| 27 | +#include "fstext/grammar-context-fst.h" |
| 28 | +#include "decoder/grammar-fst.h" |
| 29 | + |
| 30 | + |
| 31 | + |
| 32 | +int main(int argc, char *argv[]) { |
| 33 | + try { |
| 34 | + using namespace kaldi; |
| 35 | + typedef kaldi::int32 int32; |
| 36 | + using fst::SymbolTable; |
| 37 | + using fst::VectorFst; |
| 38 | + using fst::StdArc; |
| 39 | + |
| 40 | + |
| 41 | + const char *usage = |
| 42 | + "Creates HCLG decoding graph. Similar to mkgraph.sh but done in code.\n" |
| 43 | + "\n" |
| 44 | + "Usage: compile-graph [options] <tree-in> <model-in> <lexicon-fst-in> " |
| 45 | + " <gammar-rspecifier> <hclg-wspecifier>\n" |
| 46 | + "e.g.: \n" |
| 47 | + " compile-train-graphs-fsts tree 1.mdl L_disambig.fst G.fst HCLG.fst\n"; |
| 48 | + ParseOptions po(usage); |
| 49 | + |
| 50 | + |
| 51 | + BaseFloat transition_scale = 1.0; |
| 52 | + BaseFloat self_loop_scale = 1.0; // Caution: the script default is 0.1. |
| 53 | + int32 nonterm_phones_offset = -1; |
| 54 | + std::string disambig_rxfilename; |
| 55 | + |
| 56 | + |
| 57 | + po.Register("read-disambig-syms", &disambig_rxfilename, "File containing " |
| 58 | + "list of disambiguation symbols in phone symbol table"); |
| 59 | + po.Register("transition-scale", &transition_scale, "Scale of transition " |
| 60 | + "probabilities (excluding self-loops)."); |
| 61 | + po.Register("self-loop-scale", &self_loop_scale, "Scale of self-loop vs. " |
| 62 | + "non-self-loop probability mass. Caution: the default of " |
| 63 | + "mkgraph.sh is 0.1, but this defaults to 1.0."); |
| 64 | + po.Register("nonterm-phones-offset", &nonterm_phones_offset, "Integer " |
| 65 | + "value of symbol #nonterm_bos in phones.txt, if present. " |
| 66 | + "(Only relevant for grammar decoding)."); |
| 67 | + |
| 68 | + po.Read(argc, argv); |
| 69 | + |
| 70 | + if (po.NumArgs() != 5) { |
| 71 | + po.PrintUsage(); |
| 72 | + exit(1); |
| 73 | + } |
| 74 | + |
| 75 | + std::string tree_rxfilename = po.GetArg(1), |
| 76 | + model_rxfilename = po.GetArg(2), |
| 77 | + lex_rxfilename = po.GetArg(3), |
| 78 | + grammar_rxfilename = po.GetArg(4), |
| 79 | + hclg_wxfilename = po.GetArg(5); |
| 80 | + |
| 81 | + ContextDependency ctx_dep; // the tree. |
| 82 | + ReadKaldiObject(tree_rxfilename, &ctx_dep); |
| 83 | + |
| 84 | + TransitionModel trans_model; |
| 85 | + ReadKaldiObject(model_rxfilename, &trans_model); |
| 86 | + |
| 87 | + VectorFst<StdArc> *lex_fst = fst::ReadFstKaldi(lex_rxfilename), |
| 88 | + *grammar_fst = fst::ReadFstKaldi(grammar_rxfilename); |
| 89 | + |
| 90 | + std::vector<int32> disambig_syms; |
| 91 | + if (disambig_rxfilename != "") |
| 92 | + if (!ReadIntegerVectorSimple(disambig_rxfilename, &disambig_syms)) |
| 93 | + KALDI_ERR << "Could not read disambiguation symbols from " |
| 94 | + << disambig_rxfilename; |
| 95 | + if (disambig_syms.empty()) |
| 96 | + KALDI_WARN << "You supplied no disambiguation symbols; note, these are " |
| 97 | + << "typically necessary when compiling graphs from FSTs (i.e. " |
| 98 | + << "supply L_disambig.fst and the list of disambig syms with\n" |
| 99 | + << "--read-disambig-syms)"; |
| 100 | + |
| 101 | + const std::vector<int32> &phone_syms = trans_model.GetPhones(); |
| 102 | + SortAndUniq(&disambig_syms); |
| 103 | + for (int32 i = 0; i < disambig_syms.size(); i++) |
| 104 | + if (std::binary_search(phone_syms.begin(), phone_syms.end(), |
| 105 | + disambig_syms[i])) |
| 106 | + KALDI_ERR << "Disambiguation symbol " << disambig_syms[i] |
| 107 | + << " is also a phone."; |
| 108 | + |
| 109 | + VectorFst<StdArc> lg_fst; |
| 110 | + TableCompose(*lex_fst, *grammar_fst, &lg_fst); |
| 111 | + |
| 112 | + DeterminizeStarInLog(&lg_fst, fst::kDelta); |
| 113 | + |
| 114 | + MinimizeEncoded(&lg_fst, fst::kDelta); |
| 115 | + |
| 116 | + fst::PushSpecial(&lg_fst, fst::kDelta); |
| 117 | + |
| 118 | + delete grammar_fst; |
| 119 | + delete lex_fst; |
| 120 | + |
| 121 | + VectorFst<StdArc> clg_fst; |
| 122 | + |
| 123 | + std::vector<std::vector<int32> > ilabels; |
| 124 | + |
| 125 | + int32 context_width = ctx_dep.ContextWidth(), |
| 126 | + central_position = ctx_dep.CentralPosition(); |
| 127 | + |
| 128 | + if (nonterm_phones_offset < 0) { |
| 129 | + // The normal case. |
| 130 | + ComposeContext(disambig_syms, context_width, central_position, |
| 131 | + &lg_fst, &clg_fst, &ilabels); |
| 132 | + } else { |
| 133 | + // The grammar-FST case. See ../doc/grammar.dox for an intro. |
| 134 | + if (context_width != 2 || central_position != 1) { |
| 135 | + KALDI_ERR << "Grammar-fst graph creation only supports models with left-" |
| 136 | + "biphone context. (--nonterm-phones-offset option was supplied)."; |
| 137 | + } |
| 138 | + ComposeContextLeftBiphone(nonterm_phones_offset, disambig_syms, |
| 139 | + lg_fst, &clg_fst, &ilabels); |
| 140 | + } |
| 141 | + lg_fst.DeleteStates(); |
| 142 | + |
| 143 | + HTransducerConfig h_cfg; |
| 144 | + h_cfg.transition_scale = transition_scale; |
| 145 | + h_cfg.nonterm_phones_offset = nonterm_phones_offset; |
| 146 | + std::vector<int32> disambig_syms_h; // disambiguation symbols on |
| 147 | + // input side of H. |
| 148 | + VectorFst<StdArc> *h_fst = GetHTransducer(ilabels, |
| 149 | + ctx_dep, |
| 150 | + trans_model, |
| 151 | + h_cfg, |
| 152 | + &disambig_syms_h); |
| 153 | + |
| 154 | + VectorFst<StdArc> hclg_fst; // transition-id to word. |
| 155 | + TableCompose(*h_fst, clg_fst, &hclg_fst); |
| 156 | + clg_fst.DeleteStates(); |
| 157 | + delete h_fst; |
| 158 | + |
| 159 | + KALDI_ASSERT(hclg_fst.Start() != fst::kNoStateId); |
| 160 | + |
| 161 | + // Epsilon-removal and determinization combined. This will fail if not determinizable. |
| 162 | + DeterminizeStarInLog(&hclg_fst); |
| 163 | + |
| 164 | + if (!disambig_syms_h.empty()) { |
| 165 | + RemoveSomeInputSymbols(disambig_syms_h, &hclg_fst); |
| 166 | + RemoveEpsLocal(&hclg_fst); |
| 167 | + } |
| 168 | + |
| 169 | + // Encoded minimization. |
| 170 | + MinimizeEncoded(&hclg_fst); |
| 171 | + |
| 172 | + std::vector<int32> disambig; |
| 173 | + bool check_no_self_loops = true, |
| 174 | + reorder = true; |
| 175 | + AddSelfLoops(trans_model, |
| 176 | + disambig, |
| 177 | + self_loop_scale, |
| 178 | + reorder, |
| 179 | + check_no_self_loops, |
| 180 | + &hclg_fst); |
| 181 | + |
| 182 | + if (nonterm_phones_offset >= 0) |
| 183 | + PrepareForGrammarFst(nonterm_phones_offset, &hclg_fst); |
| 184 | + |
| 185 | + { // convert 'hclg' to ConstFst and write. |
| 186 | + fst::ConstFst<StdArc> const_hclg(hclg_fst); |
| 187 | + bool binary = true, write_binary_header = false; // suppress the ^@B |
| 188 | + Output ko(hclg_wxfilename, binary, write_binary_header); |
| 189 | + fst::FstWriteOptions wopts(PrintableWxfilename(hclg_wxfilename)); |
| 190 | + const_hclg.Write(ko.Stream(), wopts); |
| 191 | + } |
| 192 | + |
| 193 | + KALDI_LOG << "Wrote graph with " << hclg_fst.NumStates() |
| 194 | + << " states to " << hclg_wxfilename; |
| 195 | + return 0; |
| 196 | + } catch(const std::exception &e) { |
| 197 | + std::cerr << e.what(); |
| 198 | + return -1; |
| 199 | + } |
| 200 | +} |
0 commit comments