Skip to content

Commit ff4e85d

Browse files
committed
[src,egs] Optimization to decoder; add program to compile graphs; test it.
1 parent fbc295b commit ff4e85d

11 files changed

Lines changed: 282 additions & 20 deletions

File tree

egs/mini_librispeech/s5/local/grammar/simple_demo.sh

Lines changed: 39 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -62,26 +62,44 @@ if [ $stage -le 2 ]; then
6262
3
6363
EOF
6464
utils/mkgraph.sh --self-loop-scale 1.0 $lang $tree_dir $tree_dir/grammar1
65+
66+
# test that the binary 'compile-graph' does the same thing as mkgraph.sh.
67+
compile-graph --read-disambig-syms=$lang/phones/disambig.int $tree_dir/tree $tree_dir/1.mdl $lang/L_disambig.fst $lang/G.fst $tree_dir/grammar1/HCLG2.fst
68+
69+
if ! fstequivalent --delta=0.01 --random=true --npath=100 $tree_dir/grammar1/HCLG{,2}.fst; then
70+
echo "$0: two methods of producing graph in $tree_dir/grammar1 were different."
71+
exit 1
72+
fi
6573
fi
6674

6775

6876
if [ $stage -le 3 ]; then
69-
# create the stop-level graph in data/lang_nosp_grammar2a
70-
71-
# you can of course choose to put what symbols you want on the output side, as
72-
# long as they are defined in words.txt. #nonterm:contact_list, #nonterm_begin
73-
# and #nonterm_end would be defined in this example. This might be useful in
74-
# situations where you want to keep track of the structure of calling
75-
# nonterminals.
76-
lang=data/lang_nosp_grammar2a
77-
cat <<EOF | fstcompile --isymbols=$lang/words.txt --osymbols=$lang/words.txt > $lang/G.fst
77+
# create the top-level graph in data/lang_nosp_grammar2a
78+
79+
# you can of course choose to put what symbols you want on the output side, as
80+
# long as they are defined in words.txt. #nonterm:contact_list, #nonterm_begin
81+
# and #nonterm_end would be defined in this example. This might be useful in
82+
# situations where you want to keep track of the structure of calling
83+
# nonterminals.
84+
lang=data/lang_nosp_grammar2a
85+
cat <<EOF | fstcompile --isymbols=$lang/words.txt --osymbols=$lang/words.txt > $lang/G.fst
7886
0 1 GROUP GROUP
7987
1 2 #nonterm:contact_list <eps>
8088
2 3 ASSIST ASSIST 0.69314718055994
8189
2 0.69314718055994
8290
3
8391
EOF
84-
utils/mkgraph.sh --self-loop-scale 1.0 $lang $tree_dir $tree_dir/grammar2a
92+
utils/mkgraph.sh --self-loop-scale 1.0 $lang $tree_dir $tree_dir/grammar2a
93+
94+
# test that the binary 'compile-graph' does the same thing as mkgraph.sh.
95+
offset=$(grep nonterm_bos $lang/phones.txt | awk '{print $2}') # 364
96+
compile-graph --nonterm-phones-offset=$offset --read-disambig-syms=$lang/phones/disambig.int \
97+
$tree_dir/tree $tree_dir/1.mdl $lang/L_disambig.fst $lang/G.fst $tree_dir/grammar2a/HCLG2.fst
98+
99+
if ! fstequivalent --delta=0.01 --random=true --npath=100 $tree_dir/grammar2a/HCLG{,2}.fst; then
100+
echo "$0: two methods of producing graph in $tree_dir/grammar2a were different."
101+
exit 1
102+
fi
85103
fi
86104

87105
if [ $stage -le 4 ]; then
@@ -98,6 +116,17 @@ if [ $stage -le 4 ]; then
98116
3
99117
EOF
100118
utils/mkgraph.sh --self-loop-scale 1.0 $lang $tree_dir $tree_dir/grammar2b
119+
120+
121+
# test that the binary 'compile-graph' does the same thing as mkgraph.sh.
122+
offset=$(grep nonterm_bos $lang/phones.txt | awk '{print $2}') # 364
123+
compile-graph --nonterm-phones-offset=$offset --read-disambig-syms=$lang/phones/disambig.int \
124+
$tree_dir/tree $tree_dir/1.mdl $lang/L_disambig.fst $lang/G.fst $tree_dir/grammar2b/HCLG2.fst
125+
126+
if ! fstequivalent --delta=0.01 --random=true --npath=100 $tree_dir/grammar2b/HCLG{,2}.fst; then
127+
echo "$0: two methods of producing graph in $tree_dir/grammar2b were different."
128+
exit 1
129+
fi
101130
fi
102131

103132
if [ $stage -le 5 ]; then

egs/wsj/s5/utils/mkgraph.sh

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -98,8 +98,7 @@ trap "rm -f $lang/tmp/LG.fst.$$" EXIT HUP INT PIPE TERM
9898
if [[ ! -s $lang/tmp/LG.fst || $lang/tmp/LG.fst -ot $lang/G.fst || \
9999
$lang/tmp/LG.fst -ot $lang/L_disambig.fst ]]; then
100100
fsttablecompose $lang/L_disambig.fst $lang/G.fst | fstdeterminizestar --use-log=true | \
101-
fstminimizeencoded | fstpushspecial | \
102-
fstarcsort --sort_type=ilabel > $lang/tmp/LG.fst.$$ || exit 1;
101+
fstminimizeencoded | fstpushspecial > $lang/tmp/LG.fst.$$ || exit 1;
103102
mv $lang/tmp/LG.fst.$$ $lang/tmp/LG.fst
104103
fstisstochastic $lang/tmp/LG.fst || echo "[info]: LG not stochastic."
105104
fi
@@ -147,7 +146,7 @@ fi
147146

148147
trap "rm -f $dir/HCLG.fst.$$" EXIT HUP INT PIPE TERM
149148
if [[ ! -s $dir/HCLG.fst || $dir/HCLG.fst -ot $dir/HCLGa.fst ]]; then
150-
add-self-loops --self-loop-scale=$loopscale --reorder=true $model < $dir/HCLGa.fst | \
149+
add-self-loops --self-loop-scale=$loopscale --reorder=true $model $dir/HCLGa.fst | \
151150
$prepare_grammar_command | \
152151
fstconvert --fst_type=const > $dir/HCLG.fst.$$ || exit 1;
153152
mv $dir/HCLG.fst.$$ $dir/HCLG.fst

src/bin/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ BINFILES = align-equal align-equal-compiled acc-tree-stats \
2121
post-to-pdf-post logprob-to-post prob-to-post copy-post \
2222
matrix-sum build-pfile-from-ali get-post-on-ali tree-info am-info \
2323
vector-sum matrix-sum-rows est-pca sum-lda-accs sum-mllt-accs \
24-
transform-vec align-text matrix-dim post-to-smat
24+
transform-vec align-text matrix-dim post-to-smat compile-graph
2525

2626

2727
OBJFILES =

src/bin/compile-graph.cc

Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,200 @@
1+
// bin/compile-graph.cc
2+
3+
// Copyright 2018 Johns Hopkins University (Author: Daniel Povey)
4+
5+
// See ../../COPYING for clarification regarding multiple authors
6+
//
7+
// Licensed under the Apache License, Version 2.0 (the "License");
8+
// you may not use this file except in compliance with the License.
9+
// You may obtain a copy of the License at
10+
//
11+
// http://www.apache.org/licenses/LICENSE-2.0
12+
//
13+
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15+
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16+
// MERCHANTABLITY OR NON-INFRINGEMENT.
17+
// See the Apache 2 License for the specific language governing permissions and
18+
// limitations under the License.
19+
20+
#include "base/kaldi-common.h"
21+
#include "util/common-utils.h"
22+
#include "tree/context-dep.h"
23+
#include "hmm/transition-model.h"
24+
#include "hmm/hmm-utils.h"
25+
#include "fstext/fstext-lib.h"
26+
#include "fstext/push-special.h"
27+
#include "fstext/grammar-context-fst.h"
28+
#include "decoder/grammar-fst.h"
29+
30+
31+
32+
int main(int argc, char *argv[]) {
33+
try {
34+
using namespace kaldi;
35+
typedef kaldi::int32 int32;
36+
using fst::SymbolTable;
37+
using fst::VectorFst;
38+
using fst::StdArc;
39+
40+
41+
const char *usage =
42+
"Creates HCLG decoding graph. Similar to mkgraph.sh but done in code.\n"
43+
"\n"
44+
"Usage: compile-graph [options] <tree-in> <model-in> <lexicon-fst-in> "
45+
" <gammar-rspecifier> <hclg-wspecifier>\n"
46+
"e.g.: \n"
47+
" compile-train-graphs-fsts tree 1.mdl L_disambig.fst G.fst HCLG.fst\n";
48+
ParseOptions po(usage);
49+
50+
51+
BaseFloat transition_scale = 1.0;
52+
BaseFloat self_loop_scale = 1.0; // Caution: the script default is 0.1.
53+
int32 nonterm_phones_offset = -1;
54+
std::string disambig_rxfilename;
55+
56+
57+
po.Register("read-disambig-syms", &disambig_rxfilename, "File containing "
58+
"list of disambiguation symbols in phone symbol table");
59+
po.Register("transition-scale", &transition_scale, "Scale of transition "
60+
"probabilities (excluding self-loops).");
61+
po.Register("self-loop-scale", &self_loop_scale, "Scale of self-loop vs. "
62+
"non-self-loop probability mass. Caution: the default of "
63+
"mkgraph.sh is 0.1, but this defaults to 1.0.");
64+
po.Register("nonterm-phones-offset", &nonterm_phones_offset, "Integer "
65+
"value of symbol #nonterm_bos in phones.txt, if present. "
66+
"(Only relevant for grammar decoding).");
67+
68+
po.Read(argc, argv);
69+
70+
if (po.NumArgs() != 5) {
71+
po.PrintUsage();
72+
exit(1);
73+
}
74+
75+
std::string tree_rxfilename = po.GetArg(1),
76+
model_rxfilename = po.GetArg(2),
77+
lex_rxfilename = po.GetArg(3),
78+
grammar_rxfilename = po.GetArg(4),
79+
hclg_wxfilename = po.GetArg(5);
80+
81+
ContextDependency ctx_dep; // the tree.
82+
ReadKaldiObject(tree_rxfilename, &ctx_dep);
83+
84+
TransitionModel trans_model;
85+
ReadKaldiObject(model_rxfilename, &trans_model);
86+
87+
VectorFst<StdArc> *lex_fst = fst::ReadFstKaldi(lex_rxfilename),
88+
*grammar_fst = fst::ReadFstKaldi(grammar_rxfilename);
89+
90+
std::vector<int32> disambig_syms;
91+
if (disambig_rxfilename != "")
92+
if (!ReadIntegerVectorSimple(disambig_rxfilename, &disambig_syms))
93+
KALDI_ERR << "Could not read disambiguation symbols from "
94+
<< disambig_rxfilename;
95+
if (disambig_syms.empty())
96+
KALDI_WARN << "You supplied no disambiguation symbols; note, these are "
97+
<< "typically necessary when compiling graphs from FSTs (i.e. "
98+
<< "supply L_disambig.fst and the list of disambig syms with\n"
99+
<< "--read-disambig-syms)";
100+
101+
const std::vector<int32> &phone_syms = trans_model.GetPhones();
102+
SortAndUniq(&disambig_syms);
103+
for (int32 i = 0; i < disambig_syms.size(); i++)
104+
if (std::binary_search(phone_syms.begin(), phone_syms.end(),
105+
disambig_syms[i]))
106+
KALDI_ERR << "Disambiguation symbol " << disambig_syms[i]
107+
<< " is also a phone.";
108+
109+
VectorFst<StdArc> lg_fst;
110+
TableCompose(*lex_fst, *grammar_fst, &lg_fst);
111+
112+
DeterminizeStarInLog(&lg_fst, fst::kDelta);
113+
114+
MinimizeEncoded(&lg_fst, fst::kDelta);
115+
116+
fst::PushSpecial(&lg_fst, fst::kDelta);
117+
118+
delete grammar_fst;
119+
delete lex_fst;
120+
121+
VectorFst<StdArc> clg_fst;
122+
123+
std::vector<std::vector<int32> > ilabels;
124+
125+
int32 context_width = ctx_dep.ContextWidth(),
126+
central_position = ctx_dep.CentralPosition();
127+
128+
if (nonterm_phones_offset < 0) {
129+
// The normal case.
130+
ComposeContext(disambig_syms, context_width, central_position,
131+
&lg_fst, &clg_fst, &ilabels);
132+
} else {
133+
// The grammar-FST case. See ../doc/grammar.dox for an intro.
134+
if (context_width != 2 || central_position != 1) {
135+
KALDI_ERR << "Grammar-fst graph creation only supports models with left-"
136+
"biphone context. (--nonterm-phones-offset option was supplied).";
137+
}
138+
ComposeContextLeftBiphone(nonterm_phones_offset, disambig_syms,
139+
lg_fst, &clg_fst, &ilabels);
140+
}
141+
lg_fst.DeleteStates();
142+
143+
HTransducerConfig h_cfg;
144+
h_cfg.transition_scale = transition_scale;
145+
h_cfg.nonterm_phones_offset = nonterm_phones_offset;
146+
std::vector<int32> disambig_syms_h; // disambiguation symbols on
147+
// input side of H.
148+
VectorFst<StdArc> *h_fst = GetHTransducer(ilabels,
149+
ctx_dep,
150+
trans_model,
151+
h_cfg,
152+
&disambig_syms_h);
153+
154+
VectorFst<StdArc> hclg_fst; // transition-id to word.
155+
TableCompose(*h_fst, clg_fst, &hclg_fst);
156+
clg_fst.DeleteStates();
157+
delete h_fst;
158+
159+
KALDI_ASSERT(hclg_fst.Start() != fst::kNoStateId);
160+
161+
// Epsilon-removal and determinization combined. This will fail if not determinizable.
162+
DeterminizeStarInLog(&hclg_fst);
163+
164+
if (!disambig_syms_h.empty()) {
165+
RemoveSomeInputSymbols(disambig_syms_h, &hclg_fst);
166+
RemoveEpsLocal(&hclg_fst);
167+
}
168+
169+
// Encoded minimization.
170+
MinimizeEncoded(&hclg_fst);
171+
172+
std::vector<int32> disambig;
173+
bool check_no_self_loops = true,
174+
reorder = true;
175+
AddSelfLoops(trans_model,
176+
disambig,
177+
self_loop_scale,
178+
reorder,
179+
check_no_self_loops,
180+
&hclg_fst);
181+
182+
if (nonterm_phones_offset >= 0)
183+
PrepareForGrammarFst(nonterm_phones_offset, &hclg_fst);
184+
185+
{ // convert 'hclg' to ConstFst and write.
186+
fst::ConstFst<StdArc> const_hclg(hclg_fst);
187+
bool binary = true, write_binary_header = false; // suppress the ^@B
188+
Output ko(hclg_wxfilename, binary, write_binary_header);
189+
fst::FstWriteOptions wopts(PrintableWxfilename(hclg_wxfilename));
190+
const_hclg.Write(ko.Stream(), wopts);
191+
}
192+
193+
KALDI_LOG << "Wrote graph with " << hclg_fst.NumStates()
194+
<< " states to " << hclg_wxfilename;
195+
return 0;
196+
} catch(const std::exception &e) {
197+
std::cerr << e.what();
198+
return -1;
199+
}
200+
}

src/decoder/grammar-fst.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,22 @@ class GrammarFst {
172172
}
173173
}
174174

175+
// This is called in LatticeFasterDecoder. As an implementation shortcut, if
176+
// the state is an expanded state, we return 1, meaning 'yes, there are input
177+
// epsilons'; the calling code doesn't actually care about the exact number.
178+
inline size_t NumInputEpsilons(StateId s) const {
179+
// Compare with the constructor of ArcIterator.
180+
int32 instance_id = s >> 32;
181+
BaseStateId base_state = static_cast<int32>(s);
182+
const GrammarFst::FstInstance &instance = instances_[instance_id];
183+
const ConstFst<StdArc> *base_fst = instance.fst;
184+
if (base_fst->Final(base_state).Value() != KALDI_GRAMMAR_FST_SPECIAL_WEIGHT) {
185+
return base_fst->NumInputEpsilons(base_state);
186+
} else {
187+
return 1;
188+
}
189+
}
190+
175191
inline std::string Type() const { return "grammar"; }
176192

177193
~GrammarFst();

src/decoder/lattice-faster-decoder.cc

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -831,15 +831,20 @@ void LatticeFasterDecoderTpl<FST>::ProcessNonemitting(BaseFloat cutoff) {
831831
// problem did not improve overall speed.
832832

833833
KALDI_ASSERT(queue_.empty());
834-
for (const Elem *e = toks_.GetList(); e != NULL; e = e->tail)
835-
queue_.push_back(e->key);
836-
if (queue_.empty()) {
834+
835+
if (toks_.GetList() == NULL) {
837836
if (!warned_) {
838837
KALDI_WARN << "Error, no surviving tokens: frame is " << frame;
839838
warned_ = true;
840839
}
841840
}
842841

842+
for (const Elem *e = toks_.GetList(); e != NULL; e = e->tail) {
843+
StateId key = e->key;
844+
if (fst_->NumInputEpsilons(key) != 0)
845+
queue_.push_back(key);
846+
}
847+
843848
while (!queue_.empty()) {
844849
StateId state = queue_.back();
845850
queue_.pop_back();
@@ -872,7 +877,8 @@ void LatticeFasterDecoderTpl<FST>::ProcessNonemitting(BaseFloat cutoff) {
872877

873878
// "changed" tells us whether the new token has a different
874879
// cost from before, or is new [if so, add into queue].
875-
if (changed) queue_.push_back(arc.nextstate);
880+
if (changed && fst_->NumInputEpsilons(arc.nextstate) != 0)
881+
queue_.push_back(arc.nextstate);
876882
}
877883
}
878884
} // for all arcs

src/decoder/lattice-faster-decoder.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,12 @@ class LatticeFasterDecoderTpl {
279279
};
280280

281281
using Elem = typename HashList<StateId, Token*>::Elem;
282+
// Equivalent to:
283+
// struct Elem {
284+
// StateId key;
285+
// Token *val;
286+
// Elem *tail;
287+
// };
282288

283289
void PossiblyResizeHash(size_t num_toks);
284290

src/fstext/context-fst.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -290,7 +290,7 @@ void ComposeContext(const vector<int32> &disambig_syms_in,
290290
// (*ofst) = inv(inv_c) * (*ifst)
291291
ComposeDeterministicOnDemandInverse(*ifst, &inv_c, ofst);
292292

293-
*ilabels_out = inv_c.IlabelInfo();
293+
inv_c.SwapIlabelInfo(ilabels_out);
294294
}
295295

296296
void AddSubsequentialLoop(StdArc::Label subseq_symbol,

src/fstext/context-fst.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,9 @@ class InverseContextFst: public DeterministicOnDemandFst<StdArc> {
195195
return ilabel_info_;
196196
}
197197

198+
// A way to destructively obtain the ilabel-info. Only do this if you
199+
// are just about to destroy this object.
200+
void SwapIlabelInfo(vector<vector<int32> > *vec) { ilabel_info_.swap(*vec); }
198201

199202
private:
200203

src/fstext/grammar-context-fst.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,7 @@ void ComposeContextLeftBiphone(
222222
// (*ofst) = inv(inv_c) * (*ifst)
223223
ComposeDeterministicOnDemandInverse(ifst, &inv_c, ofst);
224224

225-
*ilabels = inv_c.IlabelInfo();
225+
inv_c.SwapIlabelInfo(ilabels);
226226
}
227227

228228
} // end namespace fst

0 commit comments

Comments
 (0)