|
Joshua
open source statistical hierarchical phrase-based machine translation system
|
Classes | |
| class | DPStateOracle |
| class | PrefixGrammar |
Public Member Functions | |
| OracleExtractionHG (int lm_feat_id_) | |
| Object[] | oracle_extract_nbest (KBestExtractor kbest_extractor, HyperGraph hg, int n, boolean do_ngram_clip, String ref_sent) |
| HyperGraph | oracle_extract_hg (HyperGraph hg, int src_sent_len_in, int lm_order, String ref_sent_str) |
| void | get_ngrams (HashMap< String, Integer > tbl, int order, int[] wrds, boolean ignore_null_equiv_symbol) |
| void | get_ngrams (HashMap< String, Integer > tbl, int order, ArrayList< Integer > wrds, boolean ignore_null_equiv_symbol) |
| double | compute_sentence_bleu (String ref_sent, String hyp_sent, boolean do_ngram_clip, int bleu_order) |
| double | compute_sentence_bleu (int[] ref_sent, int[] hyp_sent, boolean do_ngram_clip, int bleu_order) |
Static Public Member Functions | |
| static void | main (String[] args) throws IOException |
| static void | compare_two_int_arrays (int[] a, int[] b) |
| static double | compute_bleu (int hyp_len, double ref_len, int[] num_ngram_match, int bleu_order) |
| static void | setup_prefix_suffix_tbl (int[] wrds, int order, HashMap< String, Boolean > prefix_tbl, HashMap< String, Boolean > suffix_tbl) |
| static void | setup_prefix_suffix_grammar (int[] wrds, int order, PrefixGrammar prefix_gr, PrefixGrammar suffix_gr) |
Public Attributes | |
| int | BACKOFF_LEFT_LM_STATE_SYM_ID |
| int | NULL_LEFT_LM_STATE_SYM_ID |
| int | NULL_RIGHT_LM_STATE_SYM_ID |
Protected Member Functions | |
| void | process_one_combination_axiom (HGNode parent_item, HashMap< String, VirtualItem > virtual_item_sigs, HyperEdge cur_dt) |
| void | process_one_combination_nonaxiom (HGNode parent_item, HashMap< String, VirtualItem > virtual_item_sigs, HyperEdge cur_dt, ArrayList< VirtualItem > l_ant_virtual_item) |
| DPStateOracle | compute_state (HGNode parent_item, HyperEdge dt, ArrayList< VirtualItem > l_ant_virtual_item, HashMap< String, Integer > tbl_ref_ngrams, boolean do_local_ngram_clip, int lm_order, double ref_len, double[] bleu_score, HashMap< String, Boolean > tbl_suffix, HashMap< String, Boolean > tbl_prefix) |
Protected Attributes | |
| int | src_sent_len = 0 |
| int | ref_sent_len = 0 |
| int | g_lm_order = 4 |
| HashMap< String, Integer > | tbl_ref_ngrams = new HashMap<String, Integer>() |
Static Protected Attributes | |
| static boolean | do_local_ngram_clip = false |
| static boolean | maitain_length_state = false |
| static int | g_bleu_order = 4 |
Package Attributes | |
| HashMap< String, Boolean > | tbl_suffix = new HashMap<String, Boolean>() |
| HashMap< String, Boolean > | tbl_prefix = new HashMap<String, Boolean>() |
| int | lm_feat_id = 0 |
Static Package Attributes | |
| static String | BACKOFF_LEFT_LM_STATE_SYM = "<lzfbo>" |
| static String | NULL_LEFT_LM_STATE_SYM = "<lzflnull>" |
| static String | NULL_RIGHT_LM_STATE_SYM = "<lzfrnull>" |
| static boolean | using_left_equiv_state = true |
| static boolean | using_right_equiv_state = true |
| static PrefixGrammar | grammar_prefix = new PrefixGrammar() |
| static PrefixGrammar | grammar_suffix = new PrefixGrammar() |
| static boolean | always_maintain_seperate_lm_state = true |
Private Member Functions | |
| int[] | intListToArray (List< Integer > words) |
| int[] | get_left_equiv_state (ArrayList< Integer > left_state_sequence, HashMap< String, Boolean > tbl_suffix) |
| boolean | is_a_suffix_in_tbl (ArrayList< Integer > left_state_sequence, int start_pos, int end_pos, HashMap< String, Boolean > tbl_suffix) |
| boolean | is_a_suffix_in_grammar (ArrayList< Integer > left_state_sequence, int start_pos, int end_pos, PrefixGrammar grammar_suffix) |
| int[] | get_right_equiv_state (ArrayList< Integer > right_state_sequence, HashMap< String, Boolean > tbl_prefix) |
| boolean | is_a_prefix_in_tbl (ArrayList< Integer > right_state_sequence, int start_pos, int end_pos, HashMap< String, Boolean > tbl_prefix) |
| boolean | isAPrefixInGrammar (ArrayList< Integer > right_state_sequence, int start_pos, int end_pos, PrefixGrammar gr_prefix) |
Static Private Member Functions | |
| static void | printState (Object[] state) |
Oracle extraction using an approximated BLEU score. The approximations are: (1) the n-gram clipping effect is not considered; (2) the dynamic program does not maintain separate states for different hypothesis lengths; (3) the brevity penalty is calculated from the average reference length; and (4) sentence-level BLEU is used instead of document-level BLEU.
| joshua.oracle.OracleExtractionHG.OracleExtractionHG | ( | int | lm_feat_id_ | ) |
Constructs a new object capable of extracting a tree from a hypergraph that most closely matches a provided oracle sentence.
It seems that the symbol table here should only need to represent monolingual terminals, plus nonterminals.
| lm_feat_id_ |
| static void joshua.oracle.OracleExtractionHG.compare_two_int_arrays | ( | int[] | a, |
| int[] | b | ||
| ) | [static] |
| static double joshua.oracle.OracleExtractionHG.compute_bleu | ( | int | hyp_len, |
| double | ref_len, | ||
| int[] | num_ngram_match, | ||
| int | bleu_order | ||
| ) | [static] |
| double joshua.oracle.OracleExtractionHG.compute_sentence_bleu | ( | String | ref_sent, |
| String | hyp_sent, | ||
| boolean | do_ngram_clip, | ||
| int | bleu_order | ||
| ) |
| double joshua.oracle.OracleExtractionHG.compute_sentence_bleu | ( | int[] | ref_sent, |
| int[] | hyp_sent, | ||
| boolean | do_ngram_clip, | ||
| int | bleu_order | ||
| ) |
| DPStateOracle joshua.oracle.OracleExtractionHG.compute_state | ( | HGNode | parent_item, |
| HyperEdge | dt, | ||
| ArrayList< VirtualItem > | l_ant_virtual_item, | ||
| HashMap< String, Integer > | tbl_ref_ngrams, | ||
| boolean | do_local_ngram_clip, | ||
| int | lm_order, | ||
| double | ref_len, | ||
| double[] | bleu_score, | ||
| HashMap< String, Boolean > | tbl_suffix, | ||
| HashMap< String, Boolean > | tbl_prefix | ||
| ) | [protected] |
| int [] joshua.oracle.OracleExtractionHG.get_left_equiv_state | ( | ArrayList< Integer > | left_state_sequence, |
| HashMap< String, Boolean > | tbl_suffix | ||
| ) | [private] |
| void joshua.oracle.OracleExtractionHG.get_ngrams | ( | HashMap< String, Integer > | tbl, |
| int | order, | ||
| int[] | wrds, | ||
| boolean | ignore_null_equiv_symbol | ||
| ) |
| void joshua.oracle.OracleExtractionHG.get_ngrams | ( | HashMap< String, Integer > | tbl, |
| int | order, | ||
| ArrayList< Integer > | wrds, | ||
| boolean | ignore_null_equiv_symbol | ||
| ) |
Accumulates n-gram counts into tbl.
| int [] joshua.oracle.OracleExtractionHG.get_right_equiv_state | ( | ArrayList< Integer > | right_state_sequence, |
| HashMap< String, Boolean > | tbl_prefix | ||
| ) | [private] |
| int [] joshua.oracle.OracleExtractionHG.intListToArray | ( | List< Integer > | words | ) | [private] |
| boolean joshua.oracle.OracleExtractionHG.is_a_prefix_in_tbl | ( | ArrayList< Integer > | right_state_sequence, |
| int | start_pos, | ||
| int | end_pos, | ||
| HashMap< String, Boolean > | tbl_prefix | ||
| ) | [private] |
| boolean joshua.oracle.OracleExtractionHG.is_a_suffix_in_grammar | ( | ArrayList< Integer > | left_state_sequence, |
| int | start_pos, | ||
| int | end_pos, | ||
| PrefixGrammar | grammar_suffix | ||
| ) | [private] |
| boolean joshua.oracle.OracleExtractionHG.is_a_suffix_in_tbl | ( | ArrayList< Integer > | left_state_sequence, |
| int | start_pos, | ||
| int | end_pos, | ||
| HashMap< String, Boolean > | tbl_suffix | ||
| ) | [private] |
| boolean joshua.oracle.OracleExtractionHG.isAPrefixInGrammar | ( | ArrayList< Integer > | right_state_sequence, |
| int | start_pos, | ||
| int | end_pos, | ||
| PrefixGrammar | gr_prefix | ||
| ) | [private] |
| static void joshua.oracle.OracleExtractionHG.main | ( | String[] | args | ) | throws IOException [static] |
| HyperGraph joshua.oracle.OracleExtractionHG.oracle_extract_hg | ( | HyperGraph | hg, |
| int | src_sent_len_in, | ||
| int | lm_order, | ||
| String | ref_sent_str | ||
| ) |
| Object [] joshua.oracle.OracleExtractionHG.oracle_extract_nbest | ( | KBestExtractor | kbest_extractor, |
| HyperGraph | hg, | ||
| int | n, | ||
| boolean | do_ngram_clip, | ||
| String | ref_sent | ||
| ) |
| static void joshua.oracle.OracleExtractionHG.printState | ( | Object[] | state | ) | [static, private] |
| void joshua.oracle.OracleExtractionHG.process_one_combination_axiom | ( | HGNode | parent_item, |
| HashMap< String, VirtualItem > | virtual_item_sigs, | ||
| HyperEdge | cur_dt | ||
| ) | [protected, virtual] |
| void joshua.oracle.OracleExtractionHG.process_one_combination_nonaxiom | ( | HGNode | parent_item, |
| HashMap< String, VirtualItem > | virtual_item_sigs, | ||
| HyperEdge | cur_dt, | ||
| ArrayList< VirtualItem > | l_ant_virtual_item | ||
| ) | [protected, virtual] |
| static void joshua.oracle.OracleExtractionHG.setup_prefix_suffix_grammar | ( | int[] | wrds, |
| int | order, | ||
| PrefixGrammar | prefix_gr, | ||
| PrefixGrammar | suffix_gr | ||
| ) | [static] |
| static void joshua.oracle.OracleExtractionHG.setup_prefix_suffix_tbl | ( | int[] | wrds, |
| int | order, | ||
| HashMap< String, Boolean > | prefix_tbl, | ||
| HashMap< String, Boolean > | suffix_tbl | ||
| ) | [static] |
boolean joshua.oracle.OracleExtractionHG.always_maintain_seperate_lm_state = true [static, package] |
String joshua.oracle.OracleExtractionHG.BACKOFF_LEFT_LM_STATE_SYM = "<lzfbo>" [static, package] |
boolean joshua.oracle.OracleExtractionHG.do_local_ngram_clip = false [static, protected] |
int joshua.oracle.OracleExtractionHG.g_bleu_order = 4 [static, protected] |
int joshua.oracle.OracleExtractionHG.g_lm_order = 4 [protected] |
PrefixGrammar joshua.oracle.OracleExtractionHG.grammar_prefix = new PrefixGrammar() [static, package] |
PrefixGrammar joshua.oracle.OracleExtractionHG.grammar_suffix = new PrefixGrammar() [static, package] |
int joshua.oracle.OracleExtractionHG.lm_feat_id = 0 [package] |
boolean joshua.oracle.OracleExtractionHG.maitain_length_state = false [static, protected] |
String joshua.oracle.OracleExtractionHG.NULL_LEFT_LM_STATE_SYM = "<lzflnull>" [static, package] |
String joshua.oracle.OracleExtractionHG.NULL_RIGHT_LM_STATE_SYM = "<lzfrnull>" [static, package] |
int joshua.oracle.OracleExtractionHG.ref_sent_len = 0 [protected] |
int joshua.oracle.OracleExtractionHG.src_sent_len = 0 [protected] |
HashMap<String, Boolean> joshua.oracle.OracleExtractionHG.tbl_prefix = new HashMap<String, Boolean>() [package] |
HashMap<String, Integer> joshua.oracle.OracleExtractionHG.tbl_ref_ngrams = new HashMap<String, Integer>() [protected] |
HashMap<String, Boolean> joshua.oracle.OracleExtractionHG.tbl_suffix = new HashMap<String, Boolean>() [package] |
boolean joshua.oracle.OracleExtractionHG.using_left_equiv_state = true [static, package] |
boolean joshua.oracle.OracleExtractionHG.using_right_equiv_state = true [static, package] |