|
Joshua
open source statistical hierarchical phrase-based machine translation system
|
Static Public Member Functions | |
| static void | processCommandLineOptions (String[] options) |
| static void | readConfigFile (String configFile) throws IOException |
| static void | sanityCheck () |
| static String | normalize_key (String text) |
Static Public Attributes | |
| static ArrayList< String > | lms = new ArrayList<String>() |
| static String | lm_type = "kenlm" |
| static double | lm_ceiling_cost = 100 |
| static boolean | use_left_equivalent_state = false |
| static boolean | use_right_equivalent_state = true |
| static int | lm_order = 3 |
| static boolean | use_sent_specific_lm = false |
| static String | lm_file = null |
| static int | ngramStateID = 0 |
| static int | span_limit = 10 |
| static String | phrase_owner = "pt" |
| static String | glue_owner = "pt" |
| static String | default_non_terminal = "PHRASE" |
| static String | goal_symbol = "S" |
| static boolean | use_sent_specific_tm = false |
| static boolean | dense_features = true |
| static String | tm_file = null |
| static String | tm_format = null |
| static String | glue_file = null |
| static String | glue_format = null |
| static boolean | constrain_parse = false |
| static boolean | use_pos_labels = false |
| static float | oov_feature_cost = 100 |
| static boolean | use_max_lm_cost_for_oov = false |
| static int | oov_feature_index = -1 |
| static boolean | true_oovs_only = false |
| static int | num_phrasal_features = 0 |
| static int | pop_limit = 100 |
| static boolean | useCubePrune = true |
| static boolean | useBeamAndThresholdPrune = false |
| static double | fuzz1 = 0.1 |
| static double | fuzz2 = 0.1 |
| static int | max_n_items = 30 |
| static double | relative_threshold = 10.0 |
| static int | max_n_rules = 50 |
| static boolean | use_unique_nbest = false |
| static boolean | use_tree_nbest = false |
| static boolean | include_align_index = false |
| static boolean | add_combined_cost = true |
| static int | topN = 500 |
| static boolean | escape_trees = false |
| static String | parallel_files_prefix = "/tmp/temp.parallel" |
| static int | num_parallel_decoders = 1 |
| static boolean | save_disk_hg = false |
| static boolean | use_kbest_hg = false |
| static boolean | forest_pruning = false |
| static double | forest_pruning_threshold = 10 |
| static boolean | visualize_hypergraph = false |
| static boolean | useGoogleLinearCorpusGain = false |
| static double[] | linearCorpusGainThetas = null |
| static boolean | mark_oovs = true |
| static String | oracleFile = null |
| static boolean | parse = false |
| static ArrayList< String > | features = new ArrayList<String>() |
Static Private Attributes | |
| static final Logger | logger = Logger.getLogger(JoshuaConfiguration.class.getName()) |
Configuration file for Joshua decoder.
When adding new features to Joshua, any new configurable parameters should be added to this class.
| static String joshua.decoder.JoshuaConfiguration.normalize_key | ( | String | text | ) | [static] |
Normalizes parameter names by removing underscores and hyphens and lowercasing. This defines equivalence classes on external use of parameter names, permitting arbitrary_under_scores and camelCasing in parameter names without forcing the user to memorize them all. Here are some examples of equivalent ways to refer to parameter names:
{pop-limit, poplimit, PopLimit, popLimit, pop_lim_it} {lmfile, lm-file, LM-FILE, lm_file}
| static void joshua.decoder.JoshuaConfiguration.processCommandLineOptions | ( | String[] | options | ) | [static] |
To process command-line options, we write them to a file that looks like the config file, and then call readConfigFile() on it. It would be more general to define a class that sits on a stream and knows how to chop it up, but this was quicker to implement.
| static void joshua.decoder.JoshuaConfiguration.readConfigFile | ( | String | configFile | ) | throws IOException [static] |
| static void joshua.decoder.JoshuaConfiguration.sanityCheck | ( | ) | [static] |
Checks for invalid variable configurations.
boolean joshua.decoder.JoshuaConfiguration.add_combined_cost = true [static] |
boolean joshua.decoder.JoshuaConfiguration.constrain_parse = false [static] |
String joshua.decoder.JoshuaConfiguration.default_non_terminal = "PHRASE" [static] |
boolean joshua.decoder.JoshuaConfiguration.dense_features = true [static] |
boolean joshua.decoder.JoshuaConfiguration.escape_trees = false [static] |
ArrayList<String> joshua.decoder.JoshuaConfiguration.features = new ArrayList<String>() [static] |
boolean joshua.decoder.JoshuaConfiguration.forest_pruning = false [static] |
double joshua.decoder.JoshuaConfiguration.forest_pruning_threshold = 10 [static] |
double joshua.decoder.JoshuaConfiguration.fuzz1 = 0.1 [static] |
double joshua.decoder.JoshuaConfiguration.fuzz2 = 0.1 [static] |
String joshua.decoder.JoshuaConfiguration.glue_file = null [static] |
String joshua.decoder.JoshuaConfiguration.glue_format = null [static] |
String joshua.decoder.JoshuaConfiguration.glue_owner = "pt" [static] |
String joshua.decoder.JoshuaConfiguration.goal_symbol = "S" [static] |
boolean joshua.decoder.JoshuaConfiguration.include_align_index = false [static] |
double [] joshua.decoder.JoshuaConfiguration.linearCorpusGainThetas = null [static] |
double joshua.decoder.JoshuaConfiguration.lm_ceiling_cost = 100 [static] |
String joshua.decoder.JoshuaConfiguration.lm_file = null [static] |
int joshua.decoder.JoshuaConfiguration.lm_order = 3 [static] |
String joshua.decoder.JoshuaConfiguration.lm_type = "kenlm" [static] |
ArrayList<String> joshua.decoder.JoshuaConfiguration.lms = new ArrayList<String>() [static] |
final Logger joshua.decoder.JoshuaConfiguration.logger = Logger.getLogger(JoshuaConfiguration.class.getName()) [static, private] |
boolean joshua.decoder.JoshuaConfiguration.mark_oovs = true [static] |
int joshua.decoder.JoshuaConfiguration.max_n_items = 30 [static] |
int joshua.decoder.JoshuaConfiguration.max_n_rules = 50 [static] |
int joshua.decoder.JoshuaConfiguration.ngramStateID = 0 [static] |
int joshua.decoder.JoshuaConfiguration.num_parallel_decoders = 1 [static] |
int joshua.decoder.JoshuaConfiguration.num_phrasal_features = 0 [static] |
float joshua.decoder.JoshuaConfiguration.oov_feature_cost = 100 [static] |
int joshua.decoder.JoshuaConfiguration.oov_feature_index = -1 [static] |
String joshua.decoder.JoshuaConfiguration.oracleFile = null [static] |
String joshua.decoder.JoshuaConfiguration.parallel_files_prefix = "/tmp/temp.parallel" [static] |
boolean joshua.decoder.JoshuaConfiguration.parse = false [static] |
String joshua.decoder.JoshuaConfiguration.phrase_owner = "pt" [static] |
int joshua.decoder.JoshuaConfiguration.pop_limit = 100 [static] |
double joshua.decoder.JoshuaConfiguration.relative_threshold = 10.0 [static] |
boolean joshua.decoder.JoshuaConfiguration.save_disk_hg = false [static] |
int joshua.decoder.JoshuaConfiguration.span_limit = 10 [static] |
String joshua.decoder.JoshuaConfiguration.tm_file = null [static] |
String joshua.decoder.JoshuaConfiguration.tm_format = null [static] |
int joshua.decoder.JoshuaConfiguration.topN = 500 [static] |
boolean joshua.decoder.JoshuaConfiguration.true_oovs_only = false [static] |
boolean joshua.decoder.JoshuaConfiguration.use_kbest_hg = false [static] |
boolean joshua.decoder.JoshuaConfiguration.use_left_equivalent_state = false [static] |
boolean joshua.decoder.JoshuaConfiguration.use_max_lm_cost_for_oov = false [static] |
boolean joshua.decoder.JoshuaConfiguration.use_pos_labels = false [static] |
boolean joshua.decoder.JoshuaConfiguration.use_right_equivalent_state = true [static] |
boolean joshua.decoder.JoshuaConfiguration.use_sent_specific_lm = false [static] |
boolean joshua.decoder.JoshuaConfiguration.use_sent_specific_tm = false [static] |
boolean joshua.decoder.JoshuaConfiguration.use_tree_nbest = false [static] |
boolean joshua.decoder.JoshuaConfiguration.use_unique_nbest = false [static] |
boolean joshua.decoder.JoshuaConfiguration.useBeamAndThresholdPrune = false [static] |
boolean joshua.decoder.JoshuaConfiguration.useCubePrune = true [static] |
boolean joshua.decoder.JoshuaConfiguration.useGoogleLinearCorpusGain = false [static] |
boolean joshua.decoder.JoshuaConfiguration.visualize_hypergraph = false [static] |