|
Joshua
open source statistical hierarchical phrase-based machine translation system
|
Public Member Functions | |
| DiskHyperGraph (int LMFeatureID, boolean storeModelCosts, List< FeatureFunction > featureFunctions) | |
| void | initWrite (String itemsFile, boolean useForestPruning, double threshold) throws IOException |
| void | initRead (String hypergraphsFile, String rulesFile, HashMap< Integer,?> selectedSentences) |
| HashMap< Integer, Rule > | getAssocatedGrammar () |
| void | closeReaders () |
| void | closeItemsWriter () |
| void | saveHyperGraph (HyperGraph hg) throws IOException |
| String | createModelLogPLine (HGNode parentNode, HyperEdge edge) |
| HyperGraph | readHyperGraph () |
| void | writeRulesNonParallel (String rulesFile) throws IOException |
| void | writeRulesParallel (BufferedWriter out, HashMap< Integer, Integer > writtenRules) throws IOException |
Static Public Member Functions | |
| static Map< String, Integer > | obtainRuleStringToIDTable (String rulesFile) |
| static int | mergeDiskHyperGraphs (int ngramStateID, boolean saveModelCosts, int totalNumSent, boolean useUniqueNbest, boolean useTreeNbest, String filePrefix1, String filePrefix2, String filePrefixOut, boolean removeDuplicate) throws IOException |
Private Member Functions | |
| void | resetStates () |
| void | constructItemTables (HyperGraph hg) |
| void | constructItemTables (HGNode item) |
| void | writeItem (HGNode item) throws IOException |
| final boolean | isOutOfVocabularyRule (Rule rl) |
| void | writeHyperedge (HGNode node, HyperEdge edge) throws IOException |
| HGNode | readNode () |
| HyperEdge | readHyperedge () |
| void | writeRule (BufferedWriter out, Rule rule, int ruleID) throws IOException |
Private Attributes | |
| int | LMFeatureID = 0 |
| List< FeatureFunction > | featureFunctions |
| boolean | storeModelLogP = false |
| String | startLine |
| HashMap< HGNode, Integer > | itemToID = new HashMap<HGNode, Integer>() |
| HashMap< Integer, HGNode > | idToItem = new HashMap<Integer, HGNode>() |
| int | currentItemID = 1 |
| int | qtyDeductions = 0 |
| HashMap< Integer, Rule > | associatedGrammar = new HashMap<Integer, Rule>() |
| BufferedWriter | itemsWriter |
| BufferedReader | itemsReader |
| HyperGraphPruning | pruner |
| GrammarReader< BilingualRule > | ruleReader |
| HashMap< Integer,?> | selectedSentences |
| int | sentID |
Static Private Attributes | |
| static final String | SENTENCE_TAG = "#SENT: " |
| static final String | ITEM_TAG = "#I" |
| static final String | ITEM_STATE_TAG = " ST " |
| static final String | NULL_ITEM_STATE = "nullstate" |
| static int | NULL_RULE_ID = -1 |
| static Grammar | pGrammar = new MemoryBasedBatchGrammar() |
| static final Logger | logger = Logger.getLogger(DiskHyperGraph.class.getName()) |
This class implements functions for writing/reading a hypergraph to/from disk. Limitations of this version: (1) it cannot recover each individual feature, notably the LM feature; (2) it assumes there is only one stateful feature, which must be an LM feature.
| joshua.decoder.hypergraph.DiskHyperGraph.DiskHyperGraph | ( | int | LMFeatureID, |
| boolean | storeModelCosts, | ||
| List< FeatureFunction > | featureFunctions | ||
| ) |
For saving, one needs to specify the featureFunctions. For reading, the list does not need to be provided.
| void joshua.decoder.hypergraph.DiskHyperGraph.constructItemTables | ( | HyperGraph | hg | ) | [private] |
Assign IDs to all HGNodes in the hypergraph. We do a depth-first traversal starting at the goal item, and assign IDs from the bottom up. BUG: this code could cause a stack overflow for deep trees.
| void joshua.decoder.hypergraph.DiskHyperGraph.constructItemTables | ( | HGNode | item | ) | [private] |
This method is really private, and should only be called by constructItemTables(HyperGraph).
| String joshua.decoder.hypergraph.DiskHyperGraph.createModelLogPLine | ( | HGNode | parentNode, |
| HyperEdge | edge | ||
| ) |
Do not remove this function, as it gives an extending class the freedom to override it.
| HashMap<Integer, Rule> joshua.decoder.hypergraph.DiskHyperGraph.getAssocatedGrammar | ( | ) |
| void joshua.decoder.hypergraph.DiskHyperGraph.initRead | ( | String | hypergraphsFile, |
| String | rulesFile, | ||
| HashMap< Integer,?> | selectedSentences | ||
| ) |
| void joshua.decoder.hypergraph.DiskHyperGraph.initWrite | ( | String | itemsFile, |
| boolean | useForestPruning, | ||
| double | threshold | ||
| ) | throws IOException |
| final boolean joshua.decoder.hypergraph.DiskHyperGraph.isOutOfVocabularyRule | ( | Rule | rl | ) | [private] |
| static int joshua.decoder.hypergraph.DiskHyperGraph.mergeDiskHyperGraphs | ( | int | ngramStateID, |
| boolean | saveModelCosts, | ||
| int | totalNumSent, | ||
| boolean | useUniqueNbest, | ||
| boolean | useTreeNbest, | ||
| String | filePrefix1, | ||
| String | filePrefix2, | ||
| String | filePrefixOut, | ||
| boolean | removeDuplicate | ||
| ) | throws IOException [static] |
| static Map<String, Integer> joshua.decoder.hypergraph.DiskHyperGraph.obtainRuleStringToIDTable | ( | String | rulesFile | ) | [static] |
This is a hack, as the pGrammar does not set defaultLHS properly.
| HGNode joshua.decoder.hypergraph.DiskHyperGraph.readNode | ( | ) | [private] |
| void joshua.decoder.hypergraph.DiskHyperGraph.resetStates | ( | ) | [private] |
| void joshua.decoder.hypergraph.DiskHyperGraph.saveHyperGraph | ( | HyperGraph | hg | ) | throws IOException |
| void joshua.decoder.hypergraph.DiskHyperGraph.writeHyperedge | ( | HGNode | node, |
| HyperEdge | edge | ||
| ) | throws IOException [private] |
| void joshua.decoder.hypergraph.DiskHyperGraph.writeItem | ( | HGNode | item | ) | throws IOException [private] |
| void joshua.decoder.hypergraph.DiskHyperGraph.writeRule | ( | BufferedWriter | out, |
| Rule | rule, | ||
| int | ruleID | ||
| ) | throws IOException [private] |
| void joshua.decoder.hypergraph.DiskHyperGraph.writeRulesNonParallel | ( | String | rulesFile | ) | throws IOException |
| void joshua.decoder.hypergraph.DiskHyperGraph.writeRulesParallel | ( | BufferedWriter | out, |
| HashMap< Integer, Integer > | writtenRules | ||
| ) | throws IOException |
HashMap<Integer, Rule> joshua.decoder.hypergraph.DiskHyperGraph.associatedGrammar = new HashMap<Integer, Rule>() [private] |
int joshua.decoder.hypergraph.DiskHyperGraph.currentItemID = 1 [private] |
HashMap<Integer, HGNode> joshua.decoder.hypergraph.DiskHyperGraph.idToItem = new HashMap<Integer, HGNode>() [private] |
final String joshua.decoder.hypergraph.DiskHyperGraph.ITEM_STATE_TAG = " ST " [static, private] |
final String joshua.decoder.hypergraph.DiskHyperGraph.ITEM_TAG = "#I" [static, private] |
BufferedReader joshua.decoder.hypergraph.DiskHyperGraph.itemsReader [private] |
BufferedWriter joshua.decoder.hypergraph.DiskHyperGraph.itemsWriter [private] |
HashMap<HGNode, Integer> joshua.decoder.hypergraph.DiskHyperGraph.itemToID = new HashMap<HGNode, Integer>() [private] |
int joshua.decoder.hypergraph.DiskHyperGraph.LMFeatureID = 0 [private] |
final Logger joshua.decoder.hypergraph.DiskHyperGraph.logger = Logger.getLogger(DiskHyperGraph.class.getName()) [static, private] |
final String joshua.decoder.hypergraph.DiskHyperGraph.NULL_ITEM_STATE = "nullstate" [static, private] |
int joshua.decoder.hypergraph.DiskHyperGraph.NULL_RULE_ID = -1 [static, private] |
Grammar joshua.decoder.hypergraph.DiskHyperGraph.pGrammar = new MemoryBasedBatchGrammar() [static, private] |
This is wrong, as the default LHS and owner are not properly set. For this reason, the creation of OOV rules may cause bugs.
int joshua.decoder.hypergraph.DiskHyperGraph.qtyDeductions = 0 [private] |
GrammarReader<BilingualRule> joshua.decoder.hypergraph.DiskHyperGraph.ruleReader [private] |
HashMap<Integer, ?> joshua.decoder.hypergraph.DiskHyperGraph.selectedSentences [private] |
final String joshua.decoder.hypergraph.DiskHyperGraph.SENTENCE_TAG = "#SENT: " [static, private] |
int joshua.decoder.hypergraph.DiskHyperGraph.sentID [private] |
String joshua.decoder.hypergraph.DiskHyperGraph.startLine [private] |
boolean joshua.decoder.hypergraph.DiskHyperGraph.storeModelLogP = false [private] |