public class BitextClassifierUtils extends Object
Constructor and Description |
---|
BitextClassifierUtils() |
Modifier and Type | Method and Description |
---|---|
String |
concat(String[] tokens) |
static void |
main(String[] args) |
void |
runPrepareSentenceExtractionData(String fLang,
String eLang,
String fFile,
String eFile,
String pairsFile,
String fStopwordsFile,
String eStopwordsFile,
String fVocabSrcFile,
String eVocabTrgFile,
String eVocabSrcFile,
String fVocabTrgFile,
String probTablef2eFile,
String probTablee2fFile,
String fTokenFile,
String eTokenFile,
int featureSet,
float prob,
String alignmentFileName) |
public void runPrepareSentenceExtractionData(String fLang, String eLang, String fFile, String eFile, String pairsFile, String fStopwordsFile, String eStopwordsFile, String fVocabSrcFile, String eVocabTrgFile, String eVocabSrcFile, String fVocabTrgFile, String probTablef2eFile, String probTablee2fFile, String fTokenFile, String eTokenFile, int featureSet, float prob, String alignmentFileName)
fFile
- source language text in 'one sentence per line' format
featureSet
- integer value indicating which set of features to generate in
the test/train data
alignmentFileName
- optional. If word alignments are available for the corpus, they
can be used to generate additional features