public class PreprocessHelper extends Object
Constructor and Description |
---|
PreprocessHelper(int minVectorTerms,
int minSentenceLength,
Configuration conf)
Implemented for non-cluster mode; files are read directly from the local file system.
|
PreprocessHelper(int minVectorTerms,
int minSentenceLength,
JobConf conf)
Implemented for HDFS cluster mode; files are read from the local cache.
|
Modifier and Type | Method and Description |
---|---|
HMapSFW |
createEDocVector(String sentence) |
HMapSFW |
createEDocVector(String sentence,
HMapSIW term2Tf) |
HMapSFW |
createFDocVector(String sentence) |
HMapSFW |
createFDocVector(String sentence,
HMapSIW term2Tf) |
opennlp.model.MaxentModel |
getClassifier() |
TTable_monolithic_IFAs |
getE2F() |
float |
getEOOVRate(String eSent) |
opennlp.tools.sentdetect.SentenceDetectorME |
getESentenceModel() |
ArrayListWritable<Text> |
getESentences(String text,
ArrayListWritable<HMapSFW> vectors,
ArrayListOfIntsWritable sentLengths) |
Vocab |
getESrc() |
Tokenizer |
getETokenizer() |
Vocab |
getETrg() |
TTable_monolithic_IFAs |
getF2E() |
float |
getFOOVRate(String fSent) |
opennlp.tools.sentdetect.SentenceDetectorME |
getFSentenceModel() |
ArrayListWritable<Text> |
getFSentences(String text,
ArrayListWritable<HMapSFW> vectors,
ArrayListOfIntsWritable sentLengths) |
Vocab |
getFSrc() |
Tokenizer |
getFTokenizer() |
Vocab |
getFTrg() |
void |
loadModels(JobConf job) |
public PreprocessHelper(int minVectorTerms, int minSentenceLength, Configuration conf) throws Exception
Throws: Exception
public opennlp.model.MaxentModel getClassifier()
public TTable_monolithic_IFAs getE2F()
public float getEOOVRate(String eSent)
public opennlp.tools.sentdetect.SentenceDetectorME getESentenceModel()
public ArrayListWritable<Text> getESentences(String text, ArrayListWritable<HMapSFW> vectors, ArrayListOfIntsWritable sentLengths) throws ClassNotFoundException, InstantiationException, IllegalAccessException, IOException
public Vocab getESrc()
public Tokenizer getETokenizer()
public Vocab getETrg()
public TTable_monolithic_IFAs getF2E()
public float getFOOVRate(String fSent)
public opennlp.tools.sentdetect.SentenceDetectorME getFSentenceModel()
public ArrayListWritable<Text> getFSentences(String text, ArrayListWritable<HMapSFW> vectors, ArrayListOfIntsWritable sentLengths) throws ClassNotFoundException, InstantiationException, IllegalAccessException, IOException
public Vocab getFSrc()
public Tokenizer getFTokenizer()
public Vocab getFTrg()