public class LuceneArabicAnalyzer extends Tokenizer
Constructor and Description |
---|
LuceneArabicAnalyzer() |
Modifier and Type | Method and Description |
---|---|
void | configure(Configuration conf) |
void | configure(Configuration conf, FileSystem fs) |
float | getOOVRate(String text, VocabularyWritable vocab) |
String[] | processContent(String text) |
String | stem(String token) |
Methods inherited from class Tokenizer: getNumberTokens, getStem2NonStemMapping, getUTF8, getVocab, isDiscard, isDiscard, isStemming, isStopWord, isStopWord, isStopwordRemoval, main, normalizeFrench, removeBorderStopWords, removeNonUnicodeChars, setVocab
public void configure(Configuration conf)
public void configure(Configuration conf, FileSystem fs)
public float getOOVRate(String text, VocabularyWritable vocab)
getOOVRate in class Tokenizer
public String[] processContent(String text)
processContent in class Tokenizer