public class RetrievalEnvironment extends Object
Modifier and Type | Field and Description |
---|---|
static String | dataCollection |
static long | defaultCf |
static int | defaultDf |
static int | documentCount |
static boolean | mIsNewModel |
static int | topK |
Constructor and Description |
---|
RetrievalEnvironment(String indexPath, FileSystem fs) |
Modifier and Type | Method and Description |
---|---|
void | addImportanceModel(String key, ConceptImportanceModel m) |
static String | appendPath(String base, String file) |
void | clearPostingsReaderCache() |
long | collectionFrequency(String expression) Returns the collection frequency of a particular expression. |
static Path | createPath(String base, String file) |
int | documentFrequency(String expression) Returns the document frequency of a particular expression. |
IntDocVector[] | documentVectors(int[] docSet) |
String | getCfByIntData() Returns file that contains a list of collection frequencies sorted by term id. |
String | getCfByTermData() Returns file that contains a list of collection frequencies sorted by the alphabetical order of terms. |
long | getCollectionSize() |
long | getDefaultCf() Returns the default collection frequency. |
int | getDefaultDf() Returns the default document frequency. |
String | getDfByIntData() Returns file that contains a list of document frequencies sorted by term id. |
String | getDfByTermData() Returns file that contains a list of document frequencies sorted by the alphabetical order of terms. |
Path | getDoclengthsData() Returns file that contains the document length data. |
Path | getDoclengthsDirectory() Returns directory that contains the document length data. |
DocnoMapping | getDocnoMapping() |
Path | getDocnoMappingData() |
Path | getDocnoMappingDirectory() |
float | getDocScore(String type, int docno) |
long | getDocumentCount() |
int | getDocumentLength(int docid) |
int | getIdFromTerm(String term) |
ConceptImportanceModel | getImportanceModel(String id) |
Collection<ConceptImportanceModel> | getImportanceModels() |
String | getIndexTermIdMappingData() Returns file that contains an index of term ids into the array of terms. |
String | getIndexTermIdsData() Returns file that contains term ids sorted by the alphabetical order of terms. |
String | getIndexTermsData() Returns file that contains the list of terms in the collection. |
String | getIntDocVectorsDirectory() Returns directory that contains the IntDocVector representation of the collection. |
String | getIntDocVectorsForwardIndex() Returns file that contains an index into the IntDocVector representation of the collection. |
String | getNonPositionalPostingsDirectory() |
String | getPostingsDirectory() |
String | getPostingsIndexData() Returns file that contains an index into the postings. |
PostingsList | getPostingsList(String term) |
PostingsReader | getPostingsReader(Expression exp) |
String | getTempDirectory() |
String | getTermDfCfDirectory() |
String | getTermDocVectorsDirectory() Returns directory that contains the TermDocVector representation of the collection. |
String | getTermDocVectorsForwardIndex() Returns file that contains an index into the TermDocVector representation of the collection. |
String | getTermFromId(int termid) |
String | getWeightedIntDocVectorsDirectory() |
String | getWeightedIntDocVectorsForwardIndex() |
String | getWeightedTermDocVectorsDirectory() |
void | initialize(boolean loadDoclengths) |
static DocnoMapping | loadDocnoMapping(String indexPath, FileSystem fs) |
void | loadDocScore(String type, String provider, String path) |
static void | main(String[] args) |
float | readCollectionAverageDocumentLength() |
int | readCollectionDocumentCount() |
long | readCollectionLength() |
String | readCollectionName() |
String | readCollectionPath() |
int | readCollectionTermCount() |
String | readDocnoMappingClass() |
int | readDocnoOffset() |
String | readInputFormat() |
String | readPostingsType() |
String | readTokenizerClass() |
static void | setIsNew(boolean isNewModel) |
String[] | tokenize(String text) Tokenizes text according to the tokenizer used to process the document collection. |
void | writeCollectionAverageDocumentLength(float n) |
void | writeCollectionDocumentCount(int n) |
void | writeCollectionLength(long cnt) |
void | writeCollectionName(String s) |
void | writeCollectionPath(String s) |
void | writeCollectionTermCount(int cnt) |
void | writeDocnoMappingClass(String s) |
void | writeDocnoOffset(int n) |
void | writeInputFormat(String s) |
void | writePostingsType(String type) |
void | writeTokenizerClass(String s) |
public static String dataCollection
public static long defaultCf
public static int defaultDf
public static int documentCount
public static boolean mIsNewModel
public static int topK
public RetrievalEnvironment(String indexPath, FileSystem fs) throws IOException
Throws: IOException
public void addImportanceModel(String key, ConceptImportanceModel m)
public void clearPostingsReaderCache()
public long collectionFrequency(String expression)
public int documentFrequency(String expression)
public IntDocVector[] documentVectors(int[] docSet)
public String getCfByIntData()
public String getCfByTermData()
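Returns file that contains a list of collection frequencies sorted by the alphabetical order of terms. [...] getIndexTermsData() data.
public long getCollectionSize()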
public long getDefaultCf()
public int getDefaultDf()
public String getDfByIntData()
public String getDfByTermData()
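Returns file that contains a list of document frequencies sorted by the alphabetical order of terms. [...] getIndexTermsData() data.
public Path getDoclengthsData()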
Returns file that contains the document length data. [...] DocLengthTable, which provides random access to document lengths.
public Path getDoclengthsDirectory()
Returns directory that contains the document length data. [...] getDoclengthsData().
public DocnoMapping getDocnoMapping() throws IOException
Throws: IOException
public Path getDocnoMappingData()
public Path getDocnoMappingDirectory()
public float getDocScore(String type, int docno)
public long getDocumentCount()
public int getDocumentLength(int docid)
public int getIdFromTerm(String term)
public ConceptImportanceModel getImportanceModel(String id)
public Collection<ConceptImportanceModel> getImportanceModels()
public String getIndexTermIdMappingData()
Returns file that contains an index of term ids into the array of terms. [...] getIndexTermsData() file to map from term ids back to terms.
public String getIndexTermIdsData()
Returns file that contains term ids sorted by the alphabetical order of terms. [...] getIndexTermsData() data.
public String getIndexTermsData()
public String getIntDocVectorsDirectory()
Returns directory that contains the IntDocVector representation of the collection.
public String getIntDocVectorsForwardIndex()
Returns file that contains an index into the IntDocVector representation of the collection. This file serves as input to IntDocVectorsForwardIndex, which provides random access to the document vectors.
public String getNonPositionalPostingsDirectory()
public String getPostingsDirectory()
public String getPostingsIndexData()
Returns file that contains an index into the postings. [...] IntPostingsForwardIndex, which provides random access to postings lists.
public PostingsList getPostingsList(String term)
public PostingsReader getPostingsReader(Expression exp)
public String getTempDirectory()
public String getTermDfCfDirectory()
public String getTermDocVectorsDirectory()
Returns directory that contains the TermDocVector representation of the collection.
public String getTermDocVectorsForwardIndex()
Returns file that contains an index into the TermDocVector representation of the collection. This file serves as input to TermDocVectorsForwardIndex, which provides random access to the document vectors.
public String getTermFromId(int termid)
public String getWeightedIntDocVectorsDirectory()
public String getWeightedIntDocVectorsForwardIndex()
public String getWeightedTermDocVectorsDirectory()
public void initialize(boolean loadDoclengths) throws IOException, ConfigurationException
Throws: IOException, ConfigurationException
public static DocnoMapping loadDocnoMapping(String indexPath, FileSystem fs) throws IOException
Throws: IOException
public float readCollectionAverageDocumentLength()
public int readCollectionDocumentCount()
public long readCollectionLength()
public String readCollectionName()
public String readCollectionPath()
public int readCollectionTermCount()
public String readDocnoMappingClass()
public int readDocnoOffset()
public String readInputFormat()
public String readPostingsType()
public String readTokenizerClass()
public static void setIsNew(boolean isNewModel)
public String[] tokenize(String text)
Parameters: text - text to tokenize
public void writeCollectionAverageDocumentLength(float n)
public void writeCollectionDocumentCount(int n)
public void writeCollectionLength(long cnt)
public void writeCollectionName(String s)
public void writeCollectionPath(String s)
public void writeCollectionTermCount(int cnt)
public void writeDocnoMappingClass(String s)
public void writeDocnoOffset(int n)
public void writeInputFormat(String s)
public void writePostingsType(String type)
public void writeTokenizerClass(String s)