public class DocumentVectorUtility extends Object
Modifier and Type | Field and Description |
---|---|
static int |
BLOCK_SIZE |
static int |
MAX_POSITIONS |
Constructor and Description |
---|
DocumentVectorUtility() |
Modifier and Type | Method and Description |
---|---|
static int[][] |
compressData(int[] data,
int blockSize,
boolean computeGaps)
Compresses positions using PForDelta compression
|
static int[] |
deserializePositions(byte[] bytes)
Deserializes the gamma-encoded positions.
|
static int[][] |
getPositions(int[] doc,
int[] terms)
Given a document vector and an array of query terms, this function
constructs the positions.
|
static int |
lastBlockSize(int dataLength,
int nbBlocks,
int blockSize) |
static DocumentVector |
newInstance(String documentVectorClass,
IntDocVector document)
Factory method
|
static DocumentVector |
readInstance(String documentVectorClass,
DataInput input)
Reads an instance of DocumentVector from input
|
static byte[] |
serializePositions(int[] positions)
Serializes the positions using gamma codes
|
public static final int BLOCK_SIZE
public static final int MAX_POSITIONS
public static int[][] compressData(int[] data, int blockSize, boolean computeGaps)
public static int[] deserializePositions(byte[] bytes) throws IOException
bytes
- Serialized positions
Throws: IOException
public static int[][] getPositions(int[] doc, int[] terms)
doc
- Document vector
terms
- Query terms
public static int lastBlockSize(int dataLength, int nbBlocks, int blockSize)
public static DocumentVector newInstance(String documentVectorClass, IntDocVector document) throws Exception
documentVectorClass
- DocumentVector class
document
- IntDocVector (term positions start from 1)
Throws: Exception
public static DocumentVector readInstance(String documentVectorClass, DataInput input) throws Exception
input
- DataInput
documentVectorClass
- DocumentVector class
Throws: Exception
public static byte[] serializePositions(int[] positions) throws IOException
positions
- Array of positions for a term
Throws: IOException