当前位置:网站首页>Lucene skip table
Lucene skip table
2022-07-03 07:29:00 【chuanyangwang】
org.apache.lucene.codecs.MultiLevelSkipListReader skipTo
/** Skips entries to the first beyond the current whose document number is
* greater than or equal to <i>target</i>. Returns the current doc count.
*/
public int skipTo(int target) throws IOException {
// walk up the levels until highest level is found that has a skip
// for this target
int level = 0;
// Go up and find the first one greater than target Of level. If you can't find it, it will end this while
while (level < numberOfSkipLevels - 1 && target > skipDoc[level + 1]) {
level++;
}
// When level Greater than 0 Execute the following when while
while (level >= 0) {
// target If it is greater than level Sentry on the ground floor
if (target > skipDoc[level]) {
// Load next SkipDatum
if (!loadNextSkip(level)) {
// If unsuccessful, continue here level turn back and proceed
continue;
}
} else {
// no more skips on this level, go down one level
// If there is no more SkipDatum Go down
if (level > 0 && lastChildPointer > skipStream[level - 1].getFilePointer()) {
// Go down
seekChild(level - 1);
}
level--;
}
}
return numSkipped[0] - skipInterval[0] - 1;
}skipDoc: Sentinel array
skipStream:
lastChildPointer: This layer childPoint
private boolean loadNextSkip(int level) throws IOException {
// we have to skip, the target document is greater than the current
// skip list entry
// Set up
setLastSkipData(level);
numSkipped[level] += skipInterval[level];
// numSkipped may overflow a signed int, so compare as unsigned.
if (Integer.compareUnsigned(numSkipped[level], docCount) > 0) {
// this skip list is exhausted
skipDoc[level] = Integer.MAX_VALUE;
if (numberOfSkipLevels > level) numberOfSkipLevels = level;
return false;
}
// read next skip entry
skipDoc[level] += readSkipData(level, skipStream[level]);
if (level != 0) {
// read the child pointer if we are not on the leaf level
// childPointer[level] = The starting position of the lower layer + The offset of the lower layer
// skipPointer[level - 1] : The actual address of the next floor
// skipStream[level].readVLong(): The offset of the next layer
childPointer[level] = skipStream[level].readVLong() + skipPointer[level - 1];
}
return true;
}
( This figure is quoted from the reference document )
/** Copies the values of the last read skip entry on this level */
protected void setLastSkipData(int level) {
lastDoc = skipDoc[level];
lastChildPointer = childPointer[level];
}Initialization operation
org.apache.lucene.backward_codecs.lucene40.blocktree.SegmentTermsEnum postings
org.apache.lucene.backward_codecs.lucene84.Lucene84PostingsReader postings
org.apache.lucene.backward_codecs.lucene84.Lucene84PostingsReader reset
@Override
public int advance(int target) throws IOException {
if (target > nextSkipDoc) {
if (skipper == null) {
// Lazy init: first time this enum has ever been used for skipping
skipper =
new Lucene84SkipReader(
docIn.clone(), MAX_SKIP_LEVELS, true, indexHasOffsets, indexHasPayloads);
}
if (!skipped) {
assert skipOffset != -1;
// This is the first time this enum has skipped
// since reset() was called; load the skip data:
// initialization , Pass in the location information of the jump table
skipper.init(
docTermStartFP + skipOffset, docTermStartFP, posTermStartFP, payTermStartFP, docFreq);
skipped = true;
}
final int newDocUpto = skipper.skipTo(target) + 1;
if (newDocUpto > blockUpto - BLOCK_SIZE + docBufferUpto) {
// Skipper moved
assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto;
blockUpto = newDocUpto;
// Force to read next block
docBufferUpto = BLOCK_SIZE;
accum = skipper.getDoc();
docIn.seek(skipper.getDocPointer());
posPendingFP = skipper.getPosPointer();
payPendingFP = skipper.getPayPointer();
posPendingCount = skipper.getPosBufferUpto();
lastStartOffset = 0; // new document
payloadByteUpto = skipper.getPayloadByteUpto();
}
nextSkipDoc = skipper.getNextSkipDoc();
}
if (docBufferUpto == BLOCK_SIZE) {
refillDocs();
}
// Now scan:
long doc;
while (true) {
doc = docBuffer[docBufferUpto];
freq = (int) freqBuffer[docBufferUpto];
posPendingCount += freq;
docBufferUpto++;
if (doc >= target) {
break;
}
}
position = 0;
lastStartOffset = 0;
return this.doc = (int) doc;
}Load jump table
/** Initializes the reader, for reuse on a new term. */
public void init(long skipPointer, int df) throws IOException {
this.skipPointer[0] = skipPointer;
this.docCount = df;
assert skipPointer >= 0 && skipPointer <= skipStream[0].length()
: "invalid skip pointer: " + skipPointer + ", length=" + skipStream[0].length();
Arrays.fill(skipDoc, 0);
Arrays.fill(numSkipped, 0);
Arrays.fill(childPointer, 0);
for (int i = 1; i < numberOfSkipLevels; i++) {
skipStream[i] = null;
}
loadSkipLevels();
}
/** Loads the skip levels */
private void loadSkipLevels() throws IOException {
if (docCount <= skipInterval[0]) {
numberOfSkipLevels = 1;
} else {
numberOfSkipLevels = 1+MathUtil.log(docCount/skipInterval[0], skipMultiplier);
}
if (numberOfSkipLevels > maxNumberOfSkipLevels) {
numberOfSkipLevels = maxNumberOfSkipLevels;
}
skipStream[0].seek(skipPointer[0]);
int toBuffer = numberOfLevelsToBuffer;
for (int i = numberOfSkipLevels - 1; i > 0; i--) {
// the length of the current level
long length = skipStream[0].readVLong();
// the start pointer of the current level
skipPointer[i] = skipStream[0].getFilePointer();
if (toBuffer > 0) {
// buffer this level
skipStream[i] = new SkipBuffer(skipStream[0], (int) length);
toBuffer--;
} else {
// clone this stream, it is already at the start of the current level
skipStream[i] = skipStream[0].clone();
if (inputIsBuffered && length < BufferedIndexInput.BUFFER_SIZE) {
((BufferedIndexInput) skipStream[i]).setBufferSize(Math.max(BufferedIndexInput.MIN_BUFFER_SIZE, (int) length));
}
// move base stream beyond the current level
// length: The length of this layer
// skipStream[0] The address of this floor
// Layers are continuous, so it can be calculated in this way
skipStream[0].seek(skipStream[0].getFilePointer() + length);
}
}
// use base stream for the lowest level
skipPointer[0] = skipStream[0].getFilePointer();
}Reference resources
Index file generation ( Four )-html
https://www.amazingkoala.com.cn/Lucene/Index/2020/0106/124.html
边栏推荐
- Circuit, packet and message exchange
- 树莓派更新工具链
- High concurrency memory pool
- 你开发数据API最快多长时间?我1分钟就足够了
- JS monitors empty objects and empty references
- Arduino Serial系列函数 有关print read 的总结
- Comparison of advantages and disadvantages between most complete SQL and NoSQL
- Common problems in io streams
- Raspberry pie update tool chain
- Wireshark software usage
猜你喜欢
随机推荐
Read config configuration file of vertx
An overview of IfM Engage
[solved] sqlexception: invalid value for getint() - 'Tian Peng‘
Summary of abnormal mechanism of interview
Common methods of file class
Lombok cooperates with @slf4j and logback to realize logging
HCIA notes
Download address collection of various versions of devaexpress
Unified handling and interception of exception exceptions of vertx
PgSQL converts string to double type (to_number())
昇思MindSpore再升级,深度科学计算的极致创新
Use of other streams
C code production YUV420 planar format file
Deep learning parameter initialization (I) Xavier initialization with code
OSI knowledge sorting
When MySQL inserts Chinese into the database, there is a diamond question mark garbled code
Store WordPress media content on 4everland to complete decentralized storage
IP home online query platform
2. E-commerce tool cefsharp autojs MySQL Alibaba cloud react C RPA automated script, open source log
[untitled]









