当前位置:网站首页>Lucene skip table
Lucene skip table
2022-07-03 07:29:00 【chuanyangwang】
org.apache.lucene.codecs.MultiLevelSkipListReader skipTo
/** Skips entries to the first beyond the current whose document number is
 * greater than or equal to <i>target</i>. Returns the current doc count.
 */
public int skipTo(int target) throws IOException {
// walk up the levels until highest level is found that has a skip
// for this target
int level = 0;
// Climb while the level above still has a skip entry below target; stop at the
// highest level able to skip toward target (or at the top if none qualifies).
while (level < numberOfSkipLevels - 1 && target > skipDoc[level + 1]) {
level++;
}
// Descend from that level back down to level 0, consuming skip entries as we go.
while (level >= 0) {
// target still lies beyond the current skip entry ("sentinel") on this level
if (target > skipDoc[level]) {
// Read the next skip entry on this level
if (!loadNextSkip(level)) {
// Level exhausted: loadNextSkip set skipDoc[level] to MAX_VALUE, so the
// retry falls into the else branch below and we descend a level.
continue;
}
} else {
// no more skips on this level, go down one level
// Only seek the child stream if it is behind the recorded child pointer.
if (level > 0 && lastChildPointer > skipStream[level - 1].getFilePointer()) {
// Position level-1's stream at the child data of the last entry read here.
seekChild(level - 1);
}
level--;
}
}
// numSkipped[0] counts one interval past the target block; back up one interval.
return numSkipped[0] - skipInterval[0] - 1;
}
skipDoc[level]: doc ID of the last skip entry read on each level — the "sentinel" that target is compared against.
skipStream[level]: the input stream positioned within each level's skip data.
lastChildPointer: child pointer of the last entry read, i.e. the file position where the next lower level's stream should seek to.
/** Reads the next skip entry on {@code level}; returns false once the level is exhausted. */
private boolean loadNextSkip(int level) throws IOException {
// we have to skip, the target document is greater than the current
// skip list entry
// Remember the entry we are moving past (copies into lastDoc / lastChildPointer).
setLastSkipData(level);
numSkipped[level] += skipInterval[level];
// numSkipped may overflow a signed int, so compare as unsigned.
if (Integer.compareUnsigned(numSkipped[level], docCount) > 0) {
// this skip list is exhausted
skipDoc[level] = Integer.MAX_VALUE;
// Truncate: no level at or above this one has entries left.
if (numberOfSkipLevels > level) numberOfSkipLevels = level;
return false;
}
// read next skip entry (doc IDs are delta-encoded, hence the +=)
skipDoc[level] += readSkipData(level, skipStream[level]);
if (level != 0) {
// read the child pointer if we are not on the leaf level
// childPointer[level] = absolute start of the level below + delta stored in this entry
// skipPointer[level - 1]: absolute file position where the lower level begins
// skipStream[level].readVLong(): this entry's offset into the lower level
childPointer[level] = skipStream[level].readVLong() + skipPointer[level - 1];
}
return true;
}
(This figure is quoted from the reference document.)
/** Records the skip entry just consumed on {@code level}: its doc ID and child pointer. */
protected void setLastSkipData(int level) {
  lastChildPointer = childPointer[level];
  lastDoc = skipDoc[level];
}
Initialization operation
org.apache.lucene.backward_codecs.lucene40.blocktree.SegmentTermsEnum postings
org.apache.lucene.backward_codecs.lucene84.Lucene84PostingsReader postings
org.apache.lucene.backward_codecs.lucene84.Lucene84PostingsReader reset
@Override
public int advance(int target) throws IOException {
// Only consult the skip list when target lies beyond the next skip entry;
// otherwise scanning the current doc block linearly is cheaper.
if (target > nextSkipDoc) {
if (skipper == null) {
// Lazy init: first time this enum has ever been used for skipping
skipper =
new Lucene84SkipReader(
docIn.clone(), MAX_SKIP_LEVELS, true, indexHasOffsets, indexHasPayloads);
}
if (!skipped) {
assert skipOffset != -1;
// This is the first time this enum has skipped
// since reset() was called; load the skip data:
// Hand the skip reader the on-disk locations of the skip list and postings.
skipper.init(
docTermStartFP + skipOffset, docTermStartFP, posTermStartFP, payTermStartFP, docFreq);
skipped = true;
}
// skipTo returns the doc count up to the last skipped block; +1 = first doc index of it.
final int newDocUpto = skipper.skipTo(target) + 1;
// Only reposition if the skipper moved past the block we are currently scanning.
if (newDocUpto > blockUpto - BLOCK_SIZE + docBufferUpto) {
// Skipper moved
assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto;
blockUpto = newDocUpto;
// Force to read next block
docBufferUpto = BLOCK_SIZE;
// Pull the skipper's state: last doc, file pointers, and pending counts.
accum = skipper.getDoc();
docIn.seek(skipper.getDocPointer());
posPendingFP = skipper.getPosPointer();
payPendingFP = skipper.getPayPointer();
posPendingCount = skipper.getPosBufferUpto();
lastStartOffset = 0; // new document
payloadByteUpto = skipper.getPayloadByteUpto();
}
nextSkipDoc = skipper.getNextSkipDoc();
}
if (docBufferUpto == BLOCK_SIZE) {
refillDocs();
}
// Now scan:
// Linear scan within the (at most one) block containing target.
long doc;
while (true) {
doc = docBuffer[docBufferUpto];
freq = (int) freqBuffer[docBufferUpto];
posPendingCount += freq;
docBufferUpto++;
if (doc >= target) {
break;
}
}
position = 0;
lastStartOffset = 0;
return this.doc = (int) doc;
}
Loading the skip list
/** Initializes the reader, for reuse on a new term. */
public void init(long skipPointer, int df) throws IOException {
  assert skipPointer >= 0 && skipPointer <= skipStream[0].length()
      : "invalid skip pointer: " + skipPointer + ", length=" + skipStream[0].length();
  this.skipPointer[0] = skipPointer;
  this.docCount = df;
  // Reset all per-level state left over from the previous term.
  Arrays.fill(skipDoc, 0);
  Arrays.fill(numSkipped, 0);
  Arrays.fill(childPointer, 0);
  // Drop the per-level streams above level 0; loadSkipLevels re-creates them.
  for (int level = numberOfSkipLevels - 1; level >= 1; level--) {
    skipStream[level] = null;
  }
  loadSkipLevels();
}
/** Loads the skip levels */
private void loadSkipLevels() throws IOException {
// levels = 1 + log_skipMultiplier(docCount / skipInterval[0]):
// each higher level summarizes skipMultiplier entries of the level below.
if (docCount <= skipInterval[0]) {
numberOfSkipLevels = 1;
} else {
numberOfSkipLevels = 1+MathUtil.log(docCount/skipInterval[0], skipMultiplier);
}
if (numberOfSkipLevels > maxNumberOfSkipLevels) {
numberOfSkipLevels = maxNumberOfSkipLevels;
}
skipStream[0].seek(skipPointer[0]);
int toBuffer = numberOfLevelsToBuffer;
// Levels are stored highest-first; walk down to level 1 (level 0 reuses the base stream).
for (int i = numberOfSkipLevels - 1; i > 0; i--) {
// the length of the current level
long length = skipStream[0].readVLong();
// the start pointer of the current level
skipPointer[i] = skipStream[0].getFilePointer();
if (toBuffer > 0) {
// buffer this level
skipStream[i] = new SkipBuffer(skipStream[0], (int) length);
toBuffer--;
} else {
// clone this stream, it is already at the start of the current level
skipStream[i] = skipStream[0].clone();
// Shrink the clone's buffer when the whole level fits in less than one buffer.
if (inputIsBuffered && length < BufferedIndexInput.BUFFER_SIZE) {
((BufferedIndexInput) skipStream[i]).setBufferSize(Math.max(BufferedIndexInput.MIN_BUFFER_SIZE, (int) length));
}
// move base stream beyond the current level; levels are laid out
// contiguously, so seeking `length` bytes forward lands on the next level.
skipStream[0].seek(skipStream[0].getFilePointer() + length);
}
}
// use base stream for the lowest level
skipPointer[0] = skipStream[0].getFilePointer();
}
References
Index file generation (Part 4): https://www.amazingkoala.com.cn/Lucene/Index/2020/0106/124.html
边栏推荐
- pgAdmin 4 v6.11 发布,PostgreSQL 开源图形化管理工具
- [cmake] cmake link SQLite Library
- Logging log configuration of vertx
- Responsive MySQL of vertx
- gstreamer ffmpeg avdec解码数据流向分析
- TypeScript let与var的区别
- Basic knowledge about SQL database
- Mail sending of vertx
- 《指環王:力量之戒》新劇照 力量之戒鑄造者亮相
- [set theory] order relation (partial order relation | partial order set | example of partial order set)
猜你喜欢
How long is the fastest time you can develop data API? One minute is enough for me
1. E-commerce tool cefsharp autojs MySQL Alibaba cloud react C RPA automated script, open source log
Le Seigneur des anneaux: l'anneau du pouvoir
[set theory] Stirling subset number (Stirling subset number concept | ball model | Stirling subset number recurrence formula | binary relationship refinement relationship of division)
3311. 最长算术
The embodiment of generics in inheritance and wildcards
最全SQL与NoSQL优缺点对比
Leetcode 198: 打家劫舍
FileInputStream and fileoutputstream
4279. 笛卡尔树
随机推荐
[cmake] cmake link SQLite Library
691. 立方体IV
圖像識別與檢測--筆記
Deep learning parameter initialization (I) Xavier initialization with code
Margin left: -100% understanding in the Grail layout
Hello world of vertx
C code production YUV420 planar format file
Common methods of file class
Vertx's responsive MySQL template
LeetCode
File operation serialization recursive copy
Sorting, dichotomy
sharepoint 2007 versions
Image recognition and detection -- Notes
2021-07-18
TypeScript let与var的区别
Spa single page application
不出网上线CS的各种姿势
Summary of abnormal mechanism of interview
[plus de détails] dernière entrevue complète redis (50)