当前位置：网站首页>Lucene merge document order

Lucene merge document order

2022-07-03 07:30:00 【chuanyangwang】

org.apache.lucene.index.MergeState#buildDocMaps

Two cases

1. No, indexSort The situation of , Directly remove the deleted ID. And then according to reader Write in the order of DocMap in

2. Yes indexSort In the case of . according to [field][segmentOrd] Organize , And put it in a limited queue . Comparison method according to sorter Compare sizes in the order of （ It is similar to merging multiple ordered linked lists ）

  private DocMap[] buildDocMaps(List<CodecReader> readers, Sort indexSort) throws IOException {

    if (indexSort == null) {
      // no index sort ... we only must map around deletions, and rebase to the merged segment's
      // docID space
      return buildDeletionDocMaps(readers);
    } else {
      // do a merge sort of the incoming leaves:
      long t0 = System.nanoTime();
      DocMap[] result = MultiSorter.sort(indexSort, readers);
      if (result == null) {
        // already sorted so we can switch back to map around deletions
        return buildDeletionDocMaps(readers);
      } else {
        needsIndexSort = true;
      }
      long t1 = System.nanoTime();
      if (infoStream.isEnabled("SM")) {
        infoStream.message(
            "SM",
            String.format(
                Locale.ROOT, "%.2f msec to build merge sorted DocMaps", (t1 - t0) / 1000000.0));
      }
      return result;
    }
  }

  /**
   * Does a merge sort of the leaves of the incoming reader, returning {@link DocMap} to map each
   * leaf's documents into the merged segment. The documents for each incoming leaf reader must
   * already be sorted by the same sort! Returns null if the merge sort is not needed (segments are
   * already in index sort order).
   */
  static MergeState.DocMap[] sort(Sort sort, List<CodecReader> readers) throws IOException {

    // TODO: optimize if only 1 reader is incoming, though that's a rare case

    SortField[] fields = sort.getSort();
    final IndexSorter.ComparableProvider[][] comparables =
        new IndexSorter.ComparableProvider[fields.length][];
    final int[] reverseMuls = new int[fields.length];
    for (int i = 0; i < fields.length; i++) {
      IndexSorter sorter = fields[i].getIndexSorter();
      if (sorter == null) {
        throw new IllegalArgumentException(
            "Cannot use sort field " + fields[i] + " for index sorting");
      }
      comparables[i] = sorter.getComparableProviders(readers);
      reverseMuls[i] = fields[i].getReverse() ? -1 : 1;
    }
    int leafCount = readers.size();

    PriorityQueue<LeafAndDocID> queue =
        new PriorityQueue<LeafAndDocID>(leafCount) {
          @Override
          public boolean lessThan(LeafAndDocID a, LeafAndDocID b) {
            for (int i = 0; i < comparables.length; i++) {
              int cmp = Long.compare(a.valuesAsComparableLongs[i], b.valuesAsComparableLongs[i]);
              if (cmp != 0) {
                return reverseMuls[i] * cmp < 0;
              }
            }

            // tie-break by docID natural order:
            if (a.readerIndex != b.readerIndex) {
              return a.readerIndex < b.readerIndex;
            } else {
              return a.docID < b.docID;
            }
          }
        };

    PackedLongValues.Builder[] builders = new PackedLongValues.Builder[leafCount];

    for (int i = 0; i < leafCount; i++) {
      CodecReader reader = readers.get(i);
      LeafAndDocID leaf =
          new LeafAndDocID(i, reader.getLiveDocs(), reader.maxDoc(), comparables.length);
      for (int j = 0; j < comparables.length; j++) {
        leaf.valuesAsComparableLongs[j] = comparables[j][i].getAsComparableLong(leaf.docID);
      }
      queue.add(leaf);
      builders[i] = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
    }

    // merge sort:
    int mappedDocID = 0;
    int lastReaderIndex = 0;
    boolean isSorted = true;
    while (queue.size() != 0) {
      LeafAndDocID top = queue.top();
      if (lastReaderIndex > top.readerIndex) {
        // merge sort is needed
        isSorted = false;
      }
      lastReaderIndex = top.readerIndex;
      builders[top.readerIndex].add(mappedDocID);
      if (top.liveDocs == null || top.liveDocs.get(top.docID)) {
        mappedDocID++;
      }
      top.docID++;
      if (top.docID < top.maxDoc) {
        for (int j = 0; j < comparables.length; j++) {
          top.valuesAsComparableLongs[j] =
              comparables[j][top.readerIndex].getAsComparableLong(top.docID);
        }
        queue.updateTop();
      } else {
        queue.pop();
      }
    }
    if (isSorted) {
      return null;
    }

    MergeState.DocMap[] docMaps = new MergeState.DocMap[leafCount];
    for (int i = 0; i < leafCount; i++) {
      final PackedLongValues remapped = builders[i].build();
      final Bits liveDocs = readers.get(i).getLiveDocs();
      docMaps[i] =
          new MergeState.DocMap() {
            @Override
            public int get(int docID) {
              if (liveDocs == null || liveDocs.get(docID)) {
                return (int) remapped.get(docID);
              } else {
                return -1;
              }
            }
          };
    }

    return docMaps;
  }

原网站

版权声明
本文为[chuanyangwang]所创，转载请带上原文链接，感谢
https://yzsam.com/2022/02/202202150604521850.html

当前位置：网站首页>Lucene merge document order

Lucene merge document order

边栏推荐

猜你喜欢

随机推荐