Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Flexsearch results are not ordered by relevance. #1216

@Maximilien-Lepivain-Wiztivi

Description

Description

When searching the documentation, the results are not ordered by relevance, which makes search less effective than it could be.

Steps to reproduce

I can't provide a demo.
I can describe my example and propose the fix that I applied on my side.

  1. We have around 200 pages of documentation (and growing),
  2. I type a word, for example, "Performance",
  3. The pages that are returned are relevant, but they do not seem to be properly ordered. In this example, at least two pages are returned, titled "Performance" and "Configuration".

Expected result

Both pages indeed contain the word "performance" but the dedicated page should be proposed first.

Actual result

The page titled "Configuration" is proposed before the page entitled "Performance".

Environment

> npm version

{
  npm: '9.8.1',
  node: '18.18.2',
  acorn: '8.10.0',
  ada: '2.6.0',
  ares: '1.19.1',
  brotli: '1.0.9',
  cldr: '43.1',
  icu: '73.2',
  llhttp: '6.0.11',
  modules: '108',
  napi: '9',
  nghttp2: '1.57.0',
  nghttp3: '0.7.0',
  ngtcp2: '0.8.1',
  openssl: '3.0.10+quic',
  simdutf: '3.2.14',
  tz: '2023c',
  undici: '5.26.3',
  unicode: '15.0',
  uv: '1.44.2',
  uvwasi: '0.0.18',
  v8: '10.2.154.26-node.26',
  zlib: '1.2.13.1-motley'
}

> npm list

├── @hyas/[email protected]
├── @hyas/[email protected]
├── @hyas/[email protected]
├── @hyas/[email protected]
├── @tabler/[email protected]
├── [email protected]
├── [email protected]
├── [email protected]
└── [email protected]

> exec-bin node_modules/.bin/hugo/hugo version

hugo v0.121.1-00b46fed8e47f7bb0a85d7cfc2d9f1356379b740+extended darwin/amd64 BuildDate=2023-12-08T08:47:45Z VendorInfo=gohugoio

Proposed fix:

From my understanding, Flexsearch provides results in relevance order; however, the flexsearch.js wrapper in @hyas/assets/js/flexSearch.js implicitly reorders the results because of the way the data is aggregated before display. The function doSearch aggregates the results by their id in a map:

// Collects every enriched hit from every result group into `items`, keyed by document id.
// NOTE(review): if the ids are integer-like, a plain object enumerates them in ascending
// numeric order, so Object.keys(items) would lose the order the hits arrived in — confirm.
results.forEach(function (result) {
	result.result.forEach(function (r) {
        items[r.id] = r.doc;
     });
});

The issue is that when you call Object.keys on the result map, the order is no longer the relevance order given by Flexsearch but the numeric order of the ids.
If you want to aggregate by id, you also need to keep the order of the documents provided by Flexsearch, as follows (it's a tiny bit dirty):

/**
 * Renders search hits into the `.search-results` container and toggles the
 * "No recent searches" / "No search results" messages.
 *
 * @param {Object} items - Hit documents keyed by id; each one is read for
 *   `title`, `permalink` and `summary`.
 * @param {Array} order - Ids in the order the hits must be displayed.
 */
function showResults(items, order) {
    const resultTemplate = document.querySelector('template').content;
    const fragment = document.createDocumentFragment();

    // Drop whatever the previous search rendered.
    const container = document.querySelector('.search-results');
    container.textContent = '';

    // Adds or removes the `d-none` class on the element matched by `selector`.
    const setHidden = (selector, hidden) => {
      const { classList } = document.querySelector(selector);
      if (hidden) {
        classList.add('d-none');
      } else {
        classList.remove('d-none');
      }
    };

    const isEmpty = Object.keys(items).length === 0;

    if (isEmpty) {
      if (query.value === '') {
        // Nothing typed yet: only the "No recent searches" message is shown.
        setHidden('.search-no-results', true);
        setHidden('.search-no-recent', false);
      } else {
        // A query without hits: echo it inside the "No search results" message.
        setHidden('.search-no-recent', true);
        document.querySelector('.query-no-results').innerText = query.value;
        setHidden('.search-no-results', false);
      }
    } else {
      // There are hits, so neither message applies.
      setHidden('.search-no-recent', true);
      setHidden('.search-no-results', true);
    }

    // Walk `order` rather than the keys of `items` so the caller's ordering is kept.
    for (const id of order) {
      const doc = items[id];
      const entry = resultTemplate.cloneNode(true);
      const link = entry.querySelector('a');
      const timeEl = entry.querySelector('time');
      const summaryEl = entry.querySelector('.content');
      link.innerHTML = doc.title;
      link.href = doc.permalink;
      timeEl.innerText = "";
      summaryEl.innerHTML = doc.summary;
      fragment.appendChild(entry);
    }

    container.appendChild(fragment);
  }

  function doSearch() {
    const query = document.querySelector('.search-text').value.trim();
    const limit = {{ .searchLimit }};
    const results = index.search({
      query: query,
      enrich: true,
      limit: limit,
    });
    const items = {};
    const order = [];

    results.forEach(function (result) {

      result.result.forEach(function (r) {
        if(!order.includes(r.id)) {
          order.push(r.id);
        }

        items[r.id] = r.doc;
      });
    });

    showResults(items, order);
  }

The results feel more relevant that way in our case: the "Performance" page was proposed first, and the other results felt much more relevant.

Don't hesitate to tell me if I missed something or got something wrong.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions