From a7bd8d971ac5a354785de5318d2afa49aba78d57 Mon Sep 17 00:00:00 2001 From: Sny Date: Mon, 6 Apr 2026 11:30:09 +0530 Subject: [PATCH 1/4] OpenConceptLab/ocl_issues#2388 | custom encoder model for reranking --- .../map-projects/ConfigurationForm.jsx | 7 +- src/components/map-projects/MapProject.jsx | 20 ++++- .../map-projects/RerankerConfig.jsx | 90 +++++++++++++++++++ src/components/map-projects/rerankerModels.js | 7 ++ src/i18n/locales/en/translations.json | 6 ++ src/i18n/locales/es/translations.json | 6 ++ src/i18n/locales/zh/translations.json | 6 ++ 7 files changed, 138 insertions(+), 4 deletions(-) create mode 100644 src/components/map-projects/RerankerConfig.jsx create mode 100644 src/components/map-projects/rerankerModels.js diff --git a/src/components/map-projects/ConfigurationForm.jsx b/src/components/map-projects/ConfigurationForm.jsx index 15e4bb7..10dd4bd 100644 --- a/src/components/map-projects/ConfigurationForm.jsx +++ b/src/components/map-projects/ConfigurationForm.jsx @@ -32,6 +32,7 @@ import { SCORES_COLOR } from './constants' import FilterTable from './FilterTable' import MultiAlgoSelector from './MultiAlgoSelector' import LookupConfig from './LookupConfig' +import RerankerConfig from './RerankerConfig' const VisuallyHiddenInput = styled('input')({ clip: 'rect(0 0 0 0)', @@ -46,7 +47,7 @@ const VisuallyHiddenInput = styled('input')({ }); -const ConfigurationForm = ({ project, handleFileUpload, file, owner, setOwner, name, setName, description, setDescription, repo, onRepoChange, repoVersion, setRepoVersion, versions, mappedSources, targetSourcesFromRows, algosSelected, setAlgosSelected, sx, algos, validColumns, columns, isValidColumnValue, updateColumn, configure, setConfigure, columnVisibilityModel, setColumnVisibilityModel, onSave, isSaving, candidatesScore, onScoreChange, includeDefaultFilter, setIncludeDefaultFilter, filters, setFilters, locales, isLoadingLocales, setAIAssistantColumns, AIAssistantColumns, inAIAssistantGroup, lookupConfig, 
setLookupConfig, canBridge, canScispacy }) => { +const ConfigurationForm = ({ project, handleFileUpload, file, owner, setOwner, name, setName, description, setDescription, repo, onRepoChange, repoVersion, setRepoVersion, versions, mappedSources, targetSourcesFromRows, algosSelected, setAlgosSelected, sx, algos, validColumns, columns, isValidColumnValue, updateColumn, configure, setConfigure, columnVisibilityModel, setColumnVisibilityModel, onSave, isSaving, candidatesScore, onScoreChange, includeDefaultFilter, setIncludeDefaultFilter, filters, setFilters, locales, isLoadingLocales, setAIAssistantColumns, AIAssistantColumns, inAIAssistantGroup, lookupConfig, setLookupConfig, rerankerConfig, setRerankerConfig, isCoreUser, canBridge, canScispacy }) => { const { t } = useTranslation(); const isLLMAlgoNotAllowed = !repoVersion?.match_algorithms?.includes('llm') const appliedLocales = filters?.locale ? filters?.locale?.split(',') : [] @@ -222,6 +223,10 @@ const ConfigurationForm = ({ project, handleFileUpload, file, owner, setOwner, n onChange={setAlgosSelected} repo={repoVersion} /> + { + isCoreUser && + + } <> {t('map_project.score_configuration')} diff --git a/src/components/map-projects/MapProject.jsx b/src/components/map-projects/MapProject.jsx index 441174c..4cb8f0b 100644 --- a/src/components/map-projects/MapProject.jsx +++ b/src/components/map-projects/MapProject.jsx @@ -103,6 +103,7 @@ import ImportToCollection from './ImportToCollection' import ProjectLogs from './ProjectLogs'; import { useAlgos } from './algorithms' import AutoMatchDialog from './AutoMatchDialog' +import { DEFAULT_ENCODER_MODEL } from './rerankerModels' import './MapProject.scss' import '../common/ResizablePanel.scss' @@ -209,6 +210,7 @@ const MapProject = () => { const [analysis, setAnalysis] = React.useState({}) const [AIModels, setAIModels] = React.useState([]) const [lookupConfig, setLookupConfig] = React.useState({}) + const [encoderModel, setEncoderModel] = 
React.useState(DEFAULT_ENCODER_MODEL) // import const [openImportToCollection, setOpenImportToCollection] = React.useState(false) @@ -422,6 +424,7 @@ const MapProject = () => { setRetired(Boolean(response.data?.include_retired)) setCandidatesScore(response.data?.score_configuration) setLookupConfig(response.data?.lookup_config) + setEncoderModel(response.data?.encoder_model || DEFAULT_ENCODER_MODEL) setAnalysis(response.data?.analysis || {}) setProject(response.data) setConfigure(false) @@ -866,6 +869,7 @@ const MapProject = () => { formData.append('algorithms', JSON.stringify(map(algosSelected, algo => omit(algo, ['__key'])))) formData.append('score_configuration', JSON.stringify(candidatesScore)) formData.append('lookup_config', JSON.stringify(lookupConfig)) + formData.append('encoder_model', encoderModel) formData.append('include_retired', retired) formData.append('filters', JSON.stringify(getFilters())) const isUpdate = Boolean(project?.id) @@ -1098,7 +1102,8 @@ const MapProject = () => { includeMappings: true, mappingBrief: true, mapTypes: 'SAME-AS,SAME AS,SAME_AS', - reranker: !isMultiAlgo + reranker: !isMultiAlgo, + ...(encoderModel ? { encoder_model: encoderModel } : {}) } forEach(rowBatch, __row => markAlgo(__row.__index, algo.id, 0)) @@ -2040,7 +2045,8 @@ const MapProject = () => { limit: algoDef.limit || CANDIDATES_LIMIT, offset: offset || 0, semantic: ['ocl-semantic', 'custom'].includes(algoDef.type), - reranker: !isMultiAlgo && algoDef.provider === 'ocl' + reranker: !isMultiAlgo && algoDef.provider === 'ocl', + encoder_model: !isMultiAlgo && encoderModel ? encoderModel : undefined }).then(response => callback(response, payload)) } @@ -2201,7 +2207,11 @@ const MapProject = () => { markAlgo(index, 'rerank', 0) const service = APIService.concepts().appendToUrl('$rerank/') try { - const response = await service.post({q: query, rows: candidates,}); + const response = await service.post({ + q: query, + rows: candidates, + ...(encoderModel ? 
{ encoder_model: encoderModel } : {}) + }); setAllCandidates(prev => { const newCandidates = {...prev} @@ -2510,6 +2520,7 @@ const MapProject = () => { filters: filters, fields_mapped: cols, score_configuration: candidatesScore, + encoder_model: encoderModel, target_repo: repo } } @@ -2627,6 +2638,9 @@ const MapProject = () => { inAIAssistantGroup={inAIAssistantGroup} lookupConfig={lookupConfig} setLookupConfig={setLookupConfig} + rerankerConfig={encoderModel} + setRerankerConfig={setEncoderModel} + isCoreUser={isCoreUser} /> ) diff --git a/src/components/map-projects/RerankerConfig.jsx b/src/components/map-projects/RerankerConfig.jsx new file mode 100644 index 0000000..8fa18fb --- /dev/null +++ b/src/components/map-projects/RerankerConfig.jsx @@ -0,0 +1,90 @@ +import React from 'react' +import { useTranslation } from 'react-i18next' +import Autocomplete from '@mui/material/Autocomplete' +import TextField from '@mui/material/TextField' +import Collapse from '@mui/material/Collapse' +import Button from '@mui/material/Button' +import FormHelperText from '@mui/material/FormHelperText' +import UpIcon from '@mui/icons-material/ArrowDropUp'; +import DownIcon from '@mui/icons-material/ArrowDropDown'; +import { CUSTOM_ENCODER_MODEL_OPTION, DEFAULT_ENCODER_MODEL, ENCODER_MODEL_OPTIONS } from './rerankerModels' + +const RerankerConfig = ({ value, onChange }) => { + const { t } = useTranslation() + const [open, setOpen] = React.useState(false) + const presetOptions = ENCODER_MODEL_OPTIONS.map(model => ({ + id: model, + label: model, + isDefault: model === DEFAULT_ENCODER_MODEL, + })) + const options = [ + ...presetOptions, + { id: CUSTOM_ENCODER_MODEL_OPTION, label: t('map_project.reranker_configuration_custom_option') } + ] + const isKnownOption = ENCODER_MODEL_OPTIONS.includes(value) + const selectedOption = isKnownOption ? 
+ presetOptions.find(option => option.id === value) : + options.find(option => option.id === CUSTOM_ENCODER_MODEL_OPTION) + const isCustomSelected = selectedOption?.id === CUSTOM_ENCODER_MODEL_OPTION + + return ( +
+ + +
+ + {t('map_project.reranker_configuration_description')} + + option?.label || ''} + isOptionEqualToValue={(option, current) => option.id === current.id} + onChange={(event, option) => { + if(option?.id === CUSTOM_ENCODER_MODEL_OPTION) { + onChange(isKnownOption ? '' : value) + return + } + onChange(option?.id || DEFAULT_ENCODER_MODEL) + }} + renderInput={params => ( + + )} + renderOption={(props, option) => ( +
  • + {option.label}{option.isDefault ? ` (${t('common.default')})` : ''} +
  • + )} + /> + { + isCustomSelected && + onChange(event.target.value || '')} + /> + } +
    +
    +
    + ) +} + +export default RerankerConfig diff --git a/src/components/map-projects/rerankerModels.js b/src/components/map-projects/rerankerModels.js new file mode 100644 index 0000000..202e4db --- /dev/null +++ b/src/components/map-projects/rerankerModels.js @@ -0,0 +1,7 @@ +export const DEFAULT_ENCODER_MODEL = 'BAAI/bge-reranker-v2-m3' + +export const ENCODER_MODEL_OPTIONS = [ + DEFAULT_ENCODER_MODEL, +] + +export const CUSTOM_ENCODER_MODEL_OPTION = '__custom_encoder_model__' diff --git a/src/i18n/locales/en/translations.json b/src/i18n/locales/en/translations.json index e61f3d3..c74785e 100644 --- a/src/i18n/locales/en/translations.json +++ b/src/i18n/locales/en/translations.json @@ -559,6 +559,12 @@ "lookup_configuration_url": "Repository URL", "lookup_configuration_token": "Token", "lookup_configuration_description": "Configure a Source/CodeSystem for lookup operation to fetch candidates definitions.", + "reranker_configuration": "Reranker Configuration", + "reranker_configuration_description": "Choose the reranker model used to calculate unified scores for this project. The default model is selected automatically, or you can enter a custom model name.", + "reranker_configuration_model": "Reranker model", + "reranker_configuration_custom_option": "Custom model name", + "reranker_configuration_custom_model": "Custom reranker model", + "reranker_configuration_placeholder": "e.g. 
BAAI/bge-reranker-v2-m3", "refresh_candidates_tooltip": "Refresh Candidates", "group_candidates": "Group By", "sort_candidates": "Sort", diff --git a/src/i18n/locales/es/translations.json b/src/i18n/locales/es/translations.json index 3836f97..2b1a92f 100644 --- a/src/i18n/locales/es/translations.json +++ b/src/i18n/locales/es/translations.json @@ -534,6 +534,12 @@ "lookup_configuration_url": "URL del repositorio", "lookup_configuration_token": "Token", "lookup_configuration_description": "Configure una Fuente/Sistema de códigos para la operación de búsqueda y recuperar definiciones de candidatos.", + "reranker_configuration": "Configuración del reranker", + "reranker_configuration_description": "Elija el modelo de reranker utilizado para calcular las puntuaciones unificadas de este proyecto. El modelo predeterminado se selecciona automáticamente, o puede ingresar un nombre de modelo personalizado.", + "reranker_configuration_model": "Modelo de reranker", + "reranker_configuration_custom_option": "Nombre de modelo personalizado", + "reranker_configuration_custom_model": "Modelo de reranker personalizado", + "reranker_configuration_placeholder": "p. ej. 
BAAI/bge-reranker-v2-m3", "refresh_candidates_tooltip": "Actualizar candidatos", "group_candidates": "Agrupar por", "sort_candidates": "Ordenar", diff --git a/src/i18n/locales/zh/translations.json b/src/i18n/locales/zh/translations.json index 5c0ee70..d2977c3 100644 --- a/src/i18n/locales/zh/translations.json +++ b/src/i18n/locales/zh/translations.json @@ -559,6 +559,12 @@ "lookup_configuration_url": "仓库 URL", "lookup_configuration_token": "令牌", "lookup_configuration_description": "配置用于查找操作的源/代码系统,以获取候选项定义。", + "reranker_configuration": "重排序器配置", + "reranker_configuration_description": "为此项目选择用于计算统一分数的重排序器模型。默认模型会自动选中,您也可以输入自定义模型名称。", + "reranker_configuration_model": "重排序器模型", + "reranker_configuration_custom_option": "自定义模型名称", + "reranker_configuration_custom_model": "自定义重排序器模型", + "reranker_configuration_placeholder": "例如 BAAI/bge-reranker-v2-m3", "refresh_candidates_tooltip": "刷新候选项", "group_candidates": "分组依据", "sort_candidates": "排序", From 478ba9184ec1dc5e4aed8c67538446e3f0cf9aba Mon Sep 17 00:00:00 2001 From: Sunny Aggarwal Date: Mon, 13 Apr 2026 12:50:00 +0530 Subject: [PATCH 2/4] OpenConceptLab/ocl_issues#2388 | Added Qwen model for encoder --- src/components/map-projects/rerankerModels.js | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/components/map-projects/rerankerModels.js b/src/components/map-projects/rerankerModels.js index 202e4db..5f3c8a3 100644 --- a/src/components/map-projects/rerankerModels.js +++ b/src/components/map-projects/rerankerModels.js @@ -1,7 +1,10 @@ export const DEFAULT_ENCODER_MODEL = 'BAAI/bge-reranker-v2-m3' +export const QWEN_3_0_POINT_6B = 'Qwen/Qwen3-Reranker-0.6B' +export const QWEN_3_4B = 'Qwen/Qwen3-Reranker-4B' export const ENCODER_MODEL_OPTIONS = [ DEFAULT_ENCODER_MODEL, + QWEN_3_0_POINT_6B ] export const CUSTOM_ENCODER_MODEL_OPTION = '__custom_encoder_model__' From 56ef70b9d585e82724e2e6335b3d5065a01805a8 Mon Sep 17 00:00:00 2001 From: Sunny Aggarwal Date: Mon, 13 Apr 2026 13:07:48 +0530 Subject: [PATCH 3/4] 
OpenConceptLab/ocl_issues#2388 | Expanded encoder model option --- .../map-projects/RerankerConfig.jsx | 23 +++++++++----- src/components/map-projects/rerankerModels.js | 30 ++++++++++++++++--- 2 files changed, 41 insertions(+), 12 deletions(-) diff --git a/src/components/map-projects/RerankerConfig.jsx b/src/components/map-projects/RerankerConfig.jsx index 8fa18fb..4e803d2 100644 --- a/src/components/map-projects/RerankerConfig.jsx +++ b/src/components/map-projects/RerankerConfig.jsx @@ -5,6 +5,7 @@ import TextField from '@mui/material/TextField' import Collapse from '@mui/material/Collapse' import Button from '@mui/material/Button' import FormHelperText from '@mui/material/FormHelperText' +import ListItemText from '@mui/material/ListItemText' import UpIcon from '@mui/icons-material/ArrowDropUp'; import DownIcon from '@mui/icons-material/ArrowDropDown'; import { CUSTOM_ENCODER_MODEL_OPTION, DEFAULT_ENCODER_MODEL, ENCODER_MODEL_OPTIONS } from './rerankerModels' @@ -13,15 +14,16 @@ const RerankerConfig = ({ value, onChange }) => { const { t } = useTranslation() const [open, setOpen] = React.useState(false) const presetOptions = ENCODER_MODEL_OPTIONS.map(model => ({ - id: model, - label: model, - isDefault: model === DEFAULT_ENCODER_MODEL, + id: model.id, + label: model.description, + isDefault: Boolean(model.default), + disabled: Boolean(model.disabled) })) const options = [ ...presetOptions, { id: CUSTOM_ENCODER_MODEL_OPTION, label: t('map_project.reranker_configuration_custom_option') } ] - const isKnownOption = ENCODER_MODEL_OPTIONS.includes(value) + const isKnownOption = ENCODER_MODEL_OPTIONS.map(option => option.id).includes(value) const selectedOption = isKnownOption ? 
presetOptions.find(option => option.id === value) : options.find(option => option.id === CUSTOM_ENCODER_MODEL_OPTION) @@ -48,8 +50,9 @@ const RerankerConfig = ({ value, onChange }) => { disableClearable options={options} value={selectedOption || null} - getOptionLabel={option => option?.label || ''} + getOptionLabel={option => option?.id || ''} isOptionEqualToValue={(option, current) => option.id === current.id} + getOptionDisabled={(option) => option?.disabled} onChange={(event, option) => { if(option?.id === CUSTOM_ENCODER_MODEL_OPTION) { onChange(isKnownOption ? '' : value) @@ -65,9 +68,13 @@ const RerankerConfig = ({ value, onChange }) => { /> )} renderOption={(props, option) => ( -
  • - {option.label}{option.isDefault ? ` (${t('common.default')})` : ''} -
  • + )} /> { diff --git a/src/components/map-projects/rerankerModels.js b/src/components/map-projects/rerankerModels.js index 5f3c8a3..e35f5f4 100644 --- a/src/components/map-projects/rerankerModels.js +++ b/src/components/map-projects/rerankerModels.js @@ -1,10 +1,32 @@ export const DEFAULT_ENCODER_MODEL = 'BAAI/bge-reranker-v2-m3' -export const QWEN_3_0_POINT_6B = 'Qwen/Qwen3-Reranker-0.6B' -export const QWEN_3_4B = 'Qwen/Qwen3-Reranker-4B' export const ENCODER_MODEL_OPTIONS = [ - DEFAULT_ENCODER_MODEL, - QWEN_3_0_POINT_6B + { + id: DEFAULT_ENCODER_MODEL, + description: 'Multilingual, general-purpose (0.6B)', + default: true + }, + { + id: 'Qwen/Qwen3-Reranker-0.6B', + description: 'General purpose reranker (0.6B)', + }, + { + id: 'Qwen/Qwen3-Reranker-4B', + description: 'General purpose reranker, best in quality (4B)', + disabled: true + }, + { + id: 'cross-encoder/ms-marco-MiniLM-L-6-v2', + description: 'Fast and lightweight, English-only (23M)', + }, + { + id: 'ncbi/MedCPT-Cross-Encoder', + description: 'Biomedical domain, trained on PubMed (110M)', + }, + { + id: 'Alibaba-NLP/gte-reranker-modernbert-base', + description: 'Balanced quality, supports longer descriptions (149M)', + }, ] export const CUSTOM_ENCODER_MODEL_OPTION = '__custom_encoder_model__' From 762668142070cdeb92d33072e6048127e8533441 Mon Sep 17 00:00:00 2001 From: Sunny Aggarwal Date: Mon, 13 Apr 2026 13:17:47 +0530 Subject: [PATCH 4/4] OpenConceptLab/ocl_issues#2388 | Added model logging for reranker --- src/components/map-projects/Discuss.jsx | 5 ++++- src/components/map-projects/MapProject.jsx | 4 ++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/components/map-projects/Discuss.jsx b/src/components/map-projects/Discuss.jsx index aecc559..05c3956 100644 --- a/src/components/map-projects/Discuss.jsx +++ b/src/components/map-projects/Discuss.jsx @@ -56,8 +56,11 @@ const Discuss = ({ logs, onAdd }) => { } if(log.action === 'algo_finished') return 
<>{t('map_project.finished_running')} {log.extras.algo} - if(log.action === 'rerank_finished') + if(log.action === 'rerank_finished') { + if(log.description) + return log.description return <>{t('map_project.finished_reranking')} + } return log.description || startCase(log.action) } diff --git a/src/components/map-projects/MapProject.jsx b/src/components/map-projects/MapProject.jsx index 4cb8f0b..52ca042 100644 --- a/src/components/map-projects/MapProject.jsx +++ b/src/components/map-projects/MapProject.jsx @@ -2233,12 +2233,12 @@ const MapProject = () => { return newCandidates }) markAlgo(index, 'rerank', 1) - log({action: 'rerank_finished'}, index) + log({action: 'rerank_finished', description: `Reranked with ${encoderModel}`}, index) if(isBulk) setTimeout(() => setAutoMatched([index]), 1000) return response } catch (e) { - log({action: 'rerank_failed'}, index) + log({action: 'rerank_failed', description: `Rerank failed with ${encoderModel}`}, index) markAlgo(index, 'rerank', -2); // optional: failed state return null; }