Passa al contenuto principale

Documenti & RAG

Come gestiamo caricamento, indicizzazione e ricerca semantica dei documenti.

Upload documenti / URL

/main/src/server/serverHttp.ts
// POST /upload/files/:collectionName — file upload into the collection named in the
// route parameter. `upload.single('file')` runs before the handler
// (presumably a multipart middleware reading the `file` form field — confirm).
// The handler body is elided in this excerpt.
app.post('/upload/files/:collectionName', upload.single('file'), async (req, res) => { /* ... */ })
// POST /upload/url/:collectionName — URL upload into the collection named in the
// route parameter. Route params are validated against `paramsSchema` and the body
// against `uploadUrlBodySchema` before the handler runs. The handler body is elided
// in this excerpt.
app.post('/upload/url/:collectionName', validateParams(paramsSchema), validateBody(uploadUrlBodySchema), async (req, res) => { /* ... */ })
  • I task vengono inviati al Worker (/tasks/add) per l'estrazione dei testi e la creazione degli embedding.

Tool di ricerca documenti

/main/src/agentTools/documentSearchTool.ts
// Agent tool registered under the name 'searchDocuments'. It receives a `query`
// argument and reads the target collection from the runnable config.
export const searchDocumentsTool = tool(async ({ query }, runnable_config) => {
// The collection name comes from `configurable.collection_name` on the runnable
// config, not from the tool arguments.
const configurable = runnable_config.configurable as CustomConfigurable
const collectionName = configurable?.collection_name
// ... calls searchVectorStore (elided in this excerpt)
// NOTE(review): `resultsWithoutLinks` is built by the elided code above; presumably
// the search results with links removed — confirm against the full source.
return resultsWithoutLinks
}, { name: 'searchDocuments', /* schema ... */ })

Ricerca nel vector store (Qdrant)

/main/src/agentTools/qdrantService.ts
/**
 * Runs a vector search against a Qdrant collection.
 *
 * The query is turned into an embedding with `generateEmbeddings`, flattened,
 * and passed to `qdrantClient.search` together with an optional page filter.
 *
 * @param query - Text to embed and search for.
 * @param limit - Maximum number of hits requested from Qdrant (defaults to 10).
 * @param metadata - Optional filter source. When `metadata.page` is truthy, the
 *   search is restricted to points whose `metadata.loc.pageNumber` matches it;
 *   otherwise no filter is sent.
 * @param collectionName - Collection to search.
 * @returns One entry per hit: `content` (empty string when the payload has none),
 *   `metadata` (empty object when the payload has none) and `similarity`
 *   (the hit's score).
 */
export async function searchVectorStore({ query, limit = 10, metadata, collectionName }: SearchVectorStoreParams) {
  const embedding = await generateEmbeddings(query)

  // Only build a filter when a (truthy) page was supplied.
  const pageFilter = metadata?.page
    ? {
        must: [
          {
            key: 'metadata.loc.pageNumber',
            match: { value: metadata.page as unknown as string },
          },
        ],
      }
    : undefined

  const hits = await qdrantClient.search(collectionName, {
    vector: embedding.flat(),
    limit,
    filter: pageFilter,
  })

  // Project each hit onto the plain shape consumed by callers.
  return hits.map(hit => {
    const content = (hit.payload?.content as string) || ''
    const hitMetadata = (hit.payload?.metadata as Record<string, unknown>) || {}
    return { content, metadata: hitMetadata, similarity: hit.score }
  })
}