Spring Boot 3.x + Milvus 2.x 实战：Java 从 0 到 1 实现向量检索全流程



Spring Boot 3.x + Milvus 2.x 实战：Java 从 0 到 1 实现向量检索全流程

章鱼出海

2025-10-19

导读：在 AI 向量检索场景中，Milvus 作为主流的向量数据库，常需与 Java 技术栈结合落地。

在 AI 向量检索场景中，Milvus 作为主流的向量数据库，常需与 Java 技术栈结合落地。本文基于Spring Boot 3.5.6 + JDK 21 + Milvus SDK 2.6.3，手把手教你用 Java 实现从环境搭建到向量相似检索的完整流程，代码可直接复制使用，新手也能快速上手。

一、环境准备：配置依赖与参数

在开始实战前，需先完成 Maven 依赖引入和 Milvus 连接参数配置，确保基础环境可用。

1.1 Maven 依赖配置

引入 Spring Boot Web、Lombok、Milvus Java SDK 等核心依赖，版本信息已固定，避免版本兼容问题。

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <!-- Maven build for the milvus-study demo: a Spring Boot application that talks to Milvus through the Java SDK. -->
    <modelVersion>4.0.0</modelVersion>
    <!-- Inherits from the Spring Boot starter parent, pinned to 3.5.6. -->
    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>3.5.6</version>
        <relativePath/> <!-- lookup parent from repository -->
    </parent>
    <groupId>com.strivelearn</groupId>
    <artifactId>milvus-study</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <name>milvus-study</name>
    <description>milvus-study</description>
    <!-- The url/licenses/developers/scm elements below are empty placeholders. -->
    <url/>
    <licenses>
        <license/>
    </licenses>
    <developers>
        <developer/>
    </developers>
    <scm>
        <connection/>
        <developerConnection/>
        <tag/>
        <url/>
    </scm>
    <properties>
        <!-- Java language level used for the build. -->
        <java.version>21</java.version>
    </properties>
    <dependencies>
        <!-- Dependencies without an explicit <version> presumably get it from the parent POM's dependency management. -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>

        <!-- Lombok is marked optional, so it is not passed on to consumers of this artifact. -->
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <optional>true</optional>
        </dependency>

        <!-- Test-scoped starter; the article's snippets use @SpringBootTest and JUnit's @Test. -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
        </dependency>

        <!-- Milvus Java SDK, pinned to 2.6.3; provides the io.milvus.v2.* classes used by the snippets. -->
        <dependency>
            <groupId>io.milvus</groupId>
            <artifactId>milvus-sdk-java</artifactId>
            <version>2.6.3</version>
        </dependency>
      
        <!-- Gson: the insert snippet converts row maps to JsonObject with it. -->
        <dependency>
              <groupId>com.google.code.gson</groupId>
              <artifactId>gson</artifactId>
          </dependency>
    </dependencies>

    <build>
        <plugins>
            <!-- Puts Lombok on the compiler's annotation-processor path. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <annotationProcessorPaths>
                        <path>
                            <groupId>org.projectlombok</groupId>
                            <artifactId>lombok</artifactId>
                        </path>
                    </annotationProcessorPaths>
                </configuration>
            </plugin>
            <!-- Spring Boot plugin, configured to exclude Lombok (presumably from the repackaged artifact). -->
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
                <configuration>
                    <excludes>
                        <exclude>
                            <groupId>org.projectlombok</groupId>
                            <artifactId>lombok</artifactId>
                        </exclude>
                    </excludes>
                </configuration>
            </plugin>
        </plugins>
    </build>

</project>

1.2 应用参数配置（application.yml）

在 resources 目录下创建 application.yml，配置 Milvus 的连接地址、鉴权令牌、目标数据库和集合名。其中 uri 和 token 会由步骤 1 的配置类通过 @Value 读取；database、collection、dim 在后续示例代码中为便于演示直接写成了常量，需与此处的取值保持一致。

# Milvus settings. The configuration class in this article reads milvus.uri and milvus.token via @Value.
# NOTE(review): the test snippets hard-code the database name, collection name and dimension
# instead of reading the keys below, so keep the two in sync.
milvus:
  uri: "http://localhost:19530" # for a remote Milvus, replace with http://<server-ip>:19530
  token: "root:Milvus"          # may be left empty when authentication is disabled (enabling it is recommended in production)
  database: "ai_image_db"       # custom target database name
  collection: "image_vector_db" # custom vector collection name
  dim: 5                        # vector dimension (5 in this article's example; must match the actual vectors)

二、核心实战：从连接到检索的 8 个步骤

步骤 1：Milvus 连接配置 —— 注入核心客户端

通过 Spring Bean 的方式注入MilvusClientV2，全局复用客户端实例，避免重复创建连接。

package com.strivelearn.milvusstudy.config;

import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

import io.milvus.v2.client.ConnectConfig;
import io.milvus.v2.client.MilvusClientV2;

/**
 * Milvus客户端配置类：将MilvusClientV2注入Spring容器
 *
 * @author strivelearn
 * @version MilvusConfig.java, 2025年10月19日
 */
@Configuration
public class MilvusConfig {
    @Bean
    public MilvusClientV2 milvusClient(@Value("${milvus.uri}") String uri, @Value("${milvus.token}") String token) {
        // 构建连接配置：传入地址和鉴权令牌
        ConnectConfig config = ConnectConfig.builder().uri(uri).token(token).build();
        // 创建客户端实例并返回
        MilvusClientV2 client = new MilvusClientV2(config);
        return client;
    }
}

步骤 2：创建并切换数据库 —— 隔离数据空间

Milvus 支持多数据库，先创建目标数据库（若不存在），再切换到该库，确保后续操作在指定空间内执行。

package com.strivelearn.milvusstudy;

import io.milvus.v2.client.MilvusClientV2;
import io.milvus.v2.service.database.request.CreateDatabaseReq;
import jakarta.annotation.Resource;
import org.junit.jupiter.api.Test;
import org.springframework.boot.test.context.SpringBootTest;

@SpringBootTest
class MilvusStudyApplicationTests {
    // 注入步骤1配置的Milvus客户端
    @Resource
    private MilvusClientV2 client;

    @Test
    public void testCreateAndSwitchDb() throws InterruptedException {
        // 目标数据库名（与application.yml中一致）
        String dbName = "ai_image_db";
        try {
            // 1. 创建数据库：若已存在会抛出异常
            client.createDatabase(CreateDatabaseReq.builder().databaseName(dbName).build());
            System.out.println("创建数据库成功: " + dbName);
        } catch (Exception e) {
            // 2. 捕获“数据库已存在”异常，避免程序报错
            if ((e.getMessage() + "").contains("already exists")) {
                System.out.println("数据库已存在: " + dbName);
            } else {
                // 其他异常直接抛出，便于排查问题
                throw e;
            }
        }

        // 3. 切换到目标数据库：后续操作均在该库下执行
        client.useDatabase(dbName);
    }
}

步骤 3：创建向量集合 —— 定义数据结构

集合（Collection）是 Milvus 存储数据的基本单元，需定义主键、向量字段、标量字段（如标签），本文以 “图像向量” 为例设计结构。

package com.strivelearn.milvusstudy;

import org.junit.jupiter.api.Test;
import org.springframework.boot.test.context.SpringBootTest;

import io.milvus.v2.client.MilvusClientV2;
import io.milvus.v2.common.DataType;
import io.milvus.v2.service.collection.request.AddFieldReq;
import io.milvus.v2.service.collection.request.CreateCollectionReq;
import io.milvus.v2.service.collection.request.HasCollectionReq;
import jakarta.annotation.Resource;

@SpringBootTest
class MilvusStudyApplicationTests {
    @Resource
    private MilvusClientV2 client;

    @Test
    public void testCreateCollection() throws InterruptedException {
        // 1. 先切换到目标数据库
        String dbName = "ai_image_db";
        client.useDatabase(dbName);

        // 2. 目标集合名（与application.yml中一致）、向量维度
        String collection = "image_vector_db";
        int dim = 5;

        // 3. 检查集合是否已存在：避免重复创建
        boolean exists = client.hasCollection(HasCollectionReq.builder().collectionName(collection).build());

        if (!exists) {
            // 4. 创建集合Schema：定义字段结构
            CreateCollectionReq.CollectionSchema schema = client.createSchema();

            // 字段1：主键（img_id）- Int64类型，自动生成（autoID=true）
            schema.addField(AddFieldReq.builder()
                    .fieldName("img_id")
                    .dataType(DataType.Int64)
                    .isPrimaryKey(true)
                    .autoID(true)
                    .description("图像主键（自动生成）")
                    .build());

            // 字段2：向量字段（img_vector）- FloatVector类型，5维
            schema.addField(AddFieldReq.builder()
                    .fieldName("img_vector")
                    .dataType(DataType.FloatVector)
                    .dimension(dim)
                    .description("图像向量（5维）")
                    .build());

            // 字段3：标量字段（label）- Int64类型，用于标记图像类别（0=猫，1=狗，2=鸟）
            schema.addField(AddFieldReq.builder()
                    .fieldName("label")
                    .dataType(DataType.Int64)
                    .description("图像类别标签（0=猫，1=狗，2=鸟）")
                    .build());

            // 5. 创建集合：传入集合名和Schema
            client.createCollection(CreateCollectionReq.builder()
                    .collectionName(collection)
                    .collectionSchema(schema)
                    .build());

            System.out.println("集合创建成功: " + collection);
        } else {
            System.out.println("集合已存在: " + collection);
        }
    }
}

步骤 4：创建向量索引 —— 加速检索

Milvus 需通过索引提升检索效率，本文选用常用的IVF_FLAT 索引（适合中小数据量），搭配 L2 距离（欧式距离）计算相似度。

package com.strivelearn.milvusstudy;

import java.util.List;
import java.util.Map;

import org.junit.jupiter.api.Test;
import org.springframework.boot.test.context.SpringBootTest;

import io.milvus.v2.client.MilvusClientV2;
import io.milvus.v2.common.IndexParam;
import io.milvus.v2.service.index.request.CreateIndexReq;
import jakarta.annotation.Resource;

/**
 * 索引操作测试类
 *
 * @author strivelearn
 * @version IndexTest.java, 2025年10月19日
 */
@SpringBootTest
public class IndexTest {
    @Resource
    private MilvusClientV2 client;

    @Test
    public void testCreateIndex() throws InterruptedException {
        // 1. 切换到目标数据库和集合
        String dbName = "ai_image_db";
        client.useDatabase(dbName);
        String collection = "image_vector_db";

        // 2. 构建索引参数：针对向量字段img_vector创建索引
        IndexParam indexParam = IndexParam.builder()
                .fieldName("img_vector") // 索引关联的向量字段
                .indexType(IndexParam.IndexType.IVF_FLAT) // 索引类型：IVF_FLAT
                .metricType(IndexParam.MetricType.L2) // 相似度计算方式：L2距离
                .extraParams(Map.of("nlist", 128)) // 聚类数量：默认128，可按数据量调整
                .build();

        // 3. 创建索引：传入集合名和索引参数
        client.createIndex(CreateIndexReq.builder()
                .collectionName(collection)
                .indexParams(List.of(indexParam))
                .build());

        System.out.println("索引创建成功: IVF_FLAT / L2 / nlist=128");
    }
}

步骤 5：插入测试数据 —— 填充向量样本

创建完集合和索引后，插入 5 条图像向量样本（含类别标签），用于后续相似检索测试。

package com.strivelearn.milvusstudy;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.junit.jupiter.api.Test;
import org.springframework.boot.test.context.SpringBootTest;

import com.google.gson.Gson;
import com.google.gson.JsonObject;

import io.milvus.v2.client.MilvusClientV2;
import io.milvus.v2.service.vector.request.InsertReq;
import jakarta.annotation.Resource;

/**
 * 数据插入测试类
 *
 * @author strivelearn
 * @version DataInsertTest.java, 2025年10月19日
 */
@SpringBootTest
public class DataInsertTest {

    @Resource
    private MilvusClientV2 client;

    // 工具方法：将float数组转为List<Float>（适配Milvus字段类型）
    List<Float> toFloatList(float[] a) {
        List<Float> list = new ArrayList<>(a.length);
        for (float v : a)
            list.add(v);
        return list;
    }

    @Test
    public void testInsertData() throws InterruptedException {
        // 1. 切换到目标数据库和集合
        String dbName = "ai_image_db";
        client.useDatabase(dbName);
        String collection = "image_vector_db";

        // 2. 准备测试数据：5条图像向量 + 对应标签
        List<float[]> vectors = List.of(
                new float[] { 0.1f, 0.2f, 0.3f, 0.4f, 0.5f }, // 标签0（猫）
                new float[] { 0.2f, 0.3f, 0.4f, 0.5f, 0.6f }, // 标签0（猫）
                new float[] { 0.3f, 0.4f, 0.5f, 0.6f, 0.7f }, // 标签1（狗）
                new float[] { 0.4f, 0.5f, 0.6f, 0.7f, 0.8f }, // 标签1（狗）
                new float[] { 0.5f, 0.6f, 0.7f, 0.8f, 0.9f }  // 标签2（鸟）
        );
        List<Integer> labels = List.of(0, 0, 1, 1, 2);

        // 3. 组装数据：转为Milvus支持的Map格式（key=字段名，value=字段值）
        List<Map<String, Object>> rows = new ArrayList<>();
        for (int i = 0; i < vectors.size(); i++) {
            Map<String, Object> row = new HashMap<>();
            row.put("img_vector", toFloatList(vectors.get(i))); // 向量字段
            row.put("label", labels.get(i)); // 标量字段（标签）
            // 主键img_id无需手动传入（autoID=true）
            rows.add(row);
        }

        // 4. 转为JsonObject格式：Milvus SDK要求的入参类型
        Gson gson = new Gson();
        List<JsonObject> jsonObjects = rows.stream()
                .map(m -> gson.toJsonTree(m).getAsJsonObject())
                .toList();

        // 5. 执行插入操作
        client.insert(InsertReq.builder()
                .collectionName(collection)
                .data(jsonObjects)
                .build());

        System.out.println("插入测试数据 5 行完成");
        // 关键注意点：Milvus 2.x默认开启自动flush，插入后无需手动调用（数据会定期落盘）
    }
}

步骤 6：加载集合到内存 —— 检索前置操作

Milvus 检索时需将集合加载到内存（仅需执行一次），若不加载会导致检索失败，这是容易踩坑的点。

package com.strivelearn.milvusstudy;

import org.springframework.boot.test.context.SpringBootTest;

import io.milvus.v2.client.MilvusClientV2;
import io.milvus.v2.service.collection.request.LoadCollectionReq;
import jakarta.annotation.Resource;

/**
 * 集合加载与检索测试类
 *
 * @author strivelearn
 * @version SearchTests.java, 2025年10月19日
 */
@SpringBootTest
public class SearchTests {

    @Resource
    private MilvusClientV2 client;

    // 单独提取加载方法，便于复用
    public void loadCollectionToMemory() throws InterruptedException {
        String dbName = "ai_image_db";
        client.useDatabase(dbName);
        String collection = "image_vector_db";

        // 加载集合到内存：检索前必须执行
        client.loadCollection(LoadCollectionReq.builder()
                .collectionName(collection)
                .build());

        System.out.println("集合已加载到内存：" + collection);
    }
}

步骤 7：向量相似检索 —— 核心功能实现

基于插入的测试数据，执行TopK 检索（返回最相似的前 3 条结果），通过 L2 距离判断相似度（距离越小越相似）。

package com.strivelearn.milvusstudy;

import java.util.List;
import java.util.Map;

import org.junit.jupiter.api.Test;
import org.springframework.boot.test.context.SpringBootTest;

import io.milvus.v2.client.MilvusClientV2;
import io.milvus.v2.common.IndexParam;
import io.milvus.v2.service.collection.request.LoadCollectionReq;
import io.milvus.v2.service.collection.request.ReleaseCollectionReq;
import io.milvus.v2.service.vector.request.SearchReq;
import io.milvus.v2.service.vector.request.data.BaseVector;
import io.milvus.v2.service.vector.request.data.FloatVec;
import io.milvus.v2.service.vector.response.SearchResp;
import jakarta.annotation.Resource;

@SpringBootTest
public class SearchTests {

    @Resource
    private MilvusClientV2 client;

    // 待检索的查询向量（模拟一张“猫”的图像向量）
    private final float[] queryVector = new float[] { 0.15f, 0.25f, 0.35f, 0.45f, 0.55f };

    @Test
    public void testVectorSearch() throws InterruptedException {
        // 1. 切换数据库 + 加载集合到内存
        String dbName = "ai_image_db";
        client.useDatabase(dbName);
        String collection = "image_vector_db";
        client.loadCollection(LoadCollectionReq.builder().collectionName(collection).build());
        System.out.println("集合已加载到内存");

        // 2. 构建查询向量：转为Milvus支持的FloatVec类型
        List<BaseVector> queryVectors = List.of(new FloatVec(queryVector));

        // 3. 构建检索请求：配置核心参数
        SearchReq searchReq = SearchReq.builder()
                .collectionName(collection) // 目标集合
                .annsField("img_vector")    // 检索的向量字段
                .metricType(IndexParam.MetricType.L2) // 相似度计算方式（与索引一致）
                .data(queryVectors)         // 查询向量列表（本文仅1条）
                .limit(3)                   // TopK：返回前3条最相似结果
                .searchParams(Map.of("nprobe", 10)) // 检索参数：nprobe越大精度越高（默认10）
                .outputFields(List.of("label")) // 需返回的标量字段（如标签）
                .build();

        // 4. 执行检索并获取结果
        SearchResp resp = client.search(searchReq);

        // 5. 解析检索结果：遍历输出ID、距离、标签
        List<List<SearchResp.SearchResult>> results = resp.getSearchResults();
        for (List<SearchResp.SearchResult> perQueryResult : results) {
            for (SearchResp.SearchResult result : perQueryResult) {
                Object imgId = result.getId(); // 图像主键（autoID生成）
                Float distance = result.getScore(); // L2距离（越小越相似）
                Map<String, Object> fields = result.getEntity(); // 标量字段（如label）
                System.out.printf("ID:%s 距离:%.3f 标签:%s%n", 
                        String.valueOf(imgId), distance, String.valueOf(fields.get("label")));
            }
        }

        // 6. 检索完成后释放集合内存（可选：避免占用过多内存）
        client.releaseCollection(ReleaseCollectionReq.builder()
                .collectionName(collection)
                .build());
    }
}

检索结果说明

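由于查询向量模拟 “猫” 的特征，结果会优先返回标签为 0（猫）的两条数据，距离最小的为最相似。需要注意两点：其一，Milvus 的 L2 度量返回的是平方欧氏距离（不开方），按本文数据计算，前两条的距离约为 0.0125，第三条约为 0.1125；其二，主键由 autoID 自动生成，实际为较大的 Int64 数值，下面的 ID 仅作示意。典型输出如下（距离的末位可能因浮点误差略有出入）：

ID:1 距离:0.013 标签:0
ID:2 距离:0.013 标签:0
ID:3 距离:0.113 标签:1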

步骤 8：清理资源 —— 释放内存与断开连接

检索完成后，可根据需求释放集合内存（不删除数据）或断开客户端连接，避免资源浪费。

// 1. Release the collection from memory: only memory is freed, the data stays on disk (recommended)
client.releaseCollection(ReleaseCollectionReq.builder()
        .collectionName("image_vector_db")
        .build());
System.out.println("集合内存已释放");

// 2. Close the client connection: do this before the program exits (e.g. on service shutdown)
client.close();
System.out.println("Milvus客户端已关闭，资源清理完成");

三、总结与扩展

本文覆盖了 Spring Boot + Milvus 的全流程实战，从环境搭建到向量检索，代码可直接复用。若需进一步优化，可关注以下方向：

批量插入优化：大数据量时将数据分批后多次调用 insert 接口（每批控制在合适的行数），或使用 SDK 提供的 BulkWriter 配合批量导入（bulk import），提升插入效率；
检索性能调优：调整nprobe参数（精度与速度权衡），或选用 IVF_PQ 等更高效的索引；
异常处理增强：增加重试机制（如连接超时），提升生产环境稳定性。

如果在实战中遇到问题，欢迎在留言区交流，后续会分享更多 Milvus 进阶技巧！

【声明】内容源于网络

章鱼出海

跨境分享坊 | 每天提供跨境参考

内容 47037

粉丝 3

章鱼出海跨境分享坊 | 每天提供跨境参考

总阅读262.4k

粉丝3

内容47.0k