新增CMS+RAG+AI知识库模块/向量数据库检索增强生成及人工智能对话

This commit is contained in:
thinkgem
2025-03-19 12:39:51 +08:00
parent af61f14a9a
commit 080b48559f
17 changed files with 4476 additions and 0 deletions

View File

@@ -0,0 +1,54 @@
/**
* Copyright (c) 2013-Now http://jeesite.com All rights reserved.
* No deletion without permission, or be held responsible to law.
*/
package com.jeesite.modules.cms.ai.config;
import com.jeesite.common.datasource.DataSourceHolder;
import org.springframework.ai.chat.client.ChatClient;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Primary;
import org.springframework.jdbc.core.JdbcTemplate;
import javax.sql.DataSource;
import java.sql.SQLException;
/**
 * Spring configuration for the CMS AI chat module.
 * @author ThinkGem
 */
@Configuration
public class CmsAiChatConfig {

	/**
	 * JdbcTemplate backed by the "ds_pgvector" data source.
	 * The bean is conditional on the jdbc.ds_pgvector.type property and is
	 * marked as the primary JdbcTemplate.
	 * @throws SQLException propagated from the data source creation call
	 * @author ThinkGem
	 */
	@Bean
	@Primary
	@ConditionalOnProperty(name = "jdbc.ds_pgvector.type")
	public JdbcTemplate pgVectorStoreJdbcTemplate() throws SQLException {
		return new JdbcTemplate(
				DataSourceHolder.getRoutingDataSource().createDataSource("ds_pgvector"));
	}

	/**
	 * Chat client with the knowledge-base assistant system prompt preset.
	 * @param builder the chat client builder supplied by the container
	 * @author ThinkGem
	 */
	@Bean
	public ChatClient chatClient(ChatClient.Builder builder) {
		ChatClient.Builder withSystemPrompt =
				builder.defaultSystem("你是我的知识库AI助手请帮我解答我提出的相关问题。");
		return withSystemPrompt.build();
	}

	// Disabled: custom batching strategy bean, kept for reference.
//	@Bean
//	public BatchingStrategy batchingStrategy() {
//		return new TokenCountBatchingStrategy(EncodingType.CL100K_BASE, Integer.MAX_VALUE, 0.1);
//	}
}

View File

@@ -0,0 +1,113 @@
/**
* Copyright (c) 2013-Now http://jeesite.com All rights reserved.
* No deletion without permission, or be held responsible to law.
*/
package com.jeesite.modules.cms.ai.service;
import com.jeesite.common.collect.ListUtils;
import com.jeesite.common.collect.MapUtils;
import com.jeesite.common.lang.StringUtils;
import com.jeesite.common.lang.TimeUtils;
import com.jeesite.common.utils.PageUtils;
import com.jeesite.modules.cms.entity.Article;
import com.jeesite.modules.cms.service.ArticleVectorStore;
import com.jeesite.modules.cms.utils.CmsUtils;
import com.vladsch.flexmark.html2md.converter.FlexmarkHtmlConverter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.ai.document.Document;
import org.springframework.ai.transformer.splitter.TokenTextSplitter;
import org.springframework.ai.vectorstore.VectorStore;
import org.springframework.ai.vectorstore.filter.FilterExpressionBuilder;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.util.List;
import java.util.Map;
/**
 * Stores CMS articles in the vector store.
 * @author ThinkGem
 */
@Service
public class ArticleVectorStoreImpl implements ArticleVectorStore {

	/** Number of split documents handed to the vector store per add call. */
	private static final int ADD_BATCH_SIZE = 64;

	/** Separator placed between the text parts that make up the indexed content. */
	private static final String PART_SEPARATOR = ", ";

	protected Logger logger = LoggerFactory.getLogger(getClass());

	@Autowired
	private VectorStore vectorStore;

	/**
	 * Saves an article to the vector store, replacing any vectors previously
	 * stored for the same article id.
	 * <p>
	 * The indexed text is title, keywords, description and the article body,
	 * converted from HTML to Markdown and split into token-sized chunks.
	 * Blank parts are skipped so that the literal text "null" is never indexed.
	 * Metadata entries whose value is null are left out.
	 * NOTE(review): Spring AI's Document is believed to reject null metadata
	 * values - confirm against the Spring AI version in use.
	 * @param article the article to index; its category and site must be set
	 * @author ThinkGem
	 */
	@Override
	public void save(Article article) {
		Map<String, Object> metadata = MapUtils.newHashMap();
		putIfNotNull(metadata, "id", article.getId());
		putIfNotNull(metadata, "siteCode", article.getCategory().getSite().getSiteCode());
		putIfNotNull(metadata, "categoryCode", article.getCategory().getCategoryCode());
		putIfNotNull(metadata, "categoryName", article.getCategory().getCategoryName());
		putIfNotNull(metadata, "title", article.getTitle());
		putIfNotNull(metadata, "href", article.getHref());
		putIfNotNull(metadata, "keywords", article.getKeywords());
		putIfNotNull(metadata, "description", article.getDescription());
		putIfNotNull(metadata, "url", article.getUrl());
		putIfNotNull(metadata, "status", article.getStatus());
		putIfNotNull(metadata, "createBy", article.getCreateBy());
		putIfNotNull(metadata, "createDate", article.getCreateDate());
		putIfNotNull(metadata, "updateBy", article.getUpdateBy());
		putIfNotNull(metadata, "updateDate", article.getUpdateDate());

		StringBuilder content = new StringBuilder();
		appendPart(content, article.getTitle());
		appendPart(content, article.getKeywords());
		appendPart(content, article.getDescription());
		// The article body is optional: it is only loaded when the query asked for it.
		String body = article.getArticleData() != null ? article.getArticleData().getContent() : null;
		if (StringUtils.isNotBlank(body)) {
			appendPart(content, StringUtils.toMobileHtml(body));
		}

		String markdown = FlexmarkHtmlConverter.builder().build().convert(content.toString());
		List<Document> documents = List.of(new Document(article.getId(), markdown, metadata));
		// Split before deleting, so a conversion or split failure leaves the old vectors in place.
		List<Document> splitDocuments = new TokenTextSplitter().apply(documents);
		this.delete(article); // remove the previous vectors of this article
		if (splitDocuments.isEmpty()) {
			return; // nothing to index
		}
		ListUtils.pageList(splitDocuments, ADD_BATCH_SIZE, params -> {
			@SuppressWarnings("unchecked") // pageList passes the current page as the first argument
			List<Document> batch = (List<Document>) params[0];
			vectorStore.add(batch); // add the new vectors
			return null;
		});
	}

	/**
	 * Deletes all vectors whose "id" metadata equals the article id.
	 * Does nothing when the article id is blank.
	 * @param article the article whose vectors are removed
	 * @author ThinkGem
	 */
	@Override
	public void delete(Article article) {
		if (StringUtils.isNotBlank(article.getId())) {
			vectorStore.delete(new FilterExpressionBuilder().eq("id", article.getId()).build());
		}
	}

	/**
	 * Rebuilds the vector store for every article matching the given query,
	 * page by page, saving each article found.
	 * @param article the query condition; its category and site are read for logging
	 * @return a message stating completion and elapsed time, or the failure reason
	 * @author ThinkGem
	 */
	public String rebuild(Article article) {
		logger.debug("开始重建向量库。 siteCode: {}, categoryCode: {}",
				article.getCategory().getSite().getSiteCode(),
				article.getCategory().getCategoryCode());
		long start = System.currentTimeMillis();
		try {
			article.setIsQueryArticleData(true); // also load the article body
			PageUtils.findList(article, null, e -> {
				List<Article> list = CmsUtils.getArticleService().findList((Article) e);
				if (!list.isEmpty()) {
					list.forEach(this::save);
					return true;
				}
				return false;
			});
		} catch (Exception ex) {
			logger.error("重建向量库失败", ex);
			return "重建向量库失败:" + ex.getMessage();
		}
		String message = "重建向量库完成! 用时" + TimeUtils.formatTime(System.currentTimeMillis() - start);
		logger.debug(message);
		return message;
	}

	/** Puts the entry into the metadata map unless the value is null. */
	private static void putIfNotNull(Map<String, Object> metadata, String key, Object value) {
		if (value != null) {
			metadata.put(key, value);
		}
	}

	/** Appends a non-blank text part, inserting the separator between parts. */
	private static void appendPart(StringBuilder content, String part) {
		if (StringUtils.isBlank(part)) {
			return;
		}
		if (content.length() > 0) {
			content.append(PART_SEPARATOR);
		}
		content.append(part);
	}
}

View File

@@ -0,0 +1,45 @@
/**
* Copyright (c) 2013-Now http://jeesite.com All rights reserved.
* No deletion without permission, or be held responsible to law.
*/
package com.jeesite.modules.cms.ai.service;
import com.jeesite.common.cache.CacheUtils;
import com.jeesite.common.collect.ListUtils;
import org.springframework.ai.chat.memory.ChatMemory;
import org.springframework.ai.chat.messages.Message;
import org.springframework.stereotype.Service;
import java.util.List;
/**
 * Chat memory that keeps conversation messages in the "cmsChatMsgCache" cache.
 * @author ThinkGem
 */
@Service
public class CacheChatMemory implements ChatMemory {

	private static final String CMS_CHAT_MSG_CACHE = "cmsChatMsgCache";

	/**
	 * Appends the messages to the cached history of the conversation,
	 * starting a new history when none is cached yet.
	 */
	@Override
	public void add(String conversationId, List<Message> messages) {
		List<Message> cached = CacheUtils.get(CMS_CHAT_MSG_CACHE, conversationId);
		List<Message> history = (cached != null) ? cached : ListUtils.newArrayList();
		history.addAll(messages);
		CacheUtils.put(CMS_CHAT_MSG_CACHE, conversationId, history);
	}

	/**
	 * Returns the last lastN cached messages of the conversation,
	 * or an empty list when nothing is cached.
	 */
	@Override
	public List<Message> get(String conversationId, int lastN) {
		List<Message> history = CacheUtils.get(CMS_CHAT_MSG_CACHE, conversationId);
		if (history == null) {
			return List.of();
		}
		int toSkip = Math.max(0, history.size() - lastN);
		return history.stream().skip(toSkip).toList();
	}

	/**
	 * Removes the cached history of the conversation.
	 */
	@Override
	public void clear(String conversationId) {
		CacheUtils.remove(CMS_CHAT_MSG_CACHE, conversationId);
	}
}

View File

@@ -0,0 +1,115 @@
/**
* Copyright (c) 2013-Now http://jeesite.com All rights reserved.
* No deletion without permission, or be held responsible to law.
*/
package com.jeesite.modules.cms.ai.service;
import com.jeesite.common.cache.CacheUtils;
import com.jeesite.common.collect.MapUtils;
import com.jeesite.common.idgen.IdGen;
import com.jeesite.common.lang.DateUtils;
import com.jeesite.common.lang.StringUtils;
import com.jeesite.common.service.BaseService;
import com.jeesite.modules.sys.utils.UserUtils;
import org.springframework.ai.chat.client.ChatClient;
import org.springframework.ai.chat.client.advisor.MessageChatMemoryAdvisor;
import org.springframework.ai.chat.client.advisor.QuestionAnswerAdvisor;
import org.springframework.ai.chat.memory.ChatMemory;
import org.springframework.ai.chat.messages.Message;
import org.springframework.ai.chat.messages.UserMessage;
import org.springframework.ai.chat.model.ChatResponse;
import org.springframework.ai.vectorstore.SearchRequest;
import org.springframework.ai.vectorstore.VectorStore;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import reactor.core.publisher.Flux;
import java.util.List;
import java.util.Map;
/**
 * AI chat service: conversation list management and streamed chat.
 * <p>
 * Chat memory is stored under a key made of the current user (or session) key
 * plus the conversation id, so a conversation id supplied by one user can never
 * read, extend or clear the chat history of another user.
 * NOTE(review): history cached under plain conversation ids before this change
 * is no longer found.
 * @author ThinkGem
 */
@Service
public class CmsAiChatService extends BaseService {

	private static final String CMS_CHAT_CACHE = "cmsChatCache";

	/** Maximum number of messages returned when loading a conversation. */
	private static final int MAX_HISTORY_MESSAGES = 100;

	/** Chat history window size passed to the memory advisor. */
	private static final int CHAT_HISTORY_WINDOW_SIZE = 1024;

	/** Minimum similarity for knowledge-base documents added to the prompt. */
	private static final float SIMILARITY_THRESHOLD = 0.6F;

	/** Maximum number of knowledge-base documents added to the prompt. */
	private static final int TOP_K = 6;

	@Autowired
	private ChatClient chatClient;
	@Autowired
	private ChatMemory chatMemory;
	@Autowired
	private VectorStore vectorStore;

	/**
	 * Returns the most recent messages of one of the current user's conversations.
	 * @param conversationId the conversation id as known to the client
	 * @return at most 100 messages, as returned by the chat memory
	 * @author ThinkGem
	 */
	public List<Message> getChatMessage(String conversationId) {
		return chatMemory.get(getChatMemoryKey(conversationId), MAX_HISTORY_MESSAGES);
	}

	/** Key of the current user: the user id, or the session id when the user id is blank. */
	private static String getChatCacheKey() {
		String key = UserUtils.getUser().getId();
		if (StringUtils.isBlank(key)) {
			key = UserUtils.getSession().getId().toString();
		}
		return key;
	}

	/** Chat memory key of a conversation, scoped to the current user. */
	private static String getChatMemoryKey(String conversationId) {
		return getChatCacheKey() + "#" + conversationId;
	}

	/**
	 * Returns the current user's conversations, keyed by conversation id.
	 * @return the cached map, or a new empty map when nothing is cached
	 */
	public Map<String, Map<String, Object>> getChatCacheMap() {
		Map<String, Map<String, Object>> cache = CacheUtils.get(CMS_CHAT_CACHE, getChatCacheKey());
		if (cache == null) {
			cache = MapUtils.newHashMap();
		}
		return cache;
	}

	/**
	 * Creates or updates a conversation of the current user.
	 * @param conversationId the conversation id; a new id is generated when blank
	 * @param title the conversation title; a default title is used when blank
	 * @return the stored conversation entry with its "id" and "title"
	 * @author ThinkGem
	 */
	public Map<String, Object> saveChatConversation(String conversationId, String title) {
		if (StringUtils.isBlank(conversationId)) {
			conversationId = IdGen.nextId();
		}
		if (StringUtils.isBlank(title)) {
			title = "新对话 " + DateUtils.getTime();
		}
		Map<String, Object> map = MapUtils.newHashMap();
		map.put("id", conversationId);
		map.put("title", title);
		Map<String, Map<String, Object>> cache = getChatCacheMap();
		cache.put(conversationId, map);
		CacheUtils.put(CMS_CHAT_CACHE, getChatCacheKey(), cache);
		return map;
	}

	/**
	 * Deletes a conversation of the current user together with its chat history.
	 * @param conversationId the conversation id as known to the client
	 * @author ThinkGem
	 */
	public void deleteChatConversation(String conversationId) {
		Map<String, Map<String, Object>> cache = getChatCacheMap();
		cache.remove(conversationId);
		CacheUtils.put(CMS_CHAT_CACHE, getChatCacheKey(), cache);
		chatMemory.clear(getChatMemoryKey(conversationId));
	}

	/**
	 * Sends a user message and streams the model response. The prompt is
	 * enriched with the conversation's chat history and with matching
	 * knowledge-base documents from the vector store.
	 * @param conversationId the conversation id as known to the client
	 * @param message the user's message text
	 * @return the streamed chat responses
	 * @author ThinkGem
	 */
	public Flux<ChatResponse> chatStream(String conversationId, String message) {
		// Resolve the key here, before the stream is subscribed, while the current user is looked up.
		String memoryKey = getChatMemoryKey(conversationId);
		SearchRequest searchRequest = SearchRequest.builder()
				.similarityThreshold(SIMILARITY_THRESHOLD)
				.topK(TOP_K)
				.build();
		return chatClient.prompt()
				.messages(new UserMessage(message))
				.advisors(
						new MessageChatMemoryAdvisor(chatMemory, memoryKey, CHAT_HISTORY_WINDOW_SIZE),
						new QuestionAnswerAdvisor(vectorStore, searchRequest))
				.stream()
				.chatResponse();
	}
}

View File

@@ -0,0 +1,84 @@
/**
* Copyright (c) 2013-Now http://jeesite.com All rights reserved.
* No deletion without permission, or be held responsible to law.
*/
package com.jeesite.modules.cms.ai.web;
import com.jeesite.common.config.Global;
import com.jeesite.common.web.BaseController;
import com.jeesite.modules.cms.ai.service.CmsAiChatService;
import org.springframework.ai.chat.messages.Message;
import org.springframework.ai.chat.model.ChatResponse;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.MediaType;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import reactor.core.publisher.Flux;
import java.util.Collection;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
/**
 * REST controller for the AI chat endpoints under ${adminPath}/cms/chat.
 * @author ThinkGem
 */
@RestController
@RequestMapping("${adminPath}/cms/chat")
public class CmsAiChatController extends BaseController {

	@Autowired
	private CmsAiChatService cmsAiChatService;

	/**
	 * Returns the messages of a conversation.
	 * @param id the conversation id
	 * @author ThinkGem
	 */
	@RequestMapping("/message")
	public List<Message> message(String id) {
		List<Message> messages = cmsAiChatService.getChatMessage(id);
		return messages;
	}

	/**
	 * Lists the conversations, sorted by id in descending order.
	 * @author ThinkGem
	 */
	@RequestMapping("/list")
	public Collection<Map<String, Object>> list() {
		Comparator<Map<String, Object>> byIdDescending = Comparator.comparing(
				(Map<String, Object> conversation) -> (String) conversation.get("id"),
				Comparator.reverseOrder());
		Collection<Map<String, Object>> conversations = cmsAiChatService.getChatCacheMap().values();
		return conversations.stream()
				.sorted(byIdDescending)
				.collect(Collectors.toList());
	}

	/**
	 * Creates or updates a conversation and returns the rendered result.
	 * @param id the conversation id, may be empty for a new conversation
	 * @param title the conversation title
	 * @author ThinkGem
	 */
	@RequestMapping("/save")
	public String save(String id, String title) {
		Map<String, Object> conversation = cmsAiChatService.saveChatConversation(id, title);
		return renderResult(Global.TRUE, "保存成功", conversation);
	}

	/**
	 * Deletes a conversation and returns the rendered result.
	 * @param id the conversation id
	 * @author ThinkGem
	 */
	@RequestMapping("/delete")
	public String delete(String id) {
		cmsAiChatService.deleteChatConversation(id);
		return renderResult(Global.TRUE, "删除成功", id);
	}

	/**
	 * Chats within a conversation; the response is produced as an event stream.
	 * @param id the conversation id
	 * @param message the user's message text
	 * @author ThinkGem
	 */
	@RequestMapping(value = "/stream", produces = MediaType.TEXT_EVENT_STREAM_VALUE)
	public Flux<ChatResponse> stream(String id, String message) {
		Flux<ChatResponse> responses = cmsAiChatService.chatStream(id, message);
		return responses;
	}
}

View File

@@ -0,0 +1,12 @@
## Important tip:
## 请勿在该配置文件中添加其它任何配置(添加也不会生效)。
## 该文件,仅仅是为了让 jeesite-cms-ai.yml 文件,
## 在 IDEA 中有一个自动完成及帮助提示,并无其它用意。
## 参数配置请在 jeesite-cms-ai.yml 文件中添加。
spring:
config:
import:
- classpath:config/jeesite-cms-ai.yml

View File

@@ -0,0 +1,125 @@
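# Tip: do not modify this file directly. To keep platform upgrades easy, copy the parameters you need to change into application.yml and override their values there.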
spring:
ai:
# 云上大模型(使用该模型,请开启 enabled 参数)
openai:
base-url: https://api.siliconflow.cn
api-key: ${SFLOW_APP_KEY}
#base-url: https://ai.gitee.com
#api-key: ${GITEE_APP_KEY}
# 聊天对话模型
chat:
enabled: true
options:
model: deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
#model: DeepSeek-R1-Distill-Qwen-14B
max-tokens: 1024
temperature: 0.6
top-p: 0.7
frequency-penalty: 0
logprobs: true
# 向量库知识库模型(注意:不同的模型维度不同)
embedding:
enabled: true
options:
model: BAAI/bge-m3
#model: bge-large-zh-v1.5
dimensions: 512
# 本地大模型配置(使用该模型,请开启 enabled 参数)
ollama:
base-url: http://localhost:11434
# 聊天对话模型
chat:
enabled: false
options:
#model: qwen2.5
model: deepseek-r1:7b
max-tokens: 1024
temperature: 0.6
top-p: 0.7
frequency-penalty: 0
# 向量库知识库模型(注意:不同的模型维度不同)
embedding:
enabled: false
# 维度 dimensions 设置为 384
#model: all-minilm:33m
# 维度 dimensions 设置为 768
#model: nomic-embed-text
# 维度 dimensions 设置为 1024
model: bge-m3
# 向量数据库配置
vectorstore:
# PostgreSQL vector database (the PG connection settings are further below; the table must be created manually)
pgvector:
initialize-schema: false
id-type: TEXT
index-type: HNSW
distance-type: COSINE_DISTANCE
#table-name: vector_store_384
#dimensions: 384
#table-name: vector_store_786
#dimensions: 768
table-name: vector_store_1024
dimensions: 1024
batching-strategy: TOKEN_COUNT
max-document-batch-size: 10000
# # ES vector database (the ES connection settings are further below)
# elasticsearch:
# initialize-schema: true
# index-name: vector-index
# dimensions: 1024
# similarity: cosine
# batching-strategy: TOKEN_COUNT
# # Milvus vector database (string length must not exceed 65535)
# milvus:
# initialize-schema: true
# client:
# host: "localhost"
# port: 19530
# username: "root"
# password: "milvus"
# database-name: "default2"
# collection-name: "vector_store2"
# embedding-dimension: 384
# index-type: HNSW
# metric-type: COSINE
# ========= Postgresql 向量数据库数据源 =========
jdbc:
ds_pgvector:
type: postgresql
driver: org.postgresql.Driver
url: jdbc:postgresql://127.0.0.1:5433/jeesite-ai
username: postgres
password: postgres
testSql: SELECT 1
# ========= ES 向量数据库连接配置 =========
spring.elasticsearch:
enabled: true
socket-timeout: 120s
connection-timeout: 120s
uris: http://127.0.0.1:9200
username: elastic
password: elastic
# 对话消息存缓存,可自定义存数据库
j2cache:
caffeine:
region:
# Expiry time of chat messages; the default is 30 days and can be set longer if needed.
cmsChatCache: 100000, 30d
cmsChatMsgCache: 100000, 30d
#logging:
# level:
# org.springframework: debug

View File

@@ -0,0 +1 @@
5.11.0