From 1b21375e1c6891bfefd7a19bfcd8137c947765ec Mon Sep 17 00:00:00 2001 From: thinkgem Date: Wed, 14 May 2025 21:48:29 +0800 Subject: [PATCH] =?UTF-8?q?=E5=8D=87=E7=BA=A7=20Spring=20AI=201.0.0-RC1?= =?UTF-8?q?=E3=80=81Chroma=201.0.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- modules/cms-ai/README.md | 2 +- modules/cms-ai/pom.xml | 2 +- .../cms/ai/config/CmsAiChatConfig.java | 15 +++++ .../ai/service/ArticleVectorStoreImpl.java | 62 ++++++++++++------- .../cms/ai/service/CacheChatMemory.java | 45 -------------- .../ai/service/CacheChatMemoryRepository.java | 44 +++++++++++++ .../cms/ai/service/CmsAiChatService.java | 6 +- .../main/resources/config/jeesite-cms-ai.yml | 4 +- 8 files changed, 106 insertions(+), 74 deletions(-) delete mode 100644 modules/cms-ai/src/main/java/com/jeesite/modules/cms/ai/service/CacheChatMemory.java create mode 100644 modules/cms-ai/src/main/java/com/jeesite/modules/cms/ai/service/CacheChatMemoryRepository.java diff --git a/modules/cms-ai/README.md b/modules/cms-ai/README.md index af19fbd0..94c82409 100644 --- a/modules/cms-ai/README.md +++ b/modules/cms-ai/README.md @@ -52,7 +52,7 @@ ### 安装 Chroma ```sh -docker run --name chroma -p 8000:8000 ghcr.io/chroma-core/chroma:0.5.20 +docker run -it --rm --name chroma -p 8000:8000 ghcr.io/chroma-core/chroma:1.0.0 ``` ### 安装 PGVector diff --git a/modules/cms-ai/pom.xml b/modules/cms-ai/pom.xml index 6b0d3dbe..c3a375f5 100644 --- a/modules/cms-ai/pom.xml +++ b/modules/cms-ai/pom.xml @@ -19,7 +19,7 @@ - 1.0.0-M8 + 1.0.0-RC1 diff --git a/modules/cms-ai/src/main/java/com/jeesite/modules/cms/ai/config/CmsAiChatConfig.java b/modules/cms-ai/src/main/java/com/jeesite/modules/cms/ai/config/CmsAiChatConfig.java index ceeff286..006b9883 100644 --- a/modules/cms-ai/src/main/java/com/jeesite/modules/cms/ai/config/CmsAiChatConfig.java +++ b/modules/cms-ai/src/main/java/com/jeesite/modules/cms/ai/config/CmsAiChatConfig.java @@ -7,8 +7,11 @@ package com.jeesite.modules.cms.ai.config; import com.jeesite.common.datasource.DataSourceHolder; import com.jeesite.common.lang.StringUtils; import com.jeesite.modules.cms.ai.properties.CmsAiProperties; +import com.jeesite.modules.cms.ai.service.CacheChatMemoryRepository; import com.jeesite.modules.cms.ai.tools.CmsAiTools; import org.springframework.ai.chat.client.ChatClient; +import org.springframework.ai.chat.memory.ChatMemory; +import org.springframework.ai.chat.memory.MessageWindowChatMemory; import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; import org.springframework.boot.context.properties.EnableConfigurationProperties; import org.springframework.context.annotation.Bean; @@ -39,6 +42,18 @@ public class CmsAiChatConfig { return builder.build(); } + /** + * 聊天对话数据存储 + * @author ThinkGem + */ + @Bean + public ChatMemory chatMemory(CacheChatMemoryRepository cacheChatMemoryRepository) { + return MessageWindowChatMemory.builder() + .chatMemoryRepository(cacheChatMemoryRepository) + .maxMessages(1024) + .build(); + } + // @Bean // public BatchingStrategy batchingStrategy() { // return new TokenCountBatchingStrategy(EncodingType.CL100K_BASE, Integer.MAX_VALUE, 0.1); diff --git a/modules/cms-ai/src/main/java/com/jeesite/modules/cms/ai/service/ArticleVectorStoreImpl.java b/modules/cms-ai/src/main/java/com/jeesite/modules/cms/ai/service/ArticleVectorStoreImpl.java index 07d24ee2..247eaeeb 100644 --- a/modules/cms-ai/src/main/java/com/jeesite/modules/cms/ai/service/ArticleVectorStoreImpl.java +++ b/modules/cms-ai/src/main/java/com/jeesite/modules/cms/ai/service/ArticleVectorStoreImpl.java @@ -12,6 +12,7 @@ import com.jeesite.common.lang.StringUtils; import com.jeesite.common.lang.TimeUtils; import com.jeesite.common.utils.PageUtils; import com.jeesite.common.web.http.HttpClientUtils; +import com.jeesite.common.web.http.ServletUtils; import com.jeesite.modules.cms.entity.Article; import com.jeesite.modules.cms.service.ArticleVectorStore; import com.jeesite.modules.cms.utils.CmsUtils; @@ -21,11 +22,11 @@ import com.vladsch.flexmark.html2md.converter.FlexmarkHtmlConverter; import com.vladsch.flexmark.html2md.converter.HtmlLinkResolver; import com.vladsch.flexmark.html2md.converter.HtmlLinkResolverFactory; import com.vladsch.flexmark.html2md.converter.HtmlNodeConverterContext; +import jakarta.servlet.http.HttpServletRequest; import org.apache.tika.Tika; import org.apache.tika.config.TikaConfig; import org.apache.tika.exception.TikaException; import org.jetbrains.annotations.NotNull; -import org.jetbrains.annotations.Nullable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.ai.document.Document; @@ -75,13 +76,33 @@ public class ArticleVectorStoreImpl implements ArticleVectorStore { metadata.put("updateBy", article.getUpdateBy()); metadata.put("updateDate", article.getUpdateDate()); List attachmentList = ListUtils.newArrayList(); - HtmlLinkResolverFactory linkResolverFactory = new HtmlLinkResolverFactory() { + String content = article.getTitle() + ", " + article.getKeywords() + ", " + + article.getDescription() + ", " + FlexmarkHtmlConverter.builder() + .linkResolverFactory(getHtmlLinkResolverFactory(attachmentList)).build() + .convert(article.getArticleData().getContent()) + + ", attachment: " + attachmentList; + List documents = List.of(new Document(article.getId(), content, metadata)); + List splitDocuments = new TokenTextSplitter().apply(documents); + this.delete(article); // 删除原数据 + ListUtils.pageList(splitDocuments, 10, params -> { + vectorStore.add((List)params[0]); // 增加新数据 + return null; + }); + } + + /** + * 解析文章中的连接并提取内容 + * @author ThinkGem + */ + private @NotNull HtmlLinkResolverFactory getHtmlLinkResolverFactory(List attachmentList) { + HttpServletRequest request = ServletUtils.getRequest(); + return new HtmlLinkResolverFactory() { @Override - public @Nullable Set> getAfterDependents() { + public @NotNull Set> getAfterDependents() { return Set.of(); } @Override - public @Nullable Set> getBeforeDependents() { + public @NotNull Set> getBeforeDependents() { return Set.of(); } @Override @@ -94,11 +115,16 @@ public class ArticleVectorStoreImpl implements ArticleVectorStore { if ("a".equalsIgnoreCase(node.nodeName())) { String href = node.attributes().get("href"); String url = href; if (StringUtils.contains(url, "://")) { - try (InputStream is = HttpClientUtils.getInputStream(url, null)) { - String text = getDocumentText(is); - attachmentList.add(url + text); - } catch (IOException | TikaException e) { - logger.error(e.getMessage(), e); + // 只提取系统允许跳转的附件内容,外部网站内容不进行提取,shiro.allowRedirects 参数设置范围 + if (ServletUtils.isAllowRedirects(request, url)) { + try (InputStream is = HttpClientUtils.getInputStream(url, null)) { + if (is != null) { + String text = getDocumentText(is); + attachmentList.add(url + text); + } + } catch (IOException | TikaException e) { + logger.error(e.getMessage(), e); + } } } else { String ctxPath = Global.getCtxPath(); @@ -106,8 +132,10 @@ public class ArticleVectorStoreImpl implements ArticleVectorStore { url = url.substring(ctxPath.length()); } try (InputStream is = IOUtils.getFileInputStream(Global.getUserfilesBaseDir(url))){ - String text = getDocumentText(is); - attachmentList.add(url + text); + if (is != null) { + String text = getDocumentText(is); + attachmentList.add(url + text); + } } catch (IOException | TikaException e) { logger.error(e.getMessage(), e); } @@ -130,18 +158,6 @@ public class ArticleVectorStoreImpl implements ArticleVectorStore { .orElse(StringUtils.EMPTY); } }; - String content = article.getTitle() + ", " + article.getKeywords() + ", " - + article.getDescription() + ", " + FlexmarkHtmlConverter.builder() - .linkResolverFactory(linkResolverFactory).build() - .convert(article.getArticleData().getContent()) - + ", attachment: " + attachmentList; - List documents = List.of(new Document(article.getId(), content, metadata)); - List splitDocuments = new TokenTextSplitter().apply(documents); - this.delete(article); // 删除原数据 - ListUtils.pageList(splitDocuments, 64, params -> { - vectorStore.add((List)params[0]); // 增加新数据 - return null; - }); } /** diff --git a/modules/cms-ai/src/main/java/com/jeesite/modules/cms/ai/service/CacheChatMemory.java b/modules/cms-ai/src/main/java/com/jeesite/modules/cms/ai/service/CacheChatMemory.java deleted file mode 100644 index 3255204c..00000000 --- a/modules/cms-ai/src/main/java/com/jeesite/modules/cms/ai/service/CacheChatMemory.java +++ /dev/null @@ -1,45 +0,0 @@ -/** - * Copyright (c) 2013-Now http://jeesite.com All rights reserved. - * No deletion without permission, or be held responsible to law. - */ -package com.jeesite.modules.cms.ai.service; - -import com.jeesite.common.cache.CacheUtils; -import com.jeesite.common.collect.ListUtils; -import org.springframework.ai.chat.memory.ChatMemory; -import org.springframework.ai.chat.messages.Message; -import org.springframework.stereotype.Service; - -import java.util.List; - -/** - * AI 对话消息存储 - * @author ThinkGem - */ -@Service -public class CacheChatMemory implements ChatMemory { - - private static final String CMS_CHAT_MSG_CACHE = "cmsChatMsgCache"; - - @Override - public void add(String conversationId, List messages) { - List conversationHistory = CacheUtils.get(CMS_CHAT_MSG_CACHE, conversationId); - if (conversationHistory == null) { - conversationHistory = ListUtils.newArrayList(); - } - conversationHistory.addAll(messages); - CacheUtils.put(CMS_CHAT_MSG_CACHE, conversationId, conversationHistory); - } - - @Override - public List get(String conversationId, int lastN) { - List all = CacheUtils.get(CMS_CHAT_MSG_CACHE, conversationId); - return all != null ? all.stream().skip(Math.max(0, all.size() - lastN)).toList() : List.of(); - } - - @Override - public void clear(String conversationId) { - CacheUtils.remove(CMS_CHAT_MSG_CACHE, conversationId); - } - -} diff --git a/modules/cms-ai/src/main/java/com/jeesite/modules/cms/ai/service/CacheChatMemoryRepository.java b/modules/cms-ai/src/main/java/com/jeesite/modules/cms/ai/service/CacheChatMemoryRepository.java new file mode 100644 index 00000000..d8d51d31 --- /dev/null +++ b/modules/cms-ai/src/main/java/com/jeesite/modules/cms/ai/service/CacheChatMemoryRepository.java @@ -0,0 +1,44 @@ +/** + * Copyright (c) 2013-Now http://jeesite.com All rights reserved. + * No deletion without permission, or be held responsible to law. + */ +package com.jeesite.modules.cms.ai.service; + +import com.jeesite.common.cache.CacheUtils; +import org.jetbrains.annotations.NotNull; +import org.springframework.ai.chat.memory.ChatMemoryRepository; +import org.springframework.ai.chat.messages.Message; +import org.springframework.stereotype.Service; + +import java.util.List; + +/** + * AI 对话消息存储 + * @author ThinkGem + */ +@Service +public class CacheChatMemoryRepository implements ChatMemoryRepository { + + private static final String CMS_CHAT_MSG_CACHE = "cmsChatMsgCache"; + + @Override + public @NotNull List findConversationIds() { + return CacheUtils.getCache(CMS_CHAT_MSG_CACHE).keys().stream().map(Object::toString).toList(); + } + + @Override + public @NotNull List findByConversationId(@NotNull String conversationId) { + List all = CacheUtils.get(CMS_CHAT_MSG_CACHE, conversationId); + return all != null ? all : List.of(); + } + + @Override + public void saveAll(@NotNull String conversationId, @NotNull List messages) { + CacheUtils.put(CMS_CHAT_MSG_CACHE, conversationId, messages); + } + + @Override + public void deleteByConversationId(@NotNull String conversationId) { + CacheUtils.remove(CMS_CHAT_MSG_CACHE, conversationId); + } +} diff --git a/modules/cms-ai/src/main/java/com/jeesite/modules/cms/ai/service/CmsAiChatService.java b/modules/cms-ai/src/main/java/com/jeesite/modules/cms/ai/service/CmsAiChatService.java index 33419055..fb2da442 100644 --- a/modules/cms-ai/src/main/java/com/jeesite/modules/cms/ai/service/CmsAiChatService.java +++ b/modules/cms-ai/src/main/java/com/jeesite/modules/cms/ai/service/CmsAiChatService.java @@ -59,7 +59,7 @@ public class CmsAiChatService extends BaseService { * @author ThinkGem */ public List getChatMessage(String conversationId) { - return chatMemory.get(conversationId, 100); + return chatMemory.get(conversationId); } private static String getChatCacheKey() { @@ -119,7 +119,9 @@ public class CmsAiChatService extends BaseService { new UserMessage(StringUtils.replaceEach(message, USER_MESSAGE_SEARCH, USER_MESSAGE_REPLACE)) ) .advisors( - new MessageChatMemoryAdvisor(chatMemory, conversationId, 1024), + MessageChatMemoryAdvisor.builder(chatMemory) + .conversationId(conversationId) + .build(), QuestionAnswerAdvisor.builder(vectorStore) .searchRequest(SearchRequest.builder().similarityThreshold(0.6F).topK(6).build()) .promptTemplate(new PromptTemplate(properties.getDefaultPromptTemplate())) diff --git a/modules/cms-ai/src/main/resources/config/jeesite-cms-ai.yml b/modules/cms-ai/src/main/resources/config/jeesite-cms-ai.yml index f4faceda..552e8c69 100644 --- a/modules/cms-ai/src/main/resources/config/jeesite-cms-ai.yml +++ b/modules/cms-ai/src/main/resources/config/jeesite-cms-ai.yml @@ -61,8 +61,8 @@ spring: host: http://testserver port: 8000 initialize-schema: true - collection-name: vector_store - #collection-name: vector_store_1024 +# collection-name: vector_store + collection-name: vector_store_1024 # Postgresql 向量数据库(PG 连接配置,见下文,需要手动建表)【请在 pom.xml 中打开 pgvector 的注释,并注释上其它向量库】 pgvector: