diff --git a/modules/cms-ai/src/main/java/com/jeesite/modules/cms/ai/config/WebClientThinkConfig.java b/modules/cms-ai/src/main/java/com/jeesite/modules/cms/ai/config/WebClientThinkConfig.java index c2b6a48a..6c1e1045 100644 --- a/modules/cms-ai/src/main/java/com/jeesite/modules/cms/ai/config/WebClientThinkConfig.java +++ b/modules/cms-ai/src/main/java/com/jeesite/modules/cms/ai/config/WebClientThinkConfig.java @@ -92,7 +92,7 @@ public class WebClientThinkConfig { } String reasoningContent = (String) delta.get("reasoning_content"); String content = (String) delta.get("content"); - if (StringUtils.isNotBlank(reasoningContent) && StringUtils.isBlank(content)) { + if (StringUtils.isNotEmpty(reasoningContent) && StringUtils.isEmpty(content)) { if (!thinkingFlag.get()) { thinkingFlag.set(true); delta.put("content", "\n" + reasoningContent); diff --git a/modules/cms-ai/src/main/java/com/jeesite/modules/cms/ai/service/ArticleVectorStoreImpl.java b/modules/cms-ai/src/main/java/com/jeesite/modules/cms/ai/service/ArticleVectorStoreImpl.java index 247eaeeb..7b9ad5cd 100644 --- a/modules/cms-ai/src/main/java/com/jeesite/modules/cms/ai/service/ArticleVectorStoreImpl.java +++ b/modules/cms-ai/src/main/java/com/jeesite/modules/cms/ai/service/ArticleVectorStoreImpl.java @@ -26,6 +26,9 @@ import jakarta.servlet.http.HttpServletRequest; import org.apache.tika.Tika; import org.apache.tika.config.TikaConfig; import org.apache.tika.exception.TikaException; +import org.apache.tika.io.TikaInputStream; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.mime.MediaType; import org.jetbrains.annotations.NotNull; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -38,6 +41,7 @@ import org.springframework.stereotype.Service; import java.io.IOException; import java.io.InputStream; +import java.nio.charset.StandardCharsets; import java.util.List; import java.util.Map; import java.util.Set; @@ -151,7 +155,19 @@ public class ArticleVectorStoreImpl implements ArticleVectorStore { */ private static @NotNull String getDocumentText(InputStream is) throws IOException, TikaException { TikaConfig config = TikaConfig.getDefaultConfig(); - String content = new Tika(config).parseToString(is); + Tika tika = new Tika(config); + Metadata metadata = new Metadata(); + TikaInputStream stream = TikaInputStream.get(is); + MediaType mimetype = tika.getDetector().detect(stream, metadata); + if (mimetype != null && StringUtils.equals(mimetype.getType(), "text")) { + String text = IOUtils.toString(stream, StandardCharsets.UTF_8); + if (StringUtils.isNotBlank(text)) { + return FlexmarkHtmlConverter.builder().build().convert(text); + } else { + return text; + } + } + String content = tika.parseToString(stream, metadata); return content.lines() .map(String::strip).filter(line -> !line.isEmpty()) .reduce((a, b) -> a + System.lineSeparator() + b)