优化文章附件读取：如果是 HTML 附件，则保留格式
This commit is contained in:
@@ -92,7 +92,7 @@ public class WebClientThinkConfig {
|
|||||||
}
|
}
|
||||||
String reasoningContent = (String) delta.get("reasoning_content");
|
String reasoningContent = (String) delta.get("reasoning_content");
|
||||||
String content = (String) delta.get("content");
|
String content = (String) delta.get("content");
|
||||||
if (StringUtils.isNotBlank(reasoningContent) && StringUtils.isBlank(content)) {
|
if (StringUtils.isNotEmpty(reasoningContent) && StringUtils.isEmpty(content)) {
|
||||||
if (!thinkingFlag.get()) {
|
if (!thinkingFlag.get()) {
|
||||||
thinkingFlag.set(true);
|
thinkingFlag.set(true);
|
||||||
delta.put("content", "<think>\n" + reasoningContent);
|
delta.put("content", "<think>\n" + reasoningContent);
|
||||||
|
|||||||
@@ -26,6 +26,9 @@ import jakarta.servlet.http.HttpServletRequest;
|
|||||||
import org.apache.tika.Tika;
|
import org.apache.tika.Tika;
|
||||||
import org.apache.tika.config.TikaConfig;
|
import org.apache.tika.config.TikaConfig;
|
||||||
import org.apache.tika.exception.TikaException;
|
import org.apache.tika.exception.TikaException;
|
||||||
|
import org.apache.tika.io.TikaInputStream;
|
||||||
|
import org.apache.tika.metadata.Metadata;
|
||||||
|
import org.apache.tika.mime.MediaType;
|
||||||
import org.jetbrains.annotations.NotNull;
|
import org.jetbrains.annotations.NotNull;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
@@ -38,6 +41,7 @@ import org.springframework.stereotype.Service;
|
|||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
@@ -151,7 +155,19 @@ public class ArticleVectorStoreImpl implements ArticleVectorStore {
|
|||||||
*/
|
*/
|
||||||
private static @NotNull String getDocumentText(InputStream is) throws IOException, TikaException {
|
private static @NotNull String getDocumentText(InputStream is) throws IOException, TikaException {
|
||||||
TikaConfig config = TikaConfig.getDefaultConfig();
|
TikaConfig config = TikaConfig.getDefaultConfig();
|
||||||
String content = new Tika(config).parseToString(is);
|
Tika tika = new Tika(config);
|
||||||
|
Metadata metadata = new Metadata();
|
||||||
|
TikaInputStream stream = TikaInputStream.get(is);
|
||||||
|
MediaType mimetype = tika.getDetector().detect(stream, metadata);
|
||||||
|
if (mimetype != null && StringUtils.equals(mimetype.getType(), "text")) {
|
||||||
|
String text = IOUtils.toString(stream, StandardCharsets.UTF_8);
|
||||||
|
if (StringUtils.isNotBlank(text)) {
|
||||||
|
return FlexmarkHtmlConverter.builder().build().convert(text);
|
||||||
|
} else {
|
||||||
|
return text;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
String content = tika.parseToString(stream, metadata);
|
||||||
return content.lines()
|
return content.lines()
|
||||||
.map(String::strip).filter(line -> !line.isEmpty())
|
.map(String::strip).filter(line -> !line.isEmpty())
|
||||||
.reduce((a, b) -> a + System.lineSeparator() + b)
|
.reduce((a, b) -> a + System.lineSeparator() + b)
|
||||||
|
|||||||
Reference in New Issue
Block a user