优化文章附件读取：如果是 HTML 附件，则保留格式
This commit is contained in:
@@ -92,7 +92,7 @@ public class WebClientThinkConfig {
|
||||
}
|
||||
String reasoningContent = (String) delta.get("reasoning_content");
|
||||
String content = (String) delta.get("content");
|
||||
if (StringUtils.isNotBlank(reasoningContent) && StringUtils.isBlank(content)) {
|
||||
if (StringUtils.isNotEmpty(reasoningContent) && StringUtils.isEmpty(content)) {
|
||||
if (!thinkingFlag.get()) {
|
||||
thinkingFlag.set(true);
|
||||
delta.put("content", "<think>\n" + reasoningContent);
|
||||
|
||||
@@ -26,6 +26,9 @@ import jakarta.servlet.http.HttpServletRequest;
|
||||
import org.apache.tika.Tika;
|
||||
import org.apache.tika.config.TikaConfig;
|
||||
import org.apache.tika.exception.TikaException;
|
||||
import org.apache.tika.io.TikaInputStream;
|
||||
import org.apache.tika.metadata.Metadata;
|
||||
import org.apache.tika.mime.MediaType;
|
||||
import org.jetbrains.annotations.NotNull;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
@@ -38,6 +41,7 @@ import org.springframework.stereotype.Service;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
@@ -151,7 +155,19 @@ public class ArticleVectorStoreImpl implements ArticleVectorStore {
|
||||
*/
|
||||
private static @NotNull String getDocumentText(InputStream is) throws IOException, TikaException {
|
||||
TikaConfig config = TikaConfig.getDefaultConfig();
|
||||
String content = new Tika(config).parseToString(is);
|
||||
Tika tika = new Tika(config);
|
||||
Metadata metadata = new Metadata();
|
||||
TikaInputStream stream = TikaInputStream.get(is);
|
||||
MediaType mimetype = tika.getDetector().detect(stream, metadata);
|
||||
if (mimetype != null && StringUtils.equals(mimetype.getType(), "text")) {
|
||||
String text = IOUtils.toString(stream, StandardCharsets.UTF_8);
|
||||
if (StringUtils.isNotBlank(text)) {
|
||||
return FlexmarkHtmlConverter.builder().build().convert(text);
|
||||
} else {
|
||||
return text;
|
||||
}
|
||||
}
|
||||
String content = tika.parseToString(stream, metadata);
|
||||
return content.lines()
|
||||
.map(String::strip).filter(line -> !line.isEmpty())
|
||||
.reduce((a, b) -> a + System.lineSeparator() + b)
|
||||
|
||||
Reference in New Issue
Block a user