Spring AI ETL Summary Metadata Enricher Document Transformer

Sometimes the requirement is to summarize large documents and provide a short description for easy understanding. The Spring AI ETL Summary Metadata Enricher Document Transformer uses a generative AI model to create summaries of large documents and add them as metadata. It can generate summaries for the current document, as well as for adjacent documents such as the previous and next ones.

The default prompt of the Spring AI ETL Summary Metadata Enricher Document Transformer is:
""" Here is the content of the section: {context_str} Summarize the key topics and entities of the section. Summary: """

package com.example.springai.controller;

import org.springframework.ai.chat.model.ChatModel;
import org.springframework.ai.document.Document;
import org.springframework.ai.reader.TextReader;
import org.springframework.ai.transformer.SummaryMetadataEnricher;
import org.springframework.ai.transformer.splitter.TokenTextSplitter;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.core.io.Resource;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RestController;

import java.io.IOException;
import java.time.Instant;
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.util.List;
import java.util.Map;

public class SpringAiController {
    private final Resource apples;
    private ChatModel chatModel;

    public SpringAiController(@Value("classpath:apples.st") Resource apples) {
        this.apples = apples;

    List<Document> textDocument() throws IOException {
        var textReader = new TextReader(apples);
                .putAll(Map.of("length", apples.contentLength(), "last modified", LocalDateTime.ofInstant(Instant.ofEpochMilli(apples.lastModified()), ZoneId.systemDefault())));
        var documents = textReader.get();
        TokenTextSplitter splitter = new TokenTextSplitter(true);
        return summaryMetadata().apply(splitter.apply(documents));

    public SummaryMetadataEnricher summaryMetadata() {
        return new SummaryMetadataEnricher(chatModel, List.of(SummaryMetadataEnricher.SummaryType.PREVIOUS, SummaryMetadataEnricher.SummaryType.CURRENT, SummaryMetadataEnricher.SummaryType.NEXT));
package com.example.springai;

import org.springframework.ai.chat.model.ChatModel;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;

public class SpringAiApplication {
    private ChatModel chatModel;

    public static void main(String[] args) {
        SpringApplication.run(SpringAiApplication.class, args);

Create a file named apples.st in the src/main/resources directory.

# The default Ollama model in Spring AI is mistral, but it can be changed by setting the property below. Make sure to download the same model in the entrypoint.sh file.
# If running the Ollama Docker Instance separately, then set this property
    # Ollama server container.
    image: ollama/ollama:latest
    container_name: ollama_container
    ports:
      - 11434:11434/tcp
    healthcheck:
      test: ollama --version || exit 1
    command: serve
    volumes:
      - ./ollama/ollama:/root/.ollama
      # The startup script is mounted so that it can be used as the entrypoint below.
      - ./entrypoint.sh:/entrypoint.sh
    pull_policy: missing
    tty: true
    # Quoted on purpose: a bare `no` is read by YAML as the boolean false.
    restart: "no"
    entrypoint: [ "/usr/bin/bash", "/entrypoint.sh" ]

    # Open WebUI container.
    image: ghcr.io/open-webui/open-webui:main
    container_name: open_webui_container
    environment:
      # Quoted so the variable is passed as the string "false" rather than a YAML boolean.
      WEBUI_AUTH: "false"
    ports:
      - "8081:8080"
    extra_hosts:
      - "host.docker.internal:host-gateway"
    volumes:
      - open-webui:/app/backend/data
    # Quoted on purpose: a bare `no` is read by YAML as the boolean false.
    restart: "no"

# Start Ollama in the background.
/bin/ollama serve &
# Record Process ID.
pid=$!
# Pause for Ollama to start.
sleep 5
# The default Ollama model in Spring AI is mistral, but it can be changed in the application properties file. Make sure to download the same model here.
echo "🔴 Retrieve mistral model..."
ollama pull mistral
echo "🟢 Done!"
# Wait for the Ollama process to finish.
wait "$pid"
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://maven.apache.org/POM/4.0.0"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <!-- Display name and description of the project. -->
    <name>ETL Pipeline Text Split Summary Metadata Enricher</name>
    <description>Demo project for Spring Boot</description>


            <!-- NOTE(review): the elements enclosing the two names below are not visible in this
                 excerpt; presumably they are repository entries. Confirm against the full POM. -->
            <name>Spring Milestones</name>
            <name>Spring Snapshots</name>

Run the curl command to see the Spring AI ETL Summary Metadata Enricher Document Transformer in action.


follow us on