package com.pablotj.ia.chat.boot;

import de.kherud.llama.InferenceParameters;
import de.kherud.llama.LlamaModel;
import de.kherud.llama.LlamaOutput;
import de.kherud.llama.ModelParameters;

import jakarta.annotation.PostConstruct;
import org.springframework.stereotype.Service;

@Service
public class LlamaService implements AutoCloseable {

    private LlamaModel model;

    @PostConstruct
    public void init() {
        try {
            // Load the GGUF model from the local models directory.
            ModelParameters params = new ModelParameters()
                    .setModelFilePath("models/openchat-3.5-0106.Q4_K_M.gguf");
            model = new LlamaModel(params);
        } catch (Exception e) {
            throw new RuntimeException("Error loading the model", e);
        }
    }

    public String chat(String prompt) {
        // Build the chat prompt (system message plus the current user turn).
        PromptBuilder chat = new PromptBuilder("You are a helpful assistant");

        // Previous conversation history could be appended here:
        // chat.user("Question");
        // chat.assistant("Answer");

        chat.user(prompt);
        String finalPrompt = chat.build();

        InferenceParameters inf = new InferenceParameters(finalPrompt)
                .setTemperature(0.7f)
                .setTopP(0.9f)
                .setTopK(40);

        // Stream the generated tokens and accumulate them into a single response.
        StringBuilder sb = new StringBuilder();
        for (LlamaOutput out : model.generate(inf)) {
            sb.append(out.text);
        }

        // Strip the OpenChat end-of-turn marker and surrounding whitespace.
        return sb.toString().replace("<|end_of_turn|>", "").trim();
    }

    @Override
    public void close() {
        if (model != null) {
            model.close();
        }
    }
}
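
/*
 * Note: the PromptBuilder used in LlamaService above is not part of the
 * de.kherud.llama library; it is assumed here to be a small project-specific
 * helper that assembles the OpenChat 3.5 chat format, whose turns are
 * separated by the <|end_of_turn|> token stripped from the final output.
 * A minimal sketch under that assumption (class name, role prefixes, and
 * method names are illustrative) could look like this:
 */
class PromptBuilder {

    private final StringBuilder sb = new StringBuilder();

    PromptBuilder(String systemMessage) {
        // The system message goes first, terminated by the end-of-turn token.
        sb.append(systemMessage).append("<|end_of_turn|>");
    }

    PromptBuilder user(String message) {
        sb.append("GPT4 Correct User: ").append(message).append("<|end_of_turn|>");
        return this;
    }

    PromptBuilder assistant(String message) {
        sb.append("GPT4 Correct Assistant: ").append(message).append("<|end_of_turn|>");
        return this;
    }

    String build() {
        // Leave the prompt open on an assistant turn so the model completes it.
        return sb.toString() + "GPT4 Correct Assistant:";
    }
}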