using System.Net.Http.Json;
using System.Text.Json;
using Microsoft.Extensions.Options;
using ROLAC.API.Data;

namespace ROLAC.API.Services.Ai;

/// <summary>
/// Translates and classifies an expense via the Google Gemini generateContent API, using
/// Gemini's structured-output mode (responseSchema). The catalog, prompt, and id validation
/// come from <see cref="ExpenseAiServiceBase"/>; this class only owns the Gemini HTTP call + parse.
/// </summary>
public sealed class GeminiExpenseAiService : ExpenseAiServiceBase
{
    // Cached once: constructing JsonSerializerOptions per call discards the serializer's
    // metadata cache every time (analyzer rule CA1869).
    private static readonly JsonSerializerOptions s_answerJsonOptions = new()
    {
        PropertyNameCaseInsensitive = true,
    };

    private readonly HttpClient _http;
    private readonly GeminiOptions _options;
    private readonly ILogger<GeminiExpenseAiService> _logger;

    /// <summary>Creates the service from its injected dependencies.</summary>
    /// <param name="http">HTTP client used to call the Gemini endpoint.</param>
    /// <param name="options">Gemini settings; this class reads ApiKey, BaseUrl and Model.</param>
    /// <param name="db">Database context forwarded to the base class.</param>
    /// <param name="logger">Logger for warnings and failures.</param>
    public GeminiExpenseAiService(
        HttpClient http,
        IOptions<GeminiOptions> options,
        AppDbContext db,
        ILogger<GeminiExpenseAiService> logger)
        : base(db)
    {
        _http = http;
        _options = options.Value;
        _logger = logger;
    }

    /// <summary>
    /// Sends <paramref name="prompt"/> to Gemini and maps the structured JSON answer to a
    /// <see cref="ModelAnswer"/>.
    /// </summary>
    /// <param name="prompt">The prompt text sent as the single content part.</param>
    /// <param name="ct">Token used to cancel the HTTP call and the response read.</param>
    /// <returns>
    /// The parsed answer, or <c>null</c> when the API key is not configured, Gemini returns a
    /// non-success status, the response carries no text part, the answer deserializes to null,
    /// or any exception occurs. This method never throws.
    /// </returns>
    protected override async Task<ModelAnswer?> CallModelAsync(string prompt, CancellationToken ct)
    {
        if (string.IsNullOrWhiteSpace(_options.ApiKey))
        {
            _logger.LogWarning("Gemini API key is not configured; expense AI assist is disabled.");
            return null;
        }

        try
        {
            using var request = BuildRequest(prompt);
            using var response = await _http.SendAsync(request, ct);

            if (!response.IsSuccessStatusCode)
            {
                var body = await response.Content.ReadAsStringAsync(ct);
                _logger.LogWarning("Gemini returned {Status}: {Body}", (int)response.StatusCode, body);
                return null;
            }

            await using var stream = await response.Content.ReadAsStreamAsync(ct);
            using var doc = await JsonDocument.ParseAsync(stream, cancellationToken: ct);

            var text = ExtractAnswerText(doc.RootElement);
            if (string.IsNullOrWhiteSpace(text))
            {
                _logger.LogWarning("Gemini response contained no text part.");
                return null;
            }

            var parsed = JsonSerializer.Deserialize<GeminiAnswer>(text, s_answerJsonOptions);
            if (parsed is null)
            {
                return null;
            }

            return new ModelAnswer(
                parsed.EnglishDescription,
                parsed.ChineseDescription,
                parsed.GroupId,
                parsed.SubCategoryId,
                parsed.Confidence);
        }
        catch (OperationCanceledException) when (ct.IsCancellationRequested)
        {
            // The caller cancelled; that is not a Gemini failure, so keep it out of the error log.
            // Still return null so the "never throws" contract is preserved.
            _logger.LogInformation("Gemini expense AI assist was cancelled.");
            return null;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Gemini expense AI assist failed.");
            return null;
        }
    }

    /// <summary>
    /// Builds the generateContent POST request: the prompt as a single text part, a JSON
    /// response MIME type, and a responseSchema requiring all five answer fields.
    /// The API key is sent in the X-goog-api-key header.
    /// </summary>
    private HttpRequestMessage BuildRequest(string prompt)
    {
        var payload = new
        {
            contents = new[]
            {
                new { parts = new[] { new { text = prompt } } },
            },
            generationConfig = new
            {
                responseMimeType = "application/json",
                responseSchema = new
                {
                    type = "object",
                    properties = new
                    {
                        chineseDescription = new { type = "string" },
                        englishDescription = new { type = "string" },
                        groupId = new { type = "integer" },
                        subCategoryId = new { type = "integer" },
                        confidence = new { type = "number" },
                    },
                    required = new[]
                    {
                        "chineseDescription",
                        "englishDescription",
                        "groupId",
                        "subCategoryId",
                        "confidence",
                    },
                },
            },
        };

        var url = $"{_options.BaseUrl}/models/{_options.Model}:generateContent";
        var request = new HttpRequestMessage(HttpMethod.Post, url)
        {
            Content = JsonContent.Create(payload),
        };
        request.Headers.Add("X-goog-api-key", _options.ApiKey);
        return request;
    }

    /// <summary>
    /// Reads candidates[0].content.parts[0].text — the model's JSON answer as a string.
    /// Returns <c>null</c> instead of throwing when any step of that path is missing, empty,
    /// or of an unexpected JSON kind.
    /// </summary>
    private static string? ExtractAnswerText(JsonElement root)
    {
        if (root.ValueKind != JsonValueKind.Object
            || !root.TryGetProperty("candidates", out var candidates)
            || candidates.ValueKind != JsonValueKind.Array
            || candidates.GetArrayLength() == 0)
        {
            return null;
        }

        var candidate = candidates[0];
        if (candidate.ValueKind != JsonValueKind.Object
            || !candidate.TryGetProperty("content", out var content)
            || content.ValueKind != JsonValueKind.Object
            || !content.TryGetProperty("parts", out var parts)
            || parts.ValueKind != JsonValueKind.Array
            || parts.GetArrayLength() == 0)
        {
            return null;
        }

        var part = parts[0];
        if (part.ValueKind != JsonValueKind.Object
            || !part.TryGetProperty("text", out var text)
            || text.ValueKind != JsonValueKind.String)
        {
            return null;
        }

        return text.GetString();
    }

    /// <summary>Shape of Gemini's JSON answer (constrained by responseSchema).</summary>
    private sealed class GeminiAnswer
    {
        public string? EnglishDescription { get; set; }

        public string? ChineseDescription { get; set; }

        public int GroupId { get; set; }

        public int SubCategoryId { get; set; }

        public double Confidence { get; set; }
    }
}