This commit is contained in:
Chris Chen
2026-06-25 12:38:13 -07:00
parent a89e936f4d
commit bdccb79029
11 changed files with 416 additions and 103 deletions
@@ -0,0 +1,124 @@
using System.Net.Http.Json;
using System.Text.Json;
using Microsoft.Extensions.Options;
using ROLAC.API.Data;
namespace ROLAC.API.Services.Ai;
/// <summary>
/// Translates and classifies an expense via the Anthropic Claude Messages API. It forces a single
/// tool call (<c>tool_choice</c> → <c>classify_expense</c>) whose <c>input_schema</c> matches our
/// answer shape, so the model returns structured JSON in a <c>tool_use</c> block. The catalog,
/// prompt, and id validation come from <see cref="ExpenseAiServiceBase"/>; this class only owns the
/// Claude HTTP call + parse. Forced tool use works on every Claude model, so the configured
/// <see cref="ClaudeOptions.Model"/> can be swapped (e.g. to a cheaper model) without code changes.
/// </summary>
public sealed class ClaudeExpenseAiService : ExpenseAiServiceBase
{
    /// <summary>Name of the one tool the model is forced to call.</summary>
    private const string ToolName = "classify_expense";

    /// <summary>Value sent as <c>max_tokens</c> in every request.</summary>
    private const int MaxTokens = 1024;

    /// <summary>
    /// Shared deserializer settings for the tool input. Cached because building a new
    /// <see cref="JsonSerializerOptions"/> per call discards the serializer's metadata cache.
    /// </summary>
    private static readonly JsonSerializerOptions AnswerJsonOptions = new()
    {
        PropertyNameCaseInsensitive = true,
    };

    private readonly HttpClient _http;
    private readonly ClaudeOptions _options;
    private readonly ILogger<ClaudeExpenseAiService> _logger;

    /// <summary>Creates the service from its injected dependencies.</summary>
    /// <param name="http">HTTP client used to reach the Claude API.</param>
    /// <param name="options">Claude settings (API key, model, base URL, API version).</param>
    /// <param name="db">Database context handed to the base class.</param>
    /// <param name="logger">Logger for configuration, HTTP, and parse problems.</param>
    public ClaudeExpenseAiService(
        HttpClient http,
        IOptions<ClaudeOptions> options,
        AppDbContext db,
        ILogger<ClaudeExpenseAiService> logger)
        : base(db)
    {
        _http = http;
        _options = options.Value;
        _logger = logger;
    }

    /// <summary>
    /// Sends <paramref name="prompt"/> to the Claude Messages API and returns the model's
    /// <c>classify_expense</c> tool input as a <see cref="ExpenseAiServiceBase.ModelAnswer"/>.
    /// </summary>
    /// <param name="prompt">Full classification prompt, sent as the single user message.</param>
    /// <param name="ct">Token that cancels the HTTP call and response parsing.</param>
    /// <returns>
    /// The parsed answer, or <c>null</c> when the API key is missing, the API responds with a
    /// non-success status, the response has no usable <c>tool_use</c> block, the call is
    /// cancelled, or any exception occurs. This method never throws.
    /// </returns>
    protected override async Task<ModelAnswer?> CallModelAsync(string prompt, CancellationToken ct)
    {
        if (string.IsNullOrWhiteSpace(_options.ApiKey))
        {
            _logger.LogWarning("Claude API key is not configured; expense AI assist is disabled.");
            return null;
        }

        try
        {
            // Trim a trailing slash so a configured "https://host/v1/" does not yield "//messages".
            var url = $"{_options.BaseUrl.TrimEnd('/')}/messages";
            using var request = new HttpRequestMessage(HttpMethod.Post, url)
            {
                Content = BuildRequestContent(prompt),
            };
            request.Headers.Add("x-api-key", _options.ApiKey);
            request.Headers.Add("anthropic-version", _options.AnthropicVersion);

            using var response = await _http.SendAsync(request, ct);
            if (!response.IsSuccessStatusCode)
            {
                var body = await response.Content.ReadAsStringAsync(ct);
                _logger.LogWarning("Claude returned {Status}: {Body}", (int)response.StatusCode, body);
                return null;
            }

            await using var stream = await response.Content.ReadAsStreamAsync(ct);
            using var doc = await JsonDocument.ParseAsync(stream, cancellationToken: ct);
            return ParseAnswer(doc.RootElement);
        }
        catch (OperationCanceledException) when (ct.IsCancellationRequested)
        {
            // The caller gave up; that is not a failure worth an error-level entry, but the
            // base-class contract still requires returning null instead of throwing.
            _logger.LogInformation("Claude expense AI assist was cancelled.");
            return null;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Claude expense AI assist failed.");
            return null;
        }
    }

    /// <summary>
    /// Builds the JSON request body: model, token limit, the <c>classify_expense</c> tool
    /// definition, the forced <c>tool_choice</c>, and the prompt as one user message.
    /// </summary>
    private HttpContent BuildRequestContent(string prompt)
    {
        var payload = new
        {
            model = _options.Model,
            max_tokens = MaxTokens,
            tools = new[]
            {
                new
                {
                    name = ToolName,
                    description = "Record the English translation and the chosen expense category ids for the expense.",
                    input_schema = new
                    {
                        type = "object",
                        properties = new
                        {
                            chineseDescription = new { type = "string" },
                            englishDescription = new { type = "string" },
                            groupId = new { type = "integer" },
                            subCategoryId = new { type = "integer" },
                            // Spell out the scale; nothing else in the schema tells the model what range to use.
                            confidence = new
                            {
                                type = "number",
                                description = "Confidence in the chosen category, from 0 (pure guess) to 1 (certain).",
                            },
                        },
                        required = new[] { "chineseDescription", "englishDescription", "groupId", "subCategoryId", "confidence" },
                    },
                },
            },
            tool_choice = new { type = "tool", name = ToolName },
            messages = new[]
            {
                new { role = "user", content = prompt },
            },
        };

        return JsonContent.Create(payload);
    }

    /// <summary>
    /// Finds the first <c>tool_use</c> block in the response's <c>content</c> array and
    /// deserializes its <c>input</c> object. Returns <c>null</c> (after logging a warning) when
    /// the expected structure is absent.
    /// </summary>
    private ModelAnswer? ParseAnswer(JsonElement root)
    {
        if (root.ValueKind != JsonValueKind.Object
            || !root.TryGetProperty("content", out var content)
            || content.ValueKind != JsonValueKind.Array)
        {
            _logger.LogWarning("Claude response contained no content array.");
            return null;
        }

        // The forced tool call lands in content[] as a tool_use block; its `input` is our object.
        foreach (var block in content.EnumerateArray())
        {
            var isToolUse = block.ValueKind == JsonValueKind.Object
                && block.TryGetProperty("type", out var type)
                && type.ValueKind == JsonValueKind.String
                && type.ValueEquals("tool_use");
            if (!isToolUse)
            {
                continue;
            }

            if (!block.TryGetProperty("input", out var input))
            {
                _logger.LogWarning("Claude tool_use block contained no input.");
                return null;
            }

            var parsed = input.Deserialize<ClaudeAnswer>(AnswerJsonOptions);
            if (parsed is null)
            {
                return null;
            }

            return new ModelAnswer(
                parsed.EnglishDescription,
                parsed.ChineseDescription,
                parsed.GroupId,
                parsed.SubCategoryId,
                parsed.Confidence);
        }

        _logger.LogWarning("Claude response contained no tool_use block.");
        return null;
    }

    /// <summary>Shape of the classify_expense tool input the model fills in.</summary>
    private sealed class ClaudeAnswer
    {
        public string? EnglishDescription { get; set; }
        public string? ChineseDescription { get; set; }
        public int GroupId { get; set; }
        public int SubCategoryId { get; set; }
        public double Confidence { get; set; }
    }
}
@@ -0,0 +1,11 @@
namespace ROLAC.API.Services.Ai;
/// <summary>
/// Settings for the Anthropic Claude API, bound from the "Claude" configuration section.
/// </summary>
public sealed class ClaudeOptions
{
    /// <summary>Secret key transmitted in the <c>x-api-key</c> header. Never commit it to source control.</summary>
    public string ApiKey { get; set; } = string.Empty;

    /// <summary>Model identifier placed in the request's <c>model</c> field.</summary>
    public string Model { get; set; } = "claude-opus-4-8";

    /// <summary>Root URL of the API; the endpoint path (<c>/messages</c>) is appended to it.</summary>
    public string BaseUrl { get; set; } = "https://api.anthropic.com/v1";

    /// <summary>Value sent in the <c>anthropic-version</c> request header.</summary>
    public string AnthropicVersion { get; set; } = "2023-06-01";
}
@@ -0,0 +1,119 @@
using System.Text.Json;
using Microsoft.EntityFrameworkCore;
using ROLAC.API.Data;
using ROLAC.API.DTOs.Expense;
namespace ROLAC.API.Services.Ai;
/// <summary>
/// Provider-independent expense-AI logic: loads the active category catalog, builds the
/// classification prompt, and validates the model's chosen ids against that catalog. Concrete
/// providers (Gemini, Claude) only implement <see cref="CallModelAsync"/> — the HTTP call plus
/// response parsing — so the catalog/prompt/validation code lives in exactly one place.
/// </summary>
public abstract class ExpenseAiServiceBase : IExpenseAiService
{
    /// <summary>
    /// Serializer settings for the catalog embedded in the prompt. The default encoder escapes
    /// every non-ASCII character as <c>\uXXXX</c>, which would hide the Chinese category names
    /// from the model; the relaxed encoder leaves them readable. The output goes into a prompt,
    /// not into HTML, so the relaxed escaping is safe here.
    /// </summary>
    private static readonly JsonSerializerOptions CatalogJsonOptions = new()
    {
        Encoder = System.Text.Encodings.Web.JavaScriptEncoder.UnsafeRelaxedJsonEscaping,
    };

    private readonly AppDbContext _db;

    /// <summary>Stores the database context used to load the category catalog.</summary>
    protected ExpenseAiServiceBase(AppDbContext db) => _db = db;

    /// <summary>One sub-category in the catalog passed to the model.</summary>
    protected sealed record CatalogSub(int Id, string NameEn, string? NameZh);

    /// <summary>One major category (with its sub-categories) in the catalog passed to the model.</summary>
    protected sealed record CatalogGroup(int Id, string NameEn, string? NameZh, IReadOnlyList<CatalogSub> Subs);

    /// <summary>The model's raw answer, before its ids are validated against the catalog.</summary>
    protected sealed record ModelAnswer(
        string? EnglishDescription, string? ChineseDescription, int GroupId, int SubCategoryId, double Confidence);

    /// <summary>
    /// Produces a translation + category suggestion for one expense.
    /// </summary>
    /// <param name="chineseText">Expense description as entered (often Traditional Chinese).</param>
    /// <param name="amount">Expense amount, included in the prompt as context.</param>
    /// <param name="ct">Token that cancels the catalog query and the provider call.</param>
    /// <returns>
    /// The validated suggestion, or an empty <see cref="ExpenseAiSuggestion"/> when the
    /// description is blank or the provider returns no answer.
    /// </returns>
    public async Task<ExpenseAiSuggestion> SuggestAsync(string chineseText, decimal amount, CancellationToken ct = default)
    {
        // Nothing to translate or classify: skip the database query and the provider call.
        if (string.IsNullOrWhiteSpace(chineseText))
        {
            return new ExpenseAiSuggestion();
        }

        var catalog = await LoadCatalogAsync(ct);
        var prompt = BuildPrompt(chineseText, amount, catalog);
        var answer = await CallModelAsync(prompt, ct);
        if (answer is null)
        {
            return new ExpenseAiSuggestion();
        }

        return BuildSuggestion(answer, catalog);
    }

    /// <summary>
    /// Call the provider's API with <paramref name="prompt"/> and return its parsed answer, or null
    /// on any failure (missing key, HTTP error, unparseable response). Implementations must not throw.
    /// </summary>
    protected abstract Task<ModelAnswer?> CallModelAsync(string prompt, CancellationToken ct);

    /// <summary>
    /// Loads the active category groups, each with its active sub-categories, both ordered by
    /// <c>SortOrder</c>. This is the allow-list the model must classify into.
    /// </summary>
    private async Task<List<CatalogGroup>> LoadCatalogAsync(CancellationToken ct)
    {
        return await _db.ExpenseCategoryGroups
            .AsNoTracking()
            .Where(group => group.IsActive)
            .OrderBy(group => group.SortOrder)
            .Select(group => new CatalogGroup(
                group.Id,
                group.Name_en,
                group.Name_zh,
                group.SubCategories
                    .Where(sub => sub.IsActive)
                    .OrderBy(sub => sub.SortOrder)
                    .Select(sub => new CatalogSub(sub.Id, sub.Name_en, sub.Name_zh))
                    .ToList()))
            .ToListAsync(ct);
    }

    /// <summary>
    /// Builds the classification prompt: task instructions, the expense description and amount,
    /// and the catalog serialized as JSON.
    /// </summary>
    private static string BuildPrompt(string chineseText, decimal amount, List<CatalogGroup> catalog)
    {
        var catalogJson = JsonSerializer.Serialize(catalog, CatalogJsonOptions);

        // Format with the invariant culture so the amount reads "12.50" regardless of the
        // server's regional settings (a comma-decimal culture would otherwise emit "12,50").
        var amountText = amount.ToString(System.Globalization.CultureInfo.InvariantCulture);

        return
            "You are a bookkeeping assistant for a church. Given an expense description (often in " +
            "Traditional Chinese) and its amount, do three things:\n" +
            "1. Correct any typos in the description and refine it into natural Traditional Chinese — " +
            "return it as chineseDescription.\n" +
            "2. Translate that into concise, natural accounting English (a short noun phrase, not a " +
            "full sentence) — return it as englishDescription.\n" +
            "3. Choose the single best matching major category (group) and sub-category from the catalog " +
            "below. You MUST pick a groupId and subCategoryId that appear in the catalog, and the " +
            "subCategoryId must belong to that groupId. If nothing fits well, choose the closest " +
            "\"Other / 其他\" option and lower your confidence.\n" +
            // The scale must be stated; "lower your confidence" alone leaves the range undefined.
            "Report confidence as a number between 0 and 1.\n\n" +
            $"Expense description: {chineseText}\n" +
            $"Amount: {amountText}\n\n" +
            $"Category catalog (JSON; each group has an Id, English/Chinese names, and its Subs):\n{catalogJson}";
    }

    /// <summary>
    /// Converts the model's raw answer into the suggestion returned to callers: trims the
    /// descriptions (blank becomes null), clamps confidence to [0, 1], and keeps only ids
    /// that exist in <paramref name="catalog"/>.
    /// </summary>
    private static ExpenseAiSuggestion BuildSuggestion(ModelAnswer answer, List<CatalogGroup> catalog)
    {
        var suggestion = new ExpenseAiSuggestion
        {
            EnglishDescription = string.IsNullOrWhiteSpace(answer.EnglishDescription)
                ? null
                : answer.EnglishDescription.Trim(),
            ChineseDescription = string.IsNullOrWhiteSpace(answer.ChineseDescription)
                ? null
                : answer.ChineseDescription.Trim(),
            // The model is asked for 0..1; clamp so a stray out-of-range value is not surfaced as-is.
            Confidence = Math.Clamp(answer.Confidence, 0d, 1d),
        };

        // Re-validate the returned ids against the catalog; drop anything that doesn't line up
        // (defends against a hallucinated id, or a sub-category that doesn't belong to the group).
        var group = catalog.FirstOrDefault(candidate => candidate.Id == answer.GroupId);
        if (group is not null)
        {
            suggestion.GroupId = group.Id;
            suggestion.GroupLabel = Label(group.NameEn, group.NameZh);

            var sub = group.Subs.FirstOrDefault(candidate => candidate.Id == answer.SubCategoryId);
            if (sub is not null)
            {
                suggestion.SubCategoryId = sub.Id;
                suggestion.SubLabel = Label(sub.NameEn, sub.NameZh);
            }
        }

        return suggestion;
    }

    /// <summary>Mirror the frontend's bilingual() convention: "English / 中文" (or just English).</summary>
    private static string Label(string nameEn, string? nameZh)
        => string.IsNullOrWhiteSpace(nameZh) ? nameEn : $"{nameEn} / {nameZh}";
}
@@ -1,23 +1,19 @@
using System.Net.Http.Json;
using System.Text.Json;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Options;
using ROLAC.API.Data;
using ROLAC.API.DTOs.Expense;
namespace ROLAC.API.Services.Ai;
/// <summary>
/// Calls the Google Gemini <c>generateContent</c> API to translate an expense description and
/// classify it into the church's existing expense category catalog (大項 / 系項). The full active
/// catalog is sent in the prompt so the model can only choose from real ids; any id it returns is
/// re-validated against the catalog before being surfaced, so a hallucinated id is dropped, not echoed.
/// Translates and classifies an expense via the Google Gemini <c>generateContent</c> API, using
/// Gemini's structured-output mode (<c>responseSchema</c>). The catalog, prompt, and id validation
/// come from <see cref="ExpenseAiServiceBase"/>; this class only owns the Gemini HTTP call + parse.
/// </summary>
public sealed class GeminiExpenseAiService : IExpenseAiService
public sealed class GeminiExpenseAiService : ExpenseAiServiceBase
{
private readonly HttpClient _http;
private readonly GeminiOptions _options;
private readonly AppDbContext _db;
private readonly ILogger<GeminiExpenseAiService> _logger;
public GeminiExpenseAiService(
@@ -25,57 +21,23 @@ public sealed class GeminiExpenseAiService : IExpenseAiService
IOptions<GeminiOptions> options,
AppDbContext db,
ILogger<GeminiExpenseAiService> logger)
: base(db)
{
_http = http;
_options = options.Value;
_db = db;
_logger = logger;
}
public async Task<ExpenseAiSuggestion> SuggestAsync(string chineseText, decimal amount, CancellationToken ct = default)
protected override async Task<ModelAnswer?> CallModelAsync(string prompt, CancellationToken ct)
{
// Load the active catalog: the allow-list the model must classify into.
var groups = await _db.ExpenseCategoryGroups
.AsNoTracking()
.Where(group => group.IsActive)
.OrderBy(group => group.SortOrder)
.Select(group => new
{
group.Id,
group.Name_en,
group.Name_zh,
Subs = group.SubCategories
.Where(sub => sub.IsActive)
.OrderBy(sub => sub.SortOrder)
.Select(sub => new { sub.Id, sub.Name_en, sub.Name_zh })
.ToList(),
})
.ToListAsync(ct);
if (string.IsNullOrWhiteSpace(_options.ApiKey))
{
_logger.LogWarning("Gemini API key is not configured; expense AI assist is disabled.");
return new ExpenseAiSuggestion();
return null;
}
try
{
var catalogJson = JsonSerializer.Serialize(groups);
var prompt =
"You are a bookkeeping assistant for a church. Given an expense description (often in " +
"Traditional Chinese) and its amount, do two things:\n" +
"1. Translate the description into concise, natural accounting English (a short noun phrase, " +
"not a full sentence).\n" +
"2. Choose the single best matching major category (group) and sub-category from the catalog " +
"below. You MUST pick a groupId and subCategoryId that appear in the catalog, and the " +
"subCategoryId must belong to that groupId. If nothing fits well, choose the closest " +
"\"Other / 其他\" option and lower your confidence.\n\n" +
$"Expense description: {chineseText}\n" +
$"Amount: {amount}\n\n" +
$"Category catalog (JSON; each group has an id, English/Chinese names, and its sub-categories):\n{catalogJson}\n\n" +
"Respond with JSON: englishDescription (string), groupId (integer), subCategoryId (integer), " +
"confidence (number 0..1).";
var payload = new
{
contents = new[]
@@ -90,12 +52,13 @@ public sealed class GeminiExpenseAiService : IExpenseAiService
type = "object",
properties = new
{
chineseDescription = new { type = "string" },
englishDescription = new { type = "string" },
groupId = new { type = "integer" },
subCategoryId = new { type = "integer" },
confidence = new { type = "number" },
},
required = new[] { "englishDescription", "groupId", "subCategoryId", "confidence" },
required = new[] { "chineseDescription", "englishDescription", "groupId", "subCategoryId", "confidence" },
},
},
};
@@ -112,7 +75,7 @@ public sealed class GeminiExpenseAiService : IExpenseAiService
{
var body = await response.Content.ReadAsStringAsync(ct);
_logger.LogWarning("Gemini returned {Status}: {Body}", (int)response.StatusCode, body);
return new ExpenseAiSuggestion();
return null;
}
// Navigate candidates[0].content.parts[0].text — the model's JSON answer as a string.
@@ -127,53 +90,27 @@ public sealed class GeminiExpenseAiService : IExpenseAiService
if (string.IsNullOrWhiteSpace(text))
{
_logger.LogWarning("Gemini response contained no text part.");
return new ExpenseAiSuggestion();
return null;
}
var parsed = JsonSerializer.Deserialize<GeminiAnswer>(
text, new JsonSerializerOptions { PropertyNameCaseInsensitive = true });
if (parsed is null) return new ExpenseAiSuggestion();
if (parsed is null) return null;
var suggestion = new ExpenseAiSuggestion
{
EnglishDescription = string.IsNullOrWhiteSpace(parsed.EnglishDescription)
? null
: parsed.EnglishDescription.Trim(),
Confidence = parsed.Confidence,
};
// Re-validate the returned ids against the catalog; drop anything that doesn't line up.
var group = groups.FirstOrDefault(candidate => candidate.Id == parsed.GroupId);
if (group is not null)
{
suggestion.GroupId = group.Id;
suggestion.GroupLabel = Label(group.Name_en, group.Name_zh);
var sub = group.Subs.FirstOrDefault(candidate => candidate.Id == parsed.SubCategoryId);
if (sub is not null)
{
suggestion.SubCategoryId = sub.Id;
suggestion.SubLabel = Label(sub.Name_en, sub.Name_zh);
}
}
return suggestion;
return new ModelAnswer(parsed.EnglishDescription, parsed.ChineseDescription, parsed.GroupId, parsed.SubCategoryId, parsed.Confidence);
}
catch (Exception ex)
{
_logger.LogError(ex, "Expense AI assist failed.");
return new ExpenseAiSuggestion();
_logger.LogError(ex, "Gemini expense AI assist failed.");
return null;
}
}
/// <summary>Mirror the frontend's bilingual() convention: "English / 中文" (or just English).</summary>
private static string Label(string nameEn, string? nameZh)
=> string.IsNullOrWhiteSpace(nameZh) ? nameEn : $"{nameEn} / {nameZh}";
/// <summary>Shape of the model's JSON answer (constrained by responseSchema).</summary>
/// <summary>Shape of Gemini's JSON answer (constrained by responseSchema).</summary>
private sealed class GeminiAnswer
{
public string? EnglishDescription { get; set; }
public string? ChineseDescription { get; set; }
public int GroupId { get; set; }
public int SubCategoryId { get; set; }
public double Confidence { get; set; }
+1 -1
View File
@@ -5,6 +5,6 @@ public sealed class GeminiOptions
{
/// <summary>API key sent as the <c>X-goog-api-key</c> header. Keep out of source control.</summary>
public string ApiKey { get; set; } = "";
public string Model { get; set; } = "gemini-2.5-flash";
public string Model { get; set; } = "gemini-2.5-flash-lite";
public string BaseUrl { get; set; } = "https://generativelanguage.googleapis.com/v1beta";
}