diff --git a/DevProxy.Abstractions/LanguageModel/PricesData.cs b/DevProxy.Abstractions/LanguageModel/PricesData.cs
index cfe2cebf..6f8a0509 100644
--- a/DevProxy.Abstractions/LanguageModel/PricesData.cs
+++ b/DevProxy.Abstractions/LanguageModel/PricesData.cs
@@ -9,6 +9,11 @@ namespace DevProxy.Abstractions.LanguageModel;
 public class ModelPrices
 {
     public double Input { get; set; }
+    /// <summary>
+    /// Price per 1M cached input tokens. When it is not greater than zero,
+    /// cost calculation falls back to <see cref="Input"/>.
+    /// </summary>
+    public double CachedInput { get; set; }
     public double Output { get; set; }
 }
 
@@ -44,7 +49,18 @@ public bool TryGetModelPrices(string modelName, out ModelPrices? prices)
         return false;
     }
 
-    public (double Input, double Output) CalculateCost(string modelName, long inputTokens, long outputTokens)
+    /// <summary>
+    /// Calculates the input and output cost for a model from its token usage.
+    /// </summary>
+    /// <param name="modelName">Model whose prices are looked up.</param>
+    /// <param name="inputTokens">Total input tokens, including cached ones.</param>
+    /// <param name="outputTokens">Number of output tokens.</param>
+    /// <param name="cachedInputTokens">
+    /// Portion of <paramref name="inputTokens"/> billed at the cached input
+    /// price; clamped to the range [0, inputTokens].
+    /// </param>
+    /// <returns>The input and output cost, based on per-1M-token prices.</returns>
+    public (double Input, double Output) CalculateCost(string modelName, long inputTokens, long outputTokens, long cachedInputTokens = 0)
     {
         if (!TryGetModelPrices(modelName, out var prices))
         {
@@ -53,8 +69,17 @@ public bool TryGetModelPrices(string modelName, out ModelPrices? prices)
 
         Debug.Assert(prices != null, "Prices data should not be null here.");
 
-        // Prices in the data are per 1M tokens
-        var inputCost = prices.Input * (inputTokens / 1_000_000.0);
+        // Prices in the data are per 1M tokens.
+        // When no cached input price is configured, fall back to the
+        // regular input price so all tokens are billed correctly.
+        var effectiveCachedPrice = prices.CachedInput > 0 ? prices.CachedInput : prices.Input;
+        // Cached tokens are a subset of the input tokens. Clamp the count to
+        // [0, inputTokens] so an inconsistent usage report cannot produce a
+        // negative regular-token count.
+        var billableCachedTokens = System.Math.Max(0L, System.Math.Min(cachedInputTokens, inputTokens));
+        var regularInputTokens = inputTokens - billableCachedTokens;
+        var inputCost = (prices.Input * (regularInputTokens / 1_000_000.0))
+            + (effectiveCachedPrice * (billableCachedTokens / 1_000_000.0));
         var outputCost = prices.Output * (outputTokens / 1_000_000.0);
 
         return (inputCost, outputCost);
diff --git a/DevProxy.Plugins/Inspection/LanguageModelPricingLoader.cs b/DevProxy.Plugins/Inspection/LanguageModelPricingLoader.cs
index 2b3cff38..bee49a76 100644
--- a/DevProxy.Plugins/Inspection/LanguageModelPricingLoader.cs
+++ b/DevProxy.Plugins/Inspection/LanguageModelPricingLoader.cs
@@ -40,11 +40,19 @@ protected override void LoadData(string fileContents)
             if (modelProperty.Value.TryGetProperty("input", out var inputElement) &&
                 modelProperty.Value.TryGetProperty("output", out var outputElement))
             {
-                pricesData[modelName] = new()
+                var modelPrices = new ModelPrices
                 {
                     Input = inputElement.GetDouble(),
                     Output = outputElement.GetDouble()
                 };
+
+                // "cached_input" is optional; without it CachedInput keeps its default of 0.
+                if (modelProperty.Value.TryGetProperty("cached_input", out var cachedInputElement))
+                {
+                    modelPrices.CachedInput = cachedInputElement.GetDouble();
+                }
+
+                pricesData[modelName] = modelPrices;
             }
         }
 
diff --git a/DevProxy.Plugins/Inspection/OpenAITelemetryPlugin.cs b/DevProxy.Plugins/Inspection/OpenAITelemetryPlugin.cs
index e76064b1..58836067 100644
--- a/DevProxy.Plugins/Inspection/OpenAITelemetryPlugin.cs
+++ b/DevProxy.Plugins/Inspection/OpenAITelemetryPlugin.cs
@@ -931,7 +931,8 @@ private void RecordUsageMetrics(Activity activity, OpenAIRequest request, OpenAI
             return;
         }
 
-        var (inputCost, outputCost) = Configuration.Prices.CalculateCost(response.Model, usage.PromptTokens, usage.CompletionTokens);
+        var cachedTokens = usage.PromptTokensDetails?.CachedTokens ?? 0L;
+        var (inputCost, outputCost) = Configuration.Prices.CalculateCost(response.Model, usage.PromptTokens, usage.CompletionTokens, cachedTokens);
 
         if (inputCost > 0)
         {
@@ -1042,7 +1043,8 @@ private List GetReportModelUsa
             return usagePerModel;
         }
 
-        var (inputCost, outputCost) = Configuration.Prices.CalculateCost(response.Model, usage.PromptTokens, usage.CompletionTokens);
+        var cachedTokens = usage.PromptTokensDetails?.CachedTokens ?? 0L;
+        var (inputCost, outputCost) = Configuration.Prices.CalculateCost(response.Model, usage.PromptTokens, usage.CompletionTokens, cachedTokens);
 
         if (inputCost > 0)
         {