summaryrefslogtreecommitdiff
path: root/src/ai_providers/openAI.cpp
diff options
context:
space:
mode:
authorAldrik Ramaekers <aldrikboy@gmail.com>2025-09-28 17:41:50 +0200
committerAldrik Ramaekers <aldrikboy@gmail.com>2025-09-28 17:41:50 +0200
commita2299b0bae21c8f05f091732a78fc250cbd5e016 (patch)
treeb1de8319d1088b1209ba377665995b4543180d4b /src/ai_providers/openAI.cpp
parentd8c4d84dc75300c6d4d8b0adceafa33741960b92 (diff)
openAI invoice importing v0.1
Diffstat (limited to 'src/ai_providers/openAI.cpp')
-rw-r--r--src/ai_providers/openAI.cpp252
1 files changed, 252 insertions, 0 deletions
diff --git a/src/ai_providers/openAI.cpp b/src/ai_providers/openAI.cpp
new file mode 100644
index 0000000..5dd2c50
--- /dev/null
+++ b/src/ai_providers/openAI.cpp
@@ -0,0 +1,252 @@
+/*
+* Copyright (c) 2025 Aldrik Ramaekers <aldrik.ramaekers@gmail.com>
+*
+* Permission to use, copy, modify, and/or distribute this software for any
+* purpose with or without fee is hereby granted, provided that the above
+* copyright notice and this permission notice appear in all copies.
+*
+* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <fstream>
+#include <iostream>
+#include <string>
+
+#define CPPHTTPLIB_OPENSSL_SUPPORT
+#include "httplib.h"
+#include "log.hpp"
+#include "ai_service.hpp"
+
+/**
+ * Minimal, non-validating lookup of a value inside a JSON text.
+ *
+ * Searches `json` for the quoted key `"key"`, moves on to the next occurrence
+ * `skip` times, and copies the value that follows the ':' into `out`.
+ * String values are copied without their surrounding quotes; escape sequences
+ * are kept verbatim (a `\"` stays a backslash followed by a quote). Values that
+ * are not quoted (numbers, true/false/null) are copied up to the next ',',
+ * '}', ']' or whitespace character.
+ *
+ * This is a plain substring search, not a parser: the first textual match of
+ * `"key"` wins, regardless of nesting and even if it is itself a string value.
+ *
+ * `out` may point at `json` itself: the value always starts behind the key, so
+ * the write position never overtakes the read position.
+ *
+ * @param json     NUL-terminated JSON text.
+ * @param key      Key to look for, without quotes.
+ * @param out      Destination buffer; always NUL-terminated on success.
+ * @param out_size Size of `out` in bytes; longer values are truncated.
+ * @param skip     Number of matches of the key to skip first (0 = first match).
+ * @return `out` on success, NULL if an argument is invalid, the key is too long
+ *         for the internal pattern buffer, or the requested occurrence does not
+ *         exist.
+ */
+static char *extract_json_value(const char *json, const char *key, char *out, size_t out_size, int skip = 0) {
+    if (!json || !key || !out || out_size == 0) return NULL;
+
+    char pattern[128];
+    const int pattern_len = snprintf(pattern, sizeof(pattern), "\"%s\"", key);
+    // A truncated pattern could match an unrelated key, so refuse it.
+    if (pattern_len < 0 || (size_t)pattern_len >= sizeof(pattern)) return NULL;
+
+    const char *pos = strstr(json, pattern);
+    // Stop as soon as the key runs out; strstr(NULL + 1, ...) is undefined.
+    while (pos && skip > 0) {
+        pos = strstr(pos + 1, pattern);
+        skip--;
+    }
+    if (!pos) return NULL;
+
+    pos = strchr(pos + pattern_len, ':');
+    if (!pos) return NULL;
+    pos++;
+
+    // Skip whitespace between the colon and the value.
+    while (*pos == ' ' || *pos == '\t' || *pos == '\n' || *pos == '\r') pos++;
+
+    // Consume exactly one opening quote so that an empty string ("") yields
+    // an empty result instead of swallowing whatever follows it.
+    const bool quoted = (*pos == '"');
+    if (quoted) pos++;
+
+    size_t i = 0;
+    bool escaped = false; // true when the previous character was an unescaped backslash
+    while (*pos && i < out_size - 1) {
+        if (quoted) {
+            if (*pos == '"' && !escaped) break;
+            escaped = (*pos == '\\' && !escaped);
+        }
+        else if (*pos == ',' || *pos == '}' || *pos == ']' ||
+                 *pos == ' ' || *pos == '\t' || *pos == '\n' || *pos == '\r') {
+            break;
+        }
+        out[i++] = *pos++;
+    }
+    out[i] = '\0';
+    return out;
+}
+
+/**
+ * Return the part of `path` that follows its last directory separator.
+ *
+ * A '/' anywhere in the path takes precedence: only when the path contains no
+ * '/' at all is the last '\\' used instead. Without any separator the whole
+ * path is returned. The result points into `path`; nothing is copied.
+ */
+static const char* get_filename(const char* path) {
+    const char* last_separator = strrchr(path, '/'); // Unix-style paths
+    if (last_separator == NULL) {
+        last_separator = strrchr(path, '\\'); // Windows-style paths
+    }
+    return last_separator != NULL ? last_separator + 1 : path;
+}
+
+/**
+ * Create a copy of `input` that can be placed between the quotes of a JSON
+ * string: every '"' becomes `\"` and every newline character is dropped.
+ *
+ * Other characters, including backslashes, are copied unchanged, so the input
+ * must not contain anything else that needs JSON escaping.
+ *
+ * @param input       NUL-terminated text to escape; NULL yields NULL.
+ * @param buffer_size Size hint from the caller. The allocation is derived from
+ *                    the actual input length and is never smaller than
+ *                    `buffer_size + 100`.
+ * @return Newly malloc'ed string that the caller must free(), or NULL when
+ *         `input` is NULL or the allocation fails.
+ */
+static char *escape_quotes(const char *input, size_t buffer_size) {
+    if (!input) return NULL;
+
+    // Worst case every character is a quote and doubles in size. Sizing the
+    // buffer from the hint alone overflowed once the input held more than
+    // ~100 quotes.
+    size_t capacity = strlen(input) * 2 + 1;
+    if (capacity < buffer_size + 100) capacity = buffer_size + 100;
+
+    char *result = (char*)malloc(capacity);
+    if (!result) return NULL;
+
+    char *dst = result;
+    for (const char *src = input; *src; src++) {
+        if (*src == '\n') continue; // newlines are intentionally removed
+        if (*src == '"') *dst++ = '\\';
+        *dst++ = *src;
+    }
+    *dst = '\0';
+
+    return result;
+}
+
+/**
+ * Undo, in place, the JSON escaping that matters for the extracted text:
+ * `\"` becomes '"' and the two-character sequence `\n` is removed.
+ *
+ * All other characters, including other escape sequences such as `\\` or
+ * `\uXXXX`, are left untouched. The result is never longer than the input, so
+ * the rewrite happens inside the caller's buffer.
+ *
+ * @param input NUL-terminated, writable string; NULL yields NULL.
+ * @return `input`.
+ */
+static char *unescape_quotes(char *input) {
+    if (!input) return NULL;
+
+    char *src = input;
+    char *dst = input;
+
+    while (*src) {
+        if (src[0] == '\\' && src[1] == 'n') {
+            // Drop the sequence and re-examine what follows. Copying the next
+            // character blindly ran past the terminator when the text ended
+            // in "\n" and left back-to-back escapes undecoded.
+            src += 2;
+            continue;
+        }
+        if (src[0] == '\\' && src[1] == '"') {
+            src++; // skip the backslash, keep the quote
+        }
+        *dst++ = *src++;
+    }
+    *dst = '\0';
+
+    return input;
+}
+
+/**
+ * Ask the model a question about a previously uploaded file and hand back the
+ * text of its answer.
+ *
+ * With TESTING_IMPORT defined (which this function currently always does) no
+ * request is sent; a canned response is parsed instead and `query`,
+ * `query_length` and `file_id` are ignored. Otherwise the query is escaped,
+ * POSTed to /v1/responses on api.openai.com together with the file id, and the
+ * response body is parsed.
+ *
+ * In both cases the value of the first "text" key of the response is
+ * extracted and unescaped.
+ *
+ * @param query        Question to send. The live branch free()s this buffer;
+ *                     the testing branch does not touch it.
+ * @param query_length Length of `query`, used as a size hint for escaping.
+ * @param file_id      Id of the uploaded file the question refers to.
+ * @param response     Receives a newly allocated, NUL-terminated string of at
+ *                     least 100000 bytes capacity. Only written on success.
+ *                     NOTE(review): presumably the caller free()s it - confirm.
+ * @return true on success; false when the request fails, memory runs out or
+ *         the response holds no "text" value.
+ */
+static bool _openAI_query_with_file(char* query, size_t query_length, char* file_id, char** response)
+{
+    // NOTE(review): defined unconditionally, so the live request below is
+    // compiled out and every call returns the canned invoice. The macro also
+    // stays defined for the remainder of the translation unit.
+    #define TESTING_IMPORT
+
+    #ifndef TESTING_IMPORT
+    const char *api_key = administration_get_ai_service().api_key_public;
+
+    httplib::SSLClient cli("api.openai.com", 443);
+    //cli.enable_server_certificate_verification(false);
+
+    char* query_escaped = escape_quotes(query, query_length);
+    free(query);
+    if (!query_escaped) {
+        log_error("ERROR Failed to escape query.");
+        return 0;
+    }
+
+    // Built as a std::string so that neither a long file id nor the growth
+    // caused by escaping can truncate the JSON.
+    std::string body = "{\"model\":\"gpt-5-nano\", \"input\": [ { \"role\": \"user\", \"content\": [ { \"type\": \"input_file\", \"file_id\": \"";
+    body += file_id;
+    body += "\" }, { \"type\": \"input_text\", \"text\": \"";
+    body += query_escaped;
+    body += "\" } ] } ] }";
+    free(query_escaped);
+
+    httplib::Headers headers;
+    headers.insert(std::make_pair("Authorization", std::string("Bearer ") + api_key));
+
+    httplib::Result res = cli.Post("/v1/responses", headers, body, "application/json");
+
+    if (!res || res->status != 200) {
+        log_error("ERROR Failed to query API.");
+        return 0;
+    }
+
+    const char* response_body = res->body.c_str();
+    #else
+    (void)query;
+    (void)query_length;
+    (void)file_id;
+
+    const char* response_body =
+        "{\n \"id\": \"resp_68d9482030fc8196930b43b6b28feeb104e98afee829eee0\",\n \"object\": \"response\",\n \"created_at\": 1759070240,\n \"status\": \"completed\",\n \"background\": false,\n \"billing\": {\n \"payer\": \"developer\"\n },\n \"error\": null,\n \"incomplete_details\": null,\n \"instructions\": null,\n \"max_output_tokens\": null,\n \"max_tool_calls\": null,\n \"model\": \"gpt-5-2025-08-07\",\n \"output\": [\n {\n \"id\": \"rs_68d94821d1f0819694533a6ed7ed6b2904e98afee829eee0\",\n \"type\": \"reasoning\",\n \"summary\": []\n },\n {\n \"id\": \"msg_68d948a09e0c819696782e09c6b7626104e98afee829eee0\",\n \"type\": \"message\",\n \"status\": \"completed\",\n \"content\": [\n {\n \"type\": \"output_text\",\n \"annotations\": [],\n \"logprobs\": [],\n \"text\": \"<Invoice xmlns=\\\"urn:oasis:names:specification:ubl:schema:xsd:Invoice-2\\\" xmlns:cac=\\\"urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2\\\" xmlns:cbc=\\\"urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2\\\">\\n <cbc:CustomizationID>urn:cen.eu:en16931:2017#compliant#urn:fdc:peppol.eu:2017:poacc:billing:3.0</cbc:CustomizationID>\\n <cbc:ProfileID>urn:fdc:peppol.eu:2017:poacc:billing:01:1.0</cbc:ProfileID>\\n <cbc:ID>586928</cbc:ID>\\n <cbc:IssueDate>2025-03-17</cbc:IssueDate>\\n <cbc:DueDate>2025-03-24</cbc:DueDate>\\n <cbc:InvoiceTypeCode>380</cbc:InvoiceTypeCode>\\n <cbc:DocumentCurrencyCode>EUR</cbc:DocumentCurrencyCode>\\n <cac:DespatchDocumentReference>\\n <cbc:ID>699607</cbc:ID>\\n </cac:DespatchDocumentReference>\\n <cac:AdditionalDocumentReference>\\n <cbc:ID>AR385893</cbc:ID>\\n <cbc:DocumentDescription>Jouw bestelling : 420675-WWW.SCHROEVEN-EXPRESS.NL-14.03.25-18:31</cbc:DocumentDescription>\\n </cac:AdditionalDocumentReference>\\n <cac:OrderReference>\\n <cbc:ID>420675-WWW.SCHROEVEN-EXPRESS.NL-14.03.25-18:31</cbc:ID>\\n </cac:OrderReference>\\n <cac:ProjectReference>\\n <cbc:ID></cbc:ID>\\n </cac:ProjectReference>\\n "
+        "<cbc:AccountingCost></cbc:AccountingCost>\\n <cac:AccountingSupplierParty>\\n <cac:Party>\\n <cbc:EndpointID schemeID=\\\"\\\"></cbc:EndpointID>\\n <cac:PartyIdentification>\\n <cbc:ID schemeID=\\\"ZZZ\\\">R.C le mans B 302 494 224</cbc:ID>\\n </cac:PartyIdentification>\\n <cac:PartyName>\\n <cbc:Name>Visserie-service</cbc:Name>\\n </cac:PartyName>\\n <cac:PostalAddress>\\n <cbc:StreetName>Z.A Nord</cbc:StreetName>\\n <cbc:AdditionalStreetName></cbc:AdditionalStreetName>\\n <cbc:CityName>Parce sur Sarthe</cbc:CityName>\\n <cbc:PostalZone>72300</cbc:PostalZone>\\n <cbc:CountrySubentity></cbc:CountrySubentity>\\n <cac:Country>\\n <cbc:IdentificationCode>FR</cbc:IdentificationCode>\\n </cac:Country>\\n </cac:PostalAddress>\\n <cac:PartyTaxScheme>\\n <cbc:CompanyID>FR57 302 494 224</cbc:CompanyID>\\n <cac:TaxScheme>\\n <cbc:ID>VAT</cbc:ID>\\n </cac:TaxScheme>\\n </cac:PartyTaxScheme>\\n <cac:PartyLegalEntity>\\n <cbc:RegistrationName>Visserie Service SAS</cbc:RegistrationName>\\n </cac:PartyLegalEntity>\\n <cac:Contact>\\n <cbc:Name>AMELIE L</cbc:Name>\\n <cbc:Telephone>02.43.62.09.08</cbc:Telephone>\\n <cbc:ElectronicMail>klantenservice@schroeven-express.nl</cbc:ElectronicMail>\\n </cac:Contact>\\n </cac:Party>\\n </cac:AccountingSupplierParty>\\n <cac:AccountingCustomerParty>\\n <cac:Party>\\n <cbc:EndpointID schemeID=\\\"\\\"></cbc:EndpointID>\\n <cac:PartyIdentification>\\n <cbc:ID schemeID=\\\"ZZZ\\\">cl585187</cbc:ID>\\n </cac:PartyIdentification>\\n <cac:PartyName>\\n <cbc:Name>ALDRIK RAMAEKERS</cbc:Name>\\n </cac:PartyName>\\n <cac:PostalAddress>\\n <cbc:StreetName>KEERDERSTRAAT 81</cbc:StreetName>\\n <cbc:AdditionalStreetName></cbc:AdditionalStreetName>\\n <cbc:CityName>MAASTRICHT</cbc:CityName>\\n <cbc:PostalZone>6226X</cbc:PostalZone>\\n <cbc:CountrySubentity></cbc:CountrySubentity>\\n <cac:Country>\\n <cbc:IdentificationCode>NL</cbc:IdentificationCode>\\n </cac:Country>\\n </cac:PostalAddress>\\n <cac:PartyTaxScheme>\\n <cbc:CompanyID></cbc:CompanyID>\\n "
+        "<cac:TaxScheme>\\n <cbc:ID>VAT</cbc:ID>\\n </cac:TaxScheme>\\n </cac:PartyTaxScheme>\\n <cac:PartyLegalEntity>\\n <cbc:RegistrationName>ALDRIK RAMAEKERS</cbc:RegistrationName>\\n </cac:PartyLegalEntity>\\n <cac:Contact>\\n <cbc:Name>A RAMAEKERS</cbc:Name>\\n <cbc:Telephone>31618260377</cbc:Telephone>\\n <cbc:ElectronicMail>aldrikboy@gmail.com</cbc:ElectronicMail>\\n </cac:Contact>\\n </cac:Party>\\n </cac:AccountingCustomerParty>\\n <cac:Delivery>\\n <cbc:ActualDeliveryDate>2025-03-17</cbc:ActualDeliveryDate>\\n <cac:DeliveryLocation>\\n <cac:Address>\\n <cbc:StreetName>KEERDERSTRAAT 81</cbc:StreetName>\\n <cbc:AdditionalStreetName></cbc:AdditionalStreetName>\\n <cbc:CityName>MAASTRICHT</cbc:CityName>\\n <cbc:PostalZone>6226X</cbc:PostalZone>\\n <cbc:CountrySubentity></cbc:CountrySubentity>\\n <cac:Country>\\n <cbc:IdentificationCode>NL</cbc:IdentificationCode>\\n </cac:Country>\\n </cac:Address>\\n </cac:DeliveryLocation>\\n <cac:DeliveryParty>\\n <cac:PartyName>\\n <cbc:Name>ALDRIK RAMAEKERS</cbc:Name>\\n </cac:PartyName>\\n </cac:DeliveryParty>\\n </cac:Delivery>\\n <cac:PaymentMeans>\\n <cbc:PaymentMeansCode></cbc:PaymentMeansCode>\\n <cbc:PaymentID>586928</cbc:PaymentID>\\n <cac:PayeeFinancialAccount>\\n <cbc:ID>FR76 1790 6001 1272 5017 0700 137</cbc:ID>\\n <cbc:Name>Visserie Service SAS</cbc:Name>\\n <cac:FinancialInstitutionBranch>\\n <cac:FinancialInstitution>\\n <cbc:ID>AGRIFRPP879</cbc:ID>\\n </cac:FinancialInstitution>\\n </cac:FinancialInstitutionBranch>\\n </cac:PayeeFinancialAccount>\\n <cac:PayerFinancialAccount>\\n <cbc:ID></cbc:ID>\\n </cac:PayerFinancialAccount>\\n </cac:PaymentMeans>\\n <cac:TaxTotal>\\n <cbc:TaxAmount currencyID=\\\"EUR\\\">2.59</cbc:TaxAmount>\\n <cac:TaxSubtotal>\\n <cbc:TaxableAmount currencyID=\\\"EUR\\\">12.36</cbc:TaxableAmount>\\n <cbc:TaxAmount currencyID=\\\"EUR\\\">2.59</cbc:TaxAmount>\\n <cac:TaxCategory>\\n <cbc:ID></cbc:ID>\\n <cbc:Percent>21</cbc:Percent>\\n <cac:TaxScheme>\\n <cbc:ID>VAT</cbc:ID>\\n "
+        "</cac:TaxScheme>\\n </cac:TaxCategory>\\n </cac:TaxSubtotal>\\n </cac:TaxTotal>\\n <cac:LegalMonetaryTotal>\\n <cbc:LineExtensionAmount currencyID=\\\"EUR\\\">6.95</cbc:LineExtensionAmount>\\n <cbc:TaxExclusiveAmount currencyID=\\\"EUR\\\">12.36</cbc:TaxExclusiveAmount>\\n <cbc:TaxInclusiveAmount currencyID=\\\"EUR\\\">14.95</cbc:TaxInclusiveAmount>\\n <cbc:PayableAmount currencyID=\\\"EUR\\\">14.95</cbc:PayableAmount>\\n </cac:LegalMonetaryTotal>\\n <cac:InvoiceLine>\\n <cbc:ID>1</cbc:ID>\\n <cbc:InvoicedQuantity unitCode=\\\"\\\">500</cbc:InvoicedQuantity>\\n <cbc:LineExtensionAmount currencyID=\\\"EUR\\\">6.95</cbc:LineExtensionAmount>\\n <cac:AllowanceCharge>\\n <cbc:ChargeIndicator>false</cbc:ChargeIndicator>\\n <cbc:AllowanceChargeReason>Discount</cbc:AllowanceChargeReason>\\n <cbc:MultiplierFactorNumeric></cbc:MultiplierFactorNumeric>\\n <cbc:Amount currencyID=\\\"EUR\\\"></cbc:Amount>\\n <cbc:BaseAmount currencyID=\\\"EUR\\\"></cbc:BaseAmount>\\n </cac:AllowanceCharge>\\n <cac:Item>\\n <cbc:Name>Metalen schroeven RVS A2 gefreesde kop Pozi N\\u00b01 M2X4 DIN 965 ISO 7046, VS0109, VS0110</cbc:Name>\\n <cac:AdditionalItemProperty>\\n <cbc:Name>Internal Tax Rate ID</cbc:Name>\\n <cbc:Value></cbc:Value>\\n </cac:AdditionalItemProperty>\\n <cac:ClassifiedTaxCategory>\\n <cbc:ID></cbc:ID>\\n <cbc:Percent>21</cbc:Percent>\\n <cac:TaxScheme>\\n <cbc:ID>VAT</cbc:ID>\\n </cac:TaxScheme>\\n </cac:ClassifiedTaxCategory>\\n </cac:Item>\\n <cac:Price>\\n <cbc:PriceAmount currencyID=\\\"EUR\\\">1.39</cbc:PriceAmount>\\n </cac:Price>\\n </cac:InvoiceLine>\\n</Invoice>\"\n }\n ],\n \"role\": \"assistant\"\n }\n ],\n \"parallel_tool_calls\": true,\n";
+    #endif
+
+    if (!response) {
+        log_error("ERROR No response pointer given.");
+        return 0;
+    }
+
+    // Size the buffer from the actual body so large responses are neither
+    // truncated nor left without a terminator; keep the historical 100000
+    // byte minimum in case callers rely on that capacity.
+    const size_t response_len = strlen(response_body);
+    const size_t capacity = (response_len + 1 < 100000) ? 100000 : response_len + 1;
+
+    char *text = (char*)calloc(capacity, 1);
+    if (!text) {
+        log_error("ERROR Failed to allocate response buffer.");
+        return 0;
+    }
+
+    // Without a "text" value there is nothing to hand back; reporting success
+    // here would pass raw JSON on to the caller.
+    if (!extract_json_value(response_body, "text", text, capacity)) {
+        log_error("ERROR Response did not contain any text.");
+        free(text);
+        return 0;
+    }
+
+    *response = unescape_quotes(text);
+
+    return 1;
+}
+
+/**
+ * Upload a local file to api.openai.com in three steps and return the id
+ * under which it can be referenced afterwards.
+ *
+ *  1. POST /v1/uploads announces the file (name, size, purpose "user_data",
+ *     mime type "application/pdf", expiry 3600 seconds after creation).
+ *  2. The file is read in parts of at most 64,000,000 bytes; each part is
+ *     sent as multipart form data to /v1/uploads/<upload id>/parts.
+ *  3. POST /v1/uploads/<upload id>/complete is sent with the collected part
+ *     ids.
+ *
+ * @param file_path   Path of the file to upload.
+ * @param file_id     Receives the second "id" value found in the completion
+ *                    response. NOTE(review): presumably the id of the file
+ *                    object nested in the upload object - confirm against the
+ *                    API documentation.
+ * @param file_id_len Size of `file_id` in bytes.
+ * @return true on success; false if the file cannot be read, a request fails
+ *         or a response lacks an expected id.
+ */
+static bool _openAI_upload_file(char* file_path, char* file_id, size_t file_id_len)
+{
+    if (!file_path || !file_id || file_id_len == 0) {
+        log_error("ERROR: invalid arguments for file upload.");
+        return 0;
+    }
+
+    const char *api_key = administration_get_ai_service().api_key_public;
+    const char *filename = get_filename(file_path);
+
+    FILE* orig_file = fopen(file_path, "rb");
+    if (orig_file == NULL) {
+        log_error("ERROR: file to upload could not be opened.");
+        return 0;
+    }
+
+    long sz = -1;
+    if (fseek(orig_file, 0L, SEEK_END) == 0) sz = ftell(orig_file);
+    if (sz < 0 || fseek(orig_file, 0L, SEEK_SET) != 0) {
+        log_error("ERROR: size of file to upload could not be determined.");
+        fclose(orig_file);
+        return 0;
+    }
+
+    httplib::SSLClient cli("api.openai.com", 443);
+    //cli.enable_server_certificate_verification(false);
+
+    // ---------- Step 1: Create upload ----------
+
+    // The name ends up inside a JSON string, so quotes have to be escaped.
+    char *filename_escaped = escape_quotes(filename, strlen(filename));
+    if (!filename_escaped) {
+        log_error("ERROR: file name could not be escaped.");
+        fclose(orig_file);
+        return 0;
+    }
+
+    // std::string instead of a fixed buffer: a long file name must not
+    // truncate the request.
+    std::string body = std::string("{\"filename\":\"") + filename_escaped +
+        "\",\"purpose\":\"user_data\", \"bytes\": " + std::to_string(sz) +
+        ", \"mime_type\": \"application/pdf\", \"expires_after\": { \"anchor\": \"created_at\", \"seconds\": 3600 } }";
+    free(filename_escaped);
+
+    httplib::Headers headers;
+    headers.insert(std::make_pair("Authorization", std::string("Bearer ") + api_key));
+
+    httplib::Result res = cli.Post("/v1/uploads", headers, body, "application/json");
+    if (!res || res->status != 200) {
+        log_error("ERROR Failed to create upload.");
+        fclose(orig_file);
+        return 0;
+    }
+
+    char upload_id[128];
+    if (!extract_json_value(res->body.c_str(), "id", upload_id, sizeof(upload_id))) {
+        log_error("ERROR Upload response did not contain an id.");
+        fclose(orig_file);
+        return 0;
+    }
+    const size_t part_size = 64000000; // 64mb
+    log_info("Created upload %s with part size %zu.", upload_id, part_size);
+
+    // ---------- Step 2: Upload parts ----------
+
+    // Read straight into the string that is handed to the HTTP client and
+    // never reserve more than the file can fill.
+    const size_t buffer_size = (static_cast<size_t>(sz) < part_size) ? static_cast<size_t>(sz) : part_size;
+    std::string chunk;
+    std::string part_ids; // comma separated, quoted ids for the completion request
+
+    int part_number = 0;
+    while (1) {
+        chunk.resize(buffer_size);
+        const size_t read_bytes = (buffer_size > 0) ? fread(&chunk[0], 1, buffer_size, orig_file) : 0;
+        if (read_bytes == 0) break;
+        chunk.resize(read_bytes);
+
+        httplib::Headers part_headers;
+        part_headers.insert(std::make_pair("Authorization", std::string("Bearer ") + api_key));
+
+        httplib::UploadFormDataItems items = {
+            {"data", chunk, filename, "application/octet-stream"}
+        };
+
+        const std::string path = std::string("/v1/uploads/") + upload_id + "/parts?part_number=" + std::to_string(part_number);
+
+        httplib::Result part_res = cli.Post(path, part_headers, items);
+
+        if (!part_res || part_res->status != 200) {
+            log_error("Failed to upload part %d.", part_number);
+            fclose(orig_file);
+            return 0;
+        }
+
+        char part_id[128];
+        if (!extract_json_value(part_res->body.c_str(), "id", part_id, sizeof(part_id))) {
+            log_error("Response for part %d did not contain an id.", part_number);
+            fclose(orig_file);
+            return 0;
+        }
+        if (!part_ids.empty()) part_ids += ", ";
+        part_ids += "\"";
+        part_ids += part_id;
+        part_ids += "\"";
+
+        log_info("Uploaded part %d\n", part_number);
+        part_number++;
+    }
+
+    // fread() returns 0 for both end of file and errors; only the former may
+    // lead to completing the upload.
+    const bool read_failed = ferror(orig_file) != 0;
+    fclose(orig_file);
+    if (read_failed) {
+        log_error("ERROR: reading the file to upload failed.");
+        return 0;
+    }
+
+    // ---------- Step 3: Complete upload ----------
+
+    const std::string completion_body = "{\"part_ids\": [" + part_ids + "]}";
+
+    httplib::Result complete_res = cli.Post(std::string("/v1/uploads/") + upload_id + "/complete",
+        headers, completion_body, "application/json");
+
+    if (!complete_res || complete_res->status != 200) {
+        log_error("ERROR Failed to complete upload.");
+        return 0;
+    }
+
+    // skip = 1: the first "id" in the response is passed over, the second one
+    // is returned to the caller.
+    if (!extract_json_value(complete_res->body.c_str(), "id", file_id, file_id_len, 1)) {
+        log_error("ERROR Completion response did not contain a file id.");
+        return 0;
+    }
+
+    return 1;
+}
+
+// Function table that exposes the OpenAI implementation defined in this file.
+// The initialisers are positional: the upload function comes first and the
+// query function second, so this order has to match the member order of
+// ai_provider_impl (declared outside this file, presumably in
+// ai_service.hpp - TODO confirm).
+ai_provider_impl _chatgpt_api_provider = {
+ _openAI_upload_file,
+ _openAI_query_with_file,
+}; \ No newline at end of file