diff options
| author | Aldrik Ramaekers <aldrikboy@gmail.com> | 2024-11-23 22:33:43 +0100 |
|---|---|---|
| committer | Aldrik Ramaekers <aldrikboy@gmail.com> | 2024-11-23 22:33:43 +0100 |
| commit | b1e857cf1471d1871a9396696b22fa531da98249 (patch) | |
| tree | 3923008a8653057698cb339faf6dcfa92e18364b /project-base/src/string_utils.c | |
| parent | 106bb7fcadf637cec883648916cc8d19529d6199 (diff) | |
add projbase to repo
Diffstat (limited to 'project-base/src/string_utils.c')
| -rw-r--r-- | project-base/src/string_utils.c | 703 |
1 files changed, 703 insertions, 0 deletions
diff --git a/project-base/src/string_utils.c b/project-base/src/string_utils.c new file mode 100644 index 0000000..54395e1 --- /dev/null +++ b/project-base/src/string_utils.c @@ -0,0 +1,703 @@ +/* +* BSD 2-Clause “Simplified” License +* Copyright (c) 2019, Aldrik Ramaekers, aldrik.ramaekers@protonmail.com +* All rights reserved. +*/ + +char *string_get_next(char *list, char *buffer, char seperator) +{ + char *orig = list; + + utf8_int32_t ch; + while((list = utf8codepoint(list, &ch)) && ch != seperator && ch) + { + list++; + } + + char len = list - orig; + + string_copyn(buffer, orig, len); + return list; +} + +bool string_match(char *first, char *second) +{ + // If we reach at the end of both strings, we are done + if (*first == '\0' && *second == '\0') + return true; + + // Make sure that the characters after '*' are present + // in second string. This function assumes that the first + // string will not contain two consecutive '*' + if (*first == '*' && *(first+1) != '\0' && *second == '\0') + return false; + + // If the first string contains '?', or current characters + // of both strings string_match + if (*first == '?' || *first == *second) + return string_match(first+1, second+1); + + // If there is *, then there are two possibilities + // a) We consider current character of second string + // b) We ignore current character of second string. + if (*first == '*') + return string_match(first+1, second) || string_match(first, second+1); + return false; +} + +bool string_is_whitespace(char *text) { + utf8_int32_t ch; + while((text = utf8codepoint(text, &ch)) && ch) + { + if (ch != ' ') return false; + } + return true; +} + +bool string_is_asteriks(char *text) +{ + utf8_int32_t ch; + if (!utf8len(text)) return false; + while((text = utf8codepoint(text, &ch)) && ch) + { + if (ch != '*') return false; + } + return true; +} + +bool string_contains_ex(char *text_to_search, char *text_to_find, array *text_matches, bool *cancel_search) +{ + bool final_result = false; + bool is_asteriks_only = false; + +#if 0 + int len = strlen(text_to_find)+1; + char *tmp = mem_alloc(len); + strcpy(tmp, text_to_find); + text_to_find = tmp; + #endif + + // * wildcard at the start of text to find is not needed + if (string_is_asteriks(text_to_find)) + { + is_asteriks_only = true; + text_to_find += strlen(text_to_find); + } + + // remove all asteriks from start + utf8_int32_t br; + while(utf8codepoint(text_to_find, &br) && br == '*') + { + text_to_find = utf8codepoint(text_to_find, &br); + } + + char *text_to_find_original = text_to_find; + bool save_info = (text_matches != 0); + + utf8_int32_t text_to_search_ch = 0; + utf8_int32_t text_to_find_ch = 0; + + s32 line_nr_val = 1; + s32 word_offset_val = 0; + s32 word_match_len_val = 0; + char* line_start_ptr = text_to_search; + + s32 index = 0; + while((text_to_search = utf8codepoint(text_to_search, &text_to_search_ch)) + && text_to_search_ch) + { + if (cancel_search && *cancel_search) goto set_info_and_return_failure; + word_offset_val++; + if (text_to_search_ch == '\n') + { + line_nr_val++; + word_offset_val = 0; + line_start_ptr = text_to_search; + } + + char *text_to_search_current_attempt = text_to_search; + utf8_int32_t text_to_search_current_attempt_ch = text_to_search_ch; + + bool in_wildcard = false; + + text_to_find = utf8codepoint(text_to_find, &text_to_find_ch); + //text_to_search_current_attempt = utf8codepoint(text_to_search_current_attempt, + //&text_to_search_current_attempt_ch); + + word_match_len_val = 0; + while(text_to_search_current_attempt_ch) + { + if (cancel_search && *cancel_search) goto set_info_and_return_failure; + + // wildcard, accept any character in text to search + if (text_to_find_ch == '?') + goto continue_search; + + // character matches, + if (text_to_find_ch == text_to_search_current_attempt_ch && in_wildcard) + in_wildcard = false; + + // wildcard, accept any characters in text to search untill next char is found + if (text_to_find_ch == '*') + { + text_to_find = utf8codepoint(text_to_find, &text_to_find_ch); + in_wildcard = true; + } + + // text to find has reached 0byte, word has been found + if (text_to_find_ch == 0 || string_is_asteriks(text_to_find)) + { + done: + if (save_info) + { + text_match new_match; + new_match.line_nr = line_nr_val; + new_match.word_offset = word_offset_val-1; + new_match.word_match_len = word_match_len_val; + new_match.line_start = line_start_ptr; + new_match.line_info = 0; + array_push(text_matches, (uint8_t *)&new_match); + } + + final_result = true; + + if (is_asteriks_only) + { + return final_result; + } + + break; + } + + // character does not match, continue search + if (text_to_find_ch != text_to_search_current_attempt_ch && !in_wildcard) + break; + + continue_search: + if (!in_wildcard) + text_to_find = utf8codepoint(text_to_find, &text_to_find_ch); + + text_to_search_current_attempt = utf8codepoint( + text_to_search_current_attempt, + &text_to_search_current_attempt_ch); + + //if (!text_to_search_current_attempt_ch && string_is_asteriks(text_to_find)) goto done; + if (!text_to_search_current_attempt_ch && !text_to_find_ch) goto done; + + word_match_len_val++; + } + + text_to_find = text_to_find_original; + index++; + } + + return final_result; + + set_info_and_return_failure: + return false; +} + +static char *ltrim(char *str, const char *seps) +{ + size_t totrim; + if (seps == NULL) { + seps = "\t\n\v\f\r "; + } + totrim = strspn(str, seps); + if (totrim > 0) { + size_t len = strlen(str); + if (totrim == len) { + str[0] = '\0'; + } + else { + memmove(str, str + totrim, len + 1 - totrim); + } + } + return str; +} + +static char *rtrim(char *str, const char *seps) +{ + int i; + if (seps == NULL) { + seps = "\t\n\v\f\r "; + } + i = strlen(str) - 1; + while (i >= 0 && strchr(seps, str[i]) != NULL) { + str[i] = '\0'; + i--; + } + return str; +} + +inline void string_trim(char *string) +{ + ltrim(rtrim(string, 0), 0); +} + +inline bool string_equals(char *first, char *second) +{ + return (strcmp(first, second) == 0); +} + +s32 string_length(char *str) +{ + utf8_int32_t ch = 0; + s32 i = 0; + while((str = utf8codepoint(str, &ch)) && ch) + { + i++; + } + return i; +} + +// replaces " with \" for file formats +void string_appendf(char *buffer, char *text) +{ + u32 len = strlen(buffer); + while(*text) + { + if (*text < 32) + { + buffer[len] = ' '; + len++; + text++; + continue; + } + + if (*text == '"') + { + buffer[len] = '\\'; + len++; + } + if (*text == '\\') + { + buffer[len] = '\\'; + len++; + } + + buffer[len] = *text; + len++; + text++; + } +} + +void string_copyn(char *buffer, char *text, s32 bufferlen) +{ + u32 len = 0; + while(*text && len < bufferlen) + { + buffer[len] = *text; + len++; + text++; + } + buffer[len] = 0; +} + +void string_appendn(char *buffer, char *text, s32 bufferlen) +{ + u32 len = strlen(buffer); + while(*text && len < bufferlen) + { + buffer[len] = *text; + len++; + text++; + } + buffer[len] = 0; +} + +void string_append(char *buffer, char *text) +{ + u32 len = strlen(buffer); + while(*text) + { + buffer[len] = *text; + len++; + text++; + } + buffer[len] = 0; +} + +bool string_remove(char **buffer, char *text) +{ + s32 len = strlen(text); + char tmp[200]; + memcpy(tmp, *buffer, len); + memset(tmp+len, 0, 1); + + if (string_equals(tmp, text)) + { + *buffer += len; + return true; + } + + return false; +} + +char* string_get_json_literal(char **buffer, char *tmp) +{ + char *buf_start = *buffer; + char *buf = *buffer; + s32 len = 0; + while(*buf) + { + if ((*buf == ',' || *buf == '}') && (len > 0 && *(buf-1) == '"') && (len > 1 && *(buf-2) != '\\')) + { + memcpy(tmp, buf_start, len); + memset(tmp+len-1, 0, 1); + *buffer += len-1; + return tmp; + } + + len++; + buf++; + } + + return tmp; +} + +s32 string_get_json_ulong_number(char **buffer) +{ + char tmp[20]; + char *buf_start = *buffer; + char *buf = *buffer; + s32 len = 0; + while(*buf) + { + if (*buf == ',' || *buf == '}') + { + memcpy(tmp, buf_start, len); + memset(tmp+len, 0, 1); + *buffer += len; + return string_to_u64(tmp); + } + + len++; + buf++; + } + + return 0; +} + +s32 string_get_json_number(char **buffer) +{ + char tmp[20]; + char *buf_start = *buffer; + char *buf = *buffer; + s32 len = 0; + while(*buf) + { + if (*buf == ',' || *buf == '}') + { + memcpy(tmp, buf_start, len); + memset(tmp+len, 0, 1); + *buffer += len; + return string_to_s32(tmp); + } + + len++; + buf++; + } + + return 0; +} + +void utf8_str_remove_range(char *str, s32 from, s32 to) +{ + char *orig_str = str; + s32 i = 0; + utf8_int32_t ch = 0; + s32 total_len = strlen(str)+1+4; + char *replacement = calloc(total_len,1); + char *rep_off = replacement; + replacement[0] = 0; + + while((str = utf8codepoint(str, &ch)) && ch) + { + if (i < from || i >= to) + { + rep_off = utf8catcodepoint(rep_off, ch, 5); + } + + ++i; + } + *rep_off = 0; + + string_copyn(orig_str, replacement, MAX_INPUT_LENGTH); + mem_free(replacement); +} + +void utf8_str_remove_at(char *str, s32 at) +{ + char *orig_str = str; + s32 i = 0; + utf8_int32_t ch = 0; + s32 total_len = strlen(str)+1+4; + char *replacement = calloc(total_len,1); + char *rep_off = replacement; + replacement[0] = 0; + + while((str = utf8codepoint(str, &ch)) && ch) + { + if (at != i) + { + rep_off = utf8catcodepoint(rep_off, ch, 5); + } + + ++i; + } + *rep_off = 0; + + string_copyn(orig_str, replacement, MAX_INPUT_LENGTH); + mem_free(replacement); +} + +void utf8_str_insert_utf8str(char *str, s32 at, char *toinsert) +{ + s32 index = 0; + utf8_int32_t ch; + while((toinsert = utf8codepoint(toinsert, &ch)) && ch) + { + utf8_str_insert_at(str, at+index, ch); + index++; + } +} + +void utf8_str_insert_at(char *str, s32 at, utf8_int32_t newval) +{ + char *orig_str = str; + s32 i = 0; + utf8_int32_t ch = 0; + s32 total_len = strlen(str)+1+4; + char *replacement = calloc(total_len,1); + char *rep_off = replacement; + replacement[0] = 0; + + while((str = utf8codepoint(str, &ch))) + { + if (at == i) + { + rep_off = utf8catcodepoint(rep_off, newval, 5); + } + + rep_off = utf8catcodepoint(rep_off, ch, 5); + + ++i; + + if (!ch) break; + } + *rep_off = 0; + + string_copyn(orig_str, replacement, MAX_INPUT_LENGTH); + mem_free(replacement); +} + +char *utf8_str_copy_upto(char *str, s32 roof, char *buffer) +{ + utf8_int32_t ch = 0; + s32 index = 0; + char *orig_buffer = buffer; + while((str = utf8codepoint(str, &ch)) && ch) + { + if (index == roof) break; + buffer = utf8catcodepoint(buffer, ch, 5); + index++; + } + buffer = utf8catcodepoint(buffer, 0, 5); + + return orig_buffer; +} + +char *utf8_str_copy_range(char *str, s32 floor, s32 roof, char *buffer) +{ + utf8_int32_t ch = 0; + s32 index = 0; + char *orig_buffer = buffer; + while((str = utf8codepoint(str, &ch)) && ch) + { + if (index == roof) break; + if (index >= floor) + buffer = utf8catcodepoint(buffer, ch, 5); + index++; + } + buffer = utf8catcodepoint(buffer, 0, 5); + + return orig_buffer; +} + +void utf8_str_replace_at(char *str, s32 at, utf8_int32_t newval) +{ + char *orig_str = str; + s32 i = 0; + utf8_int32_t ch = 0; + s32 total_len = strlen(str)+1+4; + char *replacement = calloc(total_len,1); + char *rep_off = replacement; + replacement[0] = 0; + + while((str = utf8codepoint(str, &ch)) && ch) + { + if (at == i) + { + rep_off = utf8catcodepoint(rep_off, newval, 5); + } + else + { + rep_off = utf8catcodepoint(rep_off, ch, 5); + } + ++i; + } + *rep_off = 0; + + string_copyn(orig_str, replacement, MAX_INPUT_LENGTH); + mem_free(replacement); +} + +char* utf8_str_upto(char *str, s32 index) +{ + s32 i = 0; + utf8_int32_t ch; + char *prev_str = str; + while((str = utf8codepoint(str, &ch)) && ch) + { + if (index == i) return prev_str; + prev_str = str; + ++i; + } + + return str; +} + +utf8_int32_t utf8_str_at(char *str, s32 index) +{ + s32 i = 0; + utf8_int32_t ch; + while((str = utf8codepoint(str, &ch)) && ch) + { + if (index == i) return ch; + + ++i; + } + + return 0; +} + +bool string_is_numeric(char *str) +{ + utf8_int32_t ch; + while((str = utf8codepoint(str, &ch)) && ch) + { + if (!(ch >= 48 && ch <= 57)) + { + return false; + } + } + + return true; +} + +void string_convert_crlf_to_lf(char *buffer) +{ + char *buffer_original = buffer; + + int write_offset = 0; + int read_offset = 0; + + while(buffer[read_offset]) + { + if (buffer[read_offset] != 0x0D) + { + buffer_original[write_offset] = buffer[read_offset]; + + ++write_offset; + } + + ++read_offset; + } +} + +inline u64 string_to_u64(char *str) +{ + return (u64)strtoull(str, 0, 10); +} + +inline u32 string_to_u32(char *str) +{ + return (u32)strtoul(str, 0, 10); +} + +inline u16 string_to_u16(char *str) +{ + return (u16)strtoul(str, 0, 10); +} + +inline u8 string_to_u8(char *str) +{ + return (u8)strtoul(str, 0, 10); +} + +inline s64 string_to_s64(char *str) +{ + return (s64)strtoll(str, 0, 10); +} + +inline s32 string_to_s32(char *str) +{ + return (u32)strtol(str, 0, 10); +} + +inline s16 string_to_s16(char *str) +{ + return (s16)strtol(str, 0, 10); +} + +inline s8 string_to_s8(char *str) +{ + return (s8)strtol(str, 0, 10); +} + +inline f32 string_to_f32(char *str) +{ + return (f32)atof(str); +} + +inline f64 string_to_f64(char *str) +{ + return (f64)strtod(str, NULL); +} + +array string_split(char *text) +{ + array result = array_create(MAX_INPUT_LENGTH); + + char current_filter[MAX_INPUT_LENGTH]; + s32 filter_len = 0; + while(*text) + { + char ch = *text; + + if (ch == ',') + { + current_filter[filter_len] = 0; + array_push(&result, (uint8_t*)current_filter); + filter_len = 0; + } + else + { + if(filter_len < MAX_INPUT_LENGTH-1) + { + current_filter[filter_len++] = ch; + } + else + { + current_filter[filter_len] = ch; + } + } + + text++; + } + current_filter[filter_len] = 0; + array_push(&result, (uint8_t*)current_filter); + + return result; +} |
