#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdbool.h>
#include <stdlib.h>
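/*
Since C doesn't have a built-in Unicode-aware regex,
we use a helper function, is_japanese_comma, to detect
the specific 3-byte UTF-8 sequence of the Japanese delimiter.
Unsigned char casting: when dealing with the bytes of non-ASCII
UTF-8 characters (which are always > 127), each byte is cast to
unsigned char before it is compared or passed to <ctype.h>
functions, because plain char may be signed and such bytes would
otherwise be negative.
*/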
/**
 * Checks if a byte is a standard ASCII separator (whitespace or punctuation).
 *
 * Whitespace is whatever isspace() accepts; punctuation is the fixed set
 * , . ! ? ; : " ' ( ) [ ] { } - / and backslash.
 *
 * @param c  Byte to classify.
 * @return   true if c is whitespace or one of the listed punctuation bytes,
 *           false otherwise (including for the NUL byte).
 */
bool is_ascii_separator(unsigned char c) {
    static const char punct[] = ",.!?;:\"'()[]{}-/\\";

    /* strchr() considers the terminating NUL part of the searched string,
     * so without this guard a NUL byte would be reported as punctuation. */
    if (c == '\0') {
        return false;
    }
    if (isspace(c)) {
        return true;
    }
    return strchr(punct, c) != NULL;
}
/**
 * Tests whether the bytes at s begin with the Japanese comma '、'
 * (UTF-8 encoding: 0xE3 0x80 0x81).
 *
 * Bytes are compared in order and the comparison stops at the first
 * mismatch, so a later byte is only read when all earlier ones matched.
 *
 * @param s  Pointer to the first byte to examine.
 * @return   true if s[0..2] equal 0xE3 0x80 0x81, false otherwise.
 */
bool is_japanese_comma(const unsigned char *s) {
    static const unsigned char comma_bytes[] = { 0xE3, 0x80, 0x81 };

    for (size_t k = 0; k < sizeof comma_bytes; k++) {
        if (s[k] != comma_bytes[k]) {
            return false;
        }
    }
    return true;
}
/*
 * Returns the byte length of the separator that starts at p:
 * 1 for an ASCII separator, 3 for the Japanese comma, 0 if p is a word byte.
 */
static size_t separator_length_at(const unsigned char *p) {
    if (is_ascii_separator(*p)) {
        return 1;
    }
    if (is_japanese_comma(p)) {
        return 3;
    }
    return 0;
}

/**
 * Extracts the word before the last word.
 *
 * A word is a maximal run of bytes that contains neither an ASCII separator
 * (see is_ascii_separator) nor the Japanese comma (see is_japanese_comma).
 * The text is scanned once; only the position and length of the two most
 * recent words are remembered, so a single allocation is made for the result.
 *
 * @param text  NUL-terminated input string. NULL is treated as having no words.
 * @return      A heap-allocated, NUL-terminated copy of the second-to-last
 *              word that the caller must free, or NULL if the text holds
 *              fewer than two words or the allocation fails.
 */
char* get_word_before_last(const char *text) {
    if (text == NULL) {
        return NULL;
    }

    const unsigned char *bytes = (const unsigned char *)text;
    const size_t len = strlen(text);

    size_t prev_start = 0, prev_len = 0;  /* second-to-last word seen so far */
    size_t last_start = 0, last_len = 0;  /* most recent word seen so far */
    size_t word_count = 0;

    size_t i = 0;
    while (i < len) {
        const size_t sep = separator_length_at(bytes + i);
        if (sep > 0) {
            /* A matched Japanese comma consists of three non-NUL bytes, so
             * skipping it never moves past the terminator. */
            i += sep;
            continue;
        }

        /* Start of a word: advance to the next separator or end of text. */
        const size_t start = i;
        while (i < len && separator_length_at(bytes + i) == 0) {
            i++;
        }

        prev_start = last_start;
        prev_len = last_len;
        last_start = start;
        last_len = i - start;
        word_count++;
    }

    if (word_count < 2) {
        return NULL;
    }

    char *result = malloc(prev_len + 1);
    if (result == NULL) {
        return NULL;
    }
    memcpy(result, text + prev_start, prev_len);
    result[prev_len] = '\0';
    return result;
}
/**
 * Test driver: runs get_word_before_last over a fixed list of sample
 * strings and prints each input with its result ("null" when the
 * function returns NULL). Each result is freed after printing.
 */
int main() {
    const char *samples[] = {
        "python c++",
        " many spaces here now ",
        "OneWord",
        "",
        " ",
        "Hello, world!",
        "Tabs\tand\nnewlines work too",
        "Unicode 世界、こんにちは",
        "Ends with punctuation.",
        "Multiple words, with punctuation, here!",
        "state-of-the-art program example"
    };
    const size_t sample_count = sizeof(samples) / sizeof(samples[0]);

    printf("=== Testing: Get Word Before Last ===\n\n");

    for (size_t idx = 0; idx < sample_count; idx++) {
        const char *input = samples[idx];
        char *answer = get_word_before_last(input);

        /* Fall back to a placeholder when there is no second-to-last word. */
        const char *shown = "null";
        if (answer != NULL) {
            shown = answer;
        }

        printf("Input: \"%s\"\n", input);
        printf("Output: %s\n", shown);
        printf("----------------------------------------\n");

        free(answer);
    }

    return 0;
}
/*
OUTPUT:
=== Testing: Get Word Before Last ===
Input: "python c++"
Output: python
----------------------------------------
Input: " many spaces here now "
Output: here
----------------------------------------
Input: "OneWord"
Output: null
----------------------------------------
Input: ""
Output: null
----------------------------------------
Input: " "
Output: null
----------------------------------------
Input: "Hello, world!"
Output: Hello
----------------------------------------
Input: "Tabs and
newlines work too"
Output: work
----------------------------------------
Input: "Unicode 世界、こんにちは"
Output: 世界
----------------------------------------
Input: "Ends with punctuation."
Output: with
----------------------------------------
Input: "Multiple words, with punctuation, here!"
Output: punctuation
----------------------------------------
Input: "state-of-the-art program example"
Output: program
----------------------------------------
*/