package main
import (
"fmt"
"regexp"
"strings"
)
// wordPattern matches a single word: one Unicode letter (\pL) or number (\pN)
// followed by any run of letters, numbers, or combining marks (\pM).
// Combining marks are allowed inside a word so that scripts which attach
// vowel signs or accents to a base letter (e.g. Devanagari, decomposed
// Latin accents) are not broken apart. It is compiled once at package
// scope instead of on every call.
var wordPattern = regexp.MustCompile(`[\pL\pN][\pL\pN\pM]*`)

// getWordBeforeLast returns the second-to-last word in text.
//
// A word is a maximal run of Unicode letters and numbers (plus any combining
// marks that follow them); everything else — whitespace, ASCII punctuation,
// CJK punctuation such as 、 — acts as a separator. Scripts like Kanji and
// Hiragana are therefore treated as words.
//
// It returns the literal string "null" when text contains fewer than two
// words (including empty or separator-only input).
func getWordBeforeLast(text string) string {
	// FindAllString yields only the non-empty word matches, so no filtering
	// of empty fragments is needed. It returns nil when nothing matches.
	words := wordPattern.FindAllString(text, -1)
	if len(words) < 2 {
		return "null"
	}
	return words[len(words)-2]
}
// main runs getWordBeforeLast over a fixed list of sample inputs and prints
// each input with its result, followed by a 40-dash separator line.
func main() {
	// The header is followed by one blank line. The blank line is printed
	// with its own Println because go vet rejects a Println argument that
	// ends in "\n".
	fmt.Println("=== Testing: Get Word Before Last ===")
	fmt.Println()

	tests := []string{
		"python go",
		" many spaces here now ",
		"OneWord",
		"",
		" ",
		"Hello, world!",
		"Tabs\tand\nnewlines work too",
		"Unicode 世界、こんにちは",
		"Ends with punctuation.",
		"Multiple words, with punctuation, here!",
		"state-of-the-art program example",
	}

	// Build the separator once; it is the same for every test case.
	separator := strings.Repeat("-", 40)

	for _, t := range tests {
		result := getWordBeforeLast(t)
		fmt.Printf("Input: \"%s\"\n", t)
		fmt.Printf("Output: %s\n", result)
		fmt.Println(separator)
	}
}
/*
OUTPUT:
=== Testing: Get Word Before Last ===
Input: "python go"
Output: python
----------------------------------------
Input: " many spaces here now "
Output: here
----------------------------------------
Input: "OneWord"
Output: null
----------------------------------------
Input: ""
Output: null
----------------------------------------
Input: " "
Output: null
----------------------------------------
Input: "Hello, world!"
Output: Hello
----------------------------------------
Input: "Tabs and
newlines work too"
Output: work
----------------------------------------
Input: "Unicode 世界、こんにちは"
Output: 世界
----------------------------------------
Input: "Ends with punctuation."
Output: with
----------------------------------------
Input: "Multiple words, with punctuation, here!"
Output: punctuation
----------------------------------------
Input: "state-of-the-art program example"
Output: program
----------------------------------------
*/