import scala.util.matching.Regex
// Scala's String is an alias for java.lang.String, which stores text as UTF-16
// code units, so Unicode text is supported out of the box.
/**
 * Returns the word immediately before the last word of `text`.
 *
 * A word is a maximal run of Unicode letters (`\p{L}`), combining marks
 * (`\p{M}`) and numeric characters (`\p{N}`). Every other character
 * (whitespace, punctuation, symbols) acts as a separator.
 *
 * @param text the text to scan
 * @return the second-to-last word, or the literal string `"null"` when `text`
 *         contains fewer than two words
 */
def getWordBeforeLast(text: String): String = {
  // \p{M} has to count as part of a word: combining marks (a decomposed accent
  // such as "e" + U+0301, or the vowel signs and virama of Indic scripts) are
  // neither \p{L} nor \p{N}, so leaving them out would cut such words apart.
  // The property escapes need no regex flag to be Unicode-aware.
  val words = text.split("""[^\p{L}\p{M}\p{N}]+""").filter(_.nonEmpty)
  // split produces a leading empty string when text starts with a separator;
  // the filter above removes it so only real words are counted here.
  words.takeRight(2) match {
    case Array(secondToLast, _) => secondToLast
    case _                      => "null"
  }
}
/**
 * Entry point: runs getWordBeforeLast over a fixed set of sample inputs and
 * prints each input, its result, and a separator line to standard output.
 */
@main def runTests(): Unit = {
  println("=== Testing: Get Word Before Last ===\n")

  // Rendered once; printed after every sample.
  val separator = "-" * 40

  val sampleInputs = List(
    "python scala",
    " many spaces here now ",
    "OneWord",
    "",
    " ",
    "Hello, world!",
    "Tabs\tand\nnewlines work too",
    "Unicode 世界、こんにちは",
    "Ends with punctuation.",
    "Multiple words, with punctuation, here!",
    "state-of-the-art program example"
  )

  for (input <- sampleInputs) {
    val wordBeforeLast = getWordBeforeLast(input)
    println(s"""Input: "$input"""")
    println(s"Output: $wordBeforeLast")
    println(separator)
  }
}
/*
OUTPUT:
=== Testing: Get Word Before Last ===
Input: "python scala"
Output: python
----------------------------------------
Input: " many spaces here now "
Output: here
----------------------------------------
Input: "OneWord"
Output: null
----------------------------------------
Input: ""
Output: null
----------------------------------------
Input: " "
Output: null
----------------------------------------
Input: "Hello, world!"
Output: Hello
----------------------------------------
Input: "Tabs and
newlines work too"
Output: work
----------------------------------------
Input: "Unicode 世界、こんにちは"
Output: 世界
----------------------------------------
Input: "Ends with punctuation."
Output: with
----------------------------------------
Input: "Multiple words, with punctuation, here!"
Output: punctuation
----------------------------------------
Input: "state-of-the-art program example"
Output: program
----------------------------------------
*/