How to get the word before the last word from a string (edge‑case‑safe) in Scala

1 Answer

0 votes
import scala.util.matching.Regex

// Scala's String is java.lang.String itself (a type alias, not a wrapper).
// It stores text as UTF-16 code units, so Unicode text works out of the box.

/**
 * Returns the second-to-last word of `text`.
 *
 * A word is a maximal run of Unicode letters (`\p{L}`), combining marks
 * (`\p{M}`) and numbers (`\p{N}`); any other character is a separator.
 * Combining marks count as word characters so that decomposed accents
 * (for example "e" followed by U+0301) and scripts that attach vowel signs
 * to a base letter are not cut in the middle of a word.
 *
 * @param text the string to inspect
 * @return the word preceding the final word, or the literal string `"null"`
 *         (a four-character string, not a null reference) when `text`
 *         contains fewer than two words
 */
def getWordBeforeLast(text: String): String = {
  // split leaves a leading empty string when text starts with a separator,
  // hence the nonEmpty filter.
  val words = text.split("""[^\p{L}\p{M}\p{N}]+""").filter(_.nonEmpty)

  // Only the last two words matter; anything shorter has no "word before last".
  words.takeRight(2) match {
    case Array(previous, _) => previous
    case _                  => "null"
  }
}

/**
 * Demo entry point: prints each sample input together with the word that
 * getWordBeforeLast returns for it, followed by a 40-dash divider.
 */
@main def runTests(): Unit = {
  val divider = "-" * 40

  // Prints one sample and its result in the Input/Output/divider layout.
  def report(sample: String): Unit = {
    val penultimate = getWordBeforeLast(sample)
    println(s"""Input: "$sample"""")
    println(s"Output: $penultimate")
    println(divider)
  }

  // Samples cover plain text, extra whitespace, too-few-words cases,
  // punctuation, control characters, CJK text and hyphenated words.
  val samples = List(
    "python scala",
    "  many   spaces   here   now  ",
    "OneWord",
    "",
    "   ",
    "Hello, world!",
    "Tabs\tand\nnewlines work too",
    "Unicode 世界、こんにちは",
    "Ends with punctuation.",
    "Multiple words, with punctuation, here!",
    "state-of-the-art program example"
  )

  println("=== Testing: Get Word Before Last ===\n")
  samples.foreach(report)
}



/*
OUTPUT:

=== Testing: Get Word Before Last ===

Input: "python scala"
Output: python
----------------------------------------
Input: "  many   spaces   here   now  "
Output: here
----------------------------------------
Input: "OneWord"
Output: null
----------------------------------------
Input: ""
Output: null
----------------------------------------
Input: "   "
Output: null
----------------------------------------
Input: "Hello, world!"
Output: Hello
----------------------------------------
Input: "Tabs	and
newlines work too"
Output: work
----------------------------------------
Input: "Unicode 世界、こんにちは"
Output: 世界
----------------------------------------
Input: "Ends with punctuation."
Output: with
----------------------------------------
Input: "Multiple words, with punctuation, here!"
Output: punctuation
----------------------------------------
Input: "state-of-the-art program example"
Output: program
----------------------------------------

*/

 



answered Mar 29 by avibootz

Related questions

...