How to get the word before the last word from a string (edge‑case‑safe) in Go

1 Answer

0 votes
package main

import (
	"fmt"
	"regexp"
	"strings"
)

// wordPattern matches a single word: one Unicode letter (\pL) or number (\pN)
// followed by any run of letters, numbers, and combining marks (\pM).
// It is compiled once at package scope so calls do not recompile it.
var wordPattern = regexp.MustCompile(`[\pL\pN][\pL\pN\pM]*`)

// getWordBeforeLast returns the second-to-last word in text.
//
// A word is a maximal run of Unicode letters and numbers, so scripts such as
// Kanji/Hiragana count as words while whitespace, hyphens, and punctuation
// (for example "," or "、") act as separators. Combining marks that follow a
// letter stay attached to it; without this, decomposed accents ("e" + U+0301)
// and Indic vowel signs would cut a word into fragments.
//
// If text contains fewer than two words, the sentinel string "null" is
// returned.
func getWordBeforeLast(text string) string {
	// FindAllString yields only non-empty matches (or nil when there are
	// none), so no filtering of empty fields is needed.
	words := wordPattern.FindAllString(text, -1)
	if len(words) < 2 {
		return "null"
	}
	return words[len(words)-2]
}

// main runs getWordBeforeLast over a fixed list of sample inputs and prints
// each input, the resulting word, and a 40-dash separator line.
func main() {
	// The header is followed by one blank line. The blank line is printed
	// with its own Println call because go vet rejects a Println argument
	// that ends in "\n" as a redundant newline.
	fmt.Println("=== Testing: Get Word Before Last ===")
	fmt.Println()

	tests := []string{
		"python go",
		"  many   spaces   here   now  ",
		"OneWord",
		"",
		"   ",
		"Hello, world!",
		"Tabs\tand\nnewlines work too",
		"Unicode 世界、こんにちは",
		"Ends with punctuation.",
		"Multiple words, with punctuation, here!",
		"state-of-the-art program example",
	}

	// The separator is the same for every case, so build it once.
	separator := strings.Repeat("-", 40)

	for _, t := range tests {
		result := getWordBeforeLast(t)

		// The input is printed verbatim between quotes (not escaped), so
		// tabs and newlines in the sample appear literally in the output.
		fmt.Printf("Input: \"%s\"\n", t)
		fmt.Printf("Output: %s\n", result)
		fmt.Println(separator)
	}
}


/*
OUTPUT:

=== Testing: Get Word Before Last ===

Input: "python go"
Output: python
----------------------------------------
Input: "  many   spaces   here   now  "
Output: here
----------------------------------------
Input: "OneWord"
Output: null
----------------------------------------
Input: ""
Output: null
----------------------------------------
Input: "   "
Output: null
----------------------------------------
Input: "Hello, world!"
Output: Hello
----------------------------------------
Input: "Tabs	and
newlines work too"
Output: work
----------------------------------------
Input: "Unicode 世界、こんにちは"
Output: 世界
----------------------------------------
Input: "Ends with punctuation."
Output: with
----------------------------------------
Input: "Multiple words, with punctuation, here!"
Output: punctuation
----------------------------------------
Input: "state-of-the-art program example"
Output: program
----------------------------------------

*/

 



answered Mar 29 by avibootz
edited Mar 29 by avibootz

Related questions

...