How to get the word before the last word from a string (edge‑case‑safe) in Pascal

1 Answer

0 votes
program GetWordBeforeLastDemo;

{$mode objfpc}{$H+}{$J-} // Object Pascal mode, long strings (AnsiString), read-only typed constants

uses
  SysUtils, Classes;

{
  Returns the second-to-last word of Text, or '' when Text holds fewer
  than two words (empty input, separators only, or a single word).

  Words are maximal runs of bytes that are not separators. Separators are:
    - ASCII whitespace and punctuation (see AsciiSeparators below);
    - the ideographic comma U+3001, matched by its UTF-8 byte sequence
      E3 80 81.
  Every other byte, including the bytes of other multi-byte UTF-8
  characters, is treated as part of a word. Text is scanned byte by byte,
  so it is expected to be UTF-8 (or plain ASCII).
}
function GetWordBeforeLast(const Text: string): string;
const
  AsciiSeparators = [' ', #9, #10, #13, ',', '.', '!', '?', ';', ':', '"',
                     '''', '(', ')', '[', ']', '{', '}', '-', '/', '\'];
var
  Previous, Last: string;
  I, Len: Integer;
  WordStart: Integer; // index of the first byte of the open word, 0 = none
  SepLen: Integer;    // byte length of the separator at I, 0 = not a separator
begin
  // Only the two most recent words matter, so keep just those instead of
  // collecting every word in a list.
  Previous := '';
  Last := '';
  Len := Length(Text);
  WordStart := 0;
  I := 1;

  while I <= Len do
  begin
    // The length guard keeps the three-byte look-ahead inside the string.
    if (I <= Len - 2) and
       (Byte(Text[I]) = $E3) and
       (Byte(Text[I + 1]) = $80) and
       (Byte(Text[I + 2]) = $81) then
      SepLen := 3
    else if Text[I] in AsciiSeparators then
      SepLen := 1
    else
      SepLen := 0;

    if SepLen = 0 then
    begin
      // Ordinary byte: open a word here unless one is already open.
      if WordStart = 0 then
        WordStart := I;
      Inc(I);
    end
    else
    begin
      // Separator: close the open word (if any) and shift the window.
      if WordStart <> 0 then
      begin
        Previous := Last;
        Last := Copy(Text, WordStart, I - WordStart);
        WordStart := 0;
      end;
      Inc(I, SepLen);
    end;
  end;

  // A word that runs to the end of the string is still open here.
  if WordStart <> 0 then
  begin
    Previous := Last;
    Last := Copy(Text, WordStart, Len - WordStart + 1);
  end;

  // Words are never empty, so Previous is '' exactly when fewer than two
  // words were found.
  Result := Previous;
end;

var
  // Sample inputs: plain pairs, repeated spaces, single/empty/blank input,
  // punctuation, control characters, UTF-8 text and hyphenated words.
  Tests: array[0..10] of string = (
    'python pascal',
    '  many   spaces   here   now  ',
    'OneWord',
    '',
    '   ',
    'Hello, world!',
    'Tabs'#9'and'#10'newlines work too',
    'Unicode 世界、こんにちは',
    'Ends with punctuation.',
    'Multiple words, with punctuation, here!',
    'state-of-the-art program example'
  );
  Idx: Integer;
  Found: string;

{ Demo driver: prints each sample input with its word before the last. }
begin
  WriteLn('=== Testing: Get Word Before Last ===');
  WriteLn;

  for Idx := Low(Tests) to High(Tests) do
  begin
    Found := GetWordBeforeLast(Tests[Idx]);

    WriteLn('Input: "', Tests[Idx], '"');
    // An empty result means fewer than two words; show it as 'null'.
    if Found <> '' then
      WriteLn('Output: ', Found)
    else
      WriteLn('Output: null');
    WriteLn('----------------------------------------');
  end;
end.



{
OUTPUT:

=== Testing: Get Word Before Last ===

Input: "python pascal"
Output: python
----------------------------------------
Input: "  many   spaces   here   now  "
Output: here
----------------------------------------
Input: "OneWord"
Output: null
----------------------------------------
Input: ""
Output: null
----------------------------------------
Input: "   "
Output: null
----------------------------------------
Input: "Hello, world!"
Output: Hello
----------------------------------------
Input: "Tabs    and
newlines work too"
Output: work
----------------------------------------
Input: "Unicode 世界、こんにちは"
Output: 世界
----------------------------------------
Input: "Ends with punctuation."
Output: with
----------------------------------------
Input: "Multiple words, with punctuation, here!"
Output: punctuation
----------------------------------------
Input: "state-of-the-art program example"
Output: program
----------------------------------------

}

 



answered Mar 29 by avibootz
edited Mar 29 by avibootz

Related questions

...