How to split a string on multiple multi‑character delimiters (and keep them) in Java

2 Answers

0 votes
import java.util.ArrayList;
import java.util.List;

/**
 * Splits a string into alternating runs of ordinary text and delimiter
 * characters, keeping the delimiter runs as elements of the result.
 */
public class SplitKeepMultiDelims {

    /**
     * Tells whether {@code c} is the first character of any delimiter.
     *
     * <p>Only the first character of each delimiter string is consulted;
     * {@code null} and empty entries are ignored.
     *
     * @param c      the character to classify
     * @param delims the delimiter strings (must not be {@code null})
     * @return {@code true} if some delimiter starts with {@code c}
     */
    private static boolean isDelimChar(char c, String[] delims) {
        for (String d : delims) {
            // Null entries are skipped instead of failing on d.isEmpty().
            if (d != null && !d.isEmpty() && d.charAt(0) == c) {
                return true;
            }
        }

        return false;
    }

    /**
     * Splits {@code s} into tokens, keeping the delimiters.
     *
     * <p>A run of one repeated delimiter character (for example {@code "---"})
     * becomes a single token. Two different delimiter characters that are
     * adjacent produce two separate tokens. Every maximal stretch of
     * non-delimiter characters becomes a single token as well.
     *
     * @param s      the text to split; {@code null} or empty yields an empty list
     * @param delims the delimiters; only the first character of each entry is
     *               used, {@code null}/empty entries are ignored, and a
     *               {@code null} array means "no delimiters"
     * @return the tokens in their original order; concatenating them gives
     *         back {@code s}
     */
    public static List<String> splitKeepMultiDelims(String s, String[] delims) {
        List<String> result = new ArrayList<>();
        if (s == null || s.isEmpty()) {
            return result;
        }

        // With no delimiter array the whole string is one ordinary token.
        String[] safeDelims = (delims == null) ? new String[0] : delims;
        int i = 0;

        while (i < s.length()) {
            char c = s.charAt(i);
            int start = i;

            if (isDelimChar(c, safeDelims)) {
                // Consume the run of this same delimiter character.
                while (i < s.length() && s.charAt(i) == c) {
                    i++;
                }
            } else {
                // Consume ordinary text up to the next delimiter character.
                while (i < s.length() && !isDelimChar(s.charAt(i), safeDelims)) {
                    i++;
                }
            }
            result.add(s.substring(start, i));
        }

        return result;
    }

    /**
     * Demonstrates the splitter on a sample string and prints each token
     * wrapped in square brackets.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String s = "aa==bbb---cccc++++ddddd";
        String[] delims = {"=", "-", "+"};

        List<String> parts = splitKeepMultiDelims(s, delims);

        for (String p : parts) {
            System.out.print("[" + p + "] ");
        }
    }
}



/*
run:

[aa] [==] [bbb] [---] [cccc] [++++] [ddddd] 

*/

 



answered Mar 9 by avibootz
0 votes
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

/**
 * Cuts a string into consecutive segments at every boundary between ordinary
 * text and delimiter characters, and between two different delimiter
 * characters, keeping the delimiters in the output.
 */
public class SplitAndKeep {

    /**
     * Decides whether a new segment begins between two neighbouring characters.
     *
     * @param before the character on the left of the boundary
     * @param here   the character on the right of the boundary
     * @param delims the delimiter characters
     * @return {@code true} if exactly one of the two is a delimiter, or both
     *         are delimiters but not the same one
     */
    private static boolean startsNewSegment(char before, char here, Set<Character> delims) {
        boolean beforeIsDelim = delims.contains(before);
        boolean hereIsDelim = delims.contains(here);

        if (beforeIsDelim != hereIsDelim) {
            // Transition between ordinary text and a delimiter.
            return true;
        }
        // Same category on both sides: only a change of delimiter kind splits.
        return beforeIsDelim && before != here;
    }

    /**
     * Splits {@code text} into segments while keeping the delimiters.
     *
     * @param text   the text to split; {@code null} or empty yields an empty list
     * @param delims the set of delimiter characters
     * @return the segments in order; runs of one repeated delimiter character
     *         and runs of non-delimiter characters are each a single segment
     */
    public static List<String> splitAndKeep(String text, Set<Character> delims) {
        List<String> segments = new ArrayList<>();
        if (text == null || text.isEmpty()) {
            return segments;
        }

        final int length = text.length();
        int segmentStart = 0;
        int pos = 1;

        while (pos < length) {
            if (startsNewSegment(text.charAt(pos - 1), text.charAt(pos), delims)) {
                segments.add(text.substring(segmentStart, pos));
                segmentStart = pos;
            }
            pos++;
        }

        // Whatever remains after the last boundary is the trailing segment.
        segments.add(text.substring(segmentStart));
        return segments;
    }

    /**
     * Demonstrates the splitter on a sample string and prints each segment
     * wrapped in square brackets.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Set<Character> delims = new HashSet<>();
        delims.add('=');
        delims.add('-');
        delims.add('+');

        String s = "aa==bbb---cccc++++ddddd";
        List<String> parts = splitAndKeep(s, delims);

        for (int k = 0; k < parts.size(); k++) {
            System.out.print("[" + parts.get(k) + "] ");
        }
    }
}



/*
run:

[aa] [==] [bbb] [---] [cccc] [++++] [ddddd] 

*/

 



answered Mar 10 by avibootz

Related questions

...