How to group words in a string by the first N letters in Python

1 Answer

0 votes
import re
from collections import defaultdict

def group_by_first_n_letter(s: str, n: int = 3) -> dict:
    """Group the words of *s* by their first *n* letters.

    Words are maximal runs of ASCII letters (a-z) in the lower-cased text,
    so digits, punctuation and any other characters act as separators.
    Words shorter than *n* letters are left out of the result.

    Args:
        s: Text to split into words.
        n: Length of the prefix used as the grouping key; must not be
            negative.

    Returns:
        A plain ``dict`` mapping each lower-cased prefix to the list of
        words starting with it, in order of appearance (duplicates kept).

    Raises:
        ValueError: If *n* is negative.
    """
    if n < 0:
        # With a negative n, ``w[:n]`` would drop letters from the END of
        # each word and every word would pass the length check, producing
        # keys that are not length-n prefixes at all.
        raise ValueError(f"n must be non-negative, got {n!r}")

    groups = defaultdict(list)

    for w in re.findall(r"[a-zA-Z]+", s.lower()):
        if len(w) >= n:
            groups[w[:n]].append(w)

    # Hand back a plain dict so that looking up a missing prefix raises
    # KeyError instead of silently inserting an empty list.
    return dict(groups)


# Demo: group the words of a sample sentence by their first three letters.
s = "The lowly inhabitants of the lowland were surprised to see the lower branches of the trees."

groups = group_by_first_n_letter(s, 3)

# First listing: each prefix followed by the raw list of its words.
for key in groups:
    print(key, groups[key], sep=" : ")

# Blank line between the two listings.
print()

# Second listing: each prefix followed by its words joined with commas.
for key, members in groups.items():
    joined = ', '.join(members)
    print(f"{key}: {joined}")



'''
run:
  
the : ['the', 'the', 'the', 'the']
low : ['lowly', 'lowland', 'lower']
inh : ['inhabitants']
wer : ['were']
sur : ['surprised']
see : ['see']
bra : ['branches']
tre : ['trees']

the: the, the, the, the
low: lowly, lowland, lower
inh: inhabitants
wer: were
sur: surprised
see: see
bra: branches
tre: trees

'''

 



answered Mar 12 by avibootz
edited Mar 13 by avibootz
...