import re
from collections import defaultdict
def group_by_all_prefixes(s: str) -> dict:
    """Group the words of *s* under every one of their leading prefixes.

    The text is lowercased and split into maximal runs of ASCII letters;
    any other character (digits, punctuation, whitespace, non-ASCII
    letters) acts as a separator.  Each word is then filed under all of
    its prefixes, from its first letter up to the whole word.

    Args:
        s: The text to analyse.

    Returns:
        A plain ``dict`` mapping each prefix to the list of words that
        start with it.  Words are listed in order of appearance and a
        word that occurs several times is listed once per occurrence.
        Keys appear in the order in which the prefixes were first seen.
    """
    groups = defaultdict(list)
    # The text is already lowercased, so matching "a-z" alone is enough.
    for word in re.findall(r"[a-z]+", s.lower()):
        for end in range(1, len(word) + 1):
            groups[word[:end]].append(word)
    # Return a regular dict so lookups of unknown prefixes by the caller
    # raise KeyError instead of silently creating empty groups.
    return dict(groups)
# Demo: group the words of a sample sentence by prefix and print the
# shared prefixes in three different layouts.
s = "The Lowly inhabitants of the lowland were surprised to see the lower branches of the trees."
groups = group_by_all_prefixes(s)

# Keep only the prefixes whose group holds at least two entries.  A word
# repeated in the text counts once per occurrence, so a prefix matched
# only by a repeated word (e.g. "the") is kept as well.
filtered = {p: ws for p, ws in groups.items() if len(ws) >= 2}

# Layout 1: prefix followed by the raw list representation.
for prefix, words in filtered.items():
    print(f"{prefix} : {words}")
print()  # blank line between listings

# Layout 2: prefix followed by the words as comma-separated text.
for prefix, words in filtered.items():
    print(prefix, ", ".join(words), sep=": ")
print()  # blank line between listings

# Layout 3: prefix with its length and the size of its group.
for prefix, words in filtered.items():
    print(
        prefix,
        f"prefix_len={len(prefix)}",
        f"group_count={len(words)}",
        words,
        sep=" | ",
    )
'''
run:
t : ['the', 'the', 'to', 'the', 'the', 'trees']
th : ['the', 'the', 'the', 'the']
the : ['the', 'the', 'the', 'the']
l : ['lowly', 'lowland', 'lower']
lo : ['lowly', 'lowland', 'lower']
low : ['lowly', 'lowland', 'lower']
lowl : ['lowly', 'lowland']
o : ['of', 'of']
of : ['of', 'of']
s : ['surprised', 'see']

t: the, the, to, the, the, trees
th: the, the, the, the
the: the, the, the, the
l: lowly, lowland, lower
lo: lowly, lowland, lower
low: lowly, lowland, lower
lowl: lowly, lowland
o: of, of
of: of, of
s: surprised, see

t | prefix_len=1 | group_count=6 | ['the', 'the', 'to', 'the', 'the', 'trees']
th | prefix_len=2 | group_count=4 | ['the', 'the', 'the', 'the']
the | prefix_len=3 | group_count=4 | ['the', 'the', 'the', 'the']
l | prefix_len=1 | group_count=3 | ['lowly', 'lowland', 'lower']
lo | prefix_len=2 | group_count=3 | ['lowly', 'lowland', 'lower']
low | prefix_len=3 | group_count=3 | ['lowly', 'lowland', 'lower']
lowl | prefix_len=4 | group_count=2 | ['lowly', 'lowland']
o | prefix_len=1 | group_count=2 | ['of', 'of']
of | prefix_len=2 | group_count=2 | ['of', 'of']
s | prefix_len=1 | group_count=2 | ['surprised', 'see']
'''