How to find the longest shared prefix among all words in a string with VB.NET

1 Answer

0 votes
Imports System
Imports System.Collections.Generic
Imports System.Text.RegularExpressions

Module LongestSharedPrefix

    ' Extract alphabetic words (lowercased)
    Function ExtractWords(text As String) As List(Of String)
        Dim words As New List(Of String)()
        Dim m = Regex.Matches(text, "[A-Za-z]+")

        For Each match As Match In m
            words.Add(match.Value.ToLower())
        Next

        Return words
    End Function

    ' Build prefix → list of words that share that prefix
    Function GroupByPrefixes(words As List(Of String)) As Dictionary(Of String, List(Of String))
        Dim groups As New Dictionary(Of String, List(Of String))()

        For Each w In words
            For i = 1 To w.Length
                Dim prefix = w.Substring(0, i)

                If Not groups.ContainsKey(prefix) Then
                    groups(prefix) = New List(Of String)()
                End If

                groups(prefix).Add(w)
            Next
        Next

        Return groups
    End Function

    ' Find the longest prefix that appears in 2+ words
    Function LongestSharedPrefix(groups As Dictionary(Of String, List(Of String))) As String
        Dim best As String = ""

        For Each kvp In groups
            Dim prefix = kvp.Key
            Dim list = kvp.Value

            If list.Count >= 2 AndAlso prefix.Length > best.Length Then
                best = prefix
            End If
        Next

        Return best
    End Function

    Sub Main()
        Dim text As String =
            "The Lowly inhabitants of the lowland were surprised to see " &
            "the lower branches of the trees."

        Dim words = ExtractWords(text)
        Dim groups = GroupByPrefixes(words)
        Dim prefix = LongestSharedPrefix(groups)

        If prefix <> "" Then
            Dim group = groups(prefix)

            Console.WriteLine("Longest shared prefix:")
            Console.WriteLine($"{prefix} | prefix_len={prefix.Length} | group_count={group.Count} | [{String.Join(", ", group)}]")
        Else
            Console.WriteLine("No shared prefix found.")
        End If
    End Sub

End Module



' run:
'
' Longest shared prefix:
' lowl | prefix_len=4 | group_count=2 | [lowly, lowland]
'

 



answered Mar 14 by avibootz

Related questions

...