How to group words in a string by the first N letters in Scala

2 Answers

0 votes
import scala.util.matching.Regex

/** Groups the words of `s` by their first `n` letters.
  *
  * The text is lower-cased and split into runs of ASCII letters; words shorter
  * than `n` are dropped. Within each group the words keep their order of
  * appearance, and repeated words are kept as duplicates.
  *
  * @param s text to scan for words
  * @param n prefix length used as the grouping key (defaults to 3); for `n <= 0`
  *          no word is filtered out and every word lands under the empty-string key
  * @return map from each lower-cased prefix to the words that start with it
  */
def groupByFirstNLetters(s: String, n: Int = 3): Map[String, List[String]] = {
  val wordRegex: Regex = "[A-Za-z]+".r
  // Locale.ROOT keeps lower-casing independent of the JVM default locale: under a
  // Turkish locale "I" becomes dotless i (U+0131), which the ASCII regex would split on.
  val words = wordRegex.findAllIn(s.toLowerCase(java.util.Locale.ROOT)).toList

  words
    .filter(_.length >= n)
    .foldLeft(Map.empty[String, List[String]]) { (groups, w) =>
      val prefix = w.take(n)
      // Prepend is O(1) on List, whereas appending copies the whole group each time;
      // the groups are put back into order of appearance by the reverse below.
      groups.updated(prefix, w :: groups.getOrElse(prefix, Nil))
    }
    .map { case (prefix, ws) => prefix -> ws.reverse }
}

// Sample sentence used to demonstrate the grouping.
val s = "The lowly inhabitants of the lowland were surprised to see the lower branches of the trees."

// Bucket the words of the sentence by their three-letter prefix.
val groups = groupByFirstNLetters(s, 3)

// Print version 1: each group rendered as a bracketed list
for ((key, members) <- groups) {
  val bracketed = members.mkString("[", ", ", "]")
  println(s"$key : $bracketed")
}

println()

// Print version 2: each group rendered as a plain comma-separated line
for ((key, members) <- groups) {
  val joined = members.mkString(", ")
  println(s"$key: $joined")
}





/*
run:

tre : [trees]
inh : [inhabitants]
sur : [surprised]
low : [lowly, lowland, lower]
wer : [were]
see : [see]
bra : [branches]
the : [the, the, the, the]

tre: trees
inh: inhabitants
sur: surprised
low: lowly, lowland, lower
wer: were
see: see
bra: branches
the: the, the, the, the

*/

 



answered Mar 13 by avibootz
0 votes
import scala.util.matching.Regex

/** Groups the words of `s` by their first `n` letters using `groupBy`.
  *
  * The text is lower-cased and split into runs of ASCII letters; words shorter
  * than `n` are dropped. Within each group the words keep their order of
  * appearance, and repeated words are kept as duplicates.
  *
  * @param s text to scan for words
  * @param n prefix length used as the grouping key (defaults to 3); for `n <= 0`
  *          no word is filtered out and every word lands under the empty-string key
  * @return map from each lower-cased prefix to the words that start with it
  */
def groupByFirstNLetters(s: String, n: Int = 3): Map[String, List[String]] = {
  val wordRegex = "[A-Za-z]+".r

  // Locale.ROOT keeps lower-casing independent of the JVM default locale: under a
  // Turkish locale "I" becomes dotless i (U+0131), which the ASCII regex would split on.
  val lowered = s.toLowerCase(java.util.Locale.ROOT)

  wordRegex
    .findAllIn(lowered)
    .filter(_.length >= n)
    .toList
    .groupBy(_.take(n))
}


// Sample sentence used to demonstrate the grouping.
val s = "The lowly inhabitants of the lowland were surprised to see the lower branches of the trees."

// Bucket the words of the sentence by their three-letter prefix.
val groups = groupByFirstNLetters(s, 3)

// Print version 1: each group rendered as a bracketed list
for ((key, members) <- groups) {
  val bracketed = members.mkString("[", ", ", "]")
  println(s"$key : $bracketed")
}

println()

// Print version 2: each group rendered as a plain comma-separated line
for ((key, members) <- groups) {
  val joined = members.mkString(", ")
  println(s"$key: $joined")
}





/*
run:

tre : [trees]
inh : [inhabitants]
sur : [surprised]
low : [lowly, lowland, lower]
wer : [were]
see : [see]
bra : [branches]
the : [the, the, the, the]

tre: trees
inh: inhabitants
sur: surprised
low: lowly, lowland, lower
wer: were
see: see
bra: branches
the: the, the, the, the

*/

 



answered Mar 13 by avibootz
...