using System;
using System.Collections.Generic;
using System.Text;
/// <summary>
/// Demo program that finds the keywords shared by several blocks of text.
/// Each text is reduced to a set of lower-cased words, and the sets are
/// then intersected.
/// </summary>
internal class KeywordMatching
{
    /// <summary>
    /// Splits <paramref name="text"/> into a set of distinct, lower-cased words.
    /// </summary>
    /// <remarks>
    /// A word is a maximal run of characters for which
    /// <see cref="char.IsLetterOrDigit(char)"/> returns true; every other
    /// character (spaces, punctuation, hyphens, ...) acts as a separator and
    /// is discarded. Lower-casing uses the invariant culture so the resulting
    /// keywords do not depend on the culture of the current thread.
    /// </remarks>
    /// <param name="text">The text to tokenize. Must not be null.</param>
    /// <returns>
    /// A new set containing every distinct word found in <paramref name="text"/>;
    /// empty when the text contains no letters or digits.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="text"/> is null.
    /// </exception>
    private static HashSet<string> Tokenize(string text)
    {
        if (text == null)
        {
            throw new ArgumentNullException(nameof(text));
        }

        HashSet<string> words = new HashSet<string>();

        // A reusable buffer avoids allocating a new string per appended character.
        StringBuilder current = new StringBuilder();

        foreach (char c in text)
        {
            if (char.IsLetterOrDigit(c))
            {
                current.Append(char.ToLowerInvariant(c));
            }
            else if (current.Length > 0)
            {
                // A separator ends the word collected so far.
                words.Add(current.ToString());
                current.Clear();
            }
        }

        // Flush the last word when the text does not end with a separator.
        if (current.Length > 0)
        {
            words.Add(current.ToString());
        }

        return words;
    }

    /// <summary>
    /// Returns the words that are present in every set of
    /// <paramref name="allSets"/> (the intersection of all sets).
    /// </summary>
    /// <remarks>
    /// Works for any number of sets: an empty list yields an empty set and a
    /// single set yields a copy of itself. The input sets are never modified.
    /// </remarks>
    /// <param name="allSets">The sets to intersect. Must not be null or contain null.</param>
    /// <returns>A new set holding the words common to all input sets.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="allSets"/> is null.
    /// </exception>
    /// <exception cref="ArgumentException">
    /// <paramref name="allSets"/> contains a null set.
    /// </exception>
    private static HashSet<string> FindMatchesMultiple(List<HashSet<string>> allSets)
    {
        if (allSets == null)
        {
            throw new ArgumentNullException(nameof(allSets));
        }

        foreach (HashSet<string> set in allSets)
        {
            if (set == null)
            {
                throw new ArgumentException("The list must not contain null sets.", nameof(allSets));
            }
        }

        if (allSets.Count == 0)
        {
            return new HashSet<string>();
        }

        // Work on a copy of the first set so the caller's data stays untouched.
        HashSet<string> result = new HashSet<string>(allSets[0]);

        // Narrow the candidates with each remaining set; stop early once
        // nothing is left, because the intersection can only shrink.
        for (int i = 1; i < allSets.Count && result.Count > 0; i++)
        {
            HashSet<string> other = allSets[i];

            // Membership is checked through the other set's own Contains.
            result.RemoveWhere(word => !other.Contains(word));
        }

        return result;
    }

    /// <summary>
    /// Entry point: tokenizes three sample texts and prints the keywords
    /// that occur in all of them.
    /// </summary>
    private static void Main()
    {
        // Three text blocks to compare.
        string text1 =
            "Machine learning allows computers to learn from data. " +
            "It is widely used in modern applications.";
        string text2 =
            "Data science uses machine learning techniques. " +
            "Applications rely on data-driven models.";
        string text3 =
            "Modern applications of machine learning include data analysis, " +
            "automation, and intelligent systems.";

        // Tokenize every text and collect the word sets in a list for the
        // multi-text comparison.
        List<HashSet<string>> allSets = new List<HashSet<string>>()
        {
            Tokenize(text1),
            Tokenize(text2),
            Tokenize(text3)
        };

        // Find the keywords that occur in ALL texts.
        HashSet<string> matches = FindMatchesMultiple(allSets);

        // Output the results on a single line, separated by spaces.
        Console.WriteLine("Matched Keywords Across ALL Texts:");
        foreach (string w in matches)
        {
            Console.Write(w + " ");
        }
        Console.WriteLine();
    }
}
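/*
Sample run:
Matched Keywords Across ALL Texts:
machine learning data applications

Note: HashSet<T> does not guarantee an enumeration order, so the keywords
may be printed in a different order.
*/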