/// <summary>
/// Entry point: prints the cosine similarity between the first two columns
/// of the weight matrix returned by <c>getContent</c>.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args) {
    var weights = getContent();
    var rowCount = weights.GetLength(0);

    // Column 0 is the first entry added to the corpus (the query) and
    // column 1 is the second entry; copy both columns out in a single pass.
    double[] firstVector = new double[rowCount];
    double[] secondVector = new double[rowCount];
    for (int row = 0; row < rowCount; row++) {
        firstVector[row] = weights[row, 0];
        secondVector[row] = weights[row, 1];
    }

    var similarity = CalculateCosineSimilarity(firstVector, secondVector);

    Console.WriteLine("Similarity between Query and Doc2: ");
    Console.WriteLine(similarity);

    // Wait for a key press before the process exits.
    Console.ReadKey();
}
/// <summary>
/// Builds a TF-IDF weight matrix for a set of tokenised documents.
/// </summary>
/// <param name="splitedDocuments">The documents, each given as its list of words.</param>
/// <param name="unicalWords">The vocabulary; one matrix row is produced per entry.</param>
/// <returns>
/// A matrix indexed as <c>[wordIndex, documentIndex]</c>. Each cell holds
/// <c>tf * idf</c>, where <c>tf</c> is the word's occurrence count divided by the
/// document's word count and <c>idf</c> is
/// <c>1 + ln(documentCount / documentsContainingWord)</c>.
/// A word that occurs in no document, and every cell of an empty document,
/// gets weight 0 rather than NaN.
/// </returns>
public static double[,] GetWeights(List<List<string>> splitedDocuments, string[] unicalWords) {
    int documentCount = splitedDocuments.Count;
    int wordCount = unicalWords.Length;
    double[,] matrix = new double[wordCount, documentCount];

    // The IDF term depends only on the word, never on the current document,
    // so compute it once per word instead of once per (word, document) pair.
    double[] idf = new double[wordCount];
    for (int j = 0; j < wordCount; j++) {
        string word = unicalWords[j];
        int containingCount = splitedDocuments.Count(d => d.Contains(word));

        // A word found in no document would divide by zero; give it a zero IDF.
        idf[j] = containingCount > 0
            ? 1 + Math.Log((double) documentCount / containingCount)
            : 0;
    }

    for (int i = 0; i < documentCount; i++) {
        List<string> document = splitedDocuments[i];
        if (document.Count == 0) {
            // Term frequency would be 0/0; the column keeps its default zeros.
            continue;
        }

        // Occurrence count of every distinct word in this document.
        Dictionary<string, int> occurrences = document
            .GroupBy(w => w)
            .ToDictionary(g => g.Key, g => g.Count());

        for (int j = 0; j < wordCount; j++) {
            string word = unicalWords[j];

            // Words missing from this document (or null vocabulary entries) count as 0.
            int count = 0;
            if (word != null) {
                occurrences.TryGetValue(word, out count);
            }

            double tf = (double) count / document.Count;
            matrix[j, i] = tf * idf[j];
        }
    }

    return matrix;
}
/// <summary>
/// Computes the cosine similarity of two vectors: their dot product divided
/// by the product of their magnitudes.
/// </summary>
/// <param name="vecA">First vector.</param>
/// <param name="vecB">Second vector.</param>
/// <returns>
/// The cosine similarity, or 0 when the product of the magnitudes is zero
/// (for example when either vector is all zeros).
/// </returns>
private static double CalculateCosineSimilarity(double[] vecA, double[] vecB) {
    var dotProduct = DotProduct(vecA, vecB);
    var magnitudeProduct = Magnitude(vecA) * Magnitude(vecB);

    // Dividing by a zero magnitude would yield NaN or infinity, which cannot
    // be ranked against other scores; report "no similarity" instead.
    if (magnitudeProduct == 0) {
        return 0;
    }

    return dotProduct / magnitudeProduct;
}
/// <summary>
/// Sums the element-wise products of two vectors.
/// </summary>
/// <param name="vecA">First vector; its length determines how many terms are summed.</param>
/// <param name="vecB">Second vector; read at the same indices as <paramref name="vecA"/>.</param>
/// <returns>The accumulated sum of <c>vecA[i] * vecB[i]</c>.</returns>
private static double DotProduct(double[] vecA, double[] vecB) {
    double sum = 0;
    int index = 0;

    // Walk vecA in order and pair each element with the same position in vecB.
    foreach (double a in vecA) {
        sum += a * vecB[index];
        index++;
    }

    return sum;
}
/// <summary>
/// Computes the length of a vector as the square root of its dot product with itself.
/// </summary>
/// <param name="vector">The vector to measure.</param>
/// <returns>The square root of the sum of the squared elements.</returns>
private static double Magnitude(double[] vector) {
    double sumOfSquares = DotProduct(vector, vector);
    return Math.Sqrt(sumOfSquares);
}
/// <summary>
/// Builds the hard-coded sample corpus (one query and three documents),
/// splits each text on spaces, and returns the weight matrix that
/// <c>GetWeights</c> computes for it.
/// </summary>
/// <returns>
/// The matrix returned by <c>GetWeights</c>. The texts are passed in the order
/// query, document 1, document 2, document 3; rows follow the distinct words
/// in order of first appearance across those texts.
/// </returns>
private static double[,] getContent() {
    string query = "life learning";
    string document1 = "The game of life is a game of everlasting learning";
    string document2 = "The unexamined life is not worth living";
    string document3 = "Never stop learning";

    // The query is added first so that it is the first entry in the corpus.
    List<List<string>> documents = new List<List<string>>();
    documents.Add(query.Split(' ').ToList());
    documents.Add(document1.Split(' ').ToList());
    documents.Add(document2.Split(' ').ToList());
    documents.Add(document3.Split(' ').ToList());

    // Vocabulary: every distinct word across the query and all documents.
    // The comparison is case-sensitive, so "The" and "the" would be separate words.
    string[] unicalWords = (query + " " + document1 + " " + document2 + " " + document3)
        .Split(' ')
        .GroupBy(g => g)
        .Select(s => s.Key)
        .ToArray();

    return GetWeights(documents, unicalWords);
}
I want to check the similarity of one document against multiple queries, store the results in an array, and sort that array in descending order.