Hi,
I need help! I'm trying to implement a glob pattern generator, that would take a list of strings as input, and that would output the longest glob pattern that matches those strings.
input:
so I have a car
so I have a bicycle
so you have a car
output:
so * have a *
Is there such a tool already somewhere?
function lcs(S, T) {
var D = [];
var LCS_len = 0;
var LCS = [];
for (var i = 0; i < S.length; i++) {
D[i] = [];
for (var j = 0; j < T.length; j++) {
if (S[i] == T[j]) {
if (i == 0 || j == 0) {
D[i][j] = 1;
} else {
D[i][j] = D[i-1][j-1] + 1;
}
if (D[i][j] > LCS_len) {
LCS_len = D[i][j];
LCS = [S.substring(i-LCS_len+1, i+1)];
} else if (D[i][j] == LCS_len) {
LCS.push(S.substring(i-LCS_len+1, i+1));
}
} else {
D[i][j] = 0;
}
}
}
return LCS[0];
}
function multiLcs(strings) {
let candidate = strings[0];
for (let str1 of strings)
for (let str2 of strings) {
let subs = lcs(str1, str2);
if (!subs) return '';
if (subs.length < candidate.length)
candidate = subs;
}
return candidate;
}
function recurGenGlob(strings) {
let center = multiLcs(strings);
if (center.length == '') return '';
let pre = strings.map(str => str.slice(0, str.indexOf(center)));
let post = strings.map(str => str.slice(str.indexOf(center) + center.length));
center = recurGenGlob(pre) + '*' + center + '*' + recurGenGlob(post);
return center;
}
function genGlob(strings) {
let center = recurGenGlob(strings.map(s => ' '+s+' '));
let result = '';
let star = false;
for (let i = 0; i < center.length; i++) {
if (center[i] != '*' || star)
result += center[i];
star = center[i] == '*';
}
return result.trim();
}
doesn't work with
"and so I have a car",
"so I have a bicycle",
"so you have a car",
Here is the new corrected genGlob().
genGlob = (strings) => {
let center = recurGenGlob(strings);
while (center.indexOf('**') > -1)
center = center.replace(/\*\*/g, '*');
let startStar = false,
endStar = false,
startLetter = strings[0][0],
endLetter = strings[0][strings[0].length-1];
for (let s of strings) {
if (s[0] != startLetter) startStar = true;
if (s[s.length-1] != endLetter) endStar = true;
}
if (!startStar) center = center.slice(1);
if (!endStar) center = center.slice(0, -1);
if (center.length == 0) center = '*';
return center;
}
This will work with any N number of words and sentences:
using System;
using System.Collections.Generic;
using System.Linq;
public class Program
{
public static void Main()
{
List<string> alreadyCounted = new List<string>();
var counts = new Dictionary<string, int>();
string[] sets = new string[] { "so I have a car, but this car is used", "so I have a bicycle", "so you have a car" };
foreach (String aSet in sets)
{
var sentenceSplit = aSet.Split(new char[] { ' ', ',' });
alreadyCounted.Clear();
foreach (String aWord in sentenceSplit)
{
var trimmed = aWord.Trim();
if (counts.ContainsKey(trimmed) && !alreadyCounted.Contains(trimmed))
counts[trimmed] = counts[trimmed] + 1;
else if(!alreadyCounted.Contains(trimmed))
counts.Add(trimmed, 1);
alreadyCounted.Add(trimmed);
}
}
var results = counts.Where(x => x.Value == sets.Length);
if (results != null)
{
foreach (KeyValuePair<string, int> entry in results)
Console.WriteLine(entry.Key);
}
}
}
Try it on Fiddle: https://dotnetfiddle.net/
Its a small mod to get it to work with a file where you just load the file into the string array.
I placed a sequence check so if not all sentences are in a somewhat similar sequence it will print no matches.
using System;
using System.Collections.Generic;
using System.Linq;
public class Program
{
public static void Main()
{
List<string> alreadyCounted = new List<string>();
var counts = new Dictionary<string, Tally>();
string[] sets = new string[] { "so I have a car, but this car is used", "so I have a bicycle", "so you have a car", "so, I have a knife too" };
foreach (String aSet in sets)
{
var sentenceSplit = aSet.Split(new char[] { ' ', ',' });
alreadyCounted.Clear();
int pos = 0;
foreach (String aWord in sentenceSplit)
{
pos++;
var trimmed = aWord.Trim();
if (counts.ContainsKey(trimmed) && !alreadyCounted.Contains(trimmed))
{
counts[trimmed].Count = counts[trimmed].Count + 1;
counts[trimmed].Positions.Add(pos);
}
else if(!alreadyCounted.Contains(trimmed))
counts.Add(trimmed, new Tally(1, pos));
alreadyCounted.Add(trimmed);
}
}
var results = counts.Where(x => x.Value.Count == sets.Length);
if (results != null && CheckSequence(results.ToList()))
{
foreach (KeyValuePair<string, Tally> entry in results)
{
Console.Write(entry.Key + ": ");
foreach(int pos in entry.Value.Positions)
{
Console.Write(pos.ToString() + ",");
}
Console.WriteLine("");
}
}
else
Console.WriteLine("No matches that are sequentially similar.");
}
private static bool CheckSequence(List<KeyValuePair<string, Tally>> results)
{
foreach (KeyValuePair<string, Tally> anEntry in results)
{
int temp = anEntry.Value.Positions[0];
for (int idx = 1; idx <anEntry.Value.Positions.Count; idx++)
{
if (temp > anEntry.Value.Positions[idx])
{
return false;
}
temp = anEntry.Value.Positions[idx];
}
}
return true;
}
}
sealed class Tally
{
public int Count {set; get;}
public List<int> Positions {set; get;}
public Tally(int count, int pos)
{
Count = count;
Positions = new List<int>();
Positions.Add(pos);
}
}
I was thinking word frequency analysis...
<?php
$inputs = <<<EOD
so I have a car
so I have a bicycle
so you have a car
EOD;
$table=array_count_values(str_word_count($inputs, 1));
foreach($table as $word=>$freq){
if($freq==3){
$words[]=$word;
}
}
echo var_export($words,TRUE);
?>
array (
0 => 'so',
1 => 'have',
2 => 'a',
)
Check this one:
"car I have a car, but this car is used",
"so I have a car",
"I so car have you"
Mine is wrong...
Getting:
Expected: