/* fuzzydupe.c - Remove fuzzy duplicates from stdin
 *
 * Copyright (C) 2000 Alan Shutko
 *
 * 
 * Description:
 * 
 *  This program will try to trim duplicate lines, where they may have
 * different spacing or punctuation.  It accepts a fuzzily sorted
 * input (GNU sort with a locale seemed to work at one time) and will
 * compare the alpha content of lines, after stripping out all
 * non-alpha characters and converting alphas to uppercase.  It will
 * output the first of a fuzzily matched set to stdout and discard the
 * rest.
 * 
 * History:
 * ats     10/23/00   Created
 *
 * Tokens: ::Header:: fuzzydupe.h
 */

#include <stdio.h>
#include <string.h>
#include <ctype.h>

/* Writes the alphabetic characters of input, converted to uppercase, into
 * crunched as a NUL-terminated string.  Defined later in this file. */
static void crunch(char *crunched, char *input);

/*
 * Filter stdin to stdout, dropping fuzzy duplicates of the previous
 * emitted line.
 *
 * Each line is reduced by crunch() to a comparison key.  A line is
 * written to stdout, unmodified, only when its key differs from the key
 * of the most recently written line; the very first line is always
 * written.
 *
 * Limitation: input is read in pieces of at most sizeof(this_input) - 1
 * characters, so a longer line is treated as several separate lines.
 *
 * Returns 0.
 */
int main(void)
{
    char last_crunched[200] = "";
    char this_input[200], this_crunched[200];
    int have_last = 0;  /* becomes 1 once last_crunched holds a real key */

    while (NULL != fgets(this_input, sizeof(this_input), stdin))
    {
        crunch(this_crunched, this_input);

        /* Without the have_last test, a first line whose key is empty
         * (no alphabetic characters) would match the initial empty
         * last_crunched and be dropped. */
        if (!have_last || strcmp(this_crunched, last_crunched) != 0)
        {
            /* The key is never longer than the input piece it came from,
             * so it always fits the equally sized destination buffer. */
            strcpy(last_crunched, this_crunched);
            have_last = 1;
            fputs(this_input, stdout);
        }
    }
    return 0;
}

/*
 * Build the comparison key for one line.
 *
 * Copies every character of input for which isalpha() is true into
 * crunched, converted with toupper(), and NUL-terminates the result.
 * All other characters (spaces, digits, punctuation, the newline) are
 * skipped.
 *
 *   crunched - destination buffer; must have room for strlen(input) + 1
 *              characters, since the key is never longer than the input.
 *   input    - NUL-terminated source string; not modified.
 */
static void crunch(char *crunched, char *input)
{
    size_t i, o = 0;

    for (i = 0; input[i] != '\0'; i++)
    {
        /* The <ctype.h> functions require a value representable as
         * unsigned char (or EOF); passing a negative plain char is
         * undefined behaviour, so convert first. */
        unsigned char c = (unsigned char)input[i];

        if (isalpha(c))
        {
            crunched[o++] = (char)toupper(c);
        }
    }

    crunched[o] = '\0';
}

            
