/***********************************************************************
*
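* Receives an LDAP search base, scope and filter and generates an awk
* script file:
*   grid-info-ldap-filter2awk base scope filter filename
*
* Currently deals with the following three filter types -
* PRESENCE, EQUALITY, and SUBSTRING. Examples (only the filter argument
* is shown; base, scope and filename are omitted):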
* $ grid-info-ldap-filter2awk (objectclass = *)
* $ grid-info-ldap-filter2awk (objectclass = GlobusTop)
* $ grid-info-ldap-filter2awk (objectclass = *Top*)
*
* and combinations of those basic filters
* $ grid-info-ldap-filter2awk (&(objectclass=*)(cputype=sparc))
* $ grid-info-ldap-filter2awk (|(objectclass=*)(cputype=sparc))
* $ grid-info-ldap-filter2awk (&(|(objectclass=*)(cputype=*))(objectclass=*T*))
*
***********************************************************************/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>

/*
* terminal filter types: split_str() reports one of these and gen_re()
* switches on it; WHATEVER is the "not decided yet" initial value
*/
#define		WHATEVER	-1
#define		PRESENCE	0
#define		EQUALITY	1
#define		SUBSTRING	2
#define		GTEQ		3
#define		LTEQ		4

/*
* filter kinds reported by is_term(): a terminal "(attr=value)" filter,
* an "(&...)" filter or an "(|...)" filter
*/
#define		TERM		0
#define		AND		1
#define		OR		2

/*
* sizes for fixed-length scratch arrays (index lists and string buffers)
*/
#define		MAXLIST		100
#define		MAXSTRING	100

/*
* forward declarations of the functions defined in this file
*/
char		*term_node(char *);
char		*rem_lwhite(char *);
char		*rem_rwhite(char *);
char		*gen_re(int, char *, char *);
char		*gen_sub(char *);
char		*upper_lower(char *);
char		*gen_pattern(char *);
char		*add_base_scope(char *, char *, int);
char		*add_space(char *);
char		*to_lower(char *);
int		split_pattern(char *, char ***);
int		split_str(char *, int, char **, char **);
int		write_pattern(char *, char *);
int		is_term(char *);
int		clear_results(char ***, int);

/***********************************************************************
* program entry point
*
* argv[1] = search base, argv[2] = numeric scope (0, 1 or 2),
* argv[3] = ldap search filter, argv[4] = name of the awk file to write
*
* exit status is 0 when the awk file was written and 1 otherwise
***********************************************************************/
int
main(int argc, char *argv[])
{
char *filter_re=NULL, *full_re=NULL;
int status;

  if (argc != 5) {
    fprintf(stderr, 
           "Usage: grid-info-ldap-filter2awk base scope filter filename\n");
    exit (1);
  }

  /*
  * generate the regular expression for the ldap search filter
  */
  filter_re = gen_pattern(argv[3]);
  if (!filter_re) {
    exit (1);
  }

  /*
  * restrict the expression to the requested base and scope
  *
  * NOTE(review): atoi() yields 0 for non-numeric text, so a scope given
  * by name would silently be treated as scope 0 - confirm that callers
  * always pass a number
  */
  full_re = add_base_scope(filter_re, argv[1], atoi(argv[2]));
  free(filter_re);
  if (!full_re) {
    exit (1);
  }

  /*
  * write the awk file
  */
  status = (write_pattern(full_re, argv[4]) != 0) ? 1 : 0;
  free(full_re);

  return status;
}

/***********************************************************************
* receives an ldap search filter string and
* generates a regular expression string
***********************************************************************/
char *
gen_pattern(char *str)
{
int res, no, i, cnt=0;
char **pattern_list, **re_list, *result;

  /* check the string if it's terminal, AND filter, or OR filter */
  res = is_term(str);

  switch (res) {
    case TERM:
      return term_node(str);
    case AND:
      /*
      * split the filter into a list of filters
      * resulting filters also can be AND/OR filters
      * the number of filters should not be less than 2
      */
      no = split_pattern(str, &pattern_list);
      if (no < 1) {
        return NULL;
      }
      
      /*
      * generate regular expression strings for each of
      * the list of filters
      */
      re_list = (char **) calloc(no, sizeof(char *));
      for (i=0; i<no; i++) {
        re_list[i] = gen_pattern(pattern_list[i]);
        if (!re_list[i])
          return NULL;
      }

      for (i=0; i<no; i++)
        cnt += strlen(re_list[i]);

      /* 
      * make room for adding " && ", "(", ")" and "\0"
      */
      result = (char *) calloc(cnt + 4 * (no-1) + 3, sizeof(char));

      sprintf(result, "(%s", re_list[0]);
      for (i=1; i<no; i++) {
        (void) strcat(result, " && ");
        (void) strcat(result, re_list[i]);
      }
      (void) strcat(result, ")");

      /*
      * free allocated memory
      */
      (void) clear_results(&pattern_list, no);
      (void) clear_results(&re_list, no);

      return result;

    case OR:
      /*
      * split the filter into a list of filters
      * resulting filters also can be AND/OR filters
      * the number of filters should not be less than 2
      */
      no = split_pattern(str, &pattern_list);
      if (no < 1) {
        return NULL;
      }
      
      /*
      * generate regular expression strings for each of
      * the list of filters
      */
      re_list = (char **) calloc(no, sizeof(char *));
      for (i=0; i<no; i++) {
        re_list[i] = gen_pattern(pattern_list[i]);
        if (!re_list[i])
          return NULL;
      }

      for (i=0; i<no; i++)
        cnt += strlen(re_list[i]);

      /* 
      * make room for adding " && ", "(", ")" and "\0"
      */
      result = (char *) calloc(cnt + 4 * (no-1) + 3, sizeof(char));

      sprintf(result, "(%s", re_list[0]);
      for (i=1; i<no; i++) {
        (void) strcat(result, " || ");
        (void) strcat(result, re_list[i]);
      }
      (void) strcat(result, ")");

      /*
      * free allocated memory
      */
      (void) clear_results(&pattern_list, no);
      (void) clear_results(&re_list, no);

      return result;

    default:
      return NULL;
  }
}

/***********************************************************************
* receives a regular expression string
* add base and scope information to it
*
* scope 0 matches entries whose dn equals the base, scope 1 matches
* entries whose dn is one component followed by the base, scope 2
* matches entries whose dn ends with the base
*
* "str" is not modified or freed; the result "(str && dn-test)" is
* newly allocated and must be freed by the caller
* returns NULL for an unknown scope or when memory runs out
***********************************************************************/
char *
add_base_scope(char *str, char *base, int scope)
{
char *result=NULL, *patt=NULL, *res=NULL, *dn=NULL, *base_str=NULL;
int type;

  if (!str || !base)
    return NULL;

  switch (scope) {
    case 0:
    case 1:
      type = EQUALITY;
      break;
    case 2:
      type = SUBSTRING;
      break;
    default:
      fprintf(stderr, "Scope should be base, one, or sub\n");
      return NULL;
  }

  base_str = add_space(base);
  if (!base_str)
    return NULL;

  /*
  * build the value the dn attribute is compared with
  */
  switch (scope) {
    case 0:
      patt = (char *) strdup(base_str);
      break;
    case 1:
      patt = (char *) calloc(strlen(base_str) + 12, sizeof(char));
      if (patt)
        sprintf(patt, "[^,][^,]*, %s", base_str);
      break;
    default:
      patt = (char *) calloc(strlen(base_str) + 2, sizeof(char));
      if (patt)
        sprintf(patt, "*%s", base_str);
      break;
  }
  free(base_str);

  dn = (char *) strdup("dn");
  if (!dn || !patt) {
    free(dn);
    free(patt);
    return NULL;
  }

  /*
  * gen_re frees dn and patt once the expression is built,
  * so they must not be freed here
  */
  res = gen_re(type, dn, patt);
  if (!res)
    return NULL;

  /* 
  * make room for adding " && ", "(", ")" and "\0"
  */
  result = (char *) calloc(strlen(str) + strlen(res) + 7, sizeof(char));
  if (result)
    sprintf(result, "(%s && %s)", str, res);

  /*
  * free allocated memory
  */
  free(res);

  return result;
}

/***********************************************************************
* add spaces back into the base
*
* returns a newly allocated copy of "str" in which every ',' that is
* neither the last character nor already followed by a space gets a
* space inserted after it, e.g. "o=Grid,c=US" -> "o=Grid, c=US"
*
* the caller must free the result; NULL is returned when "str" is NULL
* or memory runs out
***********************************************************************/
char *
add_space(char *str)
{
char *result=NULL, *out;
size_t size, commas=0, i;

  if (!str)
    return NULL;

  size = strlen(str);
  for (i=0; i<size; i++) {
    if (str[i] == ',')
      commas++;
  }

  /* worst case: one extra space per comma, plus the terminating "\0" */
  result = (char *) calloc(size + commas + 1, sizeof(char));
  if (!result)
    return NULL;

  out = result;
  for (i=0; i<size; i++) {
    *out++ = str[i];
    if (str[i] == ',' && i + 1 < size && str[i+1] != ' ')
      *out++ = ' ';
  }
  *out = 0;

  return result;
}

/***********************************************************************
* receive the filter and divide it into a list of filters
*
* "str" must be an AND/OR filter such as (&(|(filter1)(filter2))(filter3))
* on success *pattern_list receives a newly allocated array holding a
* newly allocated copy of every first level sub-filter, here
* (|(filter1)(filter2)) and (filter3), and their number is returned;
* the caller frees the strings and the array
*
* on failure -1 is returned and *pattern_list is left untouched
*
* the filter is scanned twice (count, then copy) so that neither the
* number nor the length of the sub-filters is limited by a fixed buffer
***********************************************************************/
int
split_pattern(char *str, char ***pattern_list)
{
size_t leng, op, i, first=0, sub_len;
int depth, cnt=0, filled=0;
char **list;

  if (!str) {
    fprintf(stderr, "Bad search filter\n");
    return -1;
  }
  /* smallest possible str is (&(a=b)) */
  leng = strlen(str);
  if (leng < 8 || str[0] != '(') {
    fprintf(stderr, "Bad search filter: %s\n", str);
    return -1;
  }

  /*
  * remove white spaces before '&' or '|'
  */
  op = 1;
  while (str[op] == ' ' && op < leng-1)
    op++;
  if (str[op] != '&' && str[op] != '|') {
    fprintf(stderr, "Bad search filter: %s\n", str);
    return -1;
  }

  /*
  * first pass: count the first level filters and check the nesting
  * depth 1 means "directly inside the enclosing AND/OR filter"
  */
  depth = 1;
  for (i=op+1; i<leng-1; i++) {
    if (str[i] == '(')
      depth++;
    else if (str[i] == ')') {
      depth--;
      if (depth < 1) {
        /* a ')' closed the enclosing filter before its end */
        fprintf(stderr, "Bad search filter: %s\n", str);
        return -1;
      }
      if (depth == 1)
        cnt++;
    }
  }

  /*
  * the number of the list of filters should not be
  * less than 1 and the last character should be ')'
  */
  if (cnt < 1 || depth != 1 || str[leng-1] != ')') {
    fprintf(stderr, "Bad search filter: %s\n", str);
    return -1;
  }

  list = (char **) calloc((size_t) cnt, sizeof(char *));
  if (!list) {
    fprintf(stderr, "Out of memory\n");
    return -1;
  }

  /*
  * second pass: copy the substring of the filter into each of
  * the list of filters
  */
  depth = 1;
  for (i=op+1; i<leng-1; i++) {
    if (str[i] == '(') {
      if (depth == 1)
        first = i;
      depth++;
    }
    else if (str[i] == ')') {
      depth--;
      if (depth == 1) {
        sub_len = i - first + 1;
        list[filled] = (char *) malloc(sub_len + 1);
        if (!list[filled]) {
          fprintf(stderr, "Out of memory\n");
          while (filled > 0)
            free(list[--filled]);
          free(list);
          return -1;
        }
        memcpy(list[filled], str + first, sub_len);
        list[filled][sub_len] = 0;
        filled++;
      }
    }
  }

  *pattern_list = list;
  return cnt;
}

/***********************************************************************
* classify a filter by the first non-blank character after its '('
*
* returns AND for '&', OR for '|' and TERM for anything else
* returns -1 (after printing a message) when the string is NULL, does
* not start with '(' or has nothing but blanks before its last character
***********************************************************************/
int
is_term(char *str)
{
int last, pos;

  if (str == NULL) {
    fprintf(stderr, "Bad search filter\n");
    return -1;
  }

  /* index of the final character (-1 for an empty string) */
  last = (int) strlen(str) - 1;

  if (*str != '(') {
    fprintf(stderr, "Bad search filter: %s\n", str);
    return -1;
  }

  /* skip the blanks between '(' and the operator or attribute */
  for (pos = 1; str[pos] == ' ' && pos < last; pos++)
    ;

  if (pos >= last) {
    fprintf(stderr, "Bad search filter: %s\n", str);
    return -1;
  }

  switch (str[pos]) {
    case '&':
      return AND;
    case '|':
      return OR;
    default:
      return TERM;
  }
}

/***********************************************************************
* write the awk script into a file
*
* the script reads blank-line separated records, lower-cases each of
* them into "record" and prints the original record followed by an
* empty line whenever the expression "str" holds
*
* returns 0 on success; returns -1 (after printing a message) when the
* file cannot be opened or when writing or closing it fails
***********************************************************************/
int
write_pattern(char *str, char *filename)
{
  FILE *patt_fp;
  int failed;

  if ((patt_fp = fopen(filename, "w")) == NULL)
  {
    fprintf(stderr, "Cannot open %s\n", filename);
    return -1;
  }

  fputs("BEGIN {\n"
        "        FS = \"\\n\"\n"
        "        RS = \"\"\n"
        "      }\n\n"
        "      {\n"
        "        record = tolower($0)\n", patt_fp);
  fprintf(patt_fp, "        if ( %s ) {\n", str);
  fputs("          print $0\n"
        "          print \"\"\n"
        "        }\n"
        "      }\n", patt_fp);

  /*
  * buffered output may only fail when it is flushed, so the stream
  * error flag and the result of fclose are both checked
  */
  failed = ferror(patt_fp);
  if (fclose(patt_fp) != 0)
    failed = 1;

  if (failed) {
    fprintf(stderr, "Cannot write %s\n", filename);
    return -1;
  }
  return 0;
}

/***********************************************************************
* generates a regular expression from two strings
*
* sw   - filter type: PRESENCE, EQUALITY or SUBSTRING
*        (GTEQ, LTEQ and anything else print "Not Defined")
* str1 - attribute name
* str2 - attribute value; not used for the match when sw is PRESENCE
*
* both strings are lower-cased before they are inserted because the
* generated awk script matches against a lower-cased record; for
* SUBSTRING every '*' of the value is first expanded by gen_sub()
*
* gen_re takes ownership of str1 and str2: both must be heap allocated
* and both are freed before returning, on success and on failure
*
* returns a newly allocated awk match expression the caller must free,
* or NULL for an unsupported type, a missing string or lack of memory
*
* NOTE(review): the value is inserted verbatim (add_base_scope passes a
* ready-made pattern through EQUALITY), so regular expression
* metacharacters such as '/' or '.' in a value are not escaped
***********************************************************************/
char *
gen_re(int sw, char *str1, char *str2)
{
char *res=NULL, *sub=NULL, *left=NULL, *right=NULL;

  switch (sw) {
    case PRESENCE:
    case EQUALITY:
    case SUBSTRING:
      break;
    case GTEQ:
    case LTEQ:
    default:
      fprintf(stderr, "Not Defined\n");
      goto done;
  }

  if (!str1 || (sw != PRESENCE && !str2))
    goto done;

  left = to_lower(str1);
  if (!left)
    goto done;

  if (sw == PRESENCE) {
    res = (char *) calloc(strlen(left) + 19, sizeof(char));
    if (res)
      sprintf(res, "record ~ /(^|\\n)%s:/", left);
    goto done;
  }

  if (sw == SUBSTRING) {
    sub = gen_sub(str2);
    if (!sub)
      goto done;
    right = to_lower(sub);
  }
  else
    right = to_lower(str2);
  if (!right)
    goto done;

  res = (char *) calloc(strlen(left) + strlen(right) + 26, sizeof(char));
  if (res)
    sprintf(res, "record ~ /(^|\\n)%s: %s($|\\n)/", left, right);

done:
  /* free(NULL) is a no-op, so no guards are needed */
  free(sub);
  free(left);
  free(right);
  free(str1);
  free(str2);

  return res;
}

/***********************************************************************
* expand the ldap wildcard: every '*' of the value becomes the regular
* expression "[^\n]*" (any run of characters up to the end of the line)
* all other characters are copied unchanged
*
* returns a newly allocated string the caller must free
***********************************************************************/
char *
gen_sub(char *str)
{
static const char wildcard[] = "[^\\n]*";
const size_t wild_len = sizeof(wildcard) - 1;
int leng = strlen(str);
char *expanded, *dst;
const char *src;

  /* every input character grows to at most six output characters */
  expanded = (char *) calloc(leng * 6 + 1, sizeof(char));

  dst = expanded;
  for (src = str; *src != 0; src++) {
    if (*src != '*') {
      *dst++ = *src;
      continue;
    }
    memcpy(dst, wildcard, wild_len);
    dst += wild_len;
  }
  *dst = 0;

  return expanded;
}

/***********************************************************************
* awk script changes the (attr: value) format data into lower cases
* this routine changes the regular expression to lower cases
*
* returns a newly allocated lower-cased copy of "str" the caller must
* free, or NULL when "str" is NULL or memory runs out
***********************************************************************/
char *to_lower(char *str)
{
size_t leng, i;
char *change=NULL;

  if (!str)
    return NULL;

  leng = strlen(str);
  change = (char *) calloc(leng  + 1, sizeof(char));
  if (!change)
    return NULL;

  for (i=0; i<leng; i++) {
      /*
      * tolower() is only defined for EOF and unsigned char values;
      * a plain char may be negative, so convert it first
      */
      change[i] = (char) tolower((unsigned char) str[i]);
  }
  change[leng] = 0;

  return change;
}

/***********************************************************************
* make a regular expression case insensitive letter by letter
*
* every letter a-z or A-Z is replaced by the group "(lower|UPPER)",
* e.g. 'a' and 'A' both become "(a|A)"; an 'n' that directly follows a
* backslash is kept as it is so that "[^\n]" survives; every other
* character is copied unchanged
*
* returns a newly allocated string the caller must free
***********************************************************************/
char *upper_lower(char *str)
{
int leng = strlen(str);
char *change, *out;
int i;

  /* a letter grows to five characters: '(' lower '|' upper ')' */
  change = (char *) calloc(leng * 5 + 1, sizeof(char));
  out = change;

  for (i = 0; i < leng; i++) {
    char c = str[i];
    int escaped_n = (c == 'n' && i > 0 && str[i-1] == '\\');
    char lower, upper;

    if (!escaped_n && c >= 'a' && c <= 'z') {
      lower = c;
      upper = (char) (c - 32);
    }
    else if (!escaped_n && c >= 'A' && c <= 'Z') {
      lower = (char) (c + 32);
      upper = c;
    }
    else {
      *out++ = c;
      continue;
    }

    *out++ = '(';
    *out++ = lower;
    *out++ = '|';
    *out++ = upper;
    *out++ = ')';
  }
  *out = 0;

  return change;
}

/***********************************************************************
* translate one terminal search filter "(attribute=value)"
*
* the filter must be at least 5 characters long and be enclosed in
* '(' and ')'; it is split at '=' by split_str() and the two parts are
* handed to gen_re() together with the filter type split_str() found
*
* returns whatever gen_re() returns, or NULL (after a message) when the
* filter is missing, malformed or cannot be split
***********************************************************************/
char *
term_node(char *str)
{
char *attr=NULL, *value=NULL;
int len, type;

  if (str == NULL) {
    fprintf(stderr, "No search filters\n");
    return NULL;
  }

  len = strlen(str);
  if (len < 5 || !(str[0] == '(' && str[len-1] == ')')) {
    fprintf(stderr, "Bad search filter\n");
    return NULL;
  }

  type = split_str(str, '=', &attr, &value);
  if (type != -1)
    return gen_re(type, attr, value);

  /* the split failed: release whichever half was already produced */
  if (attr)
    free(attr);
  if (value)
    free(value);
  return NULL;
}

/***********************************************************************
* split a terminal filter string into two strings, i.e. (A=B) into A & B
* also determine the type of the filter - PRESENCE, EQUALITY, SUBSTRING 
*
* str    - the terminal filter; it is only read, never modified
* value  - the separator character to split at ('=')
* first  - receives a newly allocated copy of the attribute with the
*          leading '(' and surrounding blanks removed
* second - receives a newly allocated copy of the value with the
*          trailing ')' and surrounding blanks removed
*
* returns GTEQ or LTEQ when the separator is preceded by '>' or '<',
* otherwise EQUALITY (no '*' in the value), PRESENCE (the value is
* exactly "*") or SUBSTRING
*
* returns -1 on failure; *first and *second are then NULL and nothing
* is left allocated
***********************************************************************/
int
split_str(char *str, int value, char **first, char **second)
{
int inx, leng, res=WHATEVER;
char *attr_buf=NULL, *value_buf=NULL, *trimmed;

  if (!str || !first || !second) {
    fprintf(stderr, "Bad search filter\n");
    return -1;
  }
  *first = NULL;
  *second = NULL;

  /* 
  * look for the index of "value" which is "="
  */
  leng = strlen(str);
  for (inx=0; inx<leng; inx++) {
    if (str[inx] == value)
      break;
  }
  if (inx >= leng) {
    fprintf(stderr, "Bad search filter\n");
    return -1;
  }

  /*
  * work on private copies of both halves: rem_lwhite and rem_rwhite
  * cut their argument in place
  */
  attr_buf = (char *) calloc((size_t) inx + 1, sizeof(char));
  value_buf = (char *) calloc((size_t) (leng - inx) + 1, sizeof(char));
  if (!attr_buf || !value_buf)
    goto fail;
  memcpy(attr_buf, str, (size_t) inx);
  memcpy(value_buf, str + inx, (size_t) (leng - inx));

  /*
  * check if GTEQ or LTEQ
  * (inx may be 0 when the string starts with the separator)
  */
  if (inx > 0 && str[inx-1] == '>') {
    attr_buf[inx-1] = 0;
    res = GTEQ;
  }
  else if (inx > 0 && str[inx-1] == '<') {
    attr_buf[inx-1] = 0;
    res = LTEQ;
  }

  /*
  * remove '(' and extra white spaces from the attribute
  */
  trimmed = rem_lwhite(attr_buf);
  if (!trimmed)
    goto fail;
  *first = (char *) strdup(trimmed);
  if (!*first)
    goto fail;

  /*
  * remove ')' and extra white spaces from the value
  */
  trimmed = rem_rwhite(value_buf);
  if (!trimmed)
    goto fail;
  *second = (char *) strdup(trimmed);
  if (!*second)
    goto fail;

  if ( res == WHATEVER ) { /* if not GTEQ or LTEQ */
    if ( strchr(*second, '*') == NULL )
      res = EQUALITY;
    else if (strlen(*second) == 1)
      res = PRESENCE;
    else
      res = SUBSTRING;
  }

  free(attr_buf);
  free(value_buf);
  return res;

fail:
  free(attr_buf);
  free(value_buf);
  free(*first);
  free(*second);
  *first = NULL;
  *second = NULL;
  return -1;
}

/***********************************************************************
* trim the attribute part of a terminal search filter in place
*
* skips every leading '(' and blank, then cuts the string after its
* last non-blank character by writing a "\0" into "str"
*
* returns a pointer into "str" at the first kept character, or NULL
* (after printing "Bad attribute") when nothing is left
***********************************************************************/
char *
rem_lwhite(char *str)
{
char *head = str, *tail;

  while (*head == '(' || *head == ' ')
    head++;

  /* walk back from the end over the trailing blanks */
  tail = head + strlen(head);
  while (tail > head && tail[-1] == ' ')
    tail--;

  if (tail == head) {
    fprintf(stderr, "Bad attribute\n");
    return NULL;
  }

  *tail = 0;
  return head;
}
 
/***********************************************************************
* trim the value part of a terminal search filter in place
*
* skips every leading '=' and blank, then cuts the string after its
* last character that is neither a blank nor ')' by writing a "\0"
* into "str"
*
* returns a pointer into "str" at the first kept character, or NULL
* (after printing "Bad value") when nothing is left
***********************************************************************/
char *
rem_rwhite(char *str)
{
char *head = str, *tail;

  while (*head == '=' || *head == ' ')
    head++;

  /* walk back from the end over trailing blanks and ')' */
  tail = head + strlen(head);
  while (tail > head && (tail[-1] == ' ' || tail[-1] == ')'))
    tail--;

  if (tail == head) {
    fprintf(stderr, "Bad value\n");
    return NULL;
  }

  *tail = 0;
  return head;
}
 
/***********************************************************************
* release a list of strings built by this program
*
* frees the first "size" entries of the array *results, then the array
* itself, and sets *results to NULL
*
* returns 1 when the list was released and 0 when there was nothing to
* do (*results is NULL or size is 0; the array is left untouched then)
***********************************************************************/
int
clear_results( char ***results, int size )
{
char **list = *results;
int k = 0;

  if (list == NULL || size == 0)
    return 0;

  while (k < size) {
    /* free(NULL) is harmless, so empty slots need no special case */
    free(list[k]);
    list[k] = NULL;
    k++;
  }

  free(list);
  *results = NULL;

  return 1;
}

