/*---------------------------------------------------------
Parse a text of Javascript code 1 char at the time, paying special attention 
to strings and the matching of {} and (), to build a tree structure
of BlockElements. Such a block has a start string and an end string.
It may also have a number of child blocks (contained in an array).

Example result of parsing:
while(i<3) {	  block 0 start string
  i++;		  block 0 child 0 start string (and empty end string)
  j--;		  block 0 child 1 start string (and empty end string)
  if(i=3) {		  block 0 child 2 start string
    j=0;		  block 0 child 2 child 0 start string (and empty end string)
  }		  block 0 child 2 end string
}		  block 0 end string

Copyright (C) 2001 Troels Jakobsen, scripter@get2net.dk
---------------------------------------------------------*/


// Define constants
var BLOCK_GENERIC = 0;
var BLOCK_IF	  = 1;
var BLOCK_FOR	  = 2;
var BLOCK_WHILE   = 3;
var BLOCK_WITH	  = 4;
var BLOCK_SWITCH  = 5;
var BLOCK_ELSE	  = 6;

// Global vars.
var rootBlock;
var parseErrors = new Array();

// Define parse error object
function ParseError(str, pos) {
  this.length = 2;
  this.str = str;
  this.pos = pos;
}

function addParseError(str, pos) {
  parseErr = new ParseError(str.replace(/<b>/g,'<b style=\'color:red\'>'), pos);
  parseErrors[parseErrors.length] = parseErr;
}

// Define block element object
function BlockElement(parent) {
  this.length = 8;		  // The complete statement might be:	while(i<3) {i++;j--;}
  this.strStart = "";		  // Start of statement if any	- eg.	while(i<3) {
  this.strEnd = "";		  // End of statement if any	- eg.	}
  this.type = BLOCK_GENERIC;	  // Type of block (for, if, etc)
  this.level = 0;		  // Current level (number of consecutive parents)
  this.pos = -1;		  // Start pos. of str inside parent block's str
  this.indented = false;	  // Indicates whether stmt. is indented in rel. to parent.
  this.parent = parent; 	  // Super-block
  this.children = new Array();	  // Sub-blocks

  if(this.parent!==null) {
    parent.children[parent.children.length] = this;
    this.level = parent.level+1;
  }
}

function scan(str) {
  parseErrors.length = 0;
  return doScan(str);
}

function doScan(str) {

  function isNextStatementChild(block, str, strpos) {
    if(block.type != BLOCK_GENERIC) {
      // Check if statement ends with { or ;
      var cc = nextSignificantChar(str, strpos+1);
      if(cc != '{' && cc != ';') {
	// We have a condtional/loop statement (for/if/while/etc.) without brackets.
	// Next stmt. (and ONLY next stmt.) must be treated as a "child" (indented).
	return true;
      }
    }
    return false;
  }

  function insertChildBlock(block) {
    block = new BlockElement(block);
    return block;
  }

  function insertSiblingBlock(block) {
    block = new BlockElement(block.parent);
    return block;
  }

  function nonIndentedParent(block) {
    var parentBlock = block;
    while (parentBlock.indented) {
      if(parentBlock.parent===null) {
	return parentBlock;
      }
      parentBlock = parentBlock.parent;
    }
    return parentBlock;
  }

  var c, pos, nipb;		     // Current char
  var strpos = -1;	     // Position within str
  var substr = "";	     // Current substring (since last substring)
  var block = null;	     // Current BlockElement
  var parseErr;
//  var insideSwitch = false;  // Are we currently parsing a switch statement?

  block = new BlockElement(null);
  block.level = -1;
  rootBlock = block;
  block = new BlockElement(block);

  while (strpos < str.length) {
    strpos++;
    c = str.charAt(strpos);
    switch (c) {

      case '"': 	     // String begin, read to end of string
      case '\'':
	pos = endOfStringIndex(str, c, strpos, false);
	if(pos != -1) {
	  substr += str.substring(strpos, pos+1);
	  strpos = pos;
	  block.strStart = substr;
	}
	else {
	  return false;
	}
	break;

      case ';': 	     // End of statement, except in for statements
	substr += c;
/*
	if(block.type == BLOCK_FOR) {
	  // We are inside a for statement, read to char before ending parenthesis
	  pos = matchingParenthesisIndex(str, "(", ")", strpos-substr.length);
	  if(pos == -1) {
	    addParseError("Expression is missing closing parenthesis <b>)</b>", strpos-substr.length);
	    return false;
	  }
	  else {
	    substr += str.substring(strpos+1, pos);
	    strpos = pos-1;
	    block.strStart = substr;
	  }
	}
	else {
*/
	  // End of statement
	  block.strStart = substr;
	  if(nextSignificantChar(str, strpos+1) == '/') {
	    // Seems we have a comment following this statement
	  }
	  else {
	    substr = "";

	    if(block.indented) {
	      nipb = nonIndentedParent(block);
	      if(nipb!==null) {
		block = insertSiblingBlock(nipb);
	      }
	      else {
		block = insertSiblingBlock(block);
              }
	    }
	    else {
	      block = insertSiblingBlock(block);
	    }
	  }
/*
	}
*/
	break;

      case '\n':	     // End of statement (we presume)
/*
	block.strStart = substr;
	substr = "";
	block = insertSiblingBlock(block);
	if(prevSignificantChar(str, strpos-1) != ';') {
alert('no ;');
	  block = insertSiblingBlock(block);
	}
*/
	break;

      case '\r':	     // CR
	break;

      case '(':
	substr += c;
	block.strStart = substr;
	// Find type of statement (for/if/while/etc.)
	block.type = getStatementType(substr);
	// Read to matching closing parenthesis
	pos = matchingParenthesisIndex(str, "(", ")", strpos-1);
	if(pos == -1) {
	  addParseError("Expression is missing closing parenthesis <b>)</b>", strpos);
	  return false;
	}
	else {
	  substr += str.substring(strpos+1, pos+1);
	  strpos = pos;
	  block.strStart = substr;
	}

	if(isNextStatementChild(block, str, strpos)) {
	  substr = "";
	  block = new BlockElement(block);
	  block.indented = true;
	}
	break;

      case ')': 	     // We only get here if there is not a corresponding start parenthesis
	addParseError("Closing parenthesis <b>)</b> without opening parenthesis <b>(</b>", strpos);
	return false;
//	break;

      case '{':
	substr += c;
	block.strStart = substr;
	block = insertChildBlock(block);
	if(matchingParenthesisIndex(str, "{", "}", strpos-substr.length) == -1) {
	  addParseError("Expression is missing closing bracket <b>}</b>", strpos);
	  return false;
	}
	substr = "";
	break;

      case '}':
	if(prevSignificantChar(str, strpos-1) != ';') {
	  if(block.indented) {
	    nipb = nonIndentedParent(block);
	    if(nipb!==null) {
	      // Last stmt. was indented and NOT terminated with a ;
	      // Hack! Why does this work?
	      block = insertSiblingBlock(nipb);
	    }
	  }
	}

	block = block.parent;
	if(block!==null) {
	  block.strEnd = c;
	  if(block.strStart.indexOf('{') == -1) {
	    addParseError("Closing bracket <b>}</b> without opening bracket <b>{</b>", strpos);
	    return false;
//	    break;
	  }
	  nipb = nonIndentedParent(block);
	  if(nipb!==null) {
	    block = insertSiblingBlock(nipb);
	  } else {
	    block = insertSiblingBlock(block);
          }

	}
	substr = "";
	break;

      case ':':
	substr += c;
	if(substr.indexOf("javascript")===0) {
	  block.strStart = substr;
	  substr = "";
	  block = insertSiblingBlock(block);
	}
	break;

      case '\\':
	substr += c;
	strpos++;
	substr += getChar(str, strpos);
	block.strStart = substr;
	break;

      case '/':
	var cc = prevSignificantChar(str, strpos);
	if(cc == '(' || cc == '=') {		 // We have a regular string expression
	  pos = endOfStringIndex(str, c, strpos, true);
	  if(pos != -1) {
	    substr += str.substring(strpos, pos+1);
	    strpos = pos;
	    block.strStart = substr;
	  }
	  else {
	    return false;
	  }
	}
	else if(str.charAt(strpos-1) == '/') {  // We have a single-line comment
	  pos = endOfLineIndex(str, strpos);
	  if(pos != -1) {
	    substr += str.substring(strpos, pos+1);
	    strpos = pos;
	    block.strStart = substr;
	  }
	  else {
	    return false;
	  }
	}
	else {
	  substr += c;
	  block.strStart = substr;
	}
	break;

      case '*':
	if(str.charAt(strpos-1)==='/') {	 /* We have a multiline comment */
	  pos = str.indexOf('*/', strpos);
	  if(pos != -1) {
	    substr += str.substring(strpos, pos+2) + '\n';
	    strpos = pos+1;
	    block.strStart = substr;
	  }
	  else {
	    return false;
	  }
	}
	else {
	  substr += c;
	  block.strStart = substr;
	}
	break;

      default:
	substr += c;
	block.strStart = substr;
	break;

    }
  }
  return true;
}

function matchingParenthesisIndex(str, startChar, endChar, startPos) {
  // Find position of matching parenthesis or -1 if not found. Ignore strings.
  // Starting parenthesis (startChar) must be part of str starting at startPos.
  var c;
  var strpos = startPos;
  var subParLvl = 0;

  while (strpos < str.length) {
    strpos++;
    c = str.charAt(strpos);
    switch (c) {

      case '"': 	     // String begin, read to end of string
      case '\'':
	pos = endOfStringIndex(str, c, strpos, false);
	if(pos != -1) {
	  strpos = pos;
	}
	else {
	  return -1;
	}
	break;

      case '/': 	     // Check for regular string expressions and comments
	var cc = prevSignificantChar(str, strpos);
	// Check for regular string expression, eg.  /hello{2}/i
	if(cc == '(' || cc == '=') {		 // We have a regular string expression
	  pos = endOfStringIndex(str, c, strpos, true);
	  if(pos != -1) {
	    strpos = pos;
	  }
	  else {
	    return -1;
	  }
	  break;
	}
	else if(str.charAt(strpos-1) == '/') {  // We have a single-line comment
	  pos = endOfLineIndex(str, strpos);
	  if(pos != -1) {
	    strpos = pos;
	  }
	  else {
	    return -1;
	  }
	}
	else {
	  // goto default
	}
	break;

      case '*':
	if(str.charAt(strpos-1) == '/') {	 /* We have a nultiline comment */
	  pos = str.indexOf('*/', strpos);
	  if(pos != -1) {
	    strpos = pos+1;
	  }
	  else {
	    return -1;
	  }
	}
	else {
	  // goto default
	}
	break;

      default:
	if(c == startChar) {
	  subParLvl++;
	}
	else if(c == endChar) {
	  subParLvl--;
	  if(subParLvl===0) {
	    return strpos;}
	}
	break;

    }
  }
  return -1;
}

function endOfStringIndex(str, chr, strpos, regStrExpr) {
  // Read to end of string and return end of string pos., or -1 if error
  var c, pos;

  while (strpos < str.length) {
    strpos++;
    c = str.charAt(strpos);
    switch (c) {

      case '\r':
      case '\n':
	// Line break; seems we have an unterminated string
	addParseError("Unterminated string", strpos-1);
	return -1;
//	break;

      case '\\':
	// We need to read an extra char in case it's \' or \"
	strpos++;
	break;

      case '[':
	if(regStrExpr) {
//	    strpos = matchingParenthesisIndex(str, '[', ']', strpos);
	  pos = endOfStringIndex(str, ']', strpos, true);
	  if(pos != -1) {
	    strpos = pos;
	  }
	  else {
	    // Seems we have an unterminated regular expression
	    addParseError("Unterminated regular expression - missing <b>]</b>", startPos);
	    return -1;
	  }
	}
	break;

      default:
	if(c == chr) {
	  return strpos;
	}
	break;

    }
  }

  // Seems we have an unterminated string
  addParseError("Unterminated string", startPos);
  return -1;
}

function endOfLineIndex(str, strpos) {
  // Read to end of line and return end of line pos., or -1 if error
  var c;

  while (strpos < str.length) {
    strpos++;
    c = str.charAt(strpos);
    switch (c) {

      case '\\':
	// We need to read an extra char in case it's \' or \"
	strpos++;
	break;

      default:
	if(c == '\n') {
	  return strpos;
	}
	break;

    }
  }

  // Seems we have an unterminated string
  addParseError("Is that an unterminated comment <b>//</b>?", strpos-1);
  return -1;
}

function getStatementType(str) {
  if(findSubstatement(str, "if")) { return BLOCK_IF; }
  if(findSubstatement(str, "for")) { return BLOCK_FOR; }
  if(findSubstatement(str, "while")) { return BLOCK_WHILE; }
  if(findSubstatement(str, "with")) { return BLOCK_WITH; }
  if(findSubstatement(str, "switch")) { return BLOCK_SWITCH; }
  return BLOCK_GENERIC;
}

function findSubstatement(str, substr) {
  str = trimLeadingSpaces(str).toLowerCase();
  substr = substr.toLowerCase();
  if(str.indexOf(substr)===0) {
    if(nextSignificantChar(str, substr.length) == '(') {
      return true;}}
  return false;
}

function getChar(str, strpos) {
  // Get char at strpos but check for invalid string positions
  var s = str.charAt(0);
  if(s!==undefined) { return s; }
  return '';
}

function nextSignificantChar(str, strpos) {
  // Get next char from strpos, ignoring spaces
  var s = str.substring(strpos);
  s = trimLeadingSpaces(s);
  if(s.length > 0) { return s.charAt(0); }
  return '';
}

function prevSignificantChar(str, strpos) {
  // Get previous char from strpos, ignoring spaces
  var s = str.substring(0, strpos);
  while ((s.length > 0) && (s.charAt(s.length-1) == ' ')) {
    s = s.substring(0, s.length-1);
  }
  if(s.length > 0) { return s.charAt(s.length-1); }
  return '';
}

/*---------------------------------------------------------
Present a formatted string of source code by traversing the
tree structure that was built by the above code.
---------------------------------------------------------*/

function getFormattedText() {
  var s = doGetFormattedText(rootBlock);
  s = s.replace(/\t/g, indent(1));
  return s;
}

function doGetFormattedText(block) {
  var s = "";
  if(block.strStart!=="") {
    if(trimLeadingSpacesAndTabs(block.strStart)!=="") {
      s = indent(block.level) + trimLeadingSpacesAndTabs(block.strStart) + "\r\n"; }}
  for(var i=0; i<=block.children.length-1; i++) {
    s += doGetFormattedText(block.children[i]);
  }
  if(block.strEnd!=="") {
      s += indent(block.level) + block.strEnd + "\r\n"; }
  return s;
}

function getCompressedText() {
  var s = doGetCompressedText(rootBlock);
  s = s.replace(/\t/g, "").replace(/\r/g, "").replace(/\n/g, "");
  s = replaceOutsideStrings(s, " ", "");
  s = s.replace(/else \{/g, "else{");
  return s;
}

function doGetCompressedText(block) { // the same but without indentations and line breaks
  var s = "";
  if(block.strStart!=="") {
    if(trimLeadingSpacesAndTabs(block.strStart)!=="") {
      s = trimLeadingSpacesAndTabs(block.strStart) + "\r\n"; }}
  for(var i=0; i<=block.children.length-1; i++) {
    s += doGetCompressedText(block.children[i]);
  }
  if(block.strEnd!=="") {
      s += block.strEnd + "\r\n"; }
  return s;
}

function indent(lvl) {
  var INDENTLEVEL = 2;
  var s = "";
  for(var i=0; i<lvl*INDENTLEVEL; i++) { s += " "; }
  return s;
}

function trimLeadingSpaces(str) {
  return str.replace(/^ +/, '');
}

function trimLeadingSpacesAndTabs(str) {
  var s = trimLeadingSpaces(str);
  while (s.match(/^\t/))  {
    s = s.replace(/^\t/, '');
    s = trimLeadingSpaces(s);
  }
  return s;
}

function replaceOutsideStrings(str, searchStr, replaceStr) {
  // Replace searchStr with replaceStr globally, except inside strings
  // and after reserved words (eg. var x=3).
  var c, pos;
  var strpos = -1;
  var lastpos = 0;
  var outstr = "";

  function reservedWord(resrvd) {
    if(str.substring(strpos).indexOf(resrvd)===0) {
      // Reserved word found; skip to end of that word
      strpos += resrvd.length-1;
      outstr += resrvd;
      return true;
    }
    return false;
  }

  var reserved1 = "var ";
  var reserved2 = "new ";
  var reserved3 = "function ";
  var reserved4 = "return ";
  var reserved5 = "throw ";
  var reserved6 = "do ";
  var reserved7 = "else ";
  var reserved8 = " in ";

  while (strpos < str.length) {
    strpos++;
    c = str.charAt(strpos);
    switch (c) {

      case '"': 	     // String begin, read to end of string
      case '\'':
	pos = endOfStringIndex(str, c, strpos);
	if(pos != -1) {
	  // Add unmodified string
	  outstr += str.substring(strpos, pos+1);
	  strpos = pos;
	}
	else {
	  return "";
	}
	break;

      case '\\':
	// We need to read an extra char in case it's \' or \"
	outstr += c;
	strpos++;
	outstr += str.charAt(strpos);
	break;

      case '/': 	     // Check for regular string expressions and comments
	var cc = prevSignificantChar(str, strpos);
	// Check for regular string expression, eg.  /hello{2}/i
	// Reg. strings must end with / . A / inside a set, eg. [abc+-/*] doesn't count.
	if(cc == '(' || cc == '=') {		 // We have a regular string expression
	  pos = endOfStringIndex(str, c, strpos, true);
	  if(pos != -1) {
	    // Add unmodified string
	    outstr += str.substring(strpos, pos+1);
	    strpos = pos;
	  }
	  else {
	    return "";
	  }
	}
	else { outstr += c; }
	break;
/*
      case '*':
	if(str.charAt(strpos-1) == '/') {	 // We have a comment
	  pos = str.indexOf('* /', strpos);
	  if(pos != -1) {
	    strpos = pos+1;
	  }
	  else {
	    return "";
	  }
	}
	else {
	  outstr += c;
	}
	break;
*/

      default:
	if(reservedWord(reserved1) || reservedWord(reserved2) || reservedWord(reserved3) ||
	    reservedWord(reserved4) || reservedWord(reserved5) || reservedWord(reserved6) ||
	    reservedWord(reserved7) || reservedWord(reserved8)) {
	  // The reservedWord method skips the reserved word
	}
	else if(c == searchStr.charAt(0)) {
	  if(str.substring(strpos).indexOf(searchStr)===0) {
	    // Search string found; replace and skip to end of search string
	    strpos += searchStr.length-1;
	    outstr += replaceStr;
	  }
	  else { outstr += c; } // No change
	}
	else { outstr += c; } // No change
	break;
    }
  }
  return outstr;
}
