/* GamMo Simple Morphological Analyzer

  version 0.85
  Date 2007/04/10  
  Copyright (c) 2007 by knit  
  License: MIT License  
    http://www.opensource.org/licenses/mit-license.php  
    日本語訳    
    http://www.opensource.jp/licenses/mit-license.html    

  Sample
    <script type="text/javascript" src="gammo.js" charset="UTF-8"></script>  
 */    

/**
 * GamMo: a small, dictionary-free segmenter for Japanese text.
 *
 * Segmentation is purely pattern based: particles are matched by regular
 * expression and the text is then cut wherever the character class changes
 * (kanji, katakana, ASCII word characters, full-width alphanumerics,
 * whitespace, punctuation).
 *
 * After construction every instance owns these global ("g") RegExp objects:
 * kanji, hiragana, katakana, word, zenkaku, ascii, kigoh, space, joshi,
 * syuJoshi and skipWords. They are (re)built by _compile().
 *
 * @constructor
 */
function GamMo() {
  this._compile();
}

/**
 * Regular-expression sources for the character classes, keyed by the name of
 * the instance property they are compiled into.
 *
 * Note on "kigoh" (symbols / punctuation): the backslashes in that literal are
 * string escapes for ordinary characters and therefore vanish, leaving the
 * ranges !-/  :-@  [-^  `  {-~ inside the character class.
 */
GamMo.prototype.pattern = {
  "kanji": "([一-龠々〆ヵヶ]+)",
  "hiragana": "([ぁ-ん]+)",
  "katakana": "([ァ-ヴー]+)",
  "word": "([a-zA-Z0-9_]+)",
  "zenkaku": "([ａ-ｚＡ-Ｚ０-９]+)",
  "ascii": "([!-~]+)",
  "kigoh": "([、。！？（）「」『』’”!-\/:-@\[-\^`\{-~])",
  "space": "([　\\s\\n]+)"
};

/**
 * Words that must not be broken apart by the particle splitter, even though
 * they contain something that looks like a particle.
 * Call _compile() again after changing this list.
 */
GamMo.prototype.skip = {
  "word": ["とても"]
};

/**
 * Builds every RegExp used by the analyzer from `pattern` and `skip`.
 *
 * Fresh RegExp objects are assigned instead of calling the deprecated
 * RegExp.prototype.compile(). Skip words are treated as literal text: regex
 * metacharacters in them are escaped, empty entries are ignored, and an empty
 * list yields a pattern that never matches.
 */
GamMo.prototype._compile = function() {
  var pattern = this.pattern;
  this.kanji = new RegExp(pattern.kanji, "g");
  this.hiragana = new RegExp(pattern.hiragana, "g");
  this.katakana = new RegExp(pattern.katakana, "g");
  this.word = new RegExp(pattern.word, "g");
  this.zenkaku = new RegExp(pattern.zenkaku, "g");
  this.ascii = new RegExp(pattern.ascii, "g");
  this.kigoh = new RegExp(pattern.kigoh, "g");
  this.space = new RegExp(pattern.space, "g");

  // Particle (joshi) with at most one character of context on either side.
  this.joshi = new RegExp("(.)?(か[もら]?|きり|くせに|[くぐ]らい|けれども?|[ただ]って|こそ|さえ|し[かも]|すら|ずつ|たり|だ[けの]|ったら|ってば|つつ|[てで][はも]|とか?|ところ[がでか]|ながら|など|な[らり]|なん[かて]|の[でにみ]|ばかり|ほ[かど]|まで|ものの|やら?|ゆえ|より|をば?|[がしてでにのはばへも])(.)?", "g");
  // Sentence-final particle (syuJoshi) directly followed by punctuation.
  this.syuJoshi = new RegExp("(か[いな]?|かしら|って|ったら|ってば|とも?|なあ?|ねえ?|ものか?|よう?|[さぜぞのやわ])([.?!、。？！]+)", "g");

  var words = this.skip.word;
  var escaped = [];
  for (var i = 0, l = words.length; i < l; i++) {
    var literal = String(words[i]);
    if (literal !== "") {
      escaped.push(literal.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"));
    }
  }
  // "(?!)" can never match, so an empty skip list skips nothing.
  var skipSource = escaped.length ? "(" + escaped.join("|") + ")" : "(?!)";
  this.skipWords = new RegExp(skipSource, "g");
};

/**
 * Splits text into tokens.
 *
 * The full-width bar "｜" is used internally as the token separator, so any
 * occurrence in the input is first replaced by the ASCII "|".
 *
 * @param {string} str     Text to segment.
 * @param {*}      option  When truthy, no separator is inserted in front of a
 *                         particle, so the particle stays attached to the text
 *                         that precedes it.
 * @returns {string[]} Tokens in input order; never contains empty strings.
 *                     Empty input yields an empty array.
 */
GamMo.prototype.parse = function(str, option) {
  var skipWords = this.skipWords;

  str = str.replace(/｜/g, "|");
  str = str.replace(this.syuJoshi, "｜$1｜$2");
  str = str.replace(this.joshi, function(m0, m1, m2, m3) {
    var before = m1 || "";
    var after = m3 || "";
    var whole = before + m2 + after;

    // Part of a protected word: leave untouched.
    // search() is used because it ignores the regex's lastIndex.
    if (whole.search(skipWords) !== -1) return whole;
    // The particle is immediately repeated: leave untouched.
    if (m2 === after) return whole;
    // Already delimited on one side: leave untouched.
    if (before === "｜" || after === "｜") return whole;

    // Cut after the particle, and also before it unless `option` is set.
    return before + (option ? "" : "｜") + m2 + "｜" + after;
  });
  str = str.replace(this.kanji, "｜$1");
  str = str.replace(this.katakana, "｜$1｜");
  str = str.replace(this.word, "｜$1｜");
  str = str.replace(this.zenkaku, "｜$1｜");
  str = str.replace(this.space, "｜$1｜");
  // Small kana never stand alone: glue them back to their neighbours.
  str = str.replace(/｜*([ぁぃぅぇぉっゃゅょ])｜*/g, "$1");
  str = str.replace(this.kigoh, "｜$1｜");
  // Drop leading/trailing separators, then collapse runs to a single one.
  str = str.replace(/^｜+|｜+$/g, "").replace(/｜+/g, "｜");

  return str === "" ? [] : str.split("｜");
};

/**
 * Extracts keyword candidates from text.
 *
 * A token is kept when it contains kanji or katakana, or when it is longer
 * than one character and contains printable ASCII or full-width alphanumerics.
 *
 * @param {string} str  Text to analyse.
 * @returns {string[]} Selected tokens in input order.
 */
GamMo.prototype.Keyword = function(str) {
  var keywords = [];
  var tokens = this.parse(str);
  for (var i = 0, l = tokens.length; i < l; i++) {
    var token = tokens[i];
    // search() is used instead of test(): the regexes are global, and test()
    // would carry lastIndex over from one token to the next.
    if (token.search(this.kanji) !== -1 || token.search(this.katakana) !== -1) {
      keywords.push(token);
    } else if (token.length > 1 &&
               (token.search(this.ascii) !== -1 || token.search(this.zenkaku) !== -1)) {
      keywords.push(token);
    }
  }
  return keywords;
};

/*
 * Matchers. Each returns an array with every match of the corresponding
 * pattern in `str`, or `false` when there is none.
 */

/** @param {string} str @returns {string[]|boolean} Runs of kanji. */
GamMo.prototype.Kanji = function(str) {
  return str.match(this.kanji) || false;
};

/** @param {string} str @returns {string[]|boolean} Runs of hiragana. */
GamMo.prototype.Hiragana = function(str) {
  return str.match(this.hiragana) || false;
};

/** @param {string} str @returns {string[]|boolean} Runs of katakana. */
GamMo.prototype.Katakana = function(str) {
  return str.match(this.katakana) || false;
};

/** @param {string} str @returns {string[]|boolean} Runs of [a-zA-Z0-9_]. */
GamMo.prototype.Word = function(str) {
  return str.match(this.word) || false;
};

/** @param {string} str @returns {string[]|boolean} Runs of full-width alphanumerics. */
GamMo.prototype.Zenkaku = function(str) {
  return str.match(this.zenkaku) || false;
};

/** @param {string} str @returns {string[]|boolean} Runs of printable ASCII (! to ~). */
GamMo.prototype.Ascii = function(str) {
  return str.match(this.ascii) || false;
};

/** @param {string} str @returns {string[]|boolean} Single punctuation/symbol characters. */
GamMo.prototype.Kigoh = function(str) {
  return str.match(this.kigoh) || false;
};

/** @param {string} str @returns {string[]|boolean} Runs of whitespace. */
GamMo.prototype.Space = function(str) {
  return str.match(this.space) || false;
};

/** @param {string} str @returns {string[]|boolean} Particles, each with up to one character of context on either side. */
GamMo.prototype.Joshi = function(str) {
  return str.match(this.joshi) || false;
};




