WEB开发网
开发学院数据库MSSQL Server 如何将全文检索中的“干扰词”去除 阅读

如何将全文检索中的“干扰词”去除

 2007-11-11 10:40:22 来源:WEB开发网   
核心提示:在页面中包含以下脚本,然后调用 remove_noise_word() 即可。脚本以干扰词列表开头:noise_word_list_ch = new Array("?","about","$", …)

在页面中包含以下脚本,然后调用 remove_noise_word() 即可:

<SCRIPT LANGUAGE=javascript>
<!--
  noise_word_list_ch = new Array("?","about","$","1","2","3","4","5","6","7","8","9","0","_",
    "a","b","c","d","e","f","g","h","i","j","k","l","m","n","o",
    "p","q","r","s","t","u","v","w","x","y","z","after","all","also",
    "an","and","another","any","are","as","at","be","because","been",
    "before","being","between","both","but","by","came","can","come",
    "could","did","do","each","for","from","get","got","had","has",
    "have","he","her","here","him","himself","his","how","if","in","into",
    "is","it","like","make","many","me","might","more","most","much","must",
    "my","never","now","of","on","only","or","other","our","out","over","said",
    "same","see","should","since","some","still","such","take","than","that",
    "the","their","them","then","there","these","they","this","those","through",
    "to","too","under","up","very","was","way","we","well","were","what","where",
    "which","while","who","with","would","you","your",
    "的","一","不","在","人","有","是","为","以","于","上","他","而","后","之","来",
    "及","了","因","下","可","到","由","这","与","也","此","但","并","个","其","已",
     "无","小","我","们","起","最","再","今","去","好","只","又","或","很","亦","某",
    "把","那","你","乃","它");

/**
 * Strips leading and trailing ASCII space characters from a value.
 *
 * Only the space character (" ") is removed; tabs, newlines and other
 * whitespace are left untouched.
 *
 * @param {*} inputVal Value to trim; converted to a string first.
 *   null and undefined are treated as an empty string.
 * @returns {string} The string form of inputVal without surrounding spaces.
 */
function trim_str_key(inputVal){
  // Nothing to convert: avoid calling toString() on null/undefined.
  if (inputVal === null || inputVal === undefined) {
    return "";
  }

  // Local variable: the text must not leak into the global scope.
  var text = inputVal.toString();
  var start = 0;
  var end = text.length;

  // Skip spaces on the left.
  while (start < end && text.charAt(start) === " ") {
    start++;
  }
  // Skip spaces on the right.
  while (end > start && text.charAt(end - 1) === " ") {
    end--;
  }
  return text.substring(start, end);
}

/**
 * Tells whether a keyword is one of the noise words in noise_word_list_ch.
 *
 * The keyword is stripped of surrounding spaces (via trim_str_key) and
 * lower-cased before it is compared, so the match is case-insensitive.
 *
 * @param {*} str_key Keyword to test.
 * @returns {boolean} true if the normalized keyword equals a list entry,
 *   false otherwise.
 */
function is_ch_noise_word(str_key){
  var key_word = trim_str_key(str_key).toLowerCase();
  var listlength = noise_word_list_ch.length;
  var i; // declared locally so the counter does not become a global

  for (i = 0; i < listlength; i++) {
    if (noise_word_list_ch[i] === key_word) {
      return true;
    }
  }
  return false;
}

/**
 * Removes noise words from a space-separated search string.
 *
 * The input is trimmed with trim_str_key and split on the space character.
 * Every keyword for which is_ch_noise_word returns true is dropped,
 * including the last one. Runs of consecutive spaces produce empty tokens,
 * which are skipped, so the result is single-space separated.
 *
 * @param {*} str_source Search string typed by the user.
 * @returns {string} The remaining keywords joined by single spaces, or ""
 *   when nothing is left.
 */
function remove_noise_word(str_source){
  var source = trim_str_key(str_source);
  if (source.length === 0) {
    return "";
  }

  // A space marks the boundary between two keywords.
  var words = source.split(" ");
  var kept = [];
  var word;
  var i;

  for (i = 0; i < words.length; i++) {
    word = words[i];
    // Empty tokens come from consecutive spaces; skip them.
    if (word === "") {
      continue;
    }
    // Keep only keywords that are not noise words.
    if (!is_ch_noise_word(word)) {
      kept.push(word);
    }
  }
  return kept.join(" ");
}

//下面是一个测试
//var abc = "av  n";
//var nnnn = remove_noise_word(abc);
//alert(nnnn);
//-->
</SCRIPT>

Tags:如何 全文检索 干扰

编辑录入:coldstar [复制链接] [打 印]
赞助商链接