WEB开发网
开发学院WEB开发Xml 简单的xml的识别!可以查出xml的错误 阅读

简单的xml的识别!可以查出xml的错误

 2012-05-21 11:04:07 来源:WEB开发网   
核心提示:不是完美支持w3c定义,但是标准的xml是可以完美识别得//获取下一个字符char gXml::readChar(){ this->readPos++ ; if (this->readPos >= (*this->fileData).length()) { return EOF ; } //过滤

不是完美支持W3C定义,但是标准的XML是可以完美识别的

//Fetch the next significant character from the buffer.
//Advances readPos by one, then skips every character whose value is <= 31
//except '\n' (where plain char is signed this comparison also skips bytes
//>= 0x80, because they compare as negative).
//Returns EOF once readPos reaches the end of the buffer, including when the
//end is reached while skipping filtered characters.
char gXml::readChar()
{
    this->readPos++ ;
    //Skip filtered characters, but never index past the end of the buffer
    while (this->readPos < (*this->fileData).length() &&
           (*this->fileData)[this->readPos] <= 31 &&
           (*this->fileData)[this->readPos] != '\n')
    {
        this->readPos++ ;
    }
    if (this->readPos >= (*this->fileData).length())
    {
        return EOF ;
    }
    return (*this->fileData)[this->readPos] ;
}
//Read one word starting at the current character.
//A word ends at the first of: newline, space, '=', '?', '/', '\', '>', '<',
//or a negative value returned by readChar() (EOF).
//On return readPos has been stepped back by one from where reading stopped,
//so the next readChar() yields the terminating character again.
std::string gXml::readWord()
{
    //readChar() advances before reading, so step back to re-read the current character
    this->readPos-- ;
    std::string word ;
    for (char ch = this->readChar() ; ch >= 0 ; ch = this->readChar())
    {
        bool isDelimiter = false ;
        switch (ch)
        {
        case '\n':
        case ' ':
        case '=':
        case '?':
        case '/':
        case '\\':
        case '>':
        case '<':
            isDelimiter = true ;
            break ;
        default:
            break ;
        }
        if (isDelimiter)
        {
            break ;
        }
        word += ch ;
    }
    //Step back once so the caller's next readChar() sees the terminator
    this->readPos-- ;
    return word ;
}
//Parse the XML text in *fileData into a tree rooted at xmlDom.
//
//The parser is a character-driven state machine (comState):
//   0      between constructs, expects '<'
//   1      just after '<'
//   2      after "<?", expects the word "xml"
//   3      after "<!", expects "--"
//   4      first character of a start-tag name
//   5      first character of an end-tag name
//   20-26  attributes of the <?xml ... ?> declaration
//   30-31  inside a comment, looking for "-->"
//   40-47  attributes of a start tag (47 = after '/', expects '>')
//   50     after an end-tag name, expects '>'
//   100    text content, up to the next '<'
//Open tags are tracked on strStack; a closing tag must match the top entry.
//Newlines are counted for error reporting and never reach the state machine.
//
//Returns 1 on success, -1 if the buffer is empty or a syntax error was
//reported through errorInfo().
int gXml::comFile()
{
    //Empty the tag stack
    while (!this->strStack.empty())
    {
        this->strStack.pop() ;
    }
    //Nothing to parse
    if ((*this->fileData).length() == 0)
    {
        return -1 ;
    }

    //Position tracking for error messages
    int codeLine = 1 ;              //current line
    int codePos = 1 ;               //current character within the line
    std::string tagName = "" ;      //tag name being processed
    std::string tagContent = "" ;   //text content being accumulated
    std::string attrName = "" ;     //attribute name being processed
    std::string attrValue = "" ;    //attribute value being accumulated

    //Root of the tree; parsed elements become its descendants
    //NOTE(review): a tree already held in xmlDom is not released here — confirm ownership
    this->xmlDom = new gXmlThree() ;
    gXmlThree * gXmlNode = this->xmlDom ;   //node currently being filled

    int valueSign = 0 ;       //quote style of the current attribute value: -1 single, 1 double
    char charTemp = 0 ;
    unsigned comState = 0 ;   //current state of the state machine

    while (true)
    {
        //Fetch the next filtered character; a negative value (EOF) ends the loop
        charTemp = this->readChar() ;
        if (charTemp < 0) break ;

        //Track the position for error reporting
        codePos++ ;
        if (charTemp == '\n')
        {
            codeLine++ ;
            codePos = 1 ;
            continue ;
        }

        if (comState == 0)
        {
            runFlow(0) ;
            //Between constructs: only spaces or '<' are accepted
            if (charTemp == ' ')
            {
                continue ;
            }else if (charTemp != '<'){
                this->errorInfo("", codeLine , codePos , "ERR:无法识别得字符位置!!") ;
                return -1 ;
            }else{
                comState = 1 ;
            }
        }else if (comState == 1){
            runFlow(1) ;
            //Decide what kind of construct follows '<'
            if (charTemp == '?')
            {
                //<?xml ?>
                comState = 2 ;
            }else if (charTemp == '!'){
                //<!-- -->
                comState = 3 ;
            }else if (charTemp == '/'){
                //</ >
                comState = 5 ;
            }else if (charTemp == '\\'){
                this->errorInfo("" , codeLine , codePos , "ERR:标签名不能用\\开头!!") ;
                return -1 ;
            }else if (charTemp == ' '){
                this->errorInfo("" , codeLine , codePos , "ERR:标签名前面不能有空格!!") ;
                return -1 ;
            }else{
                //<root > : rewind so state 4 sees the first character of the name
                comState = 4 ;
                this->readPos-- ;
            }
        }else if (comState == 2){
            runFlow(2) ;
            //After "<?" the word "xml" must follow immediately
            if (charTemp == ' ')
            {
                this->errorInfo("" , codeLine , codePos , "ERR:无法识别得语法,猜测<?xml ?和xml直接不能有空格 !!") ;
                return -1 ;
            }else if (this->readWord() == "xml"){
                comState = 20 ;
                //Push "?xml"; it is popped when "?>" is reached
                stackNode node ;
                node.tagName = "?xml" ;
                node.xmlDomPoint = this->xmlDom ;
                this->strStack.push(node) ;
                continue ;
            }else{
                this->errorInfo("" , codeLine , codePos , "ERR:无法识别得元素便签,猜测是<?xml...!!") ;
                return -1 ;
            }
        }else if (comState == 3){
            runFlow(3) ;
            //Comment opener: "<!" must be followed by "--"
            if (charTemp != '-')
            {
                this->errorInfo("" , codeLine , codePos , "ERR:语法错,错误的注释语句!!") ;
                return -1 ;
            }

            charTemp = this->readChar() ;

            if (charTemp == '-')
            {
                stackNode node ;
                node.tagName = "!--" ;
                node.xmlDomPoint = gXmlNode ;
                this->strStack.push(node) ;
                comState = 30 ;
            }else{
                this->errorInfo("" , codeLine , codePos , "ERR:语法错,错误的注释语句!!") ;
                return -1 ;
            }
        }else if (comState == 4){
            runFlow(4) ;
            //Start tag: <root ...>
            if (charTemp == ' ')
            {
                this->errorInfo("" , codeLine , codePos , "ERR:语法错!多余的空格!!") ;
                return -1 ;
            }else if (charTemp == '\\' ||
                      charTemp == '/' ||
                      charTemp == '?' ||
                      charTemp == '!'){
                this->errorInfo("" , codeLine , codePos , "ERR:标签名称不能用 \\ / ? ! 开头!!!") ;
                return -1 ;
            }else{
                //Optional check: the document may have only one root element
                if (gXmlNode == this->xmlDom)
                {
                    if (this->xmlDom->xmlChild.size() >= 1)
                    {
                        this->errorInfo("" , codeLine , codePos , "ERR:xml文件中只能有一个根元素!!!") ;
                        return -1 ;
                    }
                }
                //Read the tag name
                tagName = this->readWord() ;
                //Create the element and attach it below the current node
                gXmlThree* aNewNode = new gXmlThree() ;
                aNewNode->xmlId = tagName ;
                gXmlNode->xmlChild.push_back(aNewNode) ;
                aNewNode->xmlParent = gXmlNode ;
                gXmlNode = aNewNode ;

                //Remember the open tag
                stackNode node ;
                node.tagName = tagName ;
                node.xmlDomPoint = gXmlNode ;
                this->strStack.push(node) ;
                comState = 40 ;
            }
        }else if (comState == 5){
            runFlow(5) ;
            //End tag: </root>
            if (charTemp == ' ')
            {
                this->errorInfo("" , codeLine , codePos , "ERR:语法错!多余的空格!!") ;
                return -1 ;
            }else if (charTemp == '\\' ||
                      charTemp == '/' ||
                      charTemp == '?' ||
                      charTemp == '!'){
                this->errorInfo("" , codeLine , codePos , "ERR:关闭语句不能用 \\ / ? ! 开头!!!") ;
                return -1 ;
            }else{
                tagName = this->readWord() ;
                //The end tag must match the most recently opened tag
                if (!this->strStack.empty() && this->strStack.top().tagName == tagName)
                {
                    this->strStack.pop() ;
                    comState = 50 ;
                    //Return to the enclosing element, or to the root if none is open
                    if (!this->strStack.empty())
                    {
                        gXmlNode = this->strStack.top().xmlDomPoint ;
                    }else{
                        gXmlNode = this->xmlDom ;
                    }
                }else{
                    this->errorInfo("" , codeLine , codePos , "ERR:语法错,关闭语句"+tagName+"没有找到关闭对象!!") ;
                    return -1 ;
                }
            }
        }else if (comState == 20){
            runFlow(20) ;
            //<?xml ...?> : read an attribute name
            if (charTemp == ' ')
            {
                continue ;
            }else{
                attrName = this->readWord() ;
                comState = 21 ;
            }
        }else if (comState == 21){
            runFlow(21) ;
            //Expect '=' after the attribute name, or '?' ending the declaration
            if (charTemp == ' ')
            {
                continue ;
            }else if (charTemp == '?'){
                comState = 26 ;
            }else if (charTemp != '='){
                this->errorInfo("" , codeLine , codePos , "ERR:解析器不理解这个字符!!!") ;
                return -1 ;
            }else{
                //Start reading the attribute value
                comState = 22 ;
            }
        }else if (comState == 22){
            runFlow(22) ;
            //Expect the opening quote of the value
            if (charTemp == ' ')
            {
                continue ;
            }else{
                if (charTemp != '\'' && charTemp != '\"')
                {
                    this->errorInfo("" , codeLine , codePos , "ERR:属性值必须是以\'或者\"开头!!") ;
                    return -1 ;
                }else{
                    //Remember which quote character opened the value
                    if (charTemp == '\'')
                    {
                        valueSign = -1 ;
                    }else{
                        valueSign = 1 ;
                    }
                    comState = 23 ;
                }
            }
        }else if (comState == 23){
            runFlow(23) ;
            //Accumulate the value until the matching closing quote
            if ((charTemp == '\'' && valueSign == -1) ||
                (charTemp == '\"' && valueSign == 1))
            {
                //Declaration attribute complete: it is printed, not stored
                {
                    cout<<attrName<<" "<<attrValue<<endl;
                    attrName = "" ;
                    attrValue = "" ;
                }
                //Clear the quote marker
                valueSign = 0 ;
                comState = 24 ;
            }else{
                attrValue += charTemp ;
            }
        }else if (comState == 24){
            runFlow(24) ;
            //A value must be followed by a space or by '?'
            if (charTemp != ' ' && charTemp != '?')
            {
                if (charTemp == '/' || charTemp == '\\')
                {
                    this->errorInfo("" , codeLine , codePos , "ERR:无法失败的字符!!") ;
                    return -1 ;
                }else{
                    this->errorInfo("" , codeLine , codePos , "ERR:前一个属性值和后一个属性名必须用空格隔开!!") ;
                    return -1 ;
                }
            }else if (charTemp == ' '){
                comState = 25 ;
            }else if (charTemp == '?'){
                comState = 26 ;
            }
        }else if (comState == 25){
            runFlow(25) ;
            //After the separating space: another attribute or the closing '?'
            if (charTemp == '?')
            {
                comState = 26 ;
            }else if (charTemp == '/' || charTemp =='\\'){
                this->errorInfo("" , codeLine , codePos , "ERR:语法错,标签名不能用/或者\\开头!!") ;
                return -1 ;
            }else if (charTemp != ' '){
                //Rewind so state 20 re-reads this character as the start of the name
                this->readPos-- ;
                comState = 20 ;
            }else{
                continue ;
            }
        }else if (comState == 26){
            runFlow(26) ;
            //After '?': only '>' is valid and it must close the "?xml" entry
            if (charTemp == '>')
            {
                if (!this->strStack.empty() && this->strStack.top().tagName == "?xml")
                {
                    this->strStack.pop() ;
                    comState = 0 ;
                }else{
                    this->errorInfo("" , codeLine , codePos , "ERR:语法错,标签的前后端不能匹配 猜测<?xml ...?>!!") ;
                    return -1 ;
                }
            }else if (charTemp != ' '){
                this->errorInfo("" , codeLine , codePos , "ERR:语法错,解析器无法理解!!") ;
                return -1 ;
            }else{
                this->errorInfo("" , codeLine , codePos , "ERR:无法识别得字符!!") ;
                return -1 ;
            }
        }else if (comState == 30){
            runFlow(30) ;
            //Inside a comment: everything is skipped until two consecutive '-'
            if (charTemp != '-')
            {
                continue ;
            }
            charTemp = this->readChar() ;
            if (charTemp == '-')
            {
                comState = 31 ;
            }else{
                continue ;
            }
        }else if (comState == 31){
            runFlow(31) ;
            //After "--": '>' ends the comment, anything else resumes skipping
            if (charTemp == '>')
            {
                if (!this->strStack.empty() && this->strStack.top().tagName == "!--")
                {
                    this->strStack.pop() ;
                    comState = 100 ;
                }else{
                    this->errorInfo("" , codeLine , codePos , "ERR:语法错,猜测是注释语句,但是标签的前后端不匹配!!") ;
                    return -1 ;
                }
            }else{
                comState = 30 ;
            }
        }else if (comState == 40){
            runFlow(40) ;
            //Start reading the attributes of the tag.
            //Rewind so state 41 examines the same character again.
            comState = 41 ;
            this->readPos-- ;
        }else if (comState == 41){
            runFlow(41) ;
            //Expect an attribute name, '/' or '>'
            if (charTemp == ' ')
            {
                continue ;
            }else if (charTemp == '/'){
                comState = 47 ;
            }else if (charTemp == '>'){
                //'>' is consumed here. No rewind: re-reading it in state 100
                //would put a stray '>' at the start of the text content.
                comState = 100 ;
            }else{
                //readWord() re-reads the current character as the start of the name
                attrName = this->readWord() ;
                comState = 42 ;
            }
        }else if (comState == 42){
            runFlow(42) ;
            //Expect '=' after the attribute name
            if (charTemp == ' ')
            {
                continue ;
            }else if (charTemp != '='){
                this->errorInfo("" , codeLine , codePos , "ERR:无法理解的字符,猜测应该是 =!!") ;
                return -1 ;
            }else{
                comState = 43 ;
            }
        }else if (comState == 43){
            runFlow(43) ;
            //Expect the opening quote of the value
            if (charTemp == ' ')
            {
                continue ;
            }else{
                if (charTemp == '\'')
                {
                    valueSign = -1 ;
                    comState = 44 ;
                }else if (charTemp == '\"'){
                    valueSign = 1 ;
                    comState = 44 ;
                }else{
                    this->errorInfo("" , codeLine , codePos , "ERR:无法识别得字符,属性值,必须用单引号或者双引号包含!!!") ;
                    return -1 ;
                }
            }
        }else if (comState == 44){
            runFlow(44) ;
            //Accumulate the value until the matching closing quote
            if ((charTemp == '\'' && valueSign == -1) ||
                (charTemp == '\"' && valueSign == 1))
            {
                //Attribute complete: store it on the current element
                {
                    if (gXmlNode != NULL)
                    {
                        gXmlNode->xmlAttr->insertAttr(attrName , attrValue) ;
                    }
                    attrValue = "" ;
                    attrName = "" ;
                    valueSign = 0 ;
                    comState = 45 ;
                }
            }else{
                attrValue += charTemp ;
            }
        }else if (comState == 45){
            runFlow(45) ;
            //After a value: a space, '>' or '/' must follow
            if (charTemp == ' ')
            {
                comState = 46 ;
            }else if (charTemp == '>'){
                comState = 100 ;
            }else if (charTemp == '/'){
                comState = 47 ;
            }else{
                this->errorInfo("" , codeLine , codePos , "ERR:属性元素之间用空格隔开!!!") ;
                return -1 ;
            }
        }else if (comState == 46){
            runFlow(46) ;
            //After the separating space: another attribute, '/' or '>'
            if (charTemp == ' ')
            {
                continue ;
            }else if (charTemp == '/'){
                comState = 47 ;
            }else if (charTemp == '\\' ||
                      charTemp == '=' ||
                      charTemp == '?' ||
                      charTemp == '!')
            {
                //'/' was already handled by the branch above
                this->errorInfo("" , codeLine , codePos , "ERR:属性名不能用\\ / ! ? = 开头!!!") ;
                return -1 ;
            }else if (charTemp == '>'){
                comState = 100 ;
            }else{
                //This is the first character of the next attribute name.
                //Rewind so state 41 re-reads it; otherwise it would be dropped.
                this->readPos-- ;
                comState = 41 ;
            }
        }else if (comState == 47){
            runFlow(47) ;
            //Self-closing tag: <root />
            if (charTemp == '>')
            {
                if (!this->strStack.empty())
                {
                    this->strStack.pop() ;
                }
                //Return to the enclosing element, or to the root if none is open
                if (!this->strStack.empty())
                {
                    gXmlNode = this->strStack.top().xmlDomPoint ;
                }else{
                    gXmlNode = this->xmlDom ;
                }

                comState = 100 ;
            }else{
                this->errorInfo("" , codeLine , codePos , "ERR:语法错!解释器不理解!!!") ;
                return -1 ;
            }
        }else if (comState == 50){
            runFlow(50) ;
            //After an end-tag name: only spaces and '>' are allowed
            if (charTemp == ' ')
            {
                continue ;
            }else if (charTemp != '>'){
                this->errorInfo("" , codeLine , codePos , "ERR:关闭语句中不接受任何属性元素的输入!!!") ;
                return -1 ;
            }else{
                //Continue with text content
                comState = 100 ;
            }
        }else if (comState == 100){
            runFlow(100) ;
            //Text content: accumulate until the next '<'
            if (charTemp != '<')
            {
                tagContent += charTemp ;
            }else{
                {
                    if (gXmlNode != NULL)
                    {
                        gXmlNode->xmlStrContent.push_back(tagContent) ;
                    }else{
                        this->errorInfo("" , codeLine , codePos , "ERR:语法错,此处不接受数据的输入!!!") ;
                        return -1 ;
                    }
                    //This piece of content is stored; start a fresh one
                    tagContent = "" ;
                }
                //Rewind so state 0 sees the '<' again
                this->readPos -- ;
                comState = 0 ;
            }
        }
    }

    //Anything left on the stack is a construct that was never closed;
    //only the top entry is reported
    if (!this->strStack.empty())
    {
        std::string errorP = this->strStack.top().tagName ;
        this->strStack.pop() ;
        this->errorInfo("" , -1 , -1 , "ERR:  <"+errorP+"没有关闭!!!") ;
        return -1 ;
    }
    return 1 ;
}

Tags:简单 xml 识别

编辑录入:爽爽 [复制链接] [打 印]
赞助商链接