简单的xml的识别!可以查出xml的错误
2012-05-21 11:04:07 来源:WEB开发网核心提示:不是完美支持w3c定义,但是标准的xml是可以完美识别得//获取下一个字符char gXml::readChar(){ this->readPos++ ; if (this->readPos >= (*this->fileData).length()) { return EOF ; } //过滤
不是完美支持W3C定义,但是标准的XML是可以完美识别的。
//获取下一个字符 char gXml::readChar() { this->readPos++ ; if (this->readPos >= (*this->fileData).length()) { return EOF ; } //过滤特殊字符 while ((*this->fileData)[this->readPos] <= 31 && (*this->fileData)[this->readPos] != '\n') { this->readPos++ ; } return (*this->fileData)[this->readPos] ; } //读取接下来的一个单词 std::string gXml::readWord() { //为了读取数据,先回退一个字符 this->readPos-- ; std::string tempWord = "" ; char tempChar = this->readChar() ; while (tempChar >= 0) { if(tempChar != '\n' && tempChar != ' ' && tempChar != '=' && tempChar != '?' && tempChar != '/' && tempChar != '\\' && tempChar != '>' && tempChar != '<') { tempWord += tempChar ; }else{ break ; } tempChar = this->readChar() ; } this->readPos-- ; return tempWord ; } //文件的解析 int gXml::comFile() { //清空数据堆栈 while(!this->strStack.empty()) { this->strStack.pop(); } //判断数据是不是为空 if ((*this->fileData).length() == 0) { return -1 ; } //开始识别 //用于错误跟踪 int codeLine = 1 ; //用于记录识别到哪一行 int codePos = 1 ; //用户记录识别到哪个字符 std::string tagName = "" ; //xml标签名称 std::string tagContent ="" ; //标签的数据 std::string attrName = "" ; //属性名 std::string attrValue = "" ; //属性值 //一个xml的节点 this->xmlDom = new gXmlThree(); //声明一个xml树的节点 gXmlThree * gXmlNode = this->xmlDom ; //对一个节点的指针 int valueSign = 0 ; //用于标记属性值,是用 单引号 还是 双引号 包含的 char charTemp = 0 ; unsigned comState = 0 ; //标记当前的状态 while (true) { //读取数据并加以过滤 charTemp = this->readChar() ; if(charTemp < 0) break ; //<记录当前识别得数据> codePos++ ; if(charTemp == '\n') { codeLine++ ; codePos = 1 ; continue; } //</> if(comState == 0) { //<debug> runFlow(0) ; // if (charTemp == ' ') { continue; }else if(charTemp != '<'){ //<出错> this->errorInfo("", codeLine , codePos , "ERR:无法识别得字符位置!!") ; return -1 ; //</> }else{ //转向状态1 comState = 1 ; } }else if(comState == 1){ //<debug> runFlow(1) ; // if (charTemp == '?') { //状态转向2 //<?xml ?> comState = 2 ; }else if(charTemp == '!'){ //状态转向2 //<!-- --> comState = 3 ; }else if(charTemp == '/'){ //</ > comState = 5 ; }else if(charTemp == '\\'){ //<出错> this->errorInfo("" , codeLine , 
codePos , "ERR:标签名不能用\\开头!!") ; return -1 ; //</> }else if(charTemp == ' '){ //<出错> this->errorInfo("" , codeLine , codePos , "ERR:标签名前面不能有空格!!") ; return -1 ; //</> }else{ //状态转向4 //<root > comState = 4 ; this->readPos-- ; } }else if (comState == 2){ //<debug> runFlow(2) ; // //状态2 if (charTemp == ' ') { //<出错> this->errorInfo("" , codeLine , codePos , "ERR:无法识别得语法,猜测<?xml ?和xml直接不能有空格 !!") ; return -1 ; //</> }else if (this->readWord() == "xml") { comState = 20 ; //压栈?xml 遇到?出栈 stackNode node ; node.tagName = "?xml" ; node.xmlDomPoint = this->xmlDom ; this->strStack.push(node) ; continue; ; }else{ //<出错> this->errorInfo("" , codeLine , codePos , "ERR:无法识别得元素便签,猜测是<?xml...!!") ; return -1 ; //</> } }else if(comState == 3){ //<debug> runFlow(3) ; // //注释语句的识别 //<!-- --> if(charTemp != '-') { //<出错> this->errorInfo("" , codeLine , codePos , "ERR:语法错,错误的注释语句!!") ; return -1 ; //</> } charTemp = this->readChar() ; if(charTemp == '-') { stackNode node ; node.tagName = "!--" ; node.xmlDomPoint = gXmlNode ; //<入栈> this->strStack.push(node) ; comState = 30 ; }else{ //<出错> this->errorInfo("" , codeLine , codePos , "ERR:语法错,错误的注释语句!!") ; return -1 ; //</> } }else if(comState == 4){ //<debug> runFlow(4) ; // //识别<root 语句> if (charTemp == ' ') { //<出错> this->errorInfo("" , codeLine , codePos , "ERR:语法错!多余的空格!!") ; return -1 ; //</> }else if(charTemp == '\\' || charTemp == '/'|| charTemp == '?'|| charTemp == '!'){ //<出错> this->errorInfo("" , codeLine , codePos , "ERR:标签名称不能用 \\ / ? ! 
开头!!!") ; return -1 ; //</> }else{ //////////////////////////////////////////////////////////////////////// /*这里的代码可以去除,主要是为了保证xml只能有一个根*/ if (gXmlNode == this->xmlDom) { if(this->xmlDom->xmlChild.size() >= 1) { //<出错> this->errorInfo("" , codeLine , codePos , "ERR:xml文件中只能有一个根元素!!!") ; return -1 ; //</> } } //////////////////////////////////////////////////////////////////////// //读取标签名 tagName = this->readWord() ; //添加新元素 gXmlThree* aNewNode = new gXmlThree(); aNewNode->xmlId = tagName ; gXmlNode->xmlChild.push_back(aNewNode) ; aNewNode->xmlParent = gXmlNode ; gXmlNode =aNewNode; stackNode node ; node.tagName = tagName ; node.xmlDomPoint = gXmlNode ; //压栈 this->strStack.push(node) ; comState = 40 ; } }else if(comState == 5){ //<debug> runFlow(5) ; // //识别关闭语句 //</root _> if(charTemp == ' ') { //<出错> this->errorInfo("" , codeLine , codePos , "ERR:语法错!多余的空格!!") ; return -1 ; //</> }else if(charTemp == '\\' || charTemp == '/'|| charTemp == '?'|| charTemp == '!'){ //<出错> this->errorInfo("" , codeLine , codePos , "ERR:关闭语句不能用 \\ / ? ! 
开头!!!") ; return -1 ; //</> }else{ tagName = this->readWord() ; //<弹栈> if (!this->strStack.empty() && this->strStack.top().tagName == tagName) { this->strStack.pop() ; comState = 50 ; if(!this->strStack.empty()) { gXmlNode = this->strStack.top().xmlDomPoint ; }else{ gXmlNode = this->xmlDom ; } }else{ //<出错> this->errorInfo("" , codeLine , codePos , "ERR:语法错,关闭语句"+tagName+"没有找到关闭对象!!") ; return -1 ; //</> } } }else if(comState == 20){ //<debug> runFlow(20) ; // //读取元素的属性 if (charTemp == ' ') { continue; }else{ //读取属性名 attrName = this->readWord() ; comState = 21 ; } }else if(comState == 21){ //<debug> runFlow(21) ; // //检查=号 if (charTemp == ' ') { continue; }else if(charTemp == '?'){ comState = 26 ; }else if (charTemp != '='){ //<出错> this->errorInfo("" , codeLine , codePos , "ERR:解析器不理解这个字符!!!") ; return -1 ; //</> }else if(charTemp == '='){ //开始识别属性值 comState = 22 ; } }else if(comState == 22){ //<debug> runFlow(22) ; // if (charTemp == ' ') { continue; }else{ if (charTemp != '\'' && charTemp != '\"') { //<出错> this->errorInfo("" , codeLine , codePos , "ERR:属性值必须是以\'或者\"开头!!") ; return -1 ; //</> }else{ //记录属性值是用单引号还是双引号 if (charTemp == '\'') { valueSign = -1 ; }else{ valueSign = 1 ; } comState = 23 ; } } }else if(comState == 23){ //<debug> runFlow(23) ; // //记录属性值 if((charTemp == '\'' && valueSign == -1) || (charTemp == '\"' && valueSign == 1)) { //这里表示识别成功 { cout<<attrName<<" "<<attrValue<<endl; attrName = "" ; attrValue = "" ; } //清楚标示 valueSign = 0 ; comState = 24 ; }else{ attrValue += charTemp ; } }else if(comState == 24){ //<debug> runFlow(24) ; // //第一个属性的值和第二个属性名之间 必须要要有个空格 if(charTemp != ' ' && charTemp != '?') { if(charTemp == '/' || charTemp == '\\') { //<出错> this->errorInfo("" , codeLine , codePos , "ERR:无法失败的字符!!") ; return -1 ; //</> }else{ //<出错> this->errorInfo("" , codeLine , codePos , "ERR:前一个属性值和后一个属性名必须用空格隔开!!") ; return -1 ; //</> } }else if(charTemp == ' '){ comState = 25 ; }else if (charTemp == '?'){ comState = 26 ; } }else if(comState == 25){ 
//<debug> runFlow(25) ; // if (charTemp == '?') { comState = 26 ; }else if(charTemp == '/' || charTemp =='\\'){ //<出错> this->errorInfo("" , codeLine , codePos , "ERR:语法错,标签名不能用/或者\\开头!!") ; return -1 ; //</> }else if(charTemp != ' '){ //回退一个字符 this->readPos-- ; comState = 20 ; }else{ continue; } }else if(comState == 26){ //<debug> runFlow(26) ; // if(charTemp == '>') { if (!this->strStack.empty() && this->strStack.top().tagName == "?xml") { this->strStack.pop() ; comState = 0 ; }else{ //<出错> this->errorInfo("" , codeLine , codePos , "ERR:语法错,标签的前后端不能匹配 猜测<?xml ...?>!!") ; return -1 ; //</> } }else if(charTemp != ' ') { //<出错> this->errorInfo("" , codeLine , codePos , "ERR:语法错,解析器无法理解!!") ; return -1 ; //</> }else{ //<出错> this->errorInfo("" , codeLine , codePos , "ERR:无法识别得字符!!") ; return -1 ; //</> } }else if(comState == 30){ //<debug> runFlow(30) ; // //<!-- 主要是识别 ! 后面的2个连续的- //这之前所有字符都被看做是注释语句 if(charTemp != '-') { continue; } charTemp = this->readChar() ; if(charTemp == '-') { comState = 31 ; }else{ continue ; } }else if(comState == 31){ //<debug> runFlow(31) ; // if(charTemp == '>') { //弹出栈 if (!this->strStack.empty() && this->strStack.top().tagName == "!--") { this->strStack.pop(); comState = 100 ; }else{ //<出错> this->errorInfo("" , codeLine , codePos , "ERR:语法错,猜测是注释语句,但是标签的前后端不匹配!!") ; return -1 ; //</> } }else{ comState = 30 ; } }else if(comState == 40){ //<debug> runFlow(40) ; // //读取这个标签的属性 comState = 41 ; //这里重点注意,要回退一步 this->readPos-- ; }else if(comState == 41){ //<debug> runFlow(41) ; // //读取一个属性名 if (charTemp == ' ') { continue; }else if (charTemp == '/'){ comState = 47 ; }else if (charTemp== '>'){ comState = 100 ; //回退一步 this->readPos-- ; }else{ //读取属性名 attrName = this->readWord() ; comState = 42 ; } }else if(comState == 42){ //<debug> runFlow(42) ; // //读取那个 = if (charTemp == ' ') { continue; }else if(charTemp != '='){ //<出错> this->errorInfo("" , codeLine , codePos , "ERR:无法理解的字符,猜测应该是 =!!") ; return -1 ; //</> }else{ comState = 43 ; } }else 
if(comState == 43){ //<debug> runFlow(43) ; // //读取属性值 if (charTemp == ' ') { continue; }else{ //读取单引号 和 双引号 if (charTemp == '\'') { valueSign = -1 ; comState = 44 ; }else if(charTemp == '\"'){ valueSign = 1 ; comState = 44 ; }else{ //<出错> this->errorInfo("" , codeLine , codePos , "ERR:无法识别得字符,属性值,必须用单引号或者双引号包含!!!") ; return -1 ; //</> } } }else if(comState == 44){ //<debug> runFlow(44) ; // //识别属性值得结尾 if ((charTemp == '\'' && valueSign == -1) || (charTemp == '\"' && valueSign == 1)) { //这里获得一个属性值 { if (gXmlNode != NULL) { gXmlNode->xmlAttr->insertAttr(attrName , attrValue) ; } attrValue = "" ; attrName = "" ; valueSign = 0 ; comState = 45 ; } }else{ attrValue+= charTemp ; } }else if(comState == 45){ //<debug> runFlow(45) ; // //2个属性直接最少有一个空格 if (charTemp == ' ') { comState = 46 ; }else if (charTemp == '>'){ comState = 100 ; }else if(charTemp == '/'){ comState = 47 ; }else{ //<出错> this->errorInfo("" , codeLine , codePos , "ERR:属性元素之间用空格隔开!!!") ; return -1 ; //</> } }else if (comState == 46){ //<debug> runFlow(46) ; // //判断是不是还有其他属性 if (charTemp == ' ') { continue; }else if(charTemp == '/'){ comState = 47 ; }else if(charTemp == '\\' || charTemp == '/' || charTemp == '=' || charTemp == '?' || charTemp == '!') { //<出错> this->errorInfo("" , codeLine , codePos , "ERR:属性名不能用\\ / ! ? 
= 开头!!!") ; return -1 ; //</> }else if(charTemp == '>'){ comState = 100 ; }else{ comState = 40 ; } }else if(comState == 47){ //<debug> runFlow(47) ; // //识别<root /> if (charTemp == '>') { if (!this->strStack.empty()) { this->strStack.pop() ; } if (!this->strStack.empty()) { gXmlNode = this->strStack.top().xmlDomPoint ; }else{ gXmlNode = this->xmlDom ; } comState = 100 ; }else{ //<出错> this->errorInfo("" , codeLine , codePos , "ERR:语法错!解释器不理解!!!") ; return -1 ; //</> } }else if(comState == 50){ //<debug> runFlow(50) ; // if(charTemp == ' ') { continue; }else if(charTemp != '>'){ //<出错> this->errorInfo("" , codeLine , codePos , "ERR:关闭语句中不接受任何属性元素的输入!!!") ; return -1 ; //</> }else{ //读取字符串 comState = 100 ; } }else if(comState = 100){ //<debug> runFlow(100) ; // //读取xml的内容 if(charTemp != '<') { tagContent += charTemp ; }else{ { if(gXmlNode != NULL) { gXmlNode->xmlStrContent.push_back(tagContent) ; }else{ //<出错> this->errorInfo("" , codeLine , codePos , "ERR:语法错,此处不接受数据的输入!!!") ; return -1 ; //</> } //xml的首个内容字符串创建完成 tagContent = "" ; } this->readPos -- ; comState = 0 ; } } } //最后检查栈 while(!this->strStack.empty()) { //<出错> std::string errorP = this->strStack.top().tagName; this->strStack.pop() ; this->errorInfo("" , -1 , -1 , "ERR: <"+errorP+"没有关闭!!!") ; return -1 ; //</> } return 1 ; }
更多精彩
赞助商链接