简单的xml的识别!可以查出xml的错误
2012-05-21 11:04:07 来源:WEB开发网核心提示:不是完美支持w3c定义,但是标准的xml是可以完美识别得//获取下一个字符char gXml::readChar(){ this->readPos++ ; if (this->readPos >= (*this->fileData).length()) { return EOF ; } //过滤
不是完美支持w3c定义,但是标准的xml是可以完美识别的
// Advance the read cursor by one and return the next significant character.
//
// Characters whose value is <= 31, other than '\n', are skipped.
// NOTE(review): where plain `char` is signed, bytes >= 0x80 also compare
// <= 31 and are skipped as well; callers in this file treat any negative
// return value as end of input, so that behaviour is kept as-is.
//
// Returns EOF (narrowed to char) once the cursor reaches the end of the
// buffer, including when only skippable characters remain.
char gXml::readChar()
{
    this->readPos++ ;
    if (this->readPos >= (*this->fileData).length())
    {
        return EOF ;
    }
    // Skip control characters. The end of the buffer is re-checked after
    // every step: without it, a buffer ending in e.g. '\r' or '\t' made the
    // loop step onto the terminator (0 is also <= 31) and then index past it.
    while ((*this->fileData)[this->readPos] <= 31 &&
           (*this->fileData)[this->readPos] != '\n')
    {
        this->readPos++ ;
        if (this->readPos >= (*this->fileData).length())
        {
            return EOF ;
        }
    }
    return (*this->fileData)[this->readPos] ;
}
// Read one word starting at the character the cursor currently points at.
//
// A word ends at the first delimiter: newline, space, '=', '?', '/', '\\',
// '>' or '<', or when readChar() reports end of input (a negative value).
// On return the cursor has been moved back by one from where the last
// readChar() call left it, so the terminating character is delivered again
// by the next readChar() call. Returns the collected word, possibly empty.
std::string gXml::readWord()
{
    // Step back first so that readChar() re-delivers the current character.
    --this->readPos ;

    std::string word ;
    for (char ch = this->readChar() ; ch >= 0 ; ch = this->readChar())
    {
        bool isDelimiter = false ;
        switch (ch)
        {
            case '\n':
            case ' ':
            case '=':
            case '?':
            case '/':
            case '\\':
            case '>':
            case '<':
                isDelimiter = true ;
                break ;
            default:
                break ;
        }
        if (isDelimiter)
        {
            break ;
        }
        word += ch ;
    }

    // Un-read the terminating character.
    --this->readPos ;
    return word ;
}
// Parse the buffer referenced by fileData and build the node tree under
// this->xmlDom, using a hand-written state machine driven one character at
// a time by readChar().
//
// States (value of comState):
//    0      between constructs: skip spaces, expect '<'
//    1      just after '<': dispatch on '?', '!', '/', or a tag name
//    2      after "<?": expect the word "xml"
//    3      after "<!": expect "--"
//    4      first character of an opening tag name
//    5      first character of a closing tag name
//   20-26   attributes of the <?xml ... ?> declaration and its "?>" end
//   30-31   inside a comment, looking for "-->"
//   40-47   attributes of an element, including the "/>" self-closing form
//   50      after a closing tag name: expect '>'
//  100      text content, up to the next '<'
//
// Newlines only update the line/column counters and are otherwise dropped,
// in every state. Open constructs are tracked on strStack; a closing tag
// must match the name on top of the stack.
//
// Returns 1 on success. Returns -1 when the buffer is empty, on the first
// syntax error (reported through errorInfo), or when a construct is still
// open at end of input.
int gXml::comFile()
{
    // Start from an empty tag stack.
    while(!this->strStack.empty())
    {
        this->strStack.pop();
    }
    // Nothing to parse.
    if ((*this->fileData).length() == 0)
    {
        return -1 ;
    }

    // Position tracking for error reports.
    int codeLine = 1 ;              // current line
    int codePos = 1 ;               // current column (approximate: re-read characters are counted again)
    std::string tagName = "" ;      // tag name being processed
    std::string tagContent ="" ;    // text content accumulated in state 100
    std::string attrName = "" ;     // attribute name being processed
    std::string attrValue = "" ;    // attribute value being accumulated

    // Root of the tree, and the node that currently receives children,
    // attributes and text.
    this->xmlDom = new gXmlThree();
    gXmlThree * gXmlNode = this->xmlDom ;

    int valueSign = 0 ;             // -1: value opened with ', 1: opened with ", 0: none
    char charTemp = 0 ;
    unsigned comState = 0 ;         // current state, see the table above

    while (true)
    {
        charTemp = this->readChar() ;
        // A negative value marks the end of input.
        if(charTemp < 0) break ;

        // Track the position; newlines are consumed here in every state.
        codePos++ ;
        if(charTemp == '\n')
        {
            codeLine++ ;
            codePos = 1 ;
            continue;
        }

        if(comState == 0)
        {
            runFlow(0) ;
            if (charTemp == ' ')
            {
                continue;
            }else if(charTemp != '<'){
                this->errorInfo("", codeLine , codePos , "ERR:无法识别得字符位置!!") ;
                return -1 ;
            }else{
                comState = 1 ;
            }
        }else if(comState == 1){
            runFlow(1) ;
            if (charTemp == '?')
            {
                // <?xml ... ?>
                comState = 2 ;
            }else if(charTemp == '!'){
                // <!-- ... -->
                comState = 3 ;
            }else if(charTemp == '/'){
                // </name>
                comState = 5 ;
            }else if(charTemp == '\\'){
                this->errorInfo("" , codeLine , codePos , "ERR:标签名不能用\\开头!!") ;
                return -1 ;
            }else if(charTemp == ' '){
                this->errorInfo("" , codeLine , codePos , "ERR:标签名前面不能有空格!!") ;
                return -1 ;
            }else{
                // <name ...>: step back so state 4 sees this character again.
                comState = 4 ;
                this->readPos-- ;
            }
        }else if (comState == 2){
            runFlow(2) ;
            if (charTemp == ' ')
            {
                this->errorInfo("" , codeLine , codePos , "ERR:无法识别得语法,猜测<?xml ?和xml直接不能有空格 !!") ;
                return -1 ;
            }else if (this->readWord() == "xml")
            {
                comState = 20 ;
                // Push "?xml"; it is popped when "?>" is reached in state 26.
                stackNode node ;
                node.tagName = "?xml" ;
                node.xmlDomPoint = this->xmlDom ;
                this->strStack.push(node) ;
                continue;
            }else{
                this->errorInfo("" , codeLine , codePos , "ERR:无法识别得元素便签,猜测是<?xml...!!") ;
                return -1 ;
            }
        }else if(comState == 3){
            runFlow(3) ;
            // Comment opener: both characters after "<!" must be '-'.
            if(charTemp != '-')
            {
                this->errorInfo("" , codeLine , codePos , "ERR:语法错,错误的注释语句!!") ;
                return -1 ;
            }
            charTemp = this->readChar() ;
            if(charTemp == '-')
            {
                stackNode node ;
                node.tagName = "!--" ;
                node.xmlDomPoint = gXmlNode ;
                this->strStack.push(node) ;
                comState = 30 ;
            }else{
                this->errorInfo("" , codeLine , codePos , "ERR:语法错,错误的注释语句!!") ;
                return -1 ;
            }
        }else if(comState == 4){
            runFlow(4) ;
            // First character of an opening tag name.
            if (charTemp == ' ')
            {
                this->errorInfo("" , codeLine , codePos , "ERR:语法错!多余的空格!!") ;
                return -1 ;
            }else if(charTemp == '\\' ||
                     charTemp == '/'||
                     charTemp == '?'||
                     charTemp == '!'){
                this->errorInfo("" , codeLine , codePos , "ERR:标签名称不能用 \\ / ? ! 开头!!!") ;
                return -1 ;
            }else{
                // Optional check: allow only one element directly under the root.
                if (gXmlNode == this->xmlDom)
                {
                    if(this->xmlDom->xmlChild.size() >= 1)
                    {
                        this->errorInfo("" , codeLine , codePos , "ERR:xml文件中只能有一个根元素!!!") ;
                        return -1 ;
                    }
                }
                tagName = this->readWord() ;
                // Create the element, attach it to the current node and descend into it.
                gXmlThree* aNewNode = new gXmlThree();
                aNewNode->xmlId = tagName ;
                gXmlNode->xmlChild.push_back(aNewNode) ;
                aNewNode->xmlParent = gXmlNode ;
                gXmlNode =aNewNode;
                stackNode node ;
                node.tagName = tagName ;
                node.xmlDomPoint = gXmlNode ;
                this->strStack.push(node) ;
                comState = 40 ;
            }
        }else if(comState == 5){
            runFlow(5) ;
            // First character of a closing tag name.
            if(charTemp == ' ')
            {
                this->errorInfo("" , codeLine , codePos , "ERR:语法错!多余的空格!!") ;
                return -1 ;
            }else if(charTemp == '\\' ||
                     charTemp == '/'||
                     charTemp == '?'||
                     charTemp == '!'){
                this->errorInfo("" , codeLine , codePos , "ERR:关闭语句不能用 \\ / ? ! 开头!!!") ;
                return -1 ;
            }else{
                tagName = this->readWord() ;
                // The closing tag must match the innermost open tag.
                if (!this->strStack.empty() && this->strStack.top().tagName == tagName)
                {
                    this->strStack.pop() ;
                    comState = 50 ;
                    // Return to the enclosing node, or to the root if none is left.
                    if(!this->strStack.empty())
                    {
                        gXmlNode = this->strStack.top().xmlDomPoint ;
                    }else{
                        gXmlNode = this->xmlDom ;
                    }
                }else{
                    this->errorInfo("" , codeLine , codePos , "ERR:语法错,关闭语句"+tagName+"没有找到关闭对象!!") ;
                    return -1 ;
                }
            }
        }else if(comState == 20){
            runFlow(20) ;
            // <?xml: read an attribute name.
            if (charTemp == ' ')
            {
                continue;
            }else{
                attrName = this->readWord() ;
                comState = 21 ;
            }
        }else if(comState == 21){
            runFlow(21) ;
            // <?xml: expect '=' after the name, or '?' starting the "?>" end.
            if (charTemp == ' ')
            {
                continue;
            }else if(charTemp == '?'){
                comState = 26 ;
            }else if (charTemp != '='){
                this->errorInfo("" , codeLine , codePos , "ERR:解析器不理解这个字符!!!") ;
                return -1 ;
            }else{
                comState = 22 ;
            }
        }else if(comState == 22){
            runFlow(22) ;
            // <?xml: expect the opening quote of the value.
            if (charTemp == ' ')
            {
                continue;
            }else{
                if (charTemp != '\'' && charTemp != '\"')
                {
                    this->errorInfo("" , codeLine , codePos , "ERR:属性值必须是以\'或者\"开头!!") ;
                    return -1 ;
                }else{
                    // Remember which quote opened the value.
                    if (charTemp == '\'')
                    {
                        valueSign = -1 ;
                    }else{
                        valueSign = 1 ;
                    }
                    comState = 23 ;
                }
            }
        }else if(comState == 23){
            runFlow(23) ;
            // <?xml: accumulate the value until the matching quote.
            if((charTemp == '\'' && valueSign == -1) ||
               (charTemp == '\"' && valueSign == 1))
            {
                // Declaration attributes are only printed, not stored.
                {
                    cout<<attrName<<" "<<attrValue<<endl;
                    attrName = "" ;
                    attrValue = "" ;
                }
                valueSign = 0 ;
                comState = 24 ;
            }else{
                attrValue += charTemp ;
            }
        }else if(comState == 24){
            runFlow(24) ;
            // <?xml: a value must be followed by a space or by '?'.
            if(charTemp != ' ' && charTemp != '?')
            {
                if(charTemp == '/' || charTemp == '\\')
                {
                    this->errorInfo("" , codeLine , codePos , "ERR:无法失败的字符!!") ;
                    return -1 ;
                }else{
                    this->errorInfo("" , codeLine , codePos , "ERR:前一个属性值和后一个属性名必须用空格隔开!!") ;
                    return -1 ;
                }
            }else if(charTemp == ' '){
                comState = 25 ;
            }else if (charTemp == '?'){
                comState = 26 ;
            }
        }else if(comState == 25){
            runFlow(25) ;
            // <?xml: after the separating space, another attribute or "?>".
            if (charTemp == '?')
            {
                comState = 26 ;
            }else if(charTemp == '/' || charTemp =='\\'){
                this->errorInfo("" , codeLine , codePos , "ERR:语法错,标签名不能用/或者\\开头!!") ;
                return -1 ;
            }else if(charTemp != ' '){
                // Step back so state 20 sees the first character of the name.
                this->readPos-- ;
                comState = 20 ;
            }else{
                continue;
            }
        }else if(comState == 26){
            runFlow(26) ;
            // <?xml: '?' was seen; the very next character must be '>'.
            if(charTemp == '>')
            {
                if (!this->strStack.empty() && this->strStack.top().tagName == "?xml")
                {
                    this->strStack.pop() ;
                    comState = 0 ;
                }else{
                    this->errorInfo("" , codeLine , codePos , "ERR:语法错,标签的前后端不能匹配 猜测<?xml ...?>!!") ;
                    return -1 ;
                }
            }else if(charTemp != ' ')
            {
                this->errorInfo("" , codeLine , codePos , "ERR:语法错,解析器无法理解!!") ;
                return -1 ;
            }else{
                this->errorInfo("" , codeLine , codePos , "ERR:无法识别得字符!!") ;
                return -1 ;
            }
        }else if(comState == 30){
            runFlow(30) ;
            // Inside a comment: everything is ignored until two consecutive '-'.
            if(charTemp != '-')
            {
                continue;
            }
            charTemp = this->readChar() ;
            if(charTemp == '-')
            {
                comState = 31 ;
            }else{
                continue ;
            }
        }else if(comState == 31){
            runFlow(31) ;
            // "--" was seen: '>' ends the comment, anything else resumes it.
            if(charTemp == '>')
            {
                if (!this->strStack.empty() && this->strStack.top().tagName == "!--")
                {
                    this->strStack.pop();
                    comState = 100 ;
                }else{
                    this->errorInfo("" , codeLine , codePos , "ERR:语法错,猜测是注释语句,但是标签的前后端不匹配!!") ;
                    return -1 ;
                }
            }else{
                comState = 30 ;
            }
        }else if(comState == 40){
            runFlow(40) ;
            // Hand the current character over to state 41 by stepping back.
            comState = 41 ;
            this->readPos-- ;
        }else if(comState == 41){
            runFlow(41) ;
            // Element: attribute name, "/" of "/>", or '>' ending the tag.
            if (charTemp == ' ')
            {
                continue;
            }else if (charTemp == '/'){
                comState = 47 ;
            }else if (charTemp== '>'){
                // Fix: do not step back here. The '>' is consumed, as in
                // states 45/46/47/50; stepping back made state 100 re-read
                // it and prepend '>' to the element's text content.
                comState = 100 ;
            }else{
                attrName = this->readWord() ;
                comState = 42 ;
            }
        }else if(comState == 42){
            runFlow(42) ;
            // Element: expect '=' after the attribute name.
            if (charTemp == ' ')
            {
                continue;
            }else if(charTemp != '='){
                this->errorInfo("" , codeLine , codePos , "ERR:无法理解的字符,猜测应该是 =!!") ;
                return -1 ;
            }else{
                comState = 43 ;
            }
        }else if(comState == 43){
            runFlow(43) ;
            // Element: expect the opening quote of the value.
            if (charTemp == ' ')
            {
                continue;
            }else{
                if (charTemp == '\'')
                {
                    valueSign = -1 ;
                    comState = 44 ;
                }else if(charTemp == '\"'){
                    valueSign = 1 ;
                    comState = 44 ;
                }else{
                    this->errorInfo("" , codeLine , codePos , "ERR:无法识别得字符,属性值,必须用单引号或者双引号包含!!!") ;
                    return -1 ;
                }
            }
        }else if(comState == 44){
            runFlow(44) ;
            // Element: accumulate the value until the matching quote.
            if ((charTemp == '\'' && valueSign == -1) ||
                (charTemp == '\"' && valueSign == 1))
            {
                // A complete attribute: store it on the current node.
                {
                    if (gXmlNode != NULL)
                    {
                        gXmlNode->xmlAttr->insertAttr(attrName , attrValue) ;
                    }
                    attrValue = "" ;
                    attrName = "" ;
                    valueSign = 0 ;
                    comState = 45 ;
                }
            }else{
                attrValue+= charTemp ;
            }
        }else if(comState == 45){
            runFlow(45) ;
            // Element: a value must be followed by a space, '>' or '/'.
            if (charTemp == ' ')
            {
                comState = 46 ;
            }else if (charTemp == '>'){
                comState = 100 ;
            }else if(charTemp == '/'){
                comState = 47 ;
            }else{
                this->errorInfo("" , codeLine , codePos , "ERR:属性元素之间用空格隔开!!!") ;
                return -1 ;
            }
        }else if (comState == 46){
            runFlow(46) ;
            // Element: after the separating space, check for more attributes.
            if (charTemp == ' ')
            {
                continue;
            }else if(charTemp == '/'){
                comState = 47 ;
            }else if(charTemp == '\\' ||
                     charTemp == '=' ||
                     charTemp == '?' ||
                     charTemp == '!')
            {
                this->errorInfo("" , codeLine , codePos , "ERR:属性名不能用\\ / ! ? = 开头!!!") ;
                return -1 ;
            }else if(charTemp == '>'){
                comState = 100 ;
            }else{
                // Fix: this is the first character of the next attribute
                // name. Step back so state 41 sees it again and readWord()
                // captures the whole name (as state 25 does for <?xml).
                // Going through state 40 without stepping back dropped it.
                this->readPos-- ;
                comState = 41 ;
            }
        }else if(comState == 47){
            runFlow(47) ;
            // Self-closing element: '/' must be followed directly by '>'.
            if (charTemp == '>')
            {
                if (!this->strStack.empty())
                {
                    this->strStack.pop() ;
                }
                // Return to the enclosing node, or to the root if none is left.
                if (!this->strStack.empty())
                {
                    gXmlNode = this->strStack.top().xmlDomPoint ;
                }else{
                    gXmlNode = this->xmlDom ;
                }
                comState = 100 ;
            }else{
                this->errorInfo("" , codeLine , codePos , "ERR:语法错!解释器不理解!!!") ;
                return -1 ;
            }
        }else if(comState == 50){
            runFlow(50) ;
            // Closing tag: only spaces may precede the final '>'.
            if(charTemp == ' ')
            {
                continue;
            }else if(charTemp != '>'){
                this->errorInfo("" , codeLine , codePos , "ERR:关闭语句中不接受任何属性元素的输入!!!") ;
                return -1 ;
            }else{
                comState = 100 ;
            }
        }else if(comState == 100){
            // Fix: this condition was the assignment `comState = 100`.
            runFlow(100) ;
            // Text content: accumulate until the next '<'.
            if(charTemp != '<')
            {
                tagContent += charTemp ;
            }else{
                {
                    if(gXmlNode != NULL)
                    {
                        gXmlNode->xmlStrContent.push_back(tagContent) ;
                    }else{
                        this->errorInfo("" , codeLine , codePos , "ERR:语法错,此处不接受数据的输入!!!") ;
                        return -1 ;
                    }
                    tagContent = "" ;
                }
                // Step back so state 0 sees the '<' again.
                this->readPos -- ;
                comState = 0 ;
            }
        }
    }

    // Anything still on the stack was never closed; report the innermost one.
    if(!this->strStack.empty())
    {
        std::string errorP = this->strStack.top().tagName;
        this->strStack.pop() ;
        this->errorInfo("" , -1 , -1 , "ERR: <"+errorP+"没有关闭!!!") ;
        return -1 ;
    }
    return 1 ;
}
更多精彩
赞助商链接
