[XML处理]_[使用libxml2的xpath特性修改xml文件内容]

[XML处理]_[使用libxml2的xpath特性修改xml文件内容]

场景:

1.在软件需要保存一些配置项时,使用数据库比较复杂,查看内容也不方便;纯文本文件对 UTF-8 字符的支持也不好。

2.这时候使用xml是最佳选择,使用跨平台库libxml2。

3.基于xpath的保存方式对保存局部内容非常方便。

4.参考例子xpath2.c

5.实际耗时: 2小时.


文件1: Makefile

CP="cp"

build-post: test.exe
	${CP} E:/software/Lib/file/xml-libxml2-2.7.1/win32/release/share/libxml2-2.dll .

test.exe:test.o
	g++ -o test.exe test.o -LE:/software/Lib/file/xml-libxml2-2.7.1/win32/release/share -lxml2

test.o:test.cpp
	g++ -IE:/software/Lib/file/xml-libxml2-2.7.1/win32/release/share/include -c test.cpp -o test.o


文件2:test.cpp

#include <stdio.h>
#include <assert.h>
#include <string>
#include <iostream>
#include <map>

#include "libxml/tree.h"
#include "libxml/parser.h"
#include "libxml/xpath.h"
#include "libxml/xpathInternals.h"
#include "libxml/xmlsave.h"

using namespace std;

/*
 * Replace the content of every node in an XPath result set with `value`.
 *
 * nodes: node set to update; a NULL set is treated as empty.
 * value: new content, must not be NULL (checked with assert only).
 *
 * The set is walked from the last entry to the first. After a node has been
 * updated, its slot in the set is cleared unless the node is a namespace
 * declaration. This follows the libxml2 xpath2.c example the article refers
 * to; presumably it keeps the set from referencing nodes whose children were
 * just replaced -- confirm against that example.
 *
 * NOTE(review): libxml2 documents the content argument of xmlNodeSetContent()
 * as already entity-escaped; values containing '&' or '<' may need escaping
 * before being passed in -- confirm.
 */
static void _UpdateXpathNodes(xmlNodeSetPtr nodes, const xmlChar* value) 
{
	assert(value);

	if (!nodes)
	{
		return;
	}

	int idx = nodes->nodeNr;
	while (idx-- > 0)
	{
		assert(nodes->nodeTab[idx]);
		xmlNodeSetContent(nodes->nodeTab[idx], value);

		/* Namespace declarations keep their slot; everything else is cleared. */
		if (XML_NAMESPACE_DECL == nodes->nodeTab[idx]->type)
		{
			continue;
		}
		nodes->nodeTab[idx] = NULL;
	}
}

/*
 * Evaluate the XPath expression `key` in `xpathCtx` and set the content of
 * every node it selects to `value`.
 *
 * Returns 0 when the expression could be evaluated (even if it selected no
 * nodes), or -1 after printing a message to stderr when evaluation failed.
 */
static int _UpdateWithXpath(xmlXPathContextPtr xpathCtx,const char* key,const char* value)
{
	xmlXPathObjectPtr result = xmlXPathEvalExpression(BAD_CAST key, xpathCtx);

	if (result)
	{
		_UpdateXpathNodes(result->nodesetval, BAD_CAST value);
		xmlXPathFreeObject(result);
		return 0;
	}

	fprintf(stderr,"Error: unable to evaluate xpath expression \"%s\"\n", key);
	return -1;
}

static int _UpdateXml(const char* path,map<string,string>& keyValue)
{
	xmlDocPtr doc;
	xmlXPathContextPtr xpathCtx; 
	doc = xmlParseFile(path);
    if (!doc)
	{
		fprintf(stderr, "Error: unable to parse file \"%s\"\n", path);
		return(-1);
    }

    /* Create xpath evaluation context */
    xpathCtx = xmlXPathNewContext(doc);
    if(!xpathCtx)
	{
        fprintf(stderr,"Error: unable to create new XPath context\n");
        xmlFreeDoc(doc); 
        return(-1);
    }
	//3.update
	map<string,string>::iterator iter;
	map<string,string>::iterator end = keyValue.end();
	for(iter = keyValue.begin();iter!= end;iter++)
	{
		cout << "word: " << iter->first << ", count: " << iter->second << endl;
		_UpdateWithXpath(xpathCtx,iter->first.c_str(),iter->second.c_str());
	}
	
	xmlXPathFreeContext(xpathCtx);
	//4.save
	xmlSaveCtxtPtr saveCtxtPtx = xmlSaveToFilename(path,"UTF-8",XML_SAVE_FORMAT);
	if(!saveCtxtPtx)
	{
		xmlFreeDoc(doc);
		return -1;
	}

    if(-1 == xmlSaveDoc(saveCtxtPtx,doc))
	{
		xmlFreeDoc(doc);
		return -1;
	}
	xmlSaveClose(saveCtxtPtx);
	//xmlDocDump(stdout, doc);
	//5.free
    xmlFreeDoc(doc); 
    return 0;
}

/*
 * Public entry point: initialise libxml2, apply the XPath -> content updates
 * in `keyValue` to the file at `path`, then release libxml2's global state.
 *
 * Returns whatever _UpdateXml() returns (0 on success, -1 on failure).
 *
 * NOTE(review): libxml2's documentation advises calling xmlCleanupParser()
 * only once, when the process is done with the library; here it runs on
 * every call -- confirm this is acceptable for the intended callers.
 */
int UpdateXml(const char* path,map<string,string>& keyValue)
{
	xmlInitParser();
	const int status = _UpdateXml(path,keyValue);
	xmlCleanupParser();
	return status;
}

/*
 * Demo driver: updates one attribute and one element of xpath2.res, three
 * times in a row, asserting that every run succeeds.
 */
int main(int argc, char *argv[])
{
	printf("Hello, world\n");

	/* XPath expression -> new content. */
	map<string,string> m;
	m["//doc/parent/discarded/@info"] = string("info attri");
	m["//doc/parent/discarded[2]"] = string("change second discarded text 中文");

	/* Run the same update repeatedly to show that UpdateXml can be re-entered. */
	for (int round = 0; round < 3; ++round)
	{
		int ret = UpdateXml("xpath2.res",m);
		assert(!ret);
	}
	return 0;
}


文件3: xpath2.res

<?xml version="1.0" encoding="UTF-8"?>
<doc>
  <parent>
    <discarded info="test">discarded</discarded>
    <preserved/>
    This text node must be discarded
    <discarded>test</discarded>
    <preserved>
      content1
      <child1/>
      <child2>content2</child2>
      <preserved>too</preserved>
      <child2>content3</child2>
      <preserved/>
      <child2>content4</child2>
      <preserved/>
      <child2>content5</child2>
      content6
    </preserved>
  </parent>
</doc>


文件4:修改后的 xpath2.res

<?xml version="1.0" encoding="UTF-8"?>
<doc>
  <parent>
    <discarded info="info attri">discarded</discarded>
    <preserved/>
    This text node must be discarded
    <discarded>change second discarded text 中文</discarded>
    <preserved>
      content1
      <child1/>
      <child2>content2</child2>
      <preserved>too</preserved>
      <child2>content3</child2>
      <preserved/>
      <child2>content4</child2>
      <preserved/>
      <child2>content5</child2>
      content6
    </preserved>
  </parent>
</doc>