在一次做项目的时候,团队分工制作数据集,用 labelImg 来打标,汇总数据时发现每个人生成的 xml 中的图片路径各不相同,于是就写了这个工具来批量修改 xml 中的图片路径。
先打开一个 xml 文件,观察一下它的结构
<annotation>
<folder>zhua_qu</folder>
<filename>2021_03_16_16_42_11_296.jpg</filename>
<path>D:\xyolo\images\train\2021_03_16_16_42_11_296.jpg</path>
<source>
<database>Unknown</database>
</source>
<size>
<width>640</width>
<height>480</height>
<depth>3</depth>
</size>
<segmented>0</segmented>
<object>
<name>zhua_qu</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>389</xmin>
<ymin>225</ymin>
<xmax>522</xmax>
<ymax>359</ymax>
</bndbox>
</object>
</annotation>
可以发现图片文件名在 <filename> 和 </filename> 标签之间,图片路径在 <path> 和 </path> 标签之间,理论上来说我们只要将 <path> 和 </path> 之间的图片路径替换成我们想要的就可以了。
这个路径怎么生成呢?将要替换成的目录路径加上文件名就可以了。在这里我们不用考虑 C++ 烦人的中文乱码问题:就算要替换成中文路径,我们也不需要去读取这些路径指向的文件,只是直接替换 xml 中的图片路径文本而已。
到这里,替换的问题已经有思路了,然后我们再考虑批处理的问题。这里我用了 <io.h> 这个头文件提供的函数来对指定的目录进行遍历:如果遇到子目录就用递归的方法继续遍历;对检测到的文件进行过滤,只留下 xml 文件的路径,压入一个 vector 容器中,后面再逐个取出来,交给负责替换的类来处理。
main.cpp
#include <iostream>
#include <vector>
#include <string>
#include <chrono>
#include "replace.h"
#include "getfiles.h"
/// Prints the usage message for this tool to standard output.
///
/// @param name  Path the program was started with (normally argv[0]). Only
///              the part after the last '/' or '\\' is shown in the usage line.
void help(const char* name)
{
    // Reduce the invocation path to the bare executable name.
    const std::string invokedAs(name);
    const std::string::size_type sepPos = invokedAs.find_last_of("/\\");
    const std::string exeName =
        (sepPos == std::string::npos) ? invokedAs : invokedAs.substr(sepPos + 1);

    // Emit the whole help text as a single chained expression.
    std::cout << std::endl
              << "This tool replaces the file path of the image in the XML file in the dataset"
              << std::endl << std::endl
              << "Usage: " << std::endl
              << "\t.\\" << exeName << " [DataSet Path] [Replace Path]"
              << std::endl << std::endl
              << "For example: " << std::endl
              << "\t.\\main.exe C:\\Users\\17740\\Desktop\\DataSet\\ D:\\xyolo\\images\\train\\"
              << std::endl << std::endl;
}
/// Entry point: rewrites the image path stored in every ".xml" file found
/// under a dataset directory.
///
/// Command line: <program> [DataSet Path] [Replace Path]
///   argv[1]  directory that is searched recursively for XML files
///   argv[2]  directory prefix written in front of each image file name
/// Arguments after the second one are ignored.
///
/// @return 0 on success or when help was shown on request; 1 when required
///         arguments are missing or at least one file could not be updated.
int main(int argc, char **argv)
{
    // No arguments at all: just show the usage text.
    if (argc < 2)
    {
        help(argv[0]);
        return 0;
    }

    // A single argument is only valid when it asks for help.
    if (argc == 2)
    {
        const std::string onlyArg = argv[1];
        if (onlyArg == "--help" || onlyArg == "-h")
        {
            help(argv[0]);
            return 0;
        }
    }

    // Both paths are required; argv[2] is a null pointer when argc == 2, so
    // it must not be turned into a std::string.
    if (argc < 3)
    {
        std::cerr << "Error: both [DataSet Path] and [Replace Path] are required" << std::endl;
        help(argv[0]);
        return 1;
    }

    // Start timing. A steady clock is used because only the elapsed interval
    // matters and it is not affected by wall-clock adjustments.
    const auto start = std::chrono::steady_clock::now();

    std::string filePath = argv[1];
    const std::string replace = argv[2];

    // Normalise the dataset path: drop one trailing separator, because the
    // directory walker appends its own. The empty() check keeps pop_back()
    // from being called on an empty string.
    if (!filePath.empty() && (filePath.back() == '\\' || filePath.back() == '/'))
    {
        filePath.pop_back();
    }

    // Collect the paths of all XML files below the dataset directory.
    std::vector<std::string> files;
    getFiles(filePath, files);

    // Flush so the progress text is visible while the files are processed.
    std::cout << "Replace..." << std::flush;

    int failedCount = 0;
    for (const std::string& xmlFile : files)
    {
        Replace r(replace, xmlFile);
        if (!r.work())
        {
            ++failedCount;
            std::cerr << std::endl << "Failed to update: " << xmlFile << std::endl;
        }
    }
    std::cout << "\rDone...   " << std::endl << std::endl;

    // Stop timing and report.
    const std::chrono::duration<double> diff = std::chrono::steady_clock::now() - start;
    std::cout << "\tUsed: " << diff.count() << " Second" << std::endl;
    if (failedCount > 0)
    {
        std::cerr << "\tFailed: " << failedCount << " of " << files.size() << " file(s)" << std::endl;
        return 1;
    }
    return 0;
}
replace.h
#include <iostream>
#include <fstream>
#include <string>
/// Updates the image path recorded in a single XML annotation file.
///
/// An object is bound to one XML file and one replacement directory prefix at
/// construction; work() then performs the update. The class holds a
/// std::fstream member and is therefore not copyable.
class Replace
{
public:
    /// @param replace  Directory prefix to place in front of the image file name.
    /// @param xmlname  Path of the XML file to modify.
    Replace(const std::string& replace, const std::string& xmlname);

    /// Performs the update on the file given at construction.
    /// @return true on success; false if the update could not be carried out
    ///         (for example, the file cannot be opened).
    bool work();

private:
    // Internal working storage. The declaration order of the data members is
    // kept as it was; the out-of-class definitions refer to these names.
    std::string str;
    std::fstream f;
    char buf[1024];

    // Inputs supplied to the constructor.
    std::string replace;
    std::string xmlname;

    // Image file name taken from the XML, and the tag strings searched for.
    std::string filename;
    std::string filename_label_start;
    std::string filename_label_end;
    std::string path_label_start;
    std::string path_label__end;

    // Bookkeeping for tag positions and lengths; the constructor sets them to 0.
    int filename_pos_start;
    int filename_pos_end;
    int filenameLength;
    int path_pos_start;
    int path_pos__end;
    int pathLength;
};
replace.cpp
#include "replace.h"
/// Binds the object to one XML file and one replacement directory prefix.
///
/// @param replace  Directory prefix that will be written in front of the image
///                 file name. A trailing '\\' is appended when the prefix is
///                 non-empty and does not already end in '\\' or '/'. An empty
///                 prefix is left empty, so only the file name is written.
/// @param xmlname  Path of the XML file that work() will modify.
Replace::Replace(const std::string& replace, const std::string& xmlname)
    : replace(replace),
      xmlname(xmlname),
      filename_label_start("<filename>"),
      filename_label_end("</filename>"),
      path_label_start("<path>"),
      path_label__end("</path>"),
      filename_pos_start(0),
      filename_pos_end(0),
      filenameLength(0),
      path_pos_start(0),
      path_pos__end(0),
      pathLength(0)
{
    // Look at the last character directly. Comparing find_last_of() against
    // length() - 1 misses prefixes that contain no backslash at all (the
    // search returns npos), which glued prefix and file name together.
    if (!this->replace.empty())
    {
        const char lastChar = this->replace.back();
        if (lastChar != '\\' && lastChar != '/')
        {
            this->replace.push_back('\\');
        }
    }
}
/// Replaces the text between <path> and </path> in the XML file with the
/// configured prefix followed by the file name found between <filename> and
/// </filename>.
///
/// The file is read completely, edited in memory and then written back with
/// truncation, so a shorter new path cannot leave stale bytes at the end.
/// Both streams are opened in binary mode so every byte outside the replaced
/// span, including the line endings, is preserved. Local streams and buffers
/// are used so that calling work() again starts from a clean state.
///
/// @return true if the file was rewritten; false if it cannot be opened or
///         read, if either tag pair is missing (the file is then left
///         untouched), or if writing fails.
bool Replace::work()
{
    // --- Load the whole file ------------------------------------------------
    std::string content;
    {
        std::ifstream in(xmlname.c_str(), std::ios::in | std::ios::binary);
        if (!in.is_open())
        {
            return false;
        }

        // std::getline on std::string has no line-length limit, unlike a
        // fixed char buffer, so long lines cannot stall the loop.
        std::string line;
        while (std::getline(in, line))
        {
            content += line;
            // eof() is set only when the last line had no terminating '\n';
            // re-add the newline in every other case to reproduce the input.
            if (!in.eof())
            {
                content += '\n';
            }
        }
        if (in.bad())
        {
            return false;
        }
    }

    // --- Extract the image file name from the <filename> element -------------
    const std::string::size_type nameTag = content.find(filename_label_start);
    if (nameTag == std::string::npos)
    {
        return false;
    }
    const std::string::size_type nameBegin = nameTag + filename_label_start.length();
    const std::string::size_type nameEnd = content.find(filename_label_end, nameBegin);
    if (nameEnd == std::string::npos)
    {
        return false;
    }
    filename = content.substr(nameBegin, nameEnd - nameBegin);

    // --- Replace the contents of the <path> element --------------------------
    const std::string::size_type pathTag = content.find(path_label_start);
    if (pathTag == std::string::npos)
    {
        return false;
    }
    const std::string::size_type pathBegin = pathTag + path_label_start.length();
    const std::string::size_type pathEnd = content.find(path_label__end, pathBegin);
    if (pathEnd == std::string::npos)
    {
        return false;
    }
    content.replace(pathBegin, pathEnd - pathBegin, replace + filename);

    // --- Write the result back, discarding the old contents ------------------
    std::ofstream out(xmlname.c_str(), std::ios::out | std::ios::binary | std::ios::trunc);
    if (!out.is_open())
    {
        return false;
    }
    out << content;
    out.close();
    return !out.fail();
}
getfiles.h
#include <iostream>
#include <vector>
#include <string>
#include <io.h>
/// Recursively collects the paths of all files with the extension "xml"
/// below a directory.
///
/// Declared inline because the definition lives in a header; without it,
/// including the header from more than one translation unit produces
/// duplicate definitions at link time.
///
/// @param path   Directory to search, without a trailing separator.
/// @param files  Receives one entry per matching file, built as
///               path + "\\" + file name. Existing entries are kept.
///
/// Nothing is added when the directory cannot be enumerated.
inline void getFiles(std::string path, std::vector<std::string>& files)
{
    struct _finddata_t entry;
    const std::string pattern = path + "\\*";

    // Take the handle in whatever type _findfirst returns rather than
    // spelling a type by hand.
    const auto handle = _findfirst(pattern.c_str(), &entry);
    if (handle == -1)
    {
        return;
    }

    do
    {
        const std::string name = entry.name;
        const std::string fullPath = path + "\\" + name;

        if (entry.attrib & _A_SUBDIR)
        {
            // Skip the self and parent entries to avoid endless recursion.
            if (name != "." && name != "..")
            {
                getFiles(fullPath, files);
            }
        }
        else
        {
            // Require a real ".xml" suffix. Without the npos check a name
            // with no dot was compared as a whole, so a file literally named
            // "xml" was accepted.
            const std::string::size_type dotPos = name.find_last_of('.');
            if (dotPos != std::string::npos &&
                name.compare(dotPos + 1, std::string::npos, "xml") == 0)
            {
                files.push_back(fullPath);
            }
        }
    } while (_findnext(handle, &entry) == 0);

    _findclose(handle);
}
VocFilePathRepalce.exe [DataSet Path] [Replace Path]
VocFilePathRepalce.exe [数据集所在的目录路径] [用来替换 xml 中图片路径的新目录路径]
例子:
.\VocFilePathRepalce.exe F:\DataSet\ D:\xyolo\images\train\