@dragonfive
2015-12-06T05:41:15.000000Z
字数 2590
阅读 610
c#编程
最近在做船只识别方面的事情,需要大量的负样本来训练AdaBoost分类器。我从网上下载了一个pascal_voc数据集,需要从中找出不包含船只的图片并复制出来。
对于每个图片有一个xml文件,介绍了这个文件的信息,有个object标签介绍了图片中目标类别
比如上面这幅图片的xml文件为:
<annotation><folder>VOC2007</folder><filename>000001.jpg</filename><source><database>The VOC2007 Database</database><annotation>PASCAL VOC2007</annotation><image>flickr</image><flickrid>341012865</flickrid></source><owner><flickrid>Fried Camels</flickrid><name>Jinky the Fruit Bat</name></owner><size><width>353</width><height>500</height><depth>3</depth></size><segmented>0</segmented><object><name>dog</name><pose>Left</pose><truncated>1</truncated><difficult>0</difficult><bndbox><xmin>48</xmin><ymin>240</ymin><xmax>195</xmax><ymax>371</ymax></bndbox></object><object><name>person</name><pose>Left</pose><truncated>1</truncated><difficult>0</difficult><bndbox><xmin>8</xmin><ymin>12</ymin><xmax>352</xmax><ymax>498</ymax></bndbox></object></annotation>
比如上面这个就包括dog和person
我们需要做的就是找到每个object节点下的name叶子节点,看它是不是boat;如果所有name都不是boat,就把这张图片取出来。
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Xml;

namespace 获取负样本
{
    /// <summary>
    /// Collects negative samples from a PASCAL VOC style data set: every image whose
    /// annotation XML lists no object named "boat" is copied to a destination directory.
    /// </summary>
    class Program
    {
        // Defaults used when the corresponding command-line argument is not supplied.
        private const string DefaultAnnotationPath = @"C:\Users\dragonfive\Desktop\pascal_voc\VOCtrainval_06-Nov-2007\VOCdevkit\VOC2007\Annotations\";
        private const string DefaultImageSourcePath = @"C:\Users\dragonfive\Desktop\pascal_voc\VOCtrainval_06-Nov-2007\VOCdevkit\VOC2007\JPEGImages\";
        private const string DefaultImageDestPath = @"D:\IP_CV_WorkSpace\Img\NegSample\";

        // Object class that disqualifies an image from being a negative sample.
        private const string TargetClassName = "boat";

        /// <summary>
        /// Scans every <c>*.xml</c> annotation in the annotation directory and copies the
        /// matching <c>.jpg</c> of each annotation that contains no "boat" object.
        /// </summary>
        /// <param name="args">
        /// Optional overrides, in order: annotation directory, image source directory,
        /// image destination directory. Missing entries fall back to the built-in defaults.
        /// </param>
        static void Main(string[] args)
        {
            string path = ArgumentOrDefault(args, 0, DefaultAnnotationPath);
            string imageSourcePath = ArgumentOrDefault(args, 1, DefaultImageSourcePath);
            string imageDestPath = ArgumentOrDefault(args, 2, DefaultImageDestPath);

            // File.Copy does not create missing directories; make sure the target exists.
            // CreateDirectory does nothing when the directory is already there.
            Directory.CreateDirectory(imageDestPath);

            int numberOfNegSample = 0;

            // GetFiles returns full paths (directory included).
            foreach (string file in Directory.GetFiles(path, "*.xml"))
            {
                string fileName = Path.GetFileNameWithoutExtension(file);

                if (ContainsObjectNamed(file, TargetClassName))
                {
                    Console.WriteLine(fileName + "里面有船");
                    continue;
                }

                string imageName = fileName + ".jpg";
                string sourceImage = Path.Combine(imageSourcePath, imageName);

                // An annotation without its image must not abort the whole run.
                if (!File.Exists(sourceImage))
                {
                    Console.WriteLine("找不到图片,已跳过:" + sourceImage);
                    continue;
                }

                // overwrite: true so that re-running the tool is harmless.
                File.Copy(sourceImage, Path.Combine(imageDestPath, imageName), true);
                Console.WriteLine("成功复制" + fileName);
                numberOfNegSample++;
            }

            Console.WriteLine("共计复制负样本个数为:" + numberOfNegSample);

            // Keep the console window open when run interactively. ReadKey throws
            // InvalidOperationException if standard input is redirected, so skip it then.
            if (!Console.IsInputRedirected)
            {
                Console.ReadKey();
            }
        }

        /// <summary>
        /// Returns the command-line argument at <paramref name="index"/>, or
        /// <paramref name="fallback"/> when it is absent or empty.
        /// </summary>
        private static string ArgumentOrDefault(string[] args, int index, string fallback)
        {
            if (args != null && index < args.Length && !string.IsNullOrEmpty(args[index]))
            {
                return args[index];
            }

            return fallback;
        }

        /// <summary>
        /// Tells whether the annotation file has an <c>/annotation/object/name</c> element
        /// whose text equals <paramref name="className"/>.
        /// </summary>
        /// <param name="xmlPath">Path of the annotation XML file to load.</param>
        /// <param name="className">Object class name to look for (case-sensitive).</param>
        /// <returns><c>true</c> if at least one object has that name; otherwise <c>false</c>.</returns>
        /// <exception cref="XmlException">The file is not well-formed XML.</exception>
        internal static bool ContainsObjectNamed(string xmlPath, string className)
        {
            XmlDocument doc = new XmlDocument();
            doc.Load(xmlPath);

            foreach (XmlNode node in doc.SelectNodes("/annotation/object/name"))
            {
                // Trim so that a name surrounded by whitespace or line breaks still matches.
                if (string.Equals(node.InnerText.Trim(), className, StringComparison.Ordinal))
                {
                    return true;
                }
            }

            return false;
        }
    }
}