Browse Source

feat: yolo xml文件转txt文件算法

kindring 10 months ago
parent
commit
583c028450
1 changed files with 235 additions and 52 deletions
  1. 235 52
      js/yolo 数据集工具/parse.js

+ 235 - 52
js/yolo 数据集工具/parse.js

@@ -5,6 +5,7 @@
 const fs = require('fs');
 const path = require('path');
 const readline = require('readline');
+const xml2js = require('xml2js');
 
 function readFile(filePath) {
     return new Promise((resolve, reject) => {
@@ -34,19 +35,18 @@ function parseYolo(fileData) {
         // 判断是否为空行
         if (line.trim() === '') return;
         let [type, x, y, width, height] = line.split(' ');
-        result.push({ type, x, y, width, height });
+        result.push({type, x, y, width, height});
     });
     return result;
 }
 
 
-
 async function writeFile(filePath, fileData) {
     return new Promise((resolve, reject) => {
         // 判断父级目录是否存在
         let dirPath = path.dirname(filePath);
         if (!fs.existsSync(dirPath)) {
-            fs.mkdirSync(dirPath, {recursive: true });
+            fs.mkdirSync(dirPath, {recursive: true});
         }
 
         fs.writeFile(filePath, fileData, (err, data) => {
@@ -61,7 +61,7 @@ async function mvFile(filePath, targetPath) {
         // 判断父级目录是否存在
         let dirPath = path.dirname(filePath);
         if (!fs.existsSync(dirPath)) {
-            fs.mkdirSync(dirPath, {recursive: true });
+            fs.mkdirSync(dirPath, {recursive: true});
         }
         fs.rename(filePath, targetPath, (err, data) => {
             if (err) return reject(err);
@@ -70,13 +70,29 @@ async function mvFile(filePath, targetPath) {
     })
 }
 
+// 写入文件
+async function writeFile(filePath, fileData) {
+    return new Promise((resolve, reject) => {
+        // 判断父级目录是否存在
+        let dirPath = path.dirname(filePath);
+        if (!fs.existsSync(dirPath)) {
+            fs.mkdirSync(dirPath, {recursive: true});
+        }
+        fs.writeFile(filePath, fileData, (err, data) => {
+            if (err) return reject(err);
+            resolve(data);
+        });
+    })
+}
+
+
 async function cpFile(filePath, targetPath) {
     return new Promise((resolve, reject) => {
         // 判断父级目录是否存在
         let dirPath = path.dirname(targetPath);
         if (!fs.existsSync(dirPath)) {
             console.log(`路径${dirPath} 不存在`)
-            fs.mkdirSync(dirPath, {recursive: true });
+            fs.mkdirSync(dirPath, {recursive: true});
             console.log('路径不存在')
         }
         // console.log('路径不存在')
@@ -92,7 +108,7 @@ async function cpFile(filePath, targetPath) {
 // 删除指定目录
 function rmDir(path) {
     return new Promise((resolve, reject) => {
-        fs.rmdir(path, { recursive: true }, (err, data) => {
+        fs.rmdir(path, {recursive: true}, (err, data) => {
             if (err) return reject(err);
             resolve(data);
         });
@@ -100,16 +116,6 @@ function rmDir(path) {
 }
 
 
-
-
-
-
-
-
-
-
-
-
 const dimensionType = {
     "-1": "无标注",
     "0": "吊车",
@@ -121,7 +127,7 @@ const dimensionType = {
 }
 
 
-async function main(){
+async function main() {
 
     // let labelsPath = `E:\\图库\\ai\\epower_v2\\newLabels`
     // let labelsPath = `E:\\图库\\ai\\epower_v2\\处理labels`
@@ -151,7 +157,7 @@ async function main(){
 
     // 获取当前时间
     let now = new Date();
-    let nowStr = `${now.getFullYear()}-${now.getMonth()+1}-${now.getDate()} ${now.getHours()}点${now.getMinutes()}分${now.getSeconds()}秒`;
+    let nowStr = `${now.getFullYear()}-${now.getMonth() + 1}-${now.getDate()} ${now.getHours()}点${now.getMinutes()}分${now.getSeconds()}秒`;
     resultStrArr.push(`[I] [START] 开始统计分析数据标注信息 ${nowStr}`);
     resultStrArr.push(`[I] 原标注信息路径: ${labelsPath}`);
     resultStrArr.push(`[I] 原图片文件路径: ${labelsPath}`);
@@ -210,7 +216,7 @@ async function main(){
             let emptyFilePath = path.join(emptyPath, `${labelName}.${imageExts[0]}`);
             logStrArr.push(`[E] ${labelName}内容为空 空文件 ${emptyFilePath}`);
             console.log(`空文件: ${emptyFilePath}`)
-            cpFile(imagePath, emptyFilePath).then(_=>_)
+            cpFile(imagePath, emptyFilePath).then(_ => _)
             continue;
         }
         // 获取文件标注类型
@@ -250,16 +256,15 @@ async function main(){
         }
 
 
-
         // 尝试复制文件
         let transferFilePath = path.join(transferPath, fileTypeKeys);
-        let transferImagePath = path.join(transferFilePath, `images/${imageName}`  );
+        let transferImagePath = path.join(transferFilePath, `images/${imageName}`);
         let transferLabelPath = path.join(transferFilePath, `labels/${labelFile}`);
         // 文件转移
         // 同步复制文件
 
-        cpFile(filePath, transferLabelPath).then(_=>_)
-        cpFile(imagePath, transferImagePath).then(_=>_)
+        cpFile(filePath, transferLabelPath).then(_ => _)
+        cpFile(imagePath, transferImagePath).then(_ => _)
         logStrArr.push(`${fileTypeKeys}: ${countMap[fileTypeKeys]} ${filePath}  ${imagePath} ===>  ${transferLabelPath} ${transferImagePath} `);
         console.log(logStrArr[logStrArr.length - 1]);
     }
@@ -278,26 +283,23 @@ async function main(){
 }
 
 
-
-
-
 // 将新处理的labels 重新写回至原目录
-function reCopyToRaw(){
+function reCopyToRaw() {
     let labelsPath = `E:\\图库\\ai\\epower\\newLabels`
     // 数据转移目录
     let transferPath = `E:\\图库\\ai\\epower\\parse`
-        // "0": "吊车",
-        // "1": "塔吊",
-        // "2": "烟火",
-        // "3": "施工机械",
-        // "4": "导线异物",
+    // "0": "吊车",
+    // "1": "塔吊",
+    // "2": "烟火",
+    // "3": "施工机械",
+    // "4": "导线异物",
     // 吊车-导线异物
     // 施工机械-导线异物
     // 塔吊-导线异物
     // 塔吊-施工机械
 
     let fileTypeKeys = [1, 3]
-    fileTypeKeys = fileTypeKeys.map(item =>  dimensionType[item] ? dimensionType[item] : '未知类型');
+    fileTypeKeys = fileTypeKeys.map(item => dimensionType[item] ? dimensionType[item] : '未知类型');
     let targetType = fileTypeKeys.join('-');
     let targetPath = path.join(transferPath, targetType);
     let targetLabelPath = path.join(targetPath, 'labels');
@@ -307,7 +309,7 @@ function reCopyToRaw(){
     for (let i = 0; i < labelFiles.length; i++) {
         let labelFilePath = path.join(targetLabelPath, labelFiles[i]);
         // 拷贝文件
-        cpFile(labelFilePath, path.join(labelsPath, labelFiles[i])).then(_=>_)
+        cpFile(labelFilePath, path.join(labelsPath, labelFiles[i])).then(_ => _)
     }
     console.log(targetPath);
 }
@@ -323,7 +325,7 @@ async function copyLabel() {
     let subDirs = fs.readdirSync(allLabelsPath);
     for (let i = 0; i < subDirs.length; i++) {
 
-        if (subDirs[i] == 'classes.txt'){
+        if (subDirs[i] == 'classes.txt') {
             console.log(`skip classes.txt`)
             continue;
         }
@@ -344,28 +346,37 @@ async function copyLabel() {
     }
 }
 
-// 将转移出来的label文件合并至一个文件夹内
-async function copyLabelToAll() {
-    let allLabelsPath = `E:\\图库\\ai\\epower\\tmpLabels_2`
-    let baseImagesPath = `E:\\图库\\ai\\epower\\images`
-    let transferPath = `E:\\图库\\ai\\epower\\all_tmpLabels_2`
-    let trabsferImaegsPath = `E:\\图库\\ai\\epower\\all_tmpImages_2`
+/**
+ * 将分类好的label文件复制至指定目录
+ * @param allClassLabelsPath
+ * @param baseImagesPath
+ * @param resultPath
+ * @return {Promise<void>}
+ */
+async function copyLabelToAll(allClassLabelsPath, baseImagesPath, resultPath) {
+    console.log(`将尝试通过分类后的label文件进行最终数据获取\n
+     labels:${allClassLabelsPath} \n
+     images:${baseImagesPath} \n
+     复制至 ${resultPath} 下`)
+    let result_labelsPath = path.join(resultPath, 'labels');
+    let result_imagesPath = path.join(resultPath, 'images');
+    // 创建目录
     let imageExts = ['jpg']
 
     // 获取子目录
-    let subDirs = fs.readdirSync(allLabelsPath);
+    let subDirs = fs.readdirSync(allClassLabelsPath);
     for (let i = 0; i < subDirs.length; i++) {
 
-        if (subDirs[i] == 'classes.txt'){
+        if (subDirs[i] == 'classes.txt') {
             console.log(`skip classes.txt`)
             continue;
         }
-        let subDir = path.join(allLabelsPath, subDirs[i]);
+        let subDir = path.join(allClassLabelsPath, subDirs[i]);
         console.log(subDir)
         // 获取子目录下的labels
         let labelFiles = fs.readdirSync(subDir);
         for (let j = 0; j < labelFiles.length; j++) {
-            if (labelFiles[j] == 'classes.txt'){
+            if (labelFiles[j] == 'classes.txt') {
                 console.log(`skip classes.txt`)
                 continue;
             }
@@ -373,7 +384,7 @@ async function copyLabelToAll() {
             // 获取文件路径
             let labelFilePath = path.join(subDir, labelFiles[j]);
             // 复制文件 至新目录 新目录: 新目录/子目录名称
-            let newFilePath = path.join(transferPath, labelFiles[j]);
+            let newFilePath = path.join(result_labelsPath, labelFiles[j]);
             // 复制文件
             cpFile(labelFilePath, newFilePath)
 
@@ -384,7 +395,7 @@ async function copyLabelToAll() {
             imagePath = path.join(baseImagesPath, imageName);
 
             // 复制图片
-            let newImagePath = path.join(trabsferImaegsPath, imageName);
+            let newImagePath = path.join(result_imagesPath, imageName);
             console.log(`移动图片 ${imagePath} => ${newImagePath}`)
             // 判断图片是否存在
             if (!imagePath) {
@@ -396,17 +407,55 @@ async function copyLabelToAll() {
     }
 }
 
+// 通过labels文件获取图片
+async function getImageByLabel(labelsPath, baseImagePath, resultPath) {
+    console.log(`将尝试通过label文件进行最终数据获取\n
+     labels:${labelsPath} \n
+     images:${baseImagePath} \n
+     复制至 ${resultPath} 下`)
+    let labelFiles = fs.readdirSync(labelsPath);
+    let result_labelsPath = path.join(resultPath, 'labels');
+    let result_imagesPath = path.join(resultPath, 'images');
+
+    for (let i = 0; i < labelFiles.length; i++) {
+        let labelFilePath = path.join(labelsPath, labelFiles[i]);
+        if (!fs.statSync(labelFilePath).isFile()) {
+            console.log(`[E] ${labelFiles[i]} 不是文件, 该函数暂不支持`)
+            continue;
+        }
+        // 获取文件名
+        let labelName = labelFiles[i].replace('.txt', '');
+        // 文件名转换为图片名
+        let imageName = `${labelName}.jpg`;
+        let result_labelFilePath = path.join(result_labelsPath, labelFiles[i]);
+        let imagePath = path.join(baseImagePath, imageName);
+        if (!fs.statSync(imagePath).isFile()) {
+            console.error(`[E] 无法找到图片文件${imagePath}`)
+            continue;
+        }
+        cpFile(labelFilePath, result_labelFilePath)
+        cpFile(imagePath, path.join(result_imagesPath, imageName))
+    }
+}
+
+/**
+ *
+ * @param input
+ * @return {*}
+ */
+
 function replaceNewlines(input) {
     // One line break is either a CR with an optional LF after it, or a bare LF.
     const anyLineBreak = /\r\n?|\n/g;
     return input.replace(anyLineBreak, '\r\n');
 }
+
 // 将label 文件中的换行 从 \n 替换为 \r\n
-function relineLabel(){
+function relineLabel() {
     let labelsPath = `E:\\图库\\ai\\epower\\all_tmpLabels_2`
     // 遍历目录
     let labelFiles = fs.readdirSync(labelsPath);
     for (let i = 0; i < labelFiles.length; i++) {
         let labelFilePath = path.join(labelsPath, labelFiles[i]);
-        if (!fs.statSync(labelFilePath).isFile()){
+        if (!fs.statSync(labelFilePath).isFile()) {
             continue;
         }
         console.log(`start reline ${labelFilePath}`);
@@ -417,14 +466,148 @@ function relineLabel(){
 }
 
 
+// 读取classes.txt
+// 将classes.txt 中的的内容进行分组
+// 随后读取对应的文件目录, 解析xml文件 , 将其中的坐标等信息转换为yolo格式
+async function xmlToYolo(basePath, resultPath) {
+    // 读取classes.txt
+    let classesPath = path.join(basePath, 'classes.txt');
+    let classes = fs.readFileSync(classesPath, 'utf-8').split('\n');
+    let classesMap = {};
+    for (let i = 0; i < classes.length; i++) {
+        let className = classes[i].trim();
+        className = className.replace(/\s|\r\n|\n|\r/g, '');
+        classesMap[className] = i;
+    }
+    // 拷贝classes.txt
+    cpFile(classesPath, path.join(resultPath, 'classes.txt'))
+    console.log(classesMap)
+    // 遍历目录
+    let labelFiles = fs.readdirSync(basePath);
+    for (let i = 0; i < labelFiles.length; i++) {
+        let fileName = labelFiles[i];
+        let labelFilePath = path.join(basePath, fileName);
+        // console.log(`start parse ${labelFilePath}`)
+        if (fs.statSync(labelFilePath).isDirectory()) {
+            continue;
+        }
+        // 判断文件是否为xml格式
+        if (!fileName.endsWith('.xml')) {
+            console.log(`[E] ${fileName} 不是xml, 该函数暂不支持解析`)
+            continue;
+        }
+        let yoloFileName = fileName.replace('.xml', '.txt');
+        let resultFilePath = path.join(resultPath, yoloFileName);
+        await yoloXml2yolo(labelFilePath, classesMap, resultFilePath);
+    }
+
+}
 
-main();
 
+function convert(size, box) {
+    const dw = 1 / size.width;
+    const dh = 1 / size.height;
+    // 数据类型转换为float
+    box.xmin = parseFloat(box.xmin);
+    box.ymin = parseFloat(box.ymin);
+    box.xmax = parseFloat(box.xmax);
+    box.ymax = parseFloat(box.ymax);
+    const x = (box.xmin + box.xmax) / 2; // (x_min + x_max) / 2.0
+    const y = (box.ymin + box.ymax) / 2; // (y_min + y_max) / 2.0
+    const w = box.xmax - box.xmin; // x_max - x_min
+    const h = box.ymax - box.ymin; // y_max - y_min
+    const newX = x * dw;
+    const newW = w * dw;
+    const newY = y * dh;
+    const newH = h * dh;
+    return [newX, newY, newW, newH];
+}
+function _xmlToJson(xml) {
+    return new Promise((resolve, reject) => {
+        let parser = new xml2js.Parser({
+            explicitArray: false,
+            mergeAttrs: true,
+            explicitRoot: false
+        });
+        parser.parseString(xml, function (err, result) {
+            // console.log(result)
+            resolve(result);
+        });
+    })
+}
+/**
+ * 将yolo xml文件转换为yolo格式
+ * @param xmlPath
+ * @param classMap
+ * @param resultPath
+ */
+async function yoloXml2yolo(xmlPath, classMap, resultPath) {
+    let xml = fs.readFileSync(xmlPath, 'utf-8');
+    // console.log(xml)
+    let obj = await _xmlToJson(xml)
+    // 直接生成对应的 x1,y1, x2,y2
+    // console.log(obj)
+    let width = obj.size.width;
+    let height = obj.size.height;
+    let str = "";
+    // 如果只有一个对象则
+    let objects = []
+    if (!obj.object.length){
+        console.log(`${obj.filename} object is object`)
+        console.log(obj)
+        objects.push(obj.object)
+    }else {
+        objects = obj.object
+    }
+    for (let i = 0; i < objects.length; i++){
+        let sub = objects[i];
+        // console.log(sub)
+        let objName = classMap[sub.name];
+        let arr = convert(obj.size, sub.bndbox)
+        str += `${objName} ${arr.join(" ")}\r\n`
+    }
+    // console.log(str)
+    // 创建并写入文件
+    await writeFile(resultPath, str)
+}
+
+
+
+xmlToYolo("E:\\图库\\验证数据集\\labels - 副本", "E:\\图库\\验证数据集\\result")
+
+
+// main();
+
+/**将分类好的label文件, 拷贝至新目录 parse  => newLabels
+ 只会拷贝对应类型的文件
+ */
 // reCopyToRaw();
 
-// copyLabel();
 
-// copyLabelToAll()
+/**
+ * 将分类好的label文件, 拷贝至新目录 parse  => tmpLabels_2
+ * 只拷贝对应的label文件
+ *
+ */
+// copyLabel();
 
+/**
+ * 将分类好的label文件, 统一合并至一个文件夹内, 分类好的labels 应该是labels的子目录
+ * 并且拷贝对应的图片
+ * 分类好的label文件目录: tmpLabels_2
+ * 基础图片目录: images
+ * 输出label目录: all_tmpLabels_2
+ * 输出图片目录: all_tmpImages_2
+ */
+
+// 分类好的label文件目录 E:\图库\ai\epower\tmpLabels_2
+let allClassLabelsPath = `E:\\图库\\ai\\epower\\验证数据集_labels`
+// 基础图片目录
+let baseImagesPath = `E:\\图库\\ai\\epower\\images`
+// 输出目录
+let resultPath = `E:\\图库\\ai\\epower\\验证数据集`
+// copyLabelToAll(allClassLabelsPath, baseImagesPath, resultPath)
+
+// getImageByLabel(`E:\\图库\\ai\\epower\\验证数据集_labels`, `E:\\图库\\ai\\epower\\images`, resultPath)
 // relineLabel();