// Dataset maintenance script for YOLO-style annotation files.
//   1. Read the files of a given directory and load their content.
//   2. Parse the content to obtain the annotation records of each image.
//   3. Gather statistics about the annotations.
const fs = require('fs');
const path = require('path');
const readline = require('readline');
const xml2js = require('xml2js');

/**
 * Read a file as UTF-8 text.
 * @param {string} filePath
 * @return {Promise<string>} resolves with the file content, rejects with the fs error
 */
function readFile(filePath) {
    return new Promise((resolve, reject) => {
        fs.readFile(filePath, 'utf-8', (err, data) => {
            if (err) return reject(err);
            resolve(data);
        });
    });
}

/**
 * List the entry names of a directory.
 * @param {string} filePath directory to list
 * @return {Promise<string[]>} resolves with the entry names, rejects with the fs error
 */
function readDir(filePath) {
    return new Promise((resolve, reject) => {
        fs.readdir(filePath, (err, files) => {
            if (err) return reject(err);
            resolve(files);
        });
    });
}

/**
 * Parse the text of a label file into annotation records.
 *
 * Each non-blank line has the form `type x y width height`, e.g.
 * `1 0.391297 0.095892 0.280578 0.179688`. All fields are kept as strings.
 * Lines may end with \n, \r\n or \r, and fields may be separated by any run
 * of whitespace, so files rewritten by relineLabel() parse cleanly.
 *
 * @param {string} fileData raw content of a label file
 * @return {{type: string, x: string, y: string, width: string, height: string}[]}
 */
function parseYolo(fileData) {
    const result = [];
    for (const rawLine of fileData.split(/\r\n|\r|\n/)) {
        const line = rawLine.trim();
        // Skip blank lines.
        if (line === '') continue;
        const [type, x, y, width, height] = line.split(/\s+/);
        result.push({type, x, y, width, height});
    }
    return result;
}

/**
 * Make sure the parent directory of `filePath` exists, creating it
 * (recursively) when it does not.
 * @param {string} filePath path whose parent directory is required
 * @param {boolean} [logMissing=false] print a notice before creating the directory
 */
function ensureParentDir(filePath, logMissing = false) {
    const dirPath = path.dirname(filePath);
    if (fs.existsSync(dirPath)) return;
    if (logMissing) console.log(`路径${dirPath} 不存在`);
    fs.mkdirSync(dirPath, {recursive: true});
}

/**
 * Return the first `<baseName>.<ext>` (trying `exts` in order) that exists
 * inside `imagesDir`, or '' when none of the candidates exists.
 * @param {string} imagesDir
 * @param {string} baseName file name without extension
 * @param {string[]} exts candidate extensions without the leading dot
 * @return {string}
 */
function findImageName(imagesDir, baseName, exts) {
    for (const ext of exts) {
        const candidate = `${baseName}.${ext}`;
        if (fs.existsSync(path.join(imagesDir, candidate))) return candidate;
    }
    return '';
}

/**
 * Write a file, creating its parent directory first when needed.
 * @param {string} filePath
 * @param {string} fileData
 * @return {Promise<void>}
 */
function writeFile(filePath, fileData) {
    return new Promise((resolve, reject) => {
        ensureParentDir(filePath);
        fs.writeFile(filePath, fileData, (err) => {
            if (err) return reject(err);
            resolve();
        });
    });
}

/**
 * Move (rename) a file, creating the parent directory of the target first.
 * @param {string} filePath source path
 * @param {string} targetPath destination path
 * @return {Promise<void>}
 */
function mvFile(filePath, targetPath) {
    return new Promise((resolve, reject) => {
        // The directory that must exist is the destination's, not the source's.
        ensureParentDir(targetPath);
        fs.rename(filePath, targetPath, (err) => {
            if (err) return reject(err);
            resolve();
        });
    });
}

/**
 * Copy a file, creating the parent directory of the target first.
 * @param {string} filePath source path
 * @param {string} targetPath destination path
 * @return {Promise<void>}
 */
function cpFile(filePath, targetPath) {
    return new Promise((resolve, reject) => {
        ensureParentDir(targetPath, true);
        fs.copyFile(filePath, targetPath, (err) => {
            if (err) return reject(err);
            resolve();
        });
    });
}

/**
 * Recursively delete a directory.
 * @param {string} dirPath directory to remove
 * @return {Promise<void>}
 */
function rmDir(dirPath) {
    return new Promise((resolve, reject) => {
        const done = (err) => {
            if (err) return reject(err);
            resolve();
        };
        // fs.rmdir's `recursive` option is deprecated; prefer fs.rm when the
        // runtime provides it and fall back for older Node.js versions.
        if (typeof fs.rm === 'function') {
            fs.rm(dirPath, {recursive: true}, done);
        } else {
            fs.rmdir(dirPath, {recursive: true}, done);
        }
    });
}

// Annotation class id -> display name. The names are also used as directory
// names when main() sorts files by the classes they contain.
const dimensionType = {
    "-1": "无标注",   // no annotation
    "0": "吊车",      // crane
    "1": "塔吊",      // tower crane
    "2": "烟火",      // smoke and fire
    "3": "施工机械",  // construction machinery
    "4": "导线异物",  // foreign object on a conductor
    "5": "烟雾"       // smoke
};

/**
 * Classify every label file by the set of annotation classes it contains.
 *
 * For each label file the matching image is looked up, the pair is copied to
 * `<transferPath>/<class names joined by '-'>/{labels,images}/`, and a counter
 * per class combination is kept. Images whose label file is empty are copied
 * to `emptyPath`. A summary (result.txt) and a detailed log (log.txt) are
 * written to `logsPath`. All directories are hard-coded below.
 *
 * @return {Promise<void>}
 */
async function main() {
    // Alternative label sources used previously:
    // const labelsPath = `E:\\图库\\ai\\epower_v2\\newLabels`
    // const labelsPath = `E:\\图库\\ai\\epower_v2\\处理labels`
    // const labelsPath = `E:\\图库\\ai\\epower_v2\\labels`
    const labelsPath = `E:\\图库\\ai\\epower\\all_tmpLabels_2`;
    const imagesPath = `E:\\图库\\ai\\epower\\images`;
    const emptyPath = `E:\\图库\\ai\\epower\\empty`;
    const imageExts = ['jpg'];
    const logsPath = `E:\\图库\\ai\\epower\\logs`;
    // Directory that receives the classified copies.
    const transferPath = `E:\\图库\\ai\\epower\\parse`;

    // Start from a clean transfer directory.
    if (fs.existsSync(transferPath)) {
        await rmDir(transferPath);
    }

    const countMap = {};
    const logStrArr = [];
    const resultStrArr = [];

    const labelFiles = await readDir(labelsPath);
    const imagesFiles = await readDir(imagesPath);

    const now = new Date();
    const nowStr = `${now.getFullYear()}-${now.getMonth() + 1}-${now.getDate()} ${now.getHours()}点${now.getMinutes()}分${now.getSeconds()}秒`;
    resultStrArr.push(`[I] [START] 开始统计分析数据标注信息 ${nowStr}`);
    resultStrArr.push(`[I] 原标注信息路径: ${labelsPath}`);
    resultStrArr.push(`[I] 原图片文件路径: ${imagesPath}`);
    resultStrArr.push(`[I] 输出文件路径: ${transferPath}`);
    resultStrArr.push(`[I] 输出文件路径: ${logsPath}`);
    resultStrArr.push(`[I] 标注文件数量: ${labelFiles.length}`);
    resultStrArr.push(`[I] 图片文件数量: ${imagesFiles.length}`);
    resultStrArr.push(`[I] 图片后缀: ${imageExts}`);

    // When the labels directory holds a classes.txt, carry it over to transferPath.
    const classesFile = path.join(labelsPath, 'classes.txt');
    if (fs.existsSync(classesFile)) {
        await cpFile(classesFile, path.join(transferPath, 'classes.txt'));
    }

    // The detailed log starts with the same header as the summary.
    logStrArr.push(...resultStrArr);

    for (const [i, labelFile] of labelFiles.entries()) {
        const filePath = path.join(labelsPath, labelFile);
        // The image shares the label file's base name.
        const labelName = path.basename(labelFile, '.txt');
        const fileData = await readFile(filePath);
        const result = parseYolo(fileData);

        // The image may carry any of the configured extensions.
        const imageName = findImageName(imagesPath, labelName, imageExts);
        if (!imageName) {
            logStrArr.push(`[E] 图片不存在 ${filePath} ${fileData} 可能是图片后缀异常 [ ${imageExts.join(', ')} ]`);
            continue;
        }
        const imagePath = path.join(imagesPath, imageName);

        // Redraw the progress line in place.
        readline.cursorTo(process.stdout, 0);
        readline.clearScreenDown(process.stdout);
        process.stdout.write(`${i} / ${labelFiles.length} | 解析文件: ${filePath} 中\n`);

        // Label file without any annotation: count it and set the image aside.
        if (result.length === 0) {
            countMap['空'] = (countMap['空'] || 0) + 1;
            const emptyFilePath = path.join(emptyPath, imageName);
            logStrArr.push(`[E] ${labelName}内容为空 空文件 ${emptyFilePath}`);
            console.log(`空文件: ${emptyFilePath}`);
            await cpFile(imagePath, emptyFilePath);
            continue;
        }

        // Count the annotations of this file per class name.
        const fileType = {};
        for (const annotation of result) {
            const typeName = dimensionType[annotation.type];
            if (!typeName) {
                console.log('未知类型');
                logStrArr.push(`[E] 未知类型 ${filePath} ${fileData}`);
                continue;
            }
            fileType[typeName] = (fileType[typeName] || 0) + 1;
        }
        console.log(fileType);
        if (Object.keys(fileType).length === 0) {
            countMap['异常标注'] = (countMap['异常标注'] || 0) + 1;
            continue;
        }

        // The sorted class names joined by '-' identify the combination,
        // e.g. "class1", "class2" or "class1-class2".
        const fileTypeKeys = Object.keys(fileType).sort().join('-');
        countMap[fileTypeKeys] = (countMap[fileTypeKeys] || 0) + 1;

        const transferFilePath = path.join(transferPath, fileTypeKeys);
        const transferImagePath = path.join(transferFilePath, 'images', imageName);
        const transferLabelPath = path.join(transferFilePath, 'labels', labelFile);

        // Wait for both copies so that a failure surfaces here instead of
        // as an unhandled rejection.
        await cpFile(filePath, transferLabelPath);
        await cpFile(imagePath, transferImagePath);
        logStrArr.push(`${fileTypeKeys}: ${countMap[fileTypeKeys]} ${filePath} ${imagePath} ===> ${transferLabelPath} ${transferImagePath} `);
        console.log(logStrArr[logStrArr.length - 1]);
    }

    console.log(countMap);

    // Persist the statistics.
    logStrArr.push(`\r\n${JSON.stringify(countMap, null, 4)}`);
    resultStrArr.push(`${JSON.stringify(countMap, null, 4)}`);
    const logStr = logStrArr.join('\r\n');
    const resultStr = resultStrArr.join('\r\n');
    await writeFile(path.join(logsPath, 'result.txt'), resultStr);
    await writeFile(path.join(logsPath, 'log.txt'), logStr);
    console.log('end');
}

/**
 * Copy the reworked label files of one class combination from the transfer
 * directory back into the newLabels directory.
 *
 * The combination is selected by the hard-coded class ids in `fileTypeKeys`.
 * Combinations noted by the original author: 吊车-导线异物, 施工机械-导线异物,
 * 塔吊-导线异物, 塔吊-施工机械.
 *
 * @return {Promise<void>}
 */
async function reCopyToRaw() {
    const labelsPath = `E:\\图库\\ai\\epower\\newLabels`;
    // Directory that holds the classified copies.
    const transferPath = `E:\\图库\\ai\\epower\\parse`;

    const fileTypeKeys = [1, 3];
    const targetType = fileTypeKeys
        .map(item => dimensionType[item] ? dimensionType[item] : '未知类型')
        .join('-');
    const targetPath = path.join(transferPath, targetType);
    const targetLabelPath = path.join(targetPath, 'labels');

    for (const labelFile of fs.readdirSync(targetLabelPath)) {
        await cpFile(path.join(targetLabelPath, labelFile), path.join(labelsPath, labelFile));
    }
    console.log(targetPath);
}

/**
 * Copy the label files of every class combination under the transfer
 * directory into `<transferPath>/<combination>/`, without the images.
 * @return {Promise<void>}
 */
async function copyLabel() {
    // const allLabelsPath = `E:\\图库\\ai\\epower_v2\\parse`
    const allLabelsPath = `E:\\图库\\ai\\epower\\parse`;
    // const transferPath = `E:\\图库\\ai\\epower_v2\\tmpLabels_2`
    const transferPath = `E:\\图库\\ai\\epower\\tmpLabels_2`;

    for (const subDirName of fs.readdirSync(allLabelsPath)) {
        if (subDirName === 'classes.txt') {
            console.log(`skip classes.txt`);
            continue;
        }
        const subDir = path.join(allLabelsPath, subDirName, 'labels');
        console.log(subDir);
        // Target directory: <transferPath>/<sub directory name>.
        const newDir = path.join(transferPath, subDirName);
        for (const labelFile of fs.readdirSync(subDir)) {
            await cpFile(path.join(subDir, labelFile), path.join(newDir, labelFile));
        }
    }
}

/**
 * Merge classified label files into a single directory and copy the
 * matching images next to them.
 *
 * Every sub directory of `allClassLabelsPath` is scanned; each label file is
 * copied to `<resultPath>/labels` and its image (looked up in
 * `baseImagesPath`) to `<resultPath>/images`. A missing image is reported and
 * skipped; the label file is still copied.
 *
 * @param allClassLabelsPath directory whose sub directories hold the label files
 * @param baseImagesPath directory that holds the source images
 * @param resultPath output directory
 * @return {Promise}
 */
async function copyLabelToAll(allClassLabelsPath, baseImagesPath, resultPath) {
    console.log(`将尝试通过分类后的label文件进行最终数据获取\n labels:${allClassLabelsPath} \n images:${baseImagesPath} \n 复制至 ${resultPath} 下`);
    const result_labelsPath = path.join(resultPath, 'labels');
    const result_imagesPath = path.join(resultPath, 'images');
    const imageExts = ['jpg'];

    for (const subDirName of fs.readdirSync(allClassLabelsPath)) {
        if (subDirName === 'classes.txt') {
            console.log(`skip classes.txt`);
            continue;
        }
        const subDir = path.join(allClassLabelsPath, subDirName);
        console.log(subDir);
        for (const labelFile of fs.readdirSync(subDir)) {
            if (labelFile === 'classes.txt') {
                console.log(`skip classes.txt`);
                continue;
            }
            const labelName = path.basename(labelFile, '.txt');
            const labelFilePath = path.join(subDir, labelFile);
            await cpFile(labelFilePath, path.join(result_labelsPath, labelFile));

            // Check on disk: a joined path string is never empty, so only
            // an existence test can detect a missing image.
            const imageName = findImageName(baseImagesPath, labelName, imageExts);
            if (!imageName) {
                console.log(`[E] 图片不存在 ${labelFilePath} 可能是图片后缀异常 [ ${imageExts.join(', ')} ]`);
                continue;
            }
            const imagePath = path.join(baseImagesPath, imageName);
            const newImagePath = path.join(result_imagesPath, imageName);
            console.log(`移动图片 ${imagePath} => ${newImagePath}`);
            await cpFile(imagePath, newImagePath);
        }
    }
}

/**
 * Collect the images that belong to the label files of a flat directory.
 *
 * For each regular file in `labelsPath` the image `<name>.jpg` is looked up
 * in `baseImagePath`; when it exists, label and image are copied to
 * `<resultPath>/labels` and `<resultPath>/images`. Sub directories and labels
 * without an image are reported and skipped.
 *
 * @param labelsPath directory that holds the label files
 * @param baseImagePath directory that holds the source images
 * @param resultPath output directory
 * @return {Promise<void>}
 */
async function getImageByLabel(labelsPath, baseImagePath, resultPath) {
    console.log(`将尝试通过label文件进行最终数据获取\n labels:${labelsPath} \n images:${baseImagePath} \n 复制至 ${resultPath} 下`);
    const result_labelsPath = path.join(resultPath, 'labels');
    const result_imagesPath = path.join(resultPath, 'images');

    for (const labelFile of fs.readdirSync(labelsPath)) {
        const labelFilePath = path.join(labelsPath, labelFile);
        if (!fs.statSync(labelFilePath).isFile()) {
            console.log(`[E] ${labelFile} 不是文件, 该函数暂不支持`);
            continue;
        }
        // Derive the image name from the label file name.
        const labelName = path.basename(labelFile, '.txt');
        const imageName = `${labelName}.jpg`;
        const imagePath = path.join(baseImagePath, imageName);
        // statSync throws on a missing path, so test existence first.
        if (!fs.existsSync(imagePath) || !fs.statSync(imagePath).isFile()) {
            console.error(`[E] 无法找到图片文件${imagePath}`);
            continue;
        }
        await cpFile(labelFilePath, path.join(result_labelsPath, labelFile));
        await cpFile(imagePath, path.join(result_imagesPath, imageName));
    }
}

/**
 * Normalize every line break (\r\n, \r or \n) to \r\n.
 * @param {string} input
 * @return {string}
 */
function replaceNewlines(input) {
    return input.replace(/(\r\n|\r|\n)/g, '\r\n');
}

/**
 * Rewrite every regular file in the hard-coded labels directory so that its
 * line breaks are \r\n.
 */
function relineLabel() {
    const labelsPath = `E:\\图库\\ai\\epower\\all_tmpLabels_2`;
    for (const labelFile of fs.readdirSync(labelsPath)) {
        const labelFilePath = path.join(labelsPath, labelFile);
        if (!fs.statSync(labelFilePath).isFile()) {
            continue;
        }
        console.log(`start reline ${labelFilePath}`);
        const fileContent = fs.readFileSync(labelFilePath, 'utf-8');
        fs.writeFileSync(labelFilePath, replaceNewlines(fileContent));
    }
}

/**
 * Convert every XML annotation file of `basePath` into a YOLO label file
 * inside `resultPath`.
 *
 * `<basePath>/classes.txt` provides the class names, one per line; the line
 * index is the class index written to the label files. classes.txt itself is
 * copied to `resultPath`. Sub directories and non-XML files are skipped.
 *
 * @param {string} basePath directory with classes.txt and the XML files
 * @param {string} resultPath output directory
 * @return {Promise<void>}
 */
async function xmlToYolo(basePath, resultPath) {
    const classesPath = path.join(basePath, 'classes.txt');
    const classes = fs.readFileSync(classesPath, 'utf-8').split('\n');
    const classesMap = {};
    classes.forEach((rawName, index) => {
        // Strip all whitespace, including a trailing \r.
        const className = rawName.replace(/\s/g, '');
        // A blank line keeps its index slot but does not define a class.
        if (className === '') return;
        classesMap[className] = index;
    });

    await cpFile(classesPath, path.join(resultPath, 'classes.txt'));
    console.log(classesMap);

    for (const fileName of fs.readdirSync(basePath)) {
        const labelFilePath = path.join(basePath, fileName);
        if (fs.statSync(labelFilePath).isDirectory()) {
            continue;
        }
        // Only XML files can be converted.
        if (!fileName.endsWith('.xml')) {
            console.log(`[E] ${fileName} 不是xml, 该函数暂不支持解析`);
            continue;
        }
        // Swap the trailing extension only, not the first '.xml' in the name.
        const yoloFileName = `${path.basename(fileName, '.xml')}.txt`;
        await yoloXml2yolo(labelFilePath, classesMap, path.join(resultPath, yoloFileName));
    }
}

/**
 * Convert a corner-based box into centre/size values divided by the image
 * dimensions.
 *
 * @param {{width: (string|number), height: (string|number)}} size image size
 * @param {{xmin, ymin, xmax, ymax}} box corner coordinates (strings or numbers); not modified
 * @return {number[]} [centerX / width, centerY / height, boxWidth / width, boxHeight / height]
 */
function convert(size, box) {
    const dw = 1 / Number(size.width);
    const dh = 1 / Number(size.height);
    const xmin = parseFloat(box.xmin);
    const ymin = parseFloat(box.ymin);
    const xmax = parseFloat(box.xmax);
    const ymax = parseFloat(box.ymax);
    const x = (xmin + xmax) / 2;
    const y = (ymin + ymax) / 2;
    const w = xmax - xmin;
    const h = ymax - ymin;
    return [x * dw, y * dh, w * dw, h * dh];
}

/**
 * Parse an XML string with xml2js (no explicit root, attributes merged,
 * single children not wrapped in arrays).
 * @param {string} xml
 * @return {Promise<Object>} resolves with the parsed document, rejects with the parser error
 */
function _xmlToJson(xml) {
    return new Promise((resolve, reject) => {
        const parser = new xml2js.Parser({
            explicitArray: false,
            mergeAttrs: true,
            explicitRoot: false
        });
        parser.parseString(xml, (err, result) => {
            if (err) return reject(err);
            resolve(result);
        });
    });
}

/**
 * Convert one XML annotation file into a YOLO label file.
 *
 * One line `classIndex x y w h` (terminated by \r\n) is written per object
 * whose name is present in `classMap`; objects with an unknown name are
 * reported and left out. An XML file without objects yields an empty file.
 *
 * @param xmlPath path of the XML file
 * @param classMap class name -> class index
 * @param resultPath path of the label file to write
 * @return {Promise<void>}
 */
async function yoloXml2yolo(xmlPath, classMap, resultPath) {
    const xml = fs.readFileSync(xmlPath, 'utf-8');
    const obj = await _xmlToJson(xml);
    if (!obj) {
        throw new Error(`无法解析xml文件 ${xmlPath}`);
    }

    // With explicitArray: false a single <object> arrives as a plain object,
    // several as an array, and none as undefined.
    let objects = [];
    if (Array.isArray(obj.object)) {
        objects = obj.object;
    } else if (obj.object) {
        objects = [obj.object];
    }

    let str = "";
    for (const sub of objects) {
        if (!Object.prototype.hasOwnProperty.call(classMap, sub.name)) {
            console.error(`[E] 未知类别 ${sub.name} ${xmlPath}`);
            continue;
        }
        const arr = convert(obj.size, sub.bndbox);
        str += `${classMap[sub.name]} ${arr.join(" ")}\r\n`;
    }
    await writeFile(resultPath, str);
}

// ---------------------------------------------------------------------------
// Entry point: enable exactly one of the calls below.
// ---------------------------------------------------------------------------
xmlToYolo("E:\\图库\\验证数据集\\labels - 副本", "E:\\图库\\验证数据集\\result")
    .catch((err) => {
        console.error(err);
        process.exitCode = 1;
    });

// main();

/** Copy classified label files to a new directory (parse => newLabels); only the selected class combination is copied. */
// reCopyToRaw();

/**
 * Copy classified label files to a new directory (parse => tmpLabels_2).
 * Only the label files are copied.
 */
// copyLabel();

/**
 * Merge classified label files into one directory (the classified labels are
 * expected to be sub directories of the labels directory) and copy the
 * matching images.
 * Classified label directory: tmpLabels_2
 * Base image directory: images
 * Output label directory: all_tmpLabels_2
 * Output image directory: all_tmpImages_2
 */
// Classified label directory, e.g. E:\图库\ai\epower\tmpLabels_2
let allClassLabelsPath = `E:\\图库\\ai\\epower\\验证数据集_labels`
// Base image directory
let baseImagesPath = `E:\\图库\\ai\\epower\\images`
// Output directory
let resultPath = `E:\\图库\\ai\\epower\\验证数据集`

// copyLabelToAll(allClassLabelsPath, baseImagesPath, resultPath)
// getImageByLabel(`E:\\图库\\ai\\epower\\验证数据集_labels`, `E:\\图库\\ai\\epower\\images`, resultPath)
// relineLabel();