123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613 |
- // 1. 读取指定目录文件, 获取文件内容.
- // 2. 解析内容, 获取图片的标注信息
- // 3. 统计标注信息
- const fs = require('fs');
- const path = require('path');
- const readline = require('readline');
- const xml2js = require('xml2js');
/**
 * Read a whole file as UTF-8 text.
 *
 * @param {string} filePath - Path of the file to read.
 * @returns {Promise<string>} Resolves with the file content; rejects with the
 *     underlying fs error (for example ENOENT).
 */
function readFile(filePath) {
    return fs.promises.readFile(filePath, 'utf-8');
}
/**
 * List the entry names of a directory.
 *
 * @param {string} filePath - Path of the directory to list.
 * @returns {Promise<string[]>} Resolves with the entry names (no path prefix);
 *     rejects with the underlying fs error.
 */
function readDir(filePath) {
    return fs.promises.readdir(filePath);
}
/**
 * Parse the text of a YOLO label file into annotation records.
 *
 * Each non-blank line is expected to look like
 * `<type> <x> <y> <width> <height>`, for example
 * `1 0.391297 0.095892 0.280578 0.179688`.
 *
 * Lines may end with `\n`, `\r\n` or `\r`, and fields may be separated by any
 * run of whitespace, so files rewritten with Windows line endings do not leave
 * a stray `\r` attached to the last field.
 *
 * @param {string} fileData - Raw content of the label file.
 * @returns {Array<{type: string, x: string, y: string, width: string, height: string}>}
 *     One record per non-blank line; all fields are kept as strings.
 */
function parseYolo(fileData) {
    const result = [];
    for (const rawLine of fileData.split(/\r\n|\r|\n/)) {
        const line = rawLine.trim();
        // Skip blank lines (including the one after a trailing newline).
        if (line === '') continue;
        const [type, x, y, width, height] = line.split(/\s+/);
        result.push({type, x, y, width, height});
    }
    return result;
}
/**
 * Write data to a file, creating the parent directory first if needed.
 *
 * This is the single definition of `writeFile`; the file previously declared
 * the same function twice with identical bodies.
 *
 * @param {string} filePath - Destination file path.
 * @param {string|Buffer} fileData - Content to write.
 * @returns {Promise<void>} Resolves once the file is written; rejects with the
 *     underlying fs error.
 */
async function writeFile(filePath, fileData) {
    await fs.promises.mkdir(path.dirname(filePath), {recursive: true});
    await fs.promises.writeFile(filePath, fileData);
}

/**
 * Move (rename) a file, creating the parent directory of the target first.
 *
 * The directory that must exist for a rename to succeed is the target's
 * parent, not the source's, so that is the one created here.
 *
 * @param {string} filePath - Existing source file.
 * @param {string} targetPath - New location of the file.
 * @returns {Promise<void>} Resolves once the file is moved; rejects with the
 *     underlying fs error.
 */
async function mvFile(filePath, targetPath) {
    await fs.promises.mkdir(path.dirname(targetPath), {recursive: true});
    await fs.promises.rename(filePath, targetPath);
}
/**
 * Copy a file, creating the parent directory of the target when it is missing.
 *
 * Two console messages are printed whenever the target directory has to be
 * created.
 *
 * @param {string} filePath - Source file.
 * @param {string} targetPath - Destination file.
 * @returns {Promise<void>} Resolves once the copy is done; rejects with the
 *     underlying fs error.
 */
async function cpFile(filePath, targetPath) {
    const targetDir = path.dirname(targetPath);
    const targetDirMissing = !fs.existsSync(targetDir);
    if (targetDirMissing) {
        console.log(`路径${targetDir} 不存在`);
        fs.mkdirSync(targetDir, {recursive: true});
        console.log('路径不存在');
    }
    await fs.promises.copyFile(filePath, targetPath);
}
/**
 * Recursively delete a directory and everything inside it.
 *
 * Uses `fs.rm` when the runtime provides it, because the `recursive` option of
 * `fs.rmdir` is deprecated; falls back to `fs.rmdir` on older Node.js versions.
 *
 * @param {string} path - Directory to remove (shadows the `path` module inside
 *     this function only).
 * @returns {Promise<void>} Resolves once the directory is gone; rejects with
 *     the underlying fs error.
 */
function rmDir(path) {
    const remove = typeof fs.rm === 'function' ? fs.rm : fs.rmdir;
    return new Promise((resolve, reject) => {
        remove(path, {recursive: true}, (err) => {
            if (err) return reject(err);
            resolve();
        });
    });
}
// Lookup table from the class id found in a label file to its display name.
// The key "-1" maps to "无标注" (no annotation).
const dimensionType = {
    0: '吊车',
    1: '塔吊',
    2: '烟火',
    3: '施工机械',
    4: '导线异物',
    5: '烟雾',
    '-1': '无标注',
};
/**
 * Analyse a directory of YOLO label files and sort the labelled images by the
 * combination of classes they contain.
 *
 * For every `*.txt` label file (except `classes.txt`) the matching image is
 * looked up in `imagesPath` using each extension of `imageExts` in order.
 * - Label files without a matching image are reported in the log.
 * - Images whose label file is empty are copied to `emptyPath`.
 * - All other label/image pairs are copied to
 *   `<transferPath>/<sorted class names joined by "-">/{labels,images}/`.
 * A summary is written to `<logsPath>/result.txt` and a detailed log to
 * `<logsPath>/log.txt`.
 *
 * `transferPath` is deleted at the start of every run.
 *
 * @param {object} [options] - Overrides for the built-in default paths.
 * @param {string} [options.labelsPath] - Directory with the label files.
 * @param {string} [options.imagesPath] - Directory with the images.
 * @param {string} [options.emptyPath] - Where images with empty labels go.
 * @param {string[]} [options.imageExts] - Image extensions to try, without dot.
 * @param {string} [options.logsPath] - Directory for result.txt / log.txt.
 * @param {string} [options.transferPath] - Output directory for sorted data.
 * @returns {Promise<void>}
 */
async function main(options = {}) {
    const {
        labelsPath = `E:\\图库\\ai\\epower\\all_tmpLabels_2`,
        imagesPath = `E:\\图库\\ai\\epower\\images`,
        emptyPath = `E:\\图库\\ai\\epower\\empty`,
        imageExts = ['jpg'],
        logsPath = `E:\\图库\\ai\\epower\\logs`,
        transferPath = `E:\\图库\\ai\\epower\\parse`,
    } = options;

    // Start from a clean output directory.
    if (fs.existsSync(transferPath)) {
        await rmDir(transferPath);
    }

    const countMap = {};
    const logStrArr = [];
    const resultStrArr = [];

    // A failed copy is recorded and the run continues with the next file.
    const copyOrLog = async (from, to) => {
        try {
            await cpFile(from, to);
        } catch (err) {
            logStrArr.push(`[E] 复制失败 ${from} ===> ${to} ${err.message}`);
            console.error(err);
        }
    };

    const labelFiles = await readDir(labelsPath);
    const imagesFiles = await readDir(imagesPath);

    const now = new Date();
    const nowStr = `${now.getFullYear()}-${now.getMonth() + 1}-${now.getDate()} ${now.getHours()}点${now.getMinutes()}分${now.getSeconds()}秒`;
    resultStrArr.push(`[I] [START] 开始统计分析数据标注信息 ${nowStr}`);
    resultStrArr.push(`[I] 原标注信息路径: ${labelsPath}`);
    resultStrArr.push(`[I] 原图片文件路径: ${imagesPath}`);
    resultStrArr.push(`[I] 输出文件路径: ${transferPath}`);
    resultStrArr.push(`[I] 输出文件路径: ${logsPath}`);
    resultStrArr.push(`[I] 标注文件数量: ${labelFiles.length}`);
    resultStrArr.push(`[I] 图片文件数量: ${imagesFiles.length}`);
    resultStrArr.push(`[I] 图片后缀: ${imageExts}`);

    // Carry classes.txt over to the output directory when it exists.
    const classesFilePath = path.join(labelsPath, 'classes.txt');
    if (fs.existsSync(classesFilePath)) {
        await cpFile(classesFilePath, path.join(transferPath, 'classes.txt'));
    }

    // The detailed log starts with the same header as the summary.
    logStrArr.push(...resultStrArr);

    for (const [i, labelFile] of labelFiles.entries()) {
        // classes.txt is the class list, not a label file; other extensions
        // are not label files either.
        const labelExt = path.extname(labelFile);
        if (labelFile === 'classes.txt' || labelExt.toLowerCase() !== '.txt') {
            continue;
        }

        const filePath = path.join(labelsPath, labelFile);
        const labelName = path.basename(labelFile, labelExt);
        const fileData = await readFile(filePath);
        const result = parseYolo(fileData);

        // The image shares the label's base name; take the first extension
        // for which a file actually exists.
        const matchedExt = imageExts.find((ext) => fs.existsSync(path.join(imagesPath, `${labelName}.${ext}`)));
        if (matchedExt === undefined) {
            logStrArr.push(`[E] 图片不存在 ${filePath} ${fileData} 可能是图片后缀异常 [ ${imageExts.join(', ')} ]`);
            continue;
        }
        const imageName = `${labelName}.${matchedExt}`;
        const imagePath = path.join(imagesPath, imageName);

        readline.cursorTo(process.stdout, 0);
        readline.clearScreenDown(process.stdout);
        process.stdout.write(`${i} / ${labelFiles.length} | 解析文件: ${filePath} 中\n`);

        // Empty label file: count it and set the image aside.
        if (result.length === 0) {
            countMap['空'] = (countMap['空'] || 0) + 1;
            const emptyFilePath = path.join(emptyPath, imageName);
            logStrArr.push(`[E] ${labelName}内容为空 空文件 ${emptyFilePath}`);
            console.log(`空文件: ${emptyFilePath}`);
            await copyOrLog(imagePath, emptyFilePath);
            continue;
        }

        // Count the annotations of this file per class name.
        const fileType = {};
        for (const annotation of result) {
            const typeName = dimensionType[annotation.type];
            if (!typeName) {
                console.log('未知类型');
                logStrArr.push(`[E] 未知类型 ${filePath} ${fileData}`);
                continue;
            }
            fileType[typeName] = (fileType[typeName] || 0) + 1;
        }
        console.log(fileType);
        if (Object.keys(fileType).length === 0) {
            countMap['异常标注'] = (countMap['异常标注'] || 0) + 1;
            continue;
        }

        // The bucket name is the sorted list of class names joined by "-".
        const fileTypeKeys = Object.keys(fileType).sort().join('-');
        countMap[fileTypeKeys] = (countMap[fileTypeKeys] || 0) + 1;

        const transferFilePath = path.join(transferPath, fileTypeKeys);
        const transferImagePath = path.join(transferFilePath, `images/${imageName}`);
        const transferLabelPath = path.join(transferFilePath, `labels/${labelFile}`);

        // Wait for both copies so nothing is still in flight (or failing
        // unnoticed) when the function returns.
        await Promise.all([
            copyOrLog(filePath, transferLabelPath),
            copyOrLog(imagePath, transferImagePath),
        ]);
        logStrArr.push(`${fileTypeKeys}: ${countMap[fileTypeKeys]} ${filePath} ${imagePath} ===> ${transferLabelPath} ${transferImagePath} `);
        console.log(logStrArr[logStrArr.length - 1]);
    }

    console.log(countMap);

    // Append the statistics and persist both reports.
    logStrArr.push(`\r\n${JSON.stringify(countMap, null, 4)}`);
    resultStrArr.push(`${JSON.stringify(countMap, null, 4)}`);
    await writeFile(path.join(logsPath, 'result.txt'), resultStrArr.join('\r\n'));
    await writeFile(path.join(logsPath, 'log.txt'), logStrArr.join('\r\n'));
    console.log('end');
}
/**
 * Copy the label files of one class-combination bucket from the sorted output
 * directory (`parse/<bucket>/labels`) back into the `newLabels` directory.
 *
 * The bucket is selected by the hard-coded class ids below (currently 1 and 3,
 * i.e. "塔吊-施工机械"). Copies are awaited one by one so a failure rejects
 * the returned promise instead of becoming an unhandled rejection.
 *
 * @returns {Promise<void>}
 */
async function reCopyToRaw() {
    const labelsPath = `E:\\图库\\ai\\epower\\newLabels`;
    // Sorted output directory produced by main().
    const transferPath = `E:\\图库\\ai\\epower\\parse`;

    // Class ids of the bucket to copy back; unknown ids map to '未知类型'.
    const targetType = [1, 3]
        .map((id) => (dimensionType[id] ? dimensionType[id] : '未知类型'))
        .join('-');
    const targetPath = path.join(transferPath, targetType);
    const targetLabelPath = path.join(targetPath, 'labels');

    for (const labelFile of fs.readdirSync(targetLabelPath)) {
        await cpFile(path.join(targetLabelPath, labelFile), path.join(labelsPath, labelFile));
    }
    console.log(targetPath);
}
/**
 * Copy the label files of every bucket under the sorted output directory into
 * a per-bucket folder of the transfer directory:
 * `parse/<bucket>/labels/<file>` -> `tmpLabels_2/<bucket>/<file>`.
 *
 * The top-level `classes.txt` entry is skipped. Files are copied one at a
 * time.
 *
 * @returns {Promise<void>}
 */
async function copyLabel() {
    const allLabelsPath = `E:\\图库\\ai\\epower\\parse`;
    const transferPath = `E:\\图库\\ai\\epower\\tmpLabels_2`;

    for (const bucketName of fs.readdirSync(allLabelsPath)) {
        if (bucketName === 'classes.txt') {
            console.log(`skip classes.txt`);
            continue;
        }
        const bucketLabelsDir = path.join(allLabelsPath, bucketName, 'labels');
        console.log(bucketLabelsDir);

        for (const labelFileName of fs.readdirSync(bucketLabelsDir)) {
            const sourceFile = path.join(bucketLabelsDir, labelFileName);
            const destinationDir = path.join(transferPath, bucketName);
            const destinationFile = path.join(destinationDir, labelFileName);
            await cpFile(sourceFile, destinationFile);
        }
    }
}
/**
 * Merge classified label files into one flat data set and copy the matching
 * images next to them.
 *
 * Every sub-directory of `allClassLabelsPath` is scanned; each label file in
 * it (except `classes.txt`) is copied to `<resultPath>/labels`, and the image
 * with the same base name is copied from `baseImagesPath` to
 * `<resultPath>/images`. When no image exists for a label an error is logged
 * and only the label is copied.
 *
 * @param {string} allClassLabelsPath - Directory whose sub-directories hold
 *     the classified label files.
 * @param {string} baseImagesPath - Directory containing all images.
 * @param {string} resultPath - Output root; `labels/` and `images/` are
 *     created inside it.
 * @return {Promise<void>}
 */
async function copyLabelToAll(allClassLabelsPath, baseImagesPath, resultPath) {
    console.log([
        '将尝试通过分类后的label文件进行最终数据获取',
        `labels:${allClassLabelsPath}`,
        `images:${baseImagesPath}`,
        `复制至 ${resultPath} 下`,
    ].join('\n'));

    const resultLabelsPath = path.join(resultPath, 'labels');
    const resultImagesPath = path.join(resultPath, 'images');
    // Image extensions to try, in order.
    const imageExts = ['jpg'];

    for (const subDirName of fs.readdirSync(allClassLabelsPath)) {
        if (subDirName === 'classes.txt') {
            console.log(`skip classes.txt`);
            continue;
        }
        const subDir = path.join(allClassLabelsPath, subDirName);
        console.log(subDir);

        for (const labelFile of fs.readdirSync(subDir)) {
            if (labelFile === 'classes.txt') {
                console.log(`skip classes.txt`);
                continue;
            }
            const labelName = labelFile.replace('.txt', '');
            const labelFilePath = path.join(subDir, labelFile);

            // Awaited so a failed copy rejects this function instead of
            // surfacing as an unhandled rejection.
            await cpFile(labelFilePath, path.join(resultLabelsPath, labelFile));

            // Check on disk: a joined path string is never empty, so only a
            // real existence test can detect a missing image.
            const imageName = imageExts
                .map((ext) => `${labelName}.${ext}`)
                .find((candidate) => fs.existsSync(path.join(baseImagesPath, candidate)));
            if (!imageName) {
                console.log(`[E] 图片不存在 ${labelFilePath} 可能是图片后缀异常 [ ${imageExts.join(', ')} ]`);
                continue;
            }

            const imagePath = path.join(baseImagesPath, imageName);
            const newImagePath = path.join(resultImagesPath, imageName);
            console.log(`移动图片 ${imagePath} => ${newImagePath}`);
            await cpFile(imagePath, newImagePath);
        }
    }
}
/**
 * Build a data set from a flat directory of label files: every label file and
 * the `.jpg` image with the same base name are copied to
 * `<resultPath>/labels` and `<resultPath>/images`.
 *
 * Entries of `labelsPath` that are not regular files are skipped, and so are
 * labels whose image is missing (an error is logged for each).
 *
 * @param {string} labelsPath - Directory containing the label files.
 * @param {string} baseImagePath - Directory containing all images.
 * @param {string} resultPath - Output root directory.
 * @returns {Promise<void>}
 */
async function getImageByLabel(labelsPath, baseImagePath, resultPath) {
    console.log([
        '将尝试通过label文件进行最终数据获取',
        `labels:${labelsPath}`,
        `images:${baseImagePath}`,
        `复制至 ${resultPath} 下`,
    ].join('\n'));

    const resultLabelsPath = path.join(resultPath, 'labels');
    const resultImagesPath = path.join(resultPath, 'images');

    for (const labelFile of fs.readdirSync(labelsPath)) {
        const labelFilePath = path.join(labelsPath, labelFile);
        if (!fs.statSync(labelFilePath).isFile()) {
            console.log(`[E] ${labelFile} 不是文件, 该函数暂不支持`);
            continue;
        }

        const labelName = labelFile.replace('.txt', '');
        const imageName = `${labelName}.jpg`;
        const imagePath = path.join(baseImagePath, imageName);

        // statSync throws on a missing path, so test existence first to reach
        // the "image not found" branch instead of crashing.
        const imageIsFile = fs.existsSync(imagePath) && fs.statSync(imagePath).isFile();
        if (!imageIsFile) {
            console.error(`[E] 无法找到图片文件${imagePath}`);
            continue;
        }

        await cpFile(labelFilePath, path.join(resultLabelsPath, labelFile));
        await cpFile(imagePath, path.join(resultImagesPath, imageName));
    }
}
/**
 * Normalise every line break in a string to CRLF.
 *
 * `\r\n`, a lone `\r` and a lone `\n` each become `\r\n`; an existing `\r\n`
 * is treated as one break and is not doubled.
 *
 * @param {string} input - Text with arbitrary line endings.
 * @return {string} The text with all line breaks written as `\r\n`.
 */
function replaceNewlines(input) {
    const anyLineBreak = /\r\n|\r|\n/;
    return input.split(anyLineBreak).join('\r\n');
}
/**
 * Rewrite every regular file directly inside the label directory so that all
 * of its line breaks are `\r\n`. Sub-directories are skipped. Each file is
 * read and written back in place, synchronously.
 */
function relineLabel() {
    const labelsPath = `E:\\图库\\ai\\epower\\all_tmpLabels_2`;

    for (const entryName of fs.readdirSync(labelsPath)) {
        const entryPath = path.join(labelsPath, entryName);
        const isRegularFile = fs.statSync(entryPath).isFile();
        if (isRegularFile) {
            console.log(`start reline ${entryPath}`);
            const normalized = replaceNewlines(fs.readFileSync(entryPath, 'utf-8'));
            fs.writeFileSync(entryPath, normalized);
        }
    }
}
/**
 * Convert a directory of XML annotation files to YOLO text labels.
 *
 * `classes.txt` in `basePath` provides the class list: the class id of a name
 * is the zero-based index of its line. The file is copied to `resultPath`, and
 * every `*.xml` file directly inside `basePath` is converted to
 * `<resultPath>/<same base name>.txt`. Directories are skipped; other
 * non-XML files are reported and skipped.
 *
 * @param {string} basePath - Directory with classes.txt and the XML files.
 * @param {string} resultPath - Output directory for the YOLO label files.
 * @returns {Promise<void>}
 */
async function xmlToYolo(basePath, resultPath) {
    const XML_EXT = '.xml';
    const classesPath = path.join(basePath, 'classes.txt');

    // Map class name -> line index. Whitespace is stripped from the names;
    // blank lines (e.g. after a trailing newline) are not registered, but they
    // still count for the index of the lines that follow.
    const classesMap = {};
    fs.readFileSync(classesPath, 'utf-8').split('\n').forEach((line, index) => {
        const className = line.replace(/\s/g, '');
        if (className === '') return;
        classesMap[className] = index;
    });

    await cpFile(classesPath, path.join(resultPath, 'classes.txt'));
    console.log(classesMap);

    for (const fileName of fs.readdirSync(basePath)) {
        // The class list has already been handled above.
        if (fileName === 'classes.txt') {
            continue;
        }
        const labelFilePath = path.join(basePath, fileName);
        if (fs.statSync(labelFilePath).isDirectory()) {
            continue;
        }
        if (!fileName.endsWith(XML_EXT)) {
            console.log(`[E] ${fileName} 不是xml, 该函数暂不支持解析`);
            continue;
        }
        // Swap only the trailing extension, not the first ".xml" in the name.
        const yoloFileName = `${fileName.slice(0, -XML_EXT.length)}.txt`;
        const resultFilePath = path.join(resultPath, yoloFileName);
        await yoloXml2yolo(labelFilePath, classesMap, resultFilePath);
    }
}
/**
 * Convert a corner-based bounding box into normalised centre/size values.
 *
 * The input objects are not modified.
 *
 * @param {{width: (number|string), height: (number|string)}} size - Image
 *     dimensions, in the same unit as the box coordinates.
 * @param {{xmin: (number|string), ymin: (number|string), xmax: (number|string), ymax: (number|string)}} box
 *     Box corners; values are parsed with `parseFloat`.
 * @returns {number[]} `[centerX, centerY, width, height]`, with the x values
 *     divided by the image width and the y values by the image height.
 */
function convert(size, box) {
    const dw = 1 / size.width;
    const dh = 1 / size.height;

    // Parse into locals instead of writing back into the caller's object.
    const xmin = parseFloat(box.xmin);
    const ymin = parseFloat(box.ymin);
    const xmax = parseFloat(box.xmax);
    const ymax = parseFloat(box.ymax);

    const centerX = (xmin + xmax) / 2;
    const centerY = (ymin + ymax) / 2;
    const boxWidth = xmax - xmin;
    const boxHeight = ymax - ymin;

    return [centerX * dw, centerY * dh, boxWidth * dw, boxHeight * dh];
}
/**
 * Parse an XML string into a plain object with xml2js.
 *
 * Parser options: single children are not wrapped in arrays
 * (`explicitArray: false`), attributes are merged into the element object
 * (`mergeAttrs: true`) and the root element is unwrapped
 * (`explicitRoot: false`).
 *
 * @param {string} xml - XML document text.
 * @returns {Promise<object>} Resolves with the parsed document; rejects with
 *     the parser error when the XML cannot be parsed.
 */
function _xmlToJson(xml) {
    return new Promise((resolve, reject) => {
        const parser = new xml2js.Parser({
            explicitArray: false,
            mergeAttrs: true,
            explicitRoot: false
        });
        parser.parseString(xml, (err, result) => {
            // Surface parse failures instead of resolving with undefined.
            if (err) return reject(err);
            resolve(result);
        });
    });
}
/**
 * Convert one XML annotation file into a YOLO label file.
 *
 * Each `<object>` becomes one line `<classId> <cx> <cy> <w> <h>` terminated by
 * `\r\n`, with the box values computed by `convert()` from the document's
 * `<size>`. A document without any `<object>` yields an empty label file.
 * Objects whose name is not in `classMap` are reported and skipped rather than
 * written with an `undefined` class id.
 *
 * @param {string} xmlPath - Path of the XML annotation file.
 * @param {Object<string, number>} classMap - Class name -> class id.
 * @param {string} resultPath - Path of the label file to write.
 * @returns {Promise<void>}
 * @throws {Error} When the XML has no `<size>` element.
 */
async function yoloXml2yolo(xmlPath, classMap, resultPath) {
    const xml = fs.readFileSync(xmlPath, 'utf-8');
    const obj = await _xmlToJson(xml);
    if (!obj || !obj.size) {
        throw new Error(`${xmlPath}: missing <size> element`);
    }

    // With explicitArray: false a single <object> is parsed as a plain object
    // and several as an array; no <object> at all leaves the key undefined.
    let objects = [];
    if (Array.isArray(obj.object)) {
        objects = obj.object;
    } else if (obj.object) {
        objects = [obj.object];
    }

    let str = "";
    for (const sub of objects) {
        // Own-property check so names like "toString" are not resolved
        // through the prototype chain.
        if (!Object.prototype.hasOwnProperty.call(classMap, sub.name)) {
            console.error(`[E] ${xmlPath} 未知类型 ${sub.name}`);
            continue;
        }
        const arr = convert(obj.size, sub.bndbox);
        str += `${classMap[sub.name]} ${arr.join(" ")}\r\n`;
    }

    await writeFile(resultPath, str);
}
// Script entry point: enable exactly one of the tasks below.
// Failures are reported and turned into a non-zero exit code instead of being
// left as an unhandled promise rejection.
xmlToYolo("E:\\图库\\验证数据集\\labels - 副本", "E:\\图库\\验证数据集\\result")
    .catch((err) => {
        console.error(err);
        process.exitCode = 1;
    });
// main();
/**
 * Copy classified label files to a new directory: parse => newLabels.
 * Only the files of the selected class combination are copied.
 */
// reCopyToRaw();
/**
 * Copy classified label files to a new directory: parse => tmpLabels_2.
 * Only the label files are copied.
 */
// copyLabel();
/**
 * Merge classified label files into a single folder (the classified labels are
 * expected to be sub-directories of the labels directory) and copy the
 * matching images.
 * Classified label directory: tmpLabels_2
 * Base image directory: images
 * Output label directory: all_tmpLabels_2
 * Output image directory: all_tmpImages_2
 */
// Classified label directory, e.g. E:\图库\ai\epower\tmpLabels_2
let allClassLabelsPath = `E:\\图库\\ai\\epower\\验证数据集_labels`
// Base image directory
let baseImagesPath = `E:\\图库\\ai\\epower\\images`
// Output directory
let resultPath = `E:\\图库\\ai\\epower\\验证数据集`
// copyLabelToAll(allClassLabelsPath, baseImagesPath, resultPath)
// getImageByLabel(`E:\\图库\\ai\\epower\\验证数据集_labels`, `E:\\图库\\ai\\epower\\images`, resultPath)
// relineLabel();
|