Bläddra i källkod

feat: yolo数据标注

kindring 11 månader sedan
förälder
incheckning
00b13c5221
1 ändrade filer med 430 tillägg och 0 borttagningar
  1. 430 0
      js/yolo 数据集工具/parse.js

+ 430 - 0
js/yolo 数据集工具/parse.js

@@ -0,0 +1,430 @@
+// 1. 读取指定目录文件, 获取文件内容.
+// 2. 解析内容, 获取图片的标注信息
+// 3. 统计标注信息
+
+const fs = require('fs');
+const path = require('path');
+const readline = require('readline');
+
+function readFile(filePath) {
+    return new Promise((resolve, reject) => {
+        fs.readFile(filePath, 'utf-8', (err, data) => {
+            if (err) return reject(err);
+            resolve(data);
+        });
+    })
+}
+
+function readDir(filePath) {
+    return new Promise((resolve, reject) => {
+        fs.readdir(filePath, (err, files) => {
+            if (err) return reject(err);
+            resolve(files);
+        });
+    })
+}
+
+function parseYolo(fileData) {
+    // 解析文件内容, 获取图片的标注信息
+    // 类型 x坐标 y坐标 宽度 高度
+    // 1 0.391297 0.095892 0.280578 0.179688
+    let result = [];
+    let lines = fileData.split('\n');
+    lines.forEach(line => {
+        // 判断是否为空行
+        if (line.trim() === '') return;
+        let [type, x, y, width, height] = line.split(' ');
+        result.push({ type, x, y, width, height });
+    });
+    return result;
+}
+
+
+
+async function writeFile(filePath, fileData) {
+    return new Promise((resolve, reject) => {
+        // 判断父级目录是否存在
+        let dirPath = path.dirname(filePath);
+        if (!fs.existsSync(dirPath)) {
+            fs.mkdirSync(dirPath, {recursive: true });
+        }
+
+        fs.writeFile(filePath, fileData, (err, data) => {
+            if (err) return reject(err);
+            resolve(data);
+        });
+    })
+}
+
+async function mvFile(filePath, targetPath) {
+    return new Promise((resolve, reject) => {
+        // 判断父级目录是否存在
+        let dirPath = path.dirname(filePath);
+        if (!fs.existsSync(dirPath)) {
+            fs.mkdirSync(dirPath, {recursive: true });
+        }
+        fs.rename(filePath, targetPath, (err, data) => {
+            if (err) return reject(err);
+            resolve(data);
+        });
+    })
+}
+
+async function cpFile(filePath, targetPath) {
+    return new Promise((resolve, reject) => {
+        // 判断父级目录是否存在
+        let dirPath = path.dirname(targetPath);
+        if (!fs.existsSync(dirPath)) {
+            console.log(`路径${dirPath} 不存在`)
+            fs.mkdirSync(dirPath, {recursive: true });
+            console.log('路径不存在')
+        }
+        // console.log('路径不存在')
+
+        fs.copyFile(filePath, targetPath, (err, data) => {
+            if (err) return reject(err);
+            resolve(data);
+        });
+    })
+}
+
+
+// 删除指定目录
+function rmDir(path) {
+    return new Promise((resolve, reject) => {
+        fs.rmdir(path, { recursive: true }, (err, data) => {
+            if (err) return reject(err);
+            resolve(data);
+        });
+    })
+}
+
+
+
+
+
+
+
+
+
+
+
+
+const dimensionType = {
+    "-1": "无标注",
+    "0": "吊车",
+    "1": "塔吊",
+    "2": "烟火",
+    "3": "施工机械",
+    "4": "导线异物",
+    "5": "烟雾"
+}
+
+
+async function main(){
+
+    // let labelsPath = `E:\\图库\\ai\\epower_v2\\newLabels`
+    // let labelsPath = `E:\\图库\\ai\\epower_v2\\处理labels`
+    // let labelsPath = `E:\\图库\\ai\\epower_v2\\labels`
+    let labelsPath = `E:\\图库\\ai\\epower\\all_tmpLabels_2`
+
+    let imagesPath = `E:\\图库\\ai\\epower\\images`
+    let emptyPath = `E:\\图库\\ai\\epower\\empty`
+    let imageExts = ['jpg']
+    let logsPath = `E:\\图库\\ai\\epower\\logs`
+
+    // 数据转移目录
+    let transferPath = `E:\\图库\\ai\\epower\\parse`
+    // 清除数据转移目录
+    // 判断数据转移目录是否存在
+    if (fs.existsSync(transferPath)) {        // 删除数据转移目录
+        await rmDir(transferPath);
+    }
+
+    let countMap = {};
+
+    let logStrArr = [];
+    let resultStrArr = [];
+    // 获取文件列表
+    let labelFiles = await readDir(labelsPath);
+    let imagesFiles = await readDir(imagesPath);
+
+    // 获取当前时间
+    let now = new Date();
+    let nowStr = `${now.getFullYear()}-${now.getMonth()+1}-${now.getDate()} ${now.getHours()}点${now.getMinutes()}分${now.getSeconds()}秒`;
+    resultStrArr.push(`[I] [START] 开始统计分析数据标注信息 ${nowStr}`);
+    resultStrArr.push(`[I] 原标注信息路径: ${labelsPath}`);
+    resultStrArr.push(`[I] 原图片文件路径: ${labelsPath}`);
+    resultStrArr.push(`[I] 输出文件路径: ${labelsPath}`);
+    resultStrArr.push(`[I] 输出文件路径: ${logsPath}`);
+    resultStrArr.push(`[I] 标注文件数量: ${labelFiles.length}`);
+    resultStrArr.push(`[I] 图片文件数量: ${imagesFiles.length}`);
+    resultStrArr.push(`[I] 图片后缀: ${imageExts}`);
+
+    // 判断labels中是否包含 classes.txt. 如果有则 将文件 复制至 transferPath 目录
+    if (fs.existsSync(path.join(labelsPath, 'classes.txt'))) {
+        await cpFile(path.join(labelsPath, 'classes.txt'), path.join(transferPath, 'classes.txt'))
+    }
+
+    // 将 resultArr 的内容, 转移至 logStrArr
+    resultStrArr.forEach(item => {
+        logStrArr.push(item);
+    });
+
+    for (const [i, labelFile] of labelFiles.entries()) {
+
+        let filePath = path.join(labelsPath, labelFile);
+        // 获取对应的图片路径, 与 label 文件的 名称相同
+        let labelName = labelFile.replace('.txt', '');
+        // 读取文件内容
+        let fileData = await readFile(filePath);
+        // 解析文件内容
+        let result = parseYolo(fileData);
+
+        // 图片后缀可能为 jpg, png 等
+        let imagePath = '';
+        let imageName = labelName;
+        for (let j = 0; j < imageExts.length; j++) {
+            imagePath = path.join(imagesPath, `${labelName}.${imageExts[0]}`);
+            imageName = `${labelName}.${imageExts[0]}`;
+            if (!fs.existsSync(imagePath)) {
+                imagePath = '';
+                continue;
+            }
+        }
+        // 判断图片是否存在
+        if (!imagePath) {
+            logStrArr.push(`[E] 图片不存在 ${filePath} ${fileData} 可能是图片后缀异常 [ ${imageExts.join(', ')} ]`);
+            continue;
+        }
+
+        readline.cursorTo(process.stdout, 0);
+        readline.clearScreenDown(process.stdout);
+        process.stdout.write(`${i} / ${labelFiles.length} | 解析文件: ${filePath} 中\n`);
+        // console.log(result);
+        // 空数据统计
+        if (result.length === 0) {
+            countMap['空'] = (countMap['空'] || 0) + 1;
+            // 复制图片文件到 emptyPath
+
+            let emptyFilePath = path.join(emptyPath, `${labelName}.${imageExts[0]}`);
+            logStrArr.push(`[E] ${labelName}内容为空 空文件 ${emptyFilePath}`);
+            console.log(`空文件: ${emptyFilePath}`)
+            cpFile(imagePath, emptyFilePath).then(_=>_)
+            continue;
+        }
+        // 获取文件标注类型
+        let fileType = {};
+        for (let j = 0; j < result.length; j++) {
+            // let type = item.type;
+            let typeName = dimensionType[result[j].type];
+            if (!typeName) {
+                console.log('未知类型')
+                logStrArr.push(`[E] 未知类型 ${filePath} ${fileData}`);
+                continue;
+            }
+            if (fileType[typeName]) {
+                fileType[typeName]++;
+            } else {
+                fileType[typeName] = 1;
+            }
+        }
+        console.log(fileType)
+        if (Object.keys(fileType).length === 0) {
+            countMap['异常标注'] = (countMap['异常标注'] || 0) + 1;
+            continue;
+        }
+
+        // 获取标注的所有类型, 将图片进行区分  类型1:数量  类型2:数量 类型1-类型2:数量
+        let fileTypeKeys = Object.keys(fileType);
+        // 排序
+        fileTypeKeys.sort();
+        // 数据转换
+        fileTypeKeys = fileTypeKeys.join('-');
+
+        // 转换类型
+        if (countMap[fileTypeKeys]) {
+            countMap[fileTypeKeys]++;
+        } else {
+            countMap[fileTypeKeys] = 1;
+        }
+
+
+
+        // 尝试复制文件
+        let transferFilePath = path.join(transferPath, fileTypeKeys);
+        let transferImagePath = path.join(transferFilePath, `images/${imageName}`  );
+        let transferLabelPath = path.join(transferFilePath, `labels/${labelFile}`);
+        // 文件转移
+        // 同步复制文件
+
+        cpFile(filePath, transferLabelPath).then(_=>_)
+        cpFile(imagePath, transferImagePath).then(_=>_)
+        logStrArr.push(`${fileTypeKeys}: ${countMap[fileTypeKeys]} ${filePath}  ${imagePath} ===>  ${transferLabelPath} ${transferImagePath} `);
+        console.log(logStrArr[logStrArr.length - 1]);
+    }
+
+    // 输出结果
+    console.log(countMap);
+    // 将分析结果写入文件
+    logStrArr.push(`\r\n${JSON.stringify(countMap, null, 4)}`);
+    resultStrArr.push(`${JSON.stringify(countMap, null, 4)}`);
+    // 保存结果
+    let logStr = logStrArr.join('\r\n');
+    let resultStr = resultStrArr.join('\r\n');
+    await writeFile(path.join(logsPath, 'result.txt'), resultStr);
+    await writeFile(path.join(logsPath, 'log.txt'), logStr);
+    console.log('end');
+}
+
+
+
+
+
+// 将新处理的labels 重新写回至原目录
+function reCopyToRaw(){
+    let labelsPath = `E:\\图库\\ai\\epower\\newLabels`
+    // 数据转移目录
+    let transferPath = `E:\\图库\\ai\\epower\\parse`
+        // "0": "吊车",
+        // "1": "塔吊",
+        // "2": "烟火",
+        // "3": "施工机械",
+        // "4": "导线异物",
+    // 吊车-导线异物
+    // 施工机械-导线异物
+    // 塔吊-导线异物
+    // 塔吊-施工机械
+
+    let fileTypeKeys = [1, 3]
+    fileTypeKeys = fileTypeKeys.map(item =>  dimensionType[item] ? dimensionType[item] : '未知类型');
+    let targetType = fileTypeKeys.join('-');
+    let targetPath = path.join(transferPath, targetType);
+    let targetLabelPath = path.join(targetPath, 'labels');
+    let logStrArr = [];
+    // 遍历目录
+    let labelFiles = fs.readdirSync(targetLabelPath);
+    for (let i = 0; i < labelFiles.length; i++) {
+        let labelFilePath = path.join(targetLabelPath, labelFiles[i]);
+        // 拷贝文件
+        cpFile(labelFilePath, path.join(labelsPath, labelFiles[i])).then(_=>_)
+    }
+    console.log(targetPath);
+}
+
+
+// 复制分类好的label文件至指定目录
+async function copyLabel() {
+    // let allLabelsPath = `E:\\图库\\ai\\epower_v2\\parse`
+    let allLabelsPath = `E:\\图库\\ai\\epower\\parse`
+    // let transferPath = `E:\\图库\\ai\\epower_v2\\tmpLabels_2`
+    let transferPath = `E:\\图库\\ai\\epower\\tmpLabels_2`
+    // 获取子目录
+    let subDirs = fs.readdirSync(allLabelsPath);
+    for (let i = 0; i < subDirs.length; i++) {
+
+        if (subDirs[i] == 'classes.txt'){
+            console.log(`skip classes.txt`)
+            continue;
+        }
+        let subDir = path.join(allLabelsPath, subDirs[i], 'labels');
+        console.log(subDir)
+        // 获取子目录下的labels
+        let labelFiles = fs.readdirSync(subDir);
+        for (let j = 0; j < labelFiles.length; j++) {
+            // 获取文件路径
+            let labelFilePath = path.join(subDir, labelFiles[j]);
+            // 复制文件 至新目录 新目录: 新目录/子目录名称
+            let newDir = path.join(transferPath, subDirs[i]);
+            let newFilePath = path.join(newDir, labelFiles[j]);
+
+            // 复制文件
+            await cpFile(labelFilePath, newFilePath)
+        }
+    }
+}
+
+// 将转移出来的label文件合并至一个文件夹内
+async function copyLabelToAll() {
+    let allLabelsPath = `E:\\图库\\ai\\epower\\tmpLabels_2`
+    let baseImagesPath = `E:\\图库\\ai\\epower\\images`
+    let transferPath = `E:\\图库\\ai\\epower\\all_tmpLabels_2`
+    let trabsferImaegsPath = `E:\\图库\\ai\\epower\\all_tmpImages_2`
+    let imageExts = ['jpg']
+
+    // 获取子目录
+    let subDirs = fs.readdirSync(allLabelsPath);
+    for (let i = 0; i < subDirs.length; i++) {
+
+        if (subDirs[i] == 'classes.txt'){
+            console.log(`skip classes.txt`)
+            continue;
+        }
+        let subDir = path.join(allLabelsPath, subDirs[i]);
+        console.log(subDir)
+        // 获取子目录下的labels
+        let labelFiles = fs.readdirSync(subDir);
+        for (let j = 0; j < labelFiles.length; j++) {
+            if (labelFiles[j] == 'classes.txt'){
+                console.log(`skip classes.txt`)
+                continue;
+            }
+            let labelName = labelFiles[j].replace('.txt', '');
+            // 获取文件路径
+            let labelFilePath = path.join(subDir, labelFiles[j]);
+            // 复制文件 至新目录 新目录: 新目录/子目录名称
+            let newFilePath = path.join(transferPath, labelFiles[j]);
+            // 复制文件
+            cpFile(labelFilePath, newFilePath)
+
+            // 复制图片
+            // 图片后缀可能为 jpg, png 等
+            let imagePath = '';
+            let imageName = `${labelName}.jpg`;
+            imagePath = path.join(baseImagesPath, imageName);
+
+            // 复制图片
+            let newImagePath = path.join(trabsferImaegsPath, imageName);
+            console.log(`移动图片 ${imagePath} => ${newImagePath}`)
+            // 判断图片是否存在
+            if (!imagePath) {
+                console.log(`[E] 图片不存在 ${labelFilePath} 可能是图片后缀异常 [ ${imageExts.join(', ')} ]`);
+                continue;
+            }
+            cpFile(imagePath, newImagePath)
+        }
+    }
+}
+
+function replaceNewlines(input) {
+    return input.replace(/(\r\n|\r|\n)/g, '\r\n');
+}
+// 将label 文件中的换行 从 \n 替换为 \r\n
+function relineLabel(){
+    let labelsPath = `E:\\图库\\ai\\epower\\all_tmpLabels_2`
+    // 遍历目录
+    let labelFiles = fs.readdirSync(labelsPath);
+    for (let i = 0; i < labelFiles.length; i++) {
+        let labelFilePath = path.join(labelsPath, labelFiles[i]);
+        if (!fs.statSync(labelFilePath).isFile()){
+            continue;
+        }
+        console.log(`start reline ${labelFilePath}`);
+        let fileContent = fs.readFileSync(labelFilePath, 'utf-8');
+        fileContent = replaceNewlines(fileContent);
+        fs.writeFileSync(labelFilePath, fileContent)
+    }
+}
+
+
+
+main();
+
+// reCopyToRaw();
+
+// copyLabel();
+
+// copyLabelToAll()
+
+// relineLabel();
+