parse.js 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430
  1. // 1. 读取指定目录文件, 获取文件内容.
  2. // 2. 解析内容, 获取图片的标注信息
  3. // 3. 统计标注信息
  4. const fs = require('fs');
  5. const path = require('path');
  6. const readline = require('readline');
  /**
   * Read a file as UTF-8 text.
   *
   * @param {string} filePath - Path of the file to read.
   * @returns {Promise<string>} Resolves with the file content; rejects with the
   *   error reported by `fs.readFile`.
   */
  function readFile(filePath) {
    const executor = (resolve, reject) => {
      fs.readFile(filePath, 'utf-8', (error, content) => {
        if (error) {
          reject(error);
        } else {
          resolve(content);
        }
      });
    };
    return new Promise(executor);
  }
  /**
   * List the entry names of a directory.
   *
   * @param {string} filePath - Path of the directory to list.
   * @returns {Promise<string[]>} Resolves with the entry names returned by
   *   `fs.readdir`; rejects with the error it reports.
   */
  function readDir(filePath) {
    const executor = (resolve, reject) => {
      fs.readdir(filePath, (error, entries) => {
        if (error) {
          reject(error);
        } else {
          resolve(entries);
        }
      });
    };
    return new Promise(executor);
  }
  /**
   * Parse the text of a YOLO label file into annotation records.
   *
   * Each non-blank line has the form `type x y width height`, for example
   * `1 0.391297 0.095892 0.280578 0.179688`.
   *
   * Lines may be terminated by LF, CRLF or a lone CR, and fields may be
   * separated by any run of whitespace; leading/trailing whitespace on a line
   * is ignored. Blank lines are skipped.
   *
   * @param {string} fileData - Raw content of the label file.
   * @returns {Array<{type: string, x: string, y: string, width: string, height: string}>}
   *   One record per non-blank line. Field values are kept as strings; fields
   *   missing from a short line are `undefined`.
   */
  function parseYolo(fileData) {
    const result = [];
    // Split on every newline flavour so a trailing '\r' never leaks into `height`.
    for (const rawLine of fileData.split(/\r\n|\r|\n/)) {
      const line = rawLine.trim();
      if (line === '') continue;
      // Split on whitespace runs so tabs or repeated spaces do not yield empty fields.
      const [type, x, y, width, height] = line.split(/\s+/);
      result.push({ type, x, y, width, height });
    }
    return result;
  }
  /**
   * Write data to a file, creating the parent directory first when it does not
   * exist yet.
   *
   * @param {string} filePath - Destination file path.
   * @param {string|Buffer} fileData - Content handed to `fs.writeFile`.
   * @returns {Promise<undefined>} Resolves once the write has completed; rejects
   *   with the error raised while creating the directory or writing the file.
   */
  async function writeFile(filePath, fileData) {
    return new Promise((resolve, reject) => {
      const parentDir = path.dirname(filePath);
      const parentMissing = !fs.existsSync(parentDir);
      if (parentMissing) {
        fs.mkdirSync(parentDir, { recursive: true });
      }
      const onWritten = (error, value) => {
        if (error) {
          reject(error);
          return;
        }
        resolve(value);
      };
      fs.writeFile(filePath, fileData, onWritten);
    });
  }
  /**
   * Move (rename) a file, creating the destination's parent directory when it
   * does not exist yet.
   *
   * @param {string} filePath - Path of the existing file to move.
   * @param {string} targetPath - Path the file should be moved to.
   * @returns {Promise<undefined>} Resolves once the rename has completed; rejects
   *   with the error raised while creating the directory or renaming the file.
   */
  async function mvFile(filePath, targetPath) {
    // It is the destination directory that must exist for the rename to succeed;
    // `recursive: true` makes this a no-op when it is already there.
    fs.mkdirSync(path.dirname(targetPath), { recursive: true });
    return new Promise((resolve, reject) => {
      fs.rename(filePath, targetPath, (err) => {
        if (err) return reject(err);
        resolve();
      });
    });
  }
  /**
   * Copy a file, creating the destination's parent directory first when it does
   * not exist yet (two console messages are printed in that case).
   *
   * @param {string} filePath - Path of the file to copy.
   * @param {string} targetPath - Path of the copy to create.
   * @returns {Promise<undefined>} Resolves once the copy has completed; rejects
   *   with the error raised while creating the directory or copying the file.
   */
  async function cpFile(filePath, targetPath) {
    // Create the target directory on demand, logging before and after.
    const ensureTargetDir = () => {
      const targetDir = path.dirname(targetPath);
      if (fs.existsSync(targetDir)) {
        return;
      }
      console.log(`路径${targetDir} 不存在`);
      fs.mkdirSync(targetDir, { recursive: true });
      console.log('路径不存在');
    };

    return new Promise((resolve, reject) => {
      ensureTargetDir();
      fs.copyFile(filePath, targetPath, (error, value) => {
        if (error) {
          reject(error);
          return;
        }
        resolve(value);
      });
    });
  }
  /**
   * Delete a directory together with everything inside it.
   *
   * Uses `fs.rm` when the running Node.js provides it, because the `recursive`
   * option of `fs.rmdir` is deprecated; older runtimes fall back to `fs.rmdir`.
   *
   * @param {string} path - Directory to remove (note: shadows the `path` module
   *   inside this function).
   * @returns {Promise<undefined>} Resolves once the directory is gone; rejects
   *   with the error reported by the underlying fs call (e.g. when the path
   *   does not exist).
   */
  function rmDir(path) {
    return new Promise((resolve, reject) => {
      const remove = typeof fs.rm === 'function' ? fs.rm : fs.rmdir;
      remove(path, { recursive: true }, (err) => {
        if (err) return reject(err);
        resolve();
      });
    });
  }
  // Maps the class id found in the first column of a label line to the
  // human-readable class name used for statistics and output directory names.
  const dimensionType = {
    '-1': '无标注', // unlabelled
    0: '吊车', // crane
    1: '塔吊', // tower crane
    2: '烟火', // fire
    3: '施工机械', // construction machinery
    4: '导线异物', // foreign object on a conductor
    5: '烟雾', // smoke
  };
  /**
   * Find the image that belongs to a label file by trying each extension in turn.
   *
   * @param {string} imagesDir - Directory containing the images.
   * @param {string} labelName - Label file name without its `.txt` suffix.
   * @param {string[]} imageExts - Candidate extensions (without the dot), in priority order.
   * @returns {{imageName: string, imagePath: string}|null} The first existing
   *   candidate, or `null` when none of them exists.
   */
  function findImageForLabel(imagesDir, labelName, imageExts) {
    for (const ext of imageExts) {
      const imageName = `${labelName}.${ext}`;
      const imagePath = path.join(imagesDir, imageName);
      if (fs.existsSync(imagePath)) {
        return { imageName, imagePath };
      }
    }
    return null;
  }

  /**
   * Analyse every YOLO label file in the labels directory and sort the data set
   * by the combination of classes each image contains.
   *
   * For each label file the matching image is looked up; then
   *  - label files without annotations are counted under '空' and their image is
   *    copied to the "empty" directory;
   *  - files whose annotations are all of unknown type are counted under '异常标注';
   *  - otherwise the label and image are copied to
   *    `<transferPath>/<sorted class names joined by '-'>/{labels,images}/`.
   * The per-combination counts are printed and written, together with a detailed
   * log, to `result.txt` and `log.txt` in the logs directory.
   *
   * The transfer directory is deleted before the run starts. All paths are
   * hard-coded below.
   *
   * @returns {Promise<void>} Rejects when any directory listing, read, copy or
   *   write fails.
   */
  async function main() {
    const labelsPath = `E:\\图库\\ai\\epower\\all_tmpLabels_2`;
    const imagesPath = `E:\\图库\\ai\\epower\\images`;
    const emptyPath = `E:\\图库\\ai\\epower\\empty`;
    const imageExts = ['jpg'];
    const logsPath = `E:\\图库\\ai\\epower\\logs`;
    // Directory the classified data is copied into.
    const transferPath = `E:\\图库\\ai\\epower\\parse`;

    // Start from a clean transfer directory.
    if (fs.existsSync(transferPath)) {
      await rmDir(transferPath);
    }

    const countMap = {};
    const logStrArr = [];
    const resultStrArr = [];

    const labelFiles = await readDir(labelsPath);
    const imagesFiles = await readDir(imagesPath);

    const now = new Date();
    const nowStr = `${now.getFullYear()}-${now.getMonth()+1}-${now.getDate()} ${now.getHours()}点${now.getMinutes()}分${now.getSeconds()}秒`;
    resultStrArr.push(`[I] [START] 开始统计分析数据标注信息 ${nowStr}`);
    resultStrArr.push(`[I] 原标注信息路径: ${labelsPath}`);
    resultStrArr.push(`[I] 原图片文件路径: ${imagesPath}`);
    resultStrArr.push(`[I] 输出文件路径: ${transferPath}`);
    resultStrArr.push(`[I] 输出文件路径: ${logsPath}`);
    resultStrArr.push(`[I] 标注文件数量: ${labelFiles.length}`);
    resultStrArr.push(`[I] 图片文件数量: ${imagesFiles.length}`);
    resultStrArr.push(`[I] 图片后缀: ${imageExts}`);

    // Carry classes.txt over to the transfer directory when the labels ship one.
    if (fs.existsSync(path.join(labelsPath, 'classes.txt'))) {
      await cpFile(path.join(labelsPath, 'classes.txt'), path.join(transferPath, 'classes.txt'));
    }

    // The detailed log starts with the same header as the summary.
    logStrArr.push(...resultStrArr);

    for (const [i, labelFile] of labelFiles.entries()) {
      // classes.txt is the class list, not an annotation file.
      if (labelFile === 'classes.txt') continue;

      const filePath = path.join(labelsPath, labelFile);
      // The image shares the label file's base name.
      const labelName = path.basename(labelFile, '.txt');
      const fileData = await readFile(filePath);
      const result = parseYolo(fileData);

      const image = findImageForLabel(imagesPath, labelName, imageExts);
      if (!image) {
        logStrArr.push(`[E] 图片不存在 ${filePath} ${fileData} 可能是图片后缀异常 [ ${imageExts.join(', ')} ]`);
        continue;
      }
      const { imageName, imagePath } = image;

      // Redraw the progress line in place.
      readline.cursorTo(process.stdout, 0);
      readline.clearScreenDown(process.stdout);
      process.stdout.write(`${i} / ${labelFiles.length} | 解析文件: ${filePath} 中\n`);

      // Label file without any annotation: count it and set the image aside.
      if (result.length === 0) {
        countMap['空'] = (countMap['空'] || 0) + 1;
        const emptyFilePath = path.join(emptyPath, imageName);
        logStrArr.push(`[E] ${labelName}内容为空 空文件 ${emptyFilePath}`);
        console.log(`空文件: ${emptyFilePath}`);
        await cpFile(imagePath, emptyFilePath);
        continue;
      }

      // Count the annotations of this file per class name.
      const fileType = {};
      for (const annotation of result) {
        const typeName = dimensionType[annotation.type];
        if (!typeName) {
          console.log('未知类型');
          logStrArr.push(`[E] 未知类型 ${filePath} ${fileData}`);
          continue;
        }
        fileType[typeName] = (fileType[typeName] || 0) + 1;
      }
      console.log(fileType);

      // Every annotation had an unknown class id.
      if (Object.keys(fileType).length === 0) {
        countMap['异常标注'] = (countMap['异常标注'] || 0) + 1;
        continue;
      }

      // The sorted, '-'-joined class names identify the combination; they are
      // both the statistics key and the output sub-directory name.
      const fileTypeKeys = Object.keys(fileType).sort().join('-');
      countMap[fileTypeKeys] = (countMap[fileTypeKeys] || 0) + 1;

      const transferFilePath = path.join(transferPath, fileTypeKeys);
      const transferImagePath = path.join(transferFilePath, 'images', imageName);
      const transferLabelPath = path.join(transferFilePath, 'labels', labelFile);

      // Wait for both copies so failures surface here and the log never runs
      // ahead of the file system.
      await Promise.all([
        cpFile(filePath, transferLabelPath),
        cpFile(imagePath, transferImagePath),
      ]);

      logStrArr.push(`${fileTypeKeys}: ${countMap[fileTypeKeys]} ${filePath} ${imagePath} ===> ${transferLabelPath} ${transferImagePath} `);
      console.log(logStrArr[logStrArr.length - 1]);
    }

    console.log(countMap);

    // Persist the summary and the detailed log.
    logStrArr.push(`\r\n${JSON.stringify(countMap, null, 4)}`);
    resultStrArr.push(`${JSON.stringify(countMap, null, 4)}`);
    const logStr = logStrArr.join('\r\n');
    const resultStr = resultStrArr.join('\r\n');
    await writeFile(path.join(logsPath, 'result.txt'), resultStr);
    await writeFile(path.join(logsPath, 'log.txt'), logStr);
    console.log('end');
  }
  /**
   * Copy the (re-processed) label files of one class combination from the
   * transfer directory back into the raw labels directory.
   *
   * The combination is selected by the class ids in `fileTypeKeys` below; its
   * directory name is the sorted class names joined by '-', which is how
   * `main` names the directories it creates.
   *
   * Class ids: 0 吊车, 1 塔吊, 2 烟火, 3 施工机械, 4 导线异物.
   *
   * @returns {Promise<void>} Resolves when every label file has been copied;
   *   rejects when the directory cannot be listed or a copy fails.
   */
  async function reCopyToRaw() {
    const labelsPath = `E:\\图库\\ai\\epower\\newLabels`;
    // Directory holding the classified data.
    const transferPath = `E:\\图库\\ai\\epower\\parse`;

    const fileTypeKeys = [1, 3];
    // Sort the names exactly as `main` does so the directory name matches
    // regardless of the order the ids are listed in.
    const targetType = fileTypeKeys
      .map((item) => (dimensionType[item] ? dimensionType[item] : '未知类型'))
      .sort()
      .join('-');
    const targetPath = path.join(transferPath, targetType);
    const targetLabelPath = path.join(targetPath, 'labels');

    const labelFiles = fs.readdirSync(targetLabelPath);
    // Await the copies so a failure rejects this call instead of being lost.
    await Promise.all(
      labelFiles.map((labelFile) =>
        cpFile(path.join(targetLabelPath, labelFile), path.join(labelsPath, labelFile)),
      ),
    );
    console.log(targetPath);
  }
  /**
   * Copy the classified label files into a separate directory tree.
   *
   * For every entry of the classified-data directory except `classes.txt`, the
   * files in `<entry>/labels` are copied, one after another, to
   * `<transferPath>/<entry>/`.
   *
   * @returns {Promise<void>} Resolves when all files have been copied.
   */
  async function copyLabel() {
    const sourceRoot = `E:\\图库\\ai\\epower\\parse`;
    const destRoot = `E:\\图库\\ai\\epower\\tmpLabels_2`;

    for (const entryName of fs.readdirSync(sourceRoot)) {
      if (entryName === 'classes.txt') {
        console.log(`skip classes.txt`);
        continue;
      }

      // Labels of one class combination live in its `labels` sub-directory.
      const labelsDir = path.join(sourceRoot, entryName, 'labels');
      console.log(labelsDir);

      for (const labelName of fs.readdirSync(labelsDir)) {
        const fromPath = path.join(labelsDir, labelName);
        // Destination layout: <destRoot>/<combination name>/<label file>.
        const toPath = path.join(path.join(destRoot, entryName), labelName);
        await cpFile(fromPath, toPath);
      }
    }
  }
  /**
   * Merge the per-combination label directories into one flat labels directory
   * and copy the matching images into one flat images directory.
   *
   * `classes.txt` entries are skipped at both levels. A label whose image cannot
   * be found under any of the configured extensions is still copied, but an
   * error line is printed and no image is copied for it.
   *
   * @returns {Promise<void>} Resolves when all copies have completed; rejects
   *   when a directory cannot be listed or a copy fails.
   */
  async function copyLabelToAll() {
    const allLabelsPath = `E:\\图库\\ai\\epower\\tmpLabels_2`;
    const baseImagesPath = `E:\\图库\\ai\\epower\\images`;
    const transferPath = `E:\\图库\\ai\\epower\\all_tmpLabels_2`;
    const transferImagesPath = `E:\\图库\\ai\\epower\\all_tmpImages_2`;
    const imageExts = ['jpg'];

    const subDirs = fs.readdirSync(allLabelsPath);
    for (const subDirName of subDirs) {
      if (subDirName === 'classes.txt') {
        console.log(`skip classes.txt`);
        continue;
      }
      const subDir = path.join(allLabelsPath, subDirName);
      console.log(subDir);

      const labelFiles = fs.readdirSync(subDir);
      for (const labelFile of labelFiles) {
        if (labelFile === 'classes.txt') {
          console.log(`skip classes.txt`);
          continue;
        }
        const labelName = path.basename(labelFile, '.txt');
        const labelFilePath = path.join(subDir, labelFile);

        // Copy the label into the flat labels directory.
        await cpFile(labelFilePath, path.join(transferPath, labelFile));

        // The image shares the label's base name; take the first extension
        // for which a file actually exists.
        const imageName = imageExts
          .map((ext) => `${labelName}.${ext}`)
          .find((candidate) => fs.existsSync(path.join(baseImagesPath, candidate)));
        if (!imageName) {
          console.log(`[E] 图片不存在 ${labelFilePath} 可能是图片后缀异常 [ ${imageExts.join(', ')} ]`);
          continue;
        }

        const imagePath = path.join(baseImagesPath, imageName);
        const newImagePath = path.join(transferImagesPath, imageName);
        console.log(`移动图片 ${imagePath} => ${newImagePath}`);
        await cpFile(imagePath, newImagePath);
      }
    }
  }
  /**
   * Normalise every line break in a string to CRLF.
   *
   * @param {string} input - Text whose line breaks may be CRLF, CR or LF.
   * @returns {string} The text with each line break replaced by `\r\n`.
   */
  function replaceNewlines(input) {
    // `\r\n` is listed first so a CRLF pair counts as a single break.
    const segments = input.split(/\r\n|\r|\n/);
    return segments.join('\r\n');
  }
  /**
   * Rewrite every regular file in the labels directory in place so that its
   * line breaks are CRLF. Entries that are not regular files are skipped.
   */
  function relineLabel() {
    const labelsDir = `E:\\图库\\ai\\epower\\all_tmpLabels_2`;

    for (const entryName of fs.readdirSync(labelsDir)) {
      const entryPath = path.join(labelsDir, entryName);
      const isRegularFile = fs.statSync(entryPath).isFile();
      if (isRegularFile) {
        console.log(`start reline ${entryPath}`);
        const normalized = replaceNewlines(fs.readFileSync(entryPath, 'utf-8'));
        fs.writeFileSync(entryPath, normalized);
      }
    }
  }
  // Entry point: run the analysis; report a failure and exit with a non-zero
  // status instead of leaving the returned promise unhandled.
  main().catch((err) => {
    console.error(err);
    process.exitCode = 1;
  });
  356. // reCopyToRaw();
  357. // copyLabel();
  358. // copyLabelToAll()
  359. // relineLabel();