[关闭]
@jeffjade 2016-10-10T15:04:14.000000Z 字数 6977 阅读 1419

Nodejs 各种小脚本

Node.js


Node.js 遍历文件夹,获取该文件夹下所有文件的列表

  1. var fs = require('fs');
  // Directory to scan, taken from the first command-line argument.
  const root_path = process.argv[2];
  // Name of the file that receives the generated listing.
  const w_file = 'res.lst';
  /**
   * Recursively collects every non-directory entry below `root`.
   *
   * @param {string} root - Directory to walk.
   * @param {string} [base=root] - Leading path prefix that is replaced by '.'
   *   in every returned path. Recursive calls pass the top-level directory
   *   here so results stay relative to where the walk started.
   * @returns {string[]} One './…' path per non-directory entry.
   * @throws {Error} Whatever fs.readdirSync / fs.lstatSync throw (for example
   *   when `root` does not exist).
   */
  function getAllFiles(root, base = root) {
    const res = [];
    fs.readdirSync(root).forEach(function (file) {
      const pathname = root + '/' + file;
      // lstat does not follow symbolic links, so a link to a directory is
      // listed as an entry instead of being descended into.
      if (fs.lstatSync(pathname).isDirectory()) {
        // Append in place instead of re-copying the accumulator per directory.
        Array.prototype.push.apply(res, getAllFiles(pathname, base));
      } else {
        res.push('.' + pathname.slice(base.length));
      }
    });
    return res;
  }
  // Build the listing (one relative path per line) and store it in `w_file`
  // inside the scanned directory.
  var w_content = getAllFiles(root_path).join('\n');
  // Insert a separator only when root_path does not already end with one, so
  // "dir" and "dir/" both resolve to "dir/res.lst".
  var w_path = /[\\/]$/.test(root_path) ? root_path + w_file : root_path + '/' + w_file;
  // fs.writeFile creates the file when it is missing and truncates it
  // otherwise, so no existence probe is needed beforehand.
  fs.writeFile(w_path, w_content, 'utf8', function (e) {
    if (e) throw e;
  });

参见: http://www.cnblogs.com/litao229/archive/2012/01/04/2312393.html

Node.js 按行读取文件

  1. var fs = require('fs'),
  2. readline = require('readline');
  // Read ./test.txt line by line and echo each line with its 1-based number.
  var input = fs.createReadStream('./test.txt');
  // Without a listener a missing or unreadable file raises an unhandled
  // 'error' event and crashes the process.
  input.on('error', function (err) {
    console.error('cannot read ./test.txt: ' + err.message);
  });
  var rd = readline.createInterface({
    input: input,
    output: process.stdout,
    terminal: false
  });
  var i = 1;
  rd.on('line', function (line) {
    console.log('第' + i + '行:' + line);
    // `write` is an optional per-line hook; it is only invoked when the
    // surrounding program defines it, so its absence is not a ReferenceError.
    if (typeof write === 'function') {
      write(line);
    }
    i++;
  });

Node.js 过滤单个页面中的不合法(无法访问的)链接:

  1. var http = require("http"),
  2. fs = require('fs'),
  3. cheerio = require("cheerio");
  // var root_path=process.argv[2];
  // Page whose links are collected.
  const url = "http://www.jeffjade.com/2016/03/30/104-front-end-tutorial/";
  // Maps each link's position on the page to its href.
  const saveLinkArr = {};
  /**
   * Downloads `url` over HTTP and hands the response body to `callback`.
   *
   * @param {string} url - Address to fetch.
   * @param {function(?string): void} callback - Called exactly once with the
   *   body text, or with null when the request or the response fails.
   */
  function download(url, callback) {
    var finished = false;
    // Report only the first outcome, whichever event delivers it.
    function finish(result) {
      if (finished) {
        return;
      }
      finished = true;
      callback(result);
    }
    http.get(url, function (res) {
      var data = "";
      // Decode at the stream level: appending raw Buffers to a string would
      // corrupt a multi-byte character that is split across two chunks.
      res.setEncoding('utf8');
      res.on('data', function (chunk) {
        data += chunk;
      });
      res.on('end', function () {
        finish(data);
      });
      res.on('error', function () {
        finish(null);
      });
    }).on('error', function () {
      finish(null);
    });
  }
  /**
   * Serialises `resData` as indented JSON into ./jeffJadeLinkData.json.
   *
   * @param {*} resData - Any JSON-serialisable value.
   * @throws {Error} Rethrows a write failure from inside the fs callback.
   */
  function writeOut(resData) {
    // The content is JSON and is read back via './jeffJadeLinkData.json',
    // so the file carries the .json extension.
    var filepath = './jeffJadeLinkData.json';
    var resJson = JSON.stringify(resData, null, 2);
    fs.writeFile(filepath, resJson, function (e) {
      if (e) throw e;
    });
  }
  // Fetch the page, collect the href of every anchor inside a <tbody>, keyed
  // by its position, and save the result.
  download(url, function (data) {
    if (!data) {
      console.log("download error !");
      return;
    }
    var $ = cheerio.load(data);
    $("tbody a").each(function (i, e) {
      var href = $(e).attr("href");
      console.log(i, href);
      saveLinkArr[i] = href;
    });
    writeOut(saveLinkArr);
    console.log("done");
  });
  41. // ==================================================
  42. var http = require('http'),
  43. parse = require('url').parse;
  /**
   * Issues an HTTP GET for `url`, allowing 5 s for the response to start and
   * a further 5 s for the body to finish.
   *
   * @param {string} url - Address to fetch; always requested over plain HTTP,
   *   on port 80 unless the URL names a port.
   * @param {function(?Error, Object=): void} callback - Called exactly once.
   *   On success the first argument is null and the second is the response
   *   with the collected body attached as a Buffer in `res.body`. On failure
   *   the first argument is the error; the response is passed too when one
   *   had already been received.
   */
  function urlget(url, callback) {
    var info = parse(url);
    var options = {
      host: info.hostname,
      port: info.port || 80,
      path: (info.pathname || '/') + (info.search || ''),
      method: 'GET'
    };
    var request_timeout = null;
    var response_timeout = null;
    var done = false;

    // Aborting a request also triggers 'error', 'end' or 'aborted', so every
    // exit path goes through this guard and only the first one is reported.
    function finish(err, res) {
      if (done) {
        return;
      }
      done = true;
      clearTimeout(request_timeout);
      clearTimeout(response_timeout);
      callback(err, res);
    }

    var req = http.request(options, function (res) {
      clearTimeout(request_timeout);
      var chunks = [];
      var length = 0;
      response_timeout = setTimeout(function () {
        finish(new Error('Response timeout'));
        req.abort();
      }, 5000);
      res.on('data', function (chunk) {
        length += chunk.length;
        chunks.push(chunk);
      }).on('end', function () {
        res.body = Buffer.concat(chunks, length);
        finish(null, res);
      }).on('error', function (err) {
        finish(err, res);
      }).on('aborted', function () {
        finish(new Error('Response aborted'), res);
      });
    });
    req.on('error', function (err) {
      finish(err);
    });
    request_timeout = setTimeout(function () {
      finish(new Error('Request timeout'));
      req.abort();
    }, 5000);
    req.end();
  }
  91. // https://fengmk2.com/ppt/those-things-using-nodejs/#slide-10

===============================

  1. var http = require('http'),
  2. parse = require('url').parse,
  3. fs = require('fs'),
  4. needFilterList = require('./jeffJadeLinkData.json');
  /**
   * Probes `url` with an HTTP GET and reports it through `callback` only when
   * the probe fails; a response that completes normally produces no call.
   *
   * Failure means: no response within 5 s, body not finished within a further
   * 5 s, a request error (DNS, refused connection, …), a response error, or
   * an aborted response.
   *
   * @param {string} url - Address to probe; always requested over plain HTTP,
   *   on port 80 unless the URL names a port.
   * @param {function(string, Error, Object=): void} callback - Called at most
   *   once as callback(url, error, response); the response is undefined when
   *   none had been received.
   */
  function requestUrl(url, callback) {
    var info = parse(url);
    var options = {
      host: info.hostname,
      port: info.port || 80,
      path: (info.pathname || '/') + (info.search || ''),
      method: 'GET'
    };
    var request_timeout = null;
    var response_timeout = null;
    var settled = false;

    // Marks the probe as decided and stops both timers. Returns false when
    // an earlier event already decided it.
    function settle() {
      if (settled) {
        return false;
      }
      settled = true;
      clearTimeout(request_timeout);
      clearTimeout(response_timeout);
      return true;
    }

    // Aborting a request also triggers 'error' or 'aborted'; the settle()
    // guard keeps those follow-up events from reporting the URL again.
    function fail(err, res) {
      if (settle()) {
        callback(url, err, res);
      }
    }

    var req = http.request(options, function (res) {
      clearTimeout(request_timeout);
      response_timeout = setTimeout(function () {
        fail(new Error('Response timeout'));
        req.abort();
      }, 5000);
      // A 'data' listener is required so the body is consumed and 'end' fires.
      res.on('data', function () {
      }).on('end', function () {
        settle();
      }).on('error', function (err) {
        fail(err, res);
      }).on('aborted', function () {
        fail(new Error('Response aborted'), res);
      });
    });
    // Connection-level failures are exactly the broken links being looked
    // for, so they are reported rather than only logged.
    req.on('error', function (err) {
      fail(err);
    });
    request_timeout = setTimeout(function () {
      fail(new Error('Request timeout'));
      req.abort();
    }, 5000);
    req.end();
  }
  /* ===== Error reporting ===== */
  // URLs that failed the probe, in the order they were first reported.
  const errUrlList = [];

  /**
   * Records a failing URL and rewrites the report file.
   *
   * @param {string} errUrl - URL that failed.
   * @param {Error} errInfo - Failure reason (not used here).
   * @param {Object} [errRes] - Response, when one was received (not used here).
   */
  function callback(errUrl, errInfo, errRes) {
    console.log("err url: " + errUrl);
    // A URL may be reported more than once (several events for one request,
    // or a link that appears twice on the page); list it a single time.
    if (errUrlList.indexOf(errUrl) !== -1) {
      return;
    }
    errUrlList.push(errUrl);
    writeOut(errUrlList);
  }
  /**
   * Starts a probe for every link stored in `needFilterList`; failures are
   * delivered to `callback`. All probes are started without waiting for one
   * another.
   */
  function jadeFilterLink() {
    // Object.keys visits own properties only, unlike for…in.
    Object.keys(needFilterList).forEach(function (key) {
      console.log("now turn:" + needFilterList[key]);
      requestUrl(needFilterList[key], callback);
    });
  }
  /**
   * Serialises `resData` as indented JSON into ./err_url_list.json,
   * replacing any previous content.
   *
   * @param {*} resData - Any JSON-serialisable value.
   * @throws {Error} When the file cannot be written.
   */
  function writeOut(resData) {
    var filepath = './err_url_list.json';
    var resJson = JSON.stringify(resData, null, 2);
    // This runs once per failing URL, often in quick succession. Overlapping
    // asynchronous writes to one path are unsafe, so write synchronously and
    // let the most recent call determine the file content.
    fs.writeFileSync(filepath, resJson);
  }
  // Entry point: start probing every collected link.
  jadeFilterLink();


  1. var http = require("http"),
  2. fs = require('fs'),
  3. cheerio = require("cheerio"),
  4. parse = require('url').parse;
  // var root_path=process.argv[2];
  // Page whose links are collected and probed.
  const url = "http://www.jeffjade.com/2016/03/30/104-front-end-tutorial/";
  // Maps each link's position on the page to its href.
  const saveLinkArr = {};
  // One "<url> <error>" entry per link that failed the probe.
  const errUrlList = [];
  /**
   * Downloads `url` over HTTP and hands the response body to `callback`.
   *
   * @param {string} url - Address to fetch.
   * @param {function(?string): void} callback - Called exactly once with the
   *   body text, or with null when the request or the response fails.
   */
  function download(url, callback) {
    var settled = false;
    // Whichever event arrives first decides the outcome; later ones are ignored.
    function deliver(body) {
      if (settled) {
        return;
      }
      settled = true;
      callback(body);
    }
    var request = http.get(url, function (res) {
      var data = "";
      // Let the stream decode UTF-8 so a character split across two chunks
      // is not corrupted by string concatenation of raw Buffers.
      res.setEncoding('utf8');
      res.on('data', function (chunk) {
        data += chunk;
      });
      res.on('end', function () {
        deliver(data);
      });
      res.on('error', function () {
        deliver(null);
      });
    });
    request.on('error', function () {
      deliver(null);
    });
  }
  // Fetch the page, collect the href of every anchor inside a <tbody>, keyed
  // by its position, then probe each collected link.
  download(url, function (data) {
    if (!data) {
      console.log("download error !");
      return;
    }
    var $ = cheerio.load(data);
    $("tbody a").each(function (i, e) {
      var href = $(e).attr("href");
      console.log(i, href);
      saveLinkArr[i] = href;
    });
    jadeFilterLink(saveLinkArr);
    console.log("url-link done !");
  });
  /**
   * Probes `url` with an HTTP GET and reports the outcome through `callback`.
   *
   * @param {string} url - Address to probe; always requested over plain HTTP,
   *   on port 80 unless the URL names a port.
   * @param {function(?Error, string): void} callback - Called exactly once as
   *   callback(error, url); error is null when the whole body was received,
   *   and an Error when no response started within 60 s or the request or
   *   response failed.
   */
  function requestUrl(url, callback) {
    var info = parse(url);
    var options = {
      host: info.hostname,
      port: info.port || 80,
      path: (info.pathname || '/') + (info.search || ''),
      method: 'GET'
    };
    var request_timeout = null;
    var done = false;

    // Aborting the request raises a follow-up "socket hang up" error, and a
    // broken connection can surface on both the request and the response;
    // only the first outcome is reported.
    function finish(err) {
      if (done) {
        return;
      }
      done = true;
      clearTimeout(request_timeout);
      callback(err, url);
    }

    var req = http.request(options, function (res) {
      clearTimeout(request_timeout);
      var chunks = [];
      var length = 0;
      res.on('data', function (chunk) {
        length += chunk.length;
        chunks.push(chunk);
      }).on('end', function () {
        res.body = Buffer.concat(chunks, length);
        finish(null);
      }).on('error', function (err) {
        finish(err);
      });
    });
    req.on('error', function (err) {
      finish(err);
    });
    request_timeout = setTimeout(function () {
      finish(new Error('Request timeout'));
      req.abort();
    }, 60000);
    req.end();
  }
  /**
   * Probe result handler: a failure is logged, appended to `errUrlList` as
   * "<url> <error>", and the report file is rewritten. Successes are ignored.
   *
   * @param {?Error} err - Failure reason, or a falsy value on success.
   * @param {string} errUrl - URL the result belongs to.
   */
  function callback(err, errUrl) {
    if (!err) {
      return;
    }
    console.log(err + ' ' + errUrl);
    errUrlList.push(errUrl + ' ' + err);
    outPrint(errUrlList);
  }
  /**
   * Starts a probe for every link in `needFilterList`; each result is
   * delivered to `callback`. All probes are started without waiting for one
   * another.
   *
   * @param {Object|Array} needFilterList - Links to probe, as an array or as
   *   an object whose own property values are URLs. A null or undefined
   *   value starts nothing.
   */
  function jadeFilterLink(needFilterList) {
    if (needFilterList == null) {
      return;
    }
    // Object.keys visits own properties only, unlike for…in.
    Object.keys(needFilterList).forEach(function (key) {
      requestUrl(needFilterList[key], callback);
    });
  }
  // Manual check of a single address:
  // requestUrl("https://www.google.com/" , callback)

  /**
   * Serialises `resData` as indented JSON into ./err_url_list.json,
   * replacing any previous content.
   *
   * @param {*} resData - Any JSON-serialisable value.
   * @throws {Error} When the file cannot be written.
   */
  function outPrint(resData) {
    var filepath = './err_url_list.json';
    var resJson = JSON.stringify(resData, null, 2);
    // This runs once per failing URL, often in quick succession. Overlapping
    // asynchronous writes to one path are unsafe, so write synchronously and
    // let the most recent call determine the file content.
    fs.writeFileSync(filepath, resJson);
  }
添加新批注
在作者公开此批注前,只有你和作者可见。
回复批注