Pull a specific string from an HTTP request in node.js

Go To StackoverFlow.com

0

Noob here. I've got an HTTP request that pulls all of the content from a specific webpage. However, all I need is a specific string: "Most recent instantaneous value: ". In fact, what I actually need is to store the value that follows "value:". Here is my code:

var http = require("http");

// Request description: a POST to the USGS water-data page on port 80.
var options = {
 method: 'POST',
 host: 'waterdata.usgs.gov',
 port: 80,
 path: '/ga/nwis/uv?cb_72036=on&cb_00062=on&format=gif_default&period=1&site_no=02334400'
};

// Prints one received piece of the response body.
function printChunk(chunk) {
 console.log('BODY: ' + chunk);
}

// Logs the status code and headers, then prints each body chunk as it arrives.
function onResponse(res) {
 console.log('STATUS: ' + res.statusCode);
 console.log('HEADERS: ' + JSON.stringify(res.headers));
 res.setEncoding('utf8');
 res.on('data', printChunk);
}

// Reports a failed request.
function onRequestError(e) {
 console.log('problem with request: ' + e.message);
}

var req = http.request(options, onResponse);
req.on('error', onRequestError);

// write data to request body
req.write('data\n');
req.write('data\n');
req.end();

I realize I don't need all the console.log statements, but do I need to keep console.log('BODY: ' + chunk); so that all of the data downloads?

2012-04-04 05:11
by mnort9


0

Never do it the way I'm doing it in this quick'n'dirty example. There are plenty of modules for DOM traversal, HTML/XML parsing, etc... They are a lot safer than a simple regex. But just so you get the general idea:

var http = require("http");

// Target of the request. No `method` key is set here; the request is
// issued with http.get further down.
var options = {
    path: '/ga/nwis/uv?cb_72036=on&cb_00062=on&format=gif_default&period=1&site_no=02334400',
    host: 'waterdata.usgs.gov',
    port: 80
};

/**
 * Finds the first "Most recent instantaneous value: <value> " occurrence in
 * `body` and passes the captured value (the run of non-space characters
 * before the next space) to `cb`.
 *
 * `cb` is not called when `body` is falsy or when the text does not match.
 *
 * @param {string} body - Text to search.
 * @param {function(string)} cb - Receives the captured value.
 */
function extract (body, cb) {
    var pattern = /Most recent instantaneous value: ([^ ]+) /;
    var found = body ? body.match(pattern) : null;

    if (!found) {
        return;
    }

    cb(found[1]);
}

// Fetch the page and print the first "Most recent instantaneous value".
http.get(options, function(res) {
    // The response arrives in arbitrarily sized chunks, so the text we are
    // looking for can be split across two 'data' events. Collect the whole
    // body first and run the extraction once, when the response has ended.
    var body = '';

    res.setEncoding('utf8');
    res.on('data', function (chunk) {
        body += chunk;
    });
    res.on('end', function () {
        // extract() reports only the first match, so the value is printed
        // at most once per request.
        extract(body, function(v){ console.log(v); });
    });
}).on('error', function(e) {
    console.log('problem with request: ' + e.message);
});

Somehow I also got a different page when sending a POST instead of a GET request. So I changed that bit...

Regarding your second question: No, you don't need to keep any of the console.log() statements. Just use callbacks and everything is fine! :-)

2012-04-04 07:31
by Sebastian Stumpf
Thank you. Why do you consider this method of doing it unsafe? Also, this solution is printing both instances of the string. I only need the first instance. Do I need a modifier or something - mnort9 2012-04-04 17:46
Figured out why it was printing the data twice - mnort9 2012-04-04 21:51
Just add a return in front of the callback if you only want the first value. On the topic of parsing HTML I recommend reading Jeff's opinion about it and of course this answer: http://stackoverflow.com/a/1732454/47913 - Sebastian Stumpf 2012-04-04 23:51
Ads