Zombie Error - Error getting an HTTP request

I am using Node.js and Zombie.js to receive URL requests and load them in a virtual browser environment.

I am using the following code:

var zombie = require('zombie'),
jsdom = require('jsdom'),
my_sandbox = require('sandbox'),
url = require('url'),
http = require('http'),
request = require('request'),
httpProxy = require('./lib/node-http-proxy'),
des = '',
util = require('util'),
colors = require('colors'),
is_host = true;

// Sandbox instance built from the 'sandbox' module.
var s = new my_sandbox();
// One Zombie browser instance, shared by every zombieFetching() call.
var browser = new zombie.Browser();

// Proxy server created with (9000, 'localhost') and bound to port 8000.
httpProxy.createServer(9000, 'localhost').listen(8000);

/**
 * Load `page` in the shared Zombie browser and log the outcome.
 *
 * @param {string} page - URL handed to `browser.visit`.
 * @throws {Error} From inside the visit callback when the visit reports an
 *     error. A real Error is thrown (never a bare string) so the stack trace
 *     and error type survive.
 */
function zombieFetching(page) {
    browser.visit(page, { debug: false }, function (err, visitedBrowser, status) {
        if (err) {
            console.log('There is an error. Fix it');
            // Re-throw the original error when possible; wrap anything else.
            throw err instanceof Error ? err : new Error(String(err.message || err));
        }
        console.log('Browser visit successful') ;
    });
}

// URL of the very first request seen; later root-relative requests are
// resolved against it. Declared explicitly so it is not an implicit global.
var dest = '';

/**
 * HTTP server that fetches each requested URL with `request`, hands the same
 * URL to zombieFetching(), and replies with the fetched body.
 *
 * The first request (while `is_host` is true) records its URL in `dest`.
 * Every later request whose URL starts with "/" is prefixed with `dest`.
 * If the upstream fetch fails, the client gets a 502 and Zombie is not invoked.
 */
var server = http.createServer(function (req, res) {
    var pathname = req.url;

    if (is_host) {
        dest = pathname;
        is_host = false;
    } else if (pathname.charAt(0) === '/') {
        console.log('new request');
        console.log(pathname);
        pathname = dest + pathname;
    }

    request.get({uri: pathname}, function (err, response, html) {
        if (err) {
            // Do not visit or echo a page that could not be fetched.
            console.log('Upstream request failed for ' + pathname + ': ' + err.message);
            res.statusCode = 502;
            res.end();
            return;
        }
        console.log('The pathname is:::::::::: ' + pathname);
        zombieFetching(pathname);
        res.end(html);
    });
});

// Start accepting connections on port 9000 (the same port number passed to
// httpProxy.createServer earlier in this file).
server.listen(9000);

I see the following error when trying to get the url: "www.yahoo.com"

home/seed/Desktop/Cloud project/node_modules/zombie/node_modules/html5/lib/html5/tokenizer.js:62
                throw(e);
    ^
Error: undefined: Invalid character in tag name:   
    at Object.createElement (/home/seed/Desktop/Cloud project/node_modules/zombie/node_modules/jsdom/lib/jsdom/level1/core.js:1174:13)
    at TreeBuilder.createElement (/home/seed/Desktop/Cloud project/node_modules/zombie/node_modules/html5/lib/html5/treebuilder.js:29:25)
    at TreeBuilder.insert_element_normal (/home/seed/Desktop/Cloud project/node_modules/zombie/node_modules/html5/lib/html5/treebuilder.js:61:21)
    at TreeBuilder.insert_element (/home/seed/Desktop/Cloud project/node_modules/zombie/node_modules/html5/lib/html5/treebuilder.js:52:15)
    at Object.startTagOther (/home/seed/Desktop/Cloud project/node_modules/zombie/node_modules/html5/lib/html5/parser/in_body_phase.js:483:12)
    at Object.processStartTag (/home/seed/Desktop/Cloud project/node_modules/zombie/node_modules/html5/lib/html5/parser/phase.js:43:44)
    at EventEmitter.do_token (/home/seed/Desktop/Cloud project/node_modules/zombie/node_modules/html5/lib/html5/parser.js:94:20)
    at EventEmitter.<anonymous> (/home/seed/Desktop/Cloud project/node_modules/zombie/node_modules/html5/lib/html5/parser.js:112:30)
    at EventEmitter.emit (events.js:64:17)
    at EventEmitter.emitToken (/home/seed/Desktop/Cloud project/node_modules/zombie/node_modules/html5/lib/html5/tokenizer.js:84:7)

In addition, the log statements are as follows:

The pathname is:::::::::: http://www.yahoo.com/
The pathname is:::::::::: http://l1.yimg.com/a/i/ww/news/2011/05/06/zuckhouse-sm.jpg
The pathname is:::::::::: http://l1.yimg.com/a/i/ww/news/2011/05/07/cable-sm.jpg
The pathname is:::::::::: http://l.yimg.com/a/a/1-/flash/promotions/yahoo/081120/70x50iltlb_2.jpg

Browser visit successful

Browser visit successful

Browser visit successful

Browser visit successful

The pathname is:::::::::: http://l.yimg.com/a/i/vm/2011may/bird74.jpg
The pathname is:::::::::: http://www.yahoo.com/jserror?ad=1&target=cms&data=FPAD

From what I understand, the first four requests were successful. However, I don't know why Zombie then requests this unexpected URL:

"http://www.yahoo.com/jserror?ad=1&target=cms&data=FPAD"

Also, what causes an invalid character in the tag name error?

Thanks Sony

+3
source share
1 answer

favicon.ico ; . - HTTP-, , , , . , jserror?, - Zombie 301 () URL- , - . Zombie , .., .

browser.debug = true, , , , .

0

All Articles