WLW Examples

WLW Robot Examples

Example #1

Start URL

http://4business-werbeartikel.de/onlineshop.html

Robot

// steps.start always gets executed as the initial step on a start URL
steps.start = function() {
// Use jQuery selector and .each() function to generate next steps for categories. This technique is common in many robots.
$(".level2 a").each(function(i, v) { next(v.href, "drillCat"); });
// Must call done when finishing steps. Extension listens for done to start loading a new page.
// done() is almost always necessary to finish a step. Exeption: when step causes a new page to load automatically. For example from a click.
done();};steps.drillCat = function() {
// Also use jQuery selector and .each() function to generate next steps. This time next steps will be products scraping.
$(".item_list h5 a").each(function(i, v) { next(v.href, "scrapeProduct"); });
// Looking for next page links if there are multiple pages of products. ":first" in the selector ensures that we follow one link. Useful to include because next page usually duplicates at the top and the bottom of the page.
$(".pagination a:contains(weiter):first").each(function(i, v) {
// Notice here that we are calling the same step function drillCat. This is common across paginated products.
next(v.href, "drillCat"); });
// Don't forget done()
done();};steps.scrapeProduct = function() {
// This step executes on a product page. Forming product data from various selectors.
var item = { "Internal ID": $("tr:contains(Artikelnummer) td:last").text().trim(), "Categories": getCats(), "Name": $(".product-name").text().trim(),
// short desc is the first paragraph
"Short Description": $(".product-specs")

Example #2:

Start URL

http://www.3i-merchandising.de/start

Robot

steps.start = function() {
  // In this robot we expect some product pages to fail, so we change the retry behavior.
  // Robot will do retries every 30 seconds 3 times (on each step). We will allow 10000 retries for the whole run.
  // NOTE(review): the first argument is 60*1000; if it is a delay in milliseconds that is 60 seconds, not the 30 seconds stated above — confirm which is intended.
  setRetries(60*1000, 3, 10000);

  // Tricky part. This shop displays products in an iframe, which is like another window inside the HTML.
  // We take the iframe's URL and open it as the next step to jump into it.
  var h = $('iframe')[0].src;
  next(h, "jump");
  done();
};

steps.jump = function() {
  // The iframe URL is now open and it redirects. This is a rare example where done() is not needed in a step:
  // done() is triggered for us automatically by the redirect.
  next("", "drillCat");
};

// NOTE(review): the function header was missing from this excerpt (only the closing "};" survived).
// It is restored as steps.drillCat because both steps.jump and the pagination code below queue "drillCat" — confirm.
// The part that queues product pages for scrapeProduct is not present in this excerpt.
steps.drillCat = function() {
  // Tricky part. Here pagination still shows a next-page link even when we are on the last listing page.
  // We parse the text saying how many out of how many products are displayed to know if we reached the last page.
  var results = $(".foundtxt:first").text().match(/\d+/g); // extract digits
  var total = results.shift();
  var last = results.pop();

  if (total !== last) {
    // Notice next has a blank URL. This indicates that the robot does not have to load a page; the click below loads it instead.
    next("", "drillCat");

    // Robot will click on the next-page link and it will load a new page. This will also trigger done() for us.
    $("#navigatieVerder").click();
  } else {
    // Do done() only on the last page.
    done();
  }
};

steps.scrapeProduct = function() {

// scraping product data as usual.
// one tricky part is that in the beginning we check some element that is present on products. This is to check that we really have a product page.