Pickaxe has come a long way in a year. I open sourced it and hosted it on github.
The language is powerful enough now for real use cases. To illustrate, below is an example of fetching airport weather data and writing it to a file using SQL like syntax. The (thread(threadcount)) hint next to the download page statement allows parallel downloads resulting in fast code. A youtube video is also shown below.
Never has scraping the web been so easy. To learn more, there are tutorials and docs on github. Visit the Pickaxe download page to run the code below.
create buffer states(state string)
insert into states
select 'TX'
insert into states
select 'AR'
insert into states
select 'OR'
create buffer station(url string, st string, city string, state string)
insert into station
select
pick 'td:nth-child(1) a' take attribute 'href', --link to details
pick 'td:nth-child(1) a', --station
pick 'td:nth-child(2)', --city
pick 'td:nth-child(4)' --state
from download page (select
'https://www.faa.gov/air_traffic/weather/asos/?state=' + state
from states)
where nodes = 'table.asos tbody tr'
create buffer stationReadings(url string, stamp string, time string, wind string, visibility string, weather string, temp string, humidity string)
insert into stationReadings
select
url,
pick 'td:nth-child(1)', --date
pick 'td:nth-child(2)', --time
pick 'td:nth-child(3)', --wind
pick 'td:nth-child(4)', --visibility
pick 'td:nth-child(5)', --weather
pick 'td:nth-child(7)', --temp
pick 'td:nth-child(11)' --humidity
from download page (select url from station) with (thread(10))
where nodes = 'table[cellspacing="3"] tr'
create file airportWeather(
city string,
state string,
stamp string,
time string,
wind string,
visibility string,
weather string,
humidity string,
temp string
)
with (
fieldterminator = '|',
rowterminator = '\r\n'
)
location 'C:\windows\temp\weather.txt'
insert overwrite airportWeather
select city, state, stamp, time, wind, visibility, weather, humidity, temp
from station s
join stationReadings r on r.url = s.url