GetWebPage
this function downlaod and returns the web page from a url.
It also exec the javascritp of the page
example:
declare @s nvarchar(max)
select @s=node
from [dbo].[GetWebPage]('https://www.google.it/','',1)
print @s
and this extract only from the tag "td"
declare @s nvarchar(max)
select @s=node
from [dbo].[GetWebPage]('https://www.google.it/','//td',1)
print @s
and has sense usign Sql xml functions
select CONVERT(xml,node)
from [dbo].[GetWebPage]('https://www.google.it/','//td',1)
paramenters:
a complete example
drop table #t2
--create a loop to have incremental number
;with
tmp1 as
(
select 1 t,11 n
union all
select 2 t,n+1
from tmp1 t1
where t1.n+1<=10
)
select t2.title,t2.link,t2.node node1,
t3.text,t3.node node2
--*
into #t2
from tmp1 t1
--find in the section title
cross apply
(
select --node.value('span[1]/td[3]/div[1]/a[1]/@href','varchar(max)'),
i1.node.value('span[1]/td[2]/div[1]/a[1]','varchar(max)') title,
'https://www.hwupgrade.it/forum/showthread.php?t='+right(node.value('span[1]/td[1]/@id','varchar(max)'),7) link,
*
--open the url and get tag from '//table/tbody/tr' and consider inner tags
from config.GetWebPage('https://www.hwupgrade.it/forum/forumdisplay.php?f=44&page='+CONVERT(varchar,t1.n)+'&order=desc','//table/tbody/tr',1) i1
where i1.node.value('span[1]/td[1]/@id','varchar(max)')like'td_threadstatusicon_%'
)t2
--get content for each title
outer apply
(
select i2.node.value('span[1]/div[1]/strong[1]','varchar(max)')title,
i2.node.value('span[1]/div[2]','varchar(max)')text,
*
from config.GetWebPage(t2.link,'//table/tr/td',1) i2
where 0=0
--and i1.node.value('span[1]/div[2]/@id','varchar(max)')like'post_message_%'
and i2.node.value('span[1]/div[1]/strong[1]','varchar(max)')=t2.title
)t3
where 0=0
select t1.title,t1.text,*
from #t2 t1
where 0=0