Purpose: show the Yahoo! News index page with the clutter stripped out.
<SCRIPT LANGUAGE="VBScript">
<!--
sub window_onLoad()
'-----------------------------------------------------------
' Downloads the Yahoo! News index page, filters its markup line by line
' and shows the surviving lines as the body of the current document.
'
' The page source is first flattened to one line and then re-split so that
' every ">" ends a line. Each line is then either dropped or appended to
' NewHtm, depending on the first rule below that matches it.
'-----------------------------------------------------------
    Dim Title, NewHtm, LineCount, DelLineCount
    Dim IsScript, IsGarbage, Shrink
    Dim Request, SendError
    Dim t, L, i, Ln, CharCode

    Title = "Pure YahooReuter HomePage"
    NewHtm = "<!--version modified by Fredledingue's html cleaner -->" & VbCrlf & "<HTML><HEAD><TITLE>My home page</TITLE>" & VbCrlf
    LineCount = 0
    DelLineCount = 0
    IsScript = False     ' True while inside a multi-line script element
    IsGarbage = False    ' True while inside a region that is dropped wholesale
    Shrink = False       ' True once the "More in Yahoo!" marker has been passed

    '--------download source---------
    ' Errors are trapped only around the request itself, so that a network
    ' failure ends up as a message in the page instead of a script error.
    On Error Resume Next
    Set Request = CreateObject("MSXML2.XMLHTTP")
    Request.open "GET", "http://news.yahoo.com/news?tmpl=index&cid=586&/", False
    Request.send
    SendError = Err.Number
    On Error GoTo 0

    If SendError <> 0 Then
        Document.Body.InnerHTML = "<P>Could not download Yahoo! News (error " & SendError & ").</P>"
        Exit Sub
    End If
    If Request.status < 200 Or Request.status >= 300 Then
        ' Do not run the filter over an error page.
        Document.Body.InnerHTML = "<P>Could not download Yahoo! News (HTTP status " & Request.status & ").</P>"
        Exit Sub
    End If
    t = Request.responseText

    '--------prepare source text------
    ' Replace character codes 10 to 15 (this includes LF and CR) by spaces so
    ' the page becomes a single line. No CR/LF pair survives this pass, so a
    ' separate VbCrlf replacement is not needed.
    For CharCode = 10 To 15
        t = Replace(t, Chr(CharCode), " ")
    Next
    ' Make site-relative links absolute.
    t = Replace(t, "href=""/", "href=""http://news.yahoo.com/")
    ' Cut each tag just before its class attribute: the remainder becomes a
    ' pseudo tag of its own that the "class=" rule in the loop throws away.
    t = Replace(t, " class=", "><class=")
    t = Replace(t, "<h2>", "")
    t = Replace(t, "</h2>", "")
    ' One line per ">". The three replacements that follow run after the
    ' split on purpose, so the markup they insert stays on the marker's line.
    t = Replace(t, ">", ">" & VbCrlf)
    t = Replace(t, "Copyright ©", "<FONT size=1>Copyright ©")
    t = Replace(t, "More in Yahoo!", "<hr noshade size=""1"">More in Yahoo!")
    t = Replace(t, "taken in reliance thereon.", "taken in reliance thereon.</FONT>")
    ' Pad with 13 empty lines in front and 10 behind the end marker so the
    ' look-behind (up to i-10) and look-ahead (up to i+4) tests in the loop
    ' never index outside the array.
    t = Replace(Space(13), " ", VbCrlf) & t & VbCrlf & "EndOfOriginalY" & Replace(Space(10), " ", VbCrlf)

    '--------scan source text line by line------
    ' The rules are tried in order and the first match decides the line's
    ' fate. VBScript evaluates every operand of an Or expression, which is
    ' why the padding above is required.
    ' (A disabled experiment that skipped lines around "us.a1.yimg.com" and
    ' "ADVERTISEMENT" markers used to sit between the first two rules.)
    L = Split(t, vbCrlf, -1, 1)
    i = 0
    Do Until L(i) = "EndOfOriginalY"
        LineCount = LineCount + 1
        Ln = L(i)

        '---------remove unwanted code-------
        If InStr(Ln,"class=")>0 Or InStr(Ln,"<NOSCRIPT")>0 Or InStr(Ln,"<noscript")>0 Or InStr(Ln,"</NOSCRIPT")>0 Or InStr(Ln,"</noscript")>0 Then
            DelLineCount = DelLineCount + 1

        '-------remove garbage: inside a dropped region-----
        ElseIf IsGarbage Then
            DelLineCount = DelLineCount + 1
            ' The region ends when one of these markers shows up at the
            ' given offset from the current line.
            If InStr(L(i-2),"<a href=""0'>http://us.rd.yahoo.com")>0 Or _
               InStr(L(i-1),"Include Photos")>0 Or _
               InStr(L(i-10),"More News Feeds")>0 Or _
               InStr(L(i+1),"</head>")>0 Or _
               InStr(L(i+4),"More in Yahoo!")>0 Then
                IsGarbage = False
            End If

        '-------remove garbage: start of a dropped region-----
        ElseIf InStr(Ln,"<body onload=")>0 Or _
               InStr(L(i+3),"<div id=""switcheroo")>0 Or _
               InStr(Ln,"<h6>")>0 Or _
               InStr(Ln,"<META HTTP-EQUIV=")>0 Or _
               InStr(Ln,"id=""sidebar")>0 Then
            DelLineCount = DelLineCount + 1
            IsGarbage = True

        '-------remove script: inside a script element-----
        ElseIf IsScript Then
            DelLineCount = DelLineCount + 1
            If InStr(Ln,"</script")>0 Or InStr(Ln,"</SCRIPT")>0 Then
                IsScript = False
            End If

        '-------remove script: opening tag-----
        ElseIf InStr(Ln,"<script")>0 Or InStr(Ln,"<SCRIPT")>0 Then
            DelLineCount = DelLineCount + 1
            ' Only enter script mode when the element is not closed on this same line.
            If InStr(Ln,"</script")=0 And InStr(Ln,"</SCRIPT")=0 Then
                IsScript = True
            End If

        '-------remove javascript links-----
        ElseIf InStr(Ln,"href='javascript")>0 Then
            DelLineCount = DelLineCount + 1

        '------------shrink end of page---------
        ElseIf Shrink Then
            If InStr(Ln,"<li")>0 Or _
               InStr(Ln,"</li")>0 Or _
               InStr(Ln,"<div")>0 Or _
               InStr(Ln,"</div")>0 Or _
               InStr(Ln,"<!--")>0 Then
                DelLineCount = DelLineCount + 1
            Else
                ' Put a space between consecutive hyperlinks.
                NewHtm = NewHtm & VbCrlf & Replace(Ln, "</a>", "</a> ")
            End If

        '------------marker that starts the shrunken page end---------
        ElseIf InStr(Ln,"More in Yahoo!")>0 Then
            NewHtm = NewHtm & VbCrlf & Ln
            Shrink = True

        '---------open links in new windows--------
        ElseIf InStr(1,Ln,"href=""",1)>0 Then
            ' The leading space keeps the new attribute apart from the quoted
            ' href value in front of it. The line break matches the other
            ' accepting branches.
            NewHtm = NewHtm & VbCrlf & Replace(Ln, ">", " target=""_blank"">")

        '---------remove notes (HTML comments)------------
        ElseIf InStr(Ln,"<!--")>0 Then
            DelLineCount = DelLineCount + 1

        '------accept line------------
        Else
            NewHtm = NewHtm & VbCrlf & Ln
        End If

        i = i + 1
    Loop
    NewHtm = NewHtm & VbCrlf & "</BODY></HTML>"
    '--------------------------------
    ' Optional diagnostic, disabled:
    'MsgBox "Deleted lines: " & DelLineCount & "/" & LineCount,, Title
    '---------End of Script-----------------
    Document.Body.InnerHTML = NewHtm
end sub
-->
</SCRIPT>
Save the code above to a file named pureyahoonews.html.