#!/usr/bin/env tclsh package require tclgumbo package require Tk package require tksvg package require Img # TODO: This is still ugly, refactor into functions and namespaces? text .t -yscrollcommand {.s set} -relief flat -font {Times 12} -wrap word -border 4 scrollbar .s -command {.t yview} pack .s -side right -fill y pack .t -side left -fill both -expand 1 set w .t $w tag config title -elide true $w tag config href -elide true $w tag config script -elide true $w tag config style -elide true $w tag config a -foreground blue -underline 1 $w tag config strong -font {Times 12 bold} $w tag config b -font {Times 12 bold} $w tag config i -font {Times 12 italic} $w tag config em -font {Times 12 italic} $w tag config pre -font {Courier 12} $w tag config code -font {Courier 12} $w tag config h1 -font {Times 18} $w tag config h2 -font {Times 18} $w tag config ul -lmargin1 40 -lmargin2 40 $w tag config dd -lmargin1 40 -lmargin2 40 $w tag config p $w tag bind a "$w config -cursor hand2" $w tag bind a "$w config -cursor {}" $w tag bind a "click $w %x %y" # TODO: Avoid globals variable baseAddress proc makeAddress {address} { global baseAddress if {1 == [regexp {^https?.*} $address]} { return "$address" } elseif {1 == [regexp {^/[^/].*} $address]} { return "[regsub {(https?://[^/]+)/.*} $baseAddress {\1}]$address" } elseif {1 == [regexp {^//.*} $address]} { return "https:$address" } else { return "$baseAddress$address" } } proc click {w x y} { global baseAddress set range [$w tag prevrange href [$w index @$x,$y]] set address [eval $w get $range] set result [exec browse -f viewhtml [makeAddress $address]] if {[llength $result] == 2} { displayPage $w [lindex $result 0] [lindex $result 1] } } proc displayNode {w node tagList} { set type [gumbo::node_get_type $node] if {$type == $gumbo::GUMBO_NODE_ELEMENT} { set tag [gumbo::element_get_tag_name $node] set attributes [gumbo::element_get_attributes $node] # TODO: This could be simplified in a way that allows for easy extension and won't end up in a long if-elseif-chain if {$tag == "a"} { $w insert end [lindex [array get $attributes] 1] [concat $tagList href] } elseif {$tag == "img"} { $w insert end "\n" set lattr [array get $attributes] set path [makeAddress [lindex $lattr [expr [lsearch -exact $lattr src] + 1]]] set localPath [lindex [split [exec phttp $path] { }] 2] image create photo $path -file $localPath $w image create end -image $path $w insert end "\n" } foreach child_node [gumbo::element_get_children $node] { displayNode $w $child_node [concat $tag $tagList] } # TODO: Handle margins and blocks better than this. if {$tag == "h1" || $tag == "h2" || $tag == "p"} { $w insert end "\n\n" {} } elseif {$tag == "pre" || $tag == "li" || $tag == "dt" || $tag == "ul" || $tag == "dd" || $tag == "dl" || $tag == "div" || $tag == "br"} { $w insert end "\n" {} } } elseif {$type == $gumbo::GUMBO_NODE_TEXT} { # TODO: This could be simplified. if {0 <= [lsearch $tagList pre]} { $w insert end [gumbo::text_get_text $node] $tagList } else { $w insert end [regsub -all {\s+} [gumbo::text_get_text $node] " "] $tagList } } } proc displayPage {w newBaseAddress filePath} { global baseAddress set baseAddress $newBaseAddress set file [open $filePath] set html [read $file] set output [gumbo::parse $html] close $file $w config -state normal $w delete 1.0 end displayNode $w [gumbo::output_get_root $output] [list] $w config -state disabled gumbo::destroy_output $output } displayPage $w [lindex $argv 1] [lindex $argv 0]