#!/usr/bin/env tclsh
#
# Minimal HTML viewer: parses an HTML file with tclgumbo and renders it into
# a Tk text widget, mapping HTML element names onto text-widget tags.
#
# Usage: <script> htmlFile ?baseAddress?
#
#   htmlFile     path of the HTML document to display
#   baseAddress  optional prefix prepended (with a "/") to relative link
#                and image addresses
#
# Requires Tcl 8.5 or later (inline regsub, {*} expansion, dict).

package require tclgumbo
package require Tk

# TODO: General notes enclose the script into functions, this is ugly right now.

# TODO: Extend argument parsing and handling
if {[llength $argv] < 1} {
    puts stderr "usage: [file tail $argv0] htmlFile ?baseAddress?"
    exit 1
}

text .t -yscrollcommand {.s set} -relief flat -font {Times 12} -wrap word \
    -border 4
scrollbar .s -command {.t yview}
pack .s -side right -fill y
pack .t -side left -fill both -expand 1

set w .t

# Every piece of text is inserted with the names of its enclosing HTML
# elements as text-widget tags, so configuring a tag named after an element
# styles that element.  "title" text and the link targets stored under the
# "href" tag are kept in the widget but hidden from view.
$w tag config title -elide true
$w tag config href -elide true
$w tag config a -foreground blue -underline 1
$w tag config strong -font {Times 12 bold}
$w tag config b -font {Times 12 bold}
$w tag config i -font {Times 12 italic}
$w tag config em -font {Times 12 italic}
$w tag config pre -font {Courier 12}
$w tag config code -font {Courier 12}
$w tag config h1 -font {Times 18}
$w tag config h2 -font {Times 18}
$w tag config ul -lmargin1 40 -lmargin2 40
$w tag config dd -lmargin1 40 -lmargin2 40
$w tag config p

# Links: show a hand cursor while hovering and follow the link on click.
# "tag bind" needs an event sequence before the script; the scripts are
# built with [list] so the widget path is substituted safely.
$w tag bind a <Enter> [list $w config -cursor hand2]
$w tag bind a <Leave> [list $w config -cursor {}]
$w tag bind a <Button-1> [list click $w %x %y]

set baseAddress [lindex $argv 1]

set file [open [lindex $argv 0]]
set html [read $file]
close $file

set output [gumbo::parse $html]

# joinAddress --
#
#   Prefix a relative address with the base address.
#
# Arguments:
#   base     base address; may be empty
#   address  relative address taken from the document
#
# Results:
#   "$base/$address", or $address unchanged when base is empty (so that a
#   missing base does not turn a relative address into an absolute "/..."
#   path).
proc joinAddress {base address} {
    if {$base eq ""} {
        return $address
    }
    return "$base/$address"
}

# attributeValue --
#
#   Look up one attribute by name in a flat {name value name value ...} list.
#
# Arguments:
#   attributeList  flat key/value list, as produced by [array get]
#   name           attribute name to look for (exact match)
#
# Results:
#   The attribute's value, or the empty string if it is not present.
proc attributeValue {attributeList name} {
    if {[dict exists $attributeList $name]} {
        return [dict get $attributeList $name]
    }
    return ""
}

# click --
#
#   Follow the link under the pointer by starting a new viewer process.
#   The link target is the hidden text of the nearest "href"-tagged range
#   that starts before the clicked position.
#
# Arguments:
#   w     text widget path
#   x, y  pointer coordinates inside the widget
#
# Side effects:
#   Launches ./viewhtml in the background; does nothing if no "href" range
#   precedes the clicked position.
proc click {w x y} {
    global baseAddress

    set range [$w tag prevrange href [$w index @$x,$y]]
    if {[llength $range] != 2} {
        # No stored link target before this position: nothing to follow.
        return
    }
    set address [$w get {*}$range]

    # TODO: Handle external base address nicely
    # TODO: Handle /-prefixed addresses
    # TODO: Use browse script instead of viewhtml directly
    #
    # The trailing "&" runs the child in the background; without it exec
    # would block this window's event loop until the child exits.
    if {[regexp {^https?://} $address]} {
        exec ./viewhtml $address $address &
    } else {
        exec ./viewhtml [joinAddress $baseAddress $address] $baseAddress &
    }
}

# displayNode --
#
#   Recursively render a gumbo node and its children into the text widget.
#
# Arguments:
#   node     gumbo node to render
#   tagList  names of the enclosing elements, innermost first; applied as
#            text-widget tags to all text inserted for this node
#
# Side effects:
#   Appends text and images to the global text widget $w.
proc displayNode {node tagList} {
    # TODO: Avoid using global variables.
    global w
    global baseAddress

    set type [gumbo::node_get_type $node]

    if {$type == $gumbo::GUMBO_NODE_ELEMENT} {
        set tag [gumbo::element_get_tag_name $node]
        # NOTE(review): as in the original code, the attributes value is
        # treated as the name of an array and read with [array get] --
        # confirm against the tclgumbo documentation.
        set attributes [gumbo::element_get_attributes $node]

        # TODO: This could be simplified in a way that allows for easy
        # extension and won't end up in a long if-elseif-chain
        switch -exact -- $tag {
            a {
                # Store the link target as hidden text right before the
                # link's visible text; click reads it back from there.
                # Looked up by name so that other attributes (class, id,
                # ...) cannot be mistaken for the target.
                set href [attributeValue [array get $attributes] href]
                $w insert end $href [concat $tagList href]
            }
            img {
                $w insert end "\n"
                set src [attributeValue [array get $attributes] src]
                if {$src ne ""} {
                    set path [joinAddress $baseAddress $src]
                    # Let Tk choose the image name: naming the image after
                    # its path could replace an existing command.  A file
                    # that cannot be loaded is reported and skipped instead
                    # of aborting the rendering of the whole document.
                    if {[catch {image create photo -file $path} result]} {
                        puts stderr "cannot load image \"$path\": $result"
                    } else {
                        $w image create end -image $result
                    }
                }
                $w insert end "\n"
            }
        }

        foreach child_node [gumbo::element_get_children $node] {
            displayNode $child_node [concat $tag $tagList]
        }

        # TODO: Handle margins and blocks better than this.
        switch -exact -- $tag {
            h1 - h2 - p {
                $w insert end "\n\n" {}
            }
            pre - li - dt - ul - dd - dl {
                $w insert end "\n" {}
            }
        }
    } elseif {$type == $gumbo::GUMBO_NODE_TEXT} {
        set text [gumbo::text_get_text $node]
        # Outside <pre>, collapse every run of whitespace to a single space.
        if {[lsearch -exact $tagList pre] < 0} {
            set text [regsub -all {\s+} $text " "]
        }
        $w insert end $text $tagList
    }
}

displayNode [gumbo::output_get_root $output] [list]

# Rendering is done: make the widget read-only and release the parse tree.
$w config -state disabled
gumbo::destroy_output $output