Chapter 5 Functions
Exercise 5.1: Change the findlinks program to traverse the n.FirstChild linked list using recursive calls to visit instead of a loop.
// visit appends to links each link found in n and returns the result.
func visit(links []string, n *html.Node) []string {
if n.Type == html.ElementNode && n.Data == "a" {
for _, a := range n.Attr {
if a.Key == "href" {
links = append(links, a.Val)
}
}
}
// this part, change from loop to recursive calls
if n.FirstChild != nil {
links = visit(links, n.FirstChild)
}
if n.NextSibling != nil {
links = visit(links, n.NextSibling)
}
return links
}
Exercise 5.2: Write a function to populate a mapping from element names—p, div, span, and so on—to the number of elements with that name in an HTML document tree.
// Copyright © 2017 [email protected]
// Copyright © 2016 Alan A. A. Donovan & Brian W. Kernighan.
// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
// See page 123.
// Outline prints the outline of an HTML document tree.
package main
import (
"fmt"
"os"
"golang.org/x/net/html"
)
//!+
func main() {
doc, err := html.Parse(os.Stdin)
if err != nil {
fmt.Fprintf(os.Stderr, "summary: %v\n", err)
os.Exit(1)
}
var mapping = make(map[string]int)
summary(mapping, doc)
// summary
for k, v := range mapping {
fmt.Println(k, v)
}
}
func summary(mapping map[string]int, n *html.Node) {
if n.Type == html.ElementNode {
mapping[n.Data]++
}
for c := n.FirstChild; c != nil; c = c.NextSibling {
summary(mapping, c)
}
}
//!-
Exercise 5.3: Write a function to print the contents of all text nodes in an HTML document tree. Do not descend into <script> or <style> elements, since their contents are not visible in a web browser.
// Copyright © 2017 [email protected]
// Copyright © 2016 Alan A. A. Donovan & Brian W. Kernighan.
// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
// See page 123.
// Outline prints the outline of an HTML document tree.
package main
import (
"fmt"
"os"
"golang.org/x/net/html"
)
//!+
func main() {
doc, err := html.Parse(os.Stdin)
if err != nil {
fmt.Fprintf(os.Stderr, "outline: %v\n", err)
os.Exit(1)
}
text(doc)
}
func text(n *html.Node) {
// content in <style> or <script> are ignored
if n.Type == html.ElementNode &&
(n.Data == "style" || n.Data == "script") {
return
}
if n.Type == html.TextNode {
fmt.Println(n.Data)
}
for c := n.FirstChild; c != nil; c = c.NextSibling {
text(c)
}
}
//!-
Exercise 5.4: Extend the visit function so that it extracts other kinds of links from the document, such as images, scripts, and style sheets.
var mapping = map[string]string{"a": "href", "img": "src", "script": "src", "link": "href"}
// visit appends to links each link found in n and returns the result.
func visit(target string, links []string, n *html.Node) []string {
if n.Type == html.ElementNode && n.Data == target {
for _, a := range n.Attr {
if a.Key == mapping[target] {
links = append(links, a.Val)
}
}
}
for c := n.FirstChild; c != nil; c = c.NextSibling {
links = visit(target, links, c)
}
return links
}
Exercise 5.5: Implement countWordsAndImages. (See Exercise 4.9 for word-splitting.)
// Copyright © 2017 [email protected]
// Copyright © 2016 Alan A. A. Donovan & Brian W. Kernighan.
// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
package main
import (
"fmt"
"net/http"
"golang.org/x/net/html"
"strings"
"os"
)
// CountWordsAndImages does an HTTP GET request for the HTML
// document url and returns the number of words and images in it.
func CountWordsAndImages(url string) (words, images int, err error) {
resp, err := http.Get(url)
if err != nil {
return
}
doc, err := html.Parse(resp.Body)
resp.Body.Close()
if err != nil {
err = fmt.Errorf("parsing HTML: %s", err)
return
}
words, images = countWordsAndImages(doc)
return
}
func countWordsAndImages(n *html.Node) (words, images int) {
// content in <style> or <script> are ignored
if n.Type == html.ElementNode {
if n.Data == "style" || n.Data == "script" {
return
} else if n.Data == "img" {
images++
}
} else if n.Type == html.TextNode {
text := strings.TrimSpace(n.Data)
for _, line := range strings.Split(text, "\n") {
if line != "" {
words += len(strings.Split(line, " "))
//fmt.Printf("%s %q %d\n", line, strings.Split(line, " "), len(strings.Split(line, " ")))
}
}
}
for c := n.FirstChild; c != nil; c = c.NextSibling {
w, i := countWordsAndImages(c)
words += w
images += i
}
return
}
func main() {
fmt.Println(len(strings.Split("", " ")))
for _, url := range os.Args[1:] {
fmt.Println(CountWordsAndImages(url))
}
}
Exercise 5.6: Modify the corner function in gopl.io/ch3/surface (§3.2) to use named results and a bare return statement.
func corner(i, j int) (sx, sy float64) {
// Find point (x,y) at corner of cell (i,j).
x := xyrange * (float64(i)/cells - 0.5)
y := xyrange * (float64(j)/cells - 0.5)
// Compute surface height z.
z := f(x, y)
// Project (x,y,z) isometrically onto 2-D SVG canvas (sx,sy).
sx = width/2 + (x-y)*cos30*xyscale
sy = height/2 + (x+y)*sin30*xyscale - z*zscale
return
}
Exercise 5.7: Develop startElement and endElement into a general HTML pretty-printer. Print comment nodes, text nodes, and the attributes of each element (<a href='...'>
). Use short forms like <img/>
instead of <img> </img>
when an element has no children. Write a test to ensure that the output can be parsed successfully. (See Chapter 11.)
// Copyright © 2017 [email protected]
// Copyright © 2016 Alan A. A. Donovan & Brian W. Kernighan.
// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
// See page 133.
// Outline prints the outline of an HTML document tree.
package main
import (
"fmt"
"net/http"
"os"
"golang.org/x/net/html"
"strings"
)
func main() {
for _, url := range os.Args[1:] {
outline(url)
}
}
func outline(url string) error {
resp, err := http.Get(url)
if err != nil {
return err
}
defer resp.Body.Close()
doc, err := html.Parse(resp.Body)
if err != nil {
return err
}
//!+call
forEachNode(doc, startElement, endElement)
//!-call
return nil
}
//!+forEachNode
// forEachNode calls the functions pre(x) and post(x) for each node
// x in the tree rooted at n. Both functions are optional.
// pre is called before the children are visited (preorder) and
// post is called after (postorder).
func forEachNode(n *html.Node, pre, post func(n *html.Node)) {
if pre != nil {
pre(n)
}
for c := n.FirstChild; c != nil; c = c.NextSibling {
forEachNode(c, pre, post)
}
if post != nil {
post(n)
}
}
//!-forEachNode
//!+startend
var depth int
func startElement(n *html.Node) {
if n.Type == html.ElementNode {
var attribute string
for _, attr := range n.Attr {
attribute += fmt.Sprintf("%s=%q ", attr.Key, attr.Val)
}
child := ""
// <div /> is illegal
if n.Data == "img" && n.FirstChild == nil {
child = " /"
}
if len(attribute) > 1 {
attribute = attribute[:len(attribute)-1]
fmt.Printf("%*s<%s %s%s>\n", depth*2, "", n.Data, attribute, child)
} else {
fmt.Printf("%*s<%s%s>\n", depth*2, "", n.Data, child)
}
depth++
} else if n.Type == html.TextNode || n.Type == html.CommentNode {
if !(n.Parent.Type == html.ElementNode &&
(n.Parent.Data == "script" || n.Parent.Data == "style")) {
for _, line := range strings.Split(n.Data, "\n") {
line = strings.TrimSpace(line)
if line != "" && line != "\n" {
fmt.Printf("%*s%s\n", depth*2, "", line)
}
}
}
}
}
func endElement(n *html.Node) {
if n.Type == html.ElementNode {
depth--
// <div /> is illegal
if !(n.Data == "img" && n.FirstChild == nil) {
fmt.Printf("%*s</%s>\n", depth*2, "", n.Data)
}
}
}
//!-startend
Exercise 5.8: Modify forEachNode so that the pre and post functions return a boolean result indicating whether to continue the traversal. Use it to write a function ElementByID
with the following signature that finds the first HTML element with the specified id attribute. The function should stop the traversal as soon as a match is found.
func ElementByID(doc *html.Node, id string) *html.Node
// Copyright © 2017 [email protected]
// Copyright © 2016 Alan A. A. Donovan & Brian W. Kernighan.
// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
// See page 133.
// Outline prints the outline of an HTML document tree.
package main
import (
"net/http"
"os"
"golang.org/x/net/html"
"fmt"
)
func main() {
for _, url := range os.Args[1:] {
outline(url)
}
}
func outline(url string) error {
resp, err := http.Get(url)
if err != nil {
return err
}
defer resp.Body.Close()
doc, err := html.Parse(resp.Body)
if err != nil {
return err
}
//!+call
if node := ElementByID(doc, "start"); node != nil {
fmt.Println(node.Type, node.Data)
}
//!-call
return nil
}
func ElementByID(doc *html.Node, id string) *html.Node {
pre := func(doc *html.Node) bool {
for _, attr := range doc.Attr {
if attr.Key == "id" && attr.Val == id {
return true
}
}
return false
}
if node, found := forEachNode(doc, pre, nil); found {
return node
}
return nil
}
//!+forEachNode
// forEachNode calls the functions pre(x) and post(x) for each node
// x in the tree rooted at n. Both functions are optional.
// pre is called before the children are visited (preorder) and
// post is called after (postorder).
func forEachNode(n *html.Node, pre, post func(n *html.Node) bool) (*html.Node, bool) {
if pre != nil {
if pre(n) {
return n, true
}
}
for c := n.FirstChild; c != nil; c = c.NextSibling {
if node, ok := forEachNode(c, pre, post); ok {
return node, true
}
}
if post != nil {
if post(n) {
return n, true
}
}
return nil, false
}
Exercise 5.9: Write a function expand(s string, f func(string) string) string
that replaces each substring ‘‘$foo
’’ within s by the text returned by f("foo")
.
// expand replace "$foo" in s with foo("foo") and return the result
func expand(s string, f func(string) string) string {
items := strings.Split(s, " ")
for i, item := range items {
if strings.HasPrefix(item, "$") {
items[i] = f(item[1:])
}
}
return strings.Join(items, " ")
}
Exercise 5.10: Rewrite topoSort to use maps instead of slices and eliminate the initial sort. Verify that the results, though nondeterministic, are valid topological orderings.
// Copyright © 2017 [email protected]
// Copyright © 2016 Alan A. A. Donovan & Brian W. Kernighan.
// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
// See page 136.
// The toposort program prints the nodes of a DAG in topological order.
package main
import (
"fmt"
)
//!+table
// prereqs maps computer science courses to their prerequisites.
var prereqs = map[string]map[string]bool{
"algorithms": {"data structures": true},
"calculus": {"linear algebra": true},
"compilers": {
"data structures": true,
"formal languages": true,
"computer organization": true,
},
"data structures": {"discrete math": true},
"databases": {"data structures": true},
"discrete math": {"intro to programming": true},
"formal languages": {"discrete math": true},
"networks": {"operating systems": true},
"operating systems": {"data structures": true, "computer organization": true},
"programming languages": {"data structures": true, "computer organization": true},
}
//!-table
//!+main
func main() {
for i, course := range topoSort(prereqs) {
fmt.Printf("%d:\t%s\n", i+1, course)
}
}
func topoSort(m map[string]map[string]bool) []string {
var order []string
seen := make(map[string]bool)
var visitAll func(items map[string]bool)
visitAll = func(items map[string]bool) {
for item, _ := range items {
if !seen[item] {
seen[item] = true
visitAll(m[item])
order = append(order, item)
}
}
}
var keys = make(map[string]bool)
for key := range m {
keys[key] = true
}
visitAll(keys)
return order
}
//!-main
Exercise 5.11: The instructor of the linear algebra course decides that calculus is now a prerequisite. Extend the topoSort function to report cycles.
// Copyright © 2017 [email protected]
// Copyright © 2016 Alan A. A. Donovan & Brian W. Kernighan.
// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
// See page 136.
// The toposort program prints the nodes of a DAG in topological order.
package main
import (
"fmt"
"sort"
"log"
)
//!+table
// prereqs maps computer science courses to their prerequisites.
var prereqs = map[string][]string{
"algorithms": {"data structures"},
"calculus": {"linear algebra"},
"compilers": {
"data structures",
"formal languages",
"computer organization",
},
// a loop
"linear algebra": {"calculus"},
"data structures": {"discrete math"},
"databases": {"data structures"},
"discrete math": {"intro to programming"},
"formal languages": {"discrete math"},
"networks": {"operating systems"},
"operating systems": {"data structures", "computer organization"},
"programming languages": {"data structures", "computer organization"},
}
//!-table
//!+main
func main() {
order, acyclic := topoSort(prereqs)
if !acyclic {
log.Fatalln("Cyclic prerequisite")
}
for i, course := range order {
fmt.Printf("%d:\t%s\n", i+1, course)
}
}
// false means found cyclic prerequisite
func topoSort(m map[string][]string) ([]string, bool) {
var order []string
seen := make(map[string]bool)
// record all uncompleted search
stack := make(map[string]bool)
var visitAll func(items []string) bool
visitAll = func(items []string) bool {
for _, item := range items {
if !seen[item] {
seen[item] = true
// start search from `item`
stack[item] = true
if !visitAll(m[item]) {
return false
}
// `item` is done
stack[item] = false
order = append(order, item)
} else if stack[item] {
// detect a loop
return false
}
}
return true
}
var keys []string
for key := range m {
keys = append(keys, key)
}
sort.Strings(keys)
// have valid toposort
if found := visitAll(keys); found {
return order, true
}
return nil, false
}
//!-main
Exercise 5.12: The startElement and endElement functions in gopl.io/ch5/outline2(§5.5) share a global variable, depth. Turn them into anonymous functions that share a variable local to the outline function.
func outline(url string) error {
resp, err := http.Get(url)
if err != nil {
return err
}
defer resp.Body.Close()
doc, err := html.Parse(resp.Body)
if err != nil {
return err
}
//!+startend
var depth int
startElement := func(n *html.Node) {
if n.Type == html.ElementNode {
fmt.Printf("%*s<%s>\n", depth*2, "", n.Data)
depth++
}
}
endElement := func(n *html.Node) {
if n.Type == html.ElementNode {
depth--
fmt.Printf("%*s</%s>\n", depth*2, "", n.Data)
}
}
//!+call
forEachNode(doc, startElement, endElement)
//!-call
return nil
}
Exercise 5.13: Modify crawl to make local copies of the pages it finds, creating directories as necessary. Don’t make copies of pages that come from a different domain. For example, if the original page comes from golang.org, save all files from there, but exclude ones from vimeo.com.
var domain string
//!+crawl
// may crawl same url more than once
func crawl(a string) []string {
fmt.Println(a)
if domain == "" {
p, err := url.Parse(a);
if err != nil {
log.Fatalf("crawl %s get: %v", err)
}
domain = p.Hostname()
if strings.HasPrefix(domain, "www.") {
domain = domain[4:]
}
fmt.Printf("Domain: %s \n\n", domain)
}
list, err := links.Extract(a)
if err != nil {
log.Print(err)
}
// filter out all links with different domain
out := list[:0]
for _, l := range list {
p, err := url.Parse(l)
if err != nil {
// skip invalid url
continue
}
if strings.Contains(p.Hostname(), domain) {
fmt.Println(l)
out = append(out, l)
}
}
return out
}
Exercise 5.14: Use the breadthFirst function to explore a different structure. For example, you could use the course dependencies from the topoSort example (a directed graph), the file system hierarchy on your computer (a tree), or a list of bus or subway routes downloaded from your city government’s web site (an undirected graph).
//!+main
func main() {
// first build a reverse map of prereqs
// use breadthFirst search it, though meaningless
var ready = make(map[string][]string)
var keys []string
for key, values := range prereqs {
for _, class := range values {
ready[class] = append(ready[class], key)
}
keys = append(keys, key)
}
breadthFirst(func(s string) []string {
return ready[s]
}, keys)
}
Exercise 5.15: Write variadic functions max and min, analogous to sum. What should these functions do when called with no arguments? Write variants that require at least one argument.
func max(v int, vals ...int) (m int) {
m = v
for _, val := range vals {
if m < val {
m = val
}
}
return
}
func min(v int, vals ...int) (m int) {
m = v
for _, val := range vals {
if m > val {
m = val
}
}
return
}
Exercise 5.16: Write a variadic version of strings.Join.
func VariadicJoin(sep string, a ...string) string {
return strings.Join(a, sep)
}
func VariadicJoin2(sep string, a ...string) string {
switch len(a) {
case 0:
return ""
case 1:
return a[0]
case 2:
return a[0] + sep + a[1]
case 3:
return a[0] + sep + a[1] + sep + a[2]
}
n := len(sep) * (len(a) - 1)
for i := 0; i < len(a); i++ {
n += len(a[i])
}
b := make([]byte, n)
bp := copy(b, a[0])
for _, s := range a[1:] {
bp += copy(b[bp:], sep)
bp += copy(b[bp:], s)
}
return string(b)
}
Exercise 5.17: Write a variadic function ElementsByTagName that, given an HTML node tree and zero or more names, returns all the elements that match one of those names. Here are two example calls:
func ElementsByTagName(doc *html.Node, name ...string) []*html.Node
images := ElementsByTagName(doc, "img")
headings := ElementsByTagName(doc, "h1", "h2", "h3", "h4")
func ElementsByTagName(doc *html.Node, name ...string) (out []*html.Node) {
pre := func(doc *html.Node) {
for _, n := range name {
if doc.Type == html.ElementNode && doc.Data == n && doc.FirstChild != nil {
out = append(out, doc.FirstChild)
}
}
}
forEachNode(doc, pre, nil)
return
}
Exercise 5.18: Without changing its behavior, rewrite the fetch function to use defer to close the writable file.
// Close file, but prefer error from Copy, if any.
defer func() {
if closeErr := f.Close(); err == nil {
err = closeErr
}
}()
Exercise 5.19: Use panic and recover to write a function that contains no return statement yet returns a non-zero value.
func dummy() (ret int, err error) {
defer func() {
p := recover()
ret = 1
err = fmt.Errorf("internal error: %v", p)
}()
panic("panic with no reason, just panic!")
}
Notes
Go has no concept of default parameter values, nor any way to specify arguments by name, so the names of parameters and results don’t matter to the caller except as documentation.
Function parameters and named results are variables in the same lexical block as the function’s outermost local variables.
A function declaration without a body, indicating that the function is implemented in a language other than Go.
Recursion: it’s essential for processing recursive data structures.
The golang.org/x/... repositories hold packages designed and maintained by the Go team for applications such as networking, internationalized text processing, mobile platforms, image manipulation, cryptography, and developer tools. These packages are not in the standard library because they’re still under development or because they’re rarely needed by the majority of Go programmers.
Many programming language implementations use a fixed-size function call stack; sizes from 64KB to 2MB are typical. Fixed-size stacks impose a limit on the depth of recursion, so one must be careful to avoid astack overflowwhen traversing large data structures recursively; fixed-size stacks may even pose a security risk. In contrast, typical Go implementations use variable-size stacks that start small and grow as needed up to a limit on the order of a gigabyte. This lets us use recursion safely and without worrying about overflow.
Go’s garbage collector recycles unused memory, but do not assume it will release unused operating system resources like open files and network connections. They should be closed explicitly.
Errors are thus an important part of a package’s API or an application’s user interface, and failure is just one of several expected behaviors. This is the approach Go takes to error handling. A function for which failure is an expected behavior returns an additional result, conventionally the last one. If the failure has only one possible cause, the result is a boolean, usually called ok.
More often, and especially for I/O, the failure may have a variety of causes for which the caller will need an explanation. In such cases, the type of the additional result is error.
Failure:
The Go Programming Language.pdf
Go’s approach sets it apart from many other languages in which failures are reported usingexceptions, not ordinary values. Although Go does have an exception mechanism of sorts, as we will see in Section 5.9, it is used only for reporting truly unexpected errors that indicate a bug, not the routine errors that a robust program should be built to expect.
The reason for this design is that exceptions tend to entangle the description of an error with the control flow required to handle it, often leading to an undesirable outcome: routine errors are reported to the end user in the form of an incomprehensible stack trace, full of information about the structure of the program but lacking intelligible context about what went wrong.
By contrast, Go programs use ordinary control-flow mechanisms likeifandreturnto respond to errors. This style undeniably demands that more attention be paid to error-han- dling logic, but that is precisely the point.
Error-control stratergies: When a function call returns an error, it’s the caller’s responsibility to check it and take appropriate action.
First, and most common, is to propagate the error, so that a failure in a subroutine becomes a failure of the calling routine.
For errors that represent transient or unpredictable problems, it may make sense to retry the failed operation, possibly with a delay between tries, and perhaps with a limit on the number of attempts or the time spent trying before giving up entirely.
Third, if progress is impossible, the caller can print the error and stop the program gracefully, but this course of action should generally be reserved for the main package of a program. Library functions should usually propagate errors to the caller, unless the error is a sign of an internal inconsistency that is, a bug.
Fourth, in some cases, it’s sufficient just to log the error and then continue, perhaps with reduced functionality.
And fifth and finally, in rare cases we can safely ignore an error entirely.
Error handling in Go has a particular rhythm. After checking an error, failure is usually dealt with before success. If failure causes the function to return, the logic for success is not indented within an else block but follows at the outer level.
Functions tend to exhibit a common structure, with a series of initial checks to reject errors, followed by the substance of the function at the end, minimally indented.
EOF: the io package guarantees that any read failure caused by an end-of-file condition is always reported by a distinguished error, io.EOF.
Function Values: Functions are first-class values in GO. The zero value of a function type is nil. Function values may be compared with nil, but they are not comparable, so they may not be compared against each other or used as keys in a map.
Anonymous Functions: Named functions can be declared only at the package level, but we can use a function literal to denote a function value within any expression. A function literal is written like a function declaration, but without a name following the func keyword. It is an expression, and its value is called ananonymous function.
These hidden variable references are why we classify functions as reference types and why function values are not comparable. Function values like these are implemented using a technique called closures, and Go programmers often use this term for function values.
Caveat: Capturing Iteration Variable: read the book again. The reason is a consequence of the scope rules for loop variables. All function values created by this loop ‘‘capture’’ and share the same variable—an addressable storage location, not its value at that particular moment. The problem of iteration variable capture is most often encountered when using thegostate- ment (Chapter 8) or withdefer(which we will see in a moment) since both may delay the execution of a function value until after the loop has finished. But the problem is not inherent to go or defer.
Variadic Functions: Implicitly, the caller allocates an array, copies the arguments into it, and passes a slice of the entire array to the function. Although the...intparameter behaves like a slice within the function body, the type of a variadic function is distinct from the type of a function with an ordinary slice parameter.
func f(...int) {} func g([]int) {} fmt.Printf("%T\n", f) // "func(...int)" fmt.Printf("%T\n", g) // "func([]int)"
defer: A defer statement is an ordinary function or method call prefixed by the keyword defer. The function and argument expressions are evaluated when the statement is executed, but the actual call is deferred until the function that contains the defer statement has finished, whether normally, by executing a return statement or falling off the end, or abnormally, by panicking.
defer usage: A defer statement is often used with paired operations like open and close, connect and disconnect, or lock and unlock to ensure that resources are released in all cases, no matter how complex the control flow.
panic: During a typical panic, normal execution stops, all deferred function calls in that goroutine are executed, and the program crashes with a log message. This log message includes the panic value, which is usually an error message of some sort, and, for each goroutine, a stack trace showing the stack of function calls that were active at the time of the panic.
recover: If the built-in recover function is called within a deferred function and the function containing the defer statement is panicking,recover ends the current state of panic and returns the panic value. The function that was panicking does not continue where it left off but returns normally. If recover is called at any other time, it has no effect and returns nil. Read this part again.