#!/usr/bin/env php
<?php
/**
 * Creates a TOC (table of contents) from a given HTML file.
 * <h1> is ignored; all other headings (<h2> to <h6>) are used.
 *
 * Usage: build-toc.php <file.htm>
 *
 * The file is parsed as XML, so it has to be well-formed (X)HTML.
 * The generated nested <ul class="toc"> list is written to standard output.
 *
 * Exit codes:
 *   1 - no file name given
 *   2 - file does not exist
 *   3 - file name does not end in ".htm"
 *   4 - file could not be parsed as XML
 *
 * @author Christian Weiske <cweiske@cweiske.de>
 *
 * Alternative if you have an XSLT 2 processor: http://xmlplease.com/tocxhtml
 */

/**
 * Returns the whitespace that is put in front of the <li> of a heading.
 *
 * @param int $level Heading level (2 for <h2> ... 6 for <h6>)
 *
 * @return string 1 space for <h2>, 3 for <h3>, 5 for <h4>, ...
 */
function tocIndent($level)
{
    return str_repeat(' ', ($level - 1) * 2 - 1);
}

/**
 * Renders the collected headings as a nested <ul class="toc"> list.
 *
 * @param array $headings List (keys 0..n-1, document order) of arrays with
 *                        the keys "title", "id" and "level" (2-6)
 *
 * @return string HTML markup, terminated by a newline
 */
function buildTocHtml(array $headings)
{
    $html = "<ul class=\"toc\">\n";

    // Levels of the headings whose <li> is still open because a nested <ul>
    // was started below them; the innermost list is the last entry.
    $open  = array();
    $count = count($headings);

    foreach ($headings as $n => $h) {
        $indent = tocIndent($h['level']);
        $next   = ($n + 1 < $count) ? $headings[$n + 1]['level'] : null;

        // The id ends up inside an attribute value, so quotes must be
        // escaped as well.
        $html .= $indent . '<li><a href="#'
            . htmlspecialchars($h['id'], ENT_QUOTES, 'UTF-8') . '">'
            . htmlspecialchars($h['title']) . '</a>';

        if ($next !== null && $next > $h['level']) {
            // The next heading is deeper: keep this <li> open and nest a
            // new list inside of it.
            $html .= "\n" . $indent . ' <ul>' . "\n";
            $open[] = $h['level'];
            continue;
        }

        $html .= '</li>' . "\n";

        // Close every nested list that the next heading does not belong to.
        // After the last heading ($next === null) all of them get closed,
        // so the markup is balanced no matter how many levels are dropped.
        while ($open && ($next === null || end($open) >= $next)) {
            $parentIndent = tocIndent(array_pop($open));
            $html .= $parentIndent . " </ul>\n"
                . $parentIndent . "</li>\n";
        }
    }

    return $html . "</ul>\n";
}

if ($argc < 2) {
    echo "Please pass a file\n";
    exit(1);
}

$file = $argv[1];
if (!file_exists($file)) {
    echo "File does not exist: $file\n";
    exit(2);
}
if (substr($file, -4) !== '.htm') {
    echo "build-toc.php works only on htm files\n";
    exit(3);
}

$sx = simplexml_load_file($file);
if ($sx === false) {
    echo "Failed to load XML\n";
    exit(4);
}

// The expression starts with "//" and therefore always searches the whole
// document; running it on the root element avoids depending on a <body>
// child being present.
$sx->registerXPathNamespace('h', 'http://www.w3.org/1999/xhtml');
$elems = $sx->xpath(
    '//*['
    //XHTML
    . 'self::h:h2 or self::h:h3 or self::h:h4 or self::h:h5 or self::h:h6'
    //HTML
    . ' or self::h2 or self::h3 or self::h4 or self::h5 or self::h6'
    . ']'
);
if (!is_array($elems)) {
    // xpath() does not return an array on failure; treat that as
    // "no headings found".
    $elems = array();
}

$data = array();
foreach ($elems as $elem) {
    $name = $elem->getName();
    $data[] = array(
        // NOTE: the string cast only yields text that is directly inside
        // the heading, not text within child elements such as <em>.
        'title' => (string) $elem,
        'id'    => (string) $elem['id'],
        // Second character of "h2".."h6". Square brackets are required:
        // the curly brace offset syntax was removed in PHP 8.
        'level' => (int) $name[1],
    );
}

echo buildTocHtml($data);