create TOC from HTML file

raw

build-toc.php

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#!/usr/bin/env php
<?php
/**
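 * Creates a TOC (table of contents) from a given HTML file and prints it
 * to standard output as a nested <ul class="toc"> list.
 * The file must be well-formed XML (it is parsed with SimpleXML) and its
 * name must end in ".htm".
 * <h1> is ignored; all other headings (<h2> to <h6>) are used.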
 *
 * @author Christian Weiske <cweiske@cweiske.de>
 *
 * Alternative if you have an XSLT 2 processor: http://xmlplease.com/tocxhtml
 */
// Command-line validation. Every failure prints a message and ends the
// script with its own exit status:
//   1 = no file argument given
//   2 = the given path does not exist
//   3 = the given path does not end in ".htm"
if ($argc <= 1) {
    print "Please pass a file\n";
    exit(1);
}

$file = $argv[1];

if (file_exists($file) === false) {
    print "File does not exist: $file\n";
    exit(2);
}

// Only the last four characters of the path are inspected.
if ('.htm' !== substr($file, -4)) {
    print "build-toc.php works only on htm files\n";
    exit(3);
}
 
// Parse the input as XML; the script gives up with exit status 4 when
// SimpleXML reports failure by returning false.
$sx = simplexml_load_file($file);
if (false === $sx) {
    print "Failed to load XML\n";
    exit(4);
}

$body = $sx->body;
$body->registerXPathNamespace('h', 'http://www.w3.org/1999/xhtml');

// The node test matches h2..h6 twice: once in the XHTML namespace (prefix
// "h" registered above) and once without a namespace for plain HTML.
$xhtmlHeadings = 'self::h:h2 or self::h:h3 or self::h:h4 or self::h:h5 or self::h:h6';
$htmlHeadings = 'self::h2 or self::h3 or self::h4 or self::h5 or self::h6';
$elems = $body->xpath('//*[' . $xhtmlHeadings . ' or ' . $htmlHeadings . ']');
 
/*
 * Collect one record per heading in $elems, in the order given:
 *   title     - text of the heading, including text of nested inline tags
 *   id        - value of the heading's "id" attribute ('' when absent)
 *   level     - digit taken from the tag name (h2 => 2 ... h6 => 6)
 *   nextLevel - level of the heading that follows, null for the last one
 */
if (!is_array($elems)) {
    // xpath() does not return an array when the query fails;
    // treat that as "no headings" instead of iterating a non-array.
    $elems = array();
}

$data = array();
$previous = null;
foreach ($elems as $elem) {
    $name = $elem->getName();
    // Second character of the tag name. Bracket syntax is required:
    // the curly-brace form $name{1} is a parse error since PHP 8.0.
    $level = (int) $name[1];

    // Casting the element to string only yields its own text nodes, so
    // "<h2>Using <code>foo</code></h2>" would lose "foo". The DOM
    // textContent property includes the text of all descendants.
    if (function_exists('dom_import_simplexml')) {
        $title = dom_import_simplexml($elem)->textContent;
    } else {
        // DOM extension not available: fall back to the direct text.
        $title = (string) $elem;
    }

    $entry = (object) array(
        'title' => $title,
        'id' => (string) $elem['id'],
        'level' => $level,
        'nextLevel' => null
    );

    // Now that this heading's level is known, record it on the previous
    // entry (objects are handles, so the entry inside $data is updated).
    if ($previous !== null) {
        $previous->nextLevel = $level;
    }

    $data[] = $entry;
    $previous = $entry;
}
 
/*
 * Render $data as a nested <ul class="toc"> list and print it.
 *
 * Each entry becomes "<li><a href="#id">title</a>". When the following
 * heading is deeper, the <li> stays open and a nested <ul> is started.
 * Otherwise the <li> is closed, followed by every nested list that the
 * next heading no longer belongs to (all of them after the last heading).
 * Tracking the open lists on a stack keeps the markup balanced when the
 * heading level drops by more than one step or the document ends at the
 * top level.
 */
$html = "<ul class=\"toc\">\n";

// Levels of the <li> items whose nested <ul> is still open, innermost last.
$openParents = array();

foreach ($data as $h) {
    // Indentation depends on the heading level: h2 => 1 space, h3 => 3, ...
    $i = str_repeat(' ', max(0, ($h->level - 1) * 2 - 1));
    $html .= $i
        . '<li><a href="#' . htmlspecialchars($h->id, ENT_QUOTES) . '">'
        . htmlspecialchars($h->title)
        . '</a>';

    if ($h->nextLevel !== null && $h->nextLevel > $h->level) {
        // A deeper heading follows: it goes into a list inside this item.
        $html .= "\n"
            . $i . ' <ul>' . "\n";
        $openParents[] = $h->level;
        continue;
    }

    $html .= '</li>' . "\n";

    // Close each nested list whose parent item is at the same level as, or
    // deeper than, the next heading. nextLevel is null after the last
    // heading, in which case everything still open is closed.
    while (count($openParents) > 0
        && ($h->nextLevel === null || end($openParents) >= $h->nextLevel)
    ) {
        $parentLevel = array_pop($openParents);
        $parentIndent = str_repeat(' ', max(0, ($parentLevel - 1) * 2 - 1));
        $html .= $parentIndent . " </ul>\n"
            . $parentIndent . "</li>\n";
    }
}

$html .= "</ul>\n";
echo $html;
?>
 
raw

example-output.html

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
<!-- Sample table of contents in the format printed by build-toc.php: one <li> per heading linking to the heading's id; deeper headings are nested in an inner <ul>. -->
<ul class="toc">
 <li><a href="#detection">USB device detection</a>
  <ul>
   <li><a href="#detection-pc">Linux PC</a></li>
   <li><a href="#detection-ouya">OUYA</a></li>
   <li><a href="#whatnow">What now?</a></li>
  </ul>
 </li>
 <li><a href="#drivers">Linux drivers</a>
  <ul>
   <li><a href="#ouya-config">OUYA kernel configuration</a></li>
  </ul>
 </li>
 <li><a href="#own-kernel">A custom OUYA linux kernel</a>
  <ul>
   <li><a href="#prep">Preparation</a></li>
   <li><a href="#ramdisk">Ramdisk</a></li>
   <li><a href="#ndk">Android NDK</a></li>
   <li><a href="#kernel-git">OUYA kernel + configuration</a></li>
   <li><a href="#install">Installing the kernel</a></li>
   <li><a href="#dmesg">Success!</a></li>
  </ul>
 </li>
</ul>
 
Christian Weiske Christian Weiske
owner

History