-
Notifications
You must be signed in to change notification settings - Fork 74
/
Copy pathgenmd.js
259 lines (238 loc) · 8.26 KB
/
genmd.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
#!/usr/bin/env node
/**
* @fileoverview Script for generating Markdown (.md) files from KnowledgeBase (.TXT) files
* @author <a href="mailto:[email protected]">Jeff Parsons</a>
*
* This should be run from the project's root:
*
* node scripts/genmd.js txt | tee scripts/genmd.log
*/
"use strict";
let fs = require("fs");
let path = require("path");
let mkdirp = require("mkdirp");
/*
 * Output settings.
 *
 * TEST is a manual switch: when it is true, every generated path is rooted under "tmp/" and
 * processText() returns before creating any directories or files.
 *
 * rootDir is the prefix placed in front of the "kb/" and "id/" output directories.
 */
const TEST = false;
const rootDir = TEST ? "tmp/" : "";
/**
 * isAlphaNum(s)
 *
 * Reports whether s is a non-empty string made up solely of the letters A-Z (in either case)
 * and the digits 0-9.  Empty, null and undefined inputs all yield false.
 *
 * @param {string} s
 * @return {boolean}
 */
function isAlphaNum(s)
{
    // The falsy guard is what rejects the empty string; the pattern alone would accept it.
    return s ? s.match(/^[a-z0-9]*$/i) !== null : false;
}
/**
 * toHex(n, cch)
 *
 * Formats n as upper-case hexadecimal, exactly cch digits wide: short values are padded on the
 * left with zeros, and anything above the low cch nibbles is discarded.  Number.toString(16)
 * cannot produce that fixed-width form by itself.
 *
 * @param {number} n
 * @param {number} [cch] (default is 2 characters)
 * @return {string}
 */
function toHex(n, cch = 2)
{
    const sDigits = "0123456789ABCDEF";
    const aNibbles = [];
    // Digits are produced least-significant first, so they are reversed before joining.
    for (let cRemaining = cch; cRemaining; cRemaining--) {
        aNibbles.push(sDigits[n & 0xf]);
        n = n >> 4;
    }
    return aNibbles.reverse().join("");
}
/**
 * replaceSpecial(s)
 *
 * Replace selected "special characters" with corresponding HTML entities, as well as any character sequences
 * that might be misinterpreted by the Liquid template engine used by Jekyll and GitHub Pages.
 *
 * The replacements are applied in order.  "&" must be handled first: every later step emits an
 * entity that itself begins with "&", and those ampersands must not be escaped a second time.
 * Double braces are not turned into entities; a space is inserted between them instead.
 *
 * @param {string} s
 * @return {string}
 */
function replaceSpecial(s)
{
    return s
        .replace(/&/g, "&amp;")
        .replace(/\|/g, "&#124;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;")
        .replace(/"/g, "&quot;")
        .replace(/\$/g, "&#36;")
        .replace(/\*/g, "&#42;")
        .replace(/\[/g, "&#91;")
        .replace(/\\/g, "&#92;")
        .replace(/]/g, "&#93;")
        .replace(/__/g, "&#95;&#95;")
        .replace(/\{\{/g, "{ {")
        .replace(/\}\}/g, "} }");
}
/**
 * validateASCII(s, sSource)
 *
 * Returns a copy of s in which every character is 7-bit ASCII.  Printable characters
 * (0x20 through 0x7E) and TAB, LF and CR pass through unchanged.  Every other character is
 * reported on the console, tagged with sSource and its 1-based position, and is then replaced:
 * the CP-1252 "smart quote" codes 0x91-0x94 become plain ' or ", and anything else becomes its
 * hex value in angle brackets (for example, "<A9>").
 *
 * @param {string} s
 * @param {string} sSource
 * @return {string}
 */
function validateASCII(s, sSource)
{
    const aOut = [];
    for (let pos = 1; pos <= s.length; pos++) {
        const code = s.charCodeAt(pos - 1);
        const fPrintable = code >= 0x20 && code < 0x7F;
        const fWhitespace = code == 0x09 || code == 0x0A || code == 0x0D;
        if (fPrintable || fWhitespace) {
            aOut.push(s[pos - 1]);
            continue;
        }
        const sHex = "<" + toHex(code) + ">";
        console.log(sSource + ", pos " + pos + ": unrecognized character " + sHex + " (" + code + ")");
        /*
         * Some CP-1252 characters slipped into the titles of a few KB articles, so deal with those here.
         * They are still logged above (in hex form) before being mapped to a plain quote.
         */
        switch (code) {
        case 0x91:      // left single quote
        case 0x92:      // right single quote
            aOut.push("'");
            break;
        case 0x93:      // left double quote
        case 0x94:      // right double quote
            aOut.push('"');
            break;
        default:
            aOut.push(sHex);
            break;
        }
    }
    return aOut.join("");
}
/**
 * processDir(sDir)
 *
 * Walks sDir recursively: every subdirectory is passed back to processDir(), and every file
 * whose extension is exactly ".TXT" (the comparison is case-sensitive) is passed to
 * processFile().  All other files are ignored.
 *
 * The walk is synchronous on purpose.  An asynchronous fs.readdir() callback runs after the
 * caller's try/catch has already been left, so anything thrown there (a readdir failure, or an
 * exception from processFile()) could never be caught by the caller.  Doing the work
 * synchronously lets those errors propagate normally, and it also makes the order in which
 * files are processed deterministic.
 *
 * @param {string} sDir
 * @throws {Error} if the directory cannot be read, an entry cannot be stat'ed, or processing a file fails
 */
function processDir(sDir) {
    for (const sFile of fs.readdirSync(sDir)) {
        const sPath = path.join(sDir, sFile);
        if (fs.statSync(sPath).isDirectory()) {
            processDir(sPath);
        } else if (path.extname(sPath) === ".TXT") {
            processFile(sPath);
        }
    }
}
/**
 * processFile(sFile)
 *
 * Reads one KnowledgeBase .TXT file, locates each article header in it, and passes the header
 * fields plus the article body to processText().  The output directory is the input path with
 * "txt/" swapped for rootDir + "kb/" and the ".TXT" suffix dropped; the output file is the
 * README.md inside that directory.
 *
 * @param {string} sFile
 */
function processFile(sFile) {
    const sTargetDir = sFile.replace("txt/", rootDir + "kb/").replace(".TXT", "");
    const sTargetFile = path.join(sTargetDir, "README.md");
    // "binary" yields one character per byte, so 8-bit characters can be detected later.
    const sContents = fs.readFileSync(sFile, "binary");

    /*
        Example text:

        DOCUMENT:Q10022  07-SEP-2001  [word97]
        TITLE   :OFF97: EPS File Is Not Printed or Is Printed Incorrectly in NT
        PRODUCT :Word 97 for Windows
        PROD/VER:WINDOWS:97; winnt:4.0
        OPER/SYS:
        KEYWORDS:kbprint kbPrinting

        ======================================================================
     */
    const reHeader = /DOCUMENT:(Q[0-9]+)\s+([0-9A-Z-]+)\s+\[([^\]]*)]\s*TITLE\s*:([^\r\n]*)\s*PRODUCT\s*:([^\r\n]*)\s*PROD\/VER::?([^\r\n]*)\s*OPER\/SYS:([^\r\n]*)\s*KEYWORDS:([^\r\n]*)\s*=+[\r\n]+/g;

    /*
     * More than one KB article per TXT file is not really expected; collecting every header is a
     * hold-over from the script that generated articles from the monolithic text files captured
     * from the MSPL CD-ROMs.
     */
    const aHeaders = [];
    for (let header = reHeader.exec(sContents); header; header = reHeader.exec(sContents)) {
        aHeaders.push(header);
    }

    aHeaders.forEach(function(header, iHeader) {
        /*
         * Capture groups, in order: article identifier, date, product code(s), article title,
         * product name(s), product version(s), operating system(s), keyword(s).
         */
        const [sHeader, sID, sDate, sProductID, sTitle, sProductName, sVersions, sSystem, sKeywords] = header;

        // The body runs from the end of this header to the start of the next one (or to end of file).
        const iStart = header.index + sHeader.length;
        const nextHeader = aHeaders[iHeader + 1];
        const iEnd = nextHeader ? nextHeader.index : sContents.length;
        let sBody = sContents.slice(iStart, iEnd).trim();

        // Everything from the standard legal notice onward is dropped.
        const iNotice = sBody.indexOf("\r\nTHE INFORMATION PROVIDED IN THE MICROSOFT KNOWLEDGE BASE IS");
        if (iNotice < 0) {
            console.log("warning: no legal notice?");
        } else {
            sBody = sBody.slice(0, iNotice);
        }

        processText(sID, sTitle, sProductID, sProductName, sVersions, sSystem, sKeywords, sDate, sBody, sTargetDir, sTargetFile);
    });
}
/**
 * processText(sID, sTitle, sProductID, sProductName, sProductVersions, sSystem, sKeywords, sDateModified, sText, sNewDir, sNewFile)
 *
 * Builds the Markdown page for one KB article and writes it to sNewFile, then appends a link to
 * the article to the per-product listing at rootDir + "id/<product id>/README.md" (creating that
 * listing, with its own front matter, if it does not exist yet).
 *
 * The page consists of Jekyll front matter, a heading, and the article header fields and body as
 * tab-indented lines wrapped in {% raw %} ... {% endraw %}.  Body lines are passed through
 * validateASCII(); the title is additionally passed through replaceSpecial().
 *
 * When TEST is set, the function returns after building the text, without touching the disk.
 *
 * @param {string} sID
 * @param {string} sTitle
 * @param {string} sProductID
 * @param {string} sProductName
 * @param {string} sProductVersions
 * @param {string} sSystem
 * @param {string} sKeywords
 * @param {string} sDateModified
 * @param {string} sText
 * @param {string} sNewDir
 * @param {string} sNewFile
 * @throws {Error} if sProductID is not purely alphanumeric (raised after the article file has been written)
 */
function processText(sID, sTitle, sProductID, sProductName, sProductVersions, sSystem, sKeywords, sDateModified, sText, sNewDir, sNewFile)
{
    let sNewText = "{% raw %}\n\n";
    sNewText += "\tArticle: " + sID + "\n";
    sNewText += "\tProduct(s): " + sProductName + "\n";
    sNewText += "\tVersion(s): " + sProductVersions + "\n";
    sNewText += "\tOperating System(s): " + sSystem + "\n";
    sNewText += "\tKeyword(s): " + sKeywords + "\n";
    sNewText += "\tLast Modified: " + sDateModified + "\n\t\n";

    // Split into lines, dropping at most one leading space from each line.
    const aLines = sText.split(/(?:^|\r?\n) ?/);
    aLines.forEach(function(sLine, iLine) {
        sNewText += '\t' + validateASCII(sLine, sID + ", line " + (iLine + 1)) + '\n';
    });
    sNewText += "\n{% endraw %}\n";

    const sNewTitle = replaceSpecial(validateASCII(sTitle, sID + " title"));
    const sSiteDir = '/' + sNewDir.replace(rootDir, "");
    sNewText = "---\nlayout: page\ntitle: \"" + sID + ": " + sNewTitle + "\"\npermalink: " + sSiteDir + "/\n---\n\n## " + sID + ": " + sNewTitle + "\n\n" + sNewText;

    if (TEST) return;

    if (!fs.existsSync(sNewDir)) {
        mkdirp.sync(sNewDir);
    }

    const size = fs.existsSync(sNewFile) && fs.statSync(sNewFile).size || 0;
    /*
     * TODO: Add a flag to force overwrites, to be sure everything is updated.  All we can say for sure
     * (without wasting more time reading the file) is that if the sizes are different, the files differ.
     *
     * The size on disk is in bytes, so it must be compared with the encoded (UTF-8) length of the
     * new text, not with its length in characters: the header fields are not run through
     * validateASCII() and may contain characters that occupy more than one byte once written.
     */
    if (size != Buffer.byteLength(sNewText)) fs.writeFileSync(sNewFile, sNewText);

    /*
     * Update listings by product ID, too.
     */
    if (!isAlphaNum(sProductID)) {
        throw new Error(sID + " contains unexpected product ID: " + sProductID);
    }
    const sListDir = rootDir + "id/" + sProductID.toLowerCase();
    if (!fs.existsSync(sListDir)) {
        mkdirp.sync(sListDir);
    }
    const sListFile = sListDir + "/README.md";
    let sListText = "- [" + sID + ": " + sNewTitle + "](../.." + sSiteDir + "/)\n";
    if (!fs.existsSync(sListFile)) {
        // First article for this product: start the listing with its front matter and heading.
        sListText = "---\nlayout: page\ntitle: \"" + sProductName + "\"\npermalink: /" + sListDir.replace(rootDir, "") + "/\n---\n\n## KB Articles for " + sProductName + "\n\n" + sListText;
    }
    fs.appendFileSync(sListFile, sListText);
}
/*
 * Entry point: the first command-line argument names the input directory.  Without one, print
 * the usage line and exit; otherwise process the directory, reporting any error that is thrown
 * synchronously by printing its message.
 */
if (process.argv.length > 2) {
    try {
        processDir(process.argv[2]);
    } catch (err) {
        console.log(err.message);
    }
} else {
    console.log("usage: node genmd [input directory]");
    process.exit();
}