forked from jdtsmith/idlwave
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathget_html_rinfo
executable file
·2278 lines (2028 loc) · 70.9 KB
/
get_html_rinfo
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/perl
#
# Program to extract the information from the HTML version of the IDL
# manuals (v5.6 and on) and IDL itself, to support IDLWAVE.
#
# This version supports IDL v6.1
#
# (c) 1999, 2000 Carsten Dominik <[email protected]>
# (c) 2001-2004 J.D. Smith <[email protected]>
#
# Requires the HTML documentation files distributed with IDL 6.0 or
# later, decompiled from HTMLHelp idl.chm with Microsoft's HTML Help
# Workshop, and massaged with fix_wp_anchors.pl
#
# Talks to the local version of IDL in order to get additional information.
#
# Call this program from the command line like this:
#
# get_html_rinfo -path path/to/htmlfiles/ -idl /path/to/idl/executable
#
# This will scan the HTML, extract routine information, talk to IDL
# and write the following file, needed by IDLWAVE:
#
# idlw-rinfo.el: Routine information for completion etc.
# The full manpage of this program is available with "perldoc get_html_rinfo".
# Commentary:
# ===========
#
# IDL currently contains more than 1400 functions, procedures and
# object methods with more than 6500 keywords. In order to support
# writing IDL programs, the IDLWAVE mode uses a list of routines and
# keywords. RSI does not provide such a list in machine-readable
# form. Therefore this program embarks on the task of extracting the
# necessary information from available sources - the IDL manuals which
# are supplied by RSI in HTMLHelp (and thence HTML) format.
# get_html_rinfo works by looking for the "Syntax" sections and
# extracting information from there.
#
# The program extracts information from manuals written by humans (the
# documentation department at RSI). Naturally, such documents are not
# always complete, consistent or free of typos. As a result of this,
# the list of routines and keywords extracted from the manuals will
# not be perfect. In order to account for incorrect or inconsistent
# "Syntax" entries in the manual, get_html_rinfo contains a number of
# special matchers which detect specific entries and correct them
# manually. See the definition of %specials in the BEGIN block. When
# a new version of IDL is released, the actions of these special
# matchers needs to be checked, because the involved syntax entries
# may have changed.
#
# Please contact the maintainer if you find any inconsistencies between
# the routine information supplied by IDLWAVE, and the IDL documentation.
#
# Full source-guided online help, via the decompiled HTML help files,
# is available for use in many different browsers and help
# systems. See the IDLWAVE documentation for details.
#
# Acknowledgement:
# ================
#
# Without Perl, the task of reverse-engineering thousands of pages of
# documentation would have been impossible. With Perl, it only takes
# a small (ok, medium-sized, bordering on large) program like this.
# Thanks to Larry Wall and the Perl community.
#
# Thanks to Mark Goosman & Stephanie Staley from RSI for granting me
# permission to extract and distribute routine information from the
# IDL manual, and to Doug Dirks for suggesting the use of decompiled
# HTML help instead of the (infinitely more complicated) PDF help used
# previously, and for patiently fixing the documentation typos this
# routine finds.
#
# Maintainer Information:
# =======================
# When a new version of IDL is published, the following things might need
# work in order to make the result of this program as good as possible.
# To find the corresponding places in the file, search for "UPDATE".
#
# 1. UPDATE: Manual sections
# -----------------------
# Each Routine description has many sections, only a few of which
# are of interest for routine scanning. The list can be corrected
# when necessary. Right now, the Syntax and Keywords sections are
# being parsed. This relies explicitly on the HTML "heading"
# syntax upon which the HTML files are split into sections. This
# syntax should be verified to continue to be operational.
#
# 2. UPDATE: Special matchers
# ------------------------
# The %specials hash contains all the special matchers which fix
# unusual or incorrect routine descriptions. They change the entry
# so that the parser does the right thing, or they add entries to
# special arrays. The first thing to do with a new version is to
# run get_html_rinfo with the -debug flag. Part of the output will
# be a list of all special matchers, with additional info how often
# the matcher matches, and if it did its actions correctly.
# Matchers which no longer match should be checked. Matching, but
# not acting may be due to the fact that RSI fixed that particular
# entry. It is also possible that new special matchers have to be
# written for new entries - you need to check what's new with the
# IDL version and if it is processed correctly. Read the
# documentation just before the definition of the %specials hash to
# find out how such a matcher must behave.
#
# 3. UPDATE: Special sections matchers & parsers
# -------------------------------------------
# Certain keywords are linked to a separate, special section, apart
# from the routines which use those keywords. Examples include
# Graphics Keywords, Multi-Threading Keywords, Device Keywords, and
# system variables. The %special_sections hash names these
# sections as keys, and two routines as values: one to detect
# whether a given html file is this section (matcher), and the
# other parse its keywords (parser). These special sections are
# scanned for keywords, and linked to from other routines for the
# with the relevant keywords.
#
# 4. UPDATE: Statement regexp
# ------------------------
# This is also a special matcher which makes sure that no routine
# info is produced by the syntax entries for IDL language
# constructs. As RSI adds new statements (as COMPILE_OPT in
# version 5.3 or SWITCH/BREAK/CONTINUE in version 5.4), this regexp
# must be extended to match the new statements as well.
#
# 5. UPDATE: Special help topics
# ---------------------------
# Some words which can show up in IDL source code have special help
# topics (not routine names) associated with them. The
# %special_topics hash links downcase versions of these words to
# the appropriate topics. Even if word and topic are the same, it
# must be mentioned here in order to trigger help on this word.
#
# 6. IMPONDERABLES: Who knows?
# -------------------------
# RSI has shown a propensity for making large changes to the
# overall documentation format which will impact how this program
# runs. Sadly, this may require you to understand more of what it
# does than you'd otherwise be willing to. An example: with
# IDLv6.0, all object method GetProperty, SetProperty, and Init
# keywords were consolidated into a "Class Properties" page.
#
# For more detailed info, run `get_html_rinfo' with the `-debug' flag
# and check the resulting files get_html_rinfo.cpl,
# get_html_rinfo.rej, and get_html_rinfo.log. The .cpl "complaint"
# file can be sent to the RSI documentation group for fixing any
# problems detected.
#
# The most important data structure is the hash %e, which collects all
# of the various routine data and cross-linking information. It's
# structure is.
#
# $e{class}{type}{routine} # The class, type (fun | pro) and routine name
# {"file"} # The file where this routine's syntax is found
# {"kwds"} # (Hash) Keyword Name => Link
# {"Get"} # (Array) List of "get" keywords
# {"Set"} # (Array) List of "set" keywords
# {"call"} # The calling sequence
# {"add_kwds"} # (Array) of refs to:
# \-> [needs, # needs or has keywords
# name, # routine to link to
# type, # type of routine to link to
# class, # class of routine to link to
# \@nokeys,# ref to key list to exclude
# special, # Use routine's special kwds
# get_set] # Use routine's get or set kwds
# {"has_special_section_kwds"} # Has special kwds to link
# {"needs_special_section_kwds"} # Needs special kwds linked
# {"extra"} # Hash of extra routines with kwds to link to
# \-> {link_file}
# {"kwds"} # (Hash) Kwd => Link
# {"special"} # Special sec. linked to (if any)
#
# The data which actually make it into the rinfo file include class,
# type, routine,"kwds" with links, and the "extra" file and their kwds
# with links. Everything else is for internal linking.
#
#============================================================================
# This file is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This file is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this file; see the file COPYING. If not, write to the
# Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
# Boston, MA 02110-1301, USA.
#============================================================================
require 5.004;
use Data::Dumper;
# Parse command line options and make file names
use Getopt::Long;
GetOptions("-debug" => \$debug,
"-path=s" => \$path,
"-idl=s" => \$idl,
"-xname=s" => \$ignore_name_re,
"-xclass=s" => \$ignore_class_re)
or usage();
$emacs=1;
my (%special_topics,%e,%special_sections,%sysvars,%txt_kwds,%properties,
%executive_commands,@syntax_kwds,@rejects,@complaints,@add_keywords,
@enter);
if (@ARGV) {
# Something on the command line
print STDERR "Unrecognized command line args: @ARGV\n";
exit(1);
}
$idl = $idl || "idl";
# Establish default output file names
$rinfofile = "idlw-rinfo.el";
$topicsfile = "idlw-help-topics.el";
# Check the path
$path ||= ".";
die "Invalid path $path\n" unless !$path || -d $path;
opendir(DIR,$path) or die "can't opendir $path: $!";
@files=grep {-r "$path/$_" && /\.html$/} readdir(DIR);
closedir(DIR);
# Open the REJECT and LOG files file for debugging information
if ($debug) {
open REJECT, ">get_html_rinfo.rej" or
die "Cannot write to get_html_rinfo.rej\n";
open COMPLAIN,">get_html_rinfo.cpl" or
die "Cannot write to get_html_rinfo.cpl\n";
open LOG, ">get_html_rinfo.log" or
die "Cannot write to get_html_rinfo.log\n";
}
# Open the lisp file for output
open RINFO,">$rinfofile" or die "Cannot open $rinfofile for writing: $!";
# Scan all of the files
FILE:
foreach $file (@files) {
# next unless $file=~/^CDF.*\.html/;
open(FILE,"$path/$file");
local $/=undef; #Slurp mode
$file_contents=<FILE>;
# Title
if ($file_contents=~m|<title>\s*([^<]+?)\s*</title>|) {
$title=$1;
} else {
$title="";
}
# Check for special topic sections, which have no keywords (like
# if...then...else)
foreach (keys %special_topics) {
if (!defined $special_topics{$_}{file} &&
$title=~m|^\Q$_\E$|i) {
$special_topics{$_}{file}=$file;
next FILE;
}
}
# Save files with upcase titles, for tracking classes
$files{uc $title}=$file;
# Check if file contains one of the various special keyword
# sections (Graphics, etc.), and parse those keywords.
foreach (keys %special_sections ) {
if (&{$special_sections{$_}{matcher}}) { #Is it a special section?
#Parse the section for keywords
%{$special_sections{$_}{kwds}}=&{$special_sections{$_}{parser}};
$special_sections{$_}{file}=$file;
next FILE;
}
}
# Look for system variable definitions
if ($title=~/System Variables$/) {
parse_heading($file_contents,2,sub{
my ($link,$sysvar,$txt)=@_;
$sysvar=~s/<.*?>//g;
$sysvar=~s/\s*System Variables?$//;
$sysvar=~tr/!//d;
$sysvar=uc $sysvar;
foreach $sysvar (split(/,\s*/,$sysvar)) {
%{$sysvars{$sysvar}{tags}}=
parse_heading($txt,3,sub{split(/,\s*/,$_[1]);})
if $txt=~/structure/i;
$sysvars{$sysvar}{main}=$link?"$file#wp$link":$file;
}
});
next FILE;
}
# Look for "Properties" pages for extra class props (new as of IDL v6.0)
if ($title=~/^\s*([A-Za-z_0-9]+)\s+Properties\s*$/) {
my $prop_class=$1;
parse_heading($file_contents,3,sub{
my ($link,$prop,$txt)=@_;
return if $prop=~/^\s*$/;
foreach $prop (split(/,\s*/,$prop)) {
$prop=~s/ *$//;
$properties{$prop_class}{kwds}{$prop}{link}=$link;
foreach (qw(Get Set Init)) {
if ($txt=~ m{$_:(?:<.*?>\s*)*\s*Yes}i) {
$properties{$prop_class}{kwds}{$prop}{$_}=1;
} else {$properties{$prop_class}{kwds}{$prop}{$_}=0};
}
}
});
# Maybe no properties were found: we must still delete PROPERTY kwds
$properties{$prop_class}{stub}++ unless defined $properties{$prop_class};
$properties{$prop_class}{file}=$file;
next FILE;
}
# Look for a version string
if(!$idlversion &&
$file_contents=~m|<a[^>]*>[^<]*?New Features in IDL ([0-9.]+)\s*</a>|) {
$idlversion=$1;
next FILE;
}
# Normal entries: Split into the component parts
@parts=split(m|^\s*<a\s+name="wp[0-9]+">\s+</a>\s*<h3\s+class="p?Heading2">\s+([^\n\r]+)\s+|mi,$file_contents);
shift @parts;
%parts=@parts;
# Extract the Syntax and Keyword parts
$syntax=$kwds="";
foreach $part (keys %parts) {
# Require a proper heading
if (!$syntax && $part=~/^Syntax/) {
$syntax=$parts{$part};
} elsif ($part=~/^((?:[A-Z][A-Za-z0-9_]+\s+){0,2}Keywords|
Keywords:\s*(?:[A-Z][a-z0-9_]+\s*){1,2})\s*$/x) {
$kwds.=$parts{$part}
unless $1=~/Thread Pool Keywords/; # Nothing useful in TPool
}
}
next FILE unless $syntax; # Normal entries require a syntax section
diag("$file($title):>>>>>>>>>>>>>>>>>>\n");
# Clear a few variables which are used by deeper routines to return stuff
@rejects = @complaints = @add_keywords = @enter = ();
clean_up_syntax();
# Apply the special syntax matchers
$old_syntax=$syntax;
&try_specials();
# See if there are reasons to reject or complain about this entry
if (@rejects) {
# Reject
$n_rejections += scalar(@rejects);
reject($syntax,$file,@rejects);
next FILE;
}
# Try to parse out the name, class, calling sequence, and syntax keywords
unless (parse_syntax($syntax)) {
# Make a note that this section could not be parsed and move on
# to the next file.
reject($syntax,$file,("Could not be parsed"));
next FILE;
}
# Parse the text keywords, and compare to syntax keywords.
@syntax_kwds=make_unique(@syntax_kwds);
%txt_kwds=parse_keywords($kwds);
#diag("GOT KWDS SECTION:\n >>>$kwds\n<<<\n\n");
# Complain strenuously
if (@complaints) {
# Keep, but complain
$n_complaints += scalar(@complaints);
complain($old_syntax,$file,@complaints);
}
diag("\n$file($title):<<<<<<<<<<<<<<<<<<\n\n\n");
# Normalize the class/routine cases
if ($class) {
$class = case_name("class",$class);
$name = case_name("method",$name);
} else {
$name = case_name("routine",$name);
}
my $fname=make_full_name($class,$name);
# If the special matchers have not set @enter, do it here
$enter[0] = [$name,$type,$class,$call] unless @enter;
# Write a message for debugging output
diag(sprintf("%-20s %-15s %-20s with %3d keywords\n",
$type,$class,$name,scalar(keys %txt_kwds)));
# Store the stuff.
foreach $a (@enter) {
my $name = $$a[0];
my $class = $$a[2];
my $type = $$a[1];
if ($ignore_name_re && $name =~ /$ignore_name_re/o) {
diag("Ignoring name $name because of -xname option\n");
$ignore_name_cnt++;
next;
}
if ($ignore_class_re && $class =~ /$ignore_class_re/o) {
diag("Ignoring name $name because of -xclass option\n");
$ignore_class_cnt++;
next;
}
$e{$class}{$type}{$name}{file}=$file;
%{$e{$class}{$type}{$name}{kwds}}=%txt_kwds if %txt_kwds;
push @{$e{$class}{$type}{$name}{Get}},@getkwds if @getkwds;
push @{$e{$class}{$type}{$name}{Set}},@setkwds if @setkwds;
$e{$class}{$type}{$name}{call} = $$a[3];
# Add keywords from other routines
push @{$e{$class}{$type}{$name}{add_kwds}},@add_keywords if
@add_keywords;
# Special section links
my @special_section_has=grep {$has_special_section_keywords{$_} eq "has"}
(keys %has_special_section_keywords);
push @{$e{$class}{$type}{$name}{has_special_secs_kwds}},
@special_section_has if @special_section_has;
my @special_section_needs=
grep {$has_special_section_keywords{$_} eq "needs"}
(keys %has_special_section_keywords);
push @{$e{$class}{$type}{$name}{needs_special_secs_kwds}},
@special_section_needs if @special_section_needs;
}
close(FILE);
}
# We have all the information now in one huge hash. A few things
# still need to be fixed...
# 1. The IDL manual for object methods GetProperty, SetProperty & Init
# include no keywords, which are documented on a special
# "Properties" page. These were collected during the scan - here
# we put them into the right space, and remove the vestigial
# "PROPERTY" keyword in the three methods
diag("****ALL PROPERTIES: \n".Dumper(%properties)."\n");
foreach $class (keys %properties) {
my $case_class=case_name("class",$class);
my $link_file=$properties{$class}{file};
next unless defined $e{$case_class};
foreach $type (qw(Get Set Init)) {
my $t_name=$type eq "Init"?"fun":"pro";
my $method_name=
case_name("method",$type eq "Init"?$type:($type."Property"));
next unless defined $e{$case_class}{$t_name}{$method_name};
delete $e{$case_class}{$t_name}{$method_name}{kwds}{PROPERTY};
diag("DELETING SPURIOUS PROPERTY KWD: $case_class,$t_name,$method_name\n");
next if defined $properties{$class}{stub};
$e{$class}{$typ}{$method_name}{extra}{$link_file}{special}=
"$class Properties";
foreach $kwd (keys %{$properties{$class}{kwds}}) {
next unless $properties{$class}{kwds}{$kwd}{$type};
$e{$case_class}{$t_name}{$method_name}{extra}{$link_file}{kwds}{$kwd}=
$properties{$class}{kwds}{$kwd}{link};
}
}
}
foreach $class (keys %e) {
next unless $class;
$iname = case_name("method","Init");
next unless defined $e{$class}{fun}{$iname};
$ifile = $e{$class}{fun}{$iname}{file};
foreach $getset ("Get","Set") {
$pname = case_name("method",$getset."Property");
if (defined $e{$class}{fun}{$iname} &&
@{$e{$class}{fun}{$iname}{$getset}}) {
foreach (@{$e{$class}{fun}{$iname}{$getset}}) {
$e{$class}{pro}{$pname}{extra}{$ifile}{kwds}{$_}=
$e{$class}{fun}{$iname}{kwds}{$_};
}
}
}
}
# 2. The special syntax matchers may have indicated that some
# routine's keywords are linked to special sections (like Graphics
# Keywords), or that keywords from special sections weren't
# mentioned in the syntax, and need to be added outright (like
# MultiThreading). Do this now.
foreach $class (keys %e) {
foreach $type (keys %{$e{$class}}) {
foreach $name (keys %{$e{$class}{$type}}) {
# Needs all the special section keywords added outright
if (defined($e{$class}{$type}{$name}{needs_special_secs_kwds})) {
foreach my $s (@{$e{$class}{$type}{$name}{needs_special_secs_kwds}}) {
unless (defined($special_sections{$s})) {
diag("No such special section: $s.");
next;
}
my $sfile=$special_sections{$s}{file};
%{$e{$class}{$type}{$name}{extra}{$sfile}{kwds}}=
%{$special_sections{$s}{kwds}};
$e{$class}{$type}{$name}{extra}{$sfile}{special}=$s;
}
}
# Already mentions keywords, just needs links
if (defined($e{$class}{$type}{$name}{has_special_secs_kwds})) {
foreach my $s (@{$e{$class}{$type}{$name}{has_special_secs_kwds}}) {
unless (defined($special_sections{$s})) {
diag("No such special section: $sec.");
next;
}
my $sfile=$special_sections{$s}{file};
foreach (keys %{$special_sections{$s}{kwds}}) {
# Skip if keyword not defined in routine, or already linked
next if(!defined($e{$class}{$type}{$name}{kwds}{$_}) or
$e{$class}{$type}{$name}{kwds}{$_});
delete $e{$class}{$type}{$name}{kwds}{$_}; #Move to extra kwds
$e{$class}{$type}{$name}{extra}{$sfile}{kwds}{$_}=
$special_sections{$s}{kwds}{$_};
$e{$class}{$type}{$name}{extra}{$sfile}{special}=$s;
}
}
}
}
}
}
# 3. Some special matchers may have put in a request to add or link
# keywords of one more routines (or maybe just special section
# keywords from a routine) to another. Since now we know all
# keywords of all routines and their origins (special sections,
# etc.), we can do this.
foreach $class (keys %e) {
foreach $type ("fun","pro") {
foreach $name (keys %{$e{$class}{$type}}) {
$n_routines_total++;
next unless defined($e{$class}{$type}{$name}{add_kwds});
foreach my $add (@{$e{$class}{$type}{$name}{add_kwds}}) {
my (%nokeys,%only_keys,$file_to_add,$keys_to_add,$special_to_add);
my ($has,$aname,$atype,$aclass,$only_keys,$nokeys,
$special_sec,$getset)=@$add;
# Does it already have the keywords, just requiring a link
$has=$has eq "has";
# Check if we didn't know the routine type at the time of
# addition; default to "pro" if it exists.
unless ($atype) {
if (defined($e{$aclass}{"pro"}{$aname})) { $atype="pro" }
elsif (defined($e{$aclass}{"fun"}{$aname})) { $atype="fun"};
}
# Is it a actual existing entry being asked for?
next unless defined($e{$aclass}{$atype}{$aname});
foreach (@$nokeys) {$nokeys{$_}++;}
foreach (@$only_keys) {$only_keys{$_}++;}
my $afile=$e{$aclass}{$atype}{$aname}{file};
# Look for "2 degree of separation" special section keywords
# only (e.g. "graphics keywords accepted by PLOT")
if($special_sec and defined($e{$aclass}{$atype}{$aname}{extra})) {
foreach $file (keys %{$e{$aclass}{$atype}{$aname}{extra}}) {
if($e{$aclass}{$atype}{$aname}{extra}{$file}{special} =~
/$special_sec/) {
$keys_to_add=$e{$aclass}{$atype}{$aname}{extra}{$file}{kwds};
$file_to_add=$file;
$special_to_add=
$e{$aclass}{$atype}{$aname}{extra}{$file}{special};
last;
}
}
} else {
# Otherwise, add from the set of regular keywords
if ($getset) {
my @getsetkeys;
push @getsetkeys, @{$e{$aclass}{$atype}{$aname}{Get}}
if ($getset=~/get/i);
push @getsetkeys, @{$e{$aclass}{$atype}{$aname}{Set}}
if ($getset=~/set/i);
$keys_to_add={map {$_ => $e{$aclass}{$atype}{$aname}{kwds}{$_}}
@getsetkeys};
} else {
$keys_to_add=$e{$aclass}{$atype}{$aname}{kwds};
}
}
$file_to_add=$file_to_add || $afile;
# Actually add the keys (moving unlinked ones to extra if necessary)
foreach (keys %$keys_to_add) {
next if (@$only_keys && !$only_keys{$_}) || $nokeys{$_};
if($has) {
#demand that it exists in kwds already
# if so, move and link it in the extra kwds section
next if !defined($e{$class}{$type}{$name}{kwds}{$_}) or
$e{$class}{$type}{$name}{kwds}{$_}; # Leave an existing link
delete $e{$class}{$type}{$name}{kwds}{$_}; # Move to extra keywords
}
$e{$class}{$type}{$name}{extra}{$file_to_add}{kwds}{$_}=
$$keys_to_add{$_};
$e{$class}{$type}{$name}{extra}{$file_to_add}{special}=
$special_to_add if $special_to_add;
}
}
} continue {
$n_keywords_total += scalar(keys %{$e{$class}{$type}{$name}{kwds}})
if defined $e{$class}{$type}{$name}{kwds};
if(defined($e{$class}{$type}{$name}{extra})) {
foreach (keys %{$e{$class}{$type}{$name}{extra}}) {
$n_keywords_total +=
scalar(keys %{$e{$class}{$type}{$name}{extra}{$_}{kwds}});
}
}
}
}
}
#print Dumper(%e),"\n";
#print Dumper(%special_sections),"\n";
diag("SYSVARS:\n".Dumper(%sysvars)."\n");
# Print debug information about how often each special matcher matched.
# Will only be visible in debugging mode.
diag("SPECIAL MATCHERS-------------------------------matched success failed\n");
foreach $key (sort ignoring_case keys %specials) {
diag(sprintf("%-50s %3d %3d %3d\n",
$key,$special_matcnt{$key},$special_actcnt{$key},
$special_matcnt{$key}-$special_actcnt{$key}));
}
diag(sprintf("\nProblematic entries: %d rejected, %d complains.\n",
$n_rejections,$n_complaints));
# Write the lisp file
# Also write some statistics to STDERR
write_rinfo_header();
print RINFO "(defconst idlwave-system-routines\n";
print RINFO " '(\n";
printf STDERR "\n Nr Class Npro Nfun Ntot Nkwd\n";
printf STDERR "----------------------------------------------------------\n";
$classcnt = -1;
foreach $class (sort ignoring_case keys %e) {
$npro = scalar(keys %{$e{$class}{"pro"}});
$nfun = scalar(keys %{$e{$class}{"fun"}});
$nkwd = 0;
foreach $type ("pro","fun") {
foreach $name (keys %{$e{$class}{$type}}) {
$nkwd += scalar keys %{$e{$class}{$type}{$name}{kwds}}
if defined($e{$class}{$type}{$name}{kwds});
if(defined($e{$class}{$type}{$name}{extra})) {
foreach $file (keys %{$e{$class}{$type}{$name}{extra}}) {
$nkwd+= scalar keys %{$e{$class}{$type}{$name}{extra}{$file}{kwds}};
}
}
}
}
$nprotot += $npro;
$nfuntot += $nfun;
$nclass++;
printf STDERR "%3d %-32s %4d %4d %5d %5d\n",
++$classcnt,$class,$npro,$nfun,$npro+$nfun,$nkwd;
foreach $type ("pro","fun") {
foreach $name (sort ignoring_case keys %{$e{$class}{$type}}) {
print RINFO " " . make_lisp_reader_string($class,$type,$name) . "\n";
}
}
}
print STDERR "-" x 58,"\n";
printf STDERR "Total %4d %4d %5d %5d\n",
$nprotot,$nfuntot,$nprotot+$nfuntot,$n_keywords_total;
printf STDERR "Routines ignored due to -xname: %4d\n",$ignore_name_cnt
if $ignore_name_re;
printf STDERR "Routines ignored due to -xclass: %4d\n",$ignore_class_cnt
if $ignore_class_re;
print RINFO <<EOF;
)
"$n_routines_total builtin routines with $n_keywords_total keywords for IDL version $idlversion.")
EOF
# Gather sysvar and class info, and write the info
talk_to_idl();
write_sysvar_info();
# Attach classes to the files documenting them.
foreach $class (keys %e) {
if (defined $files{uc $class} && !defined($classes{$class}{file})) {
$classes{$class}{file}=$files{uc $class};
}
}
write_classtag_info();
write_executive_commands();
print RINFO <<EOF;
;; Special words with associated help topic files
(defconst idlwave-help-special-topic-words
'(
EOF
foreach (sort ignoring_case keys %special_topics) {
foreach $word (sort ignoring_case @{$special_topics{$_}{words}}) {
print RINFO " (\"$word\" . \"$special_topics{$_}{file}\")\n";
}
}
print RINFO <<EOF;
)
"Association list of help files for special context words.")
EOF
write_rinfo_footer();
close RINFO;
printf STDERR "Wrote file $rinfofile (%4d kBytes)\n", (-s $rinfofile)/1024.;
# ==========================================================================
# ==========================================================================
# SUB-ROUTINES
###--- LOGS AND MESSAGES ---###
sub usage {
# Print usage information
print STDERR <<EOF;
usage: get_html_rinfo [-debug] [-path DIR] [-idl /path/to/idl]
[-xname REGEXP] [-xclass REGEXP]
EOF
exit(1);
}
sub diag {
# Write diagnosis to STDERR and to the LOG file if we are debugging.
my $msg = @_[0];
if ($debug) {
#print STDERR $msg;
print $msg;
print LOG $msg;
}
}
sub reject {
# Write a message to the reject file.
my($string,$file,@reasons) = @_;
if ($debug) {
print REJECT (("-" x 80) . "\n") x 2;
print REJECT "File: $file\n";
print REJECT "Syntax: $string\n";
foreach $reason (@reasons) {
print REJECT "Reason: $reason\n";
}
}
}
sub complain {
# Write a message to the reject file.
my($string,$file,@reasons) = @_;
if ($debug) {
print COMPLAIN (("-" x 80) . "\n") x 2;
print COMPLAIN "File: $file\n";
print COMPLAIN "Syntax: $string\n";
foreach $reason (@reasons) {
print COMPLAIN "Reason: $reason\n";
}
}
}
###--- SYNTAX ---###
sub clean_up_syntax {
# Clean up Syntax
$syntax=~s|<a\s+name="(?:wp)?[0-9]+">||; #Entro Syntax
$syntax=~s|<p\s+class="p?Note">.*?^</p>\s*||msg; # Remove notes
$syntax=~s|<h5\s+class="p?Heading4">.*?^</h5>\s*|or|msg; # And headings
$syntax=~s/and then,.*//s;
$syntax=~s/<br>/\n/g;
$syntax=~s/<.*?>//gs; # html tags
$syntax=~s/ / /g; # special chars
$syntax=~s/—/ - /g;
$syntax=~s/ / /g;
$syntax=~s/'/'/g;
$syntax=~s/^\s+//mg; # Blanks lines
$syntax=~s/\s+$//mg;
$syntax=~s/^\s*$//mg;
$syntax=~s/{(?!(?:,|X\s*\|\s*Y))[^}]*}//g; # Internal notes in {}
$syntax=~s/&([gl])t;/$1=="g"?">":"<"/eg; # > and <
$syntax=~s|^\s*or\s*||si;
# diag("Now using cleaned syntax:>>>>>>>>\n$syntax\n<<<<<<<<<<\n");
}
sub parse_syntax {
my $txt = $_[0];
# Initialize a few variables.
# Note that these are global vars which are interpreted by the caller.
@args = @syntax_kwds = @getkwds = @setkwds = ();
$name = $type = $class = $call = "";
# and any initial explanation string, as in for OPEN
$txt =~ s/^.*?\w:(?!:)\s*//;
# Determine type (function or procedure)
if ($txt =~ /^\s*[a-z][a-z0-9_]+\s*=/si) {
$type = "fun";
} else {
$type = "pro";
}
# Determine Class (ignore the name - we grab it later)
if ($txt =~ /->\s*\[([a-z][a-z0-9_]+)::?\]([a-z][a-z0-9_]+)/i) {
$class = $1;
diag("parse_syntax: Got CLASS: $class\n");
} else {diag("parse_syntax: No CLASS found\n");}
# Clean out the remarks about BLABLA keywords
$txt =~ s/^.*?\bkeywords\b.*?:.*?this.*?accepts.*?keywords.*$//gmi;
$txt =~ s/^.*?\bkeywords\b.*?://gmi; #Some have keywords after them.
# Clean out the "only in..." stuff
$txt =~ s/\(only\s*in[^\)]*\)//gi;
# Remove various other bits of detritus.
$txt =~ s/\bor [a-z0-9 ]+,/or/gi;
$txt =~ s/\bFor [a-z0-9 ]+:/or/gi;
# Collapse each chunk of whitespace into a single SPACE
$txt =~ s/[ \t\n\r]+/ /gm;
# Get all keywords. Keywords are things with `/' before it or with `='
# after it.
diag("AFTER SYNTAX CLEANUP:\n\n$txt\n\n");
while ($txt =~m!(\{[/XYZ |]*\}|/?\[XYZ?\])? # Leading XYZ multiplexer
(\/)? # Possibly a boolean
\b
(_?[A-Z][A-Z0-9_]*) # The actual keyword
(\ *\{\s*(Get|Set|Get\s*,\s*Set)\s*\})? # Method stuff
\s*
(=)?!gx
) {
($xyz,$slash,$identifier,$getset,$equal) = ($1,$2,$3,$5,$6);
next unless $slash || $equal || ($xyz && $xyz=~m|/|);
# Everything before the first keyword is part of the calling sequence
$call = $` unless $call;
# Some keywords have a {X|Y|Z} in front which must be expanded
if ($xyz) {
$xyz=~tr|XYZ||cd;
@ids=map {$_ . $identifier} split(/(?=[XYZ])/,$xyz);
} else {
@ids = ($identifier);
}
# Save the recognized keywords away
push @syntax_kwds,@ids;
# When the keyword has a Get or Set flag, add it to these lists,
# so it can be appended to the Get|SetProperty methods' keyword
# lists later on.
push @getkwds,@ids if $getset =~ /get/i;
push @setkwds,@ids if $getset =~ /set/i;
}
# If there was no keyword, the whole text must be calling sequence
$call = ($call || $txt);
# Final whitespace and parens are killed.
$call =~ s/[ \n\t\[\]\{\}\|,]*$//; #]
# We may have killed needed closing parenthesis - fix this.
$call = close_open_parens($call);
# Find the name of the routine in the calling sequence.
if ($override_name) {
$name=$override_name;
$override_name="";
} else {
if ($class) {
if ($call =~ /->\s*\[[^\]]+\]([a-z][a-z0-9$_]*)/i) {
$name = $1;
}
} else {
if ($type eq "fun") {
if ($call =~ /=\s*([a-z][a-z0-9\$_]*)/i) {
$name = $1;
}
} elsif ($type eq "pro") {
if ($call =~ /^\s*([a-z][a-z0-9\$_]*)/si) {
$name = $1;
}
}
}
}
# Return the name to indicate success
return $name;
}
# Parse text into "Heading" Sections, optionally doing something with
# the parsed text in a callback. If the callback is passed, it's
# return value is tested, and, if a true list, that link is saved for
# each name on the list for return. If the return value is the scalar
# -1, no more processing is done.
sub parse_heading {
my ($txt,$heading,$callback)=@_;
my %ret,@ret;
$heading=3 unless $heading;
my $hstr='<h'.($heading+1).'\s*class="p?Heading'.$heading.'">';
my @sections=split(m{<a\s*name="wp([^"]+)">\s*</a> # Embedded Link
$hstr\s*
(?:<NOBR>)?
\s*((?:(?:<NOBR>)?[^\n\r]+(?:</NOBR>)?)+)[\n\r<]
}xs,$txt);
shift @sections;
diag("SECTIONS ($hstr): ".join("\n++++++++++\n",@sections)."\n")
if ($file=~/sysvars/);
while (my ($link,$name,$text)=splice(@sections,0,3)) {
if($callback) {
my @ret=&$callback($link,$name,$text);
if(@ret) {
last if $ret[0]==-1;
next if $ret[0]==1;
map {$ret{$_}=$link} @ret;
}
} else { $ret{$name}=$link; }
}
return %ret
}
# Parse the keywords section, gathering links, and comparing to the
# syntax-derived list @syntax_kwds
sub parse_keywords {
my ($txt,$heading)=@_;
my %kwds=
parse_heading($txt,$heading,
sub {
my ($link,$kwd,$text) = @_;
my @ret;
if ($kwd=~/WIDGET_CONTROL\s+Keywords/i) {
push @complaints,
"WIDGET_CONTROL keywords listed as regular keyword.";
return -1; # Abort further processing
}
if ($kwd=~/Keywords?/) {
# Some "extra" keywords are annoyingly listed in
# amidst the regular keywords
my @all=($text=~m{[iI]n\s+addition.*?the\s+
((?:[A-Z0-9_]+,\s+)+)
and\s+([A-Z0-9_]+)\s+keywords?\s+to\s+
the\s+([:A-Z0-9_]+)\s+(pro|fun)}xs);
diag("EXTRA KEYWORDS, Got: \n$text\n\n");
return 1 unless @all;
my $type=pop @all;
my ($class,$routine);
$routine=pop @all;
unshift @all,split(/,\s+/,shift @all);
push @complaints,
"Extra keywords to link listed as normal keyword: " .
"$routine: ". join(",",@all);
($class,$routine)=($routine=~/(?:([a-z][a-z0-9_]+)::?)?
([a-z][a-z0-9_]+)/ix);
push @add_keywords,["has",$routine,$type,$class];
return 1;
}
# Take care of space surrounding underscores here too
$kwd=~s/([A-Z])(?: +_|_ +)([A-Z])/$1_$2/g;
return 1 unless
$kwd =~
m{( # The entire keyword text
(?:(\[XYZ?\](_?))? # opt. [XYZ]
(\!?[A-Z][A-Z0-9-_]*) # keyword
(?![a-z]) # No lowercase lets
(?:\s*,\s*)? # optional comma
)+) # >=1 of the above