-
Notifications
You must be signed in to change notification settings - Fork 33
/
Copy pathcsv-schema-1.1.html
2997 lines (2953 loc) · 163 KB
/
csv-schema-1.1.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<!DOCTYPE html>
<html>
<head>
<title>CSV Schema Language 1.1</title>
<meta charset='utf-8'/>
<script src='https://www.w3.org/Tools/respec/respec-w3c-common' class='remove'></script>
<script class='remove'>
// ReSpec configuration for the CSV Schema Language 1.1 specification.
// NOTE(review): declared with `var` at script top level; presumably the ReSpec
// script loaded above looks this object up as a global named `respecConfig` --
// confirm before changing the declaration kind or the variable name.
var respecConfig = {
// specification status (e.g. WD, LCWD, NOTE, etc.). If in doubt use ED.
specStatus: "unofficial",
// extra text for the copyright section; here it carries the licence name
additionalCopyrightHolders: "Mozilla Public Licence version 2.0",
// the specification's short name, as in https://www.w3.org/TR/short-name/
shortName: "csvs",
// if your specification has a subtitle that goes below the main
// formal title, define it here
subtitle : "A Language for Defining and Validating CSV Data",
// if you wish the publication date to be other than today, set this
publishDate: "2016-01-25",
// if the specification's copyright date is a range of years, specify
// the start date here:
// copyrightStart: "2005"
// details of the previously published draft (CSV Schema Language 1.0):
// its maturity status, its YYYY-MM-DD publication date and its URI
previousMaturity: "ED",
previousPublishDate: "2014-08-23",
previousURI: "https://digital-preservation.github.io/csv-schema/csv-schema-1.0.html",
// if there a publicly available Editor's Draft, this is the link
// edDraftURI: "https://dev.w3.org/2009/dap/ReSpec.js/documentation.html",
edDraftURI: "https://digital-preservation.github.io/csv-schema/csv-schema-1.1.html",
// if this is a LCWD, uncomment and set the end of its review period
// lcEnd: "2009-08-05",
// editors, add as many as you like
// only "name" is required
editors: [
{ name: "Adam Retter",
company: "Evolved Binary Ltd",
companyURL: "https://adamretter.org.uk/" },
{ name: "David Underdown",
company: "The National Archives",
companyURL: "https://www.nationalarchives.gov.uk" },
{ name: "Rob Walpole",
company: "Devexe Ltd",
companyURL: "https://www.devexe.co.uk/"}
],
// authors, add as many as you like.
// This is optional, uncomment if you have authors as well as editors.
// only "name" is required. Same format as editors.
//authors: [
// { name: "Your Name", url: "https://example.org/",
// company: "Your Company", companyURL: "https://example.com/" },
//],
// name of the WG
wg: "The National Archives - Digital Preservation",
// URI of the public WG page
wgURI: "https://www.nationalarchives.gov.uk/information-management/projects-and-work/digital-preservation.htm",
// name (without the @w3.org) of the public mailing to which comments are due
wgPublicList: "csvs",
// URI of the patent status for this WG, for Rec-track documents
// !!!! IMPORTANT !!!!
// This is important for Rec-track documents, do not copy a patent URI from a random
// document unless you know what you're doing. If in doubt ask your friendly neighbourhood
// Team Contact.
// Left empty here: specStatus above is "unofficial", not a Rec-track status.
wgPatentURI: "",
// If specified, alternateFormats defines an array of alternate formats in which the
// document is available (e.g., XML, Postscript). It is not set for this document.
// alternateFormats:
// NOTE(review): presumably selects the RDFa version ReSpec emits -- confirm against the ReSpec docs.
doRDFa: "1.1",
};
</script>
<style>
<!--
/* Shaded box with double top and bottom rules, used for example content. */
div.exampleInner {
background-color: #D5DEE3;
border-top-width: 4px;
border-top-style: double;
border-top-color: lightGrey;
border-bottom-width: 4px;
border-bottom-style: double;
border-bottom-color: lightGrey;
padding: 4px;
margin: 0em;
}
/* Typographic conventions for inline code: bold for functions, italic for types. */
code.function {
font-weight: bold;
}
code.return-type {
font-style: italic;
}
code.type {
font-style: italic;
}
span.explain {
font-family: sans-serif;
font-style: italic;
}
/* Small-caps headings for the guiding principles and the numbered schema parts. */
.principle, .point {
font: small-caps 100% sans-serif;
}
/* Hierarchical list numbering (1., 1.1., ...) built from the CSS counter "item":
   each ol.nested starts a new counter level and each li.nested prints the full path. */
ol.nested {
counter-reset: item
}
li.nested {
display: block
}
li.nested:before {
content: counters(item, ".") ". ";
counter-increment: item
}
/* Layout of the EBNF rule tables: cells align to the top of the text. */
td.ebnf-num, td.ebnf-left, td.ebnf-bind, td.ebnf-right, td.ebnf-note {
vertical-align: text-top;
}
/* Column sizing and spacing that apply only inside the element with id "ebnf". */
#ebnf table.ebnf-table tr {
margin-bottom: 2pt;
}
#ebnf table td.ebnf-left {
width: 18%;
}
#ebnf table td.ebnf-right {
width: 50%;
}
td.ebnf-note {
padding-left: 3pt;
}
/*
body {
counter-reset: ebnf;
}
:not(#ebnf) > table > tbody > tr > td.ebnf-num:before {
content: "{{" counters(ebnf, ".") "}}";
counter-increment: ebnf;
}
*/ /* do not auto-number the EBNF rules in the body descriptions, as sometimes it makes sense to discuss in an informative as opposed to normative order */
/* Inside #ebnf, rule-number cells are auto-numbered as [n] from the counter "ebnf2". */
section #ebnf {
counter-reset: ebnf2;
}
#ebnf table > tbody > tr > td.ebnf-num:before {
content: "[" counters(ebnf2, ".") "]";
counter-increment: ebnf2;
}
-->
</style>
</head>
<body>
<section id="sotd">
This document represents the specification of the CSV Schema Language 1.1
as defined by <a href="https://www.nationalarchives.gov.uk">The National Archives</a>.
It is unclear yet whether this document will be submitted to a formal standards body
such as the <a href="https://w3.org">W3C</a>.
This version supersedes the original <a href="https://digital-preservation.github.io/csv-schema/csv-schema-1.0.html">CSV Schema Language 1.0</a> published on 28 August 2014.
</section>
<section id='abstract'>
<acronym title="Comma Separated Value">CSV</acronym> (Comma Separated Value) data comes in many shapes and sizes. Apart from [[RFC4180]] which is a fairly recent development (and often ignored),
there is a lack of formal definition as to CSV data formats, although in many ways this is one of the strengths of the CSV data format.
However, extracting structured information from CSV data for further processing or storage
can prove difficult if the CSV data is not well understood or perhaps not even uniform. CSV Schema
defines a textual language which can be used to define the data structure, types and rules for
CSV data formats.
</section>
<section id="introduction" class='informative'>
<h1>Introduction</h1>
<p>The intention of this document is two-fold:</p>
<ol>
<li>To be informative to users who are writing CSV Schemas, and provide a reference to the available syntax and functions.</li>
<li>To provide enough detail such that anyone with sufficient technical expertise should be able to implement a CSV Schema parser and/or CSV validator evaluating the rules defined in a CSV Schema.</li>
</ol>
<section id="background">
<h2>Background</h2>
<p>
The National Archives <acronym title="Digital Repository Infrastructure">DRI</acronym> (Digital Repository Infrastructure) system archives digitised and born-digital materials provided by <acronym title="Other Government Department">OGD</acronym>s (Other Government Departments)
and occasionally <acronym title="Non Governmental Organisation">NGO</acronym>s (Non-Governmental Organisations). For the purposes of Digital Preservation the system processes and archives large amounts of metadata, much
of this metadata is created by the supplying organisation or by transcription. The metadata is further processed, and ultimately stored both online in an
<acronym title="Resource Description Framework">RDF</acronym> Triplestore and a majority subset archived in a non-RDF <acronym title="eXtensible Markup Language">XML</acronym> format.
However it was recognised that the creation of XML or RDF metadata by the supplier
was most likely unrealistic for either technical or financial reasons. As such, CSV was recognised as a simple data format that is human readable (to a degree), that almost anyone could create
simply; CSV is the <em>lowest common denominator</em> of structured data formats.
</p>
<p>
The National Archives have strict rules about various CSV file formats that they expect, and how the data in those file formats should be set out. To ensure the quality of their archival metadata
it was recognised that CSV files would have to be validated. It was recognised that development of a schema language for CSV (and associated tools) would be of great benefit. It was
also further recognised that a general CSV Schema language would be of greater benefit if it was made publicly available and invited collaboration from other organisations and
individuals; the problem of CSV data formats is certainly not unique to The National Archives.
</p>
<p>CSV Schema is a standard currently guided by The National Archives, but developed in an open source collaborative manner that invites collaboration and contributions from all interested parties.</p>
<p>A reference implementation has been created to prove the standard: The open source <a href="https://digital-preservation.github.io/csv-validator/">CSV Validator</a> application and API, offers both CSV Schema parsing and CSV file validation.</p>
</section>
<section id="principles">
<h2>Guiding Principles</h2>
<p>The design of the CSV Schema language has been influenced by a few guiding principles, understanding these will help you to understand how and why it is structured the way that it is.</p>
<ul>
<li>
<div class="principle">Simplicity</div>
<p>The language should be expressible in plain text and should be simple enough that non-technical domain experts could easily write it without having to know a programming language or data/document modelling language such as XML, JSON or RDF.</p>
<p><strong>Note</strong>, the CSV Schema Language is NOT itself expressed in CSV, it is expressed in a simple text format.</p>
</li>
<li>
<div class="principle">Context is King!</div>
<p>A schema rule is written for each column of the CSV file. Each set of column rules are asserted against each row of the CSV file. Each rule in the CSV Schema operates on the current context (e.g. defined Column and parsed Row), unless otherwise specified. This makes the rules short and concise.</p>
</li>
<li>
<div class="principle">Stream Processing</div>
<p>CSV files may be very large, and so the CSV Schema Language was designed with concern for implementations that, although not required to by the specification, MAY wish to read and process CSV data as a stream. Few operations require data from the CSV file to be retained in memory, and where they do this is limited and should be optimisable to keep memory use to a minimum.</p>
</li>
<li>
<div class="principle">Sane Defaults</div>
<p>We try to do the right thing by default. CSV files and their brethren (Tab Separated Values etc.) can come in many shapes and sizes, by default we assume the CSV data format will comply with [[RFC4180]], of course we allow you to customize this behaviour in the CSV Schema.</p>
</li>
<li>
<div class="principle">Not a Programming Language.</div>
<p>This is worth stressing as it was something we had to keep sight of ourselves during development; CSV Schema is a simple data definition and validation language for CSV!</p>
</li>
</ul>
</section>
</section>
<section id="basics" class="informative">
<h1>Basics</h1>
<p>
A CSV Schema is really a rules based language which defines how data in each cell should be formatted.
Rules are expressed per-column of the CSV data. Rules are evaluated for each row in the CSV data.
A column rule may express constraints based on the content of other columns in the same row, however at present there is no scope for looking forward or backward through rows directly.
However, it is possible to check that a cell entry is unique within that column in the CSV file (or that the value of a combination of cells is unique)
</p>
<p>A CSV Schema is made up of two main parts:</p>
<ol class="nested">
<li class="nested"><span class="point"><a>Prolog</a></span>
<p>In turn this comprises (at most) two sections (the second being OPTIONAL):</p>
<ol class="nested">
<li class="nested">
<span class="point"><a>Version Declaration</a></span>
<p>The CSV Schema MUST explicitly state (as its first non-comment line) the version of the CSV Schema language that it uses. This is to allow for future evolution of the CSV Schema language to be easily handled by CSV Schema processors.</p>
</li>
<li class="nested">
<span class="point"><a>Global Directives</a></span>
<p>Global Directives apply to all processing of the CSV data. Global Directives for example allow you to define the separator sequence between columns in the CSV data. Global Directives appear before Column Rules and are prefixed with an <code>@</code> character.</p>
<p>The use of Global Directives is OPTIONAL, default values are used if they are not specified.</p>
</li>
</ol>
</li>
<li class="nested">
<span class="point"><a>Body</a></span>
<p>The Body of the CSV Schema MUST declare, in order, a <a title="Column Rules">Column Rule</a> for each Column in the CSV data. If validation of a Column is not desirable, then an empty rule is used.</p>
</li>
</ol>
<p>Let's now illustrate a simple CSV Schema that is concerned with CSV data about names, ages and gender:</p>
<pre class="example" data-lt="Simple CSV Schema">
version 1.1
@totalColumns 3
name: notEmpty
age: range(0, 120)
gender: is("m") or is("f") or is("t") or is("n")
</pre>
This CSV Schema basically defines that the CSV data must have 3 columns: the first
column, <em>name</em>, must have some sort of value; the second column, <em>age</em>,
must be a number between 0 and 120 inclusive; and the third column, <em>gender</em>,
must be one of the characters m, f, t or n. An example of CSV data that would match the
rule definitions in the CSV schema could be as follows:
<pre class="example" data-lt="Valid CSV Data">
name,age,gender
james,21,m
lauren,19,f
simon,57,m
</pre>
<p>An example of CSV data that would fail the rule definitions in the CSV schema could be as follows:</p>
<pre class="example" data-lt="Invalid CSV Data">
name,age,gender
james,4 years,m
lauren,19,f
simon,57,male
</pre>
<p>The Invalid CSV Data example above fails when validated against the CSV Schema because: 1) at row 2 column 2, "4 years" is not a number between 0 and 120 inclusive, and 2) at row 4 column 3, "male" is not one of the characters m, f, t, or n.</p>
</section>
<section id="new-in-1.1" class="informative">
<h1>New in CSV Schema Language 1.1 - A brief introduction to the new features of CSV Schema Language 1.1</h1>
<p>
The last 18 months with <a href="https://digital-preservation.github.io/csv-schema/csv-schema-1.0.html">CSV Schema Language 1.0</a> being in regular use at The National Archives
has highlighted a few additional <a title="Column Validation Expression">Column Validation Expressions</a> that would provide further useful validation,
simplify schema writing, or make schemas more readable. In addition the concept of a <a>String Provider</a> has been extended to allow concatenation
to produce a final string input to expressions from some set of other <a title="String Provider">String Providers</a>, and also a function to allow removal of a Windows file
extension to make certain comparisons more straightforward and robust.
</p>
<p>
The new <a title="Column Validation Expression">Column Validation Expressions</a> are the:
<ol>
<li><a>Upper Case Expression</a>, which asserts that all the characters in the column must be uppercase (according to the definitions in [[UTF-8]]).</li>
<li><a>Lower Case Expression</a>, which asserts that all the characters in the column must be lowercase (according to the definitions in [[UTF-8]]).</li>
<li><a>Integrity Check Expression</a>, this is effectively the converse of the <a>File Exists Expression</a>, checking that if there is a file present
in the folders referred to in a CSV file, it has an explicit reference within that CSV file.</li>
<li><a>XSD Date Time With Time Zone Expression</a>, this expression adapts the existing <a>XSD Date Time Expression</a> to make the timezone portion mandatory.</li>
<li><a>Identical Expression</a>, this asserts that all values in a certain column must be identical, but does not specify the precise value within the schema.
Within the CSV files received by The National Archives this is expected to be used in conjunction with a <a>Regular Expression Expression</a> to give the general form
for a batchcode field for a project, each line in a CSV should have the same batchcode, but we do not want to update the schema for each batch received to state
the exact batchcode.</li>
<li><a>Any Expression</a>, this is effectively a combination of the <a>Is Expression</a> and the <a>Or Expression</a> into one expression.</li>
<li><a>Switch Expression</a>, this allows a flatter expression of what would otherwise have to be expressed as a set of nested <a title="If Expression">If Expressions</a>.
Such expressions can be hard to read and maintain due to the number of sets of brackets that can be involved.</li>
</ol>
In addition the <a>Range Expression</a> has been extended to allow the creation of ranges that have only an upper or lower bound using similar syntax to that employed by
the <a>Length Expression</a>. This also required the addition of <a>Numeric Or Any</a> type (to allow ranges to have negative values as bounds), or the <a>Wildcard Literal</a>.
In the <a>Length Expression</a> we used only <a>Positive Integer Or Any</a> since a negative value has no sensible meaning for the length of a field.
There is also one new <a title="Global Directives">Global Directive</a>, the <a>Permit Empty Directive</a>. This allows a file with no data rows to be treated as valid.
</p>
</section>
<section>
<h1>Schema structure</h1>
<p>
The CSV schema language is formally a <a href="https://en.wikipedia.org/wiki/Context-free_grammar">context-free grammar</a>
expressed in <a href="https://en.wikipedia.org/wiki/EBNF"><dfn title="EBNF">Extended Backus-Naur Form</dfn></a> (EBNF - see also [[RFC5234]])
</p>
<p>
The following subsections examine the structure of a CSV Schema in more detail.
Each subsection comprises definitions of terms, cross-references to other definitions,
the relevant portion of the <a>EBNF</a> (links on the left-hand side go to the appendix containing the full EBNF, those on the right to a fuller explanation of those term(s)),
and examples of correct usage.
</p>
<p>
A <dfn>Schema</dfn> MUST comprise both <a>Prolog</a> and <a>Body</a>.
</p>
<table class="ebnf-table">
<tr>
<td class="ebnf-num">[1]</td>
<td class="ebnf-left"><a title="ebnf-schema">Schema</a></td>
<td class="ebnf-bind">::=</td>
<td class="ebnf-right"><a>Prolog</a> <a>Body</a></td>
</tr>
</table>
<section>
<h2>Prolog</h2>
<p>The <dfn>Prolog</dfn> of a CSV Schema MUST contain the <a>Version Declaration</a> and MAY contain one or more <a>Global Directives</a>.</p>
<table class="ebnf-table">
<tr>
<td class="ebnf-num">[2]</td>
<td class="ebnf-left"><a title="ebnf-prolog"><dfn title="prolog-def">Prolog</dfn></a></td>
<td class="ebnf-bind">::=</td>
<td class="ebnf-right"><a>VersionDecl</a> <a>GlobalDirectives</a></td>
</tr>
</table>
<section>
<h3>Version Declaration</h3>
<p>
The <dfn>Version Declaration</dfn> declares explicitly which version of the CSV Schema language is in use.
This MUST be either <code>version 1.0</code> or <code>version 1.1</code>.
If the version is not valid this is considered a <a>Schema Error</a>.
If the version is declared as 1.0 but the CSV Schema attempts to use features of 1.1 this is also considered a <a>Schema Error</a>.
The Version Declaration is MANDATORY.
</p>
<table class="ebnf-table">
<tr>
<td class="ebnf-num">[3]</td>
<td class="ebnf-left"><a title="ebnf-version-decl"><dfn>VersionDecl</dfn></a></td>
<td class="ebnf-bind">::=</td>
<td class="ebnf-right">("version 1.0" | "version 1.1")</td>
</tr>
</table>
<section>
<h4>Example Version Declaration</h4>
<pre class="example" data-lt="Version Declaration Syntax">
version 1.0
</pre>
</section>
</section>
<section>
<h3>Global Directives</h3>
<p>
The <dfn>Global Directives</dfn> allow you to modify the overall processing of a CSV file or how subsequent <a title="Column Definition">Column Definitions</a> are evaluated.
The use of Global Directives within a CSV Schema is OPTIONAL.
The last two Global Directives described (<a>No Header Directive</a> and <a>Ignore Column Name Case Directive</a>) are mutually exclusive,
they MUST NOT both be used in a single schema.
There is no inherent reason why the Global Directives should be in the order shown, <a>EBNF</a> does not directly cater for unordered lists.
You could explicitly list each possible ordering, but that would require 5!=120 orderings to be included in the EBNF.
All directives (both Global Directives and <a>Column Directives</a>) used in the CSV Schema are indicated by the <dfn>Directive Prefix</dfn>,
defined as the character <code>@</code> i.e. the [[UTF-8]] character code <code>0x40</code>.
</p>
<p>
Whitespace is not generally significant, so Global Directives can be entered all on a single line, or each on separate lines
(see <a href="#example-global-directives"></a>).
</p>
<table class="ebnf-table">
<tr>
<td class="ebnf-num">[4]</td>
<td class="ebnf-left"><a title="ebnf-global-directives"><dfn>GlobalDirectives</dfn></a></td>
<td class="ebnf-bind">::=</td>
<td class="ebnf-right"><a>SeparatorDirective</a>? <a>QuotedDirective</a>? <a>TotalColumnsDirective</a>? <a>PermitEmptyDirective</a>? (<a>NoHeaderDirective</a> | <a>IgnoreColumnNameCaseDirective</a>)?</td>
<td class="ebnf-note">/* <a>xgc:unordered</a> */</td>
</tr>
<tr>
<td class="ebnf-num">[5]</td>
<td class="ebnf-left"><a title="ebnf-directive-prefix"><dfn>DirectivePrefix</dfn></a></td>
<td class="ebnf-bind">::=</td>
<td class="ebnf-right">"@"</td>
<td class="ebnf-note"></td>
</tr>
</table>
<section>
<h4>Separator Directive</h4>
<p>The <dfn>Separator Directive</dfn> allows you to specify the separator character that is used between columns in the CSV data.
As with all <a>Global Directives</a> the Separator Directive is OPTIONAL, if not supplied the default value is assumed.
By default the separator is a <code>comma (,)</code> i.e. the [[UTF-8]] character code <code>0x2c</code> (as specified in [[RFC4180]]).
</p>
<p>
The Separator Directive takes a MANDATORY parameter in the form of either a <a>Separator Tab Expression</a> or a <a>Separator Character</a>.
</p>
<p>A <dfn>Separator Tab Expression</dfn> indicates that the separator comprises a <em>tab</em> character, i.e. [[UTF-8]] character code <code>0x09</code>.</p>
<p>A <dfn>Separator Character</dfn> is a <a>Character Literal</a>: the character which is to be treated as the column separator.</p>
<table class="ebnf-table">
<tr>
<td class="ebnf-num">[6]</td>
<td class="ebnf-left"><a title="ebnf-separator-directive"><dfn>SeparatorDirective</dfn></a></td>
<td class="ebnf-bind">::=</td>
<td class="ebnf-right"><a>DirectivePrefix</a> "separator" (<a>SeparatorTabExpr</a> | <a>SeparatorChar</a>)</td>
</tr>
<tr>
<td class="ebnf-num">[7]</td>
<td class="ebnf-left"><a title="ebnf-separator-tab-expr"><dfn>SeparatorTabExpr</dfn></a></td>
<td class="ebnf-bind">::=</td>
<td class="ebnf-right">"TAB" | '\t'</td>
</tr>
<tr>
<td class="ebnf-num">[8]</td>
<td class="ebnf-left"><a title="ebnf-separator-char"><dfn>SeparatorChar</dfn></a></td>
<td class="ebnf-bind">::=</td>
<td class="ebnf-right"><a title="ebnf-character-literal">CharacterLiteral</a></td>
</tr>
</table>
</section>
<section>
<h4>Quoted Directive</h4>
<p>The <dfn>Quoted Directive</dfn> allows you to specify whether or not all columns are <em>quoted</em>.
That is to say that their values are encased in <em>quotation mark</em> characters, i.e. [[UTF-8]] character code <code>0x22</code>.
In practice most CSV libraries are able to detect and handle the presence or absence of quotes and handle it appropriately,
but implementations of this schema language should be able to decide how to handle this situation.
</p>
<table class="ebnf-table">
<tr>
<td class="ebnf-num">[9]</td>
<td class="ebnf-left"><a title="ebnf-quoted-directive"><dfn>QuotedDirective</dfn></a></td>
<td class="ebnf-bind">::=</td>
<td class="ebnf-right"><a>DirectivePrefix</a> "quoted"</td>
</tr>
</table>
</section>
<section>
<h4>Total Columns Directive</h4>
<p>
The <dfn>Total Columns Directive</dfn> allows you to specify the total number of data columns expected to make up each row of the CSV file.
The parser will also verify that the <a>Body</a> of the CSV Schema contains the same number of <a>Column Rules</a>,
a mismatch is considered a <a>Schema Error</a>.
The Total Columns Directive is OPTIONAL: when this directive is not used this verification of the number of Column Rules cannot be performed,
and it will be assumed that you have supplied the correct number of Column Rules.
</p>
<table class="ebnf-table">
<tr>
<td class="ebnf-num">[10]</td>
<td class="ebnf-left"><a title="ebnf-total-columns-directive"><dfn>TotalColumnsDirective</dfn></a></td>
<td class="ebnf-bind">::=</td>
<td class="ebnf-right"><a>DirectivePrefix</a> "totalColumns" <a>PositiveNonZeroIntegerLiteral</a></td>
</tr>
</table>
</section>
<section>
<h4>Permit Empty Directive</h4>
<p>
<em>This is a new directive in CSV Schema Language 1.1</em>
</p>
<p>
The <dfn>Permit Empty Directive</dfn> allows you to specify that the CSV file can be empty: i.e. there is no row data.
The Permit Empty Directive is OPTIONAL: when not present an empty file will cause a validation error.
The Permit Empty Directive can be used in conjunction with the <a>No Header Directive</a> thereby permitting a completely empty CSV file.
If the <a>No Header Directive</a> is not present then a minimum of one row containing column names must be provided.
</p>
<table class="ebnf-table">
<tr>
<td class="ebnf-num">[11]</td>
<td class="ebnf-left"><a title="ebnf-permit-empty-directive"><dfn>PermitEmptyDirective</dfn></a></td>
<td class="ebnf-bind">::=</td>
<td class="ebnf-right"><a>DirectivePrefix</a> "permitEmpty"</td>
</tr>
</table>
</section>
<section>
<h4>No Header Directive</h4>
<p>
The <dfn>No Header Directive</dfn> is used to indicate that the CSV file to be validated does not contain a header row: i.e. all rows are data rows.
The No Header Directive is OPTIONAL: when this directive is not used the parser assumes by default that the first row of the CSV file to be validated contains column names,
not data, and so the first row is skipped during validation.
</p>
<p>The No Header Directive is mutually exclusive to the use of the <a>Ignore Column Name Case Directive</a>, when one is used, the other MUST NOT be.</p>
<table class="ebnf-table">
<tr>
<td class="ebnf-num">[12]</td>
<td class="ebnf-left"><a title="ebnf-no-header-directive"><dfn>NoHeaderDirective</dfn></a></td>
<td class="ebnf-bind">::=</td>
<td class="ebnf-right"><a>DirectivePrefix</a> "noHeader"</td>
</tr>
</table>
</section>
<section>
<h4>Ignore Column Name Case Directive</h4>
<p>
The <dfn>Ignore Column Name Case Directive</dfn> is intended to tell the parser to ignore mismatches in case between the <a title="Column Identifier">Column Identifiers</a> supplied in a CSV file to be validated
and those used in giving the <a title="Column Definition">Column Definitions</a> in the schema.</p>
<p>
The Ignore Column Name Case Directive is mutually exclusive to the use of the <a>No Header Directive</a>, when one is used, the other MUST NOT be.
</p>
<table class="ebnf-table">
<tr>
<td class="ebnf-num">[13]</td>
<td class="ebnf-left"><a title="ebnf-ignore-column-name-case-directive"><dfn>IgnoreColumnNameCaseDirective</dfn></a></td>
<td class="ebnf-bind">::=</td>
<td class="ebnf-right"><a>DirectivePrefix</a> "ignoreColumnNameCase"</td>
</tr>
</table>
</section>
<section>
<h4>Example Global Directives</h4>
<pre class="example" data-lt="Global Directives Syntax 1">
@separator ';' @quoted @totalColumns 21 @noHeader
</pre>
<pre class="example" data-lt="Global Directives Syntax 2">
@separator TAB
@quoted
@totalColumns 21
@permitEmpty
@ignoreColumnNameCase
</pre>
</section>
</section>
</section>
<section>
<h2>Body</h2>
<p>
The <dfn>Body</dfn> of a CSV Schema comprises at least one <dfn>Body Part</dfn>,
each of which is a combination of OPTIONAL <a title="Comment">Comments</a> with a <a>Column Definition</a> (in either order).
A Column Definition MUST be included.
</p>
<table class="ebnf-table">
<tr>
<td class="ebnf-num">[14]</td>
<td class="ebnf-left"><a title="ebnf-body"><dfn title="body-def">Body</dfn></a></td>
<td class="ebnf-bind">::=</td>
<td class="ebnf-right"><a>BodyPart</a>+</td>
</tr>
<tr>
<td class="ebnf-num">[15]</td>
<td class="ebnf-left"><a title="ebnf-body-part"><dfn>BodyPart</dfn></a></td>
<td class="ebnf-bind">::=</td>
<td class="ebnf-right"><a title="ebnf-comment">Comment</a>* <a title="ebnf-column-definition">ColumnDefinition</a> <a title="ebnf-comment">Comment</a>*</td>
</tr>
</table>
<section>
<h3>Comments</h3>
<p>
There are two types of <dfn>Comment</dfn>: either <a title="Single Line Comment">Single Line</a> or <a title="Multiple Line Comment">Multiple Line</a>.
</p>
<p>
A <dfn>Single Line Comment</dfn> is started with two <code>forward slashes (//)</code>, i.e. the [[UTF-8]] character codes <code>0x2F 0x2F</code>.
It is terminated by any [[UTF-8]] character that creates a line-break.
</p>
<p>
A <dfn>Multiple Line Comment</dfn> is started using the combination of a <code>forward slash (/)</code> and an <code>asterisk (*)</code>,
i.e. the [[UTF-8]] character codes <code>0x2F 0x2A</code>.
It is terminated by the reverse combination, <code>asterisk (*)</code> <code>forward slash (/)</code>,
i.e. the [[UTF-8]] character codes <code>0x2A 0x2F</code>.
<strong>Any</strong> [[UTF-8]] character except asterisk may be used between these comment markers, even if it forces a new line.
Comments do not need to start at the beginning of a line, but must be either before or after a complete <a>Column Definition</a> or another <a>Comment</a>.
</p>
<table class="ebnf-table">
<tr>
<td class="ebnf-num">[16]</td>
<td class="ebnf-left"><a title="ebnf-comment"><dfn title="comment-def">Comment</dfn></a></td>
<td class="ebnf-bind">::=</td>
<td class="ebnf-right"><a>SingleLineComment</a> | <a>MultiLineComment</a></td>
<td class="ebnf-note"></td>
</tr>
<tr>
<td class="ebnf-num">[17]</td>
<td class="ebnf-left"><a title="ebnf-single-line-comment"><dfn>SingleLineComment</dfn></a></td>
<td class="ebnf-bind">::=</td>
<td class="ebnf-right">//[\S\t ]*</td>
<td class="ebnf-note">/* <a>xgc:regular-expression</a> */</td>
</tr>
<tr>
<td class="ebnf-num">[18]</td>
<td class="ebnf-left"><a title="ebnf-multi-line-comment"><dfn>MultiLineComment</dfn></a></td>
<td class="ebnf-bind">::=</td>
<td class="ebnf-right">\/\*(?:[^*\r\n]+|(?:\r?\n))*\*\/</td>
<td class="ebnf-note">/* <a>xgc:regular-expression</a> */</td>
</tr>
</table>
<section>
<h4>Example Comments</h4>
<pre class="example" data-lt="Comment Syntax">
//This Comment is a Single Line Comment it terminates at this line break
/*This Comment is a Multi Line Comment:
it
can
go
on
for as many lines as you like, until you type*/
</pre>
</section>
</section>
<section>
<h3>Column Definitions</h3>
<p>
<dfn title="Column Definition">Column Definitions</dfn> comprise a <a>Column Identifier</a> or <a>Quoted Column Identifier</a> followed by a <code>colon (:)</code>,
i.e. the [[UTF-8]] character code <code>0x3A</code>, followed by a <a title="Column Rules">Column Rule</a>.
There MUST be a Column Definition for every column in the CSV that will be validated against the Schema, however the Column Rule can be left empty if no validation is needed for a specific column.
</p>
<table class="ebnf-table">
<tr>
<td class="ebnf-num">[19]</td>
<td class="ebnf-left"><a title="ebnf-column-definition"><dfn>ColumnDefinition</dfn></a></td>
<td class="ebnf-bind">::=</td>
<td class="ebnf-right">(<a>ColumnIdentifier</a> | <a>QuotedColumnIdentifier</a>) ":" <a>ColumnRule</a></td>
</tr>
</table>
<section>
<h4>Column Identifiers</h4>
<p>
There are two classes of identifier that can be used for columns, the original simple <dfn>Column Identifier</dfn>, and the <dfn>Quoted Column Identifier</dfn>.
</p>
<p>A <a>Column Identifier</a> is either a <a>Positive Non Zero Integer Literal</a> (most commonly used when the CSV file to be validated has no header row -
see <a>No Header Directive</a>) which indicates the offset of the column (starting from 1), or an <a>Ident</a>.
</p>
<p>
The <a>Quoted Column Identifier</a> allows a greater range of characters to be used in naming the column than can be supported by an <a>Ident</a>,
but the identifier MUST be wrapped in <code>quotation marks (")</code>,
i.e. the [[UTF-8]] character code <code>0x22</code> (this is implicit in its definition as a <a>String Literal</a>).
</p>
<p>
Identifiers MUST be unique within a single Schema.
</p>
<table class="ebnf-table">
<tr>
<td class="ebnf-num">[20]</td>
<td class="ebnf-left"><a title="ebnf-column-identifier"><dfn>ColumnIdentifier</dfn></a></td>
<td class="ebnf-bind">::=</td>
<td class="ebnf-right"><a>PositiveNonZeroIntegerLiteral</a> | <a>Ident</a></td>
</tr>
<tr>
<td class="ebnf-num">[21]</td>
<td class="ebnf-left"><a title="ebnf-quoted-column-identifier"><dfn>QuotedColumnIdentifier</dfn></a></td>
<td class="ebnf-bind">::=</td>
<td class="ebnf-right"><a>StringLiteral</a></td>
</tr>
</table>
<section>
<h5>Usage</h5>
<pre class="example" data-lt="Column Identifier and Quoted Column Identifier Syntax">
a_column_identifier
"a quoted column identifier"
</pre>
</section>
</section>
<section>
<h4>Column Rules</h4>
<p>
A <dfn title="Column Rules">Column Rule</dfn> is a combination of any number of <a title="Column Validation Expression">Column Validation Expressions</a>,
along with OPTIONAL <a>Column Directives</a>.
You MAY use an empty Column Rule if there is no requirement for an individual column to be validated.
</p>
<p>
As <a title="Column Validation Expression">Column Validation Expressions</a> are the primary means of applying validation,
they are described in their own full section of this document.
The range and variety of expressions available make supplying comprehensive examples here impractical,
though some will be used to show the basic structure of a Column Rule.
</p>
<p>
White space is not generally important within a Column Rule, but the whole rule must be on a single line.
</p>
<table class="ebnf-table">
<tr>
<td class="ebnf-num">[22]</td>
<td class="ebnf-left"><a title="ebnf-column-rule"><dfn>ColumnRule</dfn></a></td>
<td class="ebnf-bind">::=</td>
<td class="ebnf-right"><a>ColumnValidationExpr</a>* <a>ColumnDirectives</a></td>
</tr>
</table>
<section>
<h5>Column Directives</h5>
<p>
There are four OPTIONAL <dfn>Column Directives</dfn> that are used
to modify aspects of how the <a>Column Rules</a> are evaluated.
Like <a>Global Directives</a>, Column Directives are indicated by the <a>Directive Prefix</a>,
defined as the character <code>@</code> i.e. the [[UTF-8]] character code <code>0x40</code>.
</p>
<p>
The Column Directives are the <a>Optional Directive</a>, the <a>Match Is False Directive</a>, the <a>Ignore Case Directive</a>,
and the <a>Warning Directive</a>. The column directives may be specified in any order (though there is no straightforward way to express this in <a>EBNF</a>
without listing every possible order).</p>
<table class="ebnf-table">
<tr>
<td class="ebnf-num">[23]</td>
<td class="ebnf-left"><a title="ebnf-column-directives"><dfn>ColumnDirectives</dfn></a></td>
<td class="ebnf-bind">::=</td>
<td class="ebnf-right"><a>OptionalDirective</a>? <a>MatchIsFalseDirective</a>? <a>IgnoreCaseDirective</a>? <a>WarningDirective</a>?</td>
<td class="ebnf-note">/* <a>xgc:unordered</a> */</td>
</tr>
</table>
<section>
<h6>Optional Directive</h6>
<p>
The <dfn>Optional Directive</dfn> is used when completion of the data field in the original CSV file to be validated is OPTIONAL.
When this directive is used the data in the column is considered valid if the <a title="Column Rules">Column Rule</a> evaluates to true, or if the column is empty.
</p>
<table class="ebnf-table">
<tr>
<td class="ebnf-num">[24]</td>
<td class="ebnf-left"><a title="ebnf-optional-directive"><dfn>OptionalDirective</dfn></a></td>
<td class="ebnf-bind">::=</td>
<td class="ebnf-right"><a>DirectivePrefix</a> "optional"</td>
</tr>
</table>
</section>
<section>
<h6>Match Is False Directive</h6>
<p>
The <dfn>Match Is False Directive</dfn> is used to flip the result of a test from negative to positive (or vice-versa).
It may be very simple to write a condition which matches the data considered to be invalid, while the equivalent for valid data would be very convoluted.
</p>
<table class="ebnf-table">
<tr>
<td class="ebnf-num">[25]</td>
<td class="ebnf-left"><a title="ebnf-match-is-false-directive"><dfn>MatchIsFalseDirective</dfn></a></td>
<td class="ebnf-bind">::=</td>
<td class="ebnf-right"><a>DirectivePrefix</a> "matchIsFalse"</td>
</tr>
</table>
</section>
<section>
<h6>Ignore Case Directive</h6>
<p>
The <dfn>Ignore Case Directive</dfn> is used when the case of a column value is not important.
Two strings which differ only in the case used for characters within the string would be considered a match for all string related column rules.
</p>
<table class="ebnf-table">
<tr>
<td class="ebnf-num">[26]</td>
<td class="ebnf-left"><a title="ebnf-ignore-case-directive"><dfn>IgnoreCaseDirective</dfn></a></td>
<td class="ebnf-bind">::=</td>
<td class="ebnf-right"><a>DirectivePrefix</a> "ignoreCase"</td>
</tr>
</table>
</section>
<section>
<h6>Warning Directive</h6>
<p>
The <dfn>Warning Directive</dfn> is used to convert a <a>Validation Error</a> into a <a>Validation Warning</a>.
This is useful if you wish to be alerted to a data condition which is unusual, but not necessarily invalid.
For instance, at The National Archives we have come across archival material where the clerk who originally completed a form
wrote down an "impossible" date such as 30 February or 31 April. We have to do our best to accept the data as originally supplied
(we have no idea if it is the day or month of the date which is actually incorrect), but we also wish to ensure that additional
Quality Assurance checking is performed to ensure this is not a transcription error. Warnings are listed in the validation report,
but the data file MAY still be considered valid if only warnings are present.
</p>
<table class="ebnf-table">
<tr>
<td class="ebnf-num">[27]</td>
<td class="ebnf-left"><a title="ebnf-warning-directive"><dfn>WarningDirective</dfn></a></td>
<td class="ebnf-bind">::=</td>
<td class="ebnf-right"><a>DirectivePrefix</a> "warning"</td>
</tr>
</table>
</section>
</section>
</section>
<section>
<h4>Column Definitions examples</h4>
<pre class="example" data-lt="Column Definition Syntax">
a_column_title: is("somedata") or is("otherdata") @optional @matchIsFalse @ignoreCase @warning
another_column_title: not("somedata") and not("otherdata") @ignoreCase
</pre>
<p>
The two Column Definitions are both validating the data in their respective columns against the explicit strings <code>somedata</code> and <code>otherdata</code>.
Ignoring the <a>Column Directives</a> for the moment, the column rule defined for <code>a_column_title</code> would return true if the CSV data for that column
contained either the precise string <code>somedata</code> or <code>otherdata</code>. However, the <a>Optional Directive</a> means a completely empty column
would also be acceptable. Also, since the <a>Ignore Case Directive</a> is also applied, the strings <code>SomeData</code> or <code>OTHERDATA</code> (for example)
would also be acceptable. But, since the <a>Match Is False Directive</a> is in effect, the validation is inverted: it would actually be any string <em>other</em>
than the two specified which would be regarded as acceptable data. Since the <a>Warning Directive</a> is also used, a validation failure would not be considered
an error though.
</p>
<p>
The second Column Definition (with the effect of the <a>Match Is False Directive</a> on the first taken into account) is actually logically equivalent to the first
(see <a href="https://en.wikipedia.org/wiki/De_Morgan%27s_laws">De Morgan's Laws</a>). However, since the <a>Optional Directive</a> has not been used, an empty column
would not be valid data, and since the <a>Warning Directive</a> has also not been included, a <a>Validation Error</a> would be raised instead of a <a>Validation Warning</a>.
</p>
</section>
</section>
</section>
</section>
<section>
<h1>Column Validation Expressions</h1>
<p>
The key building blocks for <a>Column Rules</a> are <dfn title="Column Validation Expression">Column Validation Expressions</dfn>.
These are divided into two main classes, <a title="Non Conditional Expression">Non Conditional Expressions</a> and <a title="Conditional Expression">Conditional Expressions</a>.
Non Conditional Expressions boil down to checks resulting in a pass or fail (a number of expressions may be combined to produce an overall validation check). Conditional Expressions allow for more subtle checks:
if, for example, you are validating a <code>title</code> column which allows the values <code>Mr</code>, <code>Mrs</code>, <code>Ms</code>, <code>Miss</code> and <code>Dr</code>,
you could construct a Conditional Expression which also checks the <code>sex</code> column and if that contains <code>female</code>,
then <code>Mr</code> would be regarded as invalid (strictly speaking that would also require the use of an <a>Explicit Context Expression</a> to refer to the other column,
but that is a subexpression of the Non Conditional Expression class).
</p>
<p><strong>NOTE</strong> To increase control over expression applicability and to avoid creating a <a href="https://en.wikipedia.org/wiki/Left_recursion">left-recursive</a> grammar (which could lead to problems for various parser implementations),
<a title="Column Validation Expression">Column Validation Expressions</a> have been further split into <a title="Combinatorial Expression">Combinatorial Expressions</a> and <a title="Non Combinatorial Expression">Non Combinatorial Expressions</a>.</p>
<table class="ebnf-table">
<tr>
<td class="ebnf-num">[28]</td>
<td class="ebnf-left"><a title="ebnf-column-validation-expr"><dfn>ColumnValidationExpr</dfn></a></td>
<td class="ebnf-bind">::=</td>
<td class="ebnf-right"><a>CombinatorialExpr</a> | <a>NonCombinatorialExpr</a></td>
</tr>
</table>
<section>
<h3>Combinatorial Expressions</h3>
<p>
A <dfn>Combinatorial Expression</dfn> combines two or more <a title="Column Validation Expression">Column Validation Expressions</a>, which allows more complicated tests
on the validity of data in a column.
There are two types, <a title="Or Expression">Or Expressions</a> and <a title="And Expression">And Expressions</a>.
They are of equal precedence, and evaluation of <a title="Column Validation Expression">Column Validation Expressions</a> is performed from <em>left-to-right</em>.
See also the <a>Any Expression</a> which is logically equivalent to a series of <a title="Is Expression">Is Expressions</a>
joined by <a title="Or Expression">Or Expressions</a>.
</p>
<table class="ebnf-table">
<tr>
<td class="ebnf-num">[29]</td>
<td class="ebnf-left"><a title="ebnf-combinatorial-expr"><dfn>CombinatorialExpr</dfn></a></td>
<td class="ebnf-bind">::=</td>
<td class="ebnf-right"><a>OrExpr</a> | <a>AndExpr</a></td>
</tr>
</table>
<section>
<h4>Or Expressions</h4>
<p>
An <dfn>Or Expression</dfn> is used as a standard boolean operator to indicate that the column data should be treated as being <em>valid if either (or both)</em>
the expressions linked by the Or Expression evaluate to true.
</p>
<table class="ebnf-table">
<tr>
<td class="ebnf-num">[30]</td>
<td class="ebnf-left"><a title="ebnf-or-expr"><dfn>OrExpr</dfn></a></td>
<td class="ebnf-bind">::=</td>
<td class="ebnf-right"><a>NonCombinatorialExpr</a> "or" <a>ColumnValidationExpr</a></td>
</tr>
</table>
</section>
<section>
<h4>And Expressions</h4>
<p>
An <dfn>And Expression</dfn> is used as a standard boolean operator to indicate that the column data should be treated as being <em>valid when both</em>
the expressions linked by the And Expression evaluate to true. Use of an explicit And Expression is OPTIONAL:
if two <a title="Column Validation Expression">Column Validation Expressions</a> are written in succession for the same column they will be treated as having an
implicit And Expression joining them.
</p>
<table class="ebnf-table">
<tr>
<td class="ebnf-num">[31]</td>
<td class="ebnf-left"><a title="ebnf-and-expr"><dfn>AndExpr</dfn></a></td>
<td class="ebnf-bind">::=</td>
<td class="ebnf-right"><a>NonCombinatorialExpr</a> "and" <a>ColumnValidationExpr</a></td>
</tr>
</table>
</section>
</section>
<section>
<h2>Non Combinatorial Expressions</h2>
<p>A <dfn>Non Combinatorial Expression</dfn> is a Column Validation Expression which is evaluated by itself,
unless it is combined with another through a <a>Combinatorial Expression</a>.
The majority of Column Validation Expressions are of the Non Combinatorial Expression class.</p>
<table class="ebnf-table">
<tr>
<td class="ebnf-num">[32]</td>
<td class="ebnf-left"><a title="ebnf-non-combinatorial-expr"><dfn>NonCombinatorialExpr</dfn></a></td>
<td class="ebnf-bind">::=</td>
<td class="ebnf-right"><a>NonConditionalExpr</a> | <a>ConditionalExpr</a></td>
</tr>
</table>
<section>
<h2>Non Conditional Expressions</h2>
<p>
<dfn title="Non Conditional Expression">Non Conditional Expressions</dfn> are divided into three classes of sub-expressions:
<a title="Single Expression">Single Expressions</a>, <a title="External Single Expression">External Single Expressions</a>, and <a title="Parenthesized Expression">Parenthesized Expressions</a>.
The first two are individual validation checks (differing in that the second allows access to some resource outside the CSV file being validated),
whilst the last provides a mechanism for controlling the evaluation order of complex compound expressions.
</p>
<table class="ebnf-table">
<tr>
<td class="ebnf-num">[33]</td>
<td class="ebnf-left"><a title="ebnf-non-conditional-expr"><dfn>NonConditionalExpr</dfn></a></td>
<td class="ebnf-bind">::=</td>
<td class="ebnf-right"><a>SingleExpr</a> | <a>ExternalSingleExpr</a> | <a>ParenthesizedExpr</a></td>
</tr>
</table>
<section>
<h3>Single Expressions</h3>
<p>
<dfn title="Single Expression">Single Expressions</dfn> are the basic building blocks of <a>Column Rules</a>. There are currently 27 available for use
as of CSV Schema Language 1.1 (and some have their own subexpressions used as parameters), although the first is really used as an OPTIONAL modifier to the rest.
In many cases values can be provided to the test either as an explicit string (or number where appropriate), or by reference to the value held by another column.
</p>
<table class="ebnf-table">
<tr>
<td class="ebnf-num">[34]</td>
<td class="ebnf-left"><a title="ebnf-single-expr"><dfn>SingleExpr</dfn></a></td>
<td class="ebnf-bind">::=</td>
<td class="ebnf-right"><a>ExplicitContextExpr</a>? (<a>IsExpr</a> | <a>AnyExpr</a> | <a>NotExpr</a> | <a>InExpr</a> | <a>StartsWithExpr</a> |
<a>EndsWithExpr</a> | <a>RegExpExpr</a> | <a>RangeExpr</a> | <a>LengthExpr</a> | <a>EmptyExpr</a> | <a>NotEmptyExpr</a> | <a>UniqueExpr</a> |
<a>UriExpr</a> | <a>XsdDateTimeExpr</a> | <a>XsdDateTimeWithTimeZoneExpr</a> | <a>XsdDateExpr</a> | <a>XsdTimeExpr</a> | <a>UkDateExpr</a> | <a>DateExpr</a> |
<a>PartialUkDateExpr</a> | <a>PartialDateExpr</a> | <a>Uuid4Expr</a> | <a>PositiveIntegerExpr</a> | <a>UpperCaseExpr</a> | <a>LowerCaseExpr</a> |
<a>IdenticalExpr</a>)</td>
</tr>
</table>
<section>
<h4>Explicit Context Expressions</h4>
<p>
The <dfn>Explicit Context Expression</dfn> is used to indicate that the expression following should be tested against the value in a foreign column (explicit context),
rather than the current column (which is the default context).
</p>
<table class="ebnf-table">
<tr>
<td class="ebnf-num">[35]</td>
<td class="ebnf-left"><a title="ebnf-explicit-context-expr"><dfn>ExplicitContextExpr</dfn></a></td>
<td class="ebnf-bind">::=</td>
<td class="ebnf-right"><a>ColumnRef</a> "/"</td>
</tr>
<tr>
<td class="ebnf-num">[36]</td>
<td class="ebnf-left"><a title="ebnf-column-ref"><dfn>ColumnRef</dfn></a></td>
<td class="ebnf-bind">::=</td>
<td class="ebnf-right">"$" (<a>ColumnIdentifier</a> | <a>QuotedColumnIdentifier</a>)</td>
</tr>
</table>
<section>
<h5>Usage</h5>
<pre class="example" data-lt="Explicit Context Expression Syntax">
a_column: is("some string") and $another_column/starts("some string") //here two tests are combined on a single line, the second test here looks to the second column
another_column: //to check its value starts with "some string"
</pre>
</section>
</section>
<section>
<h4>Is Expressions</h4>
<p>
An <dfn>Is Expression</dfn> checks that the value of the column is identical to the supplied string or the value in the referenced column.
</p>
<table class="ebnf-table">
<tr>
<td class="ebnf-num">[37]</td>
<td class="ebnf-left"><a title="ebnf-is-expr"><dfn>IsExpr</dfn></a></td>
<td class="ebnf-bind">::=</td>
<td class="ebnf-right">"is(" <a>StringProvider</a> ")"</td>
</tr>
</table>
<section>
<h5>Usage</h5>
<pre class="example" data-lt="Is Expression Syntax">
a_column: is("some string") //the contents of a_column must be the string "some string"
another_column: is($a_column) //the contents of another_column must be the value held in a_column, treated as a string
</pre>
</section>
</section>
<section>
<h4>Any Expressions</h4>