-
Notifications
You must be signed in to change notification settings - Fork 19
/
Copy path.travis.yml
126 lines (107 loc) · 5.8 KB
/
.travis.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# Extra apt package: jq is used by the script phase to read JSON responses.
addons:
  apt:
    packages:
    - jq

# The whole test suite drives docker-compose, so the Docker service is needed.
services:
- docker

# sudo is used in before_install to change a kernel setting via sysctl.
sudo: true
before_install:
# Run the repository's Docker update helper script.
- /bin/sh .travis.updateDocker.sh
# Raise the host's vm.max_map_count to 262144 before any container starts.
- sudo sysctl -w vm.max_map_count=262144
# Overriding the Elasticsearch defaults to enable CORS is no longer needed:
# - cp test/elasticsearch/elasticsearch.yml elasticsearch-docker/build/elasticsearch/
# Download the sample image and PDF used by the tesseract OCR tests.
- >-
  wget https://www.codeproject.com/KB/recipes/OCR-Chain-Code/image012.jpg
  -O test/data/image012.jpg
- >-
  wget https://raw.githubusercontent.com/tesseract-ocr/docs/master/AT-1995.pdf
  -O test/data/AT-1995.pdf
install:
# Build the images and start the whole compose stack in the background.
- docker-compose up --build -d
# Initial grace period before polling starts.
- echo "Sleeping while ES boots up, Tika font cache is built, and fscrawler runs" && sleep 60
# Poll the fscrawler log up to 7 times, 60 seconds apart, stopping early once
# the most recent DEBUG line among the last 10 log lines says the crawler is
# going to sleep. The loop normally exits 0 either way: a crawl that never
# finishes is reported by the first check of the script phase, not here.
- |
  for attempt in 1 2 3 4 5 6 7; do
    if docker-compose logs --tail 10 fscrawler \
        | grep DEBUG | tail -n1 \
        | grep -q "Fs crawler is going to sleep"; then
      break
    fi
    echo "need to sleep more $attempt"
    sleep 60
  done
script:
# Throughout the steps below, `jq .` is used to pretty-print JSON
# (http://stackoverflow.com/a/15231463/4126114).
#
# Verify that the sleep time in the install phase was enough for fscrawler
# to complete. Two signals are checked:
#   1. the last DEBUG line in the log tail is the "going to sleep" message
#      (https://stackoverflow.com/a/27726913/4126114);
- >-
  docker-compose logs --tail 10 fscrawler
  | grep DEBUG
  | tail -n1
  | grep -q "Fs crawler is going to sleep"
#   2. the status file exists inside the fscrawler container.
- docker-compose exec fscrawler ls /usr/share/fscrawler/config/docker-compose/_status.json
# Verify that tesseract works: OCR the sample image, print the resulting
# text file, and require the string ABCDE in it.
- >-
  docker-compose exec fscrawler
  tesseract --psm 3
  /usr/share/fscrawler/data/image012.jpg
  /home/fscrawler/image012
- docker-compose exec fscrawler cat /home/fscrawler/image012.txt
- docker-compose exec fscrawler grep ABCDE /home/fscrawler/image012.txt
# Print test.txt so it can be seen to be non-empty and compared with the logs.
# https://github.com/dadoonet/fscrawler/issues/276 (closed)
# TODO: how to automate testing this?
- docker-compose exec fscrawler cat /usr/share/fscrawler/data/test.txt
# Verify that this run indexed all 3 files: test.txt, image012.jpg, AT-1995.pdf.
# The status file is copied out of the container, printed, and its
# `.indexed` counter is compared with 3.
- >-
  docker-compose exec fscrawler
  cat /usr/share/fscrawler/config/docker-compose/_status.json
  > test/actual_status.json
- cat test/actual_status.json
# The substitution is quoted so an empty jq result cannot shift the operands.
- test "$(jq '.indexed' test/actual_status.json)" -eq 3
# Print the settings file so bulk_size can be compared with the logs.
# https://github.com/dadoonet/fscrawler/issues/283
# TODO: how to automate testing this?
- docker-compose exec fscrawler cat /usr/share/fscrawler/config/docker-compose/_settings.json
# Verify that test.txt is indeed a document ID, since `filename_as_id: true`
# is used.
# https://github.com/dadoonet/fscrawler/issues/282
#
# Edit: 2018-01-03 issue 282 was closed and filename_as_id now applies to
# non-xml or non-json files.
#
# Each check below follows the same three steps: fetch the document into a
# local file, pretty-print it for the build log, then require `.found` to be
# true. URLs are quoted because they contain `?`, a shell glob character, and
# `test` uses the portable `=` operator with a quoted substitution.
- >-
  docker-compose exec elasticsearch1
  curl -XGET "http://elasticsearch1:9200/docker-compose/doc/test.txt?pretty"
  > test/actual_doc_1.json
- jq . test/actual_doc_1.json
- test "$(jq '.found' test/actual_doc_1.json)" = "true"
# Fetch the two OCR test documents, print them for inspection, and require
# that each one is found.
- >-
  docker-compose exec elasticsearch1
  curl -XGET "http://elasticsearch1:9200/docker-compose/doc/image012.jpg?pretty"
  > test/actual_doc_2.json
- jq . test/actual_doc_2.json
- test "$(jq '.found' test/actual_doc_2.json)" = "true"
- >-
  docker-compose exec elasticsearch1
  curl -XGET "http://elasticsearch1:9200/docker-compose/doc/AT-1995.pdf?pretty"
  > test/actual_doc_3.json
- jq . test/actual_doc_3.json
- test "$(jq '.found' test/actual_doc_3.json)" = "true"
# Show that the example ingest pipeline was uploaded; curl -f makes this step
# fail if the server answers with an HTTP error.
- >-
  docker-compose exec tester
  curl -fsSL "http://elasticsearch1:9200/_ingest/pipeline/example_pipeline?pretty"
# Verify that fscrawler used the pipeline: the indexed test.txt document must
# be found and must carry the field foo with the value "bar".
# https://github.com/dadoonet/fscrawler/#using-ingest-node-pipeline
# The URL is quoted because it contains `?`, a shell glob character; `test`
# uses the portable `=` operator with quoted substitutions.
- >-
  docker-compose exec tester
  curl -XGET "http://elasticsearch1:9200/docker-compose/doc/test.txt?pretty"
  > test/actual_doc_4.json
- jq . test/actual_doc_4.json
- test "$(jq '.found' test/actual_doc_4.json)" = "true"
- test "$(jq -r '._source.foo' test/actual_doc_4.json)" = "bar"
# Verify uploading a file through the REST API.
# Note that it is not a problem to upload the same file "data/test.txt" that
# was already indexed, because fscrawlerrest uses a different index.
# https://github.com/dadoonet/fscrawler/#uploading-a-binary-document
- >-
  docker-compose exec tester
  curl -F "file=@/usr/share/fscrawler/data/test.txt"
  "http://fscrawlerrest:8080/_upload"
  > test/actual_doc_5.json
- jq . test/actual_doc_5.json
- test "$(jq '.ok' test/actual_doc_5.json)" = "true"
# Verify that the file uploaded through REST also went through the ingest
# pipeline. The filename cannot simply be used as the ID here, so the document
# URL is taken from the `.url` field of the upload response. The substitution
# is quoted so an unexpected value cannot be word-split or glob-expanded.
- >-
  docker-compose exec tester
  curl -XGET "$(jq -r '.url' test/actual_doc_5.json)?pretty"
  > test/actual_doc_6.json
- jq . test/actual_doc_6.json
- test "$(jq '.found' test/actual_doc_6.json)" = "true"
- test "$(jq -r '._source.foo' test/actual_doc_6.json)" = "bar"
# Listed in the order Travis runs them: after_failure comes before
# after_script, so the logs are dumped while the containers still exist.

# On a failed build, show the service logs for debugging.
after_failure:
- docker-compose logs elasticsearch1
- docker-compose logs fscrawler
- docker-compose logs fscrawlerrest

# At the very end, stop the compose stack and remove its volumes (-v).
after_script:
- docker-compose down -v