Skip to content

Commit

Permalink
Add autocomplete api + search boxes
Browse files Browse the repository at this point in the history
  • Loading branch information
drkane committed Mar 13, 2018
1 parent 6f64368 commit cddea94
Show file tree
Hide file tree
Showing 5 changed files with 250 additions and 17 deletions.
3 changes: 3 additions & 0 deletions data_import/create_elasticsearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@
"source": {"type": "string"},
"name": {"type": "string"}
}
},
"complete_names": {
"type": "completion"
}
}
}
Expand Down
49 changes: 38 additions & 11 deletions data_import/import_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import os
import titlecase
import datetime
import math


def title_exceptions(word, **kwargs):
Expand Down Expand Up @@ -670,35 +671,61 @@ def import_ccni(chars={},
def clean_chars(chars=None, pc_es=None, es_pc_index="postcode", es_pc_type="postcode"):
    """Geocode and normalise every charity record in ``chars``.

    For each record the postcode is looked up (only when ``pc_es`` is given)
    and the record is then passed through ``clean_char``, which performs all
    per-record normalisation (url, domain, org-ids, names, completion data).

    :param chars: dict mapping a charity key to its record dict. Records are
        updated in place. Defaults to a fresh empty dict.
    :param pc_es: postcode lookup client handed to ``fetch_postcode``; when
        falsy no geocoding is attempted.
    :param es_pc_index: index name handed to ``fetch_postcode``.
    :param es_pc_type: document type handed to ``fetch_postcode``.
    :returns: the same ``chars`` dict, with every record cleaned.
    """
    if chars is None:
        chars = {}

    ccount = 0    # records prepared for indexing
    geocount = 0  # records for which a postcode lookup returned data
    for c in chars:
        if pc_es:
            geo_data = fetch_postcode(chars[c]["geo"]["postcode"], pc_es, es_pc_index, es_pc_type)
            if geo_data:
                # geo_data is indexed as (location, areas)
                chars[c]["geo"]["location"] = geo_data[0]
                chars[c]["geo"]["areas"] = geo_data[1]
                geocount += 1

        # All per-record normalisation lives in clean_char so that the
        # import and reindex paths share one implementation.
        chars[c] = clean_char(chars[c])

        ccount += 1
        if ccount % 10000 == 0:
            print('\r', "[Prepare] %s charites prepared for indexing" % ccount, end='')
    print('\r', "[Prepare] %s charites prepared for indexing" % ccount)
    print('\r', "[Geo] %s charites added location details" % geocount)

    return chars

def clean_char(char):
    """Normalise a single charity record in place and return it.

    Sets ``url``, ``domain`` and ``org-ids`` via the module's helpers, fills
    in ``known_as`` from the first entry of ``names`` when it is empty,
    rebuilds ``alt_names``, builds the ``complete_names`` completion payload
    and stamps ``last_modified`` with the current local time.

    :param char: charity record dict; must contain ``url``, ``known_as`` and
        ``names`` (a list of dicts each holding a ``name``).
    :returns: the same dict, updated.
    """
    char["url"] = parse_url(char["url"])
    char["domain"] = get_domain(char["url"])
    char['org-ids'] = add_org_id_prefix(char)

    # Fall back to the first recorded name; a record with no names at all
    # keeps its empty known_as instead of raising IndexError.
    if not char["known_as"] and char["names"]:
        char["known_as"] = char["names"][0]["name"]

    # Distinct, non-empty names other than the display name. Sorted so the
    # stored document is deterministic between runs.
    names = sorted({
        n["name"] for n in char["names"]
        if n["name"] and n["name"] != char["known_as"]
    })
    char["alt_names"] = names

    # Every word-boundary suffix of every name is offered as a completion
    # input, so typing a word from the middle of a name still matches:
    # "Royal Free Charity" -> "Royal Free Charity", "Free Charity", "Charity".
    suffixes = set()
    for name in names + [char["known_as"]]:
        if name:
            tokens = name.split()
            suffixes.update(" ".join(tokens[start:]) for start in range(len(tokens)))

    # Weight suggestions on a log scale of income, with a floor of 1.
    # Income is clamped at zero because math.log1p raises ValueError for
    # values <= -1.
    income = char.get("latest_income", 0) or 0
    char["complete_names"] = {
        "input": sorted(suffixes),
        "weight": max(1, math.ceil(math.log1p(max(income, 0)))),
    }

    char["last_modified"] = datetime.datetime.now()

    # @TODO capitalisation of names

    return char


def save_to_elasticsearch(chars, es, es_index):
    """Bulk-save every charity record in ``chars`` and report the outcome.

    :param chars: dict of charity records; only the values are sent.
    :param es: Elasticsearch client handed to ``bulk``.
    :param es_index: index name, used here only in the progress messages.
        NOTE(review): the records presumably carry their own ``_index``
        action metadata - confirm against the callers.
    """
    print('\r', "[elasticsearch] saving %s charities to %s index" % (len(chars), es_index))
    # A single bulk call. raise_on_error=False collects per-document failures
    # in results[1] instead of aborting the run on the first bad record.
    results = bulk(es, chars.values(), raise_on_error=False)
    print('\r', "[elasticsearch] saved %s charities to %s index" % (results[0], es_index))
    print('\r', "[elasticsearch] %s errors reported" % len(results[1]))

Expand Down
81 changes: 81 additions & 0 deletions data_import/reindex_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
import argparse
import os
from elasticsearch import Elasticsearch
from elasticsearch.helpers import scan
from import_data import clean_char, save_to_elasticsearch


def _parse_args():
    """Build the command-line parser and return the parsed arguments."""
    parser = argparse.ArgumentParser(description='Reindex charity data into elasticsearch')

    parser.add_argument('--folder', type=str, default='data',
                        help='Root path of the data folder.')

    # elasticsearch options
    parser.add_argument('--es-host', default="localhost", help='host for the elasticsearch instance')
    parser.add_argument('--es-port', default=9200, type=int, help='port for the elasticsearch instance')
    parser.add_argument('--es-url-prefix', default='', help='Elasticsearch url prefix')
    parser.add_argument('--es-use-ssl', action='store_true', help='Use ssl to connect to elasticsearch')
    parser.add_argument('--es-index', default='charitysearch', help='index used to store charity data')
    parser.add_argument('--es-type', default='charity', help='type used to store charity data')

    # elasticsearch postcode options
    parser.add_argument('--es-pc-host', default=None, help='host for the postcode elasticsearch instance')
    parser.add_argument('--es-pc-port', default=9200, type=int, help='port for the postcode elasticsearch instance')
    parser.add_argument('--es-pc-url-prefix', default='', help='Postcode elasticsearch url prefix')
    parser.add_argument('--es-pc-use-ssl', action='store_true', help='Use ssl to connect to postcode elasticsearch')
    parser.add_argument('--es-pc-index', default='postcode', help='index used to store postcode data')
    parser.add_argument('--es-pc-type', default='postcode', help='type used to store postcode data')

    parser.add_argument('--debug', action='store_true',
                        help='Print a random sample of up to 10 fetched charities before saving')

    return parser.parse_args()


def _connect_elasticsearch(args):
    """Return a reachable client for the charity index, preferring a URL from the environment."""
    es = Elasticsearch(host=args.es_host, port=args.es_port, url_prefix=args.es_url_prefix, use_ssl=args.es_use_ssl)

    # The first of these environment variables that is set overrides the
    # host/port given on the command line.
    for e_v in ("ELASTICSEARCH_URL", "ES_URL", "BONSAI_URL"):
        if os.environ.get(e_v):
            es = Elasticsearch(os.environ.get(e_v))
            break

    if not es.ping():
        raise ValueError("Elasticsearch connection failed")
    return es


def _fetch_charities(es, es_index, es_type):
    """Scan the whole index and return ``{_id: cleaned bulk action}`` for every document."""
    chars = {}
    for r in scan(es, index=es_index, doc_type=es_type):
        # Turn the hit back into a bulk "index" action aimed at the same
        # index, type and id, so that saving overwrites the stored document.
        char = {
            **r["_source"],
            "_index": r["_index"],
            "_type": r["_type"],
            "_op_type": "index",
            "_id": r["_id"],
        }
        chars[r["_id"]] = clean_char(char)
        if len(chars) % 10000 == 0:
            print('\r', "[Fetch] %s charites fetched from index" % len(chars), end='')
    print('\r', "[Fetch] %s charites fetched from index" % len(chars))
    return chars


def main():
    """Re-run ``clean_char`` over every indexed charity and save the result back.

    :raises ValueError: if the charity instance, or the postcode instance
        when ``--es-pc-host`` is given, does not answer a ping.
    """
    args = _parse_args()
    es = _connect_elasticsearch(args)

    # The postcode instance is only checked for connectivity here; nothing
    # below uses it.
    pc_es = None  # Elasticsearch postcode instance
    if args.es_pc_host:
        pc_es = Elasticsearch(host=args.es_pc_host, port=args.es_pc_port, url_prefix=args.es_pc_url_prefix, use_ssl=args.es_pc_use_ssl)
        if not pc_es.ping():
            raise ValueError("Connection failed - postcode elasticsearch")

    chars = _fetch_charities(es, args.es_index, args.es_type)

    if args.debug:
        import random
        # sample() with a capped size copes with an empty or small index,
        # where choices() would raise IndexError on an empty sequence.
        for key in random.sample(list(chars), min(10, len(chars))):
            print(key, chars[key])

    save_to_elasticsearch(chars, es, args.es_index)


if __name__ == '__main__':
    main()
29 changes: 28 additions & 1 deletion server/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def esdoc_orresponse(query):
i["name"] = i["source"]["known_as"] + " (" + i["id"] + ")"
if not i["source"]["active"]:
i["name"] += " [INACTIVE]"
if i["name"].lower() == json.loads(query)["params"]["name"].lower() and i["score"] == res["hits"]["max_score"]:
if i["source"]["known_as"].lower() == json.loads(query)["params"]["name"].lower() and i["score"] == res["hits"]["max_score"]:
i["match"] = True
else:
i["match"] = False
Expand Down Expand Up @@ -269,6 +269,33 @@ def about():
return bottle.template('about', this_year=datetime.datetime.now().year)


@app.route('/autocomplete')
def autocomplete():
    """Return completion suggestions for the ``q`` request parameter.

    Runs a fuzzy (edit distance 1) completion-suggester query against the
    ``complete_names`` field and returns
    ``{"results": [{"label": <known_as>, "value": <document id>}, ...]}``.
    The list is empty when the response carries no suggestions.
    """
    search = bottle.request.params.q
    doc = {
        "suggest": {
            "suggest-1": {
                "prefix": search,
                "completion": {
                    "field": "complete_names",
                    "fuzzy": {
                        "fuzziness": 1
                    }
                }
            }
        }
    }
    # NOTE(review): doc_type is hard-coded to "csv_data" - confirm this is the
    # type the charity documents are actually indexed under.
    res = app.config["es"].search(
        index=app.config["es_index"], doc_type="csv_data", body=doc,
        _source_include=['known_as'])

    # The options are read from the first entry under the suggester's name.
    # Treat a missing or empty entry as "no suggestions" rather than letting
    # the [0] lookup raise IndexError.
    entries = res.get("suggest", {}).get("suggest-1") or [{}]
    options = entries[0].get("options", [])

    return {"results": [
        {
            "label": x["_source"]["known_as"],
            "value": x["_id"]
        } for x in options
    ]}


def get_csv_options():
# work out CSV options
csv_options = {}
Expand Down
105 changes: 100 additions & 5 deletions views/search_form.html
Original file line number Diff line number Diff line change
@@ -1,10 +1,105 @@
<!-- Static search form; the script further down mounts the autocomplete widget into its .field element -->
<form method="get" action="/" id="search-autocomplete">
  <div class="field has-addons has-addons-centered">
    <div class="control is-expanded">
      <input name="q" class="input is-large" type="text" placeholder="Search for a charity name or number" value="{{term}}">
    </div>
    <div class="control">
      <input type="submit" value="Search" class="button is-info is-large">
    </div>
  </div>
</form>
<script crossorigin src="https://unpkg.com/react@16/umd/react.development.js"></script>
<script crossorigin src="https://unpkg.com/react-dom@16/umd/react-dom.development.js"></script>


<!-- <script crossorigin src="https://unpkg.com/react@16/umd/react.production.min.js"></script>
<script crossorigin src="https://unpkg.com/react-dom@16/umd/react-dom.production.min.js"></script> -->
<script src="https://unpkg.com/babel-standalone@6.15.0/babel.min.js"></script>
<script type="text/babel">

class AutoComplete extends React.Component {
constructor(props) {
super(props);
this.state = {
"results": [],
"loading": false,
"q": props.value
}
this.handleChange = this.handleChange.bind(this);
}

handleChange(e) {
const element = this;
this.setState({"q": e.target.value});
if(this.state.q.length > 2){
this.setState({"loading": true});
fetch(`/autocomplete?q=${this.state.q}`)
.then(function(response) {
return response.json();
})
.then(function(myJson) {
element.setState({
"results": myJson["results"],
"loading": false
});
});
} else{
this.setState({
"results": []
})
}
}

getHighlightedText(text, highlight) {
// Split text on higlight term, include term itself into parts, ignore case
// https://stackoverflow.com/questions/29652862/highlight-text-using-reactjs
var parts = text.split(new RegExp(`(${highlight})`, 'gi'));
return <span>{parts.map((part, i) => part.toLowerCase() === highlight.toLowerCase() ? <b key={i}>{part}</b> : part)}</span>;
}

render() {
return (
<div className="dropdown is-active" style={ {display: 'block', width: '100%'} }>
<div className="dropdown-trigger field has-addons has-addons-centered">
<div className={(this.state.loading ? "is-loading" : "") + " control is-expanded"}>
<input value={this.state.q}
name="q"
className="input is-large is-fullwidth"
placeholder="Search for a charity name or number"
type="text"
onChange={this.handleChange}
aria-haspopup="true"
aria-controls="dropdown-menu" />
</div>
<div className="control">
<input type="submit" value="Search" className="button is-info is-large" />
</div>
</div>
{ this.state.results.length > 0 &&
<div className="dropdown-menu" id="dropdown-menu" role="menu" style={ {width: '100%'} }>
<div className="dropdown-content">
{this.state.results.map((result, i) =>
<React.Fragment key={i}>
{i > 0 && <hr className="dropdown-divider" />}
<a href={"/charity/" + result.value} data-value={result.value} data-label={result.label} className="dropdown-item">
<div className="columns">
<div className="column">{this.getHighlightedText(result.label, this.state.q)}</div>
<div className="column is-italic has-text-grey is-narrow">{result.value}</div>
</div>
</a>
</React.Fragment>
)}
</div>
</div>
}
</div>
)
}
}

// Mount the AutoComplete widget inside the search form, seeding it with the
// text currently in the form's "q" input.
const search_form = document.querySelector('#search-autocomplete')
ReactDOM.render(
<AutoComplete value={search_form.q.value} />,
// Render target: the form's ".field" element
search_form.querySelector(".field")
);
</script>

0 comments on commit cddea94

Please sign in to comment.