application

#!/usr/bin/python
#encoding=utf8
import os
import os, sys
sys.path.append(os.path.dirname(__file__))
import re
#virtenv = os.environ['OPENSHIFT_PYTHON_DIR'] + '/virtenv/'
#virtualenv = os.path.join(virtenv, 'bin/activate_this.py')
#try:
#	execfile(virtualenv, dict(__file__=virtualenv))
#except IOError:
#	pass

# IMPORTANT: Put any additional includes below this line.  If placed above this
# line, it's possible required libraries won't be in your searchable path
def search1(word):
  import requests
  import re
  try:
    googleAnswer = requests.get(u'http://www.google.com/search?lr=lang_fa&q=%D9%81%D8%A7%D8%B1%D8%B3%DB%8C+'+  word, timeout=2).text
    googleAnswer = googleAnswer.replace(u'%D9%81%D8%A7%D8%B1%D8%B3%DB%8C+', '')
    didYouMeanSign = 'Did you mean:.*href="[/a-z?&_;=%]*q=([a-z]*).*<i>.*q=http'
    showingResultsForSign = 'Showing results for.*href="[/a-z?&_;=%]*q=([a-z]*).*<i>.*Search instead for'
    didYouMean = re.search(didYouMeanSign, googleAnswer)
    showingResultsFor = re.search(showingResultsForSign, googleAnswer)
    if didYouMean is not None:
      for i in didYouMean.group(1):
        if i not in ['a','e','i','o'] and i not in word:
           return word
      return didYouMean.group(1)
    elif showingResultsFor is not None:
      for i in showingResultsFor.group(1):
        if i not in ['a','e','i','o'] and i not in word:
           return word
      return showingResultsFor.group(1)
    return word
  except:
    return word
def search2(word):
  import requests
  import re
  try:
    googleAnswer = requests.get(u'http://www.google.com/search?lr=lang_fa&q='+  word, timeout=2).text
    didYouMeanSign = 'Did you mean:.*href="[/a-z?&_;=%]*q=([a-z]*).*<i>.*q=http'
    showingResultsForSign = 'Showing results for.*href="[/a-z?&_;=%]*q=([a-z]*).*<i>.*Search instead for'
    didYouMean = re.search(didYouMeanSign, googleAnswer)
    showingResultsFor = re.search(showingResultsForSign, googleAnswer)
    if didYouMean is not None:
      for i in didYouMean.group(1):
        if i not in ['a','e','i','o'] and i not in word:
           return word
      return didYouMean.group(1)
    elif showingResultsFor is not None:
      for i in showingResultsFor.group(1):
        if i not in ['a','e','i','o'] and i not in word:
           return word
      return showingResultsFor.group(1)
    return word
  except:
    return word
def googleSuggest(word):
  searchResult1 = search1(word)
  if searchResult1 != word:
    return searchResult1
  else:
    return search2(word)
from cgi import parse_qs, escape
def worddb(s="",d=""):
	xmlFile = os.path.dirname(__file__)+'/worddb.xml'
	import xml.etree.ElementTree as ET
	if s != "":
		try:
			tree = None
			try:
				tree = ET.parse(xmlFile)
			except IOError:
				open("worddb.xml","w").close()
				tree = ET.parse(xmlFile)
		except ET.ParseError:
			translate = ET.Element('translate')
			tree = ET.ElementTree(translate)
			tree.write(xmlFile)
		root = tree.getroot()
		if d == "":
			for word in root.findall("word"):
				fa = word.get("fa").encode("utf8")
				if s == fa:
					return word.get("fe") 
			return
		else:
			word = ET.SubElement(root, 'word')
			word.set("fa", s.decode("utf8"))
			word.set("fe", d.decode("utf8"))
			tree.write(xmlFile)
	return
		

		
def application(environ, start_response):
	import translator
	try:
		ctype = 'text/html; charset="utf8"'
		response_body = ""
		word=parse_qs(environ['QUERY_STRING']).get('word', [''])[0]
		if word == "":
			word = u"واژه".encode("utf8")
		text = worddb(word)
		if text != None:
			response_body = text	
		else:
			if environ['PATH_INFO'] == '/health':
				response_body = "1"
			elif environ['PATH_INFO'] == '/env':
				response_body = ['%s: %s' % (key, value)
							for key, value in sorted(environ.items())]
				response_body = '\n'.join(response_body)
			else:
				word = word.decode('u8')
				ctype = 'text/html; charset="utf8"'
				import requests
				xword = word.replace(u'+', u' ')
				for word in xword.split(u' '):
					reload(translator)
					tr = translator.translator()
					result = ''
					pron = word
					try:
						text = requests.get('http://www.loghatnaameh.org/dehkhodasearchresult-fa.html?searchtype=2&word='+word, timeout=2).text
						pText = text.find(u'href=\'dehkhodaworddetail')
						foundPText = 1
						p = ''
						while (pText >0):
							p = text[pText+85:pText+228]
							pText = text.find(u'href=\'dehkhodaworddetail', pText+1)
							foundPText = foundPText + 1
							if foundPText < 5 and re.search(word + u'\s*\.\s*\[', p) is not None and re.search(u'\[ *(.*) *\]', p) is not None:
								pron = re.search(u'\[ *(.*) *\]', p).group(1).split(u' / ')
								break
						# make working for last t arabic
						if word[-1] == u'\u0647' and pText < 0:
							word = word.replace(u'\u0647', u'\u0629')
							pText = text.find(u'href=\'dehkhodaworddetail')
							foundPText = 1
							p = ''
							while (pText >0):
								p = text[pText+85:pText+228]
								pText = text.find(u'href=\'dehkhodaworddetail', pText+1)
								foundPText = foundPText + 1
								if foundPText < 5 and re.search(word + u'\s*\.\s*\[', p) is not None and re.search(u'\[ *(.*) *\]', p) is not None:
									pron = re.search(u'\[ *(.*) *\]', p).group(1).split(u' / ')
									break
							word = word.replace(u'\u0629', u'\u0647')
						preI = ''
						if type(pron) is list:
							for i in pron:
								i = i.replace(u' ' ,'')
								j = 0
								k = 0
								while j < len(i):
									if j>0 and j%2==0:
										i=i[:k]+u' '+i[k:]
										k = k+1
									j = j + 1
									k = k + 1
								pattern = i.replace(u'\u0650' ,u'.')
								pattern = pattern.replace(u'\u064e' ,u'.')
								pattern = pattern.replace(u'\u064f' ,u'.')	
								if re.search(pattern, preI) is not None:
									i = re.sub(pattern, i, preI)
								preI = i
								i = re.sub(u'.ا', '', i)
								i = re.sub(u'.و', '', i)
							#	i = re.sub(u'.ی', '', i)
								i = i.replace(u'  ', u' ')
								resultTmp = googleSuggest(tr.trf2f(i.encode('u8'), word.encode('u8')))
								worddb(word.encode('u8'), resultTmp.encode('u8'))
								if False and ren(pron)>1:
									result = result + resultTmp + '/'
								else:
									result = resultTmp
						else:
							result = googleSuggest(tr.trf2f(pron.encode('u8'), word.encode('u8')) )
							worddb(word.encode('u8'), result.encode('u8'))
					except:
						text = requests.get('http://fa.wiktionary.org/wiki/'+word, timeout=2).text
						from bs4 import BeautifulSoup
						soup = BeautifulSoup(text)
						for p in soup.findAll("p"):
							if p.parent.name == "div":
								if re.search("\(.*\)",p.encode("utf8")) != None:
									pron = str(p)
									break
						#if 'p' in locals() and re.search("\(.*\)",p.encode("utf8")) != None:
						if pron != word:
							result = googleSuggest(tr.trf2f(pron, word.encode('u8')))
						else:
							pron=u"<p>("+word+u")</p>"
							result = googleSuggest(tr.trf2f(pron.encode('u8'), word.encode('u8')))
						worddb(word.encode('u8'), result.encode('u8'))
					response_body = response_body + ' ' + result
		response_body = response_body.encode('u8')
		status = '200 OK'
		response_headers = [('Content-Type', ctype), ('Content-Length', str(len(response_body)))]
		start_response(status, response_headers)
		return [response_body]
	except:
		status = '500 Internal Server Error'
		import traceback
		response_body = response_body +"<br/>\n"+traceback.format_exc()
		response_headers = [('Content-Type', ctype), ('Content-Length', str(len(response_body)))]
		start_response(status, response_headers)
		return [response_body]

#
# Below for testing only
#
if __name__ == '__main__':
	from wsgiref.simple_server import make_server
	httpd = make_server('localhost', 8051, application)
	# Wait for a single request, serve it and quit.
	httpd.handle_request()