From 6ab24f7d339912d14988d247797abecd07e91e04 Mon Sep 17 00:00:00 2001 From: Vasco Nunes Date: Thu, 30 Jun 2016 17:51:48 +0100 Subject: [PATCH] issue #6 --- cswxp1_tweets.py | 131 ++++++++ extlibs/html2text.py | 767 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 898 insertions(+) create mode 100644 cswxp1_tweets.py create mode 100644 extlibs/html2text.py diff --git a/cswxp1_tweets.py b/cswxp1_tweets.py new file mode 100644 index 0000000..116e0b1 --- /dev/null +++ b/cswxp1_tweets.py @@ -0,0 +1,131 @@ +#tweet live updates from CSEXPLORER1 +import tweepy +import subprocess, os +import datetime +import itertools + +cswspacepeek_dir=os.path.dirname(os.path.abspath(__file__)) +twiter_credentials_file=os.path.join(cswspacepeek_dir,"twiter_credentials.txt") +f1=open("C:\\temp\\help.txt") +html2text_file=os.path.join(cswspacepeek_dir,"extlibs","html2text.py") +raw_packets='http://aprs.fi/?c=raw&call=CT1EUS-11&limit=1000&view=normal' + +def get_api(cfg): + auth = tweepy.OAuthHandler(cfg['consumer_key'], cfg['consumer_secret']) + auth.set_access_token(cfg['access_token'], cfg['access_token_secret']) + return tweepy.API(auth) + +def tail(f, lines=1, _buffer=4098): + """Tail a file and get X lines from the end""" + # place holder for the lines found + lines_found = [] + + # block counter will be multiplied by buffer + # to get the block size from the end + block_counter = -1 + + # loop until we find X lines + while len(lines_found) < lines: + try: + f.seek(block_counter * _buffer, os.SEEK_END) + except IOError: # either file is too small, or too many lines requested + f.seek(0) + lines_found = f.readlines() + break + + lines_found = f.readlines() + + # we found enough lines, get out + if len(lines_found) > lines: + break + + # decrement the block counter to get the + # next X bytes + block_counter -= 1 + + return lines_found[-lines:] + +def get_rawpackets(link): + p=subprocess.Popen(["python",html2text_file,link],stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + f1=open("C:\\temp\\help.txt","w+") + f2=open("C:\\temp\\help2.txt","w+") + while p.stdout.readline()!='': + f=p.stdout.readline() + s=f.replace('\n', ' ').replace('\r', '').replace('WEST', '\n') + if s.find('[Latitude ')<0 and s.find('[Duplicate ')<0 and s.find('longitude are both 0]')<0 and s.find("[Rate limited")<0: + f2.write(s) + +def get_text_from_rawpackets(rawfile): + f1=open(rawfile) + f2=open("C:\\temp\\help.txt","w+") + with f1 as f: + for line in f: + if line.find("A=")>0: + altitude=int(int(line[line.find("A=")+2:line.find("A=")+8].strip())*0.3048) + temperature=line[line.find("'C")-2:line.find("'C")] + timestamp=line[-21:] + f2.write(timestamp.replace('\n','')+"| " + str(altitude)+"/ "+temperature+'\n') + + +def main(): + # twiter credentials + twiter_credentials=open(twiter_credentials_file) + lines=twiter_credentials.readlines() + cfg = { + "consumer_key" : lines[0], + "consumer_secret" : lines[1], + "access_token" : lines[2], + "access_token_secret" : lines[3] + } + + api = get_api(cfg) + + altitude_marks_climbing=[200,1000,3000,6000,10000,15000,25000,30000,35000] + tweets =[] + + while True: + get_rawpackets(raw_packets) + get_text_from_rawpackets("C:\\temp\\help2.txt") + f1=open("C:\\temp\\help.txt") + lastlines=tail(f1,lines=3) + altitude_base=0 + for a, b, c in itertools.combinations(lastlines, 3): + #Tweeting with the temperature values + if b.strip()[-2:]==c.strip()[-2:]: + temp=b.strip()[-2:].strip() + try: + if int(temp)<10: + tweet = "#CSEXPLORER1 Its getting colder! - %sC" % (str(temp)) + if not tweet in tweets: + tweets.append(tweet) + status = api.update_status(status=tweet) + if int(temp)<0: + tweet = "#CSEXPLORER1 Why didn't anyone send me a coat?! - %sC" % (str(temp)) + if not tweet in tweets: + tweets.append(tweet) + status = api.update_status(status=tweet) + except: + continue + # #Tweeting with the altitude values + try: + altitude=(int(a[a.find("|")+1:a.find("/")].strip())+int(b[b.find("|")+1:b.find("/")].strip())+int(c[c.find("|")+1:c.find("/")].strip()))/3 + if altitude>altitude_base: + for marks in altitude_marks_climbing: + if altitude>=marks: + tweet = "#CSEXPLORER1 I'm getting up! Nice view from up here! - alt: %sm" % (str(altitude)) + if not tweet in tweets: + tweets.append(tweet) + status = api.update_status(status=tweet) + altitude_base=altitude + elif altitude', 'larr':'<-', 'middot':'*', +'ndash':'-', 'oelig':'oe', 'aelig':'ae', +'agrave':'a', 'aacute':'a', 'acirc':'a', 'atilde':'a', 'auml':'a', 'aring':'a', +'egrave':'e', 'eacute':'e', 'ecirc':'e', 'euml':'e', +'igrave':'i', 'iacute':'i', 'icirc':'i', 'iuml':'i', +'ograve':'o', 'oacute':'o', 'ocirc':'o', 'otilde':'o', 'ouml':'o', +'ugrave':'u', 'uacute':'u', 'ucirc':'u', 'uuml':'u', +'lrm':'', 'rlm':''} + +unifiable_n = {} + +for k in unifiable.keys(): + unifiable_n[name2cp(k)] = unifiable[k] + +def charref(name): + if name[0] in ['x','X']: + c = int(name[1:], 16) + else: + c = int(name) + + if not UNICODE_SNOB and c in unifiable_n.keys(): + return unifiable_n[c] + else: + try: + return unichr(c) + except NameError: #Python3 + return chr(c) + +def entityref(c): + if not UNICODE_SNOB and c in unifiable.keys(): + return unifiable[c] + else: + try: name2cp(c) + except KeyError: return "&" + c + ';' + else: + try: + return unichr(name2cp(c)) + except NameError: #Python3 + return chr(name2cp(c)) + +def replaceEntities(s): + s = s.group(1) + if s[0] == "#": + return charref(s[1:]) + else: return entityref(s) + +r_unescape = re.compile(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));") +def unescape(s): + return r_unescape.sub(replaceEntities, s) + +### End Entity Nonsense ### + +def onlywhite(line): + """Return true if the line does only consist of whitespace characters.""" + for c in line: + if c is not ' ' and c is not ' ': + return c is ' ' + return line + +def optwrap(text): + """Wrap all paragraphs in the provided text.""" + if not BODY_WIDTH: + return text + + assert wrap, "Requires Python 2.3." + result = '' + newlines = 0 + for para in text.split("\n"): + if len(para) > 0: + if para[0] != ' ' and para[0] != '-' and para[0] != '*': + for line in wrap(para, BODY_WIDTH): + result += line + "\n" + result += "\n" + newlines = 2 + else: + if not onlywhite(para): + result += para + "\n" + newlines = 1 + else: + if newlines < 2: + result += "\n" + newlines += 1 + return result + +def hn(tag): + if tag[0] == 'h' and len(tag) == 2: + try: + n = int(tag[1]) + if n in range(1, 10): return n + except ValueError: return 0 + +def dumb_property_dict(style): + """returns a hash of css attributes""" + return dict([(x.strip(), y.strip()) for x, y in [z.split(':', 1) for z in style.split(';') if ':' in z]]); + +def dumb_css_parser(data): + """returns a hash of css selectors, each of which contains a hash of css attributes""" + # remove @import sentences + importIndex = data.find('@import') + while importIndex != -1: + data = data[0:importIndex] + data[data.find(';', importIndex) + 1:] + importIndex = data.find('@import') + + # parse the css. reverted from dictionary compehension in order to support older pythons + elements = [x.split('{') for x in data.split('}') if '{' in x.strip()] + elements = dict([(a.strip(), dumb_property_dict(b)) for a, b in elements]) + + return elements + +def element_style(attrs, style_def, parent_style): + """returns a hash of the 'final' style attributes of the element""" + style = parent_style.copy() + if 'class' in attrs: + for css_class in attrs['class'].split(): + css_style = style_def['.' + css_class] + style.update(css_style) + if 'style' in attrs: + immediate_style = dumb_property_dict(attrs['style']) + style.update(immediate_style) + return style + +def google_list_style(style): + """finds out whether this is an ordered or unordered list""" + if 'list-style-type' in style: + list_style = style['list-style-type'] + if list_style in ['disc', 'circle', 'square', 'none']: + return 'ul' + return 'ol' + +def google_nest_count(style): + """calculate the nesting count of google doc lists""" + nest_count = 0 + if 'margin-left' in style: + nest_count = int(style['margin-left'][:-2]) / GOOGLE_LIST_INDENT + return nest_count + +def google_has_height(style): + """check if the style of the element has the 'height' attribute explicitly defined""" + if 'height' in style: + return True + return False + +def google_text_emphasis(style): + """return a list of all emphasis modifiers of the element""" + emphasis = [] + if 'text-decoration' in style: + emphasis.append(style['text-decoration']) + if 'font-style' in style: + emphasis.append(style['font-style']) + if 'font-weight' in style: + emphasis.append(style['font-weight']) + return emphasis + +def google_fixed_width_font(style): + """check if the css of the current element defines a fixed width font""" + font_family = '' + if 'font-family' in style: + font_family = style['font-family'] + if 'Courier New' == font_family or 'Consolas' == font_family: + return True + return False + +def list_numbering_start(attrs): + """extract numbering from list element attributes""" + if 'start' in attrs: + return int(attrs['start']) - 1 + else: + return 0 + +class _html2text(HTMLParser.HTMLParser): + def __init__(self, out=None, baseurl=''): + HTMLParser.HTMLParser.__init__(self) + + if out is None: self.out = self.outtextf + else: self.out = out + self.outtextlist = [] # empty list to store output characters before they are "joined" + try: + self.outtext = unicode() + except NameError: # Python3 + self.outtext = str() + self.quiet = 0 + self.p_p = 0 # number of newline character to print before next output + self.outcount = 0 + self.start = 1 + self.space = 0 + self.a = [] + self.astack = [] + self.acount = 0 + self.list = [] + self.blockquote = 0 + self.pre = 0 + self.startpre = 0 + self.code = False + self.br_toggle = '' + self.lastWasNL = 0 + self.lastWasList = False + self.style = 0 + self.style_def = {} + self.tag_stack = [] + self.emphasis = 0 + self.drop_white_space = 0 + self.inheader = False + self.abbr_title = None # current abbreviation definition + self.abbr_data = None # last inner HTML (for abbr being defined) + self.abbr_list = {} # stack of abbreviations to write later + self.baseurl = baseurl + + if options.google_doc: + del unifiable_n[name2cp('nbsp')] + unifiable['nbsp'] = ' _place_holder;' + + def feed(self, data): + data = data.replace("", "") + HTMLParser.HTMLParser.feed(self, data) + + def outtextf(self, s): + self.outtextlist.append(s) + if s: self.lastWasNL = s[-1] == '\n' + + def close(self): + HTMLParser.HTMLParser.close(self) + + self.pbr() + self.o('', 0, 'end') + + self.outtext = self.outtext.join(self.outtextlist) + + if options.google_doc: + self.outtext = self.outtext.replace(' _place_holder;', ' '); + + return self.outtext + + def handle_charref(self, c): + self.o(charref(c), 1) + + def handle_entityref(self, c): + self.o(entityref(c), 1) + + def handle_starttag(self, tag, attrs): + self.handle_tag(tag, attrs, 1) + + def handle_endtag(self, tag): + self.handle_tag(tag, None, 0) + + def previousIndex(self, attrs): + """ returns the index of certain set of attributes (of a link) in the + self.a list + + If the set of attributes is not found, returns None + """ + if not has_key(attrs, 'href'): return None + + i = -1 + for a in self.a: + i += 1 + match = 0 + + if has_key(a, 'href') and a['href'] == attrs['href']: + if has_key(a, 'title') or has_key(attrs, 'title'): + if (has_key(a, 'title') and has_key(attrs, 'title') and + a['title'] == attrs['title']): + match = True + else: + match = True + + if match: return i + + def drop_last(self, nLetters): + if not self.quiet: + self.outtext = self.outtext[:-nLetters] + + def handle_emphasis(self, start, tag_style, parent_style): + """handles various text emphases""" + tag_emphasis = google_text_emphasis(tag_style) + parent_emphasis = google_text_emphasis(parent_style) + + # handle Google's text emphasis + strikethrough = 'line-through' in tag_emphasis and options.hide_strikethrough + bold = 'bold' in tag_emphasis and not 'bold' in parent_emphasis + italic = 'italic' in tag_emphasis and not 'italic' in parent_emphasis + fixed = google_fixed_width_font(tag_style) and not \ + google_fixed_width_font(parent_style) and not self.pre + + if start: + # crossed-out text must be handled before other attributes + # in order not to output qualifiers unnecessarily + if bold or italic or fixed: + self.emphasis += 1 + if strikethrough: + self.quiet += 1 + if italic: + self.o("_") + self.drop_white_space += 1 + if bold: + self.o("**") + self.drop_white_space += 1 + if fixed: + self.o('`') + self.drop_white_space += 1 + self.code = True + else: + if bold or italic or fixed: + # there must not be whitespace before closing emphasis mark + self.emphasis -= 1 + self.space = 0 + self.outtext = self.outtext.rstrip() + if fixed: + if self.drop_white_space: + # empty emphasis, drop it + self.drop_last(1) + self.drop_white_space -= 1 + else: + self.o('`') + self.code = False + if bold: + if self.drop_white_space: + # empty emphasis, drop it + self.drop_last(2) + self.drop_white_space -= 1 + else: + self.o("**") + if italic: + if self.drop_white_space: + # empty emphasis, drop it + self.drop_last(1) + self.drop_white_space -= 1 + else: + self.o("_") + # space is only allowed after *all* emphasis marks + if (bold or italic) and not self.emphasis: + self.o(" ") + if strikethrough: + self.quiet -= 1 + + def handle_tag(self, tag, attrs, start): + #attrs = fixattrs(attrs) + if attrs is None: + attrs = {} + else: + attrs = dict(attrs) + + if options.google_doc: + # the attrs parameter is empty for a closing tag. in addition, we + # need the attributes of the parent nodes in order to get a + # complete style description for the current element. we assume + # that google docs export well formed html. + parent_style = {} + if start: + if self.tag_stack: + parent_style = self.tag_stack[-1][2] + tag_style = element_style(attrs, self.style_def, parent_style) + self.tag_stack.append((tag, attrs, tag_style)) + else: + dummy, attrs, tag_style = self.tag_stack.pop() + if self.tag_stack: + parent_style = self.tag_stack[-1][2] + + if hn(tag): + self.p() + if start: + self.inheader = True + self.o(hn(tag)*"#" + ' ') + else: + self.inheader = False + return # prevent redundant emphasis marks on headers + + if tag in ['p', 'div']: + if options.google_doc: + if start and google_has_height(tag_style): + self.p() + else: + self.soft_br() + else: + self.p() + + if tag == "br" and start: self.o(" \n") + + if tag == "hr" and start: + self.p() + self.o("* * *") + self.p() + + if tag in ["head", "style", 'script']: + if start: self.quiet += 1 + else: self.quiet -= 1 + + if tag == "style": + if start: self.style += 1 + else: self.style -= 1 + + if tag in ["body"]: + self.quiet = 0 # sites like 9rules.com never close + + if tag == "blockquote": + if start: + self.p(); self.o('> ', 0, 1); self.start = 1 + self.blockquote += 1 + else: + self.blockquote -= 1 + self.p() + + if tag in ['em', 'i', 'u']: self.o("_") + if tag in ['strong', 'b']: self.o("**") + if tag in ['del', 'strike']: + if start: + self.o("<"+tag+">") + else: + self.o("") + + if options.google_doc: + if not self.inheader: + # handle some font attributes, but leave headers clean + self.handle_emphasis(start, tag_style, parent_style) + + if tag == "code" and not self.pre: self.o('`') #TODO: `` `this` `` + if tag == "abbr": + if start: + self.abbr_title = None + self.abbr_data = '' + if has_key(attrs, 'title'): + self.abbr_title = attrs['title'] + else: + if self.abbr_title != None: + self.abbr_list[self.abbr_data] = self.abbr_title + self.abbr_title = None + self.abbr_data = '' + + if tag == "a" and not IGNORE_ANCHORS: + if start: + if has_key(attrs, 'href') and not (SKIP_INTERNAL_LINKS and attrs['href'].startswith('#')): + self.astack.append(attrs) + self.o("[") + else: + self.astack.append(None) + else: + if self.astack: + a = self.astack.pop() + if a: + if INLINE_LINKS: + self.o("](" + a['href'] + ")") + else: + i = self.previousIndex(a) + if i is not None: + a = self.a[i] + else: + self.acount += 1 + a['count'] = self.acount + a['outcount'] = self.outcount + self.a.append(a) + self.o("][" + str(a['count']) + "]") + + if tag == "img" and start and not IGNORE_IMAGES: + if has_key(attrs, 'src'): + attrs['href'] = attrs['src'] + alt = attrs.get('alt', '') + if INLINE_LINKS: + self.o("![") + self.o(alt) + self.o("]("+ attrs['href'] +")") + else: + i = self.previousIndex(attrs) + if i is not None: + attrs = self.a[i] + else: + self.acount += 1 + attrs['count'] = self.acount + attrs['outcount'] = self.outcount + self.a.append(attrs) + self.o("![") + self.o(alt) + self.o("]["+ str(attrs['count']) +"]") + + if tag == 'dl' and start: self.p() + if tag == 'dt' and not start: self.pbr() + if tag == 'dd' and start: self.o(' ') + if tag == 'dd' and not start: self.pbr() + + if tag in ["ol", "ul"]: + # Google Docs create sub lists as top level lists + if (not self.list) and (not self.lastWasList): + self.p() + if start: + if options.google_doc: + list_style = google_list_style(tag_style) + else: + list_style = tag + numbering_start = list_numbering_start(attrs) + self.list.append({'name':list_style, 'num':numbering_start}) + else: + if self.list: self.list.pop() + self.lastWasList = True + else: + self.lastWasList = False + + if tag == 'li': + self.pbr() + if start: + if self.list: li = self.list[-1] + else: li = {'name':'ul', 'num':0} + if options.google_doc: + nest_count = google_nest_count(tag_style) + else: + nest_count = len(self.list) + self.o(" " * nest_count) #TODO: line up
  1. s > 9 correctly. + if li['name'] == "ul": self.o(options.ul_item_mark + " ") + elif li['name'] == "ol": + li['num'] += 1 + self.o(str(li['num'])+". ") + self.start = 1 + + if tag in ["table", "tr"] and start: self.p() + if tag == 'td': self.pbr() + + if tag == "pre": + if start: + self.startpre = 1 + self.pre = 1 + else: + self.pre = 0 + self.p() + + def pbr(self): + if self.p_p == 0: self.p_p = 1 + + def p(self): self.p_p = 2 + + def soft_br(self): + self.pbr() + self.br_toggle = ' ' + + def o(self, data, puredata=0, force=0): + if self.abbr_data is not None: self.abbr_data += data + + if not self.quiet: + if options.google_doc: + # prevent white space immediately after 'begin emphasis' marks ('**' and '_') + lstripped_data = data.lstrip() + if self.drop_white_space and not (self.pre or self.code): + data = lstripped_data + if lstripped_data != '': + self.drop_white_space = 0 + + if puredata and not self.pre: + data = re.sub('\s+', ' ', data) + if data and data[0] == ' ': + self.space = 1 + data = data[1:] + if not data and not force: return + + if self.startpre: + #self.out(" :") #TODO: not output when already one there + self.startpre = 0 + + bq = (">" * self.blockquote) + if not (force and data and data[0] == ">") and self.blockquote: bq += " " + + if self.pre: + bq += " " + data = data.replace("\n", "\n"+bq) + + if self.start: + self.space = 0 + self.p_p = 0 + self.start = 0 + + if force == 'end': + # It's the end. + self.p_p = 0 + self.out("\n") + self.space = 0 + + if self.p_p: + self.out((self.br_toggle+'\n'+bq)*self.p_p) + self.space = 0 + self.br_toggle = '' + + if self.space: + if not self.lastWasNL: self.out(' ') + self.space = 0 + + if self.a and ((self.p_p == 2 and LINKS_EACH_PARAGRAPH) or force == "end"): + if force == "end": self.out("\n") + + newa = [] + for link in self.a: + if self.outcount > link['outcount']: + self.out(" ["+ str(link['count']) +"]: " + urlparse.urljoin(self.baseurl, link['href'])) + if has_key(link, 'title'): self.out(" ("+link['title']+")") + self.out("\n") + else: + newa.append(link) + + if self.a != newa: self.out("\n") # Don't need an extra line when nothing was done. + + self.a = newa + + if self.abbr_list and force == "end": + for abbr, definition in self.abbr_list.items(): + self.out(" *[" + abbr + "]: " + definition + "\n") + + self.p_p = 0 + self.out(data) + self.outcount += 1 + + def handle_data(self, data): + if r'\/script>' in data: self.quiet -= 1 + + if self.style: + self.style_def.update(dumb_css_parser(data)) + + self.o(data, 1) + + def unknown_decl(self, data): pass + +def wrapwrite(text): + text = text.encode('utf-8') + try: #Python3 + sys.stdout.buffer.write(text) + except AttributeError: + sys.stdout.write(text) + +def html2text_file(html, out=wrapwrite, baseurl=''): + h = _html2text(out, baseurl) + h.feed(html) + h.feed("") + return h.close() + +def html2text(html, baseurl=''): + return optwrap(html2text_file(html, None, baseurl)) + +class Storage: pass +options = Storage() +options.google_doc = False +options.ul_item_mark = '*' + +if __name__ == "__main__": + baseurl = '' + + p = optparse.OptionParser('%prog [(filename|url) [encoding]]', + version='%prog ' + __version__) + p.add_option("-g", "--google-doc", action="store_true", dest="google_doc", + default=False, help="convert an html-exported Google Document") + p.add_option("-d", "--dash-unordered-list", action="store_true", dest="ul_style_dash", + default=False, help="use a dash rather than a star for unordered list items") + p.add_option("-b", "--body-width", dest="body_width", action="store", type="int", + default=78, help="number of characters per output line, 0 for no wrap") + p.add_option("-i", "--google-list-indent", dest="list_indent", action="store", type="int", + default=GOOGLE_LIST_INDENT, help="number of pixels Google indents nested lists") + p.add_option("-s", "--hide-strikethrough", action="store_true", dest="hide_strikethrough", + default=False, help="hide strike-through text. only relevent when -g is specified as well") + (options, args) = p.parse_args() + + # handle options + if options.ul_style_dash: + options.ul_item_mark = '-' + else: + options.ul_item_mark = '*' + + BODY_WIDTH = options.body_width + GOOGLE_LIST_INDENT = options.list_indent + + # process input + if len(args) > 0: + file_ = args[0] + encoding = None + if len(args) == 2: + encoding = args[1] + if len(args) > 2: + p.error('Too many arguments') + + if file_.startswith('http://') or file_.startswith('https://'): + baseurl = file_ + j = urllib.urlopen(baseurl) + text = j.read() + if encoding is None: + try: + from feedparser import _getCharacterEncoding as enc + except ImportError: + enc = lambda x, y: ('utf-8', 1) + encoding = enc(j.headers, text)[0] + if encoding == 'us-ascii': + encoding = 'utf-8' + data = text.decode(encoding) + + else: + data = open(file_, 'rb').read() + if encoding is None: + try: + from chardet import detect + except ImportError: + detect = lambda x: {'encoding': 'utf-8'} + encoding = detect(data)['encoding'] + data = data.decode(encoding) + else: + data = sys.stdin.read() + wrapwrite(html2text(data, baseurl))