"""
Graph file parsing.
"""

import re
import subprocess
import sys

import msgstruct

# Splits a label into words on runs of characters that cannot be part of a
# link name (anything other than letters, digits, '_' and '.').
re_nonword = re.compile(r'([^0-9a-zA-Z_.]+)')
# Header line of a Graphviz "plain" file: 'graph' followed by three numbers.
re_plain = re.compile(r'graph [-0-9.]+ [-0-9.]+ [-0-9.]+$', re.MULTILINE)
# The keywords that introduce a graph in dot source, matched as whole words.
re_digraph = re.compile(r'\b(graph|digraph)\b', re.IGNORECASE)


def guess_type(content):
    """Guess which kind of graph description `content` holds.

    Returns:
        'plain'   if `content` starts with a "plain" header line;
        'dot'     if the last keyword before the first '{' is 'digraph';
        'neato'   if that keyword is 'graph';
        'unknown' otherwise, after printing a warning to stderr.
    """
    # XXX not a perfect heuristic
    if re_plain.match(content):
        return 'plain'      # already a .plain file
    # Look for the word 'graph' or 'digraph' followed by a '{'.  The brace is
    # located once, starting from the first keyword; scanning stops at the
    # first keyword found after that brace.
    bracepos = None
    lastfound = ''
    for match in re_digraph.finditer(content):
        position = match.start()
        if bracepos is None:
            bracepos = content.find('{', position)
            if bracepos < 0:
                break       # no opening brace at all
        elif position > bracepos:
            break           # keyword inside the graph body: ignore it
        lastfound = match.group()
    keyword = lastfound.lower()
    if keyword == 'digraph':
        return 'dot'
    if keyword == 'graph':
        return 'neato'
    print >> sys.stderr, "Warning: could not guess file type, using 'dot'"
    return 'unknown'


def dot2plain_graphviz(content, contenttype, use_codespeak=False):
    """Convert dot source to "plain" format by running a local program.

    Runs ``neato -Tplain`` when `contenttype` is 'neato' and ``dot -Tplain``
    for every other value, feeding `content` on stdin and returning whatever
    the program writes to stdout.  `use_codespeak` is accepted but unused.

    Raises:
        PlainParseError: if the program produced no output at all.
    """
    if contenttype == 'neato':
        program = 'neato'
    else:
        program = 'dot'
    cmdline = program + ' -Tplain'
    close_fds = sys.platform != 'win32'
    p = subprocess.Popen(cmdline, shell=True, close_fds=close_fds,
                         stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    # communicate() writes stdin and reads stdout at the same time, so large
    # inputs cannot deadlock on a full pipe, and it waits for the child to
    # exit instead of leaving it unreaped.
    plaincontent, _ = p.communicate(content)
    if not plaincontent:
        # the program is likely not installed
        raise PlainParseError("no result from running '%s'" % program)
    return plaincontent


def dot2plain_codespeak(content, contenttype):
    # Posts `content` url-encoded under the key 'dot' and collects the
    # response body in 16 KiB chunks.
    import urllib
    request = urllib.urlencode({'dot': content})
    url = 'http://codespeak.net/pypy/convertdot.cgi'
    print >> sys.stderr, '* posting:', url
    g = urllib.urlopen(url, data=request)
    result = []
    while True:
        data = g.read(16384)
        if not data:
            break
        result.append(data)
    g.close()
    plaincontent = ''.join(result)
    # NOTE(review): the remainder of this function sits on the next source
    # line, which is damaged; it is deliberately not reconstructed here.
    #
very simple-minded way to give a somewhat better error message if plaincontent.startswith(' i + 2: texts.append(line[i]) if line[0] == 'stop': break if links: # only include the links that really appear in the graph seen = {} for text in texts: for word in re_nonword.split(text): if word and word in links and word not in seen: t = links[word] if isinstance(t, tuple): statusbartext, color = t else: statusbartext = t color = None if color is not None: yield (msgstruct.CMSG_ADD_LINK, word, statusbartext, color[0], color[1], color[2]) else: yield (msgstruct.CMSG_ADD_LINK, word, statusbartext) seen[word] = True if fixedfont: yield (msgstruct.CMSG_FIXED_FONT,) yield (msgstruct.CMSG_STOP_GRAPH,) def parse_dot(graph_id, content, links={}, fixedfont=False): contenttype = guess_type(content) if contenttype == 'plain': plaincontent = content else: try: plaincontent = dot2plain_graphviz(content, contenttype) except PlainParseError, e: raise ##print e ### failed, retry via codespeak ##plaincontent = dot2plain_codespeak(content, contenttype) return list(parse_plain(graph_id, plaincontent, links, fixedfont))