Added branch for the new makefile parser

author: Sebastian Parborg <darkdefende@gmail.com> 2011-06-16 21:09:41 +0200
committer: Sebastian Parborg <darkdefende@gmail.com> 2011-06-16 21:09:41 +0200
commit: 79af78d4222e0d3d8b7f6ea5916254f98e0908f6 (patch)
tree: 3104c117d3cfb446a2f38809cc5f37c33a970afa
parent: Added basic SCM support (diff)
download: ebuildgen-79af78d4222e0d3d8b7f6ea5916254f98e0908f6.tar.gz
ebuildgen-79af78d4222e0d3d8b7f6ea5916254f98e0908f6.tar.bz2
ebuildgen-79af78d4222e0d3d8b7f6ea5916254f98e0908f6.zip
2 files changed, 253 insertions, 136 deletions
diff --git a/filetypes/makefilecom.py b/filetypes/makefilecom.py
new file mode 100644
index 0000000..6bfd29b
--- /dev/null
+++ b/filetypes/makefilecom.py
@@ -0,0 +1,82 @@
+from ply import lex
+from ply import yacc
+
+def com_interp(string):
+    tokens = (
+            "COMMAND",
+            "COMMA",
+            "EQ",
+            "TEXT",
+            "PERCENT",
+            )
+    states = (
+            ("ccode", "exclusive"), #command code
+            )
+
+    # Match the opening $( or ${ of a command and enter the ccode state.
+    def t_ccode(t):
+        r'\$(\{|\()'
+        t.lexer.code_start = t.lexer.lexpos        # Record the starting position
+        t.lexer.level = 1                          # Initial level
+        t.lexer.begin('ccode')                     # Enter 'ccode' state
+
+    # Rules for the ccode state
+    def t_ccode_newcom(t):
+        r'\$(\{|\()'
+        t.lexer.level +=1
+
+    def t_ccode_endcom(t):
+        r'(\}|\))'
+        t.lexer.level -=1
+
+        # If this closes the outermost command, return the enclosed text as a COMMAND token
+        if t.lexer.level == 0:
+             t.value = t.lexer.lexdata[t.lexer.code_start:t.lexer.lexpos-1]
+             t.type = "COMMAND"
+             t.lexer.begin('INITIAL')
+             return t
+
+    def t_ccode_text(t):
+        "[^\$\(\{\)\}]"
+
+    def t_PERCENT(t):
+        r"\%"
+        return t
+
+    def t_EQ(t):
+        r"="
+        return t 
+
+    def t_COMMA(t):
+        r","
+        return t
+
+    def t_TEXT(t):
+        r"[^ \n\t:=\\,]+"
+        return t
+
+    def t_spacetab(t):
+        r"[ \t]"
+        pass
+
+    def t_ANY_error(t):
+        print("Illegal character '%s'" % t.value[0])
+        t.lexer.skip(1)
+
+    lexer = lex.lex()
+
+    lexer.input(string)
+    #for tok in lexer:
+    #    print(tok)
+
+    tokens = 0
+    for tok in lexer:
+        tokens += 1
+        print("gethere")
+
+    print(tokens)
+    if tokens == 1:
+        print("gapp")
+
+
+com_interp("HELOO")
diff --git a/filetypes/makefiles.py b/filetypes/makefiles.py
index 7448f5d..d87f66f 100644
--- a/filetypes/makefiles.py
+++ b/filetypes/makefiles.py
@@ -1,247 +1,282 @@
 from ply import lex
 from ply import yacc
+from makefilecom import com_interp
 
 def scanmakefile(makefile):
     tokens = (
             "VAR",
-            "COLON",
-            "PERCENT",
-            "TEXT",
-            "DOLLAR",
-            "LPAR",
-            "RPAR",
+            "DOTVAR",
             "END",
-            "EQUAL",
+            "COL",
+            "SEMICOL",
+            "EQ",
+            "PEQ",
+            "CEQ",
+            "QEQ",
+            "TEXT",
+            "COMMAND",
+            "PERCENT",
             "ENDTAB",
-            "LESS",
+            "LIT",
+            "COMMA",
             )
 
     states = (
             ("com", "exclusive"),
+            ("ccode", "exclusive"), #command code
             )
 
+    # Match the opening $( or ${ of a command and enter the ccode state.
+    def t_ccode(t):
+        r'\$(\{|\()'
+        t.lexer.code_start = t.lexer.lexpos        # Record the starting position
+        t.lexer.level = 1                          # Initial level
+        t.lexer.begin('ccode')                     # Enter 'ccode' state
+
+    # Rules for the ccode state
+    def t_ccode_newcom(t):
+        r'\$(\{|\()'
+        t.lexer.level +=1
+
+    def t_ccode_endcom(t):
+        r'(\}|\))'
+        t.lexer.level -=1
+
+        # If this closes the outermost command, return the enclosed text as a COMMAND token
+        if t.lexer.level == 0:
+             t.value = t.lexer.lexdata[t.lexer.code_start:t.lexer.lexpos-1]
+             t.type = "COMMAND"
+             t.lexer.begin('INITIAL')
+             return t
+
+    def t_ccode_text(t):
+        "[^\$\(\{\)\}]"
+
     def t_begin_com(t):
         r"\#"
         t.lexer.push_state("com")
 
+    def t_com_other(t):
+        r"[^(\n|\\)]+"
+        pass
+
     def t_com_newline(t):
-        r".*\\[ \t]*\n"
+        r".*\\\n"
+        t.lexer.lineno += 1
         pass
 
     def t_com_END(t):
         r"\n"
         t.lexer.pop_state()
+        t.lexer.lineno += 1
         return t
 
-    def t_VAR(t):
-        r"[a-zA-Z_][a-zA-Z0-9_]*[ \t]*="
-        t.value = t.value.split()[0].rstrip("=") #get the name of the var
+    def t_EQ(t):
+        r"="
         return t
 
-    def t_TEXT(t):
-        #make sure it grabs "file-name" and "-flags"
-        r"-*\.*[a-zA-Z_][-|a-zA-Z0-9_]*"
+    def t_COL(t):
+        r":"
+        return t
+
+    def t_SEMICOL(t):
+        r";"
         return t
 
-    def t_LESS(t):
-        r"\$<"
+    def t_bsdexe(t):  #Create a cleaner version
+        r".*\!=.*"
         pass
 
-    def t_DOLLAR(t):
-        r"\$"
+    def t_PERCENT(t):
+        r"\%"
         return t
 
-    def t_COLON(t):
-        r"\:"
+    def t_PEQ(t):
+        r"[a-zA-Z_][a-zA-Z0-9_]*[ \t]*\+="
+        t.value = t.value.split()[0].rstrip("+=")
         return t
 
-    def t_EQUAL(t):
-        r"\="
+    def t_CEQ(t):
+        r"[a-zA-Z_][a-zA-Z0-9_]*[ \t]*:="
+        t.value = t.value.split()[0].rstrip(":=")
         return t
 
-    def t_LPAR(t):
-        r"\("
+    def t_QEQ(t):
+        r"[a-zA-Z_][a-zA-Z0-9_]*[ \t]*\?="
+        t.value = t.value.split()[0].rstrip("?=")
         return t
 
-    def t_RPAR(t):
-        r"\)"
+    def t_VAR(t):
+        r"[a-zA-Z_][a-zA-Z0-9_]*[ \t]*="
+        t.value = t.value.split()[0].rstrip("=") #get the name of the var
         return t
 
-    def t_PERCENT(t):
-        r"\%"
+    def t_DOTVAR(t):
+        r"\.[a-zA-Z_][a-zA-Z0-9_]*[ \t]*="
+        t.value = t.value.split()[0].rstrip("=") #get the name of the var
         return t
 
     def t_contline(t):
         r"\\\n"
+        t.lexer.lineno += 1
+        pass
+
+    def t_LIT(t):
+        r"\\."
+        t.value = t.value[1] #take the literal char
+        return t
+
+    def t_COMMA(t):
+        r","
+        return t
+
+    def t_spacetab(t):
+        r"[ \t]"
         pass
 
     def t_ENDTAB(t):
         r"\n\t"
+        t.lexer.lineno += 1
+        return t
+
+    def t_TEXT(t):
+        r"[^ \n\t:\\,]+"
         return t
 
     def t_END(t):
-        r"[\n]+"
+        r"\n+"
+        t.lexer.lineno += t.value.count('\n')
         return t
 
     def t_ANY_error(t):
+        print("Illegal character '%s'" % t.value[0])
         t.lexer.skip(1)
 
     lexer = lex.lex()
 
-    #lexer.input(makefile)
-    #for tok in lexer:
-    #    print(tok)
+    lexer.input(makefile)
+    for tok in lexer:
+        print(tok)
 
 
     #YACC begins here
 
     #a dict with values of defined variables
     variables = {}
+    ivars = [] #keep track of the immediate variables
     targets = [] #buildtargets, [[target,deps,options],[target2,....
 
-    def p_target(p):
+    def p_peq(p): #immediate if the variable was previously defined as immediate, otherwise deferred
         """
-        var : var textlst COLON textlst end
-            | textlst COLON textlst end
-            | var textlst COLON textlst options end
-            | textlst COLON textlst options end
+        end : end PEQ textlst end
+            | PEQ textlst end
         """
-        if len(p) == 6:
-            if p[3] == ":":
-                targets.append([p[2][0],p[4],[]])
+        if len(p) == 4:
+            if not p[1] in variables:
+                variables[p[1]] = p[2]
+            elif not p[1] in ivars:
+                variables[p[1]] += p[2]
             else:
-                targets.append([p[1][0],p[3],p[4]])
-        elif len(p) == 5:
-            targets.append([p[1][0],p[3],[]])
-        else:
-            targets.append([p[2][0],p[4],p[5]])
+                textvalue = expand(p[2]) #expand any variables
+                variables[p[1]] = textvalue
 
-    def p_lonetarget(p):
-        """
-        var : var textlst COLON options end
-            | textlst COLON options end
-        """
-        if len(p) == 6:
-            targets.append([p[2][0],[],p[4]])
+        elif not p[2] in variables:
+            variables[p[2]] = p[3]
+        elif not p[2] in ivars:
+            variables[p[2]] += p[3]
         else:
-            targets.append([p[1][0],[],p[3]])
+            textvalue = expand(p[3]) #expand any variables
+            variables[p[2]] = textvalue
 
-    def p_depconv(p):
+    def p_ceq(p): #immediate
         """
-        var : var command COLON command end
-            | var command COLON command options end
+        end : end CEQ textlst end
+            | CEQ textlst end
         """
-        if len(p) == 6:
-            options = []
-        else:
-            options = p[5]
-
-        if p[2][0] == p[4][0] == "%":
-            for target in targets:
-                for dep in target[1]:
-                    if p[2][1] in dep:
-                        targets.append([dep,[(dep.replace(p[2][1],p[4][1]))],options])
+        if len(p) == 4:
+            textvalue = expand(p[2]) #expand any variables
+            variables[p[1]] = textvalue
+            ivars.append(p[1])
         else:
-            print("Unknown command")
+            textvalue = expand(p[3]) #expand any variables
+            variables[p[2]] = textvalue
+            ivars.append(p[2])
 
-    def p_var(p):
+    def p_qeq(p): #deferred
         """
-        var : VAR textlst end
-            | VAR end
-            | var VAR textlst end
-            | var VAR end
+        end : end QEQ textlst end
+            | QEQ textlst end
         """
-        if isinstance(p[2],list):
+        if len(p) == 4 and not p[1] in variables:
             variables[p[1]] = p[2]
-        elif len(p) == 5:
+        elif not p[2] in variables:
             variables[p[2]] = p[3]
-        elif len(p) == 3:
-            variables[p[1]] = []
-        else:
-            variables[p[2]] = []
 
-    def p_endtab(p):
+    def p_var(p): #deferred
         """
-        options : ENDTAB textlst
-                | options ENDTAB textlst
+        end : end VAR textlst end
+            | VAR textlst end
         """
-        if len(p) == 3:
-            p[0] = p[2]
-        else:
-            p[0] = p[1] + p[3]
-
-    def p_usecom(p):
-        """
-        textlst : DOLLAR LPAR textlst COLON command RPAR
-                | textlst DOLLAR LPAR textlst COLON command RPAR
-        """
-        if len(p) == 8:
-            o = 1 #offset
-        else:
-            o = 0
-        p[3+o] = variables[p[3+o][0]]
-        p[0] = []
-        if p[5][0] == "replace":
-            for text in p[3+o]:
-                p[0] += [text.replace(p[5+o][1],p[5+o][2])]
+        if len(p) == 4:
+            variables[p[1]] = p[2]
         else:
-            for text in p[3+o]:
-                p[0] += [text + p[5+o][1]]
+            variables[p[2]] = p[3]
 
     def p_textlst(p):
         """
         textlst : textlst TEXT
+                | textlst command
+                | textlst LIT
+                | command
                 | TEXT
-                | DOLLAR LPAR textlst RPAR
-                | textlst DOLLAR LPAR textlst RPAR
+                | LIT
         """
-        if len(p) == 2:
-            p[0] = [p[1]]
-        elif len(p) == 3:
-            p[0] = p[1] + [p[2]]
-        elif len(p) == 5:
-            if p[3][0] in variables:
-                var = variables[p[3][0]]
-                p[0] = var
-            else:
-                p[0] = ["not defined"]
+        if len(p) == 3:
+            p[0] = p[1].append(p[2])
         else:
-            if p[4][0] in variables:
-                var = variables[p[4][0]]
-                p[0] = p[1] + var
-            else:
-                p[0] = ["not defined"]
+            p[0] = [p[1]]
 
     def p_command(p):
-        """
-        command : TEXT EQUAL TEXT
-                | PERCENT EQUAL PERCENT TEXT
-                | PERCENT TEXT
-        """
-        if len(p) == 4:
-            p[0] = ["replace", p[1], p[3]]
-        elif len(p) == 5:
-            p[0] = ["append", p[4]]
-        else:
-            p[0] = [p[1],p[2]]
+        "command: COMMAND"
+        p[0] = [p[1]] #commands are lists within the textlst
 
     def p_end(p):
         """
-        end : end END
-            | END
+        end : END
+            | end END
         """
 
     def p_error(p):
         print("syntax error at '%s'" % p.type,p.lexpos)
         pass
 
-    yacc.yacc()
+    #yacc.yacc()
 
-    yacc.parse(makefile)
+    #yacc.parse(makefile)
 
     #for target in targets:
     #    print(target)
     #print(variables)
 
-    return targets
+    #return targets
+
+
+#immediate
+#deferred
+
+def expand(lst):
+    newlst = []
+    for item in lst:
+        if isinstance(item, list):
+            newitem = com_interp(item[0])
+        else:
+            newitem = item
+
+        newlst += newitem
+
+    return newlst
+
+file="Makefile2"
 
+with open(file, encoding="utf-8", errors="replace") as inputfile:
+    scanmakefile(inputfile.read())
author	Sebastian Parborg <darkdefende@gmail.com>	2011-06-16 21:09:41 +0200
committer	Sebastian Parborg <darkdefende@gmail.com>	2011-06-16 21:09:41 +0200
commit	79af78d4222e0d3d8b7f6ea5916254f98e0908f6 (patch)
tree	3104c117d3cfb446a2f38809cc5f37c33a970afa
parent	Added basic SCM support (diff)
download	ebuildgen-79af78d4222e0d3d8b7f6ea5916254f98e0908f6.tar.gz ebuildgen-79af78d4222e0d3d8b7f6ea5916254f98e0908f6.tar.bz2 ebuildgen-79af78d4222e0d3d8b7f6ea5916254f98e0908f6.zip