From 788625d5711c50f6cbc614f436460df58acff9d3 Mon Sep 17 00:00:00 2001 From: "Bogdan Buduroiu (aider)" Date: Sun, 30 Mar 2025 17:55:00 +0800 Subject: [PATCH] feat: multi-lang implementation (C, Zig, Python) --- src/definitions.zig | 409 +++++++++++++++++++++++++++++++++++++++----- src/language.zig | 138 +++++++++++++++ src/main.zig | 53 +++++- src/parser.zig | 335 ++++++++++++++++++++---------------- src/root.zig | 15 +- 5 files changed, 750 insertions(+), 200 deletions(-) create mode 100644 src/language.zig diff --git a/src/definitions.zig b/src/definitions.zig index eb24e2a..1298eb4 100644 --- a/src/definitions.zig +++ b/src/definitions.zig @@ -1,72 +1,399 @@ const std = @import("std"); +const Allocator = std.mem.Allocator; +const ArrayList = std.ArrayList; -pub const Function = struct { - name: []u8, - params: []u8, - return_type: []u8, - access_modifier: []u8, - documentation: []u8, - allocator: std.mem.Allocator, +// Base definition struct with common fields +const BaseDefinition = struct { + name: []const u8, + allocator: Allocator, + documentation: ?[]const u8 = null, - pub fn init( - allocator: std.mem.Allocator, - name: []const u8, - params: []const u8, - return_type: []const u8, - access_modifier: []const u8, - documentation: []const u8, - ) !Function { - return .{ - .name = try allocator.dupe(u8, name), - .params = try allocator.dupe(u8, params), - .return_type = try allocator.dupe(u8, return_type), - .access_modifier = try allocator.dupe(u8, access_modifier), - .documentation = try allocator.dupe(u8, documentation), + fn deinitBase(self: *const BaseDefinition) void { + self.allocator.free(self.name); + if (self.documentation) |doc| { + self.allocator.free(doc); + } + } + + fn initBase(allocator: Allocator, name: []const u8, documentation: ?[]const u8) !BaseDefinition { + const name_copy = try allocator.dupe(u8, name); + errdefer allocator.free(name_copy); + + var doc_copy: ?[]const u8 = null; + if (documentation) |doc| { + doc_copy = try allocator.dupe(u8, doc); + errdefer if (doc_copy) |d| allocator.free(d); + } + + return BaseDefinition{ + .name = name_copy, .allocator = allocator, + .documentation = doc_copy, }; } +}; - pub fn destroy(self: *Function) void { - self.allocator.free(self.name); - self.allocator.free(self.params); - self.allocator.free(self.return_type); - self.allocator.free(self.access_modifier); - self.allocator.free(self.documentation); +pub const Function = struct { + const Self = @This(); + base: BaseDefinition, + parameters: ArrayList([]const u8), + return_type: ?[]const u8 = null, + + pub fn init(allocator: Allocator, name: []const u8, documentation: ?[]const u8) !*Self { + const base = try BaseDefinition.initBase(allocator, name, documentation); + errdefer base.deinitBase(); + + const f = try allocator.create(Self); + errdefer allocator.destroy(f); + + f.* = .{ + .base = base, + .parameters = ArrayList([]const u8).init(allocator), + .return_type = null, + }; + return f; } - pub fn print(self: Function, writer: anytype) void { - writer.print("func {s}() -> {s};", .{ self.name, self.return_type }); + pub fn deinit(self: *Self) void { + // Free parameter strings + for (self.parameters.items) |param| { + self.base.allocator.free(param); + } + self.parameters.deinit(); + + // Free return type if it exists + if (self.return_type) |ret_type| { + self.base.allocator.free(ret_type); + } + + // Free base definition fields + self.base.deinitBase(); + + // Free the struct itself + self.base.allocator.destroy(self); + } + + pub fn print(self: Self, writer: anytype) !void { + try writer.print("func {s}(", .{self.base.name}); + + for (self.parameters.items, 0..) |param, i| { + if (i > 0) try writer.print(", ", .{}); + try writer.print("{s}", .{param}); + } + + try writer.print(") -> ", .{}); + + if (self.return_type) |ret| { + try writer.print("{s}", .{ret}); + } else { + try writer.print("void", .{}); + } + + try writer.print(";\n", .{}); + } + + pub fn addParameter(self: *Self, param: []const u8) !void { + const param_copy = try self.base.allocator.dupe(u8, param); + errdefer self.base.allocator.free(param_copy); + try self.parameters.append(param_copy); + } + + pub fn setReturnType(self: *Self, ret_type: []const u8) !void { + if (self.return_type) |old_ret| { + self.base.allocator.free(old_ret); + } + self.return_type = try self.base.allocator.dupe(u8, ret_type); + } +}; + +pub const Method = struct { + const Self = @This(); + function: Function, + class_name: []const u8, + + pub fn init(allocator: Allocator, name: []const u8, class_name: []const u8, documentation: ?[]const u8) !*Self { + const func = try Function.init(allocator, name, documentation); + errdefer func.deinit(); + + const class_name_copy = try allocator.dupe(u8, class_name); + errdefer allocator.free(class_name_copy); + + const m = try allocator.create(Self); + errdefer allocator.destroy(m); + + m.* = .{ + .function = func.*, + .class_name = class_name_copy, + }; + + // We've copied the function, so we can destroy the original + allocator.destroy(func); + + return m; + } + + pub fn deinit(self: *Self) void { + // Free the class name + self.function.base.allocator.free(self.class_name); + + // Clean up function fields but don't destroy the struct + // Free parameter strings + for (self.function.parameters.items) |param| { + self.function.base.allocator.free(param); + } + self.function.parameters.deinit(); + + // Free return type if it exists + if (self.function.return_type) |ret_type| { + self.function.base.allocator.free(ret_type); + } + + // Free base definition fields + self.function.base.deinitBase(); + + // Free the struct itself + self.function.base.allocator.destroy(self); + } + + pub fn print(self: Self, writer: anytype) !void { + try writer.print("method {s}::{s}(", .{ self.class_name, self.function.base.name }); + + for (self.function.parameters.items, 0..) |param, i| { + if (i > 0) try writer.print(", ", .{}); + try writer.print("{s}", .{param}); + } + + try writer.print(") -> ", .{}); + + if (self.function.return_type) |ret| { + try writer.print("{s}", .{ret}); + } else { + try writer.print("void", .{}); + } + + try writer.print(";\n", .{}); } }; pub const Property = struct { - name: []u8, - allocator: std.mem.Allocator, + const Self = @This(); + base: BaseDefinition, + type: ?[]const u8 = null, - pub fn init(allocator: std.mem.Allocator, name: []const u8) !Property { - return Property{ - .name = try allocator.dupe(u8, name), - .allocator = allocator, + pub fn init(allocator: Allocator, name: []const u8, documentation: ?[]const u8) !*Self { + const base = try BaseDefinition.initBase(allocator, name, documentation); + errdefer base.deinitBase(); + + const p = try allocator.create(Self); + errdefer allocator.destroy(p); + + p.* = .{ + .base = base, + .type = null, }; + return p; } - pub fn destroy(self: *Property) void { - self.allocator.free(self.name); + pub fn deinit(self: *Self) void { + // Free type if it exists + if (self.type) |t| { + self.base.allocator.free(t); + } + + // Free base definition fields + self.base.deinitBase(); + + // Free the struct itself + self.base.allocator.destroy(self); + } + + pub fn print(self: Property, writer: anytype) !void { + try writer.print("var {s}", .{self.base.name}); + + if (self.type) |t| { + try writer.print(": {s}", .{t}); + } + + try writer.print(";\n", .{}); + } + + pub fn setType(self: *Self, prop_type: []const u8) !void { + if (self.type) |old_type| { + self.base.allocator.free(old_type); + } + self.type = try self.base.allocator.dupe(u8, prop_type); + } +}; + +pub const ClassProperty = struct { + const Self = @This(); + property: Property, + class_name: []const u8, + + pub fn init(allocator: Allocator, name: []const u8, class_name: []const u8, documentation: ?[]const u8) !*Self { + const prop = try Property.init(allocator, name, documentation); + errdefer prop.deinit(); + + const class_name_copy = try allocator.dupe(u8, class_name); + errdefer allocator.free(class_name_copy); + + const cp = try allocator.create(Self); + errdefer allocator.destroy(cp); + + cp.* = .{ + .property = prop.*, + .class_name = class_name_copy, + }; + + // We've copied the property, so we can destroy the original + allocator.destroy(prop); + + return cp; + } + + pub fn deinit(self: *Self) void { + // Free the class name + self.property.base.allocator.free(self.class_name); + + // Clean up property fields but don't destroy the struct + // Free type if it exists + if (self.property.type) |t| { + self.property.base.allocator.free(t); + } + + // Free base definition fields + self.property.base.deinitBase(); + + // Free the struct itself + self.property.base.allocator.destroy(self); + } + + pub fn print(self: ClassProperty, writer: anytype) !void { + try writer.print("prop {s}::{s}", .{ self.class_name, self.property.base.name }); + + if (self.property.type) |t| { + try writer.print(": {s}", .{t}); + } + + try writer.print(";\n", .{}); + } +}; + +pub const Class = struct { + const Self = @This(); + base: BaseDefinition, + properties: ArrayList(*ClassProperty), + methods: ArrayList(*Method), + + pub fn init(allocator: Allocator, name: []const u8, documentation: ?[]const u8) !*Self { + const base = try BaseDefinition.initBase(allocator, name, documentation); + errdefer base.deinitBase(); + + const c = try allocator.create(Self); + errdefer allocator.destroy(c); + + c.* = .{ + .base = base, + .properties = ArrayList(*ClassProperty).init(allocator), + .methods = ArrayList(*Method).init(allocator), + }; + return c; + } + + pub fn deinit(self: *Self) void { + // Free all properties + for (self.properties.items) |prop| { + prop.deinit(); + } + self.properties.deinit(); + + // Free all methods + for (self.methods.items) |method| { + method.deinit(); + } + self.methods.deinit(); + + // Free base definition fields + self.base.deinitBase(); + + // Free the struct itself + self.base.allocator.destroy(self); + } + + pub fn print(self: Class, writer: anytype) !void { + try writer.print("class {s} {{\n", .{self.base.name}); + + for (self.properties.items) |prop| { + try writer.print(" ", .{}); + try prop.print(writer); + } + + if (self.properties.items.len > 0 and self.methods.items.len > 0) { + try writer.print("\n", .{}); + } + + for (self.methods.items) |method| { + try writer.print(" ", .{}); + try method.print(writer); + } + + try writer.print("}};\n", .{}); + } + + pub fn addProperty(self: *Self, prop: *ClassProperty) !void { + try self.properties.append(prop); + } + + pub fn addMethod(self: *Self, method: *Method) !void { + try self.methods.append(method); } }; pub const Definition = union(enum) { - function: Function, + const Self = @This(); + function: *Function, + property: *Property, + class_property: *ClassProperty, + method: *Method, + class: *Class, pub fn print(self: Definition, writer: anytype) !void { switch (self) { - inline else => |case| return case.print(writer), + inline else => |case| try case.print(writer), } } - pub fn destroy(self: *Definition) void { + pub fn deinit(self: Self) void { switch (self) { - inline else => |case| return case.destroy(), + inline else => |case| case.deinit(), } } }; + +pub const DefinitionList = struct { + const Self = @This(); + items: ArrayList(Definition), + allocator: Allocator, + + pub fn init(allocator: Allocator) Self { + return Self{ + .items = ArrayList(Definition).init(allocator), + .allocator = allocator, + }; + } + + pub fn deinit(self: *Self) void { + for (self.items.items) |def| { + def.deinit(); + } + self.items.deinit(); + } + + pub fn append(self: *Self, def: Definition) !void { + try self.items.append(def); + } + + pub fn pop(self: *Self) ?Definition { + return if (self.items.items.len > 0) self.items.pop() else null; + } +}; diff --git a/src/language.zig b/src/language.zig new file mode 100644 index 0000000..bdcb32f --- /dev/null +++ b/src/language.zig @@ -0,0 +1,138 @@ +const std = @import("std"); +const ts = @import("tree-sitter"); +const Allocator = std.mem.Allocator; + +extern fn tree_sitter_zig() callconv(.C) *ts.Language; +extern fn tree_sitter_c() callconv(.C) *ts.Language; +extern fn tree_sitter_python() callconv(.C) *ts.Language; + +pub const LanguageType = enum { + python, + zig, + c, + unknown, + + pub fn fromExtension(ext: []const u8) LanguageType { + if (std.mem.eql(u8, ext, ".py")) { + return .python; + } else if (std.mem.eql(u8, ext, ".zig")) { + return .zig; + } else if (std.mem.eql(u8, ext, ".c") or std.mem.eql(u8, ext, ".h")) { + return .c; + } else { + return .unknown; + } + } + + pub fn getName(self: LanguageType) []const u8 { + return switch (self) { + .python => "python", + .zig => "zig", + .c => "c", + .unknown => "unknown", + }; + } + + pub fn getLanguage(self: LanguageType) ?*ts.Language { + return switch (self) { + .python => tree_sitter_python(), + .zig => tree_sitter_zig(), + .c => tree_sitter_c(), + .unknown => null, + }; + } + + pub fn getQuery(self: LanguageType) ?[]const u8 { + return switch (self) { + .python => + \\;; Capture top-level functions, class, and method definitions + \\(module + \\ (expression_statement + \\ (assignment) @assignment + \\ ) + \\) + \\(module + \\ (function_definition) @function + \\) + \\(module + \\ (decorated_definition + \\ definition: (function_definition) @function + \\ ) + \\) + \\(module + \\ (class_definition + \\ body: (block + \\ (expression_statement + \\ (assignment) @class_assignment + \\ ) + \\ ) + \\ ) @class + \\) + \\(module + \\ (class_definition + \\ body: (block + \\ (function_definition) @method + \\ ) + \\ ) @class + \\) + \\(module + \\ (class_definition + \\ body: (block + \\ (expression_statement + \\ (string) @docstring + \\ ) + \\ ) + \\ ) @class + \\) + \\(module + \\ (class_definition + \\ body: (block + \\ (decorated_definition + \\ definition: (function_definition) @method + \\ ) + \\ ) + \\ ) @class + \\) + , + .zig => + \\ ;; Capture functions, structs, methods, variable definitions, and unions in Zig + \\(variable_declaration (identifier) + \\ (struct_declaration + \\ (container_field) @class_variable)) + \\ + \\(variable_declaration (identifier) + \\ (struct_declaration + \\ (function_declaration + \\ name: (identifier) @method))) + \\ + \\(variable_declaration (identifier) + \\ (enum_declaration + \\ (container_field + \\ type: (identifier) @enum_item))) + \\ + \\(variable_declaration (identifier) + \\ (union_declaration + \\ (container_field + \\ name: (identifier) @union_item))) + \\ + \\(source_file (function_declaration) @function) + \\ + \\(source_file (variable_declaration (identifier) @variable)) + , + .c => + \\;; Capture extern functions, variables, public classes, and methods + \\(function_definition + \\ (storage_class_specifier) @extern + \\) @function + \\(class_specifier + \\ (public) @class + \\ (function_definition) @method + \\) @class + \\(declaration + \\ (storage_class_specifier) @extern + \\) @variable + , + .unknown => null, + }; + } +}; diff --git a/src/main.zig b/src/main.zig index db6028d..c6d3cf7 100644 --- a/src/main.zig +++ b/src/main.zig @@ -1,19 +1,56 @@ const std = @import("std"); const ts = @import("tree-sitter"); +const Allocator = std.mem.Allocator; -const Parser = @import("parser.zig"); - -extern fn tree_sitter_zig() callconv(.C) *ts.Language; +const parser = @import("parser.zig"); +const CodeParser = parser.CodeParser; +const lang = @import("language.zig"); +const LanguageType = lang.LanguageType; pub fn main() !void { var gpa = std.heap.GeneralPurposeAllocator(.{}){}; defer _ = gpa.deinit(); const allocator = gpa.allocator(); - const file_path = "/Users/bogdanbuduroiu/development/aurelio-labs/semantic-router/semantic_router/route.py"; + // Get file path from args or use default + var argsIterator = try std.process.ArgIterator.initWithAllocator(allocator); + defer argsIterator.deinit(); - var parser = try Parser.create(allocator, file_path); - defer parser.destroy(); - const definitions = try parser.extractDefinitions(); - _ = definitions; // autofix + // Skip executable + _ = argsIterator.next(); + + var file_path: [:0]const u8 = undefined; + if (argsIterator.next()) |path| { + file_path = path; + } else { + return error.NoFile; + } + + // Read the source file + const source = try std.fs.cwd().readFileAlloc( + allocator, + file_path, + 1024 * 1024 * 10, + ); + defer allocator.free(source); + + // Create and configure the parser + var code_parser = try CodeParser.create(allocator, file_path, source); + defer code_parser.destroy(); + + // Extract definitions + var definitions = try code_parser.extractDefinitions(); + defer definitions.deinit(); + + // Print the definitions + const stdout = std.io.getStdOut(); + const writer = stdout.writer(); + + try writer.print("File: {s}\n", .{file_path}); + try writer.print("Language: {s}\n\n", .{code_parser.language_type.getName()}); + + // Print all definitions + for (definitions.items.items) |def| { + try def.print(writer); + } } diff --git a/src/parser.zig b/src/parser.zig index 42646ea..3673d11 100644 --- a/src/parser.zig +++ b/src/parser.zig @@ -1,170 +1,211 @@ const std = @import("std"); const ts = @import("tree-sitter"); const Allocator = std.mem.Allocator; -const MultiArrayList = std.MultiArrayList; -const definitions = @import("definitions.zig"); -const Definition = definitions.Definition; -const Function = definitions.Function; +const ArrayList = std.ArrayList; +const StringHashMap = std.StringHashMap; -const Self = @This(); -parser: *ts.Parser, -language_name: []const u8, -source: []const u8, -allocator: Allocator, +const defs = @import("definitions.zig"); +const Definition = defs.Definition; +const Function = defs.Function; +const Property = defs.Property; +const ClassProperty = defs.ClassProperty; +const Method = defs.Method; +const Class = defs.Class; +const DefinitionList = defs.DefinitionList; -pub fn create(allocator: Allocator, file_path: []const u8) !*Self { - const ext = std.fs.path.extension(file_path); +const lang = @import("language.zig"); +const LanguageType = lang.LanguageType; - var parser = ts.Parser.create(); - errdefer parser.destroy(); +pub const CodeParser = struct { + const Self = @This(); + parser: *ts.Parser, + language_type: LanguageType, + allocator: Allocator, + source: []const u8, - const language = try getLanguageForExtension(ext); - try parser.setLanguage(language); + // Maps to track class definitions for later reference + class_map: StringHashMap(*Class), - const source = try std.fs.cwd().readFileAlloc(allocator, file_path, 1024 * 1024 * 10); // 10MB max - errdefer allocator.free(source); + pub fn create(allocator: Allocator, file_path: []const u8, source: []const u8) !*Self { + // Determine language from file extension + const ext = std.fs.path.extension(file_path); + const language_type = LanguageType.fromExtension(ext); - const p = try allocator.create(Self); - p.* = .{ - .parser = parser, - .source = source, - .allocator = allocator, - .language_name = "python", - }; - return p; -} + // Get the tree-sitter language + const language = language_type.getLanguage() orelse return error.UnsupportedLanguage; -pub fn destroy(self: *Self) void { - self.parser.destroy(); - self.allocator.free(self.source); - self.allocator.destroy(self); -} + // Create and configure the parser + var parser = ts.Parser.create(); + errdefer parser.destroy(); + try parser.setLanguage(language); -pub fn extractDefinitions(self: *Self) !MultiArrayList(Definition) { - var defs = MultiArrayList(Definition){}; - defer defs.deinit(self.allocator); + // Create the parser instance + const p = try allocator.create(Self); + errdefer allocator.destroy(p); - // Parse the source code - const tree = self.parser.parseString(self.source, null); - if (tree == null) { - return error.ParseFailed; + p.* = .{ + .parser = parser, + .language_type = language_type, + .allocator = allocator, + .source = source, + .class_map = StringHashMap(*Class).init(allocator), + }; + + return p; } - defer tree.?.destroy(); - const root_node = tree.?.rootNode(); + pub fn destroy(self: *Self) void { + // Free class map entries + var it = self.class_map.iterator(); + while (it.next()) |entry| { + // Classes will be freed when the definitions list is freed + _ = entry; + } + self.class_map.deinit(); - // Get the appropriate query for this language - const query_string = try getQueryForLanguage(self.language_name); - var error_offset: u32 = 0; - const query = try ts.Query.create(self.parser.getLanguage() orelse tree_sitter_python(), query_string, &error_offset); - defer query.destroy(); + // Free the parser + self.parser.destroy(); - // Execute the query - const cursor = ts.QueryCursor.create(); - defer cursor.destroy(); - cursor.exec(query, root_node); + // Free self + self.allocator.destroy(self); + } - while (cursor.nextMatch()) |match| { - for (match.captures) |capture| { - const capture_name = query.captureNameForId(capture.index) orelse "mock_caputer"; - const node = capture.node; - const node_text = self.source[node.startByte()..node.endByte()]; - const name = if (node.childByFieldName("name")) |name_node| - self.source[name_node.startByte()..name_node.endByte()] - else - node_text; + pub fn extractDefinitions(self: *Self) !DefinitionList { + var definitions = DefinitionList.init(self.allocator); + errdefer definitions.deinit(); - if (std.mem.eql(u8, capture_name, "function")) { - var func_def = try Function.init(self.allocator, name, "", "", "", ""); - try defs.append(self.allocator, func_def); - defer func_def.destroy(); + // Parse the source code + const tree = self.parser.parseString(self.source, null); + if (tree == null) { + return error.ParseFailed; + } + defer tree.?.destroy(); + + const root_node = tree.?.rootNode(); + + // Get the appropriate query for this language + const query_string = self.language_type.getQuery() orelse return error.QueryNotFound; + var error_offset: u32 = 0; + const query = try ts.Query.create(self.parser.getLanguage() orelse return error.LanguageNotSet, query_string, &error_offset); + defer query.destroy(); + + // Execute the query + const cursor = ts.QueryCursor.create(); + defer cursor.destroy(); + cursor.exec(query, root_node); + + // Track captured nodes to avoid duplicates + var captured_nodes = std.AutoHashMap(ts.Node, void).init(self.allocator); + defer captured_nodes.deinit(); + + while (cursor.nextMatch()) |match| { + for (match.captures) |capture| { + const capture_name = query.captureNameForId(capture.index) orelse continue; + const node = capture.node; + + // Skip if we've already processed this node + if (captured_nodes.contains(node)) continue; + try captured_nodes.put(node, {}); + + // Extract node text and name + const node_text = self.source[node.startByte()..node.endByte()]; + const name = if (node.childByFieldName("name")) |name_node| + self.source[name_node.startByte()..name_node.endByte()] + else + node_text; + + // Extract documentation if available + const doc = self.extractDocumentation(node); + + try self.processCapture(capture_name, node, name, doc, &definitions); } } + + return definitions; + } + + fn extractDocumentation(self: *Self, node: ts.Node) ?[]const u8 { + // Look for docstrings in various formats depending on language + // This is a simplified implementation + if (self.language_type == .python) { + // For Python, look for a string as the first child of a function/class body + if (node.childByFieldName("body")) |body| { + if (body.namedChild(0)) |first_child| { + if (std.mem.eql(u8, first_child.kind(), "string")) { + return self.source[first_child.startByte()..first_child.endByte()]; + } + } + } + } + + return null; + } + + fn processCapture(self: *Self, capture_name: []const u8, node: ts.Node, name: []const u8, doc: ?[]const u8, definitions: *DefinitionList) !void { + if (std.mem.eql(u8, capture_name, "function")) { + const func = try Function.init(self.allocator, name, doc); + try definitions.append(.{ .function = func }); + } else if (std.mem.eql(u8, capture_name, "class")) { + const class = try Class.init(self.allocator, name, doc); + try self.class_map.put(name, class); + try definitions.append(.{ .class = class }); + } else if (std.mem.eql(u8, capture_name, "method")) { + // Find the parent class + const class_name = self.findParentClassName(node); + if (class_name) |cn| { + const method = try Method.init(self.allocator, name, cn, doc); + + // Add to class if we have it + if (self.class_map.get(cn)) |class| { + try class.addMethod(method); + } else { + // Otherwise add as standalone method + try definitions.append(.{ .method = method }); + } + } else { + // If we can't find a parent class, treat it as a function + const func = try Function.init(self.allocator, name, doc); + try definitions.append(.{ .function = func }); + } + } else if (std.mem.eql(u8, capture_name, "class_assignment") or + std.mem.eql(u8, capture_name, "class_variable")) + { + // Find the parent class + const class_name = self.findParentClassName(node); + if (class_name) |cn| { + const prop = try ClassProperty.init(self.allocator, name, cn, doc); + + // Add to class if we have it + if (self.class_map.get(cn)) |class| { + try class.addProperty(prop); + } else { + // Otherwise add as standalone property + try definitions.append(.{ .class_property = prop }); + } + } else { + // If we can't find a parent class, treat it as a regular property + const prop = try Property.init(self.allocator, name, doc); + try definitions.append(.{ .property = prop }); + } + } else if (std.mem.eql(u8, capture_name, "assignment")) { + const prop = try Property.init(self.allocator, name, doc); + try definitions.append(.{ .property = prop }); + } else if (std.mem.eql(u8, capture_name, "docstring")) { + // Handle docstrings - already processed in extractDocumentation + } } - for (defs) |def| { - try def.print(std.debug); + fn findParentClassName(self: *Self, node: ts.Node) ?[]const u8 { + var current = node.parent(); + while (current) |parent| { + if (std.mem.eql(u8, parent.kind(), "class_definition")) { + if (parent.childByFieldName("name")) |name_node| { + return self.source[name_node.startByte()..name_node.endByte()]; + } + return null; + } + current = parent.parent(); + } + return null; } - return defs; -} - -// Helper - -fn getLanguageForExtension(ext: []const u8) !*ts.Language { - if (std.mem.eql(u8, ext, ".zig")) { - return tree_sitter_zig(); - } else if (std.mem.eql(u8, ext, ".c") or std.mem.eql(u8, ext, ".h")) { - return tree_sitter_c(); - } else if (std.mem.eql(u8, ext, ".py")) { - return tree_sitter_python(); - } else { - return error.UnsupportedLanguage; - } -} - -fn getQueryForLanguage(language_name: []const u8) ![]const u8 { - // In a real implementation, this would load queries from files - if (std.mem.eql(u8, language_name, "python")) { - return - \\;; Capture top-level functions, class, and method definitions - \\(module - \\ (expression_statement - \\ (assignment) @assignment - \\ ) - \\) - \\(module - \\ (function_definition) @function - \\) - \\(module - \\ (decorated_definition - \\ definition: (function_definition) @function - \\ ) - \\) - \\(module - \\ (class_definition - \\ body: (block - \\ (expression_statement - \\ (assignment) @class_assignment - \\ ) - \\ ) - \\ ) @class - \\) - \\(module - \\ (class_definition - \\ body: (block - \\ (function_definition) @method - \\ ) - \\ ) @class - \\) - \\(module - \\ (class_definition - \\ body: (block - \\ (expression_statement - \\ (string) @docstring - \\ ) - \\ ) - \\ ) @class - \\) - \\(module - \\ (class_definition - \\ body: (block - \\ (decorated_definition - \\ definition: (function_definition) @method - \\ ) - \\ ) - \\ ) @class - \\) - ; - } else { - return - \\(function_definition name: (identifier) @function) - \\(class_definition name: (identifier) @class) - \\(method_definition name: (identifier) @method) - ; - } -} - -// External C functions for tree-sitter languages -extern fn tree_sitter_zig() callconv(.C) *ts.Language; -extern fn tree_sitter_c() callconv(.C) *ts.Language; -extern fn tree_sitter_python() callconv(.C) *ts.Language; +}; diff --git a/src/root.zig b/src/root.zig index 27d2be8..b8b267d 100644 --- a/src/root.zig +++ b/src/root.zig @@ -2,12 +2,19 @@ //! you are making an executable, the convention is to delete this file and //! start with main.zig instead. const std = @import("std"); +const ts = @import("tree-sitter"); const testing = std.testing; +const Parser = @import("parser.zig"); -pub export fn add(a: i32, b: i32) i32 { - return a + b; -} +extern fn tree_sitter_python() callconv(.C) *ts.Language; test "basic add functionality" { - try testing.expect(add(3, 7) == 10); + const p = try Parser.create(testing.allocator, tree_sitter_python()); + const definitions = try p.extractDefinitions("def is_valid() -> bool: ..."); + + const def = definitions[0]; + switch (def) { + .function => try testing.expect(std.mem.eql(def.function.name, "is_valid")), + else => unreachable, + } }