feat: Parser & Definitions
This commit is contained in:
parent
39e9f4d2c8
commit
1c688e5e45
4 changed files with 254 additions and 28 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -20,3 +20,4 @@ zig-out/
|
|||
# Although this was renamed to .zig-cache, let's leave it here for a few
|
||||
# releases to make it less annoying to work with multiple branches.
|
||||
zig-cache/
|
||||
.aider*
|
||||
|
|
72
src/definitions.zig
Normal file
72
src/definitions.zig
Normal file
|
@ -0,0 +1,72 @@
|
|||
const std = @import("std");
|
||||
|
||||
/// A function definition extracted from parsed source.
///
/// Every string field is a private copy made with `allocator` in `init`
/// and released again by `destroy`.
pub const Function = struct {
    name: []u8,
    params: []u8,
    return_type: []u8,
    access_modifier: []u8,
    documentation: []u8,
    allocator: std.mem.Allocator,

    /// Builds a `Function` by duplicating every input string with `allocator`.
    ///
    /// The inputs are not retained; the caller must call `destroy` on the
    /// result. Returns the allocator's error if any copy cannot be made, in
    /// which case the copies made so far are freed before returning.
    pub fn init(
        allocator: std.mem.Allocator,
        name: []const u8,
        params: []const u8,
        return_type: []const u8,
        access_modifier: []const u8,
        documentation: []const u8,
    ) !Function {
        // Each copy gets its own errdefer so that a failure part-way through
        // does not leak the copies that already succeeded.
        const owned_name = try allocator.dupe(u8, name);
        errdefer allocator.free(owned_name);

        const owned_params = try allocator.dupe(u8, params);
        errdefer allocator.free(owned_params);

        const owned_return_type = try allocator.dupe(u8, return_type);
        errdefer allocator.free(owned_return_type);

        const owned_access_modifier = try allocator.dupe(u8, access_modifier);
        errdefer allocator.free(owned_access_modifier);

        const owned_documentation = try allocator.dupe(u8, documentation);

        return .{
            .name = owned_name,
            .params = owned_params,
            .return_type = owned_return_type,
            .access_modifier = owned_access_modifier,
            .documentation = owned_documentation,
            .allocator = allocator,
        };
    }

    /// Frees every string copied by `init`. The value must not be used afterwards.
    pub fn destroy(self: *Function) void {
        self.allocator.free(self.name);
        self.allocator.free(self.params);
        self.allocator.free(self.return_type);
        self.allocator.free(self.access_modifier);
        self.allocator.free(self.documentation);
    }

    /// Writes `func <name>() -> <return_type>;` to `writer`.
    ///
    /// `writer` may be anything exposing `print(fmt, args)`. The result of that
    /// call is returned as-is, so a writer whose `print` returns `void` and one
    /// whose `print` returns an error union are both accepted.
    pub fn print(self: Function, writer: anytype) !void {
        return writer.print("func {s}() -> {s};", .{ self.name, self.return_type });
    }
};
|
||||
|
||||
/// A named property. `name` is a private copy owned by this value.
pub const Property = struct {
    name: []u8,
    allocator: std.mem.Allocator,

    /// Copies `name` with `allocator`; the caller must later call `destroy`.
    /// Fails only if the copy cannot be allocated.
    pub fn init(allocator: std.mem.Allocator, name: []const u8) !Property {
        const owned_name = try allocator.dupe(u8, name);
        return .{
            .name = owned_name,
            .allocator = allocator,
        };
    }

    /// Frees the copy of the name made by `init`.
    pub fn destroy(self: *Property) void {
        const allocator = self.allocator;
        allocator.free(self.name);
    }
};
|
||||
|
||||
/// Tagged union over the kinds of definition the parser produces.
pub const Definition = union(enum) {
    function: Function,

    /// Writes the active definition to `writer` by delegating to the payload's
    /// own `print`. Any error from that call is returned.
    pub fn print(self: Definition, writer: anytype) !void {
        switch (self) {
            inline else => |case| return case.print(writer),
        }
    }

    /// Releases the memory owned by the active payload.
    pub fn destroy(self: *Definition) void {
        // Switch on the pointee, not the pointer, and capture the payload by
        // pointer: the payload's `destroy` takes a mutable pointer receiver.
        switch (self.*) {
            inline else => |*case| case.destroy(),
        }
    }
};
|
37
src/main.zig
37
src/main.zig
|
@ -1,36 +1,19 @@
|
|||
const std = @import("std");
|
||||
const ts = @import("tree-sitter");
|
||||
|
||||
const Parser = @import("parser.zig");
|
||||
|
||||
extern fn tree_sitter_zig() callconv(.C) *ts.Language;
|
||||
|
||||
pub fn main() !void {
|
||||
// Create a parser for the zig language
|
||||
const language = tree_sitter_zig();
|
||||
defer language.destroy();
|
||||
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
|
||||
defer _ = gpa.deinit();
|
||||
const allocator = gpa.allocator();
|
||||
|
||||
const parser = ts.Parser.create();
|
||||
const file_path = "/Users/bogdanbuduroiu/development/aurelio-labs/semantic-router/semantic_router/route.py";
|
||||
|
||||
var parser = try Parser.create(allocator, file_path);
|
||||
defer parser.destroy();
|
||||
try parser.setLanguage(language);
|
||||
|
||||
// Parse some source code and get the root node
|
||||
const tree = parser.parseString("pub fn main() !void {}", null);
|
||||
defer tree.?.destroy();
|
||||
|
||||
const node = tree.?.rootNode();
|
||||
std.debug.assert(std.mem.eql(u8, node.kind(), "source_file"));
|
||||
std.debug.print("{s}", .{node.kind()});
|
||||
|
||||
// Create a query and execute it
|
||||
var error_offset: u32 = 0;
|
||||
const query = try ts.Query.create(language, "name: (identifier) @name", &error_offset);
|
||||
defer query.destroy();
|
||||
|
||||
const cursor = ts.QueryCursor.create();
|
||||
defer cursor.destroy();
|
||||
cursor.exec(query, node);
|
||||
|
||||
// Get the captured node of the first match
|
||||
const match = cursor.nextMatch().?;
|
||||
const capture = match.captures[0].node;
|
||||
std.debug.assert(std.mem.eql(u8, capture.kind(), "identifier"));
|
||||
const definitions = try parser.extractDefinitions();
|
||||
_ = definitions; // autofix
|
||||
}
|
||||
|
|
170
src/parser.zig
Normal file
170
src/parser.zig
Normal file
|
@ -0,0 +1,170 @@
|
|||
const std = @import("std");
|
||||
const ts = @import("tree-sitter");
|
||||
const Allocator = std.mem.Allocator;
|
||||
const MultiArrayList = std.MultiArrayList;
|
||||
const definitions = @import("definitions.zig");
|
||||
const Definition = definitions.Definition;
|
||||
const Function = definitions.Function;
|
||||
|
||||
const Self = @This();
|
||||
/// Tree-sitter parser handle; set up in `create` and released in `destroy`.
parser: *ts.Parser,
|
||||
/// Name passed to `getQueryForLanguage` to choose the query text.
language_name: []const u8,
|
||||
/// Contents of the file read in `create`; freed with `allocator` in `destroy`.
source: []const u8,
|
||||
/// Allocator that owns `source` and this struct itself.
allocator: Allocator,
|
||||
|
||||
/// Creates a heap-allocated parser for the file at `file_path`.
///
/// The tree-sitter grammar is picked from the file extension and the whole
/// file (at most 10 MiB) is read into memory with `allocator`. The caller owns
/// the returned pointer and must release it with `destroy`.
///
/// Returns `error.UnsupportedLanguage` for an unknown extension, or any error
/// from setting the grammar, reading the file, or allocating.
pub fn create(allocator: Allocator, file_path: []const u8) !*Self {
    const ext = std.fs.path.extension(file_path);

    // Resolve the grammar first so an unsupported extension fails before any
    // tree-sitter or file resources are acquired.
    const language = try getLanguageForExtension(ext);

    const parser = ts.Parser.create();
    errdefer parser.destroy();
    try parser.setLanguage(language);

    const source = try std.fs.cwd().readFileAlloc(allocator, file_path, 1024 * 1024 * 10); // 10MB max
    errdefer allocator.free(source);

    const p = try allocator.create(Self);
    p.* = .{
        .parser = parser,
        .source = source,
        .allocator = allocator,
        // Record the language that was actually selected instead of always
        // claiming Python, so the query lookup matches the grammar in use.
        .language_name = languageNameForExtension(ext),
    };
    return p;
}

/// Maps an extension already accepted by `getLanguageForExtension` to the
/// language name stored on the parser.
fn languageNameForExtension(ext: []const u8) []const u8 {
    if (std.mem.eql(u8, ext, ".py")) return "python";
    if (std.mem.eql(u8, ext, ".zig")) return "zig";
    // The only other accepted extensions are ".c" and ".h".
    return "c";
}
|
||||
|
||||
/// Releases the tree-sitter parser, the buffered source text, and finally the
/// parser struct itself. `self` must not be used afterwards.
pub fn destroy(self: *Self) void {
    const allocator = self.allocator;
    const source = self.source;

    self.parser.destroy();
    allocator.free(source);
    allocator.destroy(self);
}
|
||||
|
||||
/// Parses `self.source` and returns one `Definition` per node captured as
/// `@function` by the query for `self.language_name`.
///
/// Ownership: the caller owns the returned list. Every element must be
/// destroyed and the list itself released with `deinit`, using the allocator
/// this parser was created with. Each extracted definition is also printed
/// through `std.debug` before returning.
///
/// Returns `error.ParseFailed` when tree-sitter produces no tree, or any error
/// from building the query, allocating, or printing. On error nothing is
/// leaked: definitions collected so far are destroyed.
pub fn extractDefinitions(self: *Self) !MultiArrayList(Definition) {
    var defs = MultiArrayList(Definition){};
    // Tear the list down only on failure; on success it is handed to the caller.
    errdefer {
        for (0..defs.len) |i| {
            switch (defs.get(i)) {
                inline else => |payload| {
                    // `get` returns a copy; the copy shares the owned strings,
                    // so destroying it frees the element's memory.
                    var owned = payload;
                    owned.destroy();
                },
            }
        }
        defs.deinit(self.allocator);
    }

    // Parse the source code
    const tree = self.parser.parseString(self.source, null) orelse return error.ParseFailed;
    defer tree.destroy();

    const root_node = tree.rootNode();

    // Get the appropriate query for this language
    const query_string = try getQueryForLanguage(self.language_name);
    var error_offset: u32 = 0;
    const query = try ts.Query.create(self.parser.getLanguage() orelse tree_sitter_python(), query_string, &error_offset);
    defer query.destroy();

    // Execute the query
    const cursor = ts.QueryCursor.create();
    defer cursor.destroy();
    cursor.exec(query, root_node);

    while (cursor.nextMatch()) |match| {
        for (match.captures) |capture| {
            // A capture without a name can never be a "function" capture.
            const capture_name = query.captureNameForId(capture.index) orelse continue;
            if (!std.mem.eql(u8, capture_name, "function")) continue;

            const node = capture.node;
            // Prefer the node's `name` field; fall back to the node's full text.
            const name = if (node.childByFieldName("name")) |name_node|
                self.source[name_node.startByte()..name_node.endByte()]
            else
                self.source[node.startByte()..node.endByte()];

            var func_def = try Function.init(self.allocator, name, "", "", "", "");
            // Until the append succeeds this function is the sole owner; after
            // it succeeds the list owns the strings and they must stay alive.
            errdefer func_def.destroy();
            try defs.append(self.allocator, .{ .function = func_def });
        }
    }

    for (0..defs.len) |i| {
        try defs.get(i).print(std.debug);
    }
    return defs;
}
|
||||
|
||||
// Helper
|
||||
|
||||
/// Returns the tree-sitter grammar for a file extension (leading dot included).
///
/// Recognizes ".zig", ".c", ".h" and ".py"; any other extension yields
/// `error.UnsupportedLanguage`. The comparison is case-sensitive.
fn getLanguageForExtension(ext: []const u8) !*ts.Language {
    if (std.mem.eql(u8, ext, ".zig")) return tree_sitter_zig();

    const is_c_family = std.mem.eql(u8, ext, ".c") or std.mem.eql(u8, ext, ".h");
    if (is_c_family) return tree_sitter_c();

    if (std.mem.eql(u8, ext, ".py")) return tree_sitter_python();

    return error.UnsupportedLanguage;
}
|
||||
|
||||
/// Returns the tree-sitter query text for `language_name`.
///
/// "python" gets a dedicated query; every other name gets a small generic
/// query. The returned slice is a string constant and must not be freed.
/// No error is currently produced; the error union is kept for callers.
fn getQueryForLanguage(language_name: []const u8) ![]const u8 {
    // In a real implementation, this would load queries from files
    const python_query: []const u8 =
        \\;; Capture top-level functions, class, and method definitions
        \\(module
        \\ (expression_statement
        \\ (assignment) @assignment
        \\ )
        \\)
        \\(module
        \\ (function_definition) @function
        \\)
        \\(module
        \\ (decorated_definition
        \\ definition: (function_definition) @function
        \\ )
        \\)
        \\(module
        \\ (class_definition
        \\ body: (block
        \\ (expression_statement
        \\ (assignment) @class_assignment
        \\ )
        \\ )
        \\ ) @class
        \\)
        \\(module
        \\ (class_definition
        \\ body: (block
        \\ (function_definition) @method
        \\ )
        \\ ) @class
        \\)
        \\(module
        \\ (class_definition
        \\ body: (block
        \\ (expression_statement
        \\ (string) @docstring
        \\ )
        \\ )
        \\ ) @class
        \\)
        \\(module
        \\ (class_definition
        \\ body: (block
        \\ (decorated_definition
        \\ definition: (function_definition) @method
        \\ )
        \\ )
        \\ ) @class
        \\)
    ;

    const fallback_query: []const u8 =
        \\(function_definition name: (identifier) @function)
        \\(class_definition name: (identifier) @class)
        \\(method_definition name: (identifier) @method)
    ;

    const wants_python = std.mem.eql(u8, language_name, "python");
    return if (wants_python) python_query else fallback_query;
}
|
||||
|
||||
// External C functions for tree-sitter languages
|
||||
/// Grammar handle returned by `getLanguageForExtension` for ".zig" files.
/// NOTE(review): presumably provided by a linked tree-sitter grammar library; confirm in the build script.
extern fn tree_sitter_zig() callconv(.C) *ts.Language;
|
||||
/// Grammar handle returned by `getLanguageForExtension` for ".c" and ".h" files.
extern fn tree_sitter_c() callconv(.C) *ts.Language;
|
||||
/// Grammar handle returned for ".py" files; also the fallback language in `extractDefinitions`.
extern fn tree_sitter_python() callconv(.C) *ts.Language;
|
Loading…
Add table
Reference in a new issue