feat: Parser & Definitions
This commit is contained in:
parent
39e9f4d2c8
commit
1c688e5e45
4 changed files with 254 additions and 28 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
@ -19,4 +19,5 @@ zig-out/
|
||||||
|
|
||||||
# Although this was renamed to .zig-cache, let's leave it here for a few
|
# Although this was renamed to .zig-cache, let's leave it here for a few
|
||||||
# releases to make it less annoying to work with multiple branches.
|
# releases to make it less annoying to work with multiple branches.
|
||||||
zig-cache/
|
zig-cache/
|
||||||
|
.aider*
|
||||||
|
|
72
src/definitions.zig
Normal file
72
src/definitions.zig
Normal file
|
@ -0,0 +1,72 @@
|
||||||
|
const std = @import("std");
|
||||||
|
|
||||||
|
/// A function definition extracted from source code.
///
/// Every string field is a private copy made with `allocator` in `init` and
/// released again by `destroy`.
pub const Function = struct {
    name: []u8,
    params: []u8,
    return_type: []u8,
    access_modifier: []u8,
    documentation: []u8,
    /// Allocator that owns all of the string fields above.
    allocator: std.mem.Allocator,

    /// Builds a `Function` by duplicating each input slice with `allocator`.
    ///
    /// The inputs are only read; the caller keeps ownership of them.
    /// Returns any error produced by `allocator.dupe`. Copies that were
    /// already made before the failing allocation are freed again, so a
    /// failed `init` does not leak.
    pub fn init(
        allocator: std.mem.Allocator,
        name: []const u8,
        params: []const u8,
        return_type: []const u8,
        access_modifier: []const u8,
        documentation: []const u8,
    ) !Function {
        const owned_name = try allocator.dupe(u8, name);
        errdefer allocator.free(owned_name);

        const owned_params = try allocator.dupe(u8, params);
        errdefer allocator.free(owned_params);

        const owned_return_type = try allocator.dupe(u8, return_type);
        errdefer allocator.free(owned_return_type);

        const owned_access_modifier = try allocator.dupe(u8, access_modifier);
        errdefer allocator.free(owned_access_modifier);

        // Last fallible step: nothing after this can fail, so no errdefer is needed.
        const owned_documentation = try allocator.dupe(u8, documentation);

        return .{
            .name = owned_name,
            .params = owned_params,
            .return_type = owned_return_type,
            .access_modifier = owned_access_modifier,
            .documentation = owned_documentation,
            .allocator = allocator,
        };
    }

    /// Frees every string owned by this value. The value must not be used afterwards.
    pub fn destroy(self: *Function) void {
        self.allocator.free(self.name);
        self.allocator.free(self.params);
        self.allocator.free(self.return_type);
        self.allocator.free(self.access_modifier);
        self.allocator.free(self.documentation);
    }

    /// Writes `func <name>() -> <return_type>;` through `writer.print`.
    ///
    /// Only `name` and `return_type` are emitted; `params` is not printed.
    /// NOTE(review): the result of `writer.print` is not inspected, so this
    /// only fits writers whose `print` returns `void` (the `std.debug`
    /// namespace is what the parser passes) — confirm before handing in a
    /// fallible writer.
    pub fn print(self: Function, writer: anytype) void {
        writer.print("func {s}() -> {s};", .{ self.name, self.return_type });
    }
};
|
||||||
|
|
||||||
|
/// A named property whose name string is owned by the value itself.
pub const Property = struct {
    name: []u8,
    /// Allocator used to copy `name` in `init` and to free it in `destroy`.
    allocator: std.mem.Allocator,

    /// Creates a `Property` holding a copy of `name` made with `allocator`.
    /// Returns the allocator's error if the copy cannot be made.
    pub fn init(allocator: std.mem.Allocator, name: []const u8) !Property {
        const owned_name = try allocator.dupe(u8, name);
        return .{
            .allocator = allocator,
            .name = owned_name,
        };
    }

    /// Releases the copied name. The value must not be used afterwards.
    pub fn destroy(self: *Property) void {
        const owner = self.allocator;
        owner.free(self.name);
    }
};
|
||||||
|
|
||||||
|
/// A single extracted definition; the active tag says which kind it is.
pub const Definition = union(enum) {
    function: Function,

    /// Forwards to the active payload's `print(writer)`.
    pub fn print(self: Definition, writer: anytype) !void {
        switch (self) {
            inline else => |case| return case.print(writer),
        }
    }

    /// Forwards to the active payload's `destroy()`.
    ///
    /// The switch operates on `self.*` and captures the payload by pointer so
    /// that the payload stored inside this union (not a temporary copy) is
    /// the one handed to `destroy`, which takes a mutable pointer.
    pub fn destroy(self: *Definition) void {
        switch (self.*) {
            inline else => |*case| case.destroy(),
        }
    }
};
|
37
src/main.zig
37
src/main.zig
|
@ -1,36 +1,19 @@
|
||||||
const std = @import("std");
|
const std = @import("std");
|
||||||
const ts = @import("tree-sitter");
|
const ts = @import("tree-sitter");
|
||||||
|
|
||||||
|
const Parser = @import("parser.zig");
|
||||||
|
|
||||||
extern fn tree_sitter_zig() callconv(.C) *ts.Language;
|
extern fn tree_sitter_zig() callconv(.C) *ts.Language;
|
||||||
|
|
||||||
pub fn main() !void {
|
pub fn main() !void {
|
||||||
// Create a parser for the zig language
|
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
|
||||||
const language = tree_sitter_zig();
|
defer _ = gpa.deinit();
|
||||||
defer language.destroy();
|
const allocator = gpa.allocator();
|
||||||
|
|
||||||
const parser = ts.Parser.create();
|
const file_path = "/Users/bogdanbuduroiu/development/aurelio-labs/semantic-router/semantic_router/route.py";
|
||||||
|
|
||||||
|
var parser = try Parser.create(allocator, file_path);
|
||||||
defer parser.destroy();
|
defer parser.destroy();
|
||||||
try parser.setLanguage(language);
|
const definitions = try parser.extractDefinitions();
|
||||||
|
_ = definitions; // autofix
|
||||||
// Parse some source code and get the root node
|
|
||||||
const tree = parser.parseString("pub fn main() !void {}", null);
|
|
||||||
defer tree.?.destroy();
|
|
||||||
|
|
||||||
const node = tree.?.rootNode();
|
|
||||||
std.debug.assert(std.mem.eql(u8, node.kind(), "source_file"));
|
|
||||||
std.debug.print("{s}", .{node.kind()});
|
|
||||||
|
|
||||||
// Create a query and execute it
|
|
||||||
var error_offset: u32 = 0;
|
|
||||||
const query = try ts.Query.create(language, "name: (identifier) @name", &error_offset);
|
|
||||||
defer query.destroy();
|
|
||||||
|
|
||||||
const cursor = ts.QueryCursor.create();
|
|
||||||
defer cursor.destroy();
|
|
||||||
cursor.exec(query, node);
|
|
||||||
|
|
||||||
// Get the captured node of the first match
|
|
||||||
const match = cursor.nextMatch().?;
|
|
||||||
const capture = match.captures[0].node;
|
|
||||||
std.debug.assert(std.mem.eql(u8, capture.kind(), "identifier"));
|
|
||||||
}
|
}
|
||||||
|
|
170
src/parser.zig
Normal file
170
src/parser.zig
Normal file
|
@ -0,0 +1,170 @@
|
||||||
|
const std = @import("std");
|
||||||
|
const ts = @import("tree-sitter");
|
||||||
|
const Allocator = std.mem.Allocator;
|
||||||
|
const MultiArrayList = std.MultiArrayList;
|
||||||
|
const definitions = @import("definitions.zig");
|
||||||
|
const Definition = definitions.Definition;
|
||||||
|
const Function = definitions.Function;
|
||||||
|
|
||||||
|
const Self = @This();
|
||||||
|
parser: *ts.Parser,
|
||||||
|
language_name: []const u8,
|
||||||
|
source: []const u8,
|
||||||
|
allocator: Allocator,
|
||||||
|
|
||||||
|
/// Creates a heap-allocated parser for the file at `file_path`.
///
/// The tree-sitter grammar and the stored `language_name` are both derived
/// from the file extension, and the whole file (up to 10 MiB) is read into
/// memory with `allocator`.
///
/// Ownership: the returned pointer must be released with `destroy`, which
/// also frees the source buffer and the underlying tree-sitter parser.
///
/// Errors: `error.UnsupportedLanguage` for an unknown extension, plus any
/// error from `setLanguage`, from reading the file, or from allocation.
pub fn create(allocator: Allocator, file_path: []const u8) !*Self {
    const max_source_bytes = 1024 * 1024 * 10; // 10MB max

    const ext = std.fs.path.extension(file_path);
    const language = try getLanguageForExtension(ext);

    // Keep the name in step with the grammar picked above; it selects the
    // query used later, so it must not be fixed to a single language.
    const language_name: []const u8 = if (std.mem.eql(u8, ext, ".py"))
        "python"
    else if (std.mem.eql(u8, ext, ".zig"))
        "zig"
    else if (std.mem.eql(u8, ext, ".c") or std.mem.eql(u8, ext, ".h"))
        "c"
    else
        return error.UnsupportedLanguage;

    const parser = ts.Parser.create();
    errdefer parser.destroy();
    try parser.setLanguage(language);

    const source = try std.fs.cwd().readFileAlloc(allocator, file_path, max_source_bytes);
    errdefer allocator.free(source);

    const p = try allocator.create(Self);
    p.* = .{
        .parser = parser,
        .source = source,
        .allocator = allocator,
        .language_name = language_name,
    };
    return p;
}
|
||||||
|
|
||||||
|
/// Releases everything this parser holds: the tree-sitter parser, the source
/// buffer, and finally the parser object itself. `self` is invalid afterwards.
pub fn destroy(self: *Self) void {
    // Copy the allocator out first; it is still needed after `self` is gone.
    const owner = self.allocator;

    self.parser.destroy();
    owner.free(self.source);
    owner.destroy(self);
}
|
||||||
|
|
||||||
|
/// Parses `self.source` and collects the definitions matched by the query
/// chosen for `self.language_name`.
///
/// Only captures named "function" produce entries. Each becomes a
/// `Definition.function` whose name is the text of the captured node's
/// `name` field, or the whole node text when the node has no such field.
/// Every collected definition is also printed through `std.debug`.
///
/// Ownership: the returned list and the definitions stored in it belong to
/// the caller, who must destroy each definition and then `deinit` the list
/// with the same allocator. On error nothing is handed out and everything
/// collected so far is released here.
///
/// Errors: `error.ParseFailed` when the parser yields no tree, plus any
/// error from query creation or allocation.
pub fn extractDefinitions(self: *Self) !MultiArrayList(Definition) {
    var defs = MultiArrayList(Definition){};
    // Only clean up on failure: on success the list is returned to the caller.
    errdefer {
        for (0..defs.len) |i| {
            switch (defs.get(i)) {
                .function => |func| {
                    var owned = func;
                    owned.destroy();
                },
            }
        }
        defs.deinit(self.allocator);
    }

    // Parse the source code
    const tree = self.parser.parseString(self.source, null) orelse return error.ParseFailed;
    defer tree.destroy();
    const root_node = tree.rootNode();

    // Get the appropriate query for this language
    const query_string = try getQueryForLanguage(self.language_name);
    var error_offset: u32 = 0;
    const query = try ts.Query.create(
        self.parser.getLanguage() orelse tree_sitter_python(),
        query_string,
        &error_offset,
    );
    defer query.destroy();

    // Execute the query
    const cursor = ts.QueryCursor.create();
    defer cursor.destroy();
    cursor.exec(query, root_node);

    while (cursor.nextMatch()) |match| {
        for (match.captures) |capture| {
            // A capture without a name cannot be a "function" capture.
            const capture_name = query.captureNameForId(capture.index) orelse continue;
            if (!std.mem.eql(u8, capture_name, "function")) continue;

            const node = capture.node;
            const name_node = node.childByFieldName("name") orelse node;
            const name = self.source[name_node.startByte()..name_node.endByte()];

            var func_def = try Function.init(self.allocator, name, "", "", "", "");
            // Until the append succeeds the strings are still ours to free;
            // afterwards the list owns them.
            errdefer func_def.destroy();
            try defs.append(self.allocator, .{ .function = func_def });
        }
    }

    // A MultiArrayList is not directly iterable; walk it by index.
    for (0..defs.len) |i| {
        try defs.get(i).print(std.debug);
    }
    return defs;
}
|
||||||
|
|
||||||
|
// Helper
|
||||||
|
|
||||||
|
/// Maps a file extension (including the leading dot) to its tree-sitter grammar.
///
/// Recognises ".zig", ".c", ".h" and ".py"; any other extension yields
/// `error.UnsupportedLanguage`.
fn getLanguageForExtension(ext: []const u8) !*ts.Language {
    const is_zig = std.mem.eql(u8, ext, ".zig");
    if (is_zig) return tree_sitter_zig();

    const is_c = std.mem.eql(u8, ext, ".c") or std.mem.eql(u8, ext, ".h");
    if (is_c) return tree_sitter_c();

    const is_python = std.mem.eql(u8, ext, ".py");
    if (is_python) return tree_sitter_python();

    return error.UnsupportedLanguage;
}
|
||||||
|
|
||||||
|
/// Returns the tree-sitter query text used to find definitions for `language_name`.
///
/// "python" gets a dedicated query; every other name gets a small generic
/// fallback. The queries are compile-time string literals, so nothing is
/// allocated and the caller must not free the result.
fn getQueryForLanguage(language_name: []const u8) ![]const u8 {
    // In a real implementation, this would load queries from files
    const fallback_query =
        \\(function_definition name: (identifier) @function)
        \\(class_definition name: (identifier) @class)
        \\(method_definition name: (identifier) @method)
    ;
    const python_query =
        \\;; Capture top-level functions, class, and method definitions
        \\(module
        \\ (expression_statement
        \\ (assignment) @assignment
        \\ )
        \\)
        \\(module
        \\ (function_definition) @function
        \\)
        \\(module
        \\ (decorated_definition
        \\ definition: (function_definition) @function
        \\ )
        \\)
        \\(module
        \\ (class_definition
        \\ body: (block
        \\ (expression_statement
        \\ (assignment) @class_assignment
        \\ )
        \\ )
        \\ ) @class
        \\)
        \\(module
        \\ (class_definition
        \\ body: (block
        \\ (function_definition) @method
        \\ )
        \\ ) @class
        \\)
        \\(module
        \\ (class_definition
        \\ body: (block
        \\ (expression_statement
        \\ (string) @docstring
        \\ )
        \\ )
        \\ ) @class
        \\)
        \\(module
        \\ (class_definition
        \\ body: (block
        \\ (decorated_definition
        \\ definition: (function_definition) @method
        \\ )
        \\ )
        \\ ) @class
        \\)
    ;

    const wants_python = std.mem.eql(u8, language_name, "python");
    if (!wants_python) return fallback_query;
    return python_query;
}
|
||||||
|
|
||||||
|
// External C functions for tree-sitter languages
|
||||||
|
extern fn tree_sitter_zig() callconv(.C) *ts.Language;
|
||||||
|
extern fn tree_sitter_c() callconv(.C) *ts.Language;
|
||||||
|
extern fn tree_sitter_python() callconv(.C) *ts.Language;
|
Loading…
Add table
Reference in a new issue