feat: multi-lang implementation (C, Zig, Python)
This commit is contained in:
parent
1c688e5e45
commit
788625d571
5 changed files with 750 additions and 200 deletions
|
@ -1,72 +1,399 @@
|
|||
const std = @import("std");
|
||||
const Allocator = std.mem.Allocator;
|
||||
const ArrayList = std.ArrayList;
|
||||
|
||||
pub const Function = struct {
|
||||
name: []u8,
|
||||
params: []u8,
|
||||
return_type: []u8,
|
||||
access_modifier: []u8,
|
||||
documentation: []u8,
|
||||
allocator: std.mem.Allocator,
|
||||
// Base definition struct with common fields
|
||||
const BaseDefinition = struct {
|
||||
name: []const u8,
|
||||
allocator: Allocator,
|
||||
documentation: ?[]const u8 = null,
|
||||
|
||||
pub fn init(
|
||||
allocator: std.mem.Allocator,
|
||||
name: []const u8,
|
||||
params: []const u8,
|
||||
return_type: []const u8,
|
||||
access_modifier: []const u8,
|
||||
documentation: []const u8,
|
||||
) !Function {
|
||||
return .{
|
||||
.name = try allocator.dupe(u8, name),
|
||||
.params = try allocator.dupe(u8, params),
|
||||
.return_type = try allocator.dupe(u8, return_type),
|
||||
.access_modifier = try allocator.dupe(u8, access_modifier),
|
||||
.documentation = try allocator.dupe(u8, documentation),
|
||||
/// Frees the strings held by this definition using `self.allocator`:
/// the documentation text when one is set, and the name.
fn deinitBase(self: *const BaseDefinition) void {
    const gpa = self.allocator;
    if (self.documentation) |doc_text| gpa.free(doc_text);
    gpa.free(self.name);
}
|
||||
|
||||
/// Builds a `BaseDefinition` that holds its own copies of `name` and, when
/// one is supplied, `documentation`. Both copies are made with `allocator`,
/// which is also stored in the result.
///
/// Returns the allocator's error if either copy fails; in that case nothing
/// allocated by this function is left behind.
fn initBase(allocator: Allocator, name: []const u8, documentation: ?[]const u8) !BaseDefinition {
    const name_copy = try allocator.dupe(u8, name);
    // Guards the one fallible step that follows (copying the documentation).
    errdefer allocator.free(name_copy);

    // No errdefer is required for this copy: it is the last operation in the
    // function that can fail, so it can never need to be rolled back here.
    const doc_copy: ?[]const u8 = if (documentation) |doc|
        try allocator.dupe(u8, doc)
    else
        null;

    return BaseDefinition{
        .name = name_copy,
        .allocator = allocator,
        .documentation = doc_copy,
    };
}
|
||||
};
|
||||
|
||||
pub fn destroy(self: *Function) void {
|
||||
self.allocator.free(self.name);
|
||||
self.allocator.free(self.params);
|
||||
self.allocator.free(self.return_type);
|
||||
self.allocator.free(self.access_modifier);
|
||||
self.allocator.free(self.documentation);
|
||||
pub const Function = struct {
|
||||
const Self = @This();
|
||||
base: BaseDefinition,
|
||||
parameters: ArrayList([]const u8),
|
||||
return_type: ?[]const u8 = null,
|
||||
|
||||
/// Heap-allocates a `Function` with an empty parameter list and no return
/// type. `name` and `documentation` are copied via `BaseDefinition.initBase`.
/// On failure, everything allocated so far is released before the error is
/// returned.
pub fn init(allocator: Allocator, name: []const u8, documentation: ?[]const u8) !*Self {
    const base_fields = try BaseDefinition.initBase(allocator, name, documentation);
    errdefer base_fields.deinitBase();

    const self = try allocator.create(Self);
    errdefer allocator.destroy(self);

    self.* = Self{
        .base = base_fields,
        .parameters = ArrayList([]const u8).init(allocator),
        .return_type = null,
    };
    return self;
}
|
||||
|
||||
pub fn print(self: Function, writer: anytype) void {
|
||||
writer.print("func {s}() -> {s};", .{ self.name, self.return_type });
|
||||
/// Releases every allocation reachable from this function (parameter
/// strings, the parameter list, the return type, the base fields) and then
/// destroys the struct itself. `self` must not be used afterwards.
pub fn deinit(self: *Self) void {
    // Read the allocator once up front; it is still needed after the base
    // fields have been released.
    const gpa = self.base.allocator;

    for (self.parameters.items) |owned_param| gpa.free(owned_param);
    self.parameters.deinit();

    if (self.return_type) |owned_return_type| gpa.free(owned_return_type);

    self.base.deinitBase();
    gpa.destroy(self);
}
|
||||
|
||||
/// Writes `func NAME(p1, p2) -> RET;` followed by a newline to `writer`.
/// When no return type has been set, `void` is written in its place.
/// Any error from `writer` is propagated.
pub fn print(self: Self, writer: anytype) !void {
    try writer.print("func {s}(", .{self.base.name});

    for (self.parameters.items, 0..) |param_text, index| {
        // Separator goes before every parameter except the first.
        if (index != 0) {
            try writer.print(", ", .{});
        }
        try writer.print("{s}", .{param_text});
    }

    try writer.print(") -> ", .{});

    const return_text: []const u8 = self.return_type orelse "void";
    try writer.print("{s}", .{return_text});

    try writer.print(";\n", .{});
}
|
||||
|
||||
/// Appends a copy of `param` to the parameter list. The copy is made with
/// the function's own allocator and is released again if the append fails.
pub fn addParameter(self: *Self, param: []const u8) !void {
    const gpa = self.base.allocator;
    const owned_param = try gpa.dupe(u8, param);
    self.parameters.append(owned_param) catch |err| {
        gpa.free(owned_param);
        return err;
    };
}
|
||||
|
||||
/// Replaces the stored return type with a copy of `ret_type`.
///
/// The copy is made before the previous value is released, so that:
///   * if the allocation fails, `return_type` still points at valid memory
///     and the function is left unchanged;
///   * `ret_type` may safely alias the currently stored return type.
///
/// Returns the allocator's error if the copy cannot be made.
pub fn setReturnType(self: *Self, ret_type: []const u8) !void {
    const gpa = self.base.allocator;
    const new_ret = try gpa.dupe(u8, ret_type);

    if (self.return_type) |old_ret| gpa.free(old_ret);
    self.return_type = new_ret;
}
|
||||
};
|
||||
|
||||
/// Pairs a `Function` (stored by value) with an owned copy of a class name.
pub const Method = struct {
    const Self = @This();

    function: Function,
    class_name: []const u8,

    /// Heap-allocates a `Method`. A temporary `Function` is created through
    /// `Function.init`, its contents are moved into the new struct, and the
    /// temporary box is destroyed. `class_name` is copied with `allocator`.
    pub fn init(allocator: Allocator, name: []const u8, class_name: []const u8, documentation: ?[]const u8) !*Self {
        const boxed_function = try Function.init(allocator, name, documentation);
        errdefer boxed_function.deinit();

        const owned_class_name = try allocator.dupe(u8, class_name);
        errdefer allocator.free(owned_class_name);

        const self = try allocator.create(Self);
        errdefer allocator.destroy(self);

        self.* = Self{
            .function = boxed_function.*,
            .class_name = owned_class_name,
        };

        // The function's fields now live inside `self`; only the box that
        // carried them is released here, not the data it pointed to.
        allocator.destroy(boxed_function);

        return self;
    }

    /// Frees the class name, all data held by the embedded function, and
    /// finally the struct itself. The embedded function is cleaned up field
    /// by field because it is stored by value, not as its own allocation.
    pub fn deinit(self: *Self) void {
        const gpa = self.function.base.allocator;

        gpa.free(self.class_name);

        for (self.function.parameters.items) |owned_param| gpa.free(owned_param);
        self.function.parameters.deinit();

        if (self.function.return_type) |owned_return_type| gpa.free(owned_return_type);

        self.function.base.deinitBase();

        gpa.destroy(self);
    }

    /// Writes `method CLASS::NAME(p1, p2) -> RET;` followed by a newline.
    /// `void` is written when the embedded function has no return type.
    pub fn print(self: Self, writer: anytype) !void {
        try writer.print("method {s}::{s}(", .{ self.class_name, self.function.base.name });

        for (self.function.parameters.items, 0..) |param_text, index| {
            if (index != 0) {
                try writer.print(", ", .{});
            }
            try writer.print("{s}", .{param_text});
        }

        try writer.print(") -> ", .{});

        const return_text: []const u8 = self.function.return_type orelse "void";
        try writer.print("{s}", .{return_text});

        try writer.print(";\n", .{});
    }
};
|
||||
|
||||
pub const Property = struct {
|
||||
name: []u8,
|
||||
allocator: std.mem.Allocator,
|
||||
const Self = @This();
|
||||
base: BaseDefinition,
|
||||
type: ?[]const u8 = null,
|
||||
|
||||
pub fn init(allocator: std.mem.Allocator, name: []const u8) !Property {
|
||||
return Property{
|
||||
.name = try allocator.dupe(u8, name),
|
||||
.allocator = allocator,
|
||||
/// Heap-allocates a `Property` with no type set. `name` and `documentation`
/// are copied via `BaseDefinition.initBase`; on failure, what was allocated
/// so far is released before the error is returned.
pub fn init(allocator: Allocator, name: []const u8, documentation: ?[]const u8) !*Self {
    const base_fields = try BaseDefinition.initBase(allocator, name, documentation);
    errdefer base_fields.deinitBase();

    const self = try allocator.create(Self);
    errdefer allocator.destroy(self);

    self.* = Self{
        .base = base_fields,
        .type = null,
    };
    return self;
}
|
||||
|
||||
pub fn destroy(self: *Property) void {
|
||||
self.allocator.free(self.name);
|
||||
/// Frees the type string (when set), the base fields, and then the struct
/// itself. `self` must not be used afterwards.
pub fn deinit(self: *Self) void {
    const gpa = self.base.allocator;

    if (self.type) |owned_type| gpa.free(owned_type);
    self.base.deinitBase();

    gpa.destroy(self);
}
|
||||
|
||||
/// Writes `var NAME;` or, when a type is set, `var NAME: TYPE;`, followed by
/// a newline. Any error from `writer` is propagated.
pub fn print(self: Property, writer: anytype) !void {
    try writer.print("var {s}", .{self.base.name});

    const maybe_type = self.type;
    if (maybe_type) |type_text| {
        try writer.print(": {s}", .{type_text});
    }

    try writer.print(";\n", .{});
}
|
||||
|
||||
/// Replaces the stored type with a copy of `prop_type`.
///
/// The copy is made before the previous value is released, so that:
///   * if the allocation fails, `type` still points at valid memory and the
///     property is left unchanged;
///   * `prop_type` may safely alias the currently stored type string.
///
/// Returns the allocator's error if the copy cannot be made.
pub fn setType(self: *Self, prop_type: []const u8) !void {
    const gpa = self.base.allocator;
    const new_type = try gpa.dupe(u8, prop_type);

    if (self.type) |old_type| gpa.free(old_type);
    self.type = new_type;
}
|
||||
};
|
||||
|
||||
/// Pairs a `Property` (stored by value) with an owned copy of a class name.
pub const ClassProperty = struct {
    const Self = @This();

    property: Property,
    class_name: []const u8,

    /// Heap-allocates a `ClassProperty`. A temporary `Property` is created
    /// through `Property.init`, its contents are moved into the new struct,
    /// and the temporary box is destroyed. `class_name` is copied with
    /// `allocator`.
    pub fn init(allocator: Allocator, name: []const u8, class_name: []const u8, documentation: ?[]const u8) !*Self {
        const boxed_property = try Property.init(allocator, name, documentation);
        errdefer boxed_property.deinit();

        const owned_class_name = try allocator.dupe(u8, class_name);
        errdefer allocator.free(owned_class_name);

        const self = try allocator.create(Self);
        errdefer allocator.destroy(self);

        self.* = Self{
            .property = boxed_property.*,
            .class_name = owned_class_name,
        };

        // The property's fields now live inside `self`; only the box that
        // carried them is released here.
        allocator.destroy(boxed_property);

        return self;
    }

    /// Frees the class name, the embedded property's type string and base
    /// fields, and finally the struct itself. The embedded property is
    /// cleaned up field by field because it is stored by value.
    pub fn deinit(self: *Self) void {
        const gpa = self.property.base.allocator;

        gpa.free(self.class_name);

        if (self.property.type) |owned_type| gpa.free(owned_type);

        self.property.base.deinitBase();

        gpa.destroy(self);
    }

    /// Writes `prop CLASS::NAME;` or, when a type is set,
    /// `prop CLASS::NAME: TYPE;`, followed by a newline.
    pub fn print(self: ClassProperty, writer: anytype) !void {
        try writer.print("prop {s}::{s}", .{ self.class_name, self.property.base.name });

        const maybe_type = self.property.type;
        if (maybe_type) |type_text| {
            try writer.print(": {s}", .{type_text});
        }

        try writer.print(";\n", .{});
    }
};
|
||||
|
||||
/// A named definition that holds lists of `*ClassProperty` and `*Method`.
/// `deinit` calls `deinit` on every stored pointer, so pointers handed to
/// `addProperty` / `addMethod` are released together with the class.
pub const Class = struct {
    const Self = @This();

    base: BaseDefinition,
    properties: ArrayList(*ClassProperty),
    methods: ArrayList(*Method),

    /// Heap-allocates a `Class` with empty property and method lists.
    /// `name` and `documentation` are copied via `BaseDefinition.initBase`.
    pub fn init(allocator: Allocator, name: []const u8, documentation: ?[]const u8) !*Self {
        const base_fields = try BaseDefinition.initBase(allocator, name, documentation);
        errdefer base_fields.deinitBase();

        const self = try allocator.create(Self);
        errdefer allocator.destroy(self);

        self.* = Self{
            .base = base_fields,
            .properties = ArrayList(*ClassProperty).init(allocator),
            .methods = ArrayList(*Method).init(allocator),
        };
        return self;
    }

    /// Deinitializes every stored property and method, both lists, the base
    /// fields, and finally the struct itself.
    pub fn deinit(self: *Self) void {
        const gpa = self.base.allocator;

        for (self.properties.items) |member_property| member_property.deinit();
        self.properties.deinit();

        for (self.methods.items) |member_method| member_method.deinit();
        self.methods.deinit();

        self.base.deinitBase();

        gpa.destroy(self);
    }

    /// Writes `class NAME {`, one line per property, one line per method,
    /// and a closing `};`. A blank line separates the two groups only when
    /// both are non-empty.
    pub fn print(self: Class, writer: anytype) !void {
        try writer.print("class {s} {{\n", .{self.base.name});

        for (self.properties.items) |member_property| {
            try writer.print(" ", .{});
            try member_property.print(writer);
        }

        const has_properties = self.properties.items.len != 0;
        const has_methods = self.methods.items.len != 0;
        if (has_properties and has_methods) {
            try writer.print("\n", .{});
        }

        for (self.methods.items) |member_method| {
            try writer.print(" ", .{});
            try member_method.print(writer);
        }

        try writer.print("}};\n", .{});
    }

    /// Appends `prop` to the property list; the pointer itself is stored,
    /// not a copy of what it points to.
    pub fn addProperty(self: *Self, prop: *ClassProperty) !void {
        return self.properties.append(prop);
    }

    /// Appends `method` to the method list; the pointer itself is stored,
    /// not a copy of what it points to.
    pub fn addMethod(self: *Self, method: *Method) !void {
        return self.methods.append(method);
    }
};
|
||||
|
||||
pub const Definition = union(enum) {
|
||||
function: Function,
|
||||
const Self = @This();
|
||||
function: *Function,
|
||||
property: *Property,
|
||||
class_property: *ClassProperty,
|
||||
method: *Method,
|
||||
class: *Class,
|
||||
|
||||
pub fn print(self: Definition, writer: anytype) !void {
|
||||
switch (self) {
|
||||
inline else => |case| return case.print(writer),
|
||||
inline else => |case| try case.print(writer),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn destroy(self: *Definition) void {
|
||||
pub fn deinit(self: Self) void {
|
||||
switch (self) {
|
||||
inline else => |case| return case.destroy(),
|
||||
inline else => |case| case.deinit(),
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/// A growable list of `Definition` values. `deinit` calls `deinit` on every
/// element still in the list before releasing the list storage.
pub const DefinitionList = struct {
    const Self = @This();

    items: ArrayList(Definition),
    allocator: Allocator,

    /// Creates an empty list backed by `allocator`.
    pub fn init(allocator: Allocator) Self {
        return .{
            .items = ArrayList(Definition).init(allocator),
            .allocator = allocator,
        };
    }

    /// Deinitializes each stored definition, then the list itself.
    pub fn deinit(self: *Self) void {
        for (self.items.items) |stored| stored.deinit();
        self.items.deinit();
    }

    /// Adds `def` to the end of the list.
    pub fn append(self: *Self, def: Definition) !void {
        return self.items.append(def);
    }

    /// Removes and returns the last definition, or `null` when the list is
    /// empty. The returned definition is no longer visited by `deinit`.
    pub fn pop(self: *Self) ?Definition {
        if (self.items.items.len == 0) return null;
        return self.items.pop();
    }
};
|
||||
|
|
138
src/language.zig
Normal file
138
src/language.zig
Normal file
|
@ -0,0 +1,138 @@
|
|||
const std = @import("std");
|
||||
const ts = @import("tree-sitter");
|
||||
const Allocator = std.mem.Allocator;
|
||||
|
||||
extern fn tree_sitter_zig() callconv(.C) *ts.Language;
|
||||
extern fn tree_sitter_c() callconv(.C) *ts.Language;
|
||||
extern fn tree_sitter_python() callconv(.C) *ts.Language;
|
||||
|
||||
pub const LanguageType = enum {
|
||||
python,
|
||||
zig,
|
||||
c,
|
||||
unknown,
|
||||
|
||||
/// Maps a file extension (including the leading dot) to a language.
/// `.py` -> python, `.zig` -> zig, `.c` and `.h` -> c; anything else,
/// including differently-cased spellings, yields `.unknown`.
pub fn fromExtension(ext: []const u8) LanguageType {
    const Mapping = struct { ext: []const u8, language: LanguageType };
    const known = [_]Mapping{
        .{ .ext = ".py", .language = .python },
        .{ .ext = ".zig", .language = .zig },
        .{ .ext = ".c", .language = .c },
        .{ .ext = ".h", .language = .c },
    };

    for (known) |entry| {
        if (std.mem.eql(u8, ext, entry.ext)) return entry.language;
    }
    return .unknown;
}
|
||||
|
||||
/// Returns the lowercase name of the language: "python", "zig", "c" or
/// "unknown".
pub fn getName(self: LanguageType) []const u8 {
    // Each enum tag is spelled exactly like the name it should report, so
    // the tag name itself is the result.
    return @tagName(self);
}
|
||||
|
||||
/// Returns the tree-sitter language obtained from the matching external
/// `tree_sitter_*` function, or `null` for `.unknown`.
pub fn getLanguage(self: LanguageType) ?*ts.Language {
    switch (self) {
        .unknown => return null,
        .python => return tree_sitter_python(),
        .zig => return tree_sitter_zig(),
        .c => return tree_sitter_c(),
    }
}
|
||||
|
||||
pub fn getQuery(self: LanguageType) ?[]const u8 {
|
||||
return switch (self) {
|
||||
.python =>
|
||||
\\;; Capture top-level functions, class, and method definitions
|
||||
\\(module
|
||||
\\ (expression_statement
|
||||
\\ (assignment) @assignment
|
||||
\\ )
|
||||
\\)
|
||||
\\(module
|
||||
\\ (function_definition) @function
|
||||
\\)
|
||||
\\(module
|
||||
\\ (decorated_definition
|
||||
\\ definition: (function_definition) @function
|
||||
\\ )
|
||||
\\)
|
||||
\\(module
|
||||
\\ (class_definition
|
||||
\\ body: (block
|
||||
\\ (expression_statement
|
||||
\\ (assignment) @class_assignment
|
||||
\\ )
|
||||
\\ )
|
||||
\\ ) @class
|
||||
\\)
|
||||
\\(module
|
||||
\\ (class_definition
|
||||
\\ body: (block
|
||||
\\ (function_definition) @method
|
||||
\\ )
|
||||
\\ ) @class
|
||||
\\)
|
||||
\\(module
|
||||
\\ (class_definition
|
||||
\\ body: (block
|
||||
\\ (expression_statement
|
||||
\\ (string) @docstring
|
||||
\\ )
|
||||
\\ )
|
||||
\\ ) @class
|
||||
\\)
|
||||
\\(module
|
||||
\\ (class_definition
|
||||
\\ body: (block
|
||||
\\ (decorated_definition
|
||||
\\ definition: (function_definition) @method
|
||||
\\ )
|
||||
\\ )
|
||||
\\ ) @class
|
||||
\\)
|
||||
,
|
||||
.zig =>
|
||||
\\ ;; Capture functions, structs, methods, variable definitions, and unions in Zig
|
||||
\\(variable_declaration (identifier)
|
||||
\\ (struct_declaration
|
||||
\\ (container_field) @class_variable))
|
||||
\\
|
||||
\\(variable_declaration (identifier)
|
||||
\\ (struct_declaration
|
||||
\\ (function_declaration
|
||||
\\ name: (identifier) @method)))
|
||||
\\
|
||||
\\(variable_declaration (identifier)
|
||||
\\ (enum_declaration
|
||||
\\ (container_field
|
||||
\\ type: (identifier) @enum_item)))
|
||||
\\
|
||||
\\(variable_declaration (identifier)
|
||||
\\ (union_declaration
|
||||
\\ (container_field
|
||||
\\ name: (identifier) @union_item)))
|
||||
\\
|
||||
\\(source_file (function_declaration) @function)
|
||||
\\
|
||||
\\(source_file (variable_declaration (identifier) @variable))
|
||||
,
|
||||
.c =>
|
||||
\\;; Capture extern functions, variables, public classes, and methods
|
||||
\\(function_definition
|
||||
\\ (storage_class_specifier) @extern
|
||||
\\) @function
|
||||
\\(class_specifier
|
||||
\\ (public) @class
|
||||
\\ (function_definition) @method
|
||||
\\) @class
|
||||
\\(declaration
|
||||
\\ (storage_class_specifier) @extern
|
||||
\\) @variable
|
||||
,
|
||||
.unknown => null,
|
||||
};
|
||||
}
|
||||
};
|
53
src/main.zig
53
src/main.zig
|
@ -1,19 +1,56 @@
|
|||
const std = @import("std");
|
||||
const ts = @import("tree-sitter");
|
||||
const Allocator = std.mem.Allocator;
|
||||
|
||||
const Parser = @import("parser.zig");
|
||||
|
||||
extern fn tree_sitter_zig() callconv(.C) *ts.Language;
|
||||
const parser = @import("parser.zig");
|
||||
const CodeParser = parser.CodeParser;
|
||||
const lang = @import("language.zig");
|
||||
const LanguageType = lang.LanguageType;
|
||||
|
||||
pub fn main() !void {
|
||||
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
|
||||
defer _ = gpa.deinit();
|
||||
const allocator = gpa.allocator();
|
||||
|
||||
const file_path = "/Users/bogdanbuduroiu/development/aurelio-labs/semantic-router/semantic_router/route.py";
|
||||
// Get file path from args or use default
|
||||
var argsIterator = try std.process.ArgIterator.initWithAllocator(allocator);
|
||||
defer argsIterator.deinit();
|
||||
|
||||
var parser = try Parser.create(allocator, file_path);
|
||||
defer parser.destroy();
|
||||
const definitions = try parser.extractDefinitions();
|
||||
_ = definitions; // autofix
|
||||
// Skip executable
|
||||
_ = argsIterator.next();
|
||||
|
||||
var file_path: [:0]const u8 = undefined;
|
||||
if (argsIterator.next()) |path| {
|
||||
file_path = path;
|
||||
} else {
|
||||
return error.NoFile;
|
||||
}
|
||||
|
||||
// Read the source file
|
||||
const source = try std.fs.cwd().readFileAlloc(
|
||||
allocator,
|
||||
file_path,
|
||||
1024 * 1024 * 10,
|
||||
);
|
||||
defer allocator.free(source);
|
||||
|
||||
// Create and configure the parser
|
||||
var code_parser = try CodeParser.create(allocator, file_path, source);
|
||||
defer code_parser.destroy();
|
||||
|
||||
// Extract definitions
|
||||
var definitions = try code_parser.extractDefinitions();
|
||||
defer definitions.deinit();
|
||||
|
||||
// Print the definitions
|
||||
const stdout = std.io.getStdOut();
|
||||
const writer = stdout.writer();
|
||||
|
||||
try writer.print("File: {s}\n", .{file_path});
|
||||
try writer.print("Language: {s}\n\n", .{code_parser.language_type.getName()});
|
||||
|
||||
// Print all definitions
|
||||
for (definitions.items.items) |def| {
|
||||
try def.print(writer);
|
||||
}
|
||||
}
|
||||
|
|
335
src/parser.zig
335
src/parser.zig
|
@ -1,170 +1,211 @@
|
|||
const std = @import("std");
|
||||
const ts = @import("tree-sitter");
|
||||
const Allocator = std.mem.Allocator;
|
||||
const MultiArrayList = std.MultiArrayList;
|
||||
const definitions = @import("definitions.zig");
|
||||
const Definition = definitions.Definition;
|
||||
const Function = definitions.Function;
|
||||
const ArrayList = std.ArrayList;
|
||||
const StringHashMap = std.StringHashMap;
|
||||
|
||||
const Self = @This();
|
||||
parser: *ts.Parser,
|
||||
language_name: []const u8,
|
||||
source: []const u8,
|
||||
allocator: Allocator,
|
||||
const defs = @import("definitions.zig");
|
||||
const Definition = defs.Definition;
|
||||
const Function = defs.Function;
|
||||
const Property = defs.Property;
|
||||
const ClassProperty = defs.ClassProperty;
|
||||
const Method = defs.Method;
|
||||
const Class = defs.Class;
|
||||
const DefinitionList = defs.DefinitionList;
|
||||
|
||||
pub fn create(allocator: Allocator, file_path: []const u8) !*Self {
|
||||
const ext = std.fs.path.extension(file_path);
|
||||
const lang = @import("language.zig");
|
||||
const LanguageType = lang.LanguageType;
|
||||
|
||||
var parser = ts.Parser.create();
|
||||
errdefer parser.destroy();
|
||||
pub const CodeParser = struct {
|
||||
const Self = @This();
|
||||
parser: *ts.Parser,
|
||||
language_type: LanguageType,
|
||||
allocator: Allocator,
|
||||
source: []const u8,
|
||||
|
||||
const language = try getLanguageForExtension(ext);
|
||||
try parser.setLanguage(language);
|
||||
// Maps to track class definitions for later reference
|
||||
class_map: StringHashMap(*Class),
|
||||
|
||||
const source = try std.fs.cwd().readFileAlloc(allocator, file_path, 1024 * 1024 * 10); // 10MB max
|
||||
errdefer allocator.free(source);
|
||||
pub fn create(allocator: Allocator, file_path: []const u8, source: []const u8) !*Self {
|
||||
// Determine language from file extension
|
||||
const ext = std.fs.path.extension(file_path);
|
||||
const language_type = LanguageType.fromExtension(ext);
|
||||
|
||||
const p = try allocator.create(Self);
|
||||
p.* = .{
|
||||
.parser = parser,
|
||||
.source = source,
|
||||
.allocator = allocator,
|
||||
.language_name = "python",
|
||||
};
|
||||
return p;
|
||||
}
|
||||
// Get the tree-sitter language
|
||||
const language = language_type.getLanguage() orelse return error.UnsupportedLanguage;
|
||||
|
||||
pub fn destroy(self: *Self) void {
|
||||
self.parser.destroy();
|
||||
self.allocator.free(self.source);
|
||||
self.allocator.destroy(self);
|
||||
}
|
||||
// Create and configure the parser
|
||||
var parser = ts.Parser.create();
|
||||
errdefer parser.destroy();
|
||||
try parser.setLanguage(language);
|
||||
|
||||
pub fn extractDefinitions(self: *Self) !MultiArrayList(Definition) {
|
||||
var defs = MultiArrayList(Definition){};
|
||||
defer defs.deinit(self.allocator);
|
||||
// Create the parser instance
|
||||
const p = try allocator.create(Self);
|
||||
errdefer allocator.destroy(p);
|
||||
|
||||
// Parse the source code
|
||||
const tree = self.parser.parseString(self.source, null);
|
||||
if (tree == null) {
|
||||
return error.ParseFailed;
|
||||
p.* = .{
|
||||
.parser = parser,
|
||||
.language_type = language_type,
|
||||
.allocator = allocator,
|
||||
.source = source,
|
||||
.class_map = StringHashMap(*Class).init(allocator),
|
||||
};
|
||||
|
||||
return p;
|
||||
}
|
||||
defer tree.?.destroy();
|
||||
|
||||
const root_node = tree.?.rootNode();
|
||||
pub fn destroy(self: *Self) void {
|
||||
// Free class map entries
|
||||
var it = self.class_map.iterator();
|
||||
while (it.next()) |entry| {
|
||||
// Classes will be freed when the definitions list is freed
|
||||
_ = entry;
|
||||
}
|
||||
self.class_map.deinit();
|
||||
|
||||
// Get the appropriate query for this language
|
||||
const query_string = try getQueryForLanguage(self.language_name);
|
||||
var error_offset: u32 = 0;
|
||||
const query = try ts.Query.create(self.parser.getLanguage() orelse tree_sitter_python(), query_string, &error_offset);
|
||||
defer query.destroy();
|
||||
// Free the parser
|
||||
self.parser.destroy();
|
||||
|
||||
// Execute the query
|
||||
const cursor = ts.QueryCursor.create();
|
||||
defer cursor.destroy();
|
||||
cursor.exec(query, root_node);
|
||||
// Free self
|
||||
self.allocator.destroy(self);
|
||||
}
|
||||
|
||||
while (cursor.nextMatch()) |match| {
|
||||
for (match.captures) |capture| {
|
||||
const capture_name = query.captureNameForId(capture.index) orelse "mock_caputer";
|
||||
const node = capture.node;
|
||||
const node_text = self.source[node.startByte()..node.endByte()];
|
||||
const name = if (node.childByFieldName("name")) |name_node|
|
||||
self.source[name_node.startByte()..name_node.endByte()]
|
||||
else
|
||||
node_text;
|
||||
pub fn extractDefinitions(self: *Self) !DefinitionList {
|
||||
var definitions = DefinitionList.init(self.allocator);
|
||||
errdefer definitions.deinit();
|
||||
|
||||
if (std.mem.eql(u8, capture_name, "function")) {
|
||||
var func_def = try Function.init(self.allocator, name, "", "", "", "");
|
||||
try defs.append(self.allocator, func_def);
|
||||
defer func_def.destroy();
|
||||
// Parse the source code
|
||||
const tree = self.parser.parseString(self.source, null);
|
||||
if (tree == null) {
|
||||
return error.ParseFailed;
|
||||
}
|
||||
defer tree.?.destroy();
|
||||
|
||||
const root_node = tree.?.rootNode();
|
||||
|
||||
// Get the appropriate query for this language
|
||||
const query_string = self.language_type.getQuery() orelse return error.QueryNotFound;
|
||||
var error_offset: u32 = 0;
|
||||
const query = try ts.Query.create(self.parser.getLanguage() orelse return error.LanguageNotSet, query_string, &error_offset);
|
||||
defer query.destroy();
|
||||
|
||||
// Execute the query
|
||||
const cursor = ts.QueryCursor.create();
|
||||
defer cursor.destroy();
|
||||
cursor.exec(query, root_node);
|
||||
|
||||
// Track captured nodes to avoid duplicates
|
||||
var captured_nodes = std.AutoHashMap(ts.Node, void).init(self.allocator);
|
||||
defer captured_nodes.deinit();
|
||||
|
||||
while (cursor.nextMatch()) |match| {
|
||||
for (match.captures) |capture| {
|
||||
const capture_name = query.captureNameForId(capture.index) orelse continue;
|
||||
const node = capture.node;
|
||||
|
||||
// Skip if we've already processed this node
|
||||
if (captured_nodes.contains(node)) continue;
|
||||
try captured_nodes.put(node, {});
|
||||
|
||||
// Extract node text and name
|
||||
const node_text = self.source[node.startByte()..node.endByte()];
|
||||
const name = if (node.childByFieldName("name")) |name_node|
|
||||
self.source[name_node.startByte()..name_node.endByte()]
|
||||
else
|
||||
node_text;
|
||||
|
||||
// Extract documentation if available
|
||||
const doc = self.extractDocumentation(node);
|
||||
|
||||
try self.processCapture(capture_name, node, name, doc, &definitions);
|
||||
}
|
||||
}
|
||||
|
||||
return definitions;
|
||||
}
|
||||
|
||||
/// Returns the source text of a Python docstring attached to `node`, or
/// `null` when none is found or the language is not Python.
///
/// The docstring is looked for as the first named child of the node's
/// `body` field. That child may be the `string` node itself or an
/// `expression_statement` whose first named child is a `string`; the latter
/// is the shape this project's own Python query matches for docstrings
/// (`(expression_statement (string) @docstring)`).
///
/// The returned slice points into `self.source` (quotes included); it is
/// not a copy.
fn extractDocumentation(self: *Self, node: ts.Node) ?[]const u8 {
    if (self.language_type != .python) return null;

    const body = node.childByFieldName("body") orelse return null;
    var candidate = body.namedChild(0) orelse return null;

    // Unwrap the statement node that wraps a bare string expression.
    if (std.mem.eql(u8, candidate.kind(), "expression_statement")) {
        candidate = candidate.namedChild(0) orelse return null;
    }

    if (!std.mem.eql(u8, candidate.kind(), "string")) return null;

    return self.source[candidate.startByte()..candidate.endByte()];
}
|
||||
|
||||
/// Turns one query capture into a definition and stores it.
///
/// Dispatch is by `capture_name`:
///   * "function"   -> a `Function` appended to `definitions`.
///   * "class"      -> a `Class` appended to `definitions` and registered in
///                     `self.class_map` under `name`.
///   * "method"     -> a `Method` added to its parent class when that class
///                     is already in `class_map`, otherwise appended to
///                     `definitions`; with no parent class found it is
///                     recorded as a plain `Function`.
///   * "class_assignment" / "class_variable" -> same scheme with
///                     `ClassProperty`, falling back to `Property`.
///   * "assignment" -> a `Property` appended to `definitions`.
///   * "docstring" and any other name -> ignored.
///
/// Every definition is heap-allocated; if handing it over to its owner
/// fails, it is released again before the error is returned, so no
/// allocation is leaked on the error path.
///
/// `name` is stored as a `class_map` key without being copied, so the memory
/// behind it must stay valid for as long as the map is used.
fn processCapture(self: *Self, capture_name: []const u8, node: ts.Node, name: []const u8, doc: ?[]const u8, definitions: *DefinitionList) !void {
    if (std.mem.eql(u8, capture_name, "function")) {
        const func = try Function.init(self.allocator, name, doc);
        errdefer func.deinit();
        try definitions.append(.{ .function = func });
    } else if (std.mem.eql(u8, capture_name, "class")) {
        const class = try Class.init(self.allocator, name, doc);
        {
            // Only this hand-over may release the class; once it is in the
            // list, the list is its owner.
            errdefer class.deinit();
            try definitions.append(.{ .class = class });
        }
        // Registered after the list owns the class, so a failure here can
        // neither leak it nor leave a dangling pointer in the map.
        try self.class_map.put(name, class);
    } else if (std.mem.eql(u8, capture_name, "method")) {
        // Find the parent class
        const class_name = self.findParentClassName(node);
        if (class_name) |cn| {
            const method = try Method.init(self.allocator, name, cn, doc);
            errdefer method.deinit();

            if (self.class_map.get(cn)) |class| {
                // Add to class if we have it
                try class.addMethod(method);
            } else {
                // Otherwise add as standalone method
                try definitions.append(.{ .method = method });
            }
        } else {
            // If we can't find a parent class, treat it as a function
            const func = try Function.init(self.allocator, name, doc);
            errdefer func.deinit();
            try definitions.append(.{ .function = func });
        }
    } else if (std.mem.eql(u8, capture_name, "class_assignment") or
        std.mem.eql(u8, capture_name, "class_variable"))
    {
        // Find the parent class
        const class_name = self.findParentClassName(node);
        if (class_name) |cn| {
            const prop = try ClassProperty.init(self.allocator, name, cn, doc);
            errdefer prop.deinit();

            if (self.class_map.get(cn)) |class| {
                // Add to class if we have it
                try class.addProperty(prop);
            } else {
                // Otherwise add as standalone property
                try definitions.append(.{ .class_property = prop });
            }
        } else {
            // If we can't find a parent class, treat it as a regular property
            const prop = try Property.init(self.allocator, name, doc);
            errdefer prop.deinit();
            try definitions.append(.{ .property = prop });
        }
    } else if (std.mem.eql(u8, capture_name, "assignment")) {
        const prop = try Property.init(self.allocator, name, doc);
        errdefer prop.deinit();
        try definitions.append(.{ .property = prop });
    } else if (std.mem.eql(u8, capture_name, "docstring")) {
        // Docstrings are picked up by extractDocumentation; nothing to do.
    }
}
|
||||
|
||||
for (defs) |def| {
|
||||
try def.print(std.debug);
|
||||
/// Walks up from `node` to the nearest ancestor of kind `class_definition`
/// and returns the source text of that ancestor's `name` field.
///
/// Returns `null` when no such ancestor exists, or when the nearest one has
/// no `name` field (the search does not continue past it). The returned
/// slice points into `self.source`.
fn findParentClassName(self: *Self, node: ts.Node) ?[]const u8 {
    var cursor = node.parent();
    while (cursor) |ancestor| : (cursor = ancestor.parent()) {
        if (!std.mem.eql(u8, ancestor.kind(), "class_definition")) continue;

        const name_node = ancestor.childByFieldName("name") orelse return null;
        return self.source[name_node.startByte()..name_node.endByte()];
    }
    return null;
}
|
||||
return defs;
|
||||
}
|
||||
|
||||
// Helper
|
||||
|
||||
fn getLanguageForExtension(ext: []const u8) !*ts.Language {
|
||||
if (std.mem.eql(u8, ext, ".zig")) {
|
||||
return tree_sitter_zig();
|
||||
} else if (std.mem.eql(u8, ext, ".c") or std.mem.eql(u8, ext, ".h")) {
|
||||
return tree_sitter_c();
|
||||
} else if (std.mem.eql(u8, ext, ".py")) {
|
||||
return tree_sitter_python();
|
||||
} else {
|
||||
return error.UnsupportedLanguage;
|
||||
}
|
||||
}
|
||||
|
||||
fn getQueryForLanguage(language_name: []const u8) ![]const u8 {
|
||||
// In a real implementation, this would load queries from files
|
||||
if (std.mem.eql(u8, language_name, "python")) {
|
||||
return
|
||||
\\;; Capture top-level functions, class, and method definitions
|
||||
\\(module
|
||||
\\ (expression_statement
|
||||
\\ (assignment) @assignment
|
||||
\\ )
|
||||
\\)
|
||||
\\(module
|
||||
\\ (function_definition) @function
|
||||
\\)
|
||||
\\(module
|
||||
\\ (decorated_definition
|
||||
\\ definition: (function_definition) @function
|
||||
\\ )
|
||||
\\)
|
||||
\\(module
|
||||
\\ (class_definition
|
||||
\\ body: (block
|
||||
\\ (expression_statement
|
||||
\\ (assignment) @class_assignment
|
||||
\\ )
|
||||
\\ )
|
||||
\\ ) @class
|
||||
\\)
|
||||
\\(module
|
||||
\\ (class_definition
|
||||
\\ body: (block
|
||||
\\ (function_definition) @method
|
||||
\\ )
|
||||
\\ ) @class
|
||||
\\)
|
||||
\\(module
|
||||
\\ (class_definition
|
||||
\\ body: (block
|
||||
\\ (expression_statement
|
||||
\\ (string) @docstring
|
||||
\\ )
|
||||
\\ )
|
||||
\\ ) @class
|
||||
\\)
|
||||
\\(module
|
||||
\\ (class_definition
|
||||
\\ body: (block
|
||||
\\ (decorated_definition
|
||||
\\ definition: (function_definition) @method
|
||||
\\ )
|
||||
\\ )
|
||||
\\ ) @class
|
||||
\\)
|
||||
;
|
||||
} else {
|
||||
return
|
||||
\\(function_definition name: (identifier) @function)
|
||||
\\(class_definition name: (identifier) @class)
|
||||
\\(method_definition name: (identifier) @method)
|
||||
;
|
||||
}
|
||||
}
|
||||
|
||||
// External C functions for tree-sitter languages
|
||||
extern fn tree_sitter_zig() callconv(.C) *ts.Language;
|
||||
extern fn tree_sitter_c() callconv(.C) *ts.Language;
|
||||
extern fn tree_sitter_python() callconv(.C) *ts.Language;
|
||||
};
|
||||
|
|
15
src/root.zig
15
src/root.zig
|
@ -2,12 +2,19 @@
|
|||
//! you are making an executable, the convention is to delete this file and
|
||||
//! start with main.zig instead.
|
||||
const std = @import("std");
|
||||
const ts = @import("tree-sitter");
|
||||
const testing = std.testing;
|
||||
const Parser = @import("parser.zig");
|
||||
|
||||
pub export fn add(a: i32, b: i32) i32 {
|
||||
return a + b;
|
||||
}
|
||||
extern fn tree_sitter_python() callconv(.C) *ts.Language;
|
||||
|
||||
test "basic add functionality" {
|
||||
try testing.expect(add(3, 7) == 10);
|
||||
const p = try Parser.create(testing.allocator, tree_sitter_python());
|
||||
const definitions = try p.extractDefinitions("def is_valid() -> bool: ...");
|
||||
|
||||
const def = definitions[0];
|
||||
switch (def) {
|
||||
.function => try testing.expect(std.mem.eql(def.function.name, "is_valid")),
|
||||
else => unreachable,
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue