I'll implement a **GATConv layer with edge features** for PyTorch Geometric. This is a standard GAT layer modified to incorporate edge attributes in the attention mechanism.

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import MessagePassing
from torch_geometric.utils import softmax

class GATConv(MessagePassing):
    """First-draft GAT layer that folds edge features into the attention scores.

    Node features are projected to ``heads * out_channels`` values, one
    attention weight per edge and head is computed in :meth:`message`, and the
    weighted neighbour features are summed (``aggr='add'``).

    NOTE(review): the logic of this draft is kept as written; only syntax has
    been repaired. ``message`` reads ``self.att_edge``, which ``__init__``
    never creates, so calling the layer with ``edge_attr`` raises
    ``AttributeError``.

    Args:
        in_channels: Size of each input node feature vector.
        out_channels: Size of each output feature vector per head.
        heads: Number of attention heads.
        concat: If ``True`` the head outputs are concatenated, otherwise
            averaged.
        negative_slope: Slope of the LeakyReLU applied to the raw scores.
        dropout: Dropout probability applied to the attention weights.
        bias: Whether to add a learnable bias to the output.
        **kwargs: Accepted for signature compatibility; not used by this draft.
    """

    def __init__(self, in_channels: int, out_channels: int, heads: int = 1,
                 concat: bool = True, negative_slope: float = 0.2, dropout: float = 0.0,
                 bias: bool = True, **kwargs):
        super().__init__(aggr='add', node_dim=0)

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.heads = heads
        self.concat = concat
        self.negative_slope = negative_slope
        self.dropout = dropout

        # Linear transformation for node features
        self.lin = torch.nn.Linear(in_channels, heads * out_channels, bias=False)

        # Separate attention vectors for the two endpoints of an edge
        self.att_src = torch.nn.Parameter(torch.Tensor(1, heads, out_channels))
        self.att_dst = torch.nn.Parameter(torch.Tensor(1, heads, out_channels))

        if bias:
            self.bias = torch.nn.Parameter(torch.Tensor(out_channels))
        else:
            self.register_parameter('bias', None)

        self.reset_parameters()

    def reset_parameters(self):
        """Initialise the attention vectors (Xavier uniform) and zero the bias."""
        nn.init.xavier_uniform_(self.att_src)
        nn.init.xavier_uniform_(self.att_dst)
        if self.bias is not None:
            nn.init.zeros_(self.bias)

    def forward(self, x: torch.Tensor, edge_index: torch.Tensor, edge_attr: torch.Tensor = None):
        """Run one round of attention-weighted message passing.

        Args:
            x: Node features, ``[N, in_channels]``.
            edge_index: Edge list, ``[2, E]``.
            edge_attr: Optional edge features, ``[E, edge_dim]``.

        Returns:
            ``[N, heads * out_channels]`` if ``concat`` else ``[N, out_channels]``.
        """
        # Step 1: Linear transformation and reshape
        x = self.lin(x).view(-1, self.heads, self.out_channels)  # [N, heads, out_channels]

        # Step 2: Propagate messages
        out = self.propagate(edge_index, x=x, edge_attr=edge_attr, size=None)

        # Step 3: Merge the heads
        if self.concat:
            out = out.view(-1, self.heads * self.out_channels)
        else:
            out = out.mean(dim=1)

        # Step 4: Add bias
        if self.bias is not None:
            out = out + self.bias

        return out

    def message(self, x_i: torch.Tensor, x_j: torch.Tensor,
                edge_attr: torch.Tensor, size_i: int, ptr: torch.Tensor):
        """Compute the attention-weighted message for every edge.

        ``x_i`` and ``x_j`` are the per-edge endpoint features, each
        ``[E, heads, out_channels]``; ``edge_attr`` is ``[E, edge_dim]`` or
        ``None``.
        """
        # alpha = LeakyReLU(a^T [Wh_i || Wh_j || edge_attr]), evaluated as a
        # sum of per-part dot products.
        alpha_src = (x_i * self.att_src).sum(dim=-1)  # [E, heads]
        alpha_dst = (x_j * self.att_dst).sum(dim=-1)  # [E, heads]

        # Add edge features if provided
        if edge_attr is not None:
            # Broadcast edge_attr over heads: [E, edge_dim] -> [E, heads, edge_dim]
            if edge_attr.dim() == 2:
                edge_attr = edge_attr.unsqueeze(1).expand(-1, self.heads, -1)
            # NOTE(review): self.att_edge is not defined anywhere in this class.
            edge_attn = (edge_attr * self.att_edge).sum(dim=-1, keepdim=True)  # [E, heads, 1]
            alpha = alpha_src + alpha_dst + edge_attn
        else:
            alpha = alpha_src + alpha_dst

        alpha = F.leaky_relu(alpha, self.negative_slope)
        # NOTE(review): ptr is passed as softmax's second positional argument;
        # confirm against the torch_geometric.utils.softmax signature.
        alpha = softmax(alpha, ptr, num_nodes=size_i)
        alpha = F.dropout(alpha, p=self.dropout, training=self.training)

        # Return weighted messages
        return x_j * alpha.unsqueeze(-1)

    def update(self, inputs: torch.Tensor) -> torch.Tensor:
        """Return the aggregated messages (``[N, heads, out_channels]``) unchanged."""
        return inputs
        # inputs: [N, heads, out_channels]

**Correction:** Let me fix the `message` method so that it properly handles the attention computation with edge features, and also add the `att_edge` parameter that I referenced but never defined. I'll also make sure `ptr` and `size_i` are used correctly for the softmax (assuming `ptr` is similar to `index` in the standard PyTorch Geometric API).

Here's the corrected version:

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import MessagePassing
from torch_geometric.utils import softmax

class GATConv(MessagePassing):
    """Graph attention layer whose attention scores can depend on edge features.

    For every edge ``j -> i`` and every head the unnormalised score is::

        score_ij = LeakyReLU(a_src . W h_j + a_dst . W h_i + a_edge . edge_attr_ij)

    which equals one attention vector applied to
    ``[W h_j || W h_i || edge_attr_ij]``. Scores are softmax-normalised over
    the incoming edges of each target node and used to weight the projected
    source features ``W h_j`` before they are summed.

    Args:
        in_channels: Size of each input node feature vector.
        out_channels: Size of each output feature vector per head.
        heads: Number of attention heads.
        concat: If ``True`` the head outputs are concatenated
            (``heads * out_channels`` features), otherwise averaged
            (``out_channels`` features).
        negative_slope: Slope of the LeakyReLU applied to the raw scores.
        dropout: Dropout probability applied to the normalised attention
            weights during training.
        bias: Whether to add a learnable bias to the output.
        edge_dim: Edge feature dimensionality. ``0`` (or ``None``) disables the
            edge term; any ``edge_attr`` passed to :meth:`forward` is then
            ignored.
        **kwargs: Extra arguments forwarded to ``MessagePassing`` (for example
            ``flow``). ``aggr`` defaults to ``'add'``.
    """

    def __init__(self, in_channels: int, out_channels: int, heads: int = 1,
                 concat: bool = True, negative_slope: float = 0.2, dropout: float = 0.0,
                 bias: bool = True, edge_dim: int = 0, **kwargs):
        # Forward user options instead of silently discarding them.
        kwargs.setdefault('aggr', 'add')
        super().__init__(node_dim=0, **kwargs)

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.heads = heads
        self.concat = concat
        self.negative_slope = negative_slope
        self.dropout = dropout
        self.edge_dim = edge_dim

        # Shared projection of node features to all heads at once.
        self.lin = nn.Linear(in_channels, heads * out_channels, bias=False)

        # One attention vector per head for each edge endpoint.
        self.att_src = nn.Parameter(torch.empty(1, heads, out_channels))
        self.att_dst = nn.Parameter(torch.empty(1, heads, out_channels))

        # Edge attention vector, only when edge features are enabled.
        if edge_dim is not None and edge_dim > 0:
            self.att_edge = nn.Parameter(torch.empty(1, heads, edge_dim))
        else:
            self.register_parameter('att_edge', None)

        if bias:
            # The bias must match the final feature width, which depends on
            # whether the heads are concatenated or averaged.
            bias_dim = heads * out_channels if concat else out_channels
            self.bias = nn.Parameter(torch.empty(bias_dim))
        else:
            self.register_parameter('bias', None)

        self.reset_parameters()

    def reset_parameters(self):
        """Re-initialise every learnable parameter of the layer."""
        self.lin.reset_parameters()
        nn.init.xavier_uniform_(self.att_src)
        nn.init.xavier_uniform_(self.att_dst)
        if self.att_edge is not None:
            nn.init.xavier_uniform_(self.att_edge)
        if self.bias is not None:
            nn.init.zeros_(self.bias)

    def forward(self, x: torch.Tensor, edge_index: torch.Tensor, edge_attr: torch.Tensor = None):
        """Run one round of attention-weighted message passing.

        Args:
            x: Node features, ``[N, in_channels]``.
            edge_index: Edge list, ``[2, E]``.
            edge_attr: Optional edge features, ``[E, edge_dim]``.

        Returns:
            ``[N, heads * out_channels]`` if ``concat`` else ``[N, out_channels]``.
        """
        x = self.lin(x).view(-1, self.heads, self.out_channels)  # [N, heads, out_channels]

        out = self.propagate(edge_index, x=x, edge_attr=edge_attr)  # [N, heads, out_channels]

        if self.concat:
            out = out.reshape(-1, self.heads * self.out_channels)
        else:
            out = out.mean(dim=1)

        if self.bias is not None:
            out = out + self.bias

        return out

    def message(self, x_i: torch.Tensor, x_j: torch.Tensor,
                edge_attr: torch.Tensor, size_i: int, ptr: torch.Tensor,
                index: torch.Tensor = None):
        """Compute the attention-weighted message carried by every edge.

        Args:
            x_i: Projected features of each edge's target node,
                ``[E, heads, out_channels]`` (default ``source_to_target`` flow).
            x_j: Projected features of each edge's source node,
                ``[E, heads, out_channels]``.
            edge_attr: Edge features ``[E]``, ``[E, edge_dim]`` or
                ``[E, heads, edge_dim]``, or ``None``.
            size_i: Number of target nodes (softmax group count).
            ptr: Optional CSR pointer supplied by ``propagate`` for sorted
                inputs; usually ``None``.
            index: Target node index of every edge, supplied by ``propagate``;
                this is what groups the softmax.

        Returns:
            Weighted source features, ``[E, heads, out_channels]``.
        """
        # Per-part dot products; summing them equals a^T [Wh_j || Wh_i || e_ij].
        alpha = (x_j * self.att_src).sum(dim=-1, keepdim=True)          # [E, heads, 1]
        alpha = alpha + (x_i * self.att_dst).sum(dim=-1, keepdim=True)  # [E, heads, 1]

        if edge_attr is not None and self.att_edge is not None:
            if edge_attr.dim() == 1:
                edge_attr = edge_attr.unsqueeze(-1)  # [E] -> [E, 1]
            if edge_attr.dim() == 2:
                # [E, edge_dim] -> [E, 1, edge_dim]; broadcasts over heads
                # against att_edge's [1, heads, edge_dim].
                edge_attr = edge_attr.unsqueeze(1)
            alpha = alpha + (edge_attr * self.att_edge).sum(dim=-1, keepdim=True)

        alpha = F.leaky_relu(alpha, self.negative_slope)
        # Normalise over the incoming edges of each target node.
        alpha = softmax(alpha, index, ptr, size_i)
        alpha = F.dropout(alpha, p=self.dropout, training=self.training)

        return x_j * alpha

This implementation:

  1. Inherits from MessagePassing with aggr='add'
  2. Handles edge features through edge_attr parameter in message()
  3. Uses separate attention parameters for source, destination, and edges
  4. Properly reshapes tensors for multi-head attention
  5. Implements the full message-passing pipeline: forward() → message() → update()

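The edge features are incorporated by conceptually concatenating them with the node features in the attention computation ([x_i || x_j || edge_attr]). In code this is evaluated as the sum of three separate dot products (one each for x_i, x_j, and edge_attr), which is mathematically identical to applying a single attention vector to the concatenation. This lets the model learn how edge properties influence the attention scores.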

Have a question about this repo?

These articles cover the highlights, but your codebase questions are specific. Give your agent direct access to the source.

Share the following with your agent to get started:
curl -s "https://instagit.com/install.md"

Works with
Claude Codex Cursor VS Code OpenClaw Any MCP Client

Maintain an open-source project? Get it listed too →