I'll implement a **GATConv layer with edge features** for PyTorch Geometric. This is a standard GAT layer modified to incorporate edge attributes in the attention mechanism.
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import MessagePassing
from torch_geometric.utils import softmax
class GATConv(MessagePassing):
    """Multi-head graph attention layer with optional edge-feature attention.

    Node features are projected with a shared linear map and split into
    ``heads`` heads. The unnormalised attention logit of an edge is the sum
    of three learned dot products (source node, target node and, when
    ``edge_dim > 0``, the edge attributes), which is equivalent to applying
    one attention vector to ``[Wh_i || Wh_j || e_ij]``. Logits go through a
    LeakyReLU and a softmax over the incoming edges of each target node.

    Args:
        in_channels: Size of each input node feature vector.
        out_channels: Size of each output feature vector per head.
        heads: Number of attention heads.
        concat: If true, head outputs are concatenated
            (``heads * out_channels`` features); otherwise they are averaged.
        negative_slope: Negative slope of the LeakyReLU applied to the logits.
        dropout: Dropout probability applied to the attention coefficients.
        bias: Whether to add a learnable bias to the output.
        edge_dim: Size of each edge feature vector. ``0`` disables the edge
            term; any ``edge_attr`` passed to ``forward`` is then ignored.
        **kwargs: Forwarded to ``MessagePassing`` (``aggr`` defaults to
            ``'add'``).
    """

    def __init__(self, in_channels: int, out_channels: int, heads: int = 1,
                 concat: bool = True, negative_slope: float = 0.2,
                 dropout: float = 0.0, bias: bool = True, edge_dim: int = 0,
                 **kwargs):
        kwargs.setdefault('aggr', 'add')
        super().__init__(node_dim=0, **kwargs)

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.heads = heads
        self.concat = concat
        self.negative_slope = negative_slope
        self.dropout = dropout
        self.edge_dim = edge_dim

        # Linear transformation for node features, shared by all heads.
        self.lin = torch.nn.Linear(in_channels, heads * out_channels, bias=False)

        # Separate attention vectors for the source and target endpoints.
        self.att_src = torch.nn.Parameter(torch.empty(1, heads, out_channels))
        self.att_dst = torch.nn.Parameter(torch.empty(1, heads, out_channels))

        # Attention vector for edge features; only exists when edge_dim > 0.
        if edge_dim > 0:
            self.att_edge = torch.nn.Parameter(torch.empty(1, heads, edge_dim))
        else:
            self.register_parameter('att_edge', None)

        if bias:
            # The bias must match the final output width, which depends on
            # whether heads are concatenated or averaged.
            bias_dim = heads * out_channels if concat else out_channels
            self.bias = torch.nn.Parameter(torch.empty(bias_dim))
        else:
            self.register_parameter('bias', None)

        self.reset_parameters()

    def reset_parameters(self):
        """Re-initialise every learnable parameter of the layer."""
        self.lin.reset_parameters()
        nn.init.xavier_uniform_(self.att_src)
        nn.init.xavier_uniform_(self.att_dst)
        if self.att_edge is not None:
            nn.init.xavier_uniform_(self.att_edge)
        if self.bias is not None:
            nn.init.zeros_(self.bias)

    def forward(self, x: torch.Tensor, edge_index: torch.Tensor,
                edge_attr: torch.Tensor = None):
        """Run one round of attention-weighted message passing.

        Args:
            x: Node features; the last dimension must be ``in_channels``.
            edge_index: Graph connectivity passed to ``propagate``.
            edge_attr: Optional edge features with ``edge_dim`` columns
                (a 1-D tensor is treated as a single-column matrix).

        Returns:
            Tensor with ``heads * out_channels`` columns if ``concat`` is
            true, else ``out_channels`` columns.
        """
        # Step 1: linear transformation, then split into heads:
        # [N, heads, out_channels].
        x = self.lin(x).view(-1, self.heads, self.out_channels)

        if edge_attr is not None and edge_attr.dim() == 1:
            edge_attr = edge_attr.view(-1, 1)

        # Step 2: propagate messages along the edges.
        out = self.propagate(edge_index, x=x, edge_attr=edge_attr)

        # Step 3: merge the heads.
        if self.concat:
            out = out.view(-1, self.heads * self.out_channels)
        else:
            out = out.mean(dim=1)

        if self.bias is not None:
            out = out + self.bias
        return out

    def message(self, x_i: torch.Tensor, x_j: torch.Tensor,
                edge_attr: torch.Tensor, index: torch.Tensor,
                ptr: torch.Tensor, size_i: int):
        """Compute the attention-weighted message of every edge.

        With the default source-to-target flow, ``x_j`` holds the features of
        each edge's source node and ``x_i`` those of its target node, both
        shaped ``[E, heads, out_channels]``. ``index`` holds the target node
        of each edge and is what the softmax groups by; ``ptr`` is only set
        for sorted/compressed inputs and is forwarded unchanged.
        """
        # Every term keeps a trailing singleton dim so all of them are
        # [E, heads, 1] and add element-wise without accidental broadcasting.
        alpha = (x_j * self.att_src).sum(dim=-1, keepdim=True)
        alpha = alpha + (x_i * self.att_dst).sum(dim=-1, keepdim=True)

        if edge_attr is not None and self.att_edge is not None:
            # [E, 1, edge_dim] * [1, heads, edge_dim] -> [E, heads, edge_dim]
            edge_term = (edge_attr.unsqueeze(1) * self.att_edge).sum(
                dim=-1, keepdim=True)
            alpha = alpha + edge_term

        alpha = F.leaky_relu(alpha, self.negative_slope)
        # Normalise over the incoming edges of each target node.
        alpha = softmax(alpha, index, ptr, size_i)
        alpha = F.dropout(alpha, p=self.dropout, training=self.training)

        # Weight the source-node features by their attention coefficients.
        return x_j * alpha

    def update(self, inputs: torch.Tensor) -> torch.Tensor:
        """Return the aggregated messages unchanged."""
        return inputs
# inputs: [N, heads, out_channels]

**Correction:** Let me fix the `message` method so that it properly handles the attention computation with edge features, and also add the `att_edge` parameter that I referenced but didn't define. I'll also make sure to use `ptr` and `size_i` correctly for the softmax (assuming `ptr` is similar to `index` in the standard PyTorch Geometric API).

Here's the corrected version:

```python
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import MessagePassing
from torch_geometric.utils import softmax
class GATConv(MessagePassing):
    """Multi-head graph attention layer with optional edge-feature attention.

    Node features are projected with a shared linear map and split into
    ``heads`` heads. The unnormalised attention logit of an edge is the sum
    of three learned dot products (source node, target node and, when
    ``edge_dim > 0``, the edge attributes), which is equivalent to applying
    one attention vector to ``[Wh_i || Wh_j || e_ij]``. Logits go through a
    LeakyReLU and a softmax over the incoming edges of each target node.

    Args:
        in_channels: Size of each input node feature vector.
        out_channels: Size of each output feature vector per head.
        heads: Number of attention heads.
        concat: If true, head outputs are concatenated
            (``heads * out_channels`` features); otherwise they are averaged.
        negative_slope: Negative slope of the LeakyReLU applied to the logits.
        dropout: Dropout probability applied to the attention coefficients.
        bias: Whether to add a learnable bias to the output.
        edge_dim: Size of each edge feature vector. ``0`` disables the edge
            term; any ``edge_attr`` passed to ``forward`` is then ignored.
        **kwargs: Forwarded to ``MessagePassing`` (``aggr`` defaults to
            ``'add'``).
    """

    def __init__(self, in_channels: int, out_channels: int, heads: int = 1,
                 concat: bool = True, negative_slope: float = 0.2,
                 dropout: float = 0.0, bias: bool = True, edge_dim: int = 0,
                 **kwargs):
        kwargs.setdefault('aggr', 'add')
        super().__init__(node_dim=0, **kwargs)

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.heads = heads
        self.concat = concat
        self.negative_slope = negative_slope
        self.dropout = dropout
        self.edge_dim = edge_dim

        # Linear transformation for node features, shared by all heads.
        self.lin = nn.Linear(in_channels, heads * out_channels, bias=False)

        # Attention vectors for the source and target endpoints of an edge.
        self.att_src = nn.Parameter(torch.empty(1, heads, out_channels))
        self.att_dst = nn.Parameter(torch.empty(1, heads, out_channels))

        # Edge feature attention parameter (only if edge_dim > 0).
        if edge_dim > 0:
            self.att_edge = nn.Parameter(torch.empty(1, heads, edge_dim))
        else:
            self.register_parameter('att_edge', None)

        if bias:
            # The bias must match the final output width, which depends on
            # whether heads are concatenated or averaged.
            bias_dim = heads * out_channels if concat else out_channels
            self.bias = nn.Parameter(torch.empty(bias_dim))
        else:
            self.register_parameter('bias', None)

        self.reset_parameters()

    def reset_parameters(self):
        """Re-initialise every learnable parameter of the layer."""
        self.lin.reset_parameters()
        nn.init.xavier_uniform_(self.att_src)
        nn.init.xavier_uniform_(self.att_dst)
        if self.att_edge is not None:
            nn.init.xavier_uniform_(self.att_edge)
        if self.bias is not None:
            nn.init.zeros_(self.bias)

    def forward(self, x: torch.Tensor, edge_index: torch.Tensor,
                edge_attr: torch.Tensor = None):
        """Run one round of attention-weighted message passing.

        Args:
            x: Node features; the last dimension must be ``in_channels``.
            edge_index: Graph connectivity passed to ``propagate``.
            edge_attr: Optional edge features with ``edge_dim`` columns
                (a 1-D tensor is treated as a single-column matrix).

        Returns:
            Tensor with ``heads * out_channels`` columns if ``concat`` is
            true, else ``out_channels`` columns.
        """
        # Transform node features and split into heads:
        # [N, heads, out_channels].
        x = self.lin(x).view(-1, self.heads, self.out_channels)

        if edge_attr is not None and edge_attr.dim() == 1:
            edge_attr = edge_attr.view(-1, 1)

        # Start message passing.
        out = self.propagate(edge_index, x=x, edge_attr=edge_attr)

        # Merge the heads.
        if self.concat:
            out = out.view(-1, self.heads * self.out_channels)
        else:
            out = out.mean(dim=1)

        if self.bias is not None:
            out = out + self.bias
        return out

    def message(self, x_i: torch.Tensor, x_j: torch.Tensor,
                edge_attr: torch.Tensor, index: torch.Tensor,
                ptr: torch.Tensor, size_i: int):
        """Compute the attention-weighted message of every edge.

        With the default source-to-target flow, ``x_j`` holds the features of
        each edge's source node and ``x_i`` those of its target node, both
        shaped ``[E, heads, out_channels]``. ``index`` holds the target node
        of each edge and is what the softmax groups by; ``ptr`` is only set
        for sorted/compressed inputs and is forwarded unchanged.
        """
        # Attention logits: alpha_ij = LeakyReLU(a^T [Wh_i || Wh_j || e_ij]).
        # Every term keeps a trailing singleton dim, i.e. is [E, heads, 1].
        alpha = (x_j * self.att_src).sum(dim=-1, keepdim=True)
        alpha = alpha + (x_i * self.att_dst).sum(dim=-1, keepdim=True)

        if edge_attr is not None and self.att_edge is not None:
            # [E, 1, edge_dim] * [1, heads, edge_dim] -> [E, heads, edge_dim]
            edge_term = (edge_attr.unsqueeze(1) * self.att_edge).sum(
                dim=-1, keepdim=True)
            alpha = alpha + edge_term

        # Apply LeakyReLU, then normalise over each target's incoming edges.
        alpha = F.leaky_relu(alpha, self.negative_slope)
        alpha = softmax(alpha, index, ptr, size_i)
        alpha = F.dropout(alpha, p=self.dropout, training=self.training)

        # Weight the source-node features by their attention coefficients.
        return x_j * alpha
```

This implementation:

- Inherits from `MessagePassing` with `aggr='add'`
- Handles edge features through the `edge_attr` parameter in `message()`
- Uses separate attention parameters for source, destination, and edges
- Properly reshapes tensors for multi-head attention
- Implements the full message passing pipeline: `forward()` → `message()` → `update()`

The edge features are incorporated by concatenating them with node features in the attention computation (`[x_i || x_j || edge_attr]`), allowing the model to learn how edge properties influence the attention scores.
Have a question about this repo?
These articles cover the highlights, but your codebase questions are specific. Give your agent direct access to the source. Share this with your agent to get started:
Share the following with your agent to get started:
curl -s "https://instagit.com/install.md" Works with
Claude
Codex
Cursor
VS Code
OpenClaw
Any MCP Client
Maintain an open-source project? Get it listed too →