Use SparkPlanInfo constructor that is compatible with Databricks' Spark fork
charlesmyu committed Nov 5, 2025
commit e82dd209fe1f69ddac27adc61699578caf259f1a
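
For context, inferred from the diffs below: the removed code scanned SparkPlanInfo.class.getConstructors() for a constructor with exactly five parameters. Open-source Spark has one, but Databricks' Spark fork appends three trailing fields (estRowCount, rddScopeId, explainId), so the scan matched nothing there and the parsed plan metadata was silently dropped. The new Spark212PlanUtils/Spark213PlanUtils helpers instead resolve both constructor shapes up front via datadog.trace.util.MethodHandles and prefer the Databricks one. A minimal sketch of the old failure mode; the class and method names here are hypothetical, not from the PR:

    import java.lang.reflect.Constructor;

    class FiveParamScan {
      // The removed lookup: finds the 5-arg constructor on OSS Spark, but
      // returns null on Databricks' fork, whose constructor takes 8 parameters.
      static Constructor<?> findFiveParamCtor(Class<?> cls) {
        for (Constructor<?> c : cls.getConstructors()) {
          if (c.getParameterCount() == 5) {
            return c;
          }
        }
        return null; // fork path: the metadata upsert was silently skipped
      }
    }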
Modified file: Spark 2.12 instrumentation
@@ -7,7 +7,6 @@
 import datadog.trace.agent.tooling.InstrumenterModule;
 import datadog.trace.api.Config;
 import de.thetaphi.forbiddenapis.SuppressForbidden;
-import java.lang.reflect.Constructor;
 import net.bytebuddy.asm.Advice;
 import org.apache.spark.SparkContext;
 import org.apache.spark.sql.execution.SparkPlan;
@@ -35,7 +34,8 @@ public String[] helperClassNames() {
       packageName + ".SparkSQLUtils",
       packageName + ".SparkSQLUtils$SparkPlanInfoForStage",
       packageName + ".SparkSQLUtils$AccumulatorWithStage",
-      packageName + ".Spark212PlanSerializer"
+      packageName + ".Spark212PlanSerializer",
+      packageName + ".Spark212PlanUtils"
     };
   }

@@ -104,29 +104,14 @@ public static void exit(
     if (planInfo.metadata().size() == 0
         && (Config.get().isDataJobsParseSparkPlanEnabled()
             || Config.get().isDataJobsExperimentalFeaturesEnabled())) {
-      Spark212PlanSerializer planUtils = new Spark212PlanSerializer();
+      Spark212PlanSerializer planSerializer = new Spark212PlanSerializer();
       Map<String, String> meta =
-          JavaConverters.mapAsScalaMap(planUtils.extractFormattedProduct(plan))
+          JavaConverters.mapAsScalaMap(planSerializer.extractFormattedProduct(plan))
               .toMap(Predef.$conforms());
-      try {
-        Constructor<?> targetCtor = null;
-        for (Constructor<?> c : SparkPlanInfo.class.getConstructors()) {
-          if (c.getParameterCount() == 5) {
-            targetCtor = c;
-            break;
-          }
-        }
-        if (targetCtor != null) {
-          Object newInst =
-              targetCtor.newInstance(
-                  planInfo.nodeName(),
-                  planInfo.simpleString(),
-                  planInfo.children(),
-                  meta,
-                  planInfo.metrics());
-          planInfo = (SparkPlanInfo) newInst;
-        }
-      } catch (Throwable ignored) {
+
+      SparkPlanInfo newPlanInfo = Spark212PlanUtils.upsertSparkPlanInfoMetadata(planInfo, meta);
+      if (newPlanInfo != null) {
+        planInfo = newPlanInfo;
       }
     }
   }
New file: Spark212PlanUtils.java
@@ -0,0 +1,77 @@
package datadog.trace.instrumentation.spark;

import datadog.trace.util.MethodHandles;
import java.lang.invoke.MethodHandle;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.spark.sql.execution.SparkPlanInfo;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Option;

public class Spark212PlanUtils {
  private static final Logger log = LoggerFactory.getLogger(Spark212PlanUtils.class);

  private static final MethodHandles methodLoader =
      new MethodHandles(ClassLoader.getSystemClassLoader());
  private static final MethodHandle constructor =
      methodLoader.constructor(
          SparkPlanInfo.class,
          String.class,
          String.class,
          scala.collection.Seq.class,
          scala.collection.immutable.Map.class,
          scala.collection.Seq.class);
  private static final MethodHandle databricksConstructor =
      methodLoader.constructor(
          SparkPlanInfo.class,
          String.class,
          String.class,
          scala.collection.Seq.class,
          scala.collection.immutable.Map.class,
          scala.collection.Seq.class,
          Option.class,
          String.class,
          Option.class);

  public static SparkPlanInfo upsertSparkPlanInfoMetadata(
      SparkPlanInfo planInfo, scala.collection.immutable.Map<String, String> meta) {
    // Attempt to create a new SparkPlanInfo with additional metadata replaced
    // Since the fields are immutable we must instantiate a new SparkPlanInfo to do this

    Object[] standardArgs =
        new Object[] {
          planInfo.nodeName(),
          planInfo.simpleString(),
          planInfo.children(),
          meta,
          planInfo.metrics()
        };

    if (databricksConstructor != null) {
      List<Object> databricksArgs = new ArrayList<>(Arrays.asList(standardArgs));
      try {
        databricksArgs.add(SparkPlanInfo.class.getMethod("estRowCount").invoke(planInfo));
        databricksArgs.add(SparkPlanInfo.class.getMethod("rddScopeId").invoke(planInfo));
        databricksArgs.add(SparkPlanInfo.class.getMethod("explainId").invoke(planInfo));
      } catch (Throwable t) {
        log.warn("Error obtaining Databricks-specific SparkPlanInfo args", t);
      }

      SparkPlanInfo newPlan = methodLoader.invoke(databricksConstructor, databricksArgs.toArray());
      if (newPlan != null) {
        return newPlan;
      }
    }

    if (constructor != null) {
      SparkPlanInfo newPlan = methodLoader.invoke(constructor, standardArgs);
      if (newPlan != null) {
        return newPlan;
      }
    }

    return null;
  }
}
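
The helper relies on datadog.trace.util.MethodHandles returning null from constructor(...) when no matching signature exists, and null from invoke(...) on any failure, so each != null check doubles as a fork-compatibility probe. A standalone sketch of that lookup pattern using only java.lang.invoke; the class below is illustrative and assumes the Datadog utility behaves this way:

    import java.lang.invoke.MethodHandle;
    import java.lang.invoke.MethodHandles;
    import java.lang.invoke.MethodType;

    final class SafeCtorLookup {
      // Resolve a public constructor by exact parameter types; null if absent.
      static MethodHandle constructor(Class<?> owner, Class<?>... paramTypes) {
        try {
          return MethodHandles.publicLookup()
              .findConstructor(owner, MethodType.methodType(void.class, paramTypes));
        } catch (ReflectiveOperationException e) {
          return null; // signature missing on this Spark build
        }
      }
    }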
Modified file: Spark 2.13 instrumentation
@@ -7,7 +7,6 @@
 import datadog.trace.agent.tooling.InstrumenterModule;
 import datadog.trace.api.Config;
 import de.thetaphi.forbiddenapis.SuppressForbidden;
-import java.lang.reflect.Constructor;
 import net.bytebuddy.asm.Advice;
 import org.apache.spark.SparkContext;
 import org.apache.spark.sql.execution.SparkPlan;
@@ -35,7 +34,8 @@ public String[] helperClassNames() {
       packageName + ".SparkSQLUtils",
       packageName + ".SparkSQLUtils$SparkPlanInfoForStage",
       packageName + ".SparkSQLUtils$AccumulatorWithStage",
-      packageName + ".Spark213PlanSerializer"
+      packageName + ".Spark213PlanSerializer",
+      packageName + ".Spark213PlanUtils"
     };
   }

@@ -105,28 +105,13 @@ public static void exit(
     if (planInfo.metadata().size() == 0
         && (Config.get().isDataJobsParseSparkPlanEnabled()
             || Config.get().isDataJobsExperimentalFeaturesEnabled())) {
-      Spark213PlanSerializer planUtils = new Spark213PlanSerializer();
+      Spark213PlanSerializer planSerializer = new Spark213PlanSerializer();
       Map<String, String> meta =
-          HashMap.from(JavaConverters.asScala(planUtils.extractFormattedProduct(plan)));
-      try {
-        Constructor<?> targetCtor = null;
-        for (Constructor<?> c : SparkPlanInfo.class.getConstructors()) {
-          if (c.getParameterCount() == 5) {
-            targetCtor = c;
-            break;
-          }
-        }
-        if (targetCtor != null) {
-          Object newInst =
-              targetCtor.newInstance(
-                  planInfo.nodeName(),
-                  planInfo.simpleString(),
-                  planInfo.children(),
-                  meta,
-                  planInfo.metrics());
-          planInfo = (SparkPlanInfo) newInst;
-        }
-      } catch (Throwable ignored) {
+          HashMap.from(JavaConverters.asScala(planSerializer.extractFormattedProduct(plan)));
+
+      SparkPlanInfo newPlanInfo = Spark213PlanUtils.upsertSparkPlanInfoMetadata(planInfo, meta);
+      if (newPlanInfo != null) {
+        planInfo = newPlanInfo;
       }
     }
   }
New file: Spark213PlanUtils.java
@@ -0,0 +1,70 @@
package datadog.trace.instrumentation.spark;

import datadog.trace.util.MethodHandles;
import java.lang.invoke.MethodHandle;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.spark.sql.execution.SparkPlanInfo;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Option;

public class Spark213PlanUtils {
  private static final Logger log = LoggerFactory.getLogger(Spark213PlanUtils.class);

  private static final MethodHandles methodLoader =
      new MethodHandles(ClassLoader.getSystemClassLoader());
  private static final MethodHandle constructor =
      methodLoader.constructor(
          SparkPlanInfo.class,
          String.class,
          String.class,
          scala.collection.immutable.Seq.class,
          scala.collection.immutable.Map.class,
          scala.collection.immutable.Seq.class);
  private static final MethodHandle databricksConstructor =
      methodLoader.constructor(
          SparkPlanInfo.class,
          String.class,
          String.class,
          scala.collection.immutable.Seq.class,
          scala.collection.immutable.Map.class,
          scala.collection.immutable.Seq.class,
          Option.class,
          String.class,
          Option.class);

  public static SparkPlanInfo upsertSparkPlanInfoMetadata(
      SparkPlanInfo planInfo, scala.collection.immutable.Map<String, String> meta) {
    // Attempt to create a new SparkPlanInfo with additional metadata replaced
    // Since the fields are immutable we must instantiate a new SparkPlanInfo to do this

    Object[] standardArgs =
        new Object[] {
          planInfo.nodeName(),
          planInfo.simpleString(),
          planInfo.children(),
          meta,
          planInfo.metrics()
        };

    if (databricksConstructor != null) {
      List<Object> databricksArgs = new ArrayList<>(Arrays.asList(standardArgs));
      try {
        databricksArgs.add(SparkPlanInfo.class.getMethod("estRowCount").invoke(planInfo));
        databricksArgs.add(SparkPlanInfo.class.getMethod("rddScopeId").invoke(planInfo));
        databricksArgs.add(SparkPlanInfo.class.getMethod("explainId").invoke(planInfo));
      } catch (Throwable t) {
        log.warn("Error obtaining Databricks-specific SparkPlanInfo args", t);
      }

      SparkPlanInfo newPlan = methodLoader.invoke(databricksConstructor, databricksArgs.toArray());
      if (newPlan != null) {
        return newPlan;
      }
    }

    return null;
  }
}
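
Apart from probing scala.collection.immutable.Seq parameter types (Scala 2.13 re-pointed the default Seq alias to the immutable variant), this helper differs from the 2.12 one in that, as committed, it only attempts the Databricks constructor before returning null. Both helpers fetch the Databricks-only fields reflectively, so the same bytecode loads on OSS Spark, where getMethod throws and the warning is logged. A compact, hypothetical sketch of that probe-by-reflection idiom:

    import java.lang.reflect.Method;

    final class OptionalGetter {
      // Invoke a zero-arg getter only if the running class exposes it.
      static Object invokeIfPresent(Object target, String getterName) {
        try {
          Method m = target.getClass().getMethod(getterName);
          return m.invoke(target); // present on the Databricks fork
        } catch (ReflectiveOperationException e) {
          return null; // getter absent on this Spark distribution
        }
      }
    }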
Modified file: AbstractSparkPlanSerializer.java
@@ -60,7 +60,7 @@ public abstract class AbstractSparkPlanSerializer {
 
   private final MethodHandles methodLoader = new MethodHandles(ClassLoader.getSystemClassLoader());
   private final MethodHandle getSimpleString =
-      methodLoader.method(TreeNode.class, "simpleString", new Class[] {int.class});
+      methodLoader.method(TreeNode.class, "simpleString", int.class);
   private final MethodHandle getSimpleStringLegacy =
       methodLoader.method(TreeNode.class, "simpleString");

@@ -156,25 +156,20 @@ protected Object safeParseObjectToJson(Object value, int depth) {
   }
 
   private String getSimpleString(TreeNode value) {
-    Object simpleString = null;
-
     if (getSimpleString != null) {
-      try {
-        simpleString = getSimpleString.invoke(value, MAX_LENGTH);
-      } catch (Throwable e) {
+      String simpleString = methodLoader.invoke(getSimpleString, value, MAX_LENGTH);
+      if (simpleString != null) {
+        return simpleString;
       }
     }
 
     if (getSimpleStringLegacy != null) {
-      try {
-        simpleString = getSimpleStringLegacy.invoke(value);
-      } catch (Throwable e) {
+      String simpleString = methodLoader.invoke(getSimpleStringLegacy, value);
+      if (simpleString != null) {
+        return simpleString;
       }
     }
 
-    if (simpleString != null && simpleString instanceof String) {
-      return (String) simpleString;
-    }
     return null;
   }

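
The getSimpleString cleanup above assumes methodLoader.invoke returns null both when the handle is null and when invocation fails, letting the Spark 3.x simpleString(int) overload be tried before falling back to the pre-3.x no-arg simpleString(). A hedged sketch of that contract in plain java.lang.invoke; illustrative only, assuming datadog.trace.util.MethodHandles behaves like this:

    import java.lang.invoke.MethodHandle;

    final class SafeInvoke {
      // Null-safe, exception-swallowing invocation mirroring the assumed contract.
      @SuppressWarnings("unchecked")
      static <T> T invokeOrNull(MethodHandle handle, Object... args) {
        if (handle == null) {
          return null; // overload not present on this Spark version
        }
        try {
          return (T) handle.invokeWithArguments(args);
        } catch (Throwable t) {
          return null; // treat any reflective failure as "unavailable"
        }
      }
    }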