Dataset comparison did not generate a key when the compared data contains a column with a complex data type.
Expected behavior: dataset comparison has the same behavior regardless of the data type.
Test DatasetComparison (2) failed with an exception:
org.apache.spark.sql.AnalysisException: cannot resolve 'concat_ws('|', `name`, `string`, CAST(`boolean` AS STRING), CAST(`integer` AS STRING), CAST(`date` AS STRING), CAST(`binary` AS STRING), transform(`errCol`, lambdafunction(named_struct('errType', namedlambdavariable().`errType`, 'errCode', namedlambdavariable().`errCode`, 'errMsg', namedlambdavariable().`errMsg`, 'errCol', namedlambdavariable().`errCol`, 'rawValues', namedlambdavariable().`rawValues`, 'mappings', transform(namedlambdavariable().`mappings`, lambdafunction(named_struct('mappingTableColumn', namedlambdavariable().`mappingTableColumn`, 'mappedDatasetColumn', namedlambdavariable().`mappedDatasetColumn`), namedlambdavariable()))), namedlambdavariable())), CAST(`enceladus_record_id` AS STRING))' due to data type mismatch: argument 8 requires (array<string> or string) type, however, 'transform(`errCol`, lambdafunction(named_struct('errType', namedlambdavariable().`errType`, 'errCode', namedlambdavariable().`errCode`, 'errMsg', namedlambdavariable().`errMsg`, 'errCol', namedlambdavariable().`errCol`, 'rawValues', namedlambdavariable().`rawValues`, 'mappings', transform(namedlambdavariable().`mappings`, lambdafunction(named_struct('mappingTableColumn', namedlambdavariable().`mappingTableColumn`, 'mappedDatasetColumn', namedlambdavariable().`mappedDatasetColumn`), namedlambdavariable()))), namedlambdavariable()))' is of array<struct<errType:string,errCode:string,errMsg:string,errCol:string,rawValues:array<string>,mappings:array<struct<mappingTableColumn:string,mappedDatasetColumn:string>>>> type.;;
'Project [name#0, string#1, boolean#2, integer#3, date#4, binary#5, errCol#67, enceladus_record_id#7, md5(concat_ws(|, name#0, string#1, cast(boolean#2 as string), cast(integer#3 as string), cast(date#4 as string), cast(binary#5 as string), transform(errCol#67, lambdafunction(named_struct(errType, lambda elm#89.errType, errCode, lambda elm#89.errCode, errMsg, lambda elm#89.errMsg, errCol, lambda elm#89.errCol, rawValues, lambda elm#89.rawValues, mappings, transform(lambda elm#89.mappings, lambdafunction(named_struct(mappingTableColumn, lambda elm#90.mappingTableColumn, mappedDatasetColumn, lambda elm#90.mappedDatasetColumn), lambda elm#90, false))), lambda elm#89, false)), cast(enceladus_record_id#7 as string))) AS HermesDatasetComparisonUniqueId#88]
+- Project [name#0, string#1, boolean#2, integer#3, date#4, binary#5, transform(errCol#6, lambdafunction(named_struct(errType, lambda elm#68.errType, errCode, lambda elm#68.errCode, errMsg, lambda elm#68.errMsg, errCol, lambda elm#68.errCol, rawValues, lambda elm#68.rawValues, mappings, transform(lambda elm#68.mappings, lambdafunction(named_struct(mappingTableColumn, lambda elm#69.mappingTableColumn, mappedDatasetColumn, lambda elm#69.mappedDatasetColumn), lambda elm#69, false))), lambda elm#68, false)) AS errCol#67, enceladus_record_id#7]
+- Relation[name#0,string#1,boolean#2,integer#3,date#4,binary#5,errCol#6,enceladus_record_id#7] parquet
at org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.failAnalysis(package.scala:42)
at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$3.applyOrElse(CheckAnalysis.scala:116)
at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$3.applyOrElse(CheckAnalysis.scala:108)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:281)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:281)
at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:70)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:280)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:278)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:278)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:329)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:327)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:278)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:278)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:278)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:329)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:327)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:278)
at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$transformExpressionsUp$1.apply(QueryPlan.scala:93)
at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$transformExpressionsUp$1.apply(QueryPlan.scala:93)
at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$1.apply(QueryPlan.scala:105)
at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$1.apply(QueryPlan.scala:105)
at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:70)
at org.apache.spark.sql.catalyst.plans.QueryPlan.transformExpression$1(QueryPlan.scala:104)
at org.apache.spark.sql.catalyst.plans.QueryPlan.org$apache$spark$sql$catalyst$plans$QueryPlan$$recursiveTransform$1(QueryPlan.scala:116)
at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$org$apache$spark$sql$catalyst$plans$QueryPlan$$recursiveTransform$1$2.apply(QueryPlan.scala:121)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at scala.collection.immutable.List.foreach(List.scala:392)
at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
at scala.collection.immutable.List.map(List.scala:296)
at org.apache.spark.sql.catalyst.plans.QueryPlan.org$apache$spark$sql$catalyst$plans$QueryPlan$$recursiveTransform$1(QueryPlan.scala:121)
at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$2.apply(QueryPlan.scala:126)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)
at org.apache.spark.sql.catalyst.plans.QueryPlan.mapExpressions(QueryPlan.scala:126)
at org.apache.spark.sql.catalyst.plans.QueryPlan.transformExpressionsUp(QueryPlan.scala:93)
at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:108)
at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:86)
at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:127)
at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.checkAnalysis(CheckAnalysis.scala:86)
at org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:95)
at org.apache.spark.sql.catalyst.analysis.Analyzer$$anonfun$executeAndCheck$1.apply(Analyzer.scala:108)
at org.apache.spark.sql.catalyst.analysis.Analyzer$$anonfun$executeAndCheck$1.apply(Analyzer.scala:105)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.markInAnalyzer(AnalysisHelper.scala:201)
at org.apache.spark.sql.catalyst.analysis.Analyzer.executeAndCheck(Analyzer.scala:105)
at org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:57)
at org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:55)
at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:47)
at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:78)
at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$withPlan(Dataset.scala:3412)
at org.apache.spark.sql.Dataset.select(Dataset.scala:1340)
at org.apache.spark.sql.Dataset.withColumns(Dataset.scala:2258)
at org.apache.spark.sql.Dataset.withColumn(Dataset.scala:2225)
at za.co.absa.hermes.datasetComparison.DatasetComparison.addKeyColumn(DatasetComparison.scala:264)
at za.co.absa.hermes.datasetComparison.DatasetComparison.compare(DatasetComparison.scala:75)
at za.co.absa.hermes.e2eRunner.plugins.DatasetComparisonPlugin.performAction(DatasetComparisonPlugin.scala:76)
at za.co.absa.hermes.e2eRunner.E2ERunnerJobExperimental$$anonfun$za$co$absa$hermes$e2eRunner$E2ERunnerJobExperimental$$tryExecute$1.apply(E2ERunnerJobExperimental.scala:105)
at za.co.absa.hermes.e2eRunner.E2ERunnerJobExperimental$$anonfun$za$co$absa$hermes$e2eRunner$E2ERunnerJobExperimental$$tryExecute$1.apply(E2ERunnerJobExperimental.scala:104)
at scala.util.Try$.apply(Try.scala:192)
at za.co.absa.hermes.e2eRunner.E2ERunnerJobExperimental$.za$co$absa$hermes$e2eRunner$E2ERunnerJobExperimental$$tryExecute(E2ERunnerJobExperimental.scala:104)
at za.co.absa.hermes.e2eRunner.E2ERunnerJobExperimental$$anonfun$runTests$1.apply(E2ERunnerJobExperimental.scala:81)
at za.co.absa.hermes.e2eRunner.E2ERunnerJobExperimental$$anonfun$runTests$1.apply(E2ERunnerJobExperimental.scala:75)
at za.co.absa.hermes.e2eRunner.TestDefinitions.fold$1(TestDefinitions.scala:61)
at za.co.absa.hermes.e2eRunner.TestDefinitions.foldLeftWithIndex(TestDefinitions.scala:63)
at za.co.absa.hermes.e2eRunner.E2ERunnerJobExperimental$.runTests(E2ERunnerJobExperimental.scala:75)
at za.co.absa.hermes.e2eRunner.E2ERunnerJobExperimental$.main(E2ERunnerJobExperimental.scala:54)
at za.co.absa.hermes.e2eRunner.E2ERunnerJobExperimental.main(E2ERunnerJobExperimental.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:845)
at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:161)
at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:184)
at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:86)
at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:920)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:929)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)