I have seen two variants of the Clinical Entity Resolver notebook (when I clone the repo into my virtual machine I actually see three versions), and I ran them from Jupyter. Each variant fails with a different error:

- In the GitHub version: `IllegalArgumentException: requirement failed: Wrong or missing inputCols annotators in NerConverterInternal_37f79d519de6`
- In the Jupyter notebook version: `AnalysisException: cannot resolve 'ner_token.metadata'`
Code snippet for the GitHub version:

```python
# Pretrained clinical posology NER model
posology_ner = NerDLModel.pretrained("ner_posology", "en", "clinical/models") \
    .setInputCols(["sentence", "token", "embeddings"]) \
    .setOutputCol("ner")

# Groups the IOB NER tags into chunks
ner_converter1 = NerConverterInternal() \
    .setInputCols(["sentence", "token", "ner"]) \
    .setOutputCol("ner_chunk")

# Merges chunk columns (here the same column twice) and applies a replace dictionary
chunk_merge = ChunkMergeApproach() \
    .setInputCols("ner_chunk", "ner_chunk") \
    .setOutputCol("merged_chunk") \
    .setReplaceDictResource("replace_dict.csv", "TEXT", {"delimiter": ","})

# Converts the merged chunks back to IOB tags
iob_tagger = IOBTagger() \
    .setInputCols("token", "merged_chunk") \
    .setOutputCol("merged_ner")

# Second conversion, keeping only DRUG chunks
ner_converter2 = NerConverterInternal() \
    .setInputCols(["sentence", "token", "merged_ner"]) \
    .setOutputCol("greedy_chunk") \
    .setPreservePosition(False) \
    .setWhiteList(["DRUG"])

posology_rx = Pipeline(stages=[
    documentAssembler,
    sentenceDetector,
    tokenizer,
    stopwords,
    word_embeddings,
    posology_ner,
    ner_converter1,
    chunk_merge,
    iob_tagger,
    ner_converter2,
    chunk_embeddings,
    rxnorm_resolver1
])

model_rxnorm = posology_rx.fit(data_ner)
output = model_rxnorm.transform(data_ner)

# In this Spark version the zipped struct's fields are addressed positionally:
# '0' = greedy_chunk.result, '1' = greedy_chunk.metadata,
# '2' = rxnorm_resolution.result, '3' = rxnorm_resolution.metadata
output.select(
        F.explode(F.arrays_zip("greedy_chunk.result", "greedy_chunk.metadata",
                               "rxnorm_resolution.result", "rxnorm_resolution.metadata"))
         .alias("rxnorm_result")) \
      .select(F.expr("rxnorm_result['0']").alias("chunk"),
              F.expr("rxnorm_result['1'].entity").alias("entity"),
              F.expr("rxnorm_result['3'].all_k_resolutions").alias("target_text"),
              F.expr("rxnorm_result['2']").alias("code"),
              F.expr("rxnorm_result['3'].confidence").alias("confidence")) \
      .show(truncate=100)
```
Error Log:

```
---------------------------------------------------------------------------
Py4JJavaError                             Traceback (most recent call last)
~/spark-nlp/anaconda3/envs/sparknlp/lib/python3.6/site-packages/pyspark/sql/utils.py in deco(*a, **kw)
     62     try:
---> 63         return f(*a, **kw)
     64     except py4j.protocol.Py4JJavaError as e:

~/spark-nlp/anaconda3/envs/sparknlp/lib/python3.6/site-packages/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
    327                     "An error occurred while calling {0}{1}{2}.\n".
--> 328                     format(target_id, ".", name), value)
    329             else:

Py4JJavaError: An error occurred while calling o4989.transform.
: java.lang.IllegalArgumentException: requirement failed: Wrong or missing inputCols annotators in NerConverterInternal_37f79d519de6.

Current inputCols: sentence,token,merged_ner. Dataset's columns:
(column_name=text,is_nlp_annotator=false)
(column_name=document,is_nlp_annotator=true,type=document)
(column_name=sentence,is_nlp_annotator=true,type=document)
(column_name=raw_token,is_nlp_annotator=true,type=token)
(column_name=token,is_nlp_annotator=true,type=token)
(column_name=embeddings,is_nlp_annotator=true,type=word_embeddings)
(column_name=ner,is_nlp_annotator=true,type=named_entity)
(column_name=ner_chunk,is_nlp_annotator=true,type=chunk)
(column_name=merged_chunk,is_nlp_annotator=true,type=chunk)
(column_name=merged_ner,is_nlp_annotator=true,type=chunk).
Make sure such annotators exist in your pipeline, with the right output names and that they have following annotator types: document, token, named_entity
	at scala.Predef$.require(Predef.scala:224)
	at com.johnsnowlabs.nlp.AnnotatorModel._transform(AnnotatorModel.scala:43)
	at com.johnsnowlabs.nlp.AnnotatorModel.transform(AnnotatorModel.scala:79)
	at sun.reflect.GeneratedMethodAccessor117.invoke(Unknown Source)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
	at py4j.Gateway.invoke(Gateway.java:282)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.GatewayConnection.run(GatewayConnection.java:238)
	at java.lang.Thread.run(Thread.java:748)

During handling of the above exception, another exception occurred:

IllegalArgumentException                  Traceback (most recent call last)
<ipython-input-36-c40be2771ed1> in <module>
     36 model_rxnorm = posology_rx.fit(data_ner)
     37 
---> 38 output = model_rxnorm.transform(data_ner)
     39 
     40 output.select(F.explode(F.arrays_zip("greedy_chunk.result","greedy_chunk.metadata","rxnorm_resolution.result","rxnorm_resolution.metadata")).alias("rxnorm_result")) \

~/spark-nlp/anaconda3/envs/sparknlp/lib/python3.6/site-packages/pyspark/ml/base.py in transform(self, dataset, params)
    171                 return self.copy(params)._transform(dataset)
    172             else:
--> 173                 return self._transform(dataset)
    174         else:
    175             raise ValueError("Params must be a param map but got %s." % type(params))

~/spark-nlp/anaconda3/envs/sparknlp/lib/python3.6/site-packages/pyspark/ml/pipeline.py in _transform(self, dataset)
    260     def _transform(self, dataset):
    261         for t in self.stages:
--> 262             dataset = t.transform(dataset)
    263         return dataset
    264 

~/spark-nlp/anaconda3/envs/sparknlp/lib/python3.6/site-packages/pyspark/ml/base.py in transform(self, dataset, params)
    171                 return self.copy(params)._transform(dataset)
    172             else:
--> 173                 return self._transform(dataset)
    174         else:
    175             raise ValueError("Params must be a param map but got %s." % type(params))

~/spark-nlp/anaconda3/envs/sparknlp/lib/python3.6/site-packages/pyspark/ml/wrapper.py in _transform(self, dataset)
    310     def _transform(self, dataset):
    311         self._transfer_params_to_java()
--> 312         return DataFrame(self._java_obj.transform(dataset._jdf), dataset.sql_ctx)
    313 
    314 

~/spark-nlp/anaconda3/envs/sparknlp/lib/python3.6/site-packages/py4j/java_gateway.py in __call__(self, *args)
   1255         answer = self.gateway_client.send_command(command)
   1256         return_value = get_return_value(
-> 1257             answer, self.gateway_client, self.target_id, self.name)
   1258 
   1259         for temp_arg in temp_args:

~/spark-nlp/anaconda3/envs/sparknlp/lib/python3.6/site-packages/pyspark/sql/utils.py in deco(*a, **kw)
     77                 raise QueryExecutionException(s.split(': ', 1)[1], stackTrace)
     78             if s.startswith('java.lang.IllegalArgumentException: '):
---> 79                 raise IllegalArgumentException(s.split(': ', 1)[1], stackTrace)
     80             raise
     81     return deco

IllegalArgumentException: "requirement failed: Wrong or missing inputCols annotators in NerConverterInternal_37f79d519de6.\n\nCurrent inputCols: sentence,token,merged_ner. Dataset's columns:\n(column_name=text,is_nlp_annotator=false)\n(column_name=document,is_nlp_annotator=true,type=document)\n(column_name=sentence,is_nlp_annotator=true,type=document)\n(column_name=raw_token,is_nlp_annotator=true,type=token)\n(column_name=token,is_nlp_annotator=true,type=token)\n(column_name=embeddings,is_nlp_annotator=true,type=word_embeddings)\n(column_name=ner,is_nlp_annotator=true,type=named_entity)\n(column_name=ner_chunk,is_nlp_annotator=true,type=chunk)\n(column_name=merged_chunk,is_nlp_annotator=true,type=chunk)\n(column_name=merged_ner,is_nlp_annotator=true,type=chunk).\nMake sure such annotators exist in your pipeline, with the right output names and that they have following annotator types: document, token, named_entity"
```
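The key detail is in the message: by the time `ner_converter2` runs, the `merged_ner` column produced by `IOBTagger` is registered with annotator type `chunk`, while `NerConverterInternal` requires inputs of types `document`, `token`, `named_entity`. As a quick sanity check (the `partial` pipeline below is my own ad-hoc construction, not part of the notebook), you can stop the pipeline right after `IOBTagger` and print the annotator type Spark NLP stores in each column's schema metadata, which is exactly what the failing requirement check reads:

```python
from pyspark.ml import Pipeline

# Run only the stages that succeed, i.e. everything before ner_converter2
partial = Pipeline(stages=[
    documentAssembler, sentenceDetector, tokenizer, stopwords,
    word_embeddings, posology_ner, ner_converter1, chunk_merge, iob_tagger
]).fit(data_ner)

df = partial.transform(data_ner)
for name in ["ner", "ner_chunk", "merged_chunk", "merged_ner"]:
    # StructField.metadata is a plain dict in PySpark
    print(name, "->", df.schema[name].metadata.get("annotatorType"))
# Per the traceback this should print named_entity for "ner" and chunk for the rest
```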
Code snippet for the Jupyter version:

Persisting temporarily to keep the DAG size and resource usage low (Ensemble Resolvers are resource-intensive):

```python
pipelineModelFull = pipelineFull.fit(data)
output = pipelineModelFull.transform(data)

output.write.mode("overwrite").save("temp")
output = spark.read.load("temp")
```
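For readers wondering why the persist is there at all: writing the transformed DataFrame out and reading it back gives Spark a fresh source plan, so the resolver stages that run afterwards start from a short lineage instead of dragging the whole annotator DAG behind them. A minimal sketch of the same pattern with an explicit format (the path is hypothetical; `save()`/`load()` above default to Parquet anyway, unless `spark.sql.sources.default` was changed):

```python
# Checkpoint-to-disk pattern: truncates the logical plan before the
# resource-intensive ensemble resolver stages run.
output.write.mode("overwrite").parquet("/tmp/resolver_checkpoint")
output = spark.read.parquet("/tmp/resolver_checkpoint")  # lineage restarts here
```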
Error Log:

```
---------------------------------------------------------------------------
Py4JJavaError Traceback (most recent call last)
~/spark-nlp/anaconda3/envs/sparknlp/lib/python3.6/site-packages/pyspark/sql/utils.py in deco(*a, **kw)
62 try:
---> 63 return f(*a, **kw)
64 except py4j.protocol.Py4JJavaError as e:
~/spark-nlp/anaconda3/envs/sparknlp/lib/python3.6/site-packages/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
327 "An error occurred while calling {0}{1}{2}.\n".
--> 328 format(target_id, ".", name), value)
329 else:
Py4JJavaError: An error occurred while calling o6703.transform.
: org.apache.spark.sql.AnalysisException: cannot resolve 'ner_token.metadata
' given input columns: [chunk_token_jsl, ner_jsl, chunk_drug, chunk_jsl, chunk_embs_jsl, text_feed, ner_drug, embeddings, token, document, chunk_token_drug, chunk_embs_drug, sentence, doc_id]; line 1 pos 0;
'Project ['ner_token.metadata]
+- Project [doc_id#4309L, text_feed#4310, document#4468, sentence#4473, token#4479, embeddings#4494, ner_jsl#4503, ner_drug#4513, chunk_jsl#4523, chunk_drug#4534, chunk_embs_jsl#4559, chunk_embs_drug#4586, chunk_token_jsl#4600, UDF(array(chunk_drug#4534)) AS chunk_token_drug#4615]
   +- Project [doc_id#4309L, text_feed#4310, document#4468, sentence#4473, token#4479, embeddings#4494, ner_jsl#4503, ner_drug#4513, chunk_jsl#4523, chunk_drug#4534, chunk_embs_jsl#4559, chunk_embs_drug#4586, UDF(array(chunk_jsl#4523)) AS chunk_token_jsl#4600]
      +- Project [doc_id#4309L, text_feed#4310, document#4468, sentence#4473, token#4479, embeddings#4494, ner_jsl#4503, ner_drug#4513, chunk_jsl#4523, chunk_drug#4534, chunk_embs_jsl#4559, chunk_embs_drug#4572 AS chunk_embs_drug#4586]
         +- Project [doc_id#4309L, text_feed#4310, document#4468, sentence#4473, token#4479, embeddings#4494, ner_jsl#4503, ner_drug#4513, chunk_jsl#4523, chunk_drug#4534, chunk_embs_jsl#4559, UDF(array(chunk_drug#4534, embeddings#4494)) AS chunk_embs_drug#4572]
            +- Project [doc_id#4309L, text_feed#4310, document#4468, sentence#4473, token#4479, embeddings#4494, ner_jsl#4503, ner_drug#4513, chunk_jsl#4523, chunk_drug#4534, chunk_embs_jsl#4546 AS chunk_embs_jsl#4559]
               +- Project [doc_id#4309L, text_feed#4310, document#4468, sentence#4473, token#4479, embeddings#4494, ner_jsl#4503, ner_drug#4513, chunk_jsl#4523, chunk_drug#4534, UDF(array(chunk_jsl#4523, embeddings#4494)) AS chunk_embs_jsl#4546]
                  +- Project [doc_id#4309L, text_feed#4310, document#4468, sentence#4473, token#4479, embeddings#4494, ner_jsl#4503, ner_drug#4513, chunk_jsl#4523, UDF(array(sentence#4473, token#4479, ner_drug#4513)) AS chunk_drug#4534]
                     +- Project [doc_id#4309L, text_feed#4310, document#4468, sentence#4473, token#4479, embeddings#4494, ner_jsl#4503, ner_drug#4513, UDF(array(sentence#4473, token#4479, ner_jsl#4503)) AS chunk_jsl#4523]
                        +- Project [doc_id#4309L, text_feed#4310, document#4468, sentence#4473, token#4479, embeddings#4494, ner_jsl#4503, UDF(array(sentence#4473, token#4479, embeddings#4494)) AS ner_drug#4513]
                           +- Project [doc_id#4309L, text_feed#4310, document#4468, sentence#4473, token#4479, embeddings#4494, UDF(array(sentence#4473, token#4479, embeddings#4494)) AS ner_jsl#4503]
                              +- Project [doc_id#4309L, text_feed#4310, document#4468, sentence#4473, token#4479, embeddings#4486 AS embeddings#4494]
                                 +- Project [doc_id#4309L, text_feed#4310, document#4468, sentence#4473, token#4479, UDF(array(sentence#4473, token#4479)) AS embeddings#4486]
                                    +- Project [doc_id#4309L, text_feed#4310, document#4468, sentence#4473, UDF(array(sentence#4473)) AS token#4479]
                                       +- Project [doc_id#4309L, text_feed#4310, document#4468, UDF(array(document#4468)) AS sentence#4473]
                                          +- Project [doc_id#4309L, text_feed#4310, UDF(text_feed#4310) AS document#4468]
                                             +- Project [_1#4305L AS doc_id#4309L, _2#4306 AS text_feed#4310]
                                                +- LogicalRDD [_1#4305L, _2#4306], false
at org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.failAnalysis(package.scala:42)
at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$3.applyOrElse(CheckAnalysis.scala:111)
at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$3.applyOrElse(CheckAnalysis.scala:108)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:281)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:281)
at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:70)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:280)
at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$transformExpressionsUp$1.apply(QueryPlan.scala:93)
at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$transformExpressionsUp$1.apply(QueryPlan.scala:93)
at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$1.apply(QueryPlan.scala:105)
at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$1.apply(QueryPlan.scala:105)
at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:70)
at org.apache.spark.sql.catalyst.plans.QueryPlan.transformExpression$1(QueryPlan.scala:104)
at org.apache.spark.sql.catalyst.plans.QueryPlan.org$apache$spark$sql$catalyst$plans$QueryPlan$$recursiveTransform$1(QueryPlan.scala:116)
at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$org$apache$spark$sql$catalyst$plans$QueryPlan$$recursiveTransform$1$2.apply(QueryPlan.scala:121)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
at scala.collection.AbstractTraversable.map(Traversable.scala:104)
at org.apache.spark.sql.catalyst.plans.QueryPlan.org$apache$spark$sql$catalyst$plans$QueryPlan$$recursiveTransform$1(QueryPlan.scala:121)
at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$2.apply(QueryPlan.scala:126)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)
at org.apache.spark.sql.catalyst.plans.QueryPlan.mapExpressions(QueryPlan.scala:126)
at org.apache.spark.sql.catalyst.plans.QueryPlan.transformExpressionsUp(QueryPlan.scala:93)
at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:108)
at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:86)
at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:127)
at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.checkAnalysis(CheckAnalysis.scala:86)
at org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:95)
at org.apache.spark.sql.catalyst.analysis.Analyzer$$anonfun$executeAndCheck$1.apply(Analyzer.scala:108)
at org.apache.spark.sql.catalyst.analysis.Analyzer$$anonfun$executeAndCheck$1.apply(Analyzer.scala:105)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.markInAnalyzer(AnalysisHelper.scala:201)
at org.apache.spark.sql.catalyst.analysis.Analyzer.executeAndCheck(Analyzer.scala:105)
at org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:57)
at org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:55)
at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:47)
at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:78)
at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$withPlan(Dataset.scala:3412)
at org.apache.spark.sql.Dataset.select(Dataset.scala:1340)
at com.johnsnowlabs.nlp.annotators.resolution.EnsembleEntityResolverModel.checkIfTokensHaveChunk(EnsembleEntityResolverModel.scala:116)
at com.johnsnowlabs.nlp.annotators.resolution.EnsembleEntityResolverModel.transform(EnsembleEntityResolverModel.scala:72)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:748)
During handling of the above exception, another exception occurred:
AnalysisException Traceback (most recent call last)
in <module>
2 pipelineModelFull = pipelineFull.fit(data)
3
----> 4 output = pipelineModelFull.transform(data)
5
6 output.write.mode("overwrite").save("temp")
~/spark-nlp/anaconda3/envs/sparknlp/lib/python3.6/site-packages/pyspark/ml/base.py in transform(self, dataset, params)
171 return self.copy(params)._transform(dataset)
172 else:
--> 173 return self._transform(dataset)
174 else:
175 raise ValueError("Params must be a param map but got %s." % type(params))
~/spark-nlp/anaconda3/envs/sparknlp/lib/python3.6/site-packages/pyspark/ml/pipeline.py in _transform(self, dataset)
260 def _transform(self, dataset):
261 for t in self.stages:
--> 262 dataset = t.transform(dataset)
263 return dataset
264
~/spark-nlp/anaconda3/envs/sparknlp/lib/python3.6/site-packages/pyspark/ml/base.py in transform(self, dataset, params)
171 return self.copy(params)._transform(dataset)
172 else:
--> 173 return self._transform(dataset)
174 else:
175 raise ValueError("Params must be a param map but got %s." % type(params))
~/spark-nlp/anaconda3/envs/sparknlp/lib/python3.6/site-packages/pyspark/ml/wrapper.py in _transform(self, dataset)
310 def _transform(self, dataset):
311 self._transfer_params_to_java()
--> 312 return DataFrame(self._java_obj.transform(dataset._jdf), dataset.sql_ctx)
313
314
~/spark-nlp/anaconda3/envs/sparknlp/lib/python3.6/site-packages/py4j/java_gateway.py in __call__(self, *args)
1255 answer = self.gateway_client.send_command(command)
1256 return_value = get_return_value(
-> 1257 answer, self.gateway_client, self.target_id, self.name)
1258
1259 for temp_arg in temp_args:
~/spark-nlp/anaconda3/envs/sparknlp/lib/python3.6/site-packages/pyspark/sql/utils.py in deco(*a, **kw)
67 e.java_exception.getStackTrace()))
68 if s.startswith('org.apache.spark.sql.AnalysisException: '):
---> 69 raise AnalysisException(s.split(': ', 1)[1], stackTrace)
70 if s.startswith('org.apache.spark.sql.catalyst.analysis'):
71 raise AnalysisException(s.split(': ', 1)[1], stackTrace)
AnalysisException: "cannot resolve 'ner_token.metadata
' given input columns: [chunk_token_jsl, ner_jsl, chunk_drug, chunk_jsl, chunk_embs_jsl, text_feed, ner_drug, embeddings, token, document, chunk_token_drug, chunk_embs_drug, sentence, doc_id]; line 1 pos 0;"
```
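For context, the bottom of the Java stack shows the failure originates in `EnsembleEntityResolverModel.checkIfTokensHaveChunk`, which issues a plain DataFrame `select` on `ner_token.metadata`; since no `ner_token` column exists among the pipeline's outputs (see the `given input columns` list), Spark's analyzer rejects the query before anything runs. The same class of error reproduces in plain PySpark (toy data, nothing from the notebook):

```python
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(1, "some text")], ["doc_id", "text_feed"])

# Selecting a column the DataFrame does not have fails at analysis time
# with the same "cannot resolve ... given input columns" message.
df.select("ner_token.metadata")  # raises AnalysisException
```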
Your Environment
- Spark-NLP version: 2.5.3
- Operating System and version: Ubuntu 18.04 (Google VM)
- Deployment (Docker, Jupyter, Scala, pip, conda, etc.): Jupyter