KeyError: ('2-.-0-.-0--394352f6a8351feaac334fbb8cc63fa4-46c7c5d46afed8316facd72e7e581bec-4da92a6240b2b826fdd8860ba75a31b4-39e3c68a052760cc345a9147b0d68f7d-5c5e32ff210f3b7f56c98ca29917c25e-06f0df2d61979d629033f4a22eff5198-4ac47e74762ba6a774cceea0e1e75ae6-13b7ffc189bd9fba7696034bbcfee151', (torch.bfloat16, torch.bfloat16, torch.bfloat16, torch.float32, torch.bfloat16, torch.float32, torch.float32, 'fp32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32', 'i32'), ('vector', True, 128, False, False, True, 128, 128), (True, True, True, True, True, True, True, (False,), (True, False), (True, False), (True, False), (True, False), (True, False), (True, False), (True, False), (True, False), (True, False), (True, False), (False, False), (True, False), (True, False), (True, False), (True, False), (True, False), (False, False), (False, False), (True, False), (True, False), (False, False), (False, False)))
Traceback (most recent call last):

  in <module>:1
  in answer_question:3

  /local_disk0/.ephemeral_nfs/envs/pythonEnv-e4039dcb-6324-43cf-ad2f-10278d712478/lib/python3.10/site-packages/langchain/chains/base.py:140 in __call__
      raise e

  /local_disk0/.ephemeral_nfs/envs/pythonEnv-e4039dcb-6324-43cf-ad2f-10278d712478/lib/python3.10/site-packages/langchain/chains/base.py:134 in __call__
      self._call(inputs, run_manager=run_manager)

  /local_disk0/.ephemeral_nfs/envs/pythonEnv-e4039dcb-6324-43cf-ad2f-10278d712478/lib/python3.10/site-packages/langchain/chains/combine_documents/base.py:84 in _call
      output, extra_return_dict = self.combine_docs(

  /local_disk0/.ephemeral_nfs/envs/pythonEnv-e4039dcb-6324-43cf-ad2f-10278d712478/lib/python3.10/site-packages/langchain/chains/combine_documents/stuff.py:87 in combine_docs
      return self.llm_chain.predict(callbacks=callbacks, **inputs), {}

  /local_disk0/.ephemeral_nfs/envs/pythonEnv-e4039dcb-6324-43cf-ad2f-10278d712478/lib/python3.10/site-packages/langchain/chains/llm.py:213 in predict
      return self(kwargs, callbacks=callbacks)[self.output_key]

  /local_disk0/.ephemeral_nfs/envs/pythonEnv-e4039dcb-6324-43cf-ad2f-10278d712478/lib/python3.10/site-packages/langchain/chains/base.py:140 in __call__
      raise e

  /local_disk0/.ephemeral_nfs/envs/pythonEnv-e4039dcb-6324-43cf-ad2f-10278d712478/lib/python3.10/site-packages/langchain/chains/base.py:134 in __call__
      self._call(inputs, run_manager=run_manager)

  /local_disk0/.ephemeral_nfs/envs/pythonEnv-e4039dcb-6324-43cf-ad2f-10278d712478/lib/python3.10/site-packages/langchain/chains/llm.py:69 in _call
      response = self.generate([inputs], run_manager=run_manager)

  /local_disk0/.ephemeral_nfs/envs/pythonEnv-e4039dcb-6324-43cf-ad2f-10278d712478/lib/python3.10/site-packages/langchain/chains/llm.py:79 in generate
      return self.llm.generate_prompt(

  /local_disk0/.ephemeral_nfs/envs/pythonEnv-e4039dcb-6324-43cf-ad2f-10278d712478/lib/python3.10/site-packages/langchain/llms/base.py:134 in generate_prompt
      return self.generate(prompt_strings, stop=stop, callbacks=callbacks)

  /local_disk0/.ephemeral_nfs/envs/pythonEnv-e4039dcb-6324-43cf-ad2f-10278d712478/lib/python3.10/site-packages/langchain/llms/base.py:191 in generate
      raise e

  /local_disk0/.ephemeral_nfs/envs/pythonEnv-e4039dcb-6324-43cf-ad2f-10278d712478/lib/python3.10/site-packages/langchain/llms/base.py:185 in generate
      self._generate(prompts, stop=stop, run_manager=run_manager)

  /local_disk0/.ephemeral_nfs/envs/pythonEnv-e4039dcb-6324-43cf-ad2f-10278d712478/lib/python3.10/site-packages/langchain/llms/base.py:436 in _generate
      self._call(prompt, stop=stop, run_manager=run_manager)

  /local_disk0/.ephemeral_nfs/envs/pythonEnv-e4039dcb-6324-43cf-ad2f-10278d712478/lib/python3.10/site-packages/langchain/llms/huggingface_pipeline.py:168 in _call
      response = self.pipeline(prompt)

  /local_disk0/.ephemeral_nfs/envs/pythonEnv-e4039dcb-6324-43cf-ad2f-10278d712478/lib/python3.10/site-packages/transformers/pipelines/text_generation.py:201 in __call__
      return super().__call__(text_inputs, **kwargs)

  /local_disk0/.ephemeral_nfs/envs/pythonEnv-e4039dcb-6324-43cf-ad2f-10278d712478/lib/python3.10/site-packages/transformers/pipelines/base.py:1119 in __call__
      return self.run_single(inputs, preprocess_params, forward_params, postproces…

  /local_disk0/.ephemeral_nfs/envs/pythonEnv-e4039dcb-6324-43cf-ad2f-10278d712478/lib/python3.10/site-packages/transformers/pipelines/base.py:1126 in run_single
      model_outputs = self.forward(model_inputs, **forward_params)

  /local_disk0/.ephemeral_nfs/envs/pythonEnv-e4039dcb-6324-43cf-ad2f-10278d712478/lib/python3.10/site-packages/transformers/pipelines/base.py:1025 in forward
      model_outputs = self._forward(model_inputs, **forward_params)

  /local_disk0/.ephemeral_nfs/envs/pythonEnv-e4039dcb-6324-43cf-ad2f-10278d712478/lib/python3.10/site-packages/transformers/pipelines/text_generation.py:263 in _forward
      generated_sequence = self.model.generate(input_ids=input_ids, attention_mask=att…

  /local_disk0/.ephemeral_nfs/envs/pythonEnv-e4039dcb-6324-43cf-ad2f-10278d712478/lib/python3.10/site-packages/torch/utils/_contextlib.py:115 in decorate_context
      return func(*args, **kwargs)

  /local_disk0/.ephemeral_nfs/envs/pythonEnv-e4039dcb-6324-43cf-ad2f-10278d712478/lib/python3.10/site-packages/transformers/generation/utils.py:1565 in generate
      return self.sample(

  /local_disk0/.ephemeral_nfs/envs/pythonEnv-e4039dcb-6324-43cf-ad2f-10278d712478/lib/python3.10/site-packages/transformers/generation/utils.py:2612 in sample
      outputs = self(

  /local_disk0/.ephemeral_nfs/envs/pythonEnv-e4039dcb-6324-43cf-ad2f-10278d712478/lib/python3.10/site-packages/torch/nn/modules/module.py:1501 in _call_impl
      return forward_call(*args, **kwargs)

  /root/.cache/huggingface/modules/transformers_modules/mosaicml/mpt-7b-instruct/925e0d80e50e77aaddaf9c3ced41ca4ea23a1025/modeling_mpt.py:270 in forward
      outputs = self.transformer(input_ids=input_ids, past_key_values=past_key_values,…

  /local_disk0/.ephemeral_nfs/envs/pythonEnv-e4039dcb-6324-43cf-ad2f-10278d712478/lib/python3.10/site-packages/torch/nn/modules/module.py:1501 in _call_impl
      return forward_call(*args, **kwargs)

  /root/.cache/huggingface/modules/transformers_modules/mosaicml/mpt-7b-instruct/925e0d80e50e77aaddaf9c3ced41ca4ea23a1025/modeling_mpt.py:202 in forward
      (x, attn_weights, past_key_value) = block(x, past_key_value=past_key_value,…

  /local_disk0/.ephemeral_nfs/envs/pythonEnv-e4039dcb-6324-43cf-ad2f-10278d712478/lib/python3.10/site-packages/torch/nn/modules/module.py:1501 in _call_impl
      return forward_call(*args, **kwargs)

  /root/.cache/huggingface/modules/transformers_modules/mosaicml/mpt-7b-instruct/925e0d80e50e77aaddaf9c3ced41ca4ea23a1025/blocks.py:36 in forward
      (b, attn_weights, past_key_value) = self.attn(a, past_key_value=past_key_value,…

  /local_disk0/.ephemeral_nfs/envs/pythonEnv-e4039dcb-6324-43cf-ad2f-10278d712478/lib/python3.10/site-packages/torch/nn/modules/module.py:1501 in _call_impl
      return forward_call(*args, **kwargs)

  /root/.cache/huggingface/modules/transformers_modules/mosaicml/mpt-7b-instruct/925e0d80e50e77aaddaf9c3ced41ca4ea23a1025/attention.py:201 in forward
      (context, attn_weights, past_key_value) = self.attn_fn(query, key, value, self.n…

  /root/.cache/huggingface/modules/transformers_modules/mosaicml/mpt-7b-instruct/925e0d80e50e77aaddaf9c3ced41ca4ea23a1025/attention.py:147 in triton_flash_attn_fn
      attn_output = flash_attn_func(query, key, value, attn_bias, reset_is_causal, softmax…

  /local_disk0/.ephemeral_nfs/envs/pythonEnv-e4039dcb-6324-43cf-ad2f-10278d712478/lib/python3.10/site-packages/torch/autograd/function.py:506 in apply
      return super().apply(*args, **kwargs)  # type: ignore[misc]

  /root/.cache/huggingface/modules/transformers_modules/mosaicml/mpt-7b-instruct/925e0d80e50e77aaddaf9c3ced41ca4ea23a1025/flash_attn_triton.py:469 in forward
      (o, lse, ctx.softmax_scale) = _flash_attn_forward(q, k, v, bias=bias, causal=cau…

  /root/.cache/huggingface/modules/transformers_modules/mosaicml/mpt-7b-instruct/925e0d80e50e77aaddaf9c3ced41ca4ea23a1025/flash_attn_triton.py:363 in _flash_attn_forward
      _fwd_kernel[grid](q, k, v, bias, o, lse, tmp, softmax_scale, q.stride(0), q.stride(2…

  /local_disk0/.ephemeral_nfs/envs/pythonEnv-e4039dcb-6324-43cf-ad2f-10278d712478/lib/python3.10/site-packages/triton_pre_mlir/runtime/jit.py:106 in launcher
      return self.run(*args, grid=grid, **kwargs)

  /local_disk0/.ephemeral_nfs/envs/pythonEnv-e4039dcb-6324-43cf-ad2f-10278d712478/lib/python3.10/site-packages/triton_pre_mlir/runtime/autotuner.py:200 in run
      return self.fn.run(*args, **kwargs)

  in _fwd_kernel:43
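The KeyError is raised inside triton_pre_mlir's kernel-cache lookup while launching the _fwd_kernel flash-attention kernel, i.e. the Triton attention path of mosaicml/mpt-7b-instruct fails before the kernel ever executes. A workaround commonly suggested for this class of failure is to load MPT with its pure-PyTorch attention implementation so the Triton kernel is never compiled. A minimal sketch follows; the tokenizer, device index, and generation parameters are illustrative assumptions, not values recovered from the traceback:

import torch
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain.llms import HuggingFacePipeline

name = "mosaicml/mpt-7b-instruct"

# Switch attn_impl from "triton" to "torch" so generation avoids the
# flash_attn_triton kernel launch that raises the KeyError above.
config = AutoConfig.from_pretrained(name, trust_remote_code=True)
config.attn_config["attn_impl"] = "torch"

tokenizer = AutoTokenizer.from_pretrained(name)  # assumption: repo-bundled tokenizer
model = AutoModelForCausalLM.from_pretrained(
    name,
    config=config,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
)

# Wrap in a transformers pipeline and hand it to LangChain, mirroring the
# HuggingFacePipeline -> text-generation call chain in the traceback.
text_gen = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device=0,            # assumption: single-GPU setup
    max_new_tokens=256,  # assumption: illustrative value
)
llm = HuggingFacePipeline(pipeline=text_gen)

If the Triton path is needed for throughput, the usual alternative is to install the exact triton-pre-mlir build pinned by the MPT repo's requirements, since a cache-lookup KeyError like this typically points to a Triton version or toolchain mismatch rather than bad model inputs.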