Hello,
I have the following code in my Jupyter notebook:
!pip install scikit-llm
from skllm.config import SKLLMConfig
SKLLMConfig.set_openai_key("")
SKLLMConfig.set_azure_api_base("<my azure api base")
from skllm.datasets import get_multilabel_classification_dataset
X, y = get_classification_dataset()
from skllm import DynamicFewShotGPTClassifier
GPT_model3 = DynamicFewShotGPTClassifier(n_examples=3)
GPT_model3.fit(X, y)
GPT_labels3 = GPT_model3.predict(X)
from skllm.preprocessing import GPTVectorizer
model = GPTVectorizer()
vectors = model.fit_transform(X)
Both the DynamicFewShotGPTClassifier and the GPTVectorizer raise the same error:
RuntimeError Traceback (most recent call last)
Cell In[135], line 4
2 X, _ = get_classification_dataset()
3 model = GPTVectorizer()
----> 4 vectors = model.fit_transform(X)
File ~/anaconda3/lib/python3.10/site-packages/sklearn/utils/_set_output.py:142, in _wrap_method_output..wrapped(self, X, *args, **kwargs)
140 @wraps(f)
141 def wrapped(self, X, *args, **kwargs):
--> 142 data_to_wrap = f(self, X, *args, **kwargs)
143 if isinstance(data_to_wrap, tuple):
144 # only wrap the first output for cross decomposition
145 return (
146 _wrap_data_with_container(method, data_to_wrap[0], X, self),
147 *data_to_wrap[1:],
148 )
File ~/anaconda3/lib/python3.10/site-packages/skllm/preprocessing/gpt_vectorizer.py:94, in GPTVectorizer.fit_transform(self, X, y, **fit_params)
79 def fit_transform(self, X: Optional[Union[np.ndarray, pd.Series, List[str]]], y=None, **fit_params) -> ndarray:
80 """
81 Fits and transforms a list of strings into a list of GPT embeddings.
82 This is modelled to function as the sklearn fit_transform method
(...)
92 embeddings : np.ndarray
93 """
---> 94 return self.fit(X, y).transform(X)
File ~/anaconda3/lib/python3.10/site-packages/sklearn/utils/_set_output.py:142, in _wrap_method_output..wrapped(self, X, *args, **kwargs)
140 @wraps(f)
141 def wrapped(self, X, *args, **kwargs):
--> 142 data_to_wrap = f(self, X, *args, **kwargs)
143 if isinstance(data_to_wrap, tuple):
144 # only wrap the first output for cross decomposition
145 return (
146 _wrap_data_with_container(method, data_to_wrap[0], X, self),
147 *data_to_wrap[1:],
148 )
File ~/anaconda3/lib/python3.10/site-packages/skllm/preprocessing/gpt_vectorizer.py:74, in GPTVectorizer.transform(self, X)
71 embeddings = []
72 for i in tqdm(range(len(X))):
73 embeddings.append(
---> 74 _get_embedding(X[i], self._get_openai_key(), self._get_openai_org())
75 )
76 embeddings = np.asarray(embeddings)
77 return embeddings
File ~/anaconda3/lib/python3.10/site-packages/skllm/openai/embeddings.py:48, in get_embedding(text, key, org, model, max_retries)
46 error_type = type(e).__name__
47 sleep(3)
---> 48 raise RuntimeError(
49 f"Could not obtain the embedding after {max_retries} retries: {error_type} :: {error_msg}
"
50 )
RuntimeError: Could not obtain the embedding after 3 retries: InvalidRequestError :: Must provide an 'engine' or 'deployment_id' parameter to create a <class 'openai.api_resources.embedding.Embedding'>
. '
If you could help me figure out this issue, that would be great!