sugarforever / advanced-rag Goto Github PK
View Code? Open in Web Editor NEW | License: MIT License
License: MIT License
---------------------------------------------------------------------------
ConnectionResetError Traceback (most recent call last)
[/usr/local/lib/python3.10/dist-packages/urllib3/connectionpool.py](https://localhost:8080/#) in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)
790 # Make the request on the HTTPConnection object
--> 791 response = self._make_request(
792 conn,
36 frames
[/usr/local/lib/python3.10/dist-packages/urllib3/connectionpool.py](https://localhost:8080/#) in _make_request(self, conn, method, url, body, headers, retries, timeout, chunked, response_conn, preload_content, decode_content, enforce_content_length)
491 new_e = _wrap_proxy_error(new_e, conn.proxy.scheme)
--> 492 raise new_e
493
[/usr/local/lib/python3.10/dist-packages/urllib3/connectionpool.py](https://localhost:8080/#) in _make_request(self, conn, method, url, body, headers, retries, timeout, chunked, response_conn, preload_content, decode_content, enforce_content_length)
467 try:
--> 468 self._validate_conn(conn)
469 except (SocketTimeout, BaseSSLError) as e:
[/usr/local/lib/python3.10/dist-packages/urllib3/connectionpool.py](https://localhost:8080/#) in _validate_conn(self, conn)
1096 if conn.is_closed:
-> 1097 conn.connect()
1098
[/usr/local/lib/python3.10/dist-packages/urllib3/connection.py](https://localhost:8080/#) in connect(self)
641
--> 642 sock_and_verified = _ssl_wrap_socket_and_match_hostname(
643 sock=sock,
[/usr/local/lib/python3.10/dist-packages/urllib3/connection.py](https://localhost:8080/#) in _ssl_wrap_socket_and_match_hostname(sock, cert_reqs, ssl_version, ssl_minimum_version, ssl_maximum_version, cert_file, key_file, key_password, ca_certs, ca_cert_dir, ca_cert_data, assert_hostname, assert_fingerprint, server_hostname, ssl_context, tls_in_tls)
782
--> 783 ssl_sock = ssl_wrap_socket(
784 sock=sock,
[/usr/local/lib/python3.10/dist-packages/urllib3/util/ssl_.py](https://localhost:8080/#) in ssl_wrap_socket(sock, keyfile, certfile, cert_reqs, ca_certs, server_hostname, ssl_version, ciphers, ssl_context, ca_cert_dir, key_password, ca_cert_data, tls_in_tls)
470
--> 471 ssl_sock = _ssl_wrap_socket_impl(sock, context, tls_in_tls, server_hostname)
472 return ssl_sock
[/usr/local/lib/python3.10/dist-packages/urllib3/util/ssl_.py](https://localhost:8080/#) in _ssl_wrap_socket_impl(sock, ssl_context, tls_in_tls, server_hostname)
514
--> 515 return ssl_context.wrap_socket(sock, server_hostname=server_hostname)
[/usr/lib/python3.10/ssl.py](https://localhost:8080/#) in wrap_socket(self, sock, server_side, do_handshake_on_connect, suppress_ragged_eofs, server_hostname, session)
512 # ctx._wrap_socket()
--> 513 return self.sslsocket_class._create(
514 sock=sock,
[/usr/lib/python3.10/ssl.py](https://localhost:8080/#) in _create(cls, sock, server_side, do_handshake_on_connect, suppress_ragged_eofs, server_hostname, context, session)
1099 raise ValueError("do_handshake_on_connect should not be specified for non-blocking sockets")
-> 1100 self.do_handshake()
1101 except (OSError, ValueError):
[/usr/lib/python3.10/ssl.py](https://localhost:8080/#) in do_handshake(self, block)
1370 self.settimeout(None)
-> 1371 self._sslobj.do_handshake()
1372 finally:
ConnectionResetError: [Errno 104] Connection reset by peer
During handling of the above exception, another exception occurred:
ProtocolError Traceback (most recent call last)
[<ipython-input-20-86b1c0c50fc9>](https://localhost:8080/#) in <cell line: 1>()
----> 1 vectorstore.add_documents(documents, namespace=USER_2)
[/usr/local/lib/python3.10/dist-packages/langchain_core/vectorstores.py](https://localhost:8080/#) in add_documents(self, documents, **kwargs)
120 texts = [doc.page_content for doc in documents]
121 metadatas = [doc.metadata for doc in documents]
--> 122 return self.add_texts(texts, metadatas, **kwargs)
123
124 async def aadd_documents(
[/usr/local/lib/python3.10/dist-packages/langchain_community/vectorstores/pinecone.py](https://localhost:8080/#) in add_texts(self, texts, metadatas, ids, namespace, batch_size, embedding_chunk_size, **kwargs)
150 )
151 ]
--> 152 [res.get() for res in async_res]
153
154 return ids
[/usr/local/lib/python3.10/dist-packages/langchain_community/vectorstores/pinecone.py](https://localhost:8080/#) in <listcomp>(.0)
150 )
151 ]
--> 152 [res.get() for res in async_res]
153
154 return ids
[/usr/lib/python3.10/multiprocessing/pool.py](https://localhost:8080/#) in get(self, timeout)
772 return self._value
773 else:
--> 774 raise self._value
775
776 def _set(self, i, obj):
[/usr/lib/python3.10/multiprocessing/pool.py](https://localhost:8080/#) in worker(inqueue, outqueue, initializer, initargs, maxtasks, wrap_exception)
123 job, i, func, args, kwds = task
124 try:
--> 125 result = (True, func(*args, **kwds))
126 except Exception as e:
127 if wrap_exception and func is not _helper_reraises_exception:
[/usr/local/lib/python3.10/dist-packages/pinecone/core/client/api_client.py](https://localhost:8080/#) in __call_api(self, resource_path, method, path_params, query_params, header_params, body, post_params, files, response_type, auth_settings, _return_http_data_only, collection_formats, _preload_content, _request_timeout, _host, _check_type)
198 try:
199 # perform request and return response
--> 200 response_data = self.request(
201 method, url, query_params=query_params, headers=header_params,
202 post_params=post_params, body=body,
[/usr/local/lib/python3.10/dist-packages/pinecone/core/client/api_client.py](https://localhost:8080/#) in request(self, method, url, query_params, headers, post_params, body, _preload_content, _request_timeout)
457 body=body)
458 elif method == "POST":
--> 459 return self.rest_client.POST(url,
460 query_params=query_params,
461 headers=headers,
[/usr/local/lib/python3.10/dist-packages/pinecone/core/client/rest.py](https://localhost:8080/#) in POST(self, url, headers, query_params, post_params, body, _preload_content, _request_timeout)
269 def POST(self, url, headers=None, query_params=None, post_params=None,
270 body=None, _preload_content=True, _request_timeout=None):
--> 271 return self.request("POST", url,
272 headers=headers,
273 query_params=query_params,
[/usr/local/lib/python3.10/dist-packages/pinecone/core/client/rest.py](https://localhost:8080/#) in request(self, method, url, query_params, headers, body, post_params, _preload_content, _request_timeout)
155 if body is not None:
156 request_body = json.dumps(body)
--> 157 r = self.pool_manager.request(
158 method, url,
159 body=request_body,
[/usr/local/lib/python3.10/dist-packages/urllib3/_request_methods.py](https://localhost:8080/#) in request(self, method, url, body, fields, headers, json, **urlopen_kw)
116 )
117 else:
--> 118 return self.request_encode_body(
119 method, url, fields=fields, headers=headers, **urlopen_kw
120 )
[/usr/local/lib/python3.10/dist-packages/urllib3/_request_methods.py](https://localhost:8080/#) in request_encode_body(self, method, url, fields, headers, encode_multipart, multipart_boundary, **urlopen_kw)
215 extra_kw.update(urlopen_kw)
216
--> 217 return self.urlopen(method, url, **extra_kw)
[/usr/local/lib/python3.10/dist-packages/urllib3/poolmanager.py](https://localhost:8080/#) in urlopen(self, method, url, redirect, **kw)
441 response = conn.urlopen(method, url, **kw)
442 else:
--> 443 response = conn.urlopen(method, u.request_uri, **kw)
444
445 redirect_location = redirect and response.get_redirect_location()
[/usr/local/lib/python3.10/dist-packages/urllib3/connectionpool.py](https://localhost:8080/#) in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)
843 new_e = ProtocolError("Connection aborted.", new_e)
844
--> 845 retries = retries.increment(
846 method, url, error=new_e, _pool=self, _stacktrace=sys.exc_info()[2]
847 )
[/usr/local/lib/python3.10/dist-packages/urllib3/util/retry.py](https://localhost:8080/#) in increment(self, method, url, response, error, _pool, _stacktrace)
468 # Read retry?
469 if read is False or method is None or not self._is_method_retryable(method):
--> 470 raise reraise(type(error), error, _stacktrace)
471 elif read is not None:
472 read -= 1
[/usr/local/lib/python3.10/dist-packages/urllib3/util/util.py](https://localhost:8080/#) in reraise(tp, value, tb)
36 try:
37 if value.__traceback__ is not tb:
---> 38 raise value.with_traceback(tb)
39 raise value
40 finally:
[/usr/local/lib/python3.10/dist-packages/urllib3/connectionpool.py](https://localhost:8080/#) in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)
789
790 # Make the request on the HTTPConnection object
--> 791 response = self._make_request(
792 conn,
793 method,
[/usr/local/lib/python3.10/dist-packages/urllib3/connectionpool.py](https://localhost:8080/#) in _make_request(self, conn, method, url, body, headers, retries, timeout, chunked, response_conn, preload_content, decode_content, enforce_content_length)
490 ) and (conn and conn.proxy and not conn.has_connected_to_proxy):
491 new_e = _wrap_proxy_error(new_e, conn.proxy.scheme)
--> 492 raise new_e
493
494 # conn.request() calls http.client.*.request, not the method in
[/usr/local/lib/python3.10/dist-packages/urllib3/connectionpool.py](https://localhost:8080/#) in _make_request(self, conn, method, url, body, headers, retries, timeout, chunked, response_conn, preload_content, decode_content, enforce_content_length)
466 # Trigger any extra validation we need to do.
467 try:
--> 468 self._validate_conn(conn)
469 except (SocketTimeout, BaseSSLError) as e:
470 self._raise_timeout(err=e, url=url, timeout_value=conn.timeout)
[/usr/local/lib/python3.10/dist-packages/urllib3/connectionpool.py](https://localhost:8080/#) in _validate_conn(self, conn)
1095 # Force connect early to allow us to validate the connection.
1096 if conn.is_closed:
-> 1097 conn.connect()
1098
1099 if not conn.is_verified:
[/usr/local/lib/python3.10/dist-packages/urllib3/connection.py](https://localhost:8080/#) in connect(self)
640 )
641
--> 642 sock_and_verified = _ssl_wrap_socket_and_match_hostname(
643 sock=sock,
644 cert_reqs=self.cert_reqs,
[/usr/local/lib/python3.10/dist-packages/urllib3/connection.py](https://localhost:8080/#) in _ssl_wrap_socket_and_match_hostname(sock, cert_reqs, ssl_version, ssl_minimum_version, ssl_maximum_version, cert_file, key_file, key_password, ca_certs, ca_cert_dir, ca_cert_data, assert_hostname, assert_fingerprint, server_hostname, ssl_context, tls_in_tls)
781 server_hostname = normalized
782
--> 783 ssl_sock = ssl_wrap_socket(
784 sock=sock,
785 keyfile=key_file,
[/usr/local/lib/python3.10/dist-packages/urllib3/util/ssl_.py](https://localhost:8080/#) in ssl_wrap_socket(sock, keyfile, certfile, cert_reqs, ca_certs, server_hostname, ssl_version, ciphers, ssl_context, ca_cert_dir, key_password, ca_cert_data, tls_in_tls)
469 pass
470
--> 471 ssl_sock = _ssl_wrap_socket_impl(sock, context, tls_in_tls, server_hostname)
472 return ssl_sock
473
[/usr/local/lib/python3.10/dist-packages/urllib3/util/ssl_.py](https://localhost:8080/#) in _ssl_wrap_socket_impl(sock, ssl_context, tls_in_tls, server_hostname)
513 return SSLTransport(sock, ssl_context, server_hostname)
514
--> 515 return ssl_context.wrap_socket(sock, server_hostname=server_hostname)
[/usr/lib/python3.10/ssl.py](https://localhost:8080/#) in wrap_socket(self, sock, server_side, do_handshake_on_connect, suppress_ragged_eofs, server_hostname, session)
511 # SSLSocket class handles server_hostname encoding before it calls
512 # ctx._wrap_socket()
--> 513 return self.sslsocket_class._create(
514 sock=sock,
515 server_side=server_side,
[/usr/lib/python3.10/ssl.py](https://localhost:8080/#) in _create(cls, sock, server_side, do_handshake_on_connect, suppress_ragged_eofs, server_hostname, context, session)
1098 # non-blocking
1099 raise ValueError("do_handshake_on_connect should not be specified for non-blocking sockets")
-> 1100 self.do_handshake()
1101 except (OSError, ValueError):
1102 self.close()
[/usr/lib/python3.10/ssl.py](https://localhost:8080/#) in do_handshake(self, block)
1369 if timeout == 0.0 and block:
1370 self.settimeout(None)
-> 1371 self._sslobj.do_handshake()
1372 finally:
1373 self.settimeout(timeout)
ProtocolError: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))
I ran your code from 01_semi_structured_data.ipynb in Google Colab:
from typing import Any
from pydantic import BaseModel
from unstructured.partition.pdf import partition_pdf
raw_pdf_elements = partition_pdf(
filename="statement_of_changes.pdf",
extract_images_in_pdf=False,
infer_table_structure=True,
chunking_strategy="by_title",
max_characters=4000,
new_after_n_chars=3800,
combine_text_under_n_chars=2000,
image_output_dir_path=".",
)
and got the following error:
WARNING:unstructured:This function will be deprecated in a future release and `unstructured` will simply use the DEFAULT_MODEL from `unstructured_inference.model.base` to set default model name
---------------------------------------------------------------------------
UnidentifiedImageError Traceback (most recent call last)
[<ipython-input-10-c47946c825bc>](https://localhost:8080/#) in <cell line: 6>()
4 from unstructured.partition.pdf import partition_pdf
5
----> 6 raw_pdf_elements = partition_pdf(
7 filename="statement_of_changes.pdf",
8 extract_images_in_pdf=False,
10 frames
[/usr/local/lib/python3.10/dist-packages/PIL/Image.py](https://localhost:8080/#) in open(fp, mode, formats)
3281 fp.seek(0)
3282 except (AttributeError, io.UnsupportedOperation):
-> 3283 fp = io.BytesIO(fp.read())
3284 exclusive_fp = True
3285
UnidentifiedImageError: cannot identify image file '/tmp/tmpt9l2pd51/88be9f82-5a19-4ec0-baa1-a029cf45dfc4-1.ppm'
I have no idea how to resolve it.
A declarative, efficient, and flexible JavaScript library for building user interfaces.
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
An Open Source Machine Learning Framework for Everyone
The Web framework for perfectionists with deadlines.
A PHP framework for web artisans
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
Something interesting about the web. A new door to the world.
A server is a program made to process requests and deliver data to clients.
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
Something interesting about visualization: using data as art.
Something interesting about games: make everyone happy.
We are working to build community through open source technology. NB: members must have two-factor auth.
Open source projects and samples from Microsoft.
Google ❤️ Open Source for everyone.
Alibaba Open Source for everyone
Data-Driven Documents codes.
China Tencent open source team.