mirror of https://github.com/hwchase17/langchain
Merge dbf0a79b2f
into 242eeb537f
commit
f6380a9a49
@ -0,0 +1,110 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4394a872",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Alibaba Cloud OSS File\n",
|
||||
"The following code demonstrates how to load an object from Alibaba Cloud OSS as a document."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "c43d811b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Install the required package\n",
|
||||
"# pip install oss2"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "5e16bae6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders.oss_file import OSSFileLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "f9816984",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = OSSFileLoader(\n",
|
||||
" \"your-bucket-name\",\n",
|
||||
" \"your-object-key\",\n",
|
||||
" \"your-endpoint\",\n",
|
||||
" \"your-access-key-id\",\n",
|
||||
" \"your-access-key-secret\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6143b39b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b77aa18c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Access a Publicly Accessible Object\n",
|
||||
"If the object you want to access allows anonymous user access, you can directly load the object without any authentication."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "df83d121",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = OSSFileLoader(\"your-bucket-name\", \"your-object-key\", \"your-endpoint\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "82a844ba",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader.load()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
@ -0,0 +1,95 @@
|
||||
# coding:utf-8
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
from typing import Any, List, Optional
|
||||
|
||||
from langchain_core.documents import Document
|
||||
|
||||
from langchain_community.document_loaders.base import BaseLoader
|
||||
from langchain_community.document_loaders.unstructured import UnstructuredFileLoader
|
||||
|
||||
|
||||
class OSSFileLoader(BaseLoader):
    """Load from the `Alibaba Cloud OSS file`.

    The object is downloaded into a temporary directory and then parsed with
    `UnstructuredFileLoader`.
    """

    def __init__(
        self,
        bucket: str,
        key: str,
        endpoint: str,
        access_key_id: Optional[str] = None,
        access_key_secret: Optional[str] = None,
        auth: Optional[Any] = None,
    ) -> None:
        """Initialize the OSSFileLoader with the specified settings.

        Args:
            bucket (str): The name of the OSS bucket to be used.
            key (str): The name of the object in the OSS bucket.
            endpoint (str): The endpoint URL of your OSS bucket.
            access_key_id (str, optional): The access key ID for authentication. Defaults to None.
            access_key_secret (str, optional): The access key secret for authentication. Defaults to None.
            auth (oss2.auth.Auth or oss2.auth.ProviderAuth, optional): An instance of the oss2.auth class.
                Only consulted when `access_key_id` and `access_key_secret` are not both provided.

        Raises:
            ImportError: If the `oss2` package is not installed.
            TypeError: If the provided `auth` is not an instance of oss2.auth.Auth or oss2.auth.ProviderAuth.

        Note:
            Before using this class, make sure you have registered with OSS and have the necessary credentials.
            If none of the above authentication methods is provided, the loader will attempt to access the OSS object anonymously.

        Example:
            To create a new OSSFileLoader with explicit access key and secret:
            ```
            oss_loader = OSSFileLoader(
                "your-bucket-name",
                "your-object-key",
                "your-endpoint-url",
                "your-access-key",
                "your-access-key-secret"
            )
            ```

            To create a new OSSFileLoader with an existing auth from environment variables:
            ```
            import oss2
            from oss2.credentials import EnvironmentVariableCredentialsProvider
            auth = oss2.ProviderAuth(EnvironmentVariableCredentialsProvider())

            oss_loader = OSSFileLoader(
                "your-bucket-name",
                "your-object-key",
                "your-endpoint-url",
                auth=auth
            )
            ```
        """  # noqa: E501
        try:
            import oss2
        except ImportError as exc:
            raise ImportError(
                "Could not import oss2 python package. "
                "Please install it with `pip install oss2`."
            ) from exc

        if access_key_id and access_key_secret:
            # An explicit key pair takes precedence over a supplied `auth` object.
            self.auth = oss2.Auth(access_key_id, access_key_secret)
        elif auth is not None:
            # Fail loudly, as documented, instead of silently downgrading an
            # unsupported `auth` object to anonymous access.
            if not isinstance(auth, (oss2.Auth, oss2.ProviderAuth)):
                raise TypeError(
                    "auth must be an instance of oss2.Auth or oss2.ProviderAuth, "
                    f"got {type(auth).__name__}."
                )
            self.auth = auth
        else:
            self.auth = oss2.AnonymousAuth()

        self.access_key_id = access_key_id
        self.access_key_secret = access_key_secret
        # Keep the plain bucket name: `self.bucket` is an oss2.Bucket client
        # object and must not be interpolated into file-system paths.
        self.bucket_name = bucket
        self.bucket = oss2.Bucket(self.auth, endpoint, bucket)
        self.key = key

    def load(self) -> List[Document]:
        """Load documents.

        Downloads the object to a temporary directory, parses it with
        `UnstructuredFileLoader`, and returns the resulting documents. The
        temporary directory is removed when the method returns.
        """
        with tempfile.TemporaryDirectory() as temp_dir:
            # Use the bucket *name* here; formatting the oss2.Bucket object would
            # embed its repr (with characters that are invalid on some platforms).
            file_path = f"{temp_dir}/{self.bucket_name}/{self.key}"
            # Object keys may contain "/" so the parent directories must exist.
            os.makedirs(os.path.dirname(file_path), exist_ok=True)
            # Download the file to a destination
            self.bucket.get_object_to_file(self.key, file_path)
            loader = UnstructuredFileLoader(file_path)
            return loader.load()
|
@ -0,0 +1,16 @@
|
||||
from langchain_community.document_loaders.oss_file import OSSFileLoader
|
||||
|
||||
# Connection settings used by the test below. They are empty placeholders here;
# presumably they must be filled in with real OSS values before running.
BUCKET = ""
FILE_KEY = ""
ENDPOINT = ""
ACCESS_KEY_ID = ""
ACCESS_KEY_SECRET = ""


def test_oss_file_loader() -> None:
    """Check that loading a single OSS object yields exactly one document."""
    documents = OSSFileLoader(
        BUCKET,
        FILE_KEY,
        ENDPOINT,
        ACCESS_KEY_ID,
        ACCESS_KEY_SECRET,
    ).load()

    assert len(documents) == 1
    first_document = documents[0]
    assert first_document.page_content is not None
|
@ -0,0 +1,23 @@
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
from langchain._api import create_importer
|
||||
|
||||
if TYPE_CHECKING:
    # Imported for static type checkers only; runtime access goes through the
    # dynamic lookup configured below.
    from langchain_community.document_loaders import OSSFileLoader

# Table of deprecated import names and the module each one should be looked
# up in. It lets the deprecation-warning and optional-import handling live in
# one shared place.
DEPRECATED_LOOKUP = {
    "OSSFileLoader": "langchain_community.document_loaders",
}

_import_attribute = create_importer(
    __package__,
    deprecated_lookups=DEPRECATED_LOOKUP,
)
|
||||
|
||||
|
||||
def __getattr__(name: str) -> Any:
    """Resolve a module attribute on demand via the module's importer."""
    attribute = _import_attribute(name)
    return attribute
|
||||
|
||||
|
||||
# Names exported by this module.
__all__ = ["OSSFileLoader"]
|
Loading…
Reference in New Issue