Pengcheng Liu 2 weeks ago committed by GitHub
commit f6380a9a49
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -0,0 +1,110 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "4394a872",
"metadata": {},
"source": [
"# Alibaba Cloud OSS File\n",
"The following code demonstrates how to load an object from Alibaba Cloud OSS as a document."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "c43d811b",
"metadata": {},
"outputs": [],
"source": [
"# Install the required package\n",
"# pip install oss2"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "5e16bae6",
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.document_loaders.oss_file import OSSFileLoader"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "f9816984",
"metadata": {},
"outputs": [],
"source": [
"loader = OSSFileLoader(\n",
" \"your-bucket-name\",\n",
" \"your-object-key\",\n",
" \"your-endpoint\",\n",
" \"your-access-key-id\",\n",
" \"your-access-key-secret\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6143b39b",
"metadata": {},
"outputs": [],
"source": [
"loader.load()"
]
},
{
"cell_type": "markdown",
"id": "b77aa18c",
"metadata": {},
"source": [
"## Access a Publicly Accessible Object\n",
"If the object you want to access allows anonymous user access, you can directly load the object without any authentication."
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "df83d121",
"metadata": {},
"outputs": [],
"source": [
"loader = OSSFileLoader(\"your-bucket-name\", \"your-object-key\", \"your-endpoint\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "82a844ba",
"metadata": {},
"outputs": [],
"source": [
"loader.load()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

@ -0,0 +1,95 @@
# coding:utf-8
import os
import tempfile
from typing import Any, List, Optional
from langchain_core.documents import Document
from langchain_community.document_loaders.base import BaseLoader
from langchain_community.document_loaders.unstructured import UnstructuredFileLoader
class OSSFileLoader(BaseLoader):
    """Load a single object from `Alibaba Cloud OSS` as documents.

    The object is downloaded into a temporary directory and then parsed with
    `UnstructuredFileLoader`.
    """

    def __init__(
        self,
        bucket: str,
        key: str,
        endpoint: str,
        access_key_id: Optional[str] = None,
        access_key_secret: Optional[str] = None,
        auth: Optional[Any] = None,
    ) -> None:
        """Initialize the OSSFileLoader with the specified settings.

        Args:
            bucket (str): The name of the OSS bucket to be used.
            key (str): The name of the object in the OSS bucket.
            endpoint (str): The endpoint URL of your OSS bucket.
            access_key_id (str, optional): The access key ID for authentication. Defaults to None.
            access_key_secret (str, optional): The access key secret for authentication. Defaults to None.
            auth (oss2.auth.Auth or oss2.auth.ProviderAuth, optional): An existing oss2 auth instance.
                Ignored when both `access_key_id` and `access_key_secret` are given.

        Raises:
            ImportError: If the `oss2` package is not installed.
            TypeError: If the provided `auth` is not an instance of oss2.auth.Auth or oss2.auth.ProviderAuth.

        Note:
            Before using this class, make sure you have registered with OSS and have the necessary credentials.
            If none of the above authentication methods is provided, the loader will attempt to access the OSS file anonymously.

        Example:
            To create a new OSSFileLoader with explicit access key and secret:
            ```
            oss_loader = OSSFileLoader(
                "your-bucket-name",
                "your-object-key",
                "your-endpoint-url",
                "your-access-key",
                "your-access-key-secret"
            )
            ```

            To create a new OSSFileLoader with an existing auth from environment variables:
            ```
            import oss2
            from oss2.credentials import EnvironmentVariableCredentialsProvider
            auth = oss2.ProviderAuth(EnvironmentVariableCredentialsProvider())
            oss_loader = OSSFileLoader(
                "your-bucket-name",
                "your-object-key",
                "your-endpoint-url",
                auth=auth
            )
            ```
        """  # noqa: E501
        try:
            import oss2
        except ImportError as e:
            raise ImportError(
                "Could not import oss2 python package. "
                "Please install it with `pip install oss2`."
            ) from e

        if access_key_id and access_key_secret:
            # An explicit key pair takes precedence over a pre-built auth object.
            self.auth = oss2.Auth(access_key_id, access_key_secret)
        elif auth is not None:
            # Fail loudly instead of silently downgrading to anonymous access
            # when the caller passed something that is not a supported auth.
            if not isinstance(auth, (oss2.Auth, oss2.ProviderAuth)):
                raise TypeError(
                    "`auth` must be an instance of oss2.Auth or oss2.ProviderAuth, "
                    f"got {type(auth).__name__}."
                )
            self.auth = auth
        else:
            self.auth = oss2.AnonymousAuth()

        self.access_key_id = access_key_id
        self.access_key_secret = access_key_secret
        self.bucket = oss2.Bucket(self.auth, endpoint, bucket)
        self.key = key

    def load(self) -> List[Document]:
        """Download the object and parse it into documents.

        Returns:
            The documents produced by `UnstructuredFileLoader` for the
            downloaded file.
        """
        with tempfile.TemporaryDirectory() as temp_dir:
            # `self.bucket` is an oss2.Bucket object, not a string; use its
            # name so the local path does not contain the object's repr
            # (which includes characters that are invalid on some platforms).
            # Leading slashes are stripped so the joined path can never be
            # treated as absolute and escape the temporary directory.
            file_path = os.path.join(
                temp_dir, self.bucket.bucket_name, self.key.lstrip("/")
            )
            os.makedirs(os.path.dirname(file_path), exist_ok=True)
            # Download the file to a destination
            self.bucket.get_object_to_file(self.key, file_path)
            loader = UnstructuredFileLoader(file_path)
            # Parse before leaving the `with` block: the temporary directory
            # (and the downloaded file) is removed on exit.
            return loader.load()

@ -0,0 +1,16 @@
from langchain_community.document_loaders.oss_file import OSSFileLoader
# Connection settings handed to OSSFileLoader by the test below.
# NOTE(review): every value is an empty placeholder here — presumably they
# must be filled in with a real bucket, object key, endpoint and credentials
# before the test can pass; confirm how these are meant to be supplied.
BUCKET = ""
FILE_KEY = ""
ENDPOINT = ""
ACCESS_KEY_ID = ""
ACCESS_KEY_SECRET = ""
def test_oss_file_loader() -> None:
    """Load one object through OSSFileLoader and expect exactly one document."""
    oss_loader = OSSFileLoader(
        BUCKET,
        FILE_KEY,
        ENDPOINT,
        ACCESS_KEY_ID,
        ACCESS_KEY_SECRET,
    )
    documents = oss_loader.load()
    # A single object is expected to come back as a single document.
    assert len(documents) == 1
    first_document = documents[0]
    assert first_document.page_content is not None

@ -0,0 +1,23 @@
from typing import TYPE_CHECKING, Any
from langchain._api import create_importer
if TYPE_CHECKING:
from langchain_community.document_loaders import OSSFileLoader
# Create a way to dynamically look up deprecated imports.
# Used to consolidate logic for raising deprecation warnings and
# handling optional imports.
# Keys are the names this module still exposes; values are the module paths
# handed to `create_importer` as the place to find them.
# NOTE(review): this assumes `OSSFileLoader` is exported from the
# `langchain_community.document_loaders` package itself — confirm.
DEPRECATED_LOOKUP = {"OSSFileLoader": "langchain_community.document_loaders"}
_import_attribute = create_importer(__package__, deprecated_lookups=DEPRECATED_LOOKUP)
def __getattr__(name: str) -> Any:
    """Resolve a module attribute on demand via the deprecated-import helper.

    Args:
        name: The attribute being accessed on this module.

    Returns:
        Whatever `_import_attribute` returns for `name`.
    """
    resolved = _import_attribute(name)
    return resolved
# Public names of this module; `OSSFileLoader` is not defined here and is
# resolved through the module-level `__getattr__`.
__all__ = [
"OSSFileLoader",
]
Loading…
Cancel
Save