Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
adaab80
adding support for vector download
MichaelLukowski Oct 31, 2025
a18b512
fix fstring escape
MichaelLukowski Nov 4, 2025
9255e13
add model to vec get
MichaelLukowski Nov 4, 2025
ad409e0
change route for vector download
MichaelLukowski Nov 4, 2025
98abb8f
update vector return
MichaelLukowski Nov 4, 2025
15a5cf8
add vec class
MichaelLukowski Nov 4, 2025
a5804e3
add debug statement for vec
MichaelLukowski Nov 4, 2025
f9ec6b0
fix requests url
MichaelLukowski Nov 4, 2025
98ccf74
fix request protocol for vec signed url
MichaelLukowski Nov 4, 2025
589d2e0
adding vector upload
MichaelLukowski Nov 5, 2025
fcda7dd
fix import and add logging
MichaelLukowski Nov 5, 2025
9ff13a7
fix function typo
MichaelLukowski Nov 5, 2025
5485623
remove old code
MichaelLukowski Nov 5, 2025
3638613
update indexd post params
MichaelLukowski Nov 5, 2025
0b237b6
remove more old code
MichaelLukowski Nov 5, 2025
f0d09fd
fix string in url
MichaelLukowski Nov 6, 2025
6dcb43c
fix string escape typo
MichaelLukowski Nov 6, 2025
52ab7e9
ensure md5 is not used for security
MichaelLukowski Nov 7, 2025
68392f8
add bulk download
MichaelLukowski Nov 13, 2025
24ac110
add bulk download
MichaelLukowski Nov 13, 2025
59e0234
bulk download with authz role caching
MichaelLukowski Dec 15, 2025
1f2e60f
add function definiton
MichaelLukowski Dec 15, 2025
f5bec59
fix class call
MichaelLukowski Dec 15, 2025
4af5368
clean up imports
MichaelLukowski Dec 15, 2025
7220fe5
update function arguments
MichaelLukowski Dec 15, 2025
1452601
fix loop
MichaelLukowski Dec 15, 2025
c34c074
actions of bulk to only support download
MichaelLukowski Dec 15, 2025
c132e40
actions of bulk to only support download
MichaelLukowski Dec 15, 2025
8a548c5
fix escape quotes
MichaelLukowski Dec 15, 2025
f403b36
no actions for bulk downloads
MichaelLukowski Dec 15, 2025
34d2afe
fix bulk actions
MichaelLukowski Dec 15, 2025
a352217
fix bulk download return
MichaelLukowski Dec 15, 2025
21f44bd
reformat bulk response
MichaelLukowski Dec 15, 2025
87d2aed
Merge branch 'master' into feat/embedding-vectors
MichaelLukowski Dec 15, 2025
256f199
change ems service name
MichaelLukowski Jan 8, 2026
99c9fe1
Merge branch 'feat/embedding-vectors' of github.com:uc-cdis/fence int…
MichaelLukowski Jan 8, 2026
9eeb4b0
change service networking for embedding management service
MichaelLukowski Jan 12, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 128 additions & 0 deletions fence/blueprints/data/blueprint.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import flask
import json
import hashlib

from cdislogging import get_logger
from cdispyutils.config import get_value
Expand All @@ -7,8 +9,10 @@
from fence.authz.auth import check_arborist_auth
from fence.blueprints.data.indexd import (
BlankIndex,
EmbeddingIndex,
IndexedFile,
get_signed_url_for_file,
bulk_get_signed_url_for_file,
verify_data_upload_bucket_configuration,
)
from fence.config import config
Expand Down Expand Up @@ -197,6 +201,101 @@ def upload_data_file():
return flask.jsonify(response), 201


@blueprint.route("/upload/vector", methods=["POST"])
@require_auth_header(scope={"data"})
@login_required({"data"})
def upload_vector():
    """
    Register an embedding vector and create a matching indexd record.

    The request body must be a JSON object containing the keys `model`,
    `embedding`, `file_id` and `authz`. The caller is authorized through
    arborist before anything is created:

    - if `authz` is non-empty, the user needs both `create` and
      `write-storage` on the listed resources;
    - if `authz` is present but empty, the check falls back to the
      `file_upload` method on `/data_file`.

    Returns:
        A ``(response, 201)`` tuple whose JSON body holds a `message` and the
        `guid` returned by `EmbeddingIndex.create_indexd_record`.

    Raises:
        UserError: if the body is missing, is not a JSON object, or lacks one
            of the required keys.
        Forbidden: if arborist does not authorize the request.
    """
    params = flask.request.get_json()
    if not params:
        raise UserError("wrong Content-Type; expected application/json")
    # A non-object body (e.g. a JSON list) would pass the `in` checks below
    # and then fail on `.get`, so reject it explicitly.
    if not isinstance(params, dict):
        raise UserError("request body must be a JSON object")

    # Checked in this order so the first missing key is the one reported.
    for required_key in ("model", "embedding", "file_id", "authz"):
        if required_key not in params:
            raise UserError(
                "missing required argument `{}`".format(required_key)
            )

    authz_err_msg = "Auth error when attempting to get a presigned URL for upload. User must have '{}' access on '{}'."

    authz = params.get("authz")
    uploader = None

    if authz:
        # if requesting an authz field, using new authorization method which doesn't
        # rely on uploader field, so clear it out
        uploader = ""
        authorized = flask.current_app.arborist.auth_request(
            jwt=get_jwt(),
            service="fence",
            methods=["create", "write-storage"],
            resources=authz,
        )
        if not authorized:
            logger.error(authz_err_msg.format("create' and 'write-storage", authz))
    else:
        # `authz` was supplied but empty, so fall back on 'file_upload' logic
        # NOTE(review): `uploader` stays None on this path -- confirm that is
        # what EmbeddingIndex expects.
        authorized = flask.current_app.arborist.auth_request(
            jwt=get_jwt(),
            service="fence",
            methods=["file_upload"],
            resources=["/data_file"],
        )
        if not authorized:
            logger.error(authz_err_msg.format("file_upload", "/data_file"))

    if not authorized:
        raise Forbidden(
            "You do not have access to upload data. You either need "
            "general file uploader permissions or create and write-storage permissions "
            "on the authz resources you specified (if you specified any)."
        )

    embedding_index = EmbeddingIndex(
        authz=authz,
        model=params.get("model"),
        embedding=params.get("embedding"),
        file_id=params.get("file_id"),
        uploader=uploader,
    )

    # create embedding record and get id from service
    embedding_id, md5_hash = embedding_index.create_embedding_record()

    guid = embedding_index.create_indexd_record(embedding_id, md5_hash)

    response = {
        "message": "An embedding was successfully added to the embedding management service and inserted into indexd",
        "guid": guid,
    }

    return flask.jsonify(response), 201


@blueprint.route("/multipart/init", methods=["POST"])
@require_auth_header(scope={"data"})
@login_required({"data"})
Expand Down Expand Up @@ -348,6 +447,35 @@ def download_file(file_id):
return flask.redirect(result["url"])


@blueprint.route("/download/bulk", methods=["POST"])
def download_bulk_files():
    """
    Handle a bulk download request for several GUIDs at once.

    The request body must be a JSON object of the form
    ``{"guids": ["1234", "4567"]}``. The list is passed to
    `bulk_get_signed_url_for_file` and whatever that returns is sent back
    as the JSON response.

    Raises:
        UserError: if the body is missing, is not a JSON object, has no
            `guids` key, or `guids` is not a list.
    """
    params = flask.request.get_json()
    if not params:
        raise UserError("wrong Content-Type; expected application/json")
    # A non-object body (e.g. a JSON list) would make `params["guids"]`
    # raise TypeError, so reject it as a user error instead.
    if not isinstance(params, dict):
        raise UserError("request body must be a JSON object")

    if "guids" not in params:
        raise UserError("missing required argument `guids`")

    guids = params["guids"]
    if not isinstance(guids, list):
        raise UserError("`guids` must be a list")

    results = bulk_get_signed_url_for_file(guids)

    return flask.jsonify(results)


@blueprint.route(
"/buckets",
methods=["GET"],
Expand Down
Loading
Loading