-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathget_dataframe.R
More file actions
261 lines (212 loc) · 8.87 KB
/
get_dataframe.R
File metadata and controls
261 lines (212 loc) · 8.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
################################################################################
# OBJECTIVE: Get dataframe from Benchling and save it as an R dataframe ########
################################################################################
library(httr)
library(jsonlite)
library(base64enc)
################################################################################
# AUTHENTICATION CONFIGURATION - Set your method here
################################################################################
# Authentication mode switch: FALSE selects the JWT embedded in the analysis
# key (the default), TRUE selects OAuth2 client credentials.
USE_OAUTH2 <- FALSE
# The OAuth2 settings are read from the environment only when that mode is on.
if (USE_OAUTH2) {
  client_id <- Sys.getenv("BENCHLING_CLIENT_ID")
  client_secret <- Sys.getenv("BENCHLING_CLIENT_SECRET")
  domain_fixed <- Sys.getenv("BENCHLING_DOMAIN")
  # Sys.getenv() yields "" for an unset variable, so one empty value is enough
  # to abort.
  if (any(!nzchar(c(client_id, client_secret, domain_fixed)))) {
    stop("OAuth2 enabled but credentials missing! Set BENCHLING_CLIENT_ID, BENCHLING_CLIENT_SECRET, and BENCHLING_DOMAIN in .Renviron")
  }
}
################################################################################
################################################################################
# Get Analysis Key
################################################################################
# Check if analysis key is in environment (optional)
# Prefer a key supplied through the BENCHLING_ANALYSIS_KEY environment
# variable; fall back to prompting on the console when it is unset or empty.
analysis_key <- Sys.getenv("BENCHLING_ANALYSIS_KEY")
if (!nzchar(analysis_key)) {
  # Instruction banner, emitted in one call (sep = "" keeps the lines exactly
  # as written).
  cat(
    "\n",
    "========================================\n",
    "Enter your Benchling Analysis Key\n",
    "========================================\n",
    "The analysis key format is: ana_XXXXX:eyJhbGc...\n",
    "You can find this in your Benchling analysis.\n",
    sep = ""
  )
  # Mode-specific reminder about how the key is going to be used.
  cat(
    if (USE_OAUTH2) {
      "Note: Using OAuth2 - only analysis ID portion will be used.\n\n"
    } else {
      "Note: JWT tokens expire after 10 minutes.\n\n"
    }
  )
  # Read the key from the console and strip surrounding whitespace.
  analysis_key <- trimws(readline(prompt = "Analysis Key: "))
  if (!nzchar(analysis_key)) {
    stop("Analysis key is required. Please run the script again and provide the key.")
  }
  cat("\n✓ Analysis key provided\n\n")
}
################################################################################
# Parse Analysis Key and Authenticate
################################################################################
cat("Parsing analysis key...\n")
# Split the analysis key, work out the Benchling domain, and obtain a bearer
# token. On success the script has analysis_id, base_domain, subdomain, and
# access_token defined. Any failure inside the block is re-raised with a hint
# about the expected key format.
tryCatch({
  # Split the analysis key: ana_XXXXX:JWT_TOKEN
  key_parts <- strsplit(analysis_key, ":", fixed = TRUE)[[1]]
  if (length(key_parts) != 2) {
    stop("Invalid analysis key format. Expected format: ana_XXXXX:eyJhbGc...")
  }
  analysis_id <- key_parts[1]
  if (USE_OAUTH2) {
    # OAuth2: use the domain from the credentials and request a fresh token.
    base_domain <- domain_fixed
    subdomain <- sub("\\..*", "", domain_fixed) # First label, for display
    cat("✓ Analysis key parsed successfully\n")
    cat(" Analysis ID:", analysis_id, "\n")
    cat(" Domain:", base_domain, "(from credentials)\n\n")
    # Generate OAuth2 bearer token
    cat("Authenticating with Benchling App credentials...\n")
    token_url <- paste0("https://", base_domain, "/api/v2/token")
    # Pass the fields as a named list with encode = "form" so httr URL-encodes
    # each value; a hand-concatenated body breaks when the id or secret
    # contains characters such as "&", "+", or "=".
    token_request <- httr::POST(
      url = token_url,
      body = list(
        client_id = client_id,
        client_secret = client_secret,
        grant_type = "client_credentials"
      ),
      encode = "form",
      httr::accept("application/json")
    )
    if (token_request$status_code != 200) {
      stop("OAuth2 authentication failed! Check your client_id and client_secret.")
    }
    request_body <- jsonlite::fromJSON(rawToChar(token_request$content))
    access_token <- request_body$access_token
    cat("✓ OAuth2 authentication successful (app-attributable)\n")
    cat(" Token expires in:", request_body$expires_in, "seconds\n\n")
  } else {
    # JWT: read the domain out of the token and use the token itself as the
    # bearer credential.
    jwt_token <- key_parts[2]
    jwt_parts <- strsplit(jwt_token, ".", fixed = TRUE)[[1]]
    if (length(jwt_parts) < 2) {
      stop("Token portion of the analysis key is not a JWT (expected header.payload.signature)")
    }
    # JWT segments use the URL-safe base64 alphabet ("-" and "_") and drop the
    # "=" padding, while base64decode() works on the standard alphabet.
    # Translate the alphabet and restore the padding before decoding.
    payload_b64 <- chartr("-_", "+/", jwt_parts[2])
    payload_b64 <- paste0(
      payload_b64,
      strrep("=", (4 - nchar(payload_b64) %% 4) %% 4)
    )
    payload_json <- rawToChar(base64decode(payload_b64))
    payload <- jsonlite::fromJSON(payload_json)
    # Take the domain from the aud claim, falling back to iss. Only the first
    # element is inspected so an array-valued claim cannot break the scalar
    # conditions below.
    domain_raw <- payload$aud
    if (length(domain_raw) == 0 || is.na(domain_raw[1]) ||
        domain_raw[1] == "") {
      domain_raw <- payload$iss
    }
    if (length(domain_raw) == 0 || is.na(domain_raw[1]) ||
        domain_raw[1] == "") {
      stop("JWT payload has no usable 'aud' or 'iss' claim to derive the domain from")
    }
    domain_raw <- as.character(domain_raw[1])
    # Determine if this is a full domain or just a subdomain
    # Full domain examples: "tenant.benchling.com", "tenant.bnchdev.org"
    # Subdomain example: "tenant"
    if (grepl("\\.", domain_raw)) {
      # Contains a dot - it's a full domain, use as-is
      base_domain <- domain_raw
      subdomain <- sub("\\..*", "", domain_raw) # First label, for display
    } else {
      # No dot - it's just a subdomain, append .benchling.com
      subdomain <- domain_raw
      base_domain <- paste0(domain_raw, ".benchling.com")
    }
    # The JWT token IS our bearer token
    access_token <- jwt_token
    cat("✓ Analysis key parsed successfully\n")
    cat(" Domain:", base_domain, "\n")
    cat(" Analysis ID:", analysis_id, "\n")
    cat(" Authentication: Using JWT from analysis key (user-attributable)\n")
    cat(" Token expires: ~10 minutes from generation\n\n")
  }
}, error = function(e) {
  # call. = FALSE keeps the handler's own call out of the reported message.
  stop(
    paste(
      "Failed to parse analysis key!",
      "Please check that you've entered the complete key in format: ana_XXXXX:eyJhbGc...",
      "\nError:", conditionMessage(e),
      sep = "\n"
    ),
    call. = FALSE
  )
})
###############################################################
# Use the Get analysis endpoint and retrieve the dataframe id
###############################################################
cat("Fetching analysis metadata...\n")
# Look up the analysis record and extract the input dataframe ID and folder ID.
# Construct url
# The resulting URL should look like: https://<base_domain>/api/v2-beta/analyses/ana_ABCD1234
api_path <- "/api/v2-beta/analyses/"
url <- paste0("https://", base_domain, api_path, analysis_id)
# Perform a get request, sending the access token as a bearer token
analysis_response <- httr::GET(
  url,
  httr::add_headers(
    Accept = "application/json",
    Authorization = paste("Bearer", access_token)
  )
)
# Check for errors
if (analysis_response$status_code != 200) {
  # paste0 with an explicit trailing space: joining with sep = "" glued the
  # status code directly onto "Status code:".
  stop(paste0(
    "Failed to fetch analysis! Status code: ", analysis_response$status_code,
    "\nPlease check your analysis key is valid and not expired"
  ))
}
# Use the jsonlite library to read the JSON body
analysis_body <- jsonlite::fromJSON(rawToChar(analysis_response$content))
# Get the dataframe ID
dataframe_id <- c(analysis_body$dataFrameIds)
if (length(dataframe_id) == 0) {
  stop(paste(
    "No input dataframes found in analysis!",
    "Please attach a dataset to your analysis in Benchling",
    sep = "\n"
  ))
}
# The rest of the script builds a single request URL from dataframe_id, so
# keep exactly one ID and say so rather than failing later on a vector URL.
if (length(dataframe_id) > 1) {
  warning(
    "Analysis has ", length(dataframe_id),
    " input dataframes; using the first one (", dataframe_id[[1]], ")",
    call. = FALSE
  )
  dataframe_id <- dataframe_id[[1]]
}
# Get the folder ID
folder_id <- c(analysis_body$folderId)
cat("✓ Analysis metadata retrieved\n")
cat(" Dataframe ID:", dataframe_id, "\n")
cat(" Folder ID:", folder_id, "\n\n")
################################################################
# Use the Get dataframe endpoint and retrieve the dataframe
################################################################
cat("Downloading dataset...\n")
# Fetch the dataframe record, take the CSV download URL from its manifest,
# and load the CSV into `df`.
# Construct url
# The resulting URL should look like: https://<base_domain>/api/v2-beta/data-frames/dset_bHbGo1FP79Kl
api_path <- "/api/v2-beta/data-frames/"
url <- paste0("https://", base_domain, api_path, dataframe_id)
# Perform a get request
dataframe_response <- httr::GET(
  url,
  httr::add_headers(
    Accept = "application/json",
    Authorization = paste("Bearer", access_token)
  )
)
# Check for errors
if (dataframe_response$status_code != 200) {
  # paste0 with an explicit trailing space: joining with sep = "" glued the
  # status code directly onto "Status code:".
  stop(paste0(
    "Failed to fetch dataframe! Status code: ", dataframe_response$status_code
  ))
}
# Use the jsonlite library to read the JSON body
dataframe_body <- jsonlite::fromJSON(rawToChar(dataframe_response$content))
# Retrieve the url to retrieve the dataframe in CSV format
dataframe_url <- c(dataframe_body$manifest$url)
# Drop missing/empty entries so a manifest without a usable URL is reported
# clearly instead of surfacing as an obscure read.csv() failure.
dataframe_url <- dataframe_url[!is.na(dataframe_url) & nzchar(dataframe_url)]
if (length(dataframe_url) == 0) {
  stop(paste(
    "Dataframe manifest contains no download URL!",
    "The dataset may not have finished uploading in Benchling",
    sep = "\n"
  ))
}
# read.csv() accepts a single location only.
# NOTE(review): how a multi-file manifest should be combined is not visible
# here, so fail loudly rather than guess - confirm against the Benchling docs.
if (length(dataframe_url) > 1) {
  stop(paste0(
    "Dataframe manifest lists ", length(dataframe_url),
    " files; this script supports single-file datasets only"
  ))
}
# Read the dataframe csv and save it as an R dataframe
df <- read.csv(dataframe_url)
# Report the shape of the loaded dataset.
cat("✓ Dataset loaded successfully\n")
cat(" Rows:", dim(df)[1L], "\n")
cat(" Columns:", dim(df)[2L], "\n")
cat(" Column names:", toString(names(df)), "\n\n")
# List the variables this script leaves behind for the follow-up script.
# Everything is emitted in one call; sep = "" keeps each line exactly as
# written.
cat(
  "==========================================\n",
  "SUCCESS! Dataset ready for analysis\n",
  "==========================================\n",
  "Available variables:\n",
  " df - Dataset as R dataframe\n",
  " base_domain - Benchling domain (e.g., tenant.benchling.com or tenant.bnchdev.org)\n",
  " subdomain - Tenant subdomain (first part of domain)\n",
  " analysis_id - Analysis ID\n",
  # The token description depends on the authentication mode in use.
  if (USE_OAUTH2) {
    " access_token - OAuth2 bearer token (15 min expiration)\n"
  } else {
    " access_token - JWT bearer token (10 min expiration)\n"
  },
  " folder_id - Folder ID for uploads\n",
  "\nNext step: Run results_analysis.R\n",
  sep = ""
)