Skip to content

WebAPI Data Sources #53

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 7 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,14 @@ DBInterface = "a10d1c49-ce27-4219-8d33-6db1a4562965"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
FunSQL = "cf6cc811-59f4-4a10-b258-a8547a8f6407"
Gumbo = "708ec375-b3d6-5a57-a7ce-8257bf98657a"
HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1"
LibPQ = "194296ae-ab2e-5f79-8cd4-7183a0a5a0d1"
MLStyle = "d8e11817-5142-5d16-987a-aa16d5891078"
MySQL = "39abe10b-433b-5dbd-92d4-e302a9df00cd"
OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
SQLite = "0aa819cd-b072-5ff4-a722-6bc24af294d9"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
Expand All @@ -34,10 +36,12 @@ Catlab = "0.16"
DBInterface = "2.6.1"
DataFrames = "1.0"
Dates = "1.11.0"
Gumbo = "0.8.3"
HTTP = "1.10.15"
JSON3 = "1.14.1"
MLStyle = "0.4.17"
MySQL = "1.4.7"
OrderedCollections = "1.8.1"
Reexport = "1.2.2"
SQLite = "1.6.1"
julia = "1.6"
Expand Down
43 changes: 43 additions & 0 deletions docs/literate/webapi.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
using AlgebraicRelations

axolotl = WebAPI("https://theaxolotlapi.netlify.app/")

# Paths into the fetched page can be registered after `axolotl` has been created.
add_path!(axolotl, :children => "*.2.2.1.2.1.4.:children")

# `subpart` is then called the same way as in the ACSet interface; the
# `formatter` keyword post-processes whatever the registered path selects.
subpart(axolotl, :children; formatter=nodes -> first(nodes).text)

# # Second part

# Next, a connection to the "genrenator" API.
randomgenres = WebAPI("https://binaryjazz.us/wp-json/genrenator/v1/genre/")

# Register the path under which the response text is found.
add_path!(randomgenres, :text => "*.2.1.:text")

# Again, `subpart` is called the same way as in the ACSet interface.
subpart(randomgenres, :text)

using JSON3, Gumbo, HTTP

sOMOP = "cms_synthetic_patient_data_omop"

tableconn = WebAPI("https://redivis.com/api/v1/{{type}}/Demo.{{dataset}}"; token_envar="JULIA_OMOP_API_KEY")

# metadata = subpart(tableconn[dataset=sOMOP])

add_path!(tableconn, :text => "*.2.1.:text")

# Small helper that builds a property getter for use in pipelines. A named
# function is used here rather than a method on `!`, so that the logical
# negation operator keeps its usual meaning in this script.
fieldgetter(name::Symbol) = Base.Fix2(getproperty, name)

subpart(tableconn[type="datasets", dataset=sOMOP, "/tables/"], :text) |> JSON3.read |> fieldgetter(:results)

table = "care_site"

# Caching queries.
# TODO: this needs an optional keyword.
rows_text = subpart(tableconn[type="tables", dataset=sOMOP, ".$table/rows?format=jsonl"], :text)

# One JSON document per line. Empty segments (for example the one produced by a
# trailing newline) are dropped so that they are never handed to `JSON3.read`.
json_out = JSON3.read.(split(rows_text, "\n"; keepempty=false))

using DataFrames
DataFrame(json_out) # JSON3.Object supports the AbstractDict interface, so this is all that is needed
7 changes: 6 additions & 1 deletion src/sqlacsets/fabric/Fabric.jl
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,13 @@ export DataSourceGraph
"""
    Log(time::DateTime, event)

A single log entry: the `time` at which something happened and the `event`
that was recorded. `event` is intentionally untyped so that any value can be
stored.

No inner constructor is declared, so Julia generates the default two-argument
constructor `Log(time, event)`. Declaring an inner constructor here would
suppress that default, and the convenience method `Log(event::DataType)` relies
on it being available.
"""
struct Log
    time::DateTime
    event
end
export Log

"""
    Log(event::DataType)

Create a `Log` entry for `event`, stamped with the current time (`Dates.now()`).
"""
Log(event::DataType) = Log(Dates.now(), event)

@kwdef mutable struct DataFabric
# this will store the connections, their schema, and values
graph::DataSourceGraph = DataSourceGraph()
Expand Down Expand Up @@ -158,9 +161,11 @@ include("acset_interface.jl")

include("datasources/database/DatabaseDS.jl")
include("datasources/inmemory/InMemoryDS.jl")
include("datasources/webapi/WebApiDS.jl")

@reexport using .DatabaseDS
@reexport using .InMemoryDS
@reexport using .WebAPIDS


end
82 changes: 65 additions & 17 deletions src/sqlacsets/fabric/datasources/webapi/WebApiDS.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,72 @@ using ..Fabric
using ACSets

using HTTP
using Gumbo # HTML Parsing

@kwdef struct WebAPI <: AbstractDataSource
conn::String # HTTP endpoint
log::Vector{Log} = Log[]
"""
    WebAPIConnection(; value, kvs=Symbol[], endpoint=nothing)

State of a connection to a web endpoint.

# Fields
- `value`: either a fixed string or a function; fixed for the lifetime of the object.
- `kvs`: the symbols associated with `value`; fixed for the lifetime of the object.
- `endpoint`: the currently selected endpoint, or `nothing` if none is set. This
  is the only field that can be reassigned.
"""
@kwdef mutable struct WebAPIConnection
    const value::Union{String,Function}
    const kvs::Vector{Symbol} = Symbol[]
    endpoint::Union{String,Nothing} = nothing
end
export WebAPIConnection

"""
    endpoint(wac::WebAPIConnection)

Return the `endpoint` field of `wac` (a `String`, or `nothing` if unset).
"""
function endpoint(wac::WebAPIConnection)
    return wac.endpoint
end

"""
    connect(string::String) -> WebAPIConnection

Build a `WebAPIConnection` from a URL template.

Placeholders of the form `{{name}}` (lowercase letters only) are collected from
`string`. The returned connection stores:

- `value`: a function taking keyword arguments that returns `string` with every
  `{{name}}` replaced by the keyword argument of the same name. When the
  template has no placeholders the function returns `string` unchanged and
  ignores any keyword arguments.
- `kvs`: the placeholder names, in order of first appearance, without duplicates.
- `endpoint`: initially the raw template `string`.

The keyword arguments passed to `value` must name exactly the placeholders of
the template. Their order does not matter, and a placeholder that occurs several
times in the template needs to be supplied only once. A mismatch raises an
`ErrorException`.
"""
function connect(string::String)
    # The parameter name shadows `Base.string`; alias it to keep the body readable.
    template = string
    found = [Symbol(m.captures[1]) for m in eachmatch(r"{{([a-z]*)}}", template)]
    # A keyword can be given only once, so a repeated placeholder must count once.
    placeholders = unique!(found)
    fill_template = (; kws...) -> begin
        if isempty(placeholders)
            template
        else
            # Compare as sets: keyword order is irrelevant to the substitution below.
            if Set{Symbol}(placeholders) != Set{Symbol}(keys(kws))
                error("$placeholders does not equal $(keys(kws))")
            end
            replace(template, ["{{$k}}" => v for (k, v) in kws]...)
        end
    end
    return WebAPIConnection(value=fill_template, kvs=placeholders, endpoint=template)
end
export connect

"""
    WebAPI(; conn, paths=Dict{Symbol,String}(), token_envar=nothing, log=Log[])

A data source backed by an HTTP endpoint.

# Fields
- `conn`: the `WebAPIConnection` describing the HTTP endpoint; cannot be reassigned.
- `paths`: named path strings, keyed by symbol.
- `token_envar`: name of an environment variable, or `nothing`.
- `log`: the `Log` entries recorded for this source.
"""
@kwdef mutable struct WebAPI <: AbstractDataSource
    const conn::WebAPIConnection
    paths::Dict{Symbol,String} = Dict{Symbol,String}()
    token_envar::Union{String,Nothing} = nothing
    log::Vector{Log} = Log[]
end
export WebAPI

"""
    WebAPI(conn::String; kwargs...)

Convenience constructor: turn the string `conn` into a connection with
`connect` and forward the remaining keyword arguments to the keyword
constructor of `WebAPI`.
"""
WebAPI(conn::String; kwargs...) = WebAPI(; conn=connect(conn), kwargs...)

# Display a `WebAPI` as the word "Arity" followed by the comma-separated
# symbols stored in `web.conn.kvs`.
function Base.show(io::IO, web::WebAPI)
    label = "Arity $(join(web.conn.kvs, ", "))"
    return print(io, label)
end

"""
    endpoint(web::WebAPI)

Return the endpoint of the connection held by `web`.
"""
function endpoint(web::WebAPI)
    return endpoint(web.conn)
end

"""
    add_path!(web::WebAPI, path::Pair{Symbol, String})

Store the path string `path.second` in `web.paths` under the key `path.first`,
replacing any entry already stored under that key. Returns the stored string.
"""
function add_path!(web::WebAPI, path::Pair{Symbol, String})
    key, route = path
    return web.paths[key] = route
end
export add_path!

# Indexing a `WebAPI` selects an endpoint rather than reading a value:
# the keyword arguments are passed to `web.conn.value`, the positional
# arguments are concatenated onto the result, and the outcome is stored in
# `web.conn.endpoint`. The new endpoint is reported via `@info` and `web`
# itself is returned.
function Base.getindex(web::WebAPI, args...; kwargs...)
    base = web.conn.value(; kwargs...)
    suffix = "$(args...)"
    web.conn.endpoint = base * suffix
    @info "Endpoint set to:

$(web.conn.endpoint)
"
    return web
end

"""
    build_headers(web::WebAPI)

Build the HTTP request headers for `web`.

Returns `nothing` when `web.token_envar` is `nothing`. Otherwise returns a
`Dict` with an `authorization` header carrying the value of the environment
variable named by `web.token_envar` as a bearer token, and an `accept` header
requesting JSON.

# Throws
- `KeyError` if the environment variable named by `web.token_envar` is not set.
"""
function build_headers(web::WebAPI)
    isnothing(web.token_envar) && return nothing
    token = ENV[web.token_envar]
    return Dict(
        "authorization" => "Bearer $token",
        # The parameter value was previously misspelled as "verbse".
        "accept" => "application/json;odata=verbose",
    )
end
export WebAPISource

# objects are endpoints
# attrs are query params
Expand All @@ -22,18 +82,6 @@ function Fabric.recatalog!(::WebAPI) end
# Placeholder: executing a statement against a `WebAPI` source is not
# implemented; the method accepts its arguments and returns `nothing`.
function Fabric.execute!(webapi::WebAPI, stmt::AbstractString; formatter=nothing)
    return nothing
end

# Issue a GET request to the endpoint of `web`, sending `column` as a query
# parameter whose key and value are both `column`, and return the result of
# `HTTP.get`.
#
# `web.conn` is a `WebAPIConnection`, not a URL, so the request is made against
# `endpoint(web)`.
# NOTE(review): "acset_interface.jl" is included later in this module and appears
# to define `subpart(::WebAPI, ::Symbol)` as well — confirm which definition is
# meant to remain.
function ACSetInterface.subpart(web::WebAPI, column::Symbol)
    return HTTP.get(endpoint(web); query=[column => column])
end

# Placeholder: `incident` is not implemented for `WebAPI`; returns `nothing`.
function ACSetInterface.incident(web::WebAPI, id, column::Symbol)
    return nothing
end

# HTTP.get(...) # select # subpart
# HTTP.post(...) # insert
# HTTP.put(...) # upsert
# HTTP.delete(...)
# HTTP.patch(...) #
# HTTP.head(...)
include("acset_interface.jl")

end
66 changes: 66 additions & 0 deletions src/sqlacsets/fabric/datasources/webapi/acset_interface.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
using ACSets

using MLStyle
using Dates

"""
    Accessors(accessors)

Wrapper around a collection of accessors. The single field `accessors` is
untyped and stores whatever value it is given.
"""
struct Accessors
    accessors::Any
end

# Active pattern for `@match`: `Re{regex}(m)` succeeds when `regex` matches the
# scrutinee and binds `m` to the resulting match object; it fails otherwise.
@active Re{r :: Regex}(x) begin
    found = match(r, x)
    found === nothing ? nothing : Some(found)
end

"""
    Accessors(path::String)

Parse a dot-separated `path` into an `Accessors` holding one or more functions
per recognised segment, in path order:

- `*` becomes a getter for the `:root` property;
- a segment consisting only of digits, e.g. `2`, becomes a getter for the
  `:children` property followed by an index lookup at that position;
- `:name` becomes a getter for the property `name`.

Each segment must match one of these forms in full. Any other segment
(including an empty one) contributes no accessor.
"""
function Accessors(path::String)
    accessors = Function[]
    for segment in split(path, ".")
        @match segment begin
            # Anchored so that a property name containing a digit (":h1") is not
            # mistaken for an index.
            Re{r"^\d+$"}(m) => begin
                push!(accessors, Base.Fix2(getproperty, :children))
                push!(accessors, Base.Fix2(getindex, parse(Int, m.match)))
            end
            Re{r"^:(\w+)$"}(m) => begin
                push!(accessors, Base.Fix2(getproperty, Symbol(m.captures[1])))
            end
            "*" => begin
                push!(accessors, Base.Fix2(getproperty, :root))
            end
            _ => nothing
        end
    end
    return Accessors(accessors)
end

"""
    query(doc::HTMLDocument, t::Accessors)

Apply the accessors in `t.accessors` one after another, starting from `doc`
and feeding each result into the next accessor. Returns the final result
(`doc` itself when there are no accessors).
"""
function query(doc::HTMLDocument, t::Accessors)
    current = doc
    for accessor in t.accessors
        current = current |> accessor
    end
    return current
end
export query

"""
    query(doc::HTMLDocument, path::String)

Parse `path` into `Accessors` and run the resulting query against `doc`.
"""
query(doc::HTMLDocument, path::String) = query(doc, Accessors(path))

# It is generally better to format the return value of the query explicitly (for example by piping it through `split` and `JSON3.read`), but we may also want a general format for how returned data should be cached.
#
# function ACSetInterface.subpart(web::WebAPI, kws...; path::String="", formatter=identity)
"""
    subpart(web::WebAPI, name::Symbol; formatter=identity)

Fetch the current endpoint of `web` with a GET request, parse the response body
as HTML, run the path registered under `name` (see `web.paths`) against the
parsed document and pass the result through `formatter`.

Headers from `build_headers(web)` are sent when they are available. Every call
that reaches the network appends an entry to `web.log`.

# Throws
- `KeyError` if no path is registered under `name`; this is checked before any
  request is made.
- `ErrorException` if no endpoint is set or the response status is not 200.
"""
function ACSetInterface.subpart(web::WebAPI, name::Symbol; formatter=identity)
    # Validate locally first so that a bad `name` never costs a network round trip.
    haskey(web.paths, name) || throw(KeyError(name))
    url = web.conn.endpoint
    isnothing(url) && error("no endpoint is set for this WebAPI connection")
    headers = build_headers(web)
    push!(web.log, Log(Dates.now(), "Called $(url) for $name"))
    response = isnothing(headers) ? HTTP.request("GET", url) : HTTP.request("GET", url, headers)
    # An explicit check rather than `@assert`, which is not meant for validating
    # external responses.
    response.status == 200 || error("GET $(url) returned status $(response.status); expected 200")
    parsed_doc = parsehtml(String(response.body))
    return query(parsed_doc, web.paths[name]) |> formatter
end

# Placeholder: `incident` is not implemented for `WebAPI`; returns `nothing`.
# `column` may be omitted, in which case it defaults to `nothing`.
function ACSetInterface.incident(web::WebAPI, id, column::Union{Symbol, Nothing}=nothing)
    return nothing
end

# HTTP.get(...) # select # subpart
# HTTP.post(...) # insert
# HTTP.put(...) # upsert