url percent encoding & (naïve) fuzzy matching
This commit is contained in:
parent
0e4119d3b9
commit
a827bb0e4d
4 changed files with 170 additions and 6 deletions
19
gleam.toml
Normal file
19
gleam.toml
Normal file
|
@ -0,0 +1,19 @@
|
||||||
|
name = "bahnhofname"
|
||||||
|
version = "0.1.0"
|
||||||
|
|
||||||
|
# Fill out these fields if you intend to generate HTML documentation or publish
|
||||||
|
# your project to the Hex package manager.
|
||||||
|
#
|
||||||
|
# licences = ["Apache-2.0"]
|
||||||
|
# description = "A Gleam library..."
|
||||||
|
# repository = { type = "github", user = "username", repo = "project" }
|
||||||
|
# links = [{ title = "Website", href = "https://gleam.run" }]
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
gleam_stdlib = "~> 0.19"
|
||||||
|
gleam_http = "~> 3.0"
|
||||||
|
mist = "~> 0.4"
|
||||||
|
gleam_hackney = "~> 1.0"
|
||||||
|
|
||||||
|
[dev-dependencies]
|
||||||
|
gleeunit = "~> 0.6"
|
28
manifest.toml
Normal file
28
manifest.toml
Normal file
|
@ -0,0 +1,28 @@
|
||||||
|
# This file was generated by Gleam
|
||||||
|
# You typically do not need to edit this file
|
||||||
|
|
||||||
|
packages = [
|
||||||
|
{ name = "certifi", version = "2.9.0", build_tools = ["rebar3"], requirements = [], otp_app = "certifi", source = "hex", outer_checksum = "266DA46BDB06D6C6D35FDE799BCB28D36D985D424AD7C08B5BB48F5B5CDD4641" },
|
||||||
|
{ name = "gleam_erlang", version = "0.18.1", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleam_erlang", source = "hex", outer_checksum = "C69F59D086AD50B80DE294FB0963550630971C9DC04E92B1F7AEEDD2C0BE226C" },
|
||||||
|
{ name = "gleam_hackney", version = "1.0.0", build_tools = ["gleam"], requirements = ["gleam_http", "gleam_stdlib", "hackney"], otp_app = "gleam_hackney", source = "hex", outer_checksum = "B3C1E6BD138D57252F9F9E499C741E9227EE7EE9B017CA650EC8193E02F734E1" },
|
||||||
|
{ name = "gleam_http", version = "3.2.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleam_http", source = "hex", outer_checksum = "D034F5CE0639CD142CBA210B7D5D14236C284B0C5772A043D2E22128594573AE" },
|
||||||
|
{ name = "gleam_otp", version = "0.5.3", build_tools = ["gleam"], requirements = ["gleam_erlang", "gleam_stdlib"], otp_app = "gleam_otp", source = "hex", outer_checksum = "6E705B69464237353E0380AC8143BDB29A3F0BF6168755D5F2D6E55A34A8B077" },
|
||||||
|
{ name = "gleam_stdlib", version = "0.28.1", build_tools = ["gleam"], requirements = [], otp_app = "gleam_stdlib", source = "hex", outer_checksum = "73F0A89FADE5022CBEF6D6C3551F9ADCE7054AFCE0CB1DC4C6D5AB4CA62D0111" },
|
||||||
|
{ name = "gleeunit", version = "0.10.1", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleeunit", source = "hex", outer_checksum = "ECEA2DE4BE6528D36AFE74F42A21CDF99966EC36D7F25DEB34D47DD0F7977BAF" },
|
||||||
|
{ name = "glisten", version = "0.7.0", build_tools = ["gleam"], requirements = ["gleam_erlang", "gleam_stdlib", "gleam_otp"], otp_app = "glisten", source = "hex", outer_checksum = "52B530FF25370590843998D1B6C4EC6169DB1300D5E4407A5CDA1575374B7AEC" },
|
||||||
|
{ name = "hackney", version = "1.18.1", build_tools = ["rebar3"], requirements = ["certifi", "metrics", "mimerl", "ssl_verify_fun", "idna", "parse_trans", "unicode_util_compat"], otp_app = "hackney", source = "hex", outer_checksum = "A4ECDAFF44297E9B5894AE499E9A070EA1888C84AFDD1FD9B7B2BC384950128E" },
|
||||||
|
{ name = "idna", version = "6.1.1", build_tools = ["rebar3"], requirements = ["unicode_util_compat"], otp_app = "idna", source = "hex", outer_checksum = "92376EB7894412ED19AC475E4A86F7B413C1B9FBB5BD16DCCD57934157944CEA" },
|
||||||
|
{ name = "metrics", version = "1.0.1", build_tools = ["rebar3"], requirements = [], otp_app = "metrics", source = "hex", outer_checksum = "69B09ADDDC4F74A40716AE54D140F93BEB0FB8978D8636EADED0C31B6F099F16" },
|
||||||
|
{ name = "mimerl", version = "1.2.0", build_tools = ["rebar3"], requirements = [], otp_app = "mimerl", source = "hex", outer_checksum = "F278585650AA581986264638EBF698F8BB19DF297F66AD91B18910DFC6E19323" },
|
||||||
|
{ name = "mist", version = "0.10.0", build_tools = ["gleam"], requirements = ["gleam_erlang", "gleam_stdlib", "gleam_otp", "glisten", "gleam_http"], otp_app = "mist", source = "hex", outer_checksum = "5AFBABABF738BAB8720F047471051E4E9D102CA4694C120DB899FA12AD5D180B" },
|
||||||
|
{ name = "parse_trans", version = "3.3.1", build_tools = ["rebar3"], requirements = [], otp_app = "parse_trans", source = "hex", outer_checksum = "07CD9577885F56362D414E8C4C4E6BDF10D43A8767ABB92D24CBE8B24C54888B" },
|
||||||
|
{ name = "ssl_verify_fun", version = "1.1.6", build_tools = ["mix", "rebar3", "make"], requirements = [], otp_app = "ssl_verify_fun", source = "hex", outer_checksum = "BDB0D2471F453C88FF3908E7686F86F9BE327D065CC1EC16FA4540197EA04680" },
|
||||||
|
{ name = "unicode_util_compat", version = "0.7.0", build_tools = ["rebar3"], requirements = [], otp_app = "unicode_util_compat", source = "hex", outer_checksum = "25EEE6D67DF61960CF6A794239566599B09E17E668D3700247BC498638152521" },
|
||||||
|
]
|
||||||
|
|
||||||
|
[requirements]
|
||||||
|
gleam_hackney = "~> 1.0"
|
||||||
|
gleam_http = "~> 3.0"
|
||||||
|
gleam_stdlib = "~> 0.19"
|
||||||
|
gleeunit = "~> 0.6"
|
||||||
|
mist = "~> 0.4"
|
|
@ -4,14 +4,113 @@ import gleam/http.{Get}
|
||||||
import gleam/bit_builder.{BitBuilder}
|
import gleam/bit_builder.{BitBuilder}
|
||||||
import gleam/erlang/process
|
import gleam/erlang/process
|
||||||
import gleam/io
|
import gleam/io
|
||||||
|
import gleam/int
|
||||||
import gleam/string
|
import gleam/string
|
||||||
|
import gleam/bit_string
|
||||||
import gleam/list
|
import gleam/list
|
||||||
import gleam/map.{Map}
|
import gleam/map.{Map}
|
||||||
import gleam/result.{lazy_unwrap}
|
import gleam/result.{lazy_unwrap}
|
||||||
import gleam/uri
|
import gleam/uri
|
||||||
import gleam/hackney
|
import gleam/hackney
|
||||||
|
import gleam/option.{None, Some}
|
||||||
import mist
|
import mist
|
||||||
|
|
||||||
|
/// Computes one row of the Levenshtein distance table.
///
/// - `b`: the part of the target string that has not been processed yet.
/// - `distlist`: the unprocessed part of the previous row; its first element
///   is the cell diagonally up-left of the cell being computed and its
///   second element is the cell directly above it.
/// - `grapheme`: the grapheme of the source string this row belongs to.
/// - `new_distlist`: the cells of the new row computed so far, in reverse
///   order.
/// - `last_dist`: the most recently computed cell, i.e. the cell directly to
///   the left of the one being computed.
///
/// Returns the finished row in left-to-right order.
fn do_distlist(
  b: String,
  distlist: List(Int),
  grapheme: String,
  new_distlist: List(Int),
  last_dist: Int,
) {
  // `pop_grapheme` yields head and tail in one step and fails only on "".
  case string.pop_grapheme(b), distlist {
    Ok(#(b_hd, b_tl)), [diagonal, above, ..rest] -> {
      // Substituting a grapheme for itself costs nothing.
      let substitution_cost = case b_hd == grapheme {
        True -> 0
        False -> 1
      }
      let minimum =
        int.min(
          int.min(last_dist + 1, above + 1),
          diagonal + substitution_cost,
        )
      do_distlist(
        b_tl,
        [above, ..rest],
        grapheme,
        [minimum, ..new_distlist],
        minimum,
      )
    }
    // Either `b` is exhausted (the row is complete) or the previous row is
    // too short to supply the two cells needed. The latter cannot happen
    // when the previous row has one more cell than `b` has graphemes, but
    // the clause keeps the match exhaustive instead of crashing.
    _, _ -> list.reverse(new_distlist)
  }
}
|
||||||
|
|
||||||
|
/// Walks the graphemes of `a`, turning the distance row `distlist` into the
/// next row on every step, and returns the last cell of the final row.
///
/// `step` is the value placed in the first cell of the row that is about to
/// be computed; it grows by one per consumed grapheme. If the final row is
/// empty, `-1` is returned.
fn do_distance(a: String, b: String, distlist: List(Int), step: Int) {
  case string.pop_grapheme(a) {
    // `a` is used up: the answer is the last cell of the current row.
    Error(_) ->
      distlist
      |> list.last
      |> result.unwrap(-1)
    Ok(#(src_hd, src_tl)) -> {
      let next_row = do_distlist(b, distlist, src_hd, [step], step)
      do_distance(src_tl, b, next_row, step + 1)
    }
  }
}
|
||||||
|
|
||||||
|
/// Returns the Levenshtein edit distance between `a` and `b`.
///
/// Equal strings and strings compared against "" are answered directly;
/// every other pair is handed to `do_distance`, seeded with the row
/// `0..length(b)` and a first step of 1.
fn levenshtein(a: String, b: String) -> Int {
  case a, b {
    _, _ if a == b -> 0
    "", other -> string.length(other)
    other, "" -> string.length(other)
    _, _ -> {
      let first_row = list.range(0, string.length(b))
      do_distance(a, b, first_row, 1)
    }
  }
}
|
||||||
|
|
||||||
|
/// Decodes the percent-escapes (`%XX`) of a URL path segment.
///
/// Returns the decoded text, or `encoded` unchanged when the standard
/// library decoder rejects the input.
fn unpercent(encoded: String) -> String {
  // The path comes straight from the HTTP request and is therefore
  // untrusted: a malformed escape or an escape sequence that does not form
  // valid UTF-8 must not crash the request handler, so decoding failures
  // fall back to the raw text instead of asserting success.
  encoded
  |> uri.percent_decode
  |> result.unwrap(encoded)
}
|
||||||
|
|
||||||
|
/// Picks the key of `stations` that is closest to `query` by Levenshtein
/// distance.
///
/// The search starts from `query` itself with a distance of
/// `string.length(query)`, so `query` is returned unchanged when every key
/// is farther away than that (including when `stations` is empty). A
/// candidate replaces the current best unless the current best is strictly
/// closer, so among equally close keys the one visited last wins.
fn guess_station(query: String, stations: Map(String, String)) -> String {
  let #(_, closest) =
    stations
    |> map.keys
    |> list.fold(
      from: #(string.length(query), query),
      with: fn(best, name) {
        let distance = levenshtein(query, name)
        case best.0 < distance {
          True -> best
          False -> #(distance, name)
        }
      },
    )
  closest
}
|
||||||
|
|
||||||
fn the_lookup(
|
fn the_lookup(
|
||||||
query: String,
|
query: String,
|
||||||
stations: Map(String, String),
|
stations: Map(String, String),
|
||||||
|
@ -21,7 +120,7 @@ fn the_lookup(
|
||||||
|> lazy_unwrap(fn() {
|
|> lazy_unwrap(fn() {
|
||||||
io.println(query)
|
io.println(query)
|
||||||
map.get(stations, query)
|
map.get(stations, query)
|
||||||
|> lazy_unwrap(fn() { "unknown" })
|
|> lazy_unwrap(fn() { guess_station(query, stations) })
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -32,13 +131,14 @@ fn lookup_station(
|
||||||
baseurl: String,
|
baseurl: String,
|
||||||
) -> Response(BitBuilder) {
|
) -> Response(BitBuilder) {
|
||||||
let #(code, text) = case request {
|
let #(code, text) = case request {
|
||||||
Request(method: Get, path: "/help", ..) -> #(
|
Request(method: Get, path: "/help", ..)
|
||||||
|
| Request(method: Get, path: "/", ..) -> #(
|
||||||
200,
|
200,
|
||||||
"ds100 → Name: " <> baseurl <> "/FF\n" <> "Name → ds100: " <> baseurl <> "/Frankfurt Hbf",
|
"ds100 → Name: " <> baseurl <> "/NN\n" <> "Name → ds100: " <> baseurl <> "/Nürnberg Hbf",
|
||||||
)
|
)
|
||||||
Request(method: Get, path: "/" <> path, ..) -> #(
|
Request(method: Get, path: "/" <> path, ..) -> #(
|
||||||
200,
|
200,
|
||||||
the_lookup(string.replace(path, each: "%20", with: " "), stations, ds100s),
|
the_lookup(unpercent(path), stations, ds100s),
|
||||||
)
|
)
|
||||||
_ -> #(404, "intended usage is e.g. curl " <> baseurl <> "/FF")
|
_ -> #(404, "intended usage is e.g. curl " <> baseurl <> "/FF")
|
||||||
}
|
}
|
||||||
|
@ -49,6 +149,11 @@ fn lookup_station(
|
||||||
"x-data-source",
|
"x-data-source",
|
||||||
"https://data.deutschebahn.com/dataset/data-betriebsstellen.html",
|
"https://data.deutschebahn.com/dataset/data-betriebsstellen.html",
|
||||||
)
|
)
|
||||||
|
|> response.prepend_header(
|
||||||
|
"x-sources-at",
|
||||||
|
"https://stuebinm.eu/git/bahnhof.name",
|
||||||
|
)
|
||||||
|
|> response.prepend_header("content-type", "text/plain; charset=utf8")
|
||||||
|> response.set_body(body)
|
|> response.set_body(body)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -89,8 +194,8 @@ pub fn main() {
|
||||||
|> list.map(fn(a) { #(a.1, a.0) })
|
|> list.map(fn(a) { #(a.1, a.0) })
|
||||||
|> map.from_list
|
|> map.from_list
|
||||||
|
|
||||||
let assert Ok(_) = mist.run_service(
|
mist.run_service(
|
||||||
1234,
|
2345,
|
||||||
fn(req) { lookup_station(req, stationmap, ds100map, baseurl) },
|
fn(req) { lookup_station(req, stationmap, ds100map, baseurl) },
|
||||||
max_body_limit: 100,
|
max_body_limit: 100,
|
||||||
)
|
)
|
||||||
|
|
12
test/bahnhofname_test.gleam
Normal file
12
test/bahnhofname_test.gleam
Normal file
|
@ -0,0 +1,12 @@
|
||||||
|
import gleeunit
|
||||||
|
import gleeunit/should
|
||||||
|
|
||||||
|
// Test entry point: does nothing but call gleeunit's `main`.
pub fn main() {
|
||||||
|
gleeunit.main()
|
||||||
|
}
|
||||||
|
|
||||||
|
// gleeunit test functions end in `_test`
|
||||||
|
// Placeholder test: asserts that 1 equals 1.
pub fn hello_world_test() {
  should.equal(1, 1)
}
|
Loading…
Reference in a new issue