From a827bb0e4d75f784f2cb26d664055c05810b096b Mon Sep 17 00:00:00 2001 From: stuebinm Date: Wed, 26 Apr 2023 00:53:24 +0200 Subject: [PATCH] =?UTF-8?q?url=20percent=20encoding=20&=20(na=C3=AFve)=20f?= =?UTF-8?q?uzzy=20matching?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- gleam.toml | 19 ++++++ manifest.toml | 28 +++++++++ src/bahnhofname.gleam | 117 ++++++++++++++++++++++++++++++++++-- test/bahnhofname_test.gleam | 12 ++++ 4 files changed, 170 insertions(+), 6 deletions(-) create mode 100644 gleam.toml create mode 100644 manifest.toml create mode 100644 test/bahnhofname_test.gleam diff --git a/gleam.toml b/gleam.toml new file mode 100644 index 0000000..63ee397 --- /dev/null +++ b/gleam.toml @@ -0,0 +1,19 @@ +name = "bahnhofname" +version = "0.1.0" + +# Fill out these fields if you intend to generate HTML documentation or publish +# your project to the Hex package manager. +# +# licences = ["Apache-2.0"] +# description = "A Gleam library..." 
+# repository = { type = "github", user = "username", repo = "project" } +# links = [{ title = "Website", href = "https://gleam.run" }] + +[dependencies] +gleam_stdlib = "~> 0.19" +gleam_http = "~> 3.0" +mist = "~> 0.4" +gleam_hackney = "~> 1.0" + +[dev-dependencies] +gleeunit = "~> 0.6" diff --git a/manifest.toml b/manifest.toml new file mode 100644 index 0000000..9627978 --- /dev/null +++ b/manifest.toml @@ -0,0 +1,28 @@ +# This file was generated by Gleam +# You typically do not need to edit this file + +packages = [ + { name = "certifi", version = "2.9.0", build_tools = ["rebar3"], requirements = [], otp_app = "certifi", source = "hex", outer_checksum = "266DA46BDB06D6C6D35FDE799BCB28D36D985D424AD7C08B5BB48F5B5CDD4641" }, + { name = "gleam_erlang", version = "0.18.1", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleam_erlang", source = "hex", outer_checksum = "C69F59D086AD50B80DE294FB0963550630971C9DC04E92B1F7AEEDD2C0BE226C" }, + { name = "gleam_hackney", version = "1.0.0", build_tools = ["gleam"], requirements = ["gleam_http", "gleam_stdlib", "hackney"], otp_app = "gleam_hackney", source = "hex", outer_checksum = "B3C1E6BD138D57252F9F9E499C741E9227EE7EE9B017CA650EC8193E02F734E1" }, + { name = "gleam_http", version = "3.2.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleam_http", source = "hex", outer_checksum = "D034F5CE0639CD142CBA210B7D5D14236C284B0C5772A043D2E22128594573AE" }, + { name = "gleam_otp", version = "0.5.3", build_tools = ["gleam"], requirements = ["gleam_erlang", "gleam_stdlib"], otp_app = "gleam_otp", source = "hex", outer_checksum = "6E705B69464237353E0380AC8143BDB29A3F0BF6168755D5F2D6E55A34A8B077" }, + { name = "gleam_stdlib", version = "0.28.1", build_tools = ["gleam"], requirements = [], otp_app = "gleam_stdlib", source = "hex", outer_checksum = "73F0A89FADE5022CBEF6D6C3551F9ADCE7054AFCE0CB1DC4C6D5AB4CA62D0111" }, + { name = "gleeunit", version = "0.10.1", build_tools = ["gleam"], 
requirements = ["gleam_stdlib"], otp_app = "gleeunit", source = "hex", outer_checksum = "ECEA2DE4BE6528D36AFE74F42A21CDF99966EC36D7F25DEB34D47DD0F7977BAF" }, + { name = "glisten", version = "0.7.0", build_tools = ["gleam"], requirements = ["gleam_erlang", "gleam_stdlib", "gleam_otp"], otp_app = "glisten", source = "hex", outer_checksum = "52B530FF25370590843998D1B6C4EC6169DB1300D5E4407A5CDA1575374B7AEC" }, + { name = "hackney", version = "1.18.1", build_tools = ["rebar3"], requirements = ["certifi", "metrics", "mimerl", "ssl_verify_fun", "idna", "parse_trans", "unicode_util_compat"], otp_app = "hackney", source = "hex", outer_checksum = "A4ECDAFF44297E9B5894AE499E9A070EA1888C84AFDD1FD9B7B2BC384950128E" }, + { name = "idna", version = "6.1.1", build_tools = ["rebar3"], requirements = ["unicode_util_compat"], otp_app = "idna", source = "hex", outer_checksum = "92376EB7894412ED19AC475E4A86F7B413C1B9FBB5BD16DCCD57934157944CEA" }, + { name = "metrics", version = "1.0.1", build_tools = ["rebar3"], requirements = [], otp_app = "metrics", source = "hex", outer_checksum = "69B09ADDDC4F74A40716AE54D140F93BEB0FB8978D8636EADED0C31B6F099F16" }, + { name = "mimerl", version = "1.2.0", build_tools = ["rebar3"], requirements = [], otp_app = "mimerl", source = "hex", outer_checksum = "F278585650AA581986264638EBF698F8BB19DF297F66AD91B18910DFC6E19323" }, + { name = "mist", version = "0.10.0", build_tools = ["gleam"], requirements = ["gleam_erlang", "gleam_stdlib", "gleam_otp", "glisten", "gleam_http"], otp_app = "mist", source = "hex", outer_checksum = "5AFBABABF738BAB8720F047471051E4E9D102CA4694C120DB899FA12AD5D180B" }, + { name = "parse_trans", version = "3.3.1", build_tools = ["rebar3"], requirements = [], otp_app = "parse_trans", source = "hex", outer_checksum = "07CD9577885F56362D414E8C4C4E6BDF10D43A8767ABB92D24CBE8B24C54888B" }, + { name = "ssl_verify_fun", version = "1.1.6", build_tools = ["mix", "rebar3", "make"], requirements = [], otp_app = "ssl_verify_fun", source = "hex", 
outer_checksum = "BDB0D2471F453C88FF3908E7686F86F9BE327D065CC1EC16FA4540197EA04680" }, + { name = "unicode_util_compat", version = "0.7.0", build_tools = ["rebar3"], requirements = [], otp_app = "unicode_util_compat", source = "hex", outer_checksum = "25EEE6D67DF61960CF6A794239566599B09E17E668D3700247BC498638152521" }, +] + +[requirements] +gleam_hackney = "~> 1.0" +gleam_http = "~> 3.0" +gleam_stdlib = "~> 0.19" +gleeunit = "~> 0.6" +mist = "~> 0.4" diff --git a/src/bahnhofname.gleam b/src/bahnhofname.gleam index 232f0f7..36ea035 100644 --- a/src/bahnhofname.gleam +++ b/src/bahnhofname.gleam @@ -4,14 +4,113 @@ import gleam/http.{Get} import gleam/bit_builder.{BitBuilder} import gleam/erlang/process import gleam/io +import gleam/int import gleam/string +import gleam/bit_string import gleam/list import gleam/map.{Map} import gleam/result.{lazy_unwrap} import gleam/uri import gleam/hackney +import gleam/option.{None, Some} import mist +fn do_distlist( + b: String, + distlist: List(Int), + grapheme: String, + new_distlist: List(Int), + last_dist: Int, +) { + case #(b, distlist) { + #("", _) -> list.reverse(new_distlist) + #(_, [distlist_hd, distlist_snd, ..distlist_tl]) -> { + let assert Ok(b_hd) = string.first(b) + let b_tl = string.drop_left(b, up_to: 1) + let diff = case #(b_hd, grapheme) { + #(a, b) if a != b -> 1 + _ -> 0 + } + let minimum = + int.min(int.min(last_dist + 1, distlist_snd + 1), distlist_hd + diff) + do_distlist( + b_tl, + [distlist_snd, ..distlist_tl], + grapheme, + [minimum, ..new_distlist], + minimum, + ) + } + } +} + +fn do_distance(a: String, b: String, distlist: List(Int), step: Int) { + case a { + "" -> result.unwrap(list.last(distlist), -1) + _ -> { + let assert Ok(src_hd) = string.first(a) + let src_tl = string.drop_left(a, up_to: 1) + let distlist = do_distlist(b, distlist, src_hd, [step], step) + do_distance(src_tl, b, distlist, step + 1) + } + } +} + +fn levenshtein(a: String, b: String) -> Int { + case #(a, b) { + #(a, b) if a == b -> 0 
+ #("", b) -> string.length(b) + #(a, "") -> string.length(a) + #(a, b) -> { + let distlist = list.range(0, string.length(b)) + do_distance(a, b, distlist, 1) + } + } +} + +fn unpercent(encoded: String) -> String { + let #([head], chunks) = + encoded + |> string.split(on: "%") + |> list.split(at: 1) + + let assert Ok(res) = + chunks + |> list.map(fn(str) { + case string.length(str) < 2 { + True -> bit_string.from_string(str) + False -> { + let assert Ok(codepoint) = + str + |> string.slice(at_index: 0, length: 2) + |> int.base_parse(16) + <<codepoint:8, string.drop_left(str, up_to: 2):utf8>> + } + } + }) + |> list.prepend(bit_string.from_string(head)) + |> bit_string.concat + |> bit_string.to_string + res +} + +fn guess_station(query: String, stations: Map(String, String)) -> String { + query + stations + |> map.keys + |> list.map(fn(a) { #(levenshtein(query, a), a) }) + |> list.fold( + from: #(string.length(query), query), + with: fn(a, b) { + case a.0 < b.0 { + True -> a + False -> b + } + }, + ) + |> fn(a: #(Int, String)) { a.1 } +} + fn the_lookup( query: String, stations: Map(String, String), @@ -21,7 +120,7 @@ fn the_lookup( |> lazy_unwrap(fn() { io.println(query) map.get(stations, query) - |> lazy_unwrap(fn() { "unknown" }) + |> lazy_unwrap(fn() { guess_station(query, stations) }) }) } @@ -32,13 +131,14 @@ fn lookup_station( baseurl: String, ) -> Response(BitBuilder) { let #(code, text) = case request { - Request(method: Get, path: "/help", ..) -> #( + Request(method: Get, path: "/help", ..) + | Request(method: Get, path: "/", ..) -> #( 200, - "ds100 → Name: " <> baseurl <> "/FF\n" <> "Name → ds100: " <> baseurl <> "/Frankfurt Hbf", + "ds100 → Name: " <> baseurl <> "/NN\n" <> "Name → ds100: " <> baseurl <> "/Nürnberg Hbf", ) Request(method: Get, path: "/" <> path, ..) -> #( 200, - the_lookup(string.replace(path, each: "%20", with: " "), stations, ds100s), + the_lookup(unpercent(path), stations, ds100s), ) _ -> #(404, "intended usage is e.g. 
curl " <> baseurl <> "/FF") } @@ -49,6 +149,11 @@ fn lookup_station( "x-data-source", "https://data.deutschebahn.com/dataset/data-betriebsstellen.html", ) + |> response.prepend_header( + "x-sources-at", + "https://stuebinm.eu/git/bahnhof.name", + ) + |> response.prepend_header("content-type", "text/plain; charset=utf8") |> response.set_body(body) } @@ -89,8 +194,8 @@ pub fn main() { |> list.map(fn(a) { #(a.1, a.0) }) |> map.from_list - let assert Ok(_) = mist.run_service( - 1234, + mist.run_service( + 2345, fn(req) { lookup_station(req, stationmap, ds100map, baseurl) }, max_body_limit: 100, ) diff --git a/test/bahnhofname_test.gleam b/test/bahnhofname_test.gleam new file mode 100644 index 0000000..3831e7a --- /dev/null +++ b/test/bahnhofname_test.gleam @@ -0,0 +1,12 @@ +import gleeunit +import gleeunit/should + +pub fn main() { + gleeunit.main() +} + +// gleeunit test functions end in `_test` +pub fn hello_world_test() { + 1 + |> should.equal(1) +}