From 1a72eaeaaf87bb56925bd8a272db5535767d7391 Mon Sep 17 00:00:00 2001 From: Florian RICHER Date: Thu, 16 Jun 2022 23:13:58 +0200 Subject: [PATCH] first commit --- .env | 1 + .gitignore | 1 + .vscode/launch.json | 45 ++ Cargo.lock | 1557 ++++++++++++++++++++++++++++++++++++ Cargo.toml | 11 + src/extractor/artists.rs | 110 +++ src/extractor/labels.rs | 110 +++ src/extractor/mod.rs | 103 +++ src/extractor/musicians.rs | 110 +++ src/extractor/releases.rs | 132 +++ src/main.rs | 7 + 11 files changed, 2187 insertions(+) create mode 100644 .env create mode 100644 .gitignore create mode 100644 .vscode/launch.json create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 src/extractor/artists.rs create mode 100644 src/extractor/labels.rs create mode 100644 src/extractor/mod.rs create mode 100644 src/extractor/musicians.rs create mode 100644 src/extractor/releases.rs create mode 100644 src/main.rs diff --git a/.env b/.env new file mode 100644 index 0000000..ca380fd --- /dev/null +++ b/.env @@ -0,0 +1 @@ +URL=http://vk.gy/database \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..95dfc79 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,45 @@ +{ + // Utilisez IntelliSense pour en savoir plus sur les attributs possibles. + // Pointez pour afficher la description des attributs existants. + // Pour plus d'informations, visitez : https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "type": "lldb", + "request": "launch", + "name": "Debug executable 'data_extractor'", + "cargo": { + "args": [ + "build", + "--bin=data_extractor", + "--package=data_extractor" + ], + "filter": { + "name": "data_extractor", + "kind": "bin" + } + }, + "args": [], + "cwd": "${workspaceFolder}" + }, + { + "type": "lldb", + "request": "launch", + "name": "Debug unit tests in executable 'data_extractor'", + "cargo": { + "args": [ + "test", + "--no-run", + "--bin=data_extractor", + "--package=data_extractor" + ], + "filter": { + "name": "data_extractor", + "kind": "bin" + } + }, + "args": [], + "cwd": "${workspaceFolder}" + } + ] +} \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..d999f63 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,1557 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "base64" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bumpalo" +version = "3.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37ccbd214614c6783386c1af30caf03192f17891059cecc394b4fb119e363de3" + +[[package]] +name = "byteorder" +version = "1.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" + +[[package]] +name = "bytes" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4872d67bab6358e59559027aa3b9157c53d9358c51423c17554809a8858e0f8" + +[[package]] +name = "cc" +version = "1.0.73" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "convert_case" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" + +[[package]] +name = "core-foundation" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "194a7a9e6de53fa55116934067c844d9d749312f75c6f6d0980e8c252f8c2146" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc" + +[[package]] +name = "cssparser" +version = "0.27.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "754b69d351cdc2d8ee09ae203db831e005560fc6030da058f86ad60c92a9cb0a" +dependencies = [ + "cssparser-macros", + "dtoa-short", + "itoa 0.4.8", + "matches", + "phf 0.8.0", + "proc-macro2", + "quote", + "smallvec", + "syn", +] + +[[package]] +name = "cssparser-macros" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfae75de57f2b2e85e8768c3ea840fd159c8f33e2b6522c7835b7abac81be16e" +dependencies = [ + "quote", + "syn", +] + +[[package]] +name = "data_extractor" +version = "0.1.0" +dependencies = [ + "dotenvy", + "reqwest", + "scraper", +] + +[[package]] +name = "derive_more" +version = "0.99.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321" +dependencies = [ + "convert_case", + "proc-macro2", + "quote", + "rustc_version", + "syn", +] + +[[package]] +name = "dirs" +version = "4.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca3aa72a6f96ea37bbc5aa912f6788242832f75369bdfdadcb0e38423f100059" +dependencies = [ + "dirs-sys", +] + +[[package]] +name = "dirs-sys" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b1d1d91c932ef41c0f2663aa8b0ca0342d444d842c06914aa0a7e352d0bada6" +dependencies = [ + "libc", + "redox_users", + "winapi", +] + +[[package]] +name = "dotenvy" +version = "0.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e851a83c30366fd01d75b913588e95e74a1705c1ecc5d58b1f8e1a6d556525f" +dependencies = [ + "dirs", +] + +[[package]] +name = "dtoa" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56899898ce76aaf4a0f24d914c97ea6ed976d42fec6ad33fcbb0a1103e07b2b0" + +[[package]] +name = "dtoa-short" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bde03329ae10e79ede66c9ce4dc930aa8599043b0743008548680f25b91502d6" +dependencies = [ + "dtoa", +] + +[[package]] +name = "ego-tree" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a68a4904193147e0a8dec3314640e6db742afd5f6e634f428a6af230d9b3591" + +[[package]] +name = "encoding_rs" +version = "0.8.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9852635589dc9f9ea1b6fe9f05b50ef208c85c834a562f0c6abb1c475736ec2b" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "fastrand" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3fcf0cee53519c866c09b5de1f6c56ff9d647101f81c1964fa632e148896cdf" +dependencies = [ + "instant", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foreign-types" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" +dependencies = [ + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-shared" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" + +[[package]] +name = "form_urlencoded" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fc25a87fa4fd2094bffb06925852034d90a17f0d1e05197d4956d3555752191" +dependencies = [ + "matches", + "percent-encoding", +] + +[[package]] +name = "futf" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843" +dependencies = [ + "mac", + "new_debug_unreachable", +] + +[[package]] +name = "futures-channel" +version = "0.3.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3083ce4b914124575708913bca19bfe887522d6e2e6d0952943f5eac4a74010" +dependencies = [ + "futures-core", +] + +[[package]] +name = "futures-core" +version = "0.3.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c09fd04b7e4073ac7156a9539b57a484a8ea920f79c7c675d05d289ab6110d3" + +[[package]] +name = "futures-io" +version = "0.3.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc4045962a5a5e935ee2fdedaa4e08284547402885ab326734432bed5d12966b" + +[[package]] +name = "futures-sink" +version = "0.3.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21163e139fa306126e6eedaf49ecdb4588f939600f0b1e770f4205ee4b7fa868" + +[[package]] +name = "futures-task" +version = "0.3.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c66a976bf5909d801bbef33416c41372779507e7a6b3a5e25e4749c58f776a" + +[[package]] +name = "futures-util" +version = "0.3.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8b7abd5d659d9b90c8cba917f6ec750a74e2dc23902ef9cd4cc8c8b22e6036a" +dependencies = [ + "futures-core", + "futures-io", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + +[[package]] +name = "fxhash" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" +dependencies = [ + "byteorder", +] + +[[package]] +name = "getopts" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5" +dependencies = [ + "unicode-width", +] + +[[package]] +name = "getrandom" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.9.0+wasi-snapshot-preview1", +] + +[[package]] +name = "getrandom" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4eb1a864a501629691edf6c15a593b7a51eebaa1e8468e9ddc623de7c9b58ec6" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.11.0+wasi-snapshot-preview1", +] + +[[package]] +name = "h2" +version = "0.3.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37a82c6d637fc9515a4694bbf1cb2457b79d81ce52b3108bdeea58b07dd34a57" +dependencies = [ + "bytes", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "hashbrown" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" + +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + +[[package]] +name = "html5ever" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bea68cab48b8459f17cf1c944c67ddc572d272d9f2b274140f223ecb1da4a3b7" +dependencies = [ + "log", + "mac", + "markup5ever", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "http" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75f43d41e26995c17e71ee126451dd3941010b0514a81a9d11f3b341debc2399" +dependencies = [ + "bytes", + "fnv", + "itoa 1.0.2", +] + +[[package]] +name = "http-body" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5f38f16d184e36f2408a55281cd658ecbd3ca05cce6d6510a176eca393e26d1" +dependencies = [ + "bytes", + "http", + "pin-project-lite", +] + +[[package]] +name = "httparse" +version = "1.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "496ce29bb5a52785b44e0f7ca2847ae0bb839c9bd28f69acac9b99d461c0c04c" + +[[package]] +name = "httpdate" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421" + +[[package]] +name = "hyper" +version = "0.14.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42dc3c131584288d375f2d07f822b0cb012d8c6fb899a5b9fdb3cb7eb9b6004f" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "httparse", + "httpdate", + "itoa 1.0.2", + "pin-project-lite", + "socket2", + "tokio", + "tower-service", + "tracing", + "want", +] + +[[package]] +name = "hyper-tls" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" +dependencies = [ + "bytes", + "hyper", + "native-tls", + "tokio", + "tokio-native-tls", +] + +[[package]] +name = "idna" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "418a0a6fab821475f634efe3ccc45c013f742efe03d853e8d3355d5cb850ecf8" +dependencies = [ + "matches", + "unicode-bidi", + "unicode-normalization", +] + +[[package]] +name = "indexmap" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6012d540c5baa3589337a98ce73408de9b5a25ec9fc2c6fd6be8f0d39e0ca5a" +dependencies = [ + "autocfg", + "hashbrown", +] + +[[package]] +name = "instant" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "ipnet" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "879d54834c8c76457ef4293a689b2a8c59b076067ad77b15efafbb05f92a592b" + +[[package]] +name = "itoa" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" + +[[package]] +name = "itoa" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "112c678d4050afce233f4f2852bb2eb519230b3cf12f33585275537d7e41578d" + +[[package]] +name = "js-sys" +version = "0.3.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3fac17f7123a73ca62df411b1bf727ccc805daa070338fda671c86dac1bdc27" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.126" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" + +[[package]] +name = "lock_api" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "327fa5b6a6940e4699ec49a9beae1ea4845c6bab9314e4f84ac68742139d8c53" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "mac" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" + +[[package]] +name = "markup5ever" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2629bb1404f3d34c2e921f21fd34ba00b206124c81f65c50b43b6aaefeb016" +dependencies = [ + "log", + "phf 0.10.1", + "phf_codegen 0.10.0", + "string_cache", + "string_cache_codegen", + "tendril", +] + +[[package]] +name = "matches" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3e378b66a060d48947b590737b30a1be76706c8dd7b8ba0f2fe3989c68a853f" + +[[package]] +name = "memchr" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" + +[[package]] +name = "mime" +version = "0.3.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a60c7ce501c71e03a9c9c0d35b861413ae925bd979cc7a4e30d060069aaac8d" + +[[package]] +name = "mio" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "713d550d9b44d89174e066b7a6217ae06234c10cb47819a88290d2b353c31799" +dependencies = [ + "libc", + "log", + "wasi 0.11.0+wasi-snapshot-preview1", + "windows-sys", +] + +[[package]] +name = "native-tls" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd7e2f3618557f980e0b17e8856252eee3c97fa12c54dff0ca290fb6266ca4a9" +dependencies = [ + "lazy_static", + "libc", + "log", + "openssl", + "openssl-probe", + "openssl-sys", + "schannel", + "security-framework", + "security-framework-sys", + "tempfile", +] + +[[package]] +name = "new_debug_unreachable" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54" + +[[package]] +name = "nodrop" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb" + +[[package]] +name = "num_cpus" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "once_cell" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7709cef83f0c1f58f666e746a08b21e0085f7440fa6a29cc194d68aac97a4225" + +[[package]] +name = "openssl" +version = "0.10.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb81a6430ac911acb25fe5ac8f1d2af1b4ea8a4fdfda0f1ee4292af2e2d8eb0e" +dependencies = [ + "bitflags", + "cfg-if", + "foreign-types", + "libc", + "once_cell", + "openssl-macros", + "openssl-sys", +] + +[[package]] +name = "openssl-macros" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b501e44f11665960c7e7fcf062c7d96a14ade4aa98116c004b2e37b5be7d736c" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "openssl-probe" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" + +[[package]] +name = "openssl-sys" +version = "0.9.74" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "835363342df5fba8354c5b453325b110ffd54044e588c539cf2f20a8014e4cb1" +dependencies = [ + "autocfg", + "cc", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "parking_lot" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09a279cbf25cb0757810394fbc1e359949b59e348145c643a939a525692e6929" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-sys", +] + +[[package]] +name = "percent-encoding" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e" + +[[package]] +name = "phf" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3dfb61232e34fcb633f43d12c58f83c1df82962dcdfa565a4e866ffc17dafe12" +dependencies = [ + "phf_macros", + "phf_shared 0.8.0", + "proc-macro-hack", +] + +[[package]] +name = "phf" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fabbf1ead8a5bcbc20f5f8b939ee3f5b0f6f281b6ad3468b84656b658b455259" +dependencies = [ + "phf_shared 0.10.0", +] + +[[package]] +name = "phf_codegen" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbffee61585b0411840d3ece935cce9cb6321f01c45477d30066498cd5e1a815" +dependencies = [ + "phf_generator 0.8.0", + "phf_shared 0.8.0", +] + +[[package]] +name = "phf_codegen" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fb1c3a8bc4dd4e5cfce29b44ffc14bedd2ee294559a294e2a4d4c9e9a6a13cd" +dependencies = [ + "phf_generator 0.10.0", + "phf_shared 0.10.0", +] + +[[package]] +name = "phf_generator" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17367f0cc86f2d25802b2c26ee58a7b23faeccf78a396094c13dced0d0182526" +dependencies = [ + "phf_shared 0.8.0", + "rand 0.7.3", +] + +[[package]] +name = "phf_generator" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6" +dependencies = [ + "phf_shared 0.10.0", + "rand 0.8.5", +] + +[[package]] +name = "phf_macros" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f6fde18ff429ffc8fe78e2bf7f8b7a5a5a6e2a8b58bc5a9ac69198bbda9189c" +dependencies = [ + "phf_generator 0.8.0", + "phf_shared 0.8.0", + "proc-macro-hack", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "phf_shared" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c00cf8b9eafe68dde5e9eaa2cef8ee84a9336a47d566ec55ca16589633b65af7" +dependencies = [ + "siphasher", +] + +[[package]] +name = "phf_shared" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096" +dependencies = [ + "siphasher", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkg-config" +version = "0.3.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1df8c4ec4b0627e53bdf214615ad287367e482558cf84b109250b37464dc03ae" + +[[package]] +name = "ppv-lite86" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872" + +[[package]] +name = "precomputed-hash" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" + +[[package]] +name = "proc-macro-hack" +version = "0.5.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" + +[[package]] +name = "proc-macro2" +version = "1.0.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c54b25569025b7fc9651de43004ae593a75ad88543b17178aa5e1b9c4f15f56f" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1feb54ed693b93a84e14094943b84b7c4eae204c512b7ccb95ab0c66d278ad1" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" +dependencies = [ + "getrandom 0.1.16", + "libc", + "rand_chacha 0.2.2", + "rand_core 0.5.1", + "rand_hc", + "rand_pcg", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha 0.3.1", + "rand_core 0.6.3", +] + +[[package]] +name = "rand_chacha" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" +dependencies = [ + "ppv-lite86", + "rand_core 0.5.1", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core 0.6.3", +] + +[[package]] +name = "rand_core" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" +dependencies = [ + "getrandom 0.1.16", +] + +[[package]] +name = "rand_core" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" +dependencies = [ + "getrandom 0.2.7", +] + +[[package]] +name = "rand_hc" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" +dependencies = [ + "rand_core 0.5.1", +] + +[[package]] +name = "rand_pcg" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16abd0c1b639e9eb4d7c50c0b8100b0d0f849be2349829c740fe8e6eb4816429" +dependencies = [ + "rand_core 0.5.1", +] + +[[package]] +name = "redox_syscall" +version = "0.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62f25bc4c7e55e0b0b7a1d43fb893f4fa1361d0abe38b9ce4f323c2adfe6ef42" +dependencies = [ + "bitflags", +] + +[[package]] +name = "redox_users" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b" +dependencies = [ + "getrandom 0.2.7", + "redox_syscall", + "thiserror", +] + +[[package]] +name = "remove_dir_all" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" +dependencies = [ + "winapi", +] + +[[package]] +name = "reqwest" +version = "0.11.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b75aa69a3f06bbcc66ede33af2af253c6f7a86b1ca0033f60c580a27074fbf92" +dependencies = [ + "base64", + "bytes", + "encoding_rs", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "hyper", + "hyper-tls", + "ipnet", + "js-sys", + "lazy_static", + "log", + "mime", + "native-tls", + "percent-encoding", + "pin-project-lite", + "serde", + "serde_json", + "serde_urlencoded", + "tokio", + "tokio-native-tls", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "winreg", +] + +[[package]] +name = "rustc_version" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" +dependencies = [ + "semver", +] + +[[package]] +name = "ryu" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3f6f92acf49d1b98f7a81226834412ada05458b7364277387724a237f062695" + +[[package]] +name = "schannel" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88d6731146462ea25d9244b2ed5fd1d716d25c52e4d54aa4fb0f3c4e9854dbe2" +dependencies = [ + "lazy_static", + "windows-sys", +] + +[[package]] +name = "scopeguard" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" + +[[package]] +name = "scraper" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5684396b456f3eb69ceeb34d1b5cb1a2f6acf7ca4452131efa3ba0ee2c2d0a70" +dependencies = [ + "cssparser", + "ego-tree", + "getopts", + "html5ever", + "matches", + "selectors", + "smallvec", + "tendril", +] + +[[package]] +name = "security-framework" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dc14f172faf8a0194a3aded622712b0de276821addc574fa54fc0a1167e10dc" +dependencies = [ + "bitflags", + "core-foundation", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0160a13a177a45bfb43ce71c01580998474f556ad854dcbca936dd2841a5c556" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "selectors" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df320f1889ac4ba6bc0cdc9c9af7af4bd64bb927bccdf32d81140dc1f9be12fe" +dependencies = [ + "bitflags", + "cssparser", + "derive_more", + "fxhash", + "log", + "matches", + "phf 0.8.0", + "phf_codegen 0.8.0", + "precomputed-hash", + "servo_arc", + "smallvec", + "thin-slice", +] + +[[package]] +name = "semver" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a41d061efea015927ac527063765e73601444cdc344ba855bc7bd44578b25e1c" + +[[package]] +name = "serde" +version = "1.0.137" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61ea8d54c77f8315140a05f4c7237403bf38b72704d031543aa1d16abbf517d1" + +[[package]] +name = "serde_json" +version = "1.0.81" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b7ce2b32a1aed03c558dc61a5cd328f15aff2dbc17daad8fb8af04d2100e15c" +dependencies = [ + "itoa 1.0.2", + "ryu", + "serde", +] + +[[package]] +name = "serde_urlencoded" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +dependencies = [ + "form_urlencoded", + "itoa 1.0.2", + "ryu", + "serde", +] + +[[package]] +name = "servo_arc" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d98238b800e0d1576d8b6e3de32827c2d74bee68bb97748dcf5071fb53965432" +dependencies = [ + "nodrop", + "stable_deref_trait", +] + +[[package]] +name = "siphasher" +version = "0.3.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de" + +[[package]] +name = "slab" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb703cfe953bccee95685111adeedb76fabe4e97549a58d16f03ea7b9367bb32" + +[[package]] +name = "smallvec" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83" + +[[package]] +name = "socket2" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66d72b759436ae32898a2af0a14218dbf55efde3feeb170eb623637db85ee1e0" +dependencies = [ + "libc", + "winapi", +] + +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + +[[package]] +name = "string_cache" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "213494b7a2b503146286049378ce02b482200519accc31872ee8be91fa820a08" +dependencies = [ + "new_debug_unreachable", + "once_cell", + "parking_lot", + "phf_shared 0.10.0", + "precomputed-hash", + "serde", +] + +[[package]] +name = "string_cache_codegen" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bb30289b722be4ff74a408c3cc27edeaad656e06cb1fe8fa9231fa59c728988" +dependencies = [ + "phf_generator 0.10.0", + "phf_shared 0.10.0", + "proc-macro2", + "quote", +] + +[[package]] +name = "syn" +version = "1.0.96" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0748dd251e24453cb8717f0354206b91557e4ec8703673a4b30208f2abaf1ebf" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tempfile" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cdb1ef4eaeeaddc8fbd371e5017057064af0911902ef36b39801f67cc6d79e4" +dependencies = [ + "cfg-if", + "fastrand", + "libc", + "redox_syscall", + "remove_dir_all", + "winapi", +] + +[[package]] +name = "tendril" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0" +dependencies = [ + "futf", + "mac", + "utf-8", +] + +[[package]] +name = "thin-slice" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8eaa81235c7058867fa8c0e7314f33dcce9c215f535d1913822a2b3f5e289f3c" + +[[package]] +name = "thiserror" +version = "1.0.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd829fe32373d27f76265620b5309d0340cb8550f523c1dda251d6298069069a" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0396bc89e626244658bef819e22d0cc459e795a5ebe878e6ec336d1674a8d79a" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tinyvec" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" + +[[package]] +name = "tokio" +version = "1.19.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c51a52ed6686dd62c320f9b89299e9dfb46f730c7a48e635c19f21d116cb1439" +dependencies = [ + "bytes", + "libc", + "memchr", + "mio", + "num_cpus", + "once_cell", + "pin-project-lite", + "socket2", + "winapi", +] + +[[package]] +name = "tokio-native-tls" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7d995660bd2b7f8c1568414c1126076c13fbb725c40112dc0120b78eb9b717b" +dependencies = [ + "native-tls", + "tokio", +] + +[[package]] +name = "tokio-util" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc463cd8deddc3770d20f9852143d50bf6094e640b485cb2e189a2099085ff45" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", + "tracing", +] + +[[package]] +name = "tower-service" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "360dfd1d6d30e05fda32ace2c8c70e9c0a9da713275777f5a4dbb8a1893930c6" + +[[package]] +name = "tracing" +version = "0.1.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a400e31aa60b9d44a52a8ee0343b5b18566b03a8321e0d321f695cf56e940160" +dependencies = [ + "cfg-if", + "pin-project-lite", + "tracing-core", +] + +[[package]] +name = "tracing-core" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7709595b8878a4965ce5e87ebf880a7d39c9afc6837721b21a5a816a8117d921" +dependencies = [ + "once_cell", +] + +[[package]] +name = "try-lock" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59547bce71d9c38b83d9c0e92b6066c4253371f15005def0c30d9657f50c7642" + +[[package]] +name = "unicode-bidi" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "099b7128301d285f79ddd55b9a83d5e6b9e97c92e0ea0daebee7263e932de992" + +[[package]] +name = "unicode-ident" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5bd2fe26506023ed7b5e1e315add59d6f584c621d037f9368fea9cfb988f368c" + +[[package]] +name = "unicode-normalization" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d54590932941a9e9266f0832deed84ebe1bf2e4c9e4a3554d393d18f5e854bf9" +dependencies = [ + "tinyvec", +] + +[[package]] +name = "unicode-width" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973" + +[[package]] +name = "url" +version = "2.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a507c383b2d33b5fc35d1861e77e6b383d158b2da5e14fe51b83dfedf6fd578c" +dependencies = [ + "form_urlencoded", + "idna", + "matches", + "percent-encoding", +] + +[[package]] +name = "utf-8" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "want" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ce8a968cb1cd110d136ff8b819a556d6fb6d919363c61534f6860c7eb172ba0" +dependencies = [ + "log", + "try-lock", +] + +[[package]] +name = "wasi" +version = "0.9.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "wasm-bindgen" +version = "0.2.81" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c53b543413a17a202f4be280a7e5c62a1c69345f5de525ee64f8cfdbc954994" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.81" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5491a68ab4500fa6b4d726bd67408630c3dbe9c4fe7bda16d5c82a1fd8c7340a" +dependencies = [ + "bumpalo", + "lazy_static", + "log", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de9a9cec1733468a8c657e57fa2413d2ae2c0129b95e87c5b72b8ace4d13f31f" +dependencies = [ + "cfg-if", + "js-sys", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.81" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c441e177922bc58f1e12c022624b6216378e5febc2f0533e41ba443d505b80aa" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.81" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d94ac45fcf608c1f45ef53e748d35660f168490c10b23704c7779ab8f5c3048" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.81" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a89911bd99e5f3659ec4acf9c4d93b0a90fe4a2a11f15328472058edc5261be" + +[[package]] +name = "web-sys" +version = "0.3.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fed94beee57daf8dd7d51f2b15dc2bcde92d7a72304cdf662a4371008b71b90" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-sys" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2" +dependencies = [ + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_msvc" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47" + +[[package]] +name = "windows_i686_gnu" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6" + +[[package]] +name = "windows_i686_msvc" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" + +[[package]] +name = "winreg" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "80d0f4e272c85def139476380b12f9ac60926689dd2e01d4923222f40580869d" +dependencies = [ + "winapi", +] diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..f27b67f --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "data_extractor" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +reqwest = {version = "0.11", features = ["blocking"]} +scraper = "0.13.0" +dotenvy = "0.15.1" \ No newline at end of file diff --git a/src/extractor/artists.rs b/src/extractor/artists.rs new file mode 100644 index 0000000..210af5e --- /dev/null +++ b/src/extractor/artists.rs @@ -0,0 +1,110 @@ +use scraper::Html; + +use super::{Extractor, trim_whitespace, parse_date}; + +#[derive(Debug, Default)] +pub struct Artist { + pub name: String, + pub url: String, + pub date: String, + pub author: String, + pub author_url: String, +} + +impl Extractor for Artist { + type Output = Self; + fn extract_all(document: Html) -> Vec { + let mut artists : Vec = Vec::new(); + + let selector = scraper::Selector::parse("ul>li").unwrap(); + let select = document.select(&selector); + for el in select { + artists.push(Self::extract(Html::parse_fragment(&el.html()))); + } + + artists + } + + fn extract(document: Html) -> Self { + let a_el = document.select(&scraper::Selector::parse("a").unwrap()).next().unwrap(); + + let title = a_el.inner_html(); + let url = a_el.value().attr("href").unwrap(); + + let more_info_el = document.select(&scraper::Selector::parse("div").unwrap()).next().unwrap(); + let date = more_info_el.inner_html(); + + let author_el = more_info_el.select(&scraper::Selector::parse("a").unwrap()).next().unwrap(); + let author = author_el.inner_html(); + let author_url = author_el.value().attr("href").unwrap(); + + Self { + name: trim_whitespace(&title), + url: trim_whitespace(url), + date: parse_date(&date), + author: trim_whitespace(&author), + author_url: trim_whitespace(author_url), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + const EXAMPLES: &'static str = +r#" + +"#; + + #[test] + fn extract_all_must_return_two_result() { + let document = scraper::Html::parse_fragment(EXAMPLES); + let artists = Artist::extract_all(document); + assert_eq!(artists.len(), 2); + assert_eq!(artists[0].author, "suji"); + assert_eq!(artists[0].author_url, "/users/suji/"); + assert_eq!(artists[0].name, "BabyKingdom"); + assert_eq!(artists[0].url, "/artists/babykingdom/"); + assert_eq!(artists[0].date, "2022-06-16 14:12:03"); + assert_eq!(artists[1].author, "kumika"); + assert_eq!(artists[1].author_url, "/users/kumika/"); + assert_eq!(artists[1].name, "HIRO"); + assert_eq!(artists[1].url, "/artists/hiro-ruvish/"); + assert_eq!(artists[1].date, "2022-06-16 11:11:06"); + } + + const EXAMPLE: &'static str = r#" + BabyKingdom +
+ 2022-06-16 14:12:03 by + suji +
+ "#; + + #[test] + fn extract_must_be_return_correct_value() { + let document = scraper::Html::parse_fragment(EXAMPLE); + let artist = Artist::extract(document); + assert_eq!(artist.author, "suji"); + assert_eq!(artist.author_url, "/users/suji/"); + assert_eq!(artist.name, "BabyKingdom"); + assert_eq!(artist.url, "/artists/babykingdom/"); + assert_eq!(artist.date, "2022-06-16 14:12:03"); + } +} \ No newline at end of file diff --git a/src/extractor/labels.rs b/src/extractor/labels.rs new file mode 100644 index 0000000..da83e43 --- /dev/null +++ b/src/extractor/labels.rs @@ -0,0 +1,110 @@ +use scraper::Html; + +use super::{Extractor, trim_whitespace, parse_date}; + +#[derive(Debug, Default)] +pub struct Label { + pub name: String, + pub url: String, + pub date: String, + pub author: String, + pub author_url: String, +} + +impl Extractor for Label { + type Output = Self; + fn extract_all(document: Html) -> Vec { + let mut artists : Vec = Vec::new(); + + let selector = scraper::Selector::parse("ul>li").unwrap(); + let select = document.select(&selector); + for el in select { + artists.push(Self::extract(Html::parse_fragment(&el.html()))); + } + + artists + } + + fn extract(document: Html) -> Self { + let a_el = document.select(&scraper::Selector::parse("a").unwrap()).next().unwrap(); + + let title = a_el.inner_html(); + let url = a_el.value().attr("href").unwrap(); + + let more_info_el = document.select(&scraper::Selector::parse("div").unwrap()).next().unwrap(); + let date = more_info_el.inner_html(); + + let author_el = more_info_el.select(&scraper::Selector::parse("a").unwrap()).next().unwrap(); + let author = author_el.inner_html(); + let author_url = author_el.value().attr("href").unwrap(); + + Self { + name: trim_whitespace(&title), + url: trim_whitespace(url), + date: parse_date(&date), + author: trim_whitespace(&author), + author_url: trim_whitespace(author_url), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + const EXAMPLES: &'static str = +r#" + +"#; + + #[test] + fn extract_all_must_return_two_result() { + let document = scraper::Html::parse_fragment(EXAMPLES); + let artists = Label::extract_all(document); + assert_eq!(artists.len(), 2); + assert_eq!(artists[0].name, "ARCANUMANIA Records."); + assert_eq!(artists[0].url, "/labels/arcanumania-records/"); + assert_eq!(artists[0].date, "2022-06-13 21:27:11"); + assert_eq!(artists[0].author, "haru"); + assert_eq!(artists[0].author_url, "/users/haru/"); + assert_eq!(artists[1].name, "Omega Code"); + assert_eq!(artists[1].url, "/labels/omega-code/"); + assert_eq!(artists[1].date, "2022-06-12 18:43:41"); + assert_eq!(artists[1].author, "inartistic"); + assert_eq!(artists[1].author_url, "/users/inartistic/"); + } + + const EXAMPLE: &'static str = r#" + ARCANUMANIA Records. +
+ 2022-06-13 21:27:11 by + haru +
+ "#; + + #[test] + fn extract_must_be_return_correct_value() { + let document = scraper::Html::parse_fragment(EXAMPLE); + let artist = Label::extract(document); + assert_eq!(artist.author, "haru"); + assert_eq!(artist.author_url, "/users/haru/"); + assert_eq!(artist.name, "ARCANUMANIA Records."); + assert_eq!(artist.url, "/labels/arcanumania-records/"); + assert_eq!(artist.date, "2022-06-13 21:27:11"); + } +} \ No newline at end of file diff --git a/src/extractor/mod.rs b/src/extractor/mod.rs new file mode 100644 index 0000000..2b7dff6 --- /dev/null +++ b/src/extractor/mod.rs @@ -0,0 +1,103 @@ +use std::env; + +use scraper::Html; + +mod artists; +mod labels; +mod musicians; +mod releases; + +pub trait Extractor { + type Output; + fn extract_all(select: Html) -> Vec; + fn extract(select: Html) -> Self::Output; +} + +const RECENTLY_UPDATED_SELECTOR : &'static str= "body>div.c2>div"; +const CATEGORY_TITLE_SELECTOR : &'static str= "h2>a"; +const NEWS_TITLE_SELECTOR : &'static str= "div.text"; + +#[derive(Debug, Default)] +pub struct Informations { + pub artists: Vec, + pub labels: Vec, + pub musicians: Vec, + pub releases: Vec, +} + +pub fn extract() { + let response = reqwest::blocking::get(env::var("URL").unwrap()) + .unwrap() + .text() + .unwrap(); + + let document = scraper::Html::parse_document(&response); + + let div_selector = scraper::Selector::parse(RECENTLY_UPDATED_SELECTOR).unwrap(); + + let divs = document.select(&div_selector); + + let mut categories = Informations::default(); + + for div in divs { + if let Some(category) = div.select(&scraper::Selector::parse(CATEGORY_TITLE_SELECTOR).unwrap()).next() { + + let category_title = category.inner_html(); + + match category_title.as_str() { + "artists" => { + let content_div = div.select(&scraper::Selector::parse(NEWS_TITLE_SELECTOR).unwrap()).next().unwrap(); + categories.artists = artists::Artist::extract_all(scraper::Html::parse_fragment(&content_div.inner_html())); + }, + "labels" => { + let content_div = div.select(&scraper::Selector::parse(NEWS_TITLE_SELECTOR).unwrap()).next().unwrap(); + categories.labels = labels::Label::extract_all(scraper::Html::parse_fragment(&content_div.inner_html())); + } + "musicians" => { + let content_div = div.select(&scraper::Selector::parse(NEWS_TITLE_SELECTOR).unwrap()).next().unwrap(); + categories.musicians = musicians::Musician::extract_all(scraper::Html::parse_fragment(&content_div.inner_html())); + } + "releases" => { + let content_div = div.select(&scraper::Selector::parse(NEWS_TITLE_SELECTOR).unwrap()).next().unwrap(); + categories.releases = releases::Release::extract_all(scraper::Html::parse_fragment(&content_div.inner_html())); + } + _ => {} + } + } + } + + println!("{:#?}", categories); +} + +pub(self) fn trim_whitespace(s: &str) -> String { + s.replace('\t', " ").replace("\n", " ").trim().to_owned() +} + +pub(self) fn parse_date(s: &str) -> String { + trim_whitespace(s)[0..19].to_owned() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_trim_whitespace() { + assert_eq!(trim_whitespace("Hello World"), "Hello World"); + } + + #[test] + fn test_remove_whitespace_with_tabs() { + assert_eq!(trim_whitespace("\tHello\tWorld\t"), "Hello World"); + } + + #[test] + fn test_remove_whitespace_with_newlines() { + assert_eq!(trim_whitespace("\nHello\nWorld\n"), "Hello World"); + } + + #[test] + fn test_parse_date() { + assert_eq!(parse_date("2022-06-16 14:12:03 by suji"), "2022-06-16 14:12:03"); + } +} \ No newline at end of file diff --git a/src/extractor/musicians.rs b/src/extractor/musicians.rs new file mode 100644 index 0000000..274a5f1 --- /dev/null +++ b/src/extractor/musicians.rs @@ -0,0 +1,110 @@ +use scraper::Html; + +use super::{Extractor, trim_whitespace, parse_date}; + +#[derive(Debug, Default)] +pub struct Musician { + pub name: String, + pub url: String, + pub date: String, + pub author: String, + pub author_url: String, +} + +impl Extractor for Musician { + type Output = Self; + fn extract_all(document: Html) -> Vec { + let mut artists : Vec = Vec::new(); + + let selector = scraper::Selector::parse("ul>li").unwrap(); + let select = document.select(&selector); + for el in select { + artists.push(Self::extract(Html::parse_fragment(&el.html()))); + } + + artists + } + + fn extract(document: Html) -> Self { + let a_el = document.select(&scraper::Selector::parse("a").unwrap()).next().unwrap(); + + let title = a_el.inner_html(); + let url = a_el.value().attr("href").unwrap(); + + let more_info_el = document.select(&scraper::Selector::parse("div").unwrap()).next().unwrap(); + let date = more_info_el.inner_html(); + + let author_el = more_info_el.select(&scraper::Selector::parse("a").unwrap()).next().unwrap(); + let author = author_el.inner_html(); + let author_url = author_el.value().attr("href").unwrap(); + + Self { + name: trim_whitespace(&title), + url: trim_whitespace(url), + date: parse_date(&date), + author: trim_whitespace(&author), + author_url: trim_whitespace(author_url), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + const EXAMPLES: &'static str = +r#" +
    +
  • + HIKO +
    + 2022-06-16 14:12:02 by + suji +
    +
  • +
  • + HIRO +
    + 2022-06-16 11:11:25 by + kumika +
    +
  • +
+"#; + + #[test] + fn extract_all_must_return_two_result() { + let document = scraper::Html::parse_fragment(EXAMPLES); + let artists = Musician::extract_all(document); + assert_eq!(artists.len(), 2); + assert_eq!(artists[0].name, "HIKO"); + assert_eq!(artists[0].url, "/musicians/27075/hiko/"); + assert_eq!(artists[0].date, "2022-06-16 14:12:02"); + assert_eq!(artists[0].author, "suji"); + assert_eq!(artists[0].author_url, "/users/suji/"); + assert_eq!(artists[1].name, "HIRO"); + assert_eq!(artists[1].url, "/musicians/6312/hiro/"); + assert_eq!(artists[1].date, "2022-06-16 11:11:25"); + assert_eq!(artists[1].author, "kumika"); + assert_eq!(artists[1].author_url, "/users/kumika/"); + } + + const EXAMPLE: &'static str = r#" + HIRO +
+ 2022-06-16 11:11:25 by + kumika +
+ "#; + + #[test] + fn extract_must_be_return_correct_value() { + let document = scraper::Html::parse_fragment(EXAMPLE); + let artist = Musician::extract(document); + assert_eq!(artist.name, "HIRO"); + assert_eq!(artist.url, "/musicians/6312/hiro/"); + assert_eq!(artist.date, "2022-06-16 11:11:25"); + assert_eq!(artist.author, "kumika"); + assert_eq!(artist.author_url, "/users/kumika/"); + } +} \ No newline at end of file diff --git a/src/extractor/releases.rs b/src/extractor/releases.rs new file mode 100644 index 0000000..1f9838c --- /dev/null +++ b/src/extractor/releases.rs @@ -0,0 +1,132 @@ +use scraper::Html; + +use super::{Extractor, trim_whitespace, parse_date}; + +#[derive(Debug, Default)] +pub struct Release { + pub name: String, + pub url: String, + pub album: String, + pub album_url: String, + pub date: String, + pub author: String, + pub author_url: String, +} + +impl Extractor for Release { + type Output = Self; + fn extract_all(document: Html) -> Vec { + let mut artists : Vec = Vec::new(); + + let selector = scraper::Selector::parse("ul>li").unwrap(); + let select = document.select(&selector); + for el in select { + artists.push(Self::extract(Html::parse_fragment(&el.html()))); + } + + artists + } + + fn extract(document: Html) -> Self { + let a_el = document.select(&scraper::Selector::parse("div.any--weaken-color>a.artist").unwrap()).next().unwrap(); + let title = a_el.inner_html(); + let url = a_el.value().attr("href").unwrap(); + + let a_album_el = document.select(&scraper::Selector::parse("a.symbol__release").unwrap()).next().unwrap(); + let album = a_album_el.inner_html(); + let album_url = a_album_el.value().attr("href").unwrap(); + + let more_info_el = document.select(&scraper::Selector::parse("div.h5").unwrap()).next().unwrap(); + let date = more_info_el.inner_html(); + + let author_el = more_info_el.select(&scraper::Selector::parse("a").unwrap()).next().unwrap(); + let author = author_el.inner_html(); + let author_url = author_el.value().attr("href").unwrap(); + + Self { + name: trim_whitespace(&title), + url: trim_whitespace(url), + album: trim_whitespace(&album), + album_url: trim_whitespace(album_url), + date: parse_date(&date), + author: trim_whitespace(&author), + author_url: trim_whitespace(author_url), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + const EXAMPLES: &'static str = +r#" + +"#; + + #[test] + fn extract_all_must_return_two_result() { + let document = scraper::Html::parse_fragment(EXAMPLES); + let artists = Release::extract_all(document); + assert_eq!(artists.len(), 2); + assert_eq!(artists[0].name, "MUCC"); + assert_eq!(artists[0].url, "/artists/mucc/"); + assert_eq!(artists[0].album, "Shin Sekai Tsuujouban"); + assert_eq!(artists[0].album_url, "/releases/mucc/54429/shin-sekai-tsuujouban/"); + assert_eq!(artists[0].date, "2022-06-16 12:21:00"); + assert_eq!(artists[0].author, "kumika"); + assert_eq!(artists[0].author_url, "/users/kumika/"); + assert_eq!(artists[1].name, "LAY ABOUT WORLD"); + assert_eq!(artists[1].url, "/artists/lay-about-world/"); + assert_eq!(artists[1].album, "c×lone"); + assert_eq!(artists[1].album_url, "/releases/lay-about-world/37128/c-lone/"); + assert_eq!(artists[1].date, "2022-06-16 11:39:52"); + assert_eq!(artists[1].author, "kumika"); + assert_eq!(artists[1].author_url, "/users/kumika/"); + } + + const EXAMPLE: &'static str = r#" +
+ MUCC +
+ Shin Sekai Tsuujouban +
+ 2022-06-16 12:21:00 by + kumika +
+ "#; + + #[test] + fn extract_must_be_return_correct_value() { + let document = scraper::Html::parse_fragment(EXAMPLE); + let artist = Release::extract(document); + assert_eq!(artist.name, "MUCC"); + assert_eq!(artist.url, "/artists/mucc/"); + assert_eq!(artist.album, "Shin Sekai Tsuujouban"); + assert_eq!(artist.album_url, "/releases/mucc/54429/shin-sekai-tsuujouban/"); + assert_eq!(artist.date, "2022-06-16 12:21:00"); + assert_eq!(artist.author, "kumika"); + assert_eq!(artist.author_url, "/users/kumika/"); + } +} \ No newline at end of file diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..c2cc8bf --- /dev/null +++ b/src/main.rs @@ -0,0 +1,7 @@ +mod extractor; + +fn main() { + dotenvy::dotenv().unwrap(); + + extractor::extract(); +}