first commit
This commit is contained in:
commit
1a72eaeaaf
11 changed files with 2187 additions and 0 deletions
1
.env
Normal file
1
.env
Normal file
|
@ -0,0 +1 @@
|
|||
URL=http://vk.gy/database
|
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
|
@ -0,0 +1 @@
|
|||
/target
|
45
.vscode/launch.json
vendored
Normal file
45
.vscode/launch.json
vendored
Normal file
|
@ -0,0 +1,45 @@
|
|||
{
|
||||
// Use IntelliSense to learn about possible attributes.
|
||||
// Hover to view descriptions of existing attributes.
|
||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"type": "lldb",
|
||||
"request": "launch",
|
||||
"name": "Debug executable 'data_extractor'",
|
||||
"cargo": {
|
||||
"args": [
|
||||
"build",
|
||||
"--bin=data_extractor",
|
||||
"--package=data_extractor"
|
||||
],
|
||||
"filter": {
|
||||
"name": "data_extractor",
|
||||
"kind": "bin"
|
||||
}
|
||||
},
|
||||
"args": [],
|
||||
"cwd": "${workspaceFolder}"
|
||||
},
|
||||
{
|
||||
"type": "lldb",
|
||||
"request": "launch",
|
||||
"name": "Debug unit tests in executable 'data_extractor'",
|
||||
"cargo": {
|
||||
"args": [
|
||||
"test",
|
||||
"--no-run",
|
||||
"--bin=data_extractor",
|
||||
"--package=data_extractor"
|
||||
],
|
||||
"filter": {
|
||||
"name": "data_extractor",
|
||||
"kind": "bin"
|
||||
}
|
||||
},
|
||||
"args": [],
|
||||
"cwd": "${workspaceFolder}"
|
||||
}
|
||||
]
|
||||
}
|
1557
Cargo.lock
generated
Normal file
1557
Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load diff
11
Cargo.toml
Normal file
11
Cargo.toml
Normal file
|
@ -0,0 +1,11 @@
|
|||
[package]
|
||||
name = "data_extractor"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
reqwest = {version = "0.11", features = ["blocking"]}
|
||||
scraper = "0.13.0"
|
||||
dotenvy = "0.15.1"
|
110
src/extractor/artists.rs
Normal file
110
src/extractor/artists.rs
Normal file
|
@ -0,0 +1,110 @@
|
|||
use scraper::Html;
|
||||
|
||||
use super::{Extractor, trim_whitespace, parse_date};
|
||||
|
||||
/// One entry of the "artists" list, as scraped from the page.
///
/// `Clone`, `PartialEq` and `Eq` are derived so entries can be duplicated and
/// compared directly (for instance in tests).
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct Artist {
    /// Text of the entry's first link.
    pub name: String,
    /// `href` of the entry's first link.
    pub url: String,
    /// Timestamp text taken from the start of the info block
    /// (e.g. `2022-06-16 14:12:03`).
    pub date: String,
    /// Text of the link found inside the info block.
    pub author: String,
    /// `href` of the link found inside the info block.
    pub author_url: String,
}
|
||||
|
||||
impl Extractor for Artist {
|
||||
type Output = Self;
|
||||
fn extract_all(document: Html) -> Vec<Self> {
|
||||
let mut artists : Vec<Self> = Vec::new();
|
||||
|
||||
let selector = scraper::Selector::parse("ul>li").unwrap();
|
||||
let select = document.select(&selector);
|
||||
for el in select {
|
||||
artists.push(Self::extract(Html::parse_fragment(&el.html())));
|
||||
}
|
||||
|
||||
artists
|
||||
}
|
||||
|
||||
fn extract(document: Html) -> Self {
|
||||
let a_el = document.select(&scraper::Selector::parse("a").unwrap()).next().unwrap();
|
||||
|
||||
let title = a_el.inner_html();
|
||||
let url = a_el.value().attr("href").unwrap();
|
||||
|
||||
let more_info_el = document.select(&scraper::Selector::parse("div").unwrap()).next().unwrap();
|
||||
let date = more_info_el.inner_html();
|
||||
|
||||
let author_el = more_info_el.select(&scraper::Selector::parse("a").unwrap()).next().unwrap();
|
||||
let author = author_el.inner_html();
|
||||
let author_url = author_el.value().attr("href").unwrap();
|
||||
|
||||
Self {
|
||||
name: trim_whitespace(&title),
|
||||
url: trim_whitespace(url),
|
||||
date: parse_date(&date),
|
||||
author: trim_whitespace(&author),
|
||||
author_url: trim_whitespace(author_url),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// A list holding two artist entries.
    const EXAMPLES: &str = r#"
        <ul>
            <li>
                <a class="symbol__artist" href="/artists/babykingdom/">BabyKingdom</a>
                <div class="h5 any--no-wrap">
                    2022-06-16 14:12:03 by
                    <a class="user a--inherit" data-icon="" data-is-vip="1" href="/users/suji/">suji</a>
                </div>
            </li>
            <li>
                <a class="symbol__artist" href="/artists/hiro-ruvish/">HIRO</a>
                <div class="h5 any--no-wrap">
                    2022-06-16 11:11:06 by
                    <a class="user a--inherit" data-icon="" data-is-vip="1" href="/users/kumika/">kumika</a>
                </div>
            </li>
        </ul>
    "#;

    #[test]
    fn extract_all_must_return_two_results() {
        let document = scraper::Html::parse_fragment(EXAMPLES);
        let artists = Artist::extract_all(document);

        assert_eq!(artists.len(), 2);

        assert_eq!(artists[0].author, "suji");
        assert_eq!(artists[0].author_url, "/users/suji/");
        assert_eq!(artists[0].name, "BabyKingdom");
        assert_eq!(artists[0].url, "/artists/babykingdom/");
        assert_eq!(artists[0].date, "2022-06-16 14:12:03");

        assert_eq!(artists[1].author, "kumika");
        assert_eq!(artists[1].author_url, "/users/kumika/");
        assert_eq!(artists[1].name, "HIRO");
        assert_eq!(artists[1].url, "/artists/hiro-ruvish/");
        assert_eq!(artists[1].date, "2022-06-16 11:11:06");
    }

    /// The content of a single artist list item.
    const EXAMPLE: &str = r#"
        <a class="symbol__artist" href="/artists/babykingdom/">BabyKingdom</a>
        <div class="h5 any--no-wrap">
            2022-06-16 14:12:03 by
            <a class="user a--inherit" data-icon="" data-is-vip="1" href="/users/suji/">suji</a>
        </div>
    "#;

    #[test]
    fn extract_must_return_correct_value() {
        let document = scraper::Html::parse_fragment(EXAMPLE);
        let artist = Artist::extract(document);

        assert_eq!(artist.author, "suji");
        assert_eq!(artist.author_url, "/users/suji/");
        assert_eq!(artist.name, "BabyKingdom");
        assert_eq!(artist.url, "/artists/babykingdom/");
        assert_eq!(artist.date, "2022-06-16 14:12:03");
    }
}
|
110
src/extractor/labels.rs
Normal file
110
src/extractor/labels.rs
Normal file
|
@ -0,0 +1,110 @@
|
|||
use scraper::Html;
|
||||
|
||||
use super::{Extractor, trim_whitespace, parse_date};
|
||||
|
||||
/// One entry of the "labels" list, as scraped from the page.
///
/// `Clone`, `PartialEq` and `Eq` are derived so entries can be duplicated and
/// compared directly (for instance in tests).
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct Label {
    /// Text of the entry's first link.
    pub name: String,
    /// `href` of the entry's first link.
    pub url: String,
    /// Timestamp text taken from the start of the info block
    /// (e.g. `2022-06-13 21:27:11`).
    pub date: String,
    /// Text of the link found inside the info block.
    pub author: String,
    /// `href` of the link found inside the info block.
    pub author_url: String,
}
|
||||
|
||||
impl Extractor for Label {
|
||||
type Output = Self;
|
||||
fn extract_all(document: Html) -> Vec<Self> {
|
||||
let mut artists : Vec<Self> = Vec::new();
|
||||
|
||||
let selector = scraper::Selector::parse("ul>li").unwrap();
|
||||
let select = document.select(&selector);
|
||||
for el in select {
|
||||
artists.push(Self::extract(Html::parse_fragment(&el.html())));
|
||||
}
|
||||
|
||||
artists
|
||||
}
|
||||
|
||||
fn extract(document: Html) -> Self {
|
||||
let a_el = document.select(&scraper::Selector::parse("a").unwrap()).next().unwrap();
|
||||
|
||||
let title = a_el.inner_html();
|
||||
let url = a_el.value().attr("href").unwrap();
|
||||
|
||||
let more_info_el = document.select(&scraper::Selector::parse("div").unwrap()).next().unwrap();
|
||||
let date = more_info_el.inner_html();
|
||||
|
||||
let author_el = more_info_el.select(&scraper::Selector::parse("a").unwrap()).next().unwrap();
|
||||
let author = author_el.inner_html();
|
||||
let author_url = author_el.value().attr("href").unwrap();
|
||||
|
||||
Self {
|
||||
name: trim_whitespace(&title),
|
||||
url: trim_whitespace(url),
|
||||
date: parse_date(&date),
|
||||
author: trim_whitespace(&author),
|
||||
author_url: trim_whitespace(author_url),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// A list holding two label entries.
    const EXAMPLES: &str = r#"
        <ul>
            <li>
                <a class="symbol__company" href="/labels/arcanumania-records/">ARCANUMANIA Records.</a>
                <div class="h5 any--no-wrap">
                    2022-06-13 21:27:11 by
                    <a class="user a--inherit" data-icon="" data-is-vip="0" href="/users/haru/">haru</a>
                </div>
            </li>
            <li>
                <a class="symbol__company" href="/labels/omega-code/">Omega Code</a>
                <div class="h5 any--no-wrap">
                    2022-06-12 18:43:41 by
                    <a class="user a--inherit" data-icon="" data-is-vip="1" href="/users/inartistic/">inartistic</a>
                </div>
            </li>
        </ul>
    "#;

    #[test]
    fn extract_all_must_return_two_results() {
        let document = scraper::Html::parse_fragment(EXAMPLES);
        let labels = Label::extract_all(document);

        assert_eq!(labels.len(), 2);

        assert_eq!(labels[0].name, "ARCANUMANIA Records.");
        assert_eq!(labels[0].url, "/labels/arcanumania-records/");
        assert_eq!(labels[0].date, "2022-06-13 21:27:11");
        assert_eq!(labels[0].author, "haru");
        assert_eq!(labels[0].author_url, "/users/haru/");

        assert_eq!(labels[1].name, "Omega Code");
        assert_eq!(labels[1].url, "/labels/omega-code/");
        assert_eq!(labels[1].date, "2022-06-12 18:43:41");
        assert_eq!(labels[1].author, "inartistic");
        assert_eq!(labels[1].author_url, "/users/inartistic/");
    }

    /// The content of a single label list item.
    const EXAMPLE: &str = r#"
        <a class="symbol__company" href="/labels/arcanumania-records/">ARCANUMANIA Records.</a>
        <div class="h5 any--no-wrap">
            2022-06-13 21:27:11 by
            <a class="user a--inherit" data-icon="" data-is-vip="0" href="/users/haru/">haru</a>
        </div>
    "#;

    #[test]
    fn extract_must_return_correct_value() {
        let document = scraper::Html::parse_fragment(EXAMPLE);
        let label = Label::extract(document);

        assert_eq!(label.author, "haru");
        assert_eq!(label.author_url, "/users/haru/");
        assert_eq!(label.name, "ARCANUMANIA Records.");
        assert_eq!(label.url, "/labels/arcanumania-records/");
        assert_eq!(label.date, "2022-06-13 21:27:11");
    }
}
|
103
src/extractor/mod.rs
Normal file
103
src/extractor/mod.rs
Normal file
|
@ -0,0 +1,103 @@
|
|||
use std::env;
|
||||
|
||||
use scraper::Html;
|
||||
|
||||
mod artists;
|
||||
mod labels;
|
||||
mod musicians;
|
||||
mod releases;
|
||||
|
||||
pub trait Extractor {
|
||||
type Output;
|
||||
fn extract_all(select: Html) -> Vec<Self::Output>;
|
||||
fn extract(select: Html) -> Self::Output;
|
||||
}
|
||||
|
||||
/// Selects the blocks of the page that `extract` iterates over.
const RECENTLY_UPDATED_SELECTOR: &str = "body>div.c2>div";

/// Selects, inside a block, the link whose text names the category.
const CATEGORY_TITLE_SELECTOR: &str = "h2>a";

/// Selects, inside a block, the element whose content is handed to the
/// category's extractor.
const NEWS_TITLE_SELECTOR: &str = "div.text";
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct Informations {
|
||||
pub artists: Vec<artists::Artist>,
|
||||
pub labels: Vec<labels::Label>,
|
||||
pub musicians: Vec<musicians::Musician>,
|
||||
pub releases: Vec<releases::Release>,
|
||||
}
|
||||
|
||||
pub fn extract() {
|
||||
let response = reqwest::blocking::get(env::var("URL").unwrap())
|
||||
.unwrap()
|
||||
.text()
|
||||
.unwrap();
|
||||
|
||||
let document = scraper::Html::parse_document(&response);
|
||||
|
||||
let div_selector = scraper::Selector::parse(RECENTLY_UPDATED_SELECTOR).unwrap();
|
||||
|
||||
let divs = document.select(&div_selector);
|
||||
|
||||
let mut categories = Informations::default();
|
||||
|
||||
for div in divs {
|
||||
if let Some(category) = div.select(&scraper::Selector::parse(CATEGORY_TITLE_SELECTOR).unwrap()).next() {
|
||||
|
||||
let category_title = category.inner_html();
|
||||
|
||||
match category_title.as_str() {
|
||||
"artists" => {
|
||||
let content_div = div.select(&scraper::Selector::parse(NEWS_TITLE_SELECTOR).unwrap()).next().unwrap();
|
||||
categories.artists = artists::Artist::extract_all(scraper::Html::parse_fragment(&content_div.inner_html()));
|
||||
},
|
||||
"labels" => {
|
||||
let content_div = div.select(&scraper::Selector::parse(NEWS_TITLE_SELECTOR).unwrap()).next().unwrap();
|
||||
categories.labels = labels::Label::extract_all(scraper::Html::parse_fragment(&content_div.inner_html()));
|
||||
}
|
||||
"musicians" => {
|
||||
let content_div = div.select(&scraper::Selector::parse(NEWS_TITLE_SELECTOR).unwrap()).next().unwrap();
|
||||
categories.musicians = musicians::Musician::extract_all(scraper::Html::parse_fragment(&content_div.inner_html()));
|
||||
}
|
||||
"releases" => {
|
||||
let content_div = div.select(&scraper::Selector::parse(NEWS_TITLE_SELECTOR).unwrap()).next().unwrap();
|
||||
categories.releases = releases::Release::extract_all(scraper::Html::parse_fragment(&content_div.inner_html()));
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
println!("{:#?}", categories);
|
||||
}
|
||||
|
||||
/// Replaces every tab and newline in `s` with a space, then strips leading
/// and trailing whitespace.
pub(self) fn trim_whitespace(s: &str) -> String {
    s.replace(|c: char| matches!(c, '\t' | '\n'), " ")
        .trim()
        .to_owned()
}

/// Returns the leading `YYYY-MM-DD HH:MM:SS` part (19 bytes) of `s` after
/// normalising it with [`trim_whitespace`].
///
/// When the normalised text is shorter than 19 bytes, or byte 19 does not
/// fall on a character boundary, the whole normalised text is returned
/// instead of panicking on an out-of-range slice.
pub(self) fn parse_date(s: &str) -> String {
    /// Byte length of a `YYYY-MM-DD HH:MM:SS` timestamp.
    const TIMESTAMP_LEN: usize = 19;

    let mut date = trim_whitespace(s);
    // `is_char_boundary` is false past the end of the string, so this guard
    // covers both the "too short" and the "mid-character" cases.
    if date.is_char_boundary(TIMESTAMP_LEN) {
        date.truncate(TIMESTAMP_LEN);
    }
    date
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{parse_date, trim_whitespace};

    /// Text without tabs or newlines comes back unchanged.
    #[test]
    fn test_trim_whitespace() {
        let cleaned = trim_whitespace("Hello World");
        assert_eq!(cleaned, "Hello World");
    }

    /// Tabs become spaces; the outer ones are trimmed away.
    #[test]
    fn test_remove_whitespace_with_tabs() {
        let cleaned = trim_whitespace("\tHello\tWorld\t");
        assert_eq!(cleaned, "Hello World");
    }

    /// Newlines become spaces; the outer ones are trimmed away.
    #[test]
    fn test_remove_whitespace_with_newlines() {
        let cleaned = trim_whitespace("\nHello\nWorld\n");
        assert_eq!(cleaned, "Hello World");
    }

    /// Only the leading timestamp of the info block is kept.
    #[test]
    fn test_parse_date() {
        let raw = "2022-06-16 14:12:03 by <adata-icon=\"\"data-is-vip=\"1\"href=\"/users/suji/\"class=\"usera--inherit\">suji</a>";
        assert_eq!(parse_date(raw), "2022-06-16 14:12:03");
    }
}
|
110
src/extractor/musicians.rs
Normal file
110
src/extractor/musicians.rs
Normal file
|
@ -0,0 +1,110 @@
|
|||
use scraper::Html;
|
||||
|
||||
use super::{Extractor, trim_whitespace, parse_date};
|
||||
|
||||
/// One entry of the "musicians" list, as scraped from the page.
///
/// `Clone`, `PartialEq` and `Eq` are derived so entries can be duplicated and
/// compared directly (for instance in tests).
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct Musician {
    /// Text of the entry's first link.
    pub name: String,
    /// `href` of the entry's first link.
    pub url: String,
    /// Timestamp text taken from the start of the info block
    /// (e.g. `2022-06-16 14:12:02`).
    pub date: String,
    /// Text of the link found inside the info block.
    pub author: String,
    /// `href` of the link found inside the info block.
    pub author_url: String,
}
|
||||
|
||||
impl Extractor for Musician {
|
||||
type Output = Self;
|
||||
fn extract_all(document: Html) -> Vec<Self> {
|
||||
let mut artists : Vec<Self> = Vec::new();
|
||||
|
||||
let selector = scraper::Selector::parse("ul>li").unwrap();
|
||||
let select = document.select(&selector);
|
||||
for el in select {
|
||||
artists.push(Self::extract(Html::parse_fragment(&el.html())));
|
||||
}
|
||||
|
||||
artists
|
||||
}
|
||||
|
||||
fn extract(document: Html) -> Self {
|
||||
let a_el = document.select(&scraper::Selector::parse("a").unwrap()).next().unwrap();
|
||||
|
||||
let title = a_el.inner_html();
|
||||
let url = a_el.value().attr("href").unwrap();
|
||||
|
||||
let more_info_el = document.select(&scraper::Selector::parse("div").unwrap()).next().unwrap();
|
||||
let date = more_info_el.inner_html();
|
||||
|
||||
let author_el = more_info_el.select(&scraper::Selector::parse("a").unwrap()).next().unwrap();
|
||||
let author = author_el.inner_html();
|
||||
let author_url = author_el.value().attr("href").unwrap();
|
||||
|
||||
Self {
|
||||
name: trim_whitespace(&title),
|
||||
url: trim_whitespace(url),
|
||||
date: parse_date(&date),
|
||||
author: trim_whitespace(&author),
|
||||
author_url: trim_whitespace(author_url),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// A list holding two musician entries.
    const EXAMPLES: &str = r#"
        <ul>
            <li>
                <a class="symbol__musician" href="/musicians/27075/hiko/">HIKO</a>
                <div class="h5 any--no-wrap">
                    2022-06-16 14:12:02 by
                    <a class="user a--inherit" data-icon="" data-is-vip="1" href="/users/suji/">suji</a>
                </div>
            </li>
            <li>
                <a class="symbol__musician" href="/musicians/6312/hiro/">HIRO</a>
                <div class="h5 any--no-wrap">
                    2022-06-16 11:11:25 by
                    <a class="user a--inherit" data-icon="" data-is-vip="1" href="/users/kumika/">kumika</a>
                </div>
            </li>
        </ul>
    "#;

    #[test]
    fn extract_all_must_return_two_results() {
        let document = scraper::Html::parse_fragment(EXAMPLES);
        let musicians = Musician::extract_all(document);

        assert_eq!(musicians.len(), 2);

        assert_eq!(musicians[0].name, "HIKO");
        assert_eq!(musicians[0].url, "/musicians/27075/hiko/");
        assert_eq!(musicians[0].date, "2022-06-16 14:12:02");
        assert_eq!(musicians[0].author, "suji");
        assert_eq!(musicians[0].author_url, "/users/suji/");

        assert_eq!(musicians[1].name, "HIRO");
        assert_eq!(musicians[1].url, "/musicians/6312/hiro/");
        assert_eq!(musicians[1].date, "2022-06-16 11:11:25");
        assert_eq!(musicians[1].author, "kumika");
        assert_eq!(musicians[1].author_url, "/users/kumika/");
    }

    /// The content of a single musician list item.
    const EXAMPLE: &str = r#"
        <a class="symbol__musician" href="/musicians/6312/hiro/">HIRO</a>
        <div class="h5 any--no-wrap">
            2022-06-16 11:11:25 by
            <a class="user a--inherit" data-icon="" data-is-vip="1" href="/users/kumika/">kumika</a>
        </div>
    "#;

    #[test]
    fn extract_must_return_correct_value() {
        let document = scraper::Html::parse_fragment(EXAMPLE);
        let musician = Musician::extract(document);

        assert_eq!(musician.name, "HIRO");
        assert_eq!(musician.url, "/musicians/6312/hiro/");
        assert_eq!(musician.date, "2022-06-16 11:11:25");
        assert_eq!(musician.author, "kumika");
        assert_eq!(musician.author_url, "/users/kumika/");
    }
}
|
132
src/extractor/releases.rs
Normal file
132
src/extractor/releases.rs
Normal file
|
@ -0,0 +1,132 @@
|
|||
use scraper::Html;
|
||||
|
||||
use super::{Extractor, trim_whitespace, parse_date};
|
||||
|
||||
/// One entry of the "releases" list, as scraped from the page.
///
/// `Clone`, `PartialEq` and `Eq` are derived so entries can be duplicated and
/// compared directly (for instance in tests).
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct Release {
    /// Text of the `a.artist` link.
    pub name: String,
    /// `href` of the `a.artist` link.
    pub url: String,
    /// Text of the `a.symbol__release` link.
    pub album: String,
    /// `href` of the `a.symbol__release` link.
    pub album_url: String,
    /// Timestamp text taken from the start of the info block
    /// (e.g. `2022-06-16 12:21:00`).
    pub date: String,
    /// Text of the link found inside the info block.
    pub author: String,
    /// `href` of the link found inside the info block.
    pub author_url: String,
}
|
||||
|
||||
impl Extractor for Release {
|
||||
type Output = Self;
|
||||
fn extract_all(document: Html) -> Vec<Self> {
|
||||
let mut artists : Vec<Self> = Vec::new();
|
||||
|
||||
let selector = scraper::Selector::parse("ul>li").unwrap();
|
||||
let select = document.select(&selector);
|
||||
for el in select {
|
||||
artists.push(Self::extract(Html::parse_fragment(&el.html())));
|
||||
}
|
||||
|
||||
artists
|
||||
}
|
||||
|
||||
fn extract(document: Html) -> Self {
|
||||
let a_el = document.select(&scraper::Selector::parse("div.any--weaken-color>a.artist").unwrap()).next().unwrap();
|
||||
let title = a_el.inner_html();
|
||||
let url = a_el.value().attr("href").unwrap();
|
||||
|
||||
let a_album_el = document.select(&scraper::Selector::parse("a.symbol__release").unwrap()).next().unwrap();
|
||||
let album = a_album_el.inner_html();
|
||||
let album_url = a_album_el.value().attr("href").unwrap();
|
||||
|
||||
let more_info_el = document.select(&scraper::Selector::parse("div.h5").unwrap()).next().unwrap();
|
||||
let date = more_info_el.inner_html();
|
||||
|
||||
let author_el = more_info_el.select(&scraper::Selector::parse("a").unwrap()).next().unwrap();
|
||||
let author = author_el.inner_html();
|
||||
let author_url = author_el.value().attr("href").unwrap();
|
||||
|
||||
Self {
|
||||
name: trim_whitespace(&title),
|
||||
url: trim_whitespace(url),
|
||||
album: trim_whitespace(&album),
|
||||
album_url: trim_whitespace(album_url),
|
||||
date: parse_date(&date),
|
||||
author: trim_whitespace(&author),
|
||||
author_url: trim_whitespace(author_url),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// A list holding two release entries.
    const EXAMPLES: &str = r#"
        <ul>
            <li>
                <div class="any--weaken-color">
                    <a class="artist" href="/artists/mucc/">MUCC</a>
                </div>
                <a class="symbol__release" href="/releases/mucc/54429/shin-sekai-tsuujouban/">Shin Sekai Tsuujouban</a>
                <div class="h5 any--no-wrap">
                    2022-06-16 12:21:00 by
                    <a class="user a--inherit" data-icon="" data-is-vip="1" href="/users/kumika/">kumika</a>
                </div>
            </li>
            <li>
                <div class="any--weaken-color">
                    <a class="artist" href="/artists/lay-about-world/">LAY ABOUT WORLD</a>
                </div>
                <a class="symbol__release" href="/releases/lay-about-world/37128/c-lone/">c×lone </a>
                <div class="h5 any--no-wrap">
                    2022-06-16 11:39:52 by
                    <a class="user a--inherit" data-icon="" data-is-vip="1" href="/users/kumika/">kumika</a>
                </div>
            </li>
        </ul>
    "#;

    #[test]
    fn extract_all_must_return_two_results() {
        let document = scraper::Html::parse_fragment(EXAMPLES);
        let releases = Release::extract_all(document);

        assert_eq!(releases.len(), 2);

        assert_eq!(releases[0].name, "MUCC");
        assert_eq!(releases[0].url, "/artists/mucc/");
        assert_eq!(releases[0].album, "Shin Sekai Tsuujouban");
        assert_eq!(releases[0].album_url, "/releases/mucc/54429/shin-sekai-tsuujouban/");
        assert_eq!(releases[0].date, "2022-06-16 12:21:00");
        assert_eq!(releases[0].author, "kumika");
        assert_eq!(releases[0].author_url, "/users/kumika/");

        assert_eq!(releases[1].name, "LAY ABOUT WORLD");
        assert_eq!(releases[1].url, "/artists/lay-about-world/");
        assert_eq!(releases[1].album, "c×lone");
        assert_eq!(releases[1].album_url, "/releases/lay-about-world/37128/c-lone/");
        assert_eq!(releases[1].date, "2022-06-16 11:39:52");
        assert_eq!(releases[1].author, "kumika");
        assert_eq!(releases[1].author_url, "/users/kumika/");
    }

    /// The content of a single release list item.
    const EXAMPLE: &str = r#"
        <div class="any--weaken-color">
            <a class="artist" href="/artists/mucc/">MUCC</a>
        </div>
        <a class="symbol__release" href="/releases/mucc/54429/shin-sekai-tsuujouban/">Shin Sekai Tsuujouban</a>
        <div class="h5 any--no-wrap">
            2022-06-16 12:21:00 by
            <a class="user a--inherit" data-icon="" data-is-vip="1" href="/users/kumika/">kumika</a>
        </div>
    "#;

    #[test]
    fn extract_must_return_correct_value() {
        let document = scraper::Html::parse_fragment(EXAMPLE);
        let release = Release::extract(document);

        assert_eq!(release.name, "MUCC");
        assert_eq!(release.url, "/artists/mucc/");
        assert_eq!(release.album, "Shin Sekai Tsuujouban");
        assert_eq!(release.album_url, "/releases/mucc/54429/shin-sekai-tsuujouban/");
        assert_eq!(release.date, "2022-06-16 12:21:00");
        assert_eq!(release.author, "kumika");
        assert_eq!(release.author_url, "/users/kumika/");
    }
}
|
7
src/main.rs
Normal file
7
src/main.rs
Normal file
|
@ -0,0 +1,7 @@
|
|||
mod extractor;
|
||||
|
||||
fn main() {
|
||||
dotenvy::dotenv().unwrap();
|
||||
|
||||
extractor::extract();
|
||||
}
|
Loading…
Reference in a new issue