Compare commits

..

No commits in common. "main" and "dev" have entirely different histories.
main ... dev

8 changed files with 1 additions and 2197 deletions

View File

@ -4,4 +4,4 @@
| Name | Description | | Name | Description |
| --- | --- | | --- | --- |
| [semantic-scholar-client](./semantic-scholar-client) | Rust utility for reading data from the [Semantic Scholar API](https://api.semanticscholar.org/api-docs) | | [forum-logic](https://gitlab.com/dao-governance-framework/forum-logic) | Javascript prototyping forum architecture |

View File

@ -1 +0,0 @@
SEMANTIC_SCHOLAR_API_KEY=

View File

@ -1,2 +0,0 @@
/target
.env

File diff suppressed because it is too large Load Diff

View File

@ -1,17 +0,0 @@
[package]
name = "semantic-scholar-client"
version = "0.1.0"
edition = "2021"
default-run = "import"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
async-recursion = "1.0.0"
clap = { version = "3.2.11", features = ["derive"] }
dotenv = "0.15.0"
mongodb = "2.2.2"
reqwest = { version = "0.11.11", features = ["json"] }
serde = { version = "1.0.139", features = ["derive"] }
serde_json = "1.0.82"
tokio = { version = "1.20.0", features = ["full"] }

View File

@ -1,25 +0,0 @@
# `semantic-scholar-client`
This utility fetches data from the Semantic Scholar API.
The initial proof of concept writes the results to stdout.
Work is in progress to pipe this data into an operational database.
### Usage
* (Optional) Copy `.env.example` to `.env` and set the value of `SEMANTIC_SCHOLAR_API_KEY`
* Run the program
  `cargo run -- --paper-id <paper_id> --depth <depth>`
* `paper_id` values are in accordance with [Semantic Scholar API](https://api.semanticscholar.org/api-docs/).
* `depth` is the number of citations to traverse, from the starting paper.
### Notes
Ideas for followup work:
- Consider strategies for deciding where to terminate a given traversal
- Provide an HTTP/WebSocket interface that can be used to talk to this process during its operation.
This can enable us to pipe the data to other tasks, to monitor, to start/stop, and even to make configuration changes.
- Rate limit requests

View File

@ -1,153 +0,0 @@
// During development, allowing dead code
#![allow(dead_code)]
use async_recursion::async_recursion;
use clap::Parser;
use dotenv::dotenv;
use serde::Deserialize;
use std::cmp::min;
use std::error::Error;
use std::fmt::Write;
/// Result alias used by the data-fetching code; any error type is boxed as `Box<dyn Error>`.
type DataResult<T> = Result<T, Box<dyn Error>>;
/// Root URL of the Semantic Scholar Graph API (v1); endpoint paths are appended to it.
const BASE_URL: &str = "https://api.semanticscholar.org/graph/v1";
/// Upper bound on the depth counter: `get_citations` returns immediately when called with a larger depth.
const MAX_DEPTH: u32 = 3;
// Command-line arguments, parsed with clap's derive API.
// The `///` comments on the fields below are left untouched because the
// derive turns them into the CLI help text.
#[derive(Parser, Debug)]
#[clap(author, version, about, long_about = None)]
struct Args {
/// How deep to traverse citation graph from the starting paper
#[clap(short, long, value_parser)]
depth: u32,
/// Starting paper. We will traverse papers that cite this one
#[clap(short, long, value_parser)]
paper_id: String,
// Planned option, currently disabled:
// Write the results to MongoDB
// #[clap(short, long, value_parser)]
// write_to_mongo: bool,
}
/// An author record; at the moment it only carries the author's name.
/// In the code visible here no `Author` value is ever constructed.
struct Author {
name: String,
}
/// Collection of authors that is threaded through `get_citations`.
type Authors = Vec<Author>;
/// A paper together with the citations pointing at it.
/// Field names are deserialized from their camelCase form (e.g. `paperId`).
/// NOTE(review): presumably mirrors the API's paper payload — confirm against the API docs.
#[derive(Deserialize, Debug)]
#[serde(rename_all = "camelCase")]
struct Paper {
paper_id: String,
// Optional: a paper may be deserialized without a title.
title: Option<String>,
citations: Vec<Citation>,
}
/**
 * The paper on the citing side of a citation; occurs within the Citation struct.
 * Both fields are optional, and `get_citations` skips entries where either one
 * is missing. Field names are deserialized from camelCase (e.g. `paperId`).
 */
#[derive(Deserialize, Debug)]
#[serde(rename_all = "camelCase")]
struct CitingPaper {
paper_id: Option<String>,
title: Option<String>,
}
/// One element of a citations list response: wraps the paper that does the citing.
/// The field is deserialized from the camelCase key `citingPaper`.
#[derive(Deserialize, Debug)]
#[serde(rename_all = "camelCase")]
struct Citation {
citing_paper: CitingPaper,
}
/**
 * Generic struct to wrap the common API response pattern {data: [...]}.
 * `data` holds the list payload. `message`, when present, is treated by
 * `get_citations` as an error description and the payload is ignored.
 * NOTE(review): a stray `code: Option<String>,` line used to sit inside this
 * comment — possibly an intended field; confirm against the API's error payload.
 */
#[derive(Deserialize, Debug)]
struct ApiListResponse<T> {
data: Option<Vec<T>>,
message: Option<String>,
}
// TODO: Cache results in a (separate but local) database such as Redis
// TODO: Store results in a (separate but local) database such as Postgres
/// Recursively walks the citation graph and prints every paper that cites `paper_id`.
///
/// Sends `GET {BASE_URL}/paper/{paper_id}/citations`, adding the `x-api-key`
/// header when `SEMANTIC_SCHOLAR_API_KEY` is set to a non-empty value. For each
/// citing paper that has both an id and a title, one line is printed and the
/// function recurses into that paper with `depth + 1`.
///
/// # Arguments
/// * `client` - HTTP client reused for every request.
/// * `paper_id` - Paper whose citing papers are fetched.
/// * `depth` - Current depth counter; the call returns immediately once it
///   exceeds `MAX_DEPTH`.
/// * `authors` - Only forwarded to the recursive calls; it is not read or
///   modified here yet.
///
/// # Returns
/// Currently always an empty vector on success; results are only printed.
///
/// # Errors
/// Fails if the URL cannot be formatted, the request cannot be sent, or the
/// response body cannot be read. A body that does not deserialize, or one that
/// carries a `message`, is reported on stdout and treated as "no citations".
///
/// NOTE(review): `main` passes the user's `--depth` value as the starting
/// counter, so larger values traverse fewer levels — confirm this is intended.
#[async_recursion]
async fn get_citations(
    client: &reqwest::Client,
    paper_id: String,
    depth: u32,
    authors: &mut Vec<Author>,
) -> DataResult<Vec<Citation>> {
    // Bound recursion to some depth
    if depth > MAX_DEPTH {
        return Ok(vec![]);
    }

    // Build the URL
    let mut url = String::new();
    write!(&mut url, "{}/paper/{}/citations", BASE_URL, paper_id)?;

    // Attach the API key only when one is configured. An empty value (e.g. an
    // unedited copy of `.env.example`) is treated the same as an unset one.
    let mut req = client.get(url);
    if let Ok(api_key) = std::env::var("SEMANTIC_SCHOLAR_API_KEY") {
        if !api_key.is_empty() {
            req = req.header("x-api-key", api_key);
        }
    }

    let resp = req.send().await?.text().await?;
    let resp_deserialized = match serde_json::from_str::<ApiListResponse<Citation>>(&resp) {
        Ok(parsed) => parsed,
        Err(err) => {
            println!("depth {} paper {} error {}", depth, paper_id, err);
            return Ok(vec![]);
        }
    };

    // The API reports failures through a `message` field.
    if let Some(message) = resp_deserialized.message {
        println!("depth {} paper {} error {}", depth, paper_id, message);
        return Ok(vec![]);
    }

    // A response with neither `message` nor `data` is treated as an empty list
    // instead of panicking.
    for Citation {
        citing_paper:
            CitingPaper {
                paper_id: citing_paper_id,
                title,
            },
    } in resp_deserialized.data.unwrap_or_default()
    {
        if let (Some(citing_paper_id), Some(title)) = (citing_paper_id, title) {
            // Shorten the title to at most 50 bytes, backing off to the nearest
            // character boundary so multi-byte UTF-8 titles cannot cause a panic.
            let mut short_len = min(50, title.len());
            while !title.is_char_boundary(short_len) {
                short_len -= 1;
            }
            let short_title = &title[..short_len];
            println!(
                "depth {} paper {} cites {} title {}",
                depth, citing_paper_id, paper_id, short_title
            );
            get_citations(client, citing_paper_id, depth + 1, authors).await?;
        }
    }
    Ok(vec![])
}
#[tokio::main]
async fn main() -> Result<(), Box<dyn Error>> {
let Args { depth, paper_id } = Args::parse();
dotenv().ok();
let mut authors = Authors::new();
let client: reqwest::Client = reqwest::Client::new();
get_citations(&client, paper_id, depth, &mut authors).await?;
Ok(())
}

View File

@ -1,59 +0,0 @@
use mongodb::{Client, options::ClientOptions};
// Connection string for the MongoDB deployment this program talks to.
// NOTE(review): user, password and port are hard-coded here — presumably a
// local development container; consider reading the address from the environment.
const MONGO_DB_ADDRESS: &str = "mongodb://docker:mongopw@localhost:55000";
/// Smoke test for the MongoDB connection.
///
/// Connects to `MONGO_DB_ADDRESS`, inserts two sample books into the `books`
/// collection of database `db2`, then prints every database name followed by
/// the names of its collections (each prefixed with `- `).
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    use serde::{Deserialize, Serialize};

    // Document type stored in the sample collection.
    #[derive(Debug, Serialize, Deserialize)]
    struct Book {
        title: String,
        author: String,
    }

    // Turn the connection string into options, then open a handle to the deployment.
    let options = ClientOptions::parse(MONGO_DB_ADDRESS).await?;
    let mongo = Client::with_options(options)?;

    // Sample data, written as (title, author) pairs.
    let seed_books: Vec<Book> = [
        ("The Grapes of Wrath", "John Steinbeck"),
        ("To Kill a Mockingbird", "Harper Lee"),
    ]
    .iter()
    .map(|&(title, author)| Book {
        title: title.to_string(),
        author: author.to_string(),
    })
    .collect();

    // Insert the books into the `db2.books` collection; the typed collection
    // handle means no manual conversion to BSON is needed.
    let library = mongo.database("db2");
    let shelf = library.collection::<Book>("books");
    shelf.insert_many(seed_books, None).await?;

    // Print each database in the deployment, followed by its collections.
    let database_names = mongo.list_database_names(None, None).await?;
    for name in database_names {
        println!("{}", name);
        let current = mongo.database(&name);
        let collection_names = current.list_collection_names(None).await?;
        for collection in collection_names {
            println!("- {}", collection);
        }
    }
    Ok(())
}