Merge branch 'semantic-scholar-import'

This commit is contained in:
Ladd Hoffman 2024-04-04 12:02:42 -05:00
commit ad382b5caf
12 changed files with 2625 additions and 1 deletions

View File

@ -1,3 +1,7 @@
# DAO Governance Framework # DAO Governance Framework
Project moved to https://gitea.dgov.io/DGF/dao-governance-framework ## Subprojects
| Name | Description |
| --- | --- |
| [semantic-scholar-client](./semantic-scholar-client) | Rust utility for reading data from the [Semantic Scholar API](https://api.semanticscholar.org/api-docs) |

101
diagrams/contracts.sq Normal file
View File

@ -0,0 +1,101 @@
participantgroup #lightblue Voting Contract
participantgroup Methods
boundary "createVote()" as create_vote
boundary "voteResults()" as voting_vote_result
end
participantgroup Data
database "Params" as voting_params
end
end
participantgroup #lightyellow Vote Contract
participantgroup Methods
boundary "vote()" as vote
end
participantgroup Data
database "Votes" as votes
end
end
participantgroup #pink Forum Contract
participantgroup Methods
boundary "post()" as post
boundary "voteResult()" as forum_vote_result
end
participantgroup Data
database "Params" as forum_params
end
end
participantgroup #orange Post\nContract
participantgroup Data
database "Posts" as posts
end
end
participantgroup #lightgreen Operating Accounts
participant "Reputation\nNFT" as rep
participant "Reviewer" as reviewer
participant "Public" as public
end
activate voting_params
activate forum_params
activate rep
group Author a post
public -> post : post()
activate public
activate post
post<-forum_params:Read param values
post -> posts : Create post instance;\nInitialize with current\nparam values
activate posts
posts->posts:Reference\nother posts
deactivate post
deactivate public
end
group Initiate a vote
reviewer -> create_vote : createVote()
activate reviewer
activate create_vote
create_vote<-voting_params:Read params
create_vote -> votes : Create vote instance;\nInitialize with current\nparam values
activate votes
votes -> posts : Reference a post
deactivate create_vote
deactivate reviewer
end
group Cast a vote
reviewer->vote:vote()
activate vote
activate reviewer
vote<-votes:Read prior votes
rep->vote:Read voter reputations
vote->vote:Evaluate\nterminating\nconditions
end
alt Voting terminates, according to params
alt Voting param change
posts->vote:Read post contents
vote->voting_vote_result:voteResult()
voting_vote_result ->voting_params : Update\nparams
end
votes->forum_vote_result:voteResult()
activate forum_vote_result
posts ->forum_vote_result : Read post contents
forum_vote_result<-forum_params:Read params
alt Forum param change
forum_vote_result -> forum_params : Update\nparams
end
forum_vote_result<-rep:Read authors reputations
forum_vote_result->rep:Mint reputation for post / authors / references
deactivate forum_vote_result
activate rep
votes->rep:Mint reputation for vote winners
activate rep
end
vote->votes:Update\nvote\nrecord
deactivate vote
deactivate reviewer

View File

@ -0,0 +1,84 @@
Forum Network Node
---
The forum is a collection of posts.
Each post may reference other posts, attributing some reputation to those posts.
A validation pool may vote to validate forum posts.
When such a vote passes, the reputation minted at the creation of the post gets awarded
- to the post author
- to the voters
- recursively to cited posts
A post's author should be able to choose how much reputation to stake on it.
- Work evidence post. expect unanimity, decent sized stake, punish losers.
- Upvote on a comment. Low stakes. Constitutes validation of the comment as
- carrying an appropriate stake
- containing appropriate content
- Downvote a comment. Moderate stakes.
- Upvote on a post. Moderate stakes. Constitutes validation of the post as being valuable to the DAO.
- Downvote on a post. Moderate stakes.
what if we don't support generic upvotes?
instead we would just have posts with weighted citations, and standard validation pool votes on each post.
It will be important for forum voting to be cheap too, in addition to posting to the forum.
So we probably need off-chain voting as well.
We probably want each client UI to be validating as many posts as possible.
We can try to do something clever such as monitor the time the user spends with a given post visible on their screen, and if they don't downvote it within a certain time, we can cast an implicit upvote. This would save a lot of manual activity.
The DAO can choose to implement additional automated filtering of what to show the user, and could automatically downvote unwanted content in order to earn validation pool rewards, punish bad actors, and decrease visibility of the unwanted content.
The UI can be configured to highlight incoming, unvalidated posts for the user to review and validate.
A user might choose to disable this option, setting the threshold higher, so that they only see validated posts.
This is something a user might turn on and off at different times, depending on their present mode of engagement with the forum.
I.e., browsing or actively engaged.
Approving these posts should be a quick operation on the client side.
The results of such operations should be propagated among network nodes and to other clients as needed, so that the off-chain system maintains an up-to-date view of the status of each post. Periodically, the resulting changes to reputation balances should be written to the blockchain.
We should identify all viable opportunities to collect payments from users. These payments will be necessary to fund the on-chain operations of this system.
Fundamentally we expect payments will be associated with incoming work requests from outside.
So a post is made requesting work?
Then the worker makes a post presenting the work output for validation.
Discussion may occur in the forum regarding the work output.
This means associates will make new comment posts descending from the work output.
These comments form a body of discussion.
Comments may declare their agreement or disagreement with other comments.
The validation pool will need to validate the comments.
The comments should not mint new reputation if there is no incoming fee.
Instead, the commenter must stake some of their own reputation.
If the comment is successfully validated, it gains comment reputation.
Then once the parent post is validated, the comment author will get a portion of the reputation minted from the parent post creation.
If the comment is not successfully validated one way or the other, the author's stakes will be returned to them.
If the comment is invalidated, the author will lose their stakes.
The validation pool eventually votes on the work output.
If the vote passes, awards are distributed among
- authors
- cited posts, weighted
- validated comments, weighted by the comment reputation balances
A governance post, such as a parameter change or a client software upgrade, will function similarly.
The expected fee will be smaller.
The reputation awards should be pretty decent.
The discussion around these governance posts will be particularly important.
Posts, including comments and governance posts, can reference prior posts.
Suppose a comment references a prior comment from a different thread (different work product parent post).
Then if reputation is awarded to the new comment, some of it should be transitively awarded to the referenced comment.
In this way, a comment can gain reputation over time, which in turn awards reputation to its authors and cited posts.
Thus, the forum is like a living system, where connections with new posts can influence existing posts.
---
We want the Network Node to accept reputation-staking actions from the web client (to make it more general, let's call it the user agent).
This means the user agent must have a way to prove that the user owns a given reputation token.
Ideally this should be done via a zero-knowledge proof, where
- the server sends the client something,
- the client does something with it and sends a response, and
- the server is able to verify based on the client's response that they own the given reputation token.

235
forum-network-node/forum.md Normal file
View File

@ -0,0 +1,235 @@
# Goals
- Enable each individual to express their values by taking actions in the system.
- Enable a group to arrive at a decision through a process of deliberation.
- Reward participants in the deliberation process.
- Enable participants to post contributions for review.
- Also enable discussion during this review process, a.k.a.
Enable participants to post comments on the review and on other comments
- Enable participants to submit arbitrary posts that stand alone or that reference other posts
- This correctly implies recursion.
- Since we don't want loops, we want a DAG (directed acyclic graph).
# Requirements
## Use Cases
- Outsiders can submit work requests via the Business contract
- Includes fees
- Incoming request can be reviewed and approved by validators
- Adds a post to the forum
- Assigns the work request to an associate, via Availability contract
- Associate can submit their work results via the Business contract
- Adds a post to the forum
- Associates can carry out discussions on the forum by adding new posts.
- Each post can attribute reputation for or against other posts
- Each post should be validated by associates.
- Eventually a formal vote should occur, in the context of the Business contract.
If the vote passes, reputation should be awarded to the following:
- associate who submitted the work
- associates who voted in favor of the post
- associates whose comments in the discussion earned reputation
- Associates can submit new posts to the forum, outside of any existing post or work request
- Each post should be validated by associates
- These can be referenced by future posts, thereby gaining or losing reputation
- Reputation awards are only distributed when posts are later referenced in a fee-generating discussion
## Storage Requirements
- Run-time operational data
- Active sessions
- Possible cache of on-chain data to expedite look-ups
- Subscribe to updates?
- Archival data
- Forum posts and their contents
- This is needed in order to display forum contents to clients,
as well as to compute reputation awards when submitting a batch
of forum results to the forum validator contract.
## Messaging Requirements
# Contracts
## Validator Contract
How generic do we want our validator contract to be?
So far, what I've thought of:
- Points to a forum post.
- Off-chain computation provides reputation effects arising from the forum attribution DAG.
- Network nodes function as voters here, to vote on the result they believe is correct.
This decision is expected to be determined by the forum client software.
The forum contract must include provision for tracking the forum client post with the highest reputation.
A new forum client post would include reputation stake against the previous version, and if
Like all governance decisions and perhaps many other kinds of decisions, there should be a period of deliberation where participants may express their opinions.
At some point it will transition to a formal vote. This will occur when the off-chain network nodes decide to cast formal votes.
## Network Node Contract
- Should require staking reputation to add a new network node
- Should require vote by validation pool to add the new network node
- Should allow vote by validation pool to remove a network node
# Options for architecture of off-chain forum components.
## 1
Use the following components:
- Existing storage network
- Existing messaging network
- New forum network nodes
Effects of this arrangement:
- Pro: Minimize storage and network requirements for the individual network nodes,
since they won't need to talk to each other directly.
- Pro: Gains benefits of whatever features the chosen storage or messaging systems provide.
- Con: Adds infrastructure costs that must be managed.
- Con: Adds requirement to implement integration with chosen storage and messaging systems.
## 2
Use the following components:
- Existing storage network
- New messaging network
- Network nodes talk to each other directly
Effects of this arrangement:
- Pro: Reduce messaging infrastructure costs by implementing this functionality within our own application.
- Nodes can discover each other by reading from the blockchain.
- Nodes can vote for/against each other with regard to their stakes as network nodes.
Nodes can gossip amongst each other.
Nodes MUST be able to verify peer nodes' ownership of reputation tokens.
Nodes SHOULD periodically re-verify their local view of the network, with the view that may be accessed on-chain.
Notes:
- Since we need our application to be publicly networked anyway, to interact with user agents,
it's not asking a huge amount for them to communicate amongst one another.
## 3
Use the following components:
- New messaging network
- New storage network
Effects of this arrangement:
- Pro: Integrity of storage can be policed by reputation staking
Notes:
- We would have to choose a consensus algorithm for our data storage, or adopt an existing self-managed solution
---
# Questions
## How much on-chain?
Just hashes? Any full content?
## What forum storage?
IPFS?
Filecoin?
Arweave?
CouchDB?
Custom?
If we use existing/separate networks for storage and/or messaging, how do we police them?
If they mess up our data, whose reputation is staked?
Perhaps this is implicitly covered by the Validator voting, where off-chain results are compared.
We may want to support multiple storage options.
No matter where archival storage occurs, stored data can and should be verified using hashes stored on-chain.
Nodes should only write to the archive AFTER voting on results.
In the worst case, if archived data loses integrity, it will prevent the forum from processing new transactions.
If enough network nodes could agree on a strategy to remediate the data, it might be possible to recover somewhat gracefully. This would depend on the nature and extent of the damage.
The last resort would be to initialize a new forum and abandon the old one.
While this would be disruptive to continuity of operations for the DAO, it would not alter on-chain reputation holdings.
## What forum messaging?
ZeroMQ?
RabbitMQ?
CouchDB?
Custom?
## What UI?
The forum contract should serve the forum network node source code.
If we only store a hash, we need a secure mechanism for storing and serving the actual code.
If we store the full code on-chain, we would also need to document a process for network node operators to obtain the code.
For example, by using existing command-line utilities to interface with the blockchain and download the data.
If we store the full code off-chain, we would still need to document the process for network node operators to obtain the code.
We would also need to make sure that once network nodes are up and running, they help pin the content to IPFS.
The forum network node should then serve the UI to users. This can be served as a web application.
Network node will send HTML, CSS, and Javascript to a browser client.
The browser client must have an extension that allows it to function as a wallet, and it must be able to
provide proof to the forum node, of the user's ownership of reputation tokens.
This can be accomplished as follows:
1. Web client prepares a message (probably using Casper Signer browser extension). Message includes:
- Public key
- Nonce
- Signature
2. Forum node verifies the signed message.
3. Forum node checks on-chain reputation for the given public key.
4. Forum node authenticates the client's HTTPS session.
From there, the forum node should be able to take actions on behalf of the client.
Most of these actions will occur initially within the off-chain context.
Eventually however, the results of the actions should be encoded in an on-chain validation vote.
The above step 3, check on-chain reputation for given key, may be prohibitively expensive.
Here's a way we might deal with that.
When the forum node receives the signature from the client, we can store it along with the data representing other forum activities; we can provisionally accept the offered public key from the client, and use it for the purposes of computing
reputation effects from forum activities. It could be verified asynchronously. If it turns out to have an issue, however,
then we would have to remediate our results before finalizing.
Here's another approach.
Each client will need to pay a small fee to register with the forum.
This would cover the cost of the on-chain transaction which is needed in order for the forum node to verify the client's reputation.
Once verified, the forum will store the client's public key. The client will then be able to authenticate with the off-chain forum network.
Certain actions in the forum will involve an associate staking reputation.
The plan is for the off-chain network to keep track of these actions,
and periodically vote on-chain to enact their results.
In order for these reputation stakes to be realized on-chain,
the Forum Validator Contract must be empowered to apply the resulting reputation effects.
Otherwise, the user agent would need to engage directly with the blockchain.
How shall we fund the forum nodes to deploy the necessary calls to smart contracts?
Maybe it should be possible to submit a fee in order to fund a given forum node, and
thereby to gain some reputation, and thus receive a share of the fees that the DAO earns.
This would also suggest the need for a votable parameter to tune the proportion of these rewards.
## Should network node contract voters consist of network nodes, or voting associates?
Network nodes should be resistant to DOS attacks by restricting white-listed peers to the list obtained from the network node contract on-chain.
But what if a whitelisted node starts misbehaving?
A network node that notices a problem with another network node can:
- Locally graylist or blacklist the offending peer.
- Attempt to notify its human operator, who may then cast an on-chain vote against the offender.
- Attempt to notify its network peers, who may then graylist or blacklist the offending peer.
- Automatically cast an on-chain vote against the offender.
Let's consider the possibility of nodes notifying each other of problematic behavior of other nodes.
What if a bad node sends messages to its peers attempting to gray/blacklist a good node?
This suggests that each node should listen for such messages from peers, but should require
some number of them before taking action.
Perhaps the degree of graylisting can build up with additional reports from other peers.
It should be expected that people will attempt to attack the network nodes.
If we enforce whitelisting by reputation stakes via on-chain network node contract,
we raise the bar considerably for a successful attack.
Remaining threat models:
- A reputation holder may attempt to act against the interests of the DAO.
- A supply chain attack may occur against the network node or user agent
- A man-in-the-middle attack may occur between network nodes, or between user agents and network nodes
- A network node may be compromised.
- A user agent may be compromised.
Among these threats, the supply chain attack against the network node is the most severe.
The other threats are limited because individual nodes, clients, or accounts must be compromised one by one.
But a supply chain attack may compromise many nodes, clients, or accounts.
Therefore, securing the supply chain is a top priority for this system.
## What's the desired timing of the process to initiate a new network node?
If the initiator already has reputation available to stake, maybe the process shouldn't take very long.
However, it's an action with serious repercussions. If network nodes can be added quickly,
then anyone in control of a disproportionately large amount of reputation for whatever reason,
could potentially quickly add a fleet of new network nodes and execute a 51% attack on the forum.
So, on the order of hours to days seems reasonable to me. Also the answer may depend on the current number of network nodes.
When there are more, it will make sense to add them in batches, and it might be nice to expedite that process to an appropriate degree.
On the other hand, there may not be a legitimate value in adding many new network nodes in the same physical location.
If they're going to be spread out in space, their activation might as well be spread out in time.

4
notes.md Normal file
View File

@ -0,0 +1,4 @@
```mermaid
```

View File

@ -0,0 +1 @@
SEMANTIC_SCHOLAR_API_KEY=

2
semantic-scholar-client/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
/target
.env

1939
semantic-scholar-client/Cargo.lock generated Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,17 @@
[package]
name = "semantic-scholar-client"
version = "0.1.0"
edition = "2021"
default-run = "import"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
async-recursion = "1.0.0"
clap = { version = "3.2.11", features = ["derive"] }
dotenv = "0.15.0"
mongodb = "2.2.2"
reqwest = { version = "0.11.11", features = ["json"] }
serde = { version = "1.0.139", features = ["derive"] }
serde_json = "1.0.82"
tokio = { version = "1.20.0", features = ["full"] }

View File

@ -0,0 +1,25 @@
# `semantic-scholar-client`
This utility is able to fetch data from Semantic Scholar API.
Initial proof of concept here writes the result to stdout.
Work in progress to pipe this data into an operating database.
### Usage
* (Optional) Copy `.env.example` to `.env` and set the value of `SEMANTIC_SCHOLAR_API_KEY`
* Run the program
cargo run -- --paper-id <paper_id> --depth <depth>
* `paper_id` values are in accordance with [Semantic Scholar API](https://api.semanticscholar.org/api-docs/).
* `depth` is the number of citations to traverse, from the starting paper.
### Notes
Ideas for followup work:
- Consider strategies for deciding where to terminate a given traversal
- Provide an HTTP/WebSocket interface that can be used to talk to this process during its operation.
This can enable us to pipe the data to other tasks, to monitor, to start/stop, and even to make configuration changes.
- Rate limit requests

View File

@ -0,0 +1,153 @@
// During development, allowing dead code
#![allow(dead_code)]
use async_recursion::async_recursion;
use clap::Parser;
use dotenv::dotenv;
use serde::Deserialize;
use std::cmp::min;
use std::error::Error;
use std::fmt::Write;
// Result alias used by the fallible functions in this file; the error is any boxed `Error`.
type DataResult<T> = Result<T, Box<dyn Error>>;
// Root URL of the Semantic Scholar Graph API; `get_citations` appends the endpoint path to it.
const BASE_URL: &str = "https://api.semanticscholar.org/graph/v1";
// Upper bound for the depth counter in `get_citations`: a call whose depth exceeds this returns immediately.
const MAX_DEPTH: u32 = 3;
// Command-line arguments for this binary, parsed through clap's derive API.
// Plain `//` comments are used for review notes here on purpose: `///` doc comments
// on this struct or its fields are picked up by clap as help text.
#[derive(Parser, Debug)]
#[clap(author, version, about, long_about = None)]
struct Args {
/// How deep to traverse citation graph from the starting paper
// NOTE(review): `main` passes this value to `get_citations` as the initial depth counter,
// which then counts upward until it exceeds MAX_DEPTH — confirm this matches the help text.
#[clap(short, long, value_parser)]
depth: u32,
/// Starting paper. We will traverse papers that cite this one
#[clap(short, long, value_parser)]
paper_id: String,
// Planned option, currently disabled:
// Write the results to MongoDB
// #[clap(short, long, value_parser)]
// write_to_mongo: bool,
}
/// An author record; only the name is stored.
///
/// Derives `Debug` so it can be printed like the other data structs in this file.
#[derive(Debug)]
struct Author {
    name: String,
}

/// List of authors that `main` creates and threads through `get_citations`.
type Authors = Vec<Author>;
/**
 * A paper with its id, optional title, and list of citations.
 * Field names are mapped to camelCase JSON keys when deserializing (e.g. `paperId`).
 * NOTE(review): not referenced by the other code visible in this file.
 */
#[derive(Deserialize, Debug)]
#[serde(rename_all = "camelCase")]
struct Paper {
paper_id: String,
title: Option<String>,
citations: Vec<Citation>,
}
/**
 * The paper that makes a citation. Occurs within the Citation struct.
 * Both fields are optional; `get_citations` skips entries where either one is missing.
 * Field names are mapped to camelCase JSON keys when deserializing (e.g. `paperId`).
 */
#[derive(Deserialize, Debug)]
#[serde(rename_all = "camelCase")]
struct CitingPaper {
paper_id: Option<String>,
title: Option<String>,
}
/**
 * One entry of a citations response: wraps the paper that makes the citation.
 * Deserialized from the camelCase JSON key `citingPaper`.
 */
#[derive(Deserialize, Debug)]
#[serde(rename_all = "camelCase")]
struct Citation {
citing_paper: CitingPaper,
}
/**
 * Generic struct to wrap the common API response pattern {data: [...]}
 *
 * `data` holds the list of items when present. `get_citations` treats a response
 * that carries a `message` as an error and prints the message.
 *
 * NOTE(review): a stray `code: Option<String>,` line used to sit inside this comment;
 * presumably a field that was dropped by accident — confirm whether it should be restored.
 */
#[derive(Deserialize, Debug)]
struct ApiListResponse<T> {
data: Option<Vec<T>>,
message: Option<String>,
}
// TODO: Cache results in a (separate but local) database such as Redis
// TODO: Store results in a (separate but local) database such as Postgres
#[async_recursion]
async fn get_citations(
client: &reqwest::Client,
paper_id: String,
depth: u32,
authors: &mut Vec<Author>,
) -> DataResult<Vec<Citation>> {
// Bound recursion to some depth
if depth > MAX_DEPTH {
return Ok(vec![]);
}
// Build the URL
let mut url = String::new();
write!(&mut url, "{}/paper/{}/citations", BASE_URL, paper_id)?;
let mut req = client.get(url);
let api_key = std::env::var("SEMANTIC_SCHOLAR_API_KEY");
if api_key.is_ok() {
req = req.header("x-api-key", api_key.unwrap());
}
let resp = req.send().await?.text().await?;
let resp_deserialized_attempt =
serde_json::from_str::<ApiListResponse<Citation>>(resp.as_str());
if let Err(err) = resp_deserialized_attempt {
println!("depth {} paper {} error {}", depth, paper_id, err);
return Ok(vec![]);
}
let resp_deserialized: ApiListResponse<Citation> = resp_deserialized_attempt.unwrap();
if resp_deserialized.message.is_some() {
println!(
"depth {} paper {} error {}",
depth,
paper_id,
resp_deserialized.message.unwrap()
);
return Ok(vec![]);
}
for Citation {
citing_paper:
CitingPaper {
paper_id: citing_paper_id,
title,
},
} in resp_deserialized.data.unwrap()
{
if let (Some(citing_paper_id), Some(title)) = (citing_paper_id, title) {
let short_len = min(50, title.len());
let (short_title, _) = title.split_at(short_len);
println!(
"depth {} paper {} cites {} title {}",
depth, citing_paper_id, paper_id, short_title
);
get_citations(&client, citing_paper_id, depth + 1, authors).await?;
}
}
Ok(vec![])
}
#[tokio::main]
async fn main() -> Result<(), Box<dyn Error>> {
let Args { depth, paper_id } = Args::parse();
dotenv().ok();
let mut authors = Authors::new();
let client: reqwest::Client = reqwest::Client::new();
get_citations(&client, paper_id, depth, &mut authors).await?;
Ok(())
}

View File

@ -0,0 +1,59 @@
use mongodb::{Client, options::ClientOptions};
// Connection string for a MongoDB instance on localhost, including user name and password.
// NOTE(review): credentials are hard-coded in source — consider reading the address from the environment.
const MONGO_DB_ADDRESS: &str = "mongodb://docker:mongopw@localhost:55000";
/// MongoDB example: connects to `MONGO_DB_ADDRESS`, inserts two sample books into the
/// `books` collection of database `db2`, then prints every database name followed by the
/// names of its collections.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    use serde::{Deserialize, Serialize};

    /// Sample document type stored in the `books` collection.
    #[derive(Debug, Serialize, Deserialize)]
    struct Book {
        title: String,
        author: String,
    }

    // Turn the connection string into driver options, then open a handle to the deployment.
    let options = ClientOptions::parse(MONGO_DB_ADDRESS).await?;
    let mongo = Client::with_options(options)?;

    // Sample documents to insert.
    let sample_books: Vec<Book> = vec![
        ("The Grapes of Wrath", "John Steinbeck"),
        ("To Kill a Mockingbird", "Harper Lee"),
    ]
    .into_iter()
    .map(|(title, author)| Book {
        title: title.to_owned(),
        author: author.to_owned(),
    })
    .collect();

    // Insert into the typed "db2.books" collection (the database is created on first use);
    // no manual conversion to BSON is necessary.
    mongo
        .database("db2")
        .collection::<Book>("books")
        .insert_many(sample_books, None)
        .await?;

    // Print each database in the deployment, with its collections listed underneath.
    let database_names = mongo.list_database_names(None, None).await?;
    for name in database_names {
        println!("{}", name);
        let database = mongo.database(&name);
        let collection_names = database.list_collection_names(None).await?;
        for collection in collection_names {
            println!("- {}", collection);
        }
    }

    Ok(())
}