From 90e5f599b8f553f9620b56724f1955bd4806e0e5 Mon Sep 17 00:00:00 2001 From: Ladd Hoffman Date: Fri, 19 Apr 2024 18:08:18 -0500 Subject: [PATCH] backend: first cut at semantic scholar import --- backend/.env.example | 4 +- backend/contract-addresses.json | 14 ++++ backend/index.js | 98 ++--------------------- backend/package-lock.json | 106 +++++++++++++++++++++++-- backend/package.json | 3 +- backend/src/contract-config.js | 23 ++++++ backend/src/db.js | 9 +++ backend/src/import-from-ss.js | 133 ++++++++++++++++++++++++++++++++ backend/src/read.js | 47 +++++++++++ backend/src/verify-signature.js | 24 ++++++ backend/src/write.js | 35 +++++++++ 11 files changed, 395 insertions(+), 101 deletions(-) create mode 100644 backend/contract-addresses.json create mode 100644 backend/src/contract-config.js create mode 100644 backend/src/db.js create mode 100644 backend/src/import-from-ss.js create mode 100644 backend/src/read.js create mode 100644 backend/src/verify-signature.js create mode 100644 backend/src/write.js diff --git a/backend/.env.example b/backend/.env.example index 54330e2..f261488 100644 --- a/backend/.env.example +++ b/backend/.env.example @@ -1,2 +1,4 @@ PORT=3000 -DATA_DIR="./data" \ No newline at end of file +DATA_DIR="./data" +SEMANTIC_SCHOLAR_API_KEY= +NETWORK="localhost" \ No newline at end of file diff --git a/backend/contract-addresses.json b/backend/contract-addresses.json new file mode 100644 index 0000000..77cb51a --- /dev/null +++ b/backend/contract-addresses.json @@ -0,0 +1,14 @@ +{ + "localhost": { + "DAO": "0xD60A1c64B96a133587A75C2771690072F238a549", + "Work1": "0xCF3f16D151052FA7b99a71E79EC3b0e6C793aa0b", + "Onboarding": "0xE148e864A646B8bFc95dcc9acd3dBcB52704EE60", + "Proposals": "0x981234BBBC1ec93200F5BB3a65e2F9711A6109aa" + }, + "sepolia": { + "DAO": "0x241514DC94568e98222fBE66662b054b545A61AE", + "Work1": "0xc04152a440d8f79099e2049dc19b07EE7f2F8cc0", + "Onboarding": "0xFa5877940e527559320afc1303c06D0fb7E88907", + "Proposals": "0xeA9AF5fF56ef2bfd9DbC1295F1488302c61B92dF" + } +} \ No newline at end of file diff --git a/backend/index.js b/backend/index.js index 7b111fa..6b3d1c9 100644 --- a/backend/index.js +++ b/backend/index.js @@ -1,103 +1,19 @@ const express = require('express'); -const { Level } = require('level'); -const { recoverPersonalSignature } = require('@metamask/eth-sig-util'); -const objectHash = require('object-hash'); + +const read = require('./src/read'); +const write = require('./src/write'); +const importFromSS = require('./src/import-from-ss'); require('dotenv').config(); const app = express(); const port = process.env.PORT || 3000; -const dataDir = process.env.DATA_DIR || 'data'; - -const db = new Level(`${dataDir}/forum`, { valueEncoding: 'json' }); - -const verifySignature = ({ - author, content, signature, embeddedData, -}) => { - let contentToVerify = content; - if (embeddedData && Object.entries(embeddedData).length) { - contentToVerify += `\n\n${JSON.stringify(embeddedData, null, 2)}`; - } - try { - const account = recoverPersonalSignature({ data: contentToVerify, signature }); - if (account !== author) { - console.log('error: author does not match signature'); - return false; - } - } catch (e) { - console.log('error: failed to recover signature:', e.message); - return false; - } - return true; -}; app.use(express.json()); -app.post('/write', async (req, res) => { - const { - body: { - author, content, signature, embeddedData, - }, - } = req; - // Check author signature - if (!verifySignature({ - author, content, signature, embeddedData, 
- })) { - res.status(403).end(); - return; - } - - // Compute content hash - const data = { - author, content, signature, embeddedData, - }; - const hash = objectHash(data); - console.log('write', hash); - console.log(data); - - // Store content - db.put(hash, data); - - // Return hash - res.send(hash); -}); - -app.get('/read/:hash', async (req, res) => { - const { hash } = req.params; - console.log('read', hash); - - // Fetch content - let data; - try { - data = await db.get(req.params.hash); - } catch (e) { - console.log('read error:', e.message, hash); - res.status(e.status).end(); - return; - } - - data.embeddedData = data.embeddedData || undefined; - - console.log(data); - - // Verify hash - const derivedHash = objectHash(data); - if (derivedHash !== hash) { - console.log('error: hash mismatch'); - res.status(500).end(); - return; - } - - // Verify signature - if (!verifySignature(data)) { - console.log('error: signature verificaition failed'); - res.status(500).end(); - return; - } - - // Return content - res.json(data); -}); +app.post('/write', write); +app.get('/read/:hash', read); +app.post('/importFromSemanticScholar', importFromSS); app.get('*', (req, res) => { console.log(`404 req.path: ${req.path}`); diff --git a/backend/package-lock.json b/backend/package-lock.json index 603d6b2..acecb08 100644 --- a/backend/package-lock.json +++ b/backend/package-lock.json @@ -10,8 +10,9 @@ "license": "ISC", "dependencies": { "@metamask/eth-sig-util": "^7.0.1", - "axios": "^1.6.7", + "axios": "^1.6.8", "dotenv": "^16.4.5", + "ethers": "^6.12.0", "express": "^4.18.2", "level": "^8.0.1", "object-hash": "^3.0.0" @@ -34,6 +35,11 @@ "node": ">=0.10.0" } }, + "node_modules/@adraffy/ens-normalize": { + "version": "1.10.1", + "resolved": "https://registry.npmjs.org/@adraffy/ens-normalize/-/ens-normalize-1.10.1.tgz", + "integrity": "sha512-96Z2IP3mYmF1Xg2cDm8f1gWGf/HUVedQ3FMifV4kG/PQ4yEP51xDtRAEfhVNt5f/uzpNkZHwWQuUcu6D6K+Ekw==" + }, "node_modules/@babel/runtime": { "version": "7.23.9", "resolved": "https://registry.npmjs.org/@babel/runtime/-/runtime-7.23.9.tgz", @@ -400,6 +406,11 @@ "resolved": "https://registry.npmjs.org/@types/ms/-/ms-0.7.34.tgz", "integrity": "sha512-nG96G3Wp6acyAgJqGasjODb+acrI7KltPiRxzHPXnP3NgI28bpQDRv53olbqGXbfcgF5aiiHmO3xpwEpS5Ld9g==" }, + "node_modules/@types/node": { + "version": "18.15.13", + "resolved": "https://registry.npmjs.org/@types/node/-/node-18.15.13.tgz", + "integrity": "sha512-N+0kuo9KgrUQ1Sn/ifDXsvg0TTleP7rIy4zOBGECxAljqvqfqpTfzx0Q1NUedOixRMBfe2Whhb056a42cWs26Q==" + }, "node_modules/@ungap/structured-clone": { "version": "1.2.0", "resolved": "https://registry.npmjs.org/@ungap/structured-clone/-/structured-clone-1.2.0.tgz", @@ -456,6 +467,11 @@ "acorn": "^6.0.0 || ^7.0.0 || ^8.0.0" } }, + "node_modules/aes-js": { + "version": "4.0.0-beta.5", + "resolved": "https://registry.npmjs.org/aes-js/-/aes-js-4.0.0-beta.5.tgz", + "integrity": "sha512-G965FqalsNyrPqgEGON7nIx1e/OVENSgiEIzyC63haUMuvNnwIgIjMs52hlTCKhkBny7A2ORNlfY9Zu+jmGk1Q==" + }, "node_modules/ajv": { "version": "6.12.6", "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", @@ -705,11 +721,11 @@ } }, "node_modules/axios": { - "version": "1.6.7", - "resolved": "https://registry.npmjs.org/axios/-/axios-1.6.7.tgz", - "integrity": "sha512-/hDJGff6/c7u0hDkvkGxR/oy6CbCs8ziCsC7SqmhjfozqiJGc8Z11wrv9z9lYfY4K8l+H9TpjcMDX0xOZmx+RA==", + "version": "1.6.8", + "resolved": "https://registry.npmjs.org/axios/-/axios-1.6.8.tgz", + "integrity": 
"sha512-v/ZHtJDU39mDpyBoFVkETcd/uNdxrWRrg3bKpOKzXFA6Bvqopts6ALSMU3y6ijYxbw2B+wPrIv46egTzJXCLGQ==", "dependencies": { - "follow-redirects": "^1.15.4", + "follow-redirects": "^1.15.6", "form-data": "^4.0.0", "proxy-from-env": "^1.1.0" } @@ -1701,6 +1717,55 @@ "@scure/bip39": "1.2.2" } }, + "node_modules/ethers": { + "version": "6.12.0", + "resolved": "https://registry.npmjs.org/ethers/-/ethers-6.12.0.tgz", + "integrity": "sha512-zL5NlOTjML239gIvtVJuaSk0N9GQLi1Hom3ZWUszE5lDTQE/IVB62mrPkQ2W1bGcZwVGSLaetQbWNQSvI4rGDQ==", + "funding": [ + { + "type": "individual", + "url": "https://github.com/sponsors/ethers-io/" + }, + { + "type": "individual", + "url": "https://www.buymeacoffee.com/ricmoo" + } + ], + "dependencies": { + "@adraffy/ens-normalize": "1.10.1", + "@noble/curves": "1.2.0", + "@noble/hashes": "1.3.2", + "@types/node": "18.15.13", + "aes-js": "4.0.0-beta.5", + "tslib": "2.4.0", + "ws": "8.5.0" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/ethers/node_modules/@noble/curves": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/@noble/curves/-/curves-1.2.0.tgz", + "integrity": "sha512-oYclrNgRaM9SsBUBVbb8M6DTV7ZHRTKugureoYEncY5c65HOmRzvSiTE3y5CYaPYJA/GVkrhXEoF0M3Ya9PMnw==", + "dependencies": { + "@noble/hashes": "1.3.2" + }, + "funding": { + "url": "https://paulmillr.com/funding/" + } + }, + "node_modules/ethers/node_modules/@noble/hashes": { + "version": "1.3.2", + "resolved": "https://registry.npmjs.org/@noble/hashes/-/hashes-1.3.2.tgz", + "integrity": "sha512-MVC8EAQp7MvEcm30KWENFjgR+Mkmf+D189XJTkFIlwohU5hcBbn1ZkKq7KVTi2Hme3PMGF390DaL52beVrIihQ==", + "engines": { + "node": ">= 16" + }, + "funding": { + "url": "https://paulmillr.com/funding/" + } + }, "node_modules/express": { "version": "4.18.2", "resolved": "https://registry.npmjs.org/express/-/express-4.18.2.tgz", @@ -1861,9 +1926,9 @@ "dev": true }, "node_modules/follow-redirects": { - "version": "1.15.5", - "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.5.tgz", - "integrity": "sha512-vSFWUON1B+yAw1VN4xMfxgn5fTUiaOzAJCKBwIIgT/+7CuGy9+r+5gITvP62j3RmaD5Ph65UaERdOSRGUzZtgw==", + "version": "1.15.6", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.6.tgz", + "integrity": "sha512-wWN62YITEaOpSK584EZXJafH1AGpO8RVgElfkuXbTOrPX4fIfOyEpW/CsiNd8JdYrAoOvafRTOEnvsO++qCqFA==", "funding": [ { "type": "individual", @@ -3802,6 +3867,11 @@ "strip-bom": "^3.0.0" } }, + "node_modules/tslib": { + "version": "2.4.0", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.0.tgz", + "integrity": "sha512-d6xOpEDfsi2CZVlPQzGeux8XMwLT9hssAsaPYExaQMuYskwb+x1x7J371tWlbBdWHroy99KnVB6qIkUbs5X3UQ==" + }, "node_modules/tweetnacl": { "version": "1.0.3", "resolved": "https://registry.npmjs.org/tweetnacl/-/tweetnacl-1.0.3.tgz", @@ -4078,6 +4148,26 @@ "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==", "dev": true }, + "node_modules/ws": { + "version": "8.5.0", + "resolved": "https://registry.npmjs.org/ws/-/ws-8.5.0.tgz", + "integrity": "sha512-BWX0SWVgLPzYwF8lTzEy1egjhS4S4OEAHfsO8o65WOVsrnSRGaSiUaa9e0ggGlkMTtBlmOpEXiie9RUcBO86qg==", + "engines": { + "node": ">=10.0.0" + }, + "peerDependencies": { + "bufferutil": "^4.0.1", + "utf-8-validate": "^5.0.2" + }, + "peerDependenciesMeta": { + "bufferutil": { + "optional": true + }, + "utf-8-validate": { + "optional": true + } + } + }, "node_modules/yallist": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz", diff --git 
a/backend/package.json b/backend/package.json
index ac883da..70c9b3d 100644
--- a/backend/package.json
+++ b/backend/package.json
@@ -10,8 +10,9 @@
   "license": "ISC",
   "dependencies": {
     "@metamask/eth-sig-util": "^7.0.1",
-    "axios": "^1.6.7",
+    "axios": "^1.6.8",
     "dotenv": "^16.4.5",
+    "ethers": "^6.12.0",
     "express": "^4.18.2",
     "level": "^8.0.1",
     "object-hash": "^3.0.0"
diff --git a/backend/src/contract-config.js b/backend/src/contract-config.js
new file mode 100644
index 0000000..53f1ad6
--- /dev/null
+++ b/backend/src/contract-config.js
@@ -0,0 +1,27 @@
+const contractAddresses = require('../contract-addresses.json');
+
+const networks = {
+  localhost: '0x539',
+  sepolia: '0xaa36a7',
+};
+
+const getContractAddressByNetworkName = (networkName, contractName) => {
+  const addresses = contractAddresses[networkName];
+  if (!addresses) throw new Error(`Network ${networkName} not recognized`);
+  const address = addresses[contractName];
+  if (!address) throw new Error(`Contract ${contractName} not recognized`);
+  return address;
+};
+
+const getContractAddressByChainId = (chainId, contractName) => {
+  // Look up the network entry first; indexing [0] directly on the result of
+  // find() would throw a TypeError before an unknown chain ID could be reported
+  const entry = Object.entries(networks).find(([, id]) => id === chainId);
+  if (!entry) throw new Error(`Chain ID ${chainId} not recognized`);
+  return getContractAddressByNetworkName(entry[0], contractName);
+};
+
+module.exports = {
+  getContractAddressByChainId,
+  getContractAddressByNetworkName,
+};
diff --git a/backend/src/db.js b/backend/src/db.js
new file mode 100644
index 0000000..65084cc
--- /dev/null
+++ b/backend/src/db.js
@@ -0,0 +1,9 @@
+const { Level } = require('level');
+
+const dataDir = process.env.DATA_DIR || 'data';
+
+module.exports = {
+  forum: new Level(`${dataDir}/forum`, { valueEncoding: 'json' }),
+  authorAddresses: new Level(`${dataDir}/authorAddresses`, { valueEncoding: 'utf8' }),
+  authorPrivKeys: new Level(`${dataDir}/authorPrivKeys`, { valueEncoding: 'utf8' }),
+};
diff --git a/backend/src/import-from-ss.js b/backend/src/import-from-ss.js
new file mode 100644
index 0000000..bb0fa07
--- /dev/null
+++ b/backend/src/import-from-ss.js
@@ -0,0 +1,149 @@
+const axios = require('axios');
+const ethers = require('ethers');
+const crypto = require('crypto');
+const objectHash = require('object-hash');
+
+const { getContractAddressByNetworkName } = require('./contract-config');
+const { authorAddresses, authorPrivKeys, forum } = require('./db');
+
+// NOTE: ethers v6 has no getContractAt; that helper comes from hardhat-ethers.
+// Construct the contract directly instead. The ABI bundle and the RPC_URL /
+// BACKEND_PRIVATE_KEY environment variables are assumptions that need to be
+// wired up to the actual deployment setup.
+const abis = require('../contract-abis.json');
+const provider = new ethers.JsonRpcProvider(process.env.RPC_URL);
+const signer = new ethers.Wallet(process.env.BACKEND_PRIVATE_KEY, provider);
+const getContract = (name) => new ethers.Contract(
+  getContractAddressByNetworkName(process.env.NETWORK, name),
+  abis[name],
+  signer,
+);
+
+const fetchPaperInfo = async (paperId) => {
+  // The Graph API returns only paperId and title by default, so request the
+  // fields used below, and unwrap the axios response to get the paper itself
+  const { data: paper } = await axios.get(
+    `https://api.semanticscholar.org/graph/v1/paper/${paperId}`,
+    {
+      params: { fields: 'title,url,authors,references' },
+      headers: {
+        'x-api-key': process.env.SEMANTIC_SCHOLAR_API_KEY,
+      },
+    },
+  );
+  return paper;
+};
+
+const getAuthorsInfo = async (paper) => Promise.all(paper.authors.map(async ({ authorId }) => {
+  // Check if we already have an account for each author
+  let authorAddress;
+  let authorPrivKey;
+  try {
+    authorAddress = await authorAddresses.get(authorId);
+  } catch (e) {
+    // Probably not found
+  }
+  if (authorAddress) {
+    // This should always succeed, so we don't use try/catch here
+    authorPrivKey = await authorPrivKeys.get(authorAddress);
+  } else {
+    // Generate and store a new account
+    const id = crypto.randomBytes(32).toString('hex');
+    authorPrivKey = `0x${id}`;
+    const wallet = new ethers.Wallet(authorPrivKey);
+    authorAddress = wallet.address;
+    await authorAddresses.put(authorId, authorAddress);
+    await authorPrivKeys.put(authorAddress, authorPrivKey);
+  }
+  return {
+    authorAddress,
+    authorPrivKey,
+  };
+}));
+
+const generatePost = async (paper) => {
+  const authorsInfo = await getAuthorsInfo(paper);
+  const firstAuthorWallet = new ethers.Wallet(authorsInfo[0].authorPrivKey);
+  const eachAuthorWeightPercent = Math.floor(100 / authorsInfo.length);
+  const authors = authorsInfo.map(({ authorAddress }) => ({
+    weightPercent: eachAuthorWeightPercent,
+    authorAddress,
+  }));
+  // Make sure author weights sum to 100
+  const totalAuthorsWeight = authors.reduce((t, { weightPercent }) => t + weightPercent, 0);
+  authors[0].weightPercent += 100 - totalAuthorsWeight;
+
+  const content = `Semantic Scholar paper ${paper.paperId}
+${paper.title}
+HREF ${paper.url}`;
+
+  // Note that for now we leave embedded data empty, but the stub is here in case we want to use it
+  const embeddedData = {};
+  let contentToSign = content;
+  if (embeddedData && Object.entries(embeddedData).length) {
+    contentToSign += `\n\nDATA\n${JSON.stringify(embeddedData, null, 2)}`;
+  }
+  const signature = firstAuthorWallet.signMessageSync(contentToSign);
+  const hash = objectHash({
+    authors, content, signature, embeddedData,
+  });
+  return {
+    hash, authors, content, signature, embeddedData,
+  };
+};
+
+module.exports = async (req, res) => {
+  const dao = getContract('DAO');
+  const {
+    body: {
+      paperId,
+    },
+  } = req;
+
+  console.log(`importFromSS ${paperId}`);
+
+  // Read the paper info from SS
+  const paper = await fetchPaperInfo(paperId);
+
+  const citations = [];
+
+  if (paper.references && paper.references.length) {
+    // Citations share a fixed 30 percent of the post's total weight
+    const eachCitationWeightPercent = Math.floor(30 / paper.references.length);
+    // Use a sequential loop rather than forEach with an async callback,
+    // which would not be awaited and would leave citations empty below
+    for (const { paperId: citedPaperId } of paper.references) {
+      // Fetch each cited paper so we can generate the post we would add to
+      // the forum, then sign and hash it.
+      // The forum allows dangling citations to support this use case.
+      const citedPaper = await fetchPaperInfo(citedPaperId);
+      const citedPaperInfo = await generatePost(citedPaper);
+      citations.push({
+        weightPercent: eachCitationWeightPercent,
+        targetPostId: citedPaperInfo.hash,
+      });
+    }
+    // Make sure citation weights sum to 30
+    const totalCitationWeight = citations.reduce((t, { weightPercent }) => t + weightPercent, 0);
+    citations[0].weightPercent += 30 - totalCitationWeight;
+  }
+
+  // Create a post for this paper
+  const {
+    hash, authors, content, signature, embeddedData,
+  } = await generatePost(paper);
+
+  // Write the new post to our database
+  await forum.put(hash, {
+    authors, content, signature, embeddedData, citations,
+  });
+
+  // Add the post to the forum (on-chain)
+  await dao.addPost(authors, hash, citations);
+
+  console.log({
+    authors, content, signature, embeddedData, citations,
+  });
+
+  res.end();
+};
diff --git a/backend/src/read.js b/backend/src/read.js
new file mode 100644
index 0000000..7db4de3
--- /dev/null
+++ b/backend/src/read.js
@@ -0,0 +1,49 @@
+const objectHash = require('object-hash');
+
+const verifySignature = require('./verify-signature');
+const { forum } = require('./db');
+
+module.exports = async (req, res) => {
+  const { hash } = req.params;
+  console.log('read', hash);
+
+  // Fetch content
+  let data;
+  try {
+    data = await forum.get(hash);
+  } catch (e) {
+    console.log('read error:', e.message, hash);
+    // level v8 reports missing keys with code LEVEL_NOT_FOUND rather than a
+    // numeric status property, so map that to 404 and anything else to 500
+    res.status(e.code === 'LEVEL_NOT_FOUND' ? 404 : 500).end();
+    return;
+  }
+
+  data.embeddedData = data.embeddedData || undefined;
+
+  console.log(data);
+
+  const {
+    authors, content, signature, embeddedData,
+  } = data;
+
+  // Verify hash
+  const derivedHash = objectHash({
+    authors, content, signature, embeddedData,
+  });
+  if (derivedHash !== hash) {
+    console.log('error: hash mismatch');
+    res.status(500).end();
+    return;
+  }
+
+  // Verify signature
+  if (!verifySignature(data)) {
+    console.log('error: signature verification failed');
+    res.status(500).end();
+    return;
+  }
+
+  // Return content
+  res.json(data);
+};
diff --git a/backend/src/verify-signature.js b/backend/src/verify-signature.js
new file mode 100644
index 0000000..31b0698
--- /dev/null
+++ b/backend/src/verify-signature.js
@@ -0,0 +1,26 @@
+const { recoverPersonalSignature } = require('@metamask/eth-sig-util');
+
+const verifySignature = ({
+  authors, content, signature, embeddedData,
+}) => {
+  let contentToVerify = content;
+  if (embeddedData && Object.entries(embeddedData).length) {
+    contentToVerify += `\n\nDATA\n${JSON.stringify(embeddedData, null, 2)}`;
+  }
+  try {
+    const account = recoverPersonalSignature({ data: contentToVerify, signature });
+    // recoverPersonalSignature returns a lowercase address, while stored
+    // author addresses may be checksummed, so compare case-insensitively
+    const authorAddresses = authors.map((author) => author.authorAddress.toLowerCase());
+    if (!authorAddresses.includes(account.toLowerCase())) {
+      console.log('error: signer is not among the authors');
+      return false;
+    }
+  } catch (e) {
+    console.log('error: failed to recover signature:', e.message);
+    return false;
+  }
+  return true;
+};
+
+module.exports = verifySignature;
diff --git a/backend/src/write.js b/backend/src/write.js
new file mode 100644
index 0000000..a4c7f7f
--- /dev/null
+++ b/backend/src/write.js
@@ -0,0 +1,36 @@
+const objectHash = require('object-hash');
+
+const verifySignature = require('./verify-signature');
+const { forum } = require('./db');
+
+module.exports = async (req, res) => {
+  const {
+    body: {
+      authors, content, signature, embeddedData, citations,
+    },
+  } = req;
+  // Check author signature
+  if (!verifySignature({
+    authors, content, signature, embeddedData,
+  })) {
+    res.status(403).end();
+    return;
+  }
+
+  // Compute content hash over the signed fields only; citations are stored
+  // alongside the post but do not affect its hash
+  const data = {
+    authors, content, signature, embeddedData, citations,
+  };
+  const hash = objectHash({
+    authors, content, signature, embeddedData,
+  });
+  console.log('write', hash);
+  console.log(data);
+
+  // Store content
+  await forum.put(hash, data);
+
+  // Return hash
+  res.send(hash);
+};
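
Note for reviewers on the weight arithmetic in import-from-ss.js: with 3 authors,
each gets floor(100/3) = 33 and the first author absorbs the remainder
(100 - 99 = 1), giving weights of 34/33/33. Citations share a fixed 30 percent
of the post's weight: with 7 references, each gets floor(30/7) = 4 and the
first citation absorbs the remaining 30 - 28 = 2.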
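A quick way to exercise the new endpoint while reviewing -- a minimal sketch,
not part of the patch. It assumes the backend is running locally on the default
PORT=3000; the paper ID shown is only a placeholder, and the request body shape
matches the importFromSemanticScholar handler above.

// smoke-test.js
const axios = require('axios');

const main = async () => {
  const paperId = '204e3073870fae3d05bcbc2f6a8e263d9b72e776'; // placeholder Semantic Scholar paper ID
  // Trigger the import; the handler responds with an empty body on success
  await axios.post('http://localhost:3000/importFromSemanticScholar', { paperId });
  console.log('import requested for paper', paperId);
};

main().catch((e) => console.error('import failed:', e.message));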