Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
This page lists all the concepts implemented by Datashare that users might want to understand before starting to search within documents.
Datashare allows you to search in your files, regardless of their format. It is a free open-source software developed by the International Consortium of Investigative Journalists (ICIJ).


Find the Datashare application on your computer and run it locally in your browser.
When running Datashare from the command-line, pick which "stage" to apply to analyse your documents.
datashare --mode CLI \
# Select the NLP stage
--stage NLP \
# Use CORENLP to detect named entities
--nlpp CORENLP \
# URI of Elasticsearch
--elasticsearchAddress http://elasticsearch:9200 datashare --mode CLI \
# Select the INDEX stage
--stage INDEX \
# Where the documents are located
--dataDir /path/to/documents \
# Store the queued files in Redis
--dataBusType REDIS \
# URI of Elasticsearch
--elasticsearchAddress http://elasticsearch:9200 \
# Enable OCR
--ocr true \
# URI of Redis
--redisAddress redis://redis:6379


These pages will help you set up and install Datashare on your computer.
These pages will help you set up and install Datashare on your computer.
datashare \
# Switch to SERVER mode
--mode SERVER \
# Dummy session filter to create ephemeral users
--authFilter org.icij.datashare.session.YesCookieAuthFilter \
# Name of the default project for every user
--defaultProject local-datashare \
# URI of Elasticsearch
--elasticsearchAddress http://elasticsearch:9200 \
# URI of Redis
--redisAddress redis://redis:6379 \
# store user sessions in Redis.
--sessionStoreType REDISFind the application on your computer and run it locally in your browser.













platform: linux/x86_64docker run --mount src=$HOME/Datashare,target=/home/datashare/data,type=bind -p 8080:8080 icij/datashare:11.1.9 --mode EMBEDDEDversion: "3.7"
services:
  # Datashare application, started in LOCAL mode and listening on port 8080
  # (see `command` below). It waits for the three backing services.
  datashare:
    image: icij/datashare:18.1.3
    hostname: datashare
    ports:
      # Quoted so the host:container mapping is always read as a string.
      - "8080:8080"
    environment:
      # Same path as the bind-mount target declared under `volumes`.
      - DS_DOCKER_MOUNTED_DATA_DIR=/home/datashare/data
    volumes:
      # Host folder ~/Datashare exposed inside the container.
      - type: bind
        source: ${HOME}/Datashare
        target: /home/datashare/data
      # Named volume; presumably keeps downloaded models between runs
      # (inferred from its name -- confirm).
      - type: volume
        source: datashare-models
        target: /home/datashare/dist
    # Folded scalar: the lines below are joined with spaces into one
    # argument string.
    command: >-
      --dataSourceUrl jdbc:postgresql://postgresql/datashare?user=datashare\&password=password
      --mode LOCAL
      --tcpListenPort 8080
    depends_on:
      - postgresql
      - redis
      - elasticsearch
  # Single-node Elasticsearch cluster named "datashare", CORS open to any origin.
  elasticsearch:
    image: docker.elastic.co/elasticsearch/elasticsearch:7.9.1
    restart: on-failure
    volumes:
      - type: volume
        source: elasticsearch-data
        target: /usr/share/elasticsearch/data
        read_only: false
    environment:
      - "http.host=0.0.0.0"
      - "transport.host=0.0.0.0"
      - "cluster.name=datashare"
      - "discovery.type=single-node"
      - "discovery.zen.minimum_master_nodes=1"
      - "xpack.license.self_generated.type=basic"
      - "http.cors.enabled=true"
      - "http.cors.allow-origin=*"
      - "http.cors.allow-methods=OPTIONS, HEAD, GET, POST, PUT, DELETE"
  redis:
    image: redis:4.0.1-alpine
    restart: on-failure
  # PostgreSQL database; user, password and database name match the
  # --dataSourceUrl passed to the datashare service above.
  postgresql:
    image: postgres:12-alpine
    environment:
      - POSTGRES_USER=datashare
      - POSTGRES_PASSWORD=password
      - POSTGRES_DB=datashare
    volumes:
      - type: volume
        source: postgresql-data
        target: /var/lib/postgresql/data
volumes:
datashare-models:
elasticsearch-data:
postgresql-data:docker-compose up -ddocker-compose logs -f datasharedocker-compose downFind the application on your computer and run it locally on your browser.
This page helps you find entities (people, organizations, locations, e-mail addresses) in your documents.
$ sudo apt install /dir/to/debian/package/datashare-dist_7.2.0_all.deb$ datashare





































datashare
sudo apt install tesseract-ocr-[lang]docker compose exec datashare_web /entrypoint.sh \
--mode CLI \
--stage NLP \
--defaultProject secret-project \
--elasticsearchAddress http://elasticsearch:9200 \
--nlpParallelism 2 \
--nlpp CORENLPNeo4j User should be set to your Neo4j user name (neo4j by default)


























port versionport install tesseract-deubrew -vdocker compose exec datashare_web /entrypoint.sh \
--mode CLI \
--stage SCAN,INDEX,NLP \
--defaultProject secret-project \
--elasticsearchAddress http://elasticsearch:9200 \
--nlpParallelism 2 \
--nlpp CORENLP \
--dataDir /home/datashare/Datashare/docker compose exec datashare_web /entrypoint.sh \
--mode CLI \
--stage ENQUEUEIDX,NLP \
--defaultProject secret-project \
--elasticsearchAddress http://elasticsearch:9200 \
--nlpParallelism 2 \
--nlpp CORENLPversion: "3.7"
services:
  # Datashare application, started in LOCAL mode and listening on port 8080
  # (see `command` below). It waits for the three backing services.
  datashare:
    image: icij/datashare:20.1.4
    hostname: datashare
    ports:
      # Quoted so the host:container mapping is always read as a string.
      - "8080:8080"
    environment:
      # Same path as the bind-mount target declared under `volumes`.
      - DS_DOCKER_MOUNTED_DATA_DIR=/home/datashare/data
    volumes:
      # Host folder ~/Datashare exposed inside the container.
      - type: bind
        source: ${HOME}/Datashare
        target: /home/datashare/data
      # Named volume; presumably keeps downloaded models between runs
      # (inferred from its name -- confirm).
      - type: volume
        source: datashare-models
        target: /home/datashare/dist
    # Folded scalar: the lines below are joined with spaces into one
    # argument string.
    command: >-
      --dataSourceUrl jdbc:postgresql://postgresql/datashare?user=datashare\&password=password
      --mode LOCAL
      --tcpListenPort 8080
    depends_on:
      - postgresql
      - redis
      - elasticsearch
  # Single-node Elasticsearch cluster named "datashare", CORS open to any origin.
  elasticsearch:
    image: docker.elastic.co/elasticsearch/elasticsearch:7.9.1
    restart: on-failure
    volumes:
      - type: volume
        source: elasticsearch-data
        target: /usr/share/elasticsearch/data
        read_only: false
    environment:
      - "http.host=0.0.0.0"
      - "transport.host=0.0.0.0"
      - "cluster.name=datashare"
      - "discovery.type=single-node"
      - "discovery.zen.minimum_master_nodes=1"
      - "xpack.license.self_generated.type=basic"
      - "http.cors.enabled=true"
      - "http.cors.allow-origin=*"
      - "http.cors.allow-methods=OPTIONS, HEAD, GET, POST, PUT, DELETE"
  redis:
    image: redis:4.0.1-alpine
    restart: on-failure
  # PostgreSQL database; user, password and database name match the
  # --dataSourceUrl passed to the datashare service above.
  postgresql:
    image: postgres:12-alpine
    environment:
      - POSTGRES_USER=datashare
      - POSTGRES_PASSWORD=password
      - POSTGRES_DB=datashare
    volumes:
      - type: volume
        source: postgresql-data
        target: /var/lib/postgresql/data
volumes:
datashare-models:
elasticsearch-data:
postgresql-data:docker-compose up -ddocker-compose logs -f datashare_webdocker-compose downDummy authentication provider to disable authentication
OAuth2 authentication with a third-party id service
docker run -ti ICIJ/datashare:version --mode SERVER \
--oauthClientId 30045255030c6740ce4c95c \
--oauthClientSecret 10af3d46399a8143179271e6b726aaf63f20604092106 \
--oauthAuthorizeUrl https://my.oauth-server.org/oauth/authorize \
--oauthTokenUrl https://my.oauth-server.org/oauth/token \
--oauthApiUrl https://my.oauth-server.org/api/v1/me.json \
--oauthCallbackPath /auth/callbackdocker compose exec datashare_web /entrypoint.sh \
--mode CLI \
--stage SCAN,INDEX \
--defaultProject secret-project \
--elasticsearchAddress http://elasticsearch:9200 \
--dataDir /home/datashare/Datashare/docker compose exec datashare_web /entrypoint.sh \
--mode CLI \
--pluginInstall datashare-plugin-neo4j-graph-widget docker compose exec \
# if you are not using the default extensions directory
# you have to specify it extending the CLASSPATH variable ex:
# -e CLASSPATH=/home/datashare/extensions/* \
datashare_web /entrypoint.sh \
--mode CLI \
--ext neo4j \
... datashare --mode CLI --stage SCAN --redisAddress redis://redis:6379 --busType REDIS
datashare --mode CLI --stage INDEX --redisAddress redis://redis:6379 --busType REDIS👷♀️ This page is currently being written by Datashare team.






docker compose exec datashare_web /entrypoint.sh \
--mode CLI \
--stage SCAN \
--queueType REDIS \
--queueName "datashare:queue" \
--redisAddress redis://redis:6379 \
--defaultProject secret-project \
--elasticsearchAddress http://elasticsearch:9200 \
--dataDir /home/datashare/Datashare/docker compose exec redis redis-cli lrange datashare:queue 0 20docker compose exec datashare_web /entrypoint.sh \
--mode CLI \
--stage INDEX \
--queueType REDIS \
--queueName "datashare:queue" \
--redisAddress redis://redis:6379 \
--defaultProject secret-project \
--elasticsearchAddress http://elasticsearch:9200 \
--dataDir /home/datashare/Datashare/docker compose exec datashare_web /entrypoint.sh \
--mode CLI \
--stage SCANIDX \
--queueType REDIS \
--reportName "report:queue" \
--redisAddress redis://redis:6379 \
--defaultProject secret-project \
--elasticsearchAddress http://elasticsearch:9200 \
--dataDir /home/datashare/Datashare/docker compose exec datashare_web /entrypoint.sh \
--mode CLI \
--stage SCAN,INDEX \
--ocr true \
--queueType REDIS \
--queueName "datashare:queue" \
--reportName "report:queue" \
--redisAddress redis://redis:6379 \
--defaultProject secret-project \
--elasticsearchAddress http://elasticsearch:9200 \
--dataDir /home/datashare/Datashare/...
services:
datashare_web:
...
environment:
- DS_DOCKER_NEO4J_HOST=neo4j
- DS_DOCKER_NEO4J_PORT=7687
- DS_DOCKER_NEO4J_SINGLE_PROJECT=secret-project # This is for community edition onlydocker compose restart datashare_webdocker run -ti ICIJ/datashare:version --mode SERVER \
--redisAddress redis://my.redis-server.org:6379 \
--elasticsearchAddress https://my.elastic-server.org:9200 \
--messageBusAddress my.redis-server.org \
--dataSourceUrl jdbc:postgresql://db-server/ds-database?user=ds-user&password=ds-password \
--rootHost https://my.datashare-server.org
# ... +auth parameters (see authentication providers section)docker compose exec \
datashare_web /entrypoint.sh \
--mode CLI \
--ext neo4j \
--full-import \
--project secret-projectdocker compose exec \
datashare_web /entrypoint.sh \
--mode CLI \
--ext neo4j \
--full-import \
--project secret-projectdatashare --mode CLI --stage INDEX --parallelism 14 --parserParallelism 14
datashare --mode CLI --stage NLP --parallelism 14 --nlpParallelism 14JAVA_OPTS="-Xms10g -Xmx50g" datashare --mode CLI --stage INDEXdatashare --mode CLI --stage INDEX --language FRENCH --ocrLanguage fra
datashare --mode CLI --stage INDEX --language CHINESE --ocrLanguage chi_sim
datashare --mode CLI --stage INDEX --language GREEK --ocrLanguage elldatashare --mode CLI --stage INDEX --ocr falsereadpst -reD <Filename>.pst$ psql datashare
datashare=> insert into user_inventory (id, email, name, provider, details) values ('fbar', 'foo@bar.com', 'Foo Bar', 'my_company', '{"password": "fcde2b2edba56bf408601fb721fe9b5c338d10ee429ea04fae5511b68fbf8fb9", "groups_by_applications":{"datashare":["local-datashare"]}}');$ echo -n bar | sha256sum
fcde2b2edba56bf408601fb721fe9b5c338d10ee429ea04fae5511b68fbf8fb9 -$ redis-cli -h my.redis-server.org
redis-server.org:6379> set foo '{"uid":"foo", "password":"fcde2b2edba56bf408601fb721fe9b5c338d10ee429ea04fae5511b68fbf8fb9", "groups_by_applications":{"datashare":["local-datashare"]}}'Projects are collections of documents. Datashare displays statistics about each projects.
Batch searches let you get the results of every query in a list all at once: instead of searching for each query one by one, upload a list, set options/filters and see the matching documents.
A project is a collection of documents. Datashare displays statistics about each project.
$ psql datashare
datashare=> insert into user_inventory (id, email, name, provider, details) values ('fbar', 'foo@bar.com', 'Foo Bar', 'my_company', '{"password": "fcde2b2edba56bf408601fb721fe9b5c338d10ee429ea04fae5511b68fbf8fb9", "groups_by_applications":{"datashare":["myindex", "local-datashare"]}}');docker run -ti ICIJ/datashare --mode SERVER \
--batchQueueType REDIS \
--dataSourceUrl 'jdbc:postgresql://postgres/datashare?user=<username>&password=<password>' \
--sessionStoreType REDIS \
--authFilter org.icij.datashare.session.BasicAuthAdaptorFilter \
--authUsersProvider org.icij.datashare.session.UsersInDb$ redis-cli -h my.redis-server.org
redis-server.org:6379> set foo '{"uid":"foo", "password":"fcde2b2edba56bf408601fb721fe9b5c338d10ee429ea04fae5511b68fbf8fb9", "groups_by_applications":{"datashare":["myindex","local-datashare"]}}'docker run -ti ICIJ/datashare --mode SERVER \
--batchQueueType REDIS \
--dataSourceUrl 'jdbc:postgresql://postgres/datashare?user=<username>&password=<password>' \
--sessionStoreType REDIS \
--authFilter org.icij.datashare.session.BasicAuthAdaptorFilter \
--authUsersProvider org.icij.datashare.session.UsersInRedis















































👷‍♀️ This page is currently being written by the Datashare team.


/(?:4[0-9]{12}(?:[0-9]{3})?|[25][1-7][0-9]{14}|6(?:011|5[0-9][0-9])[0-9]{12}|3[47][0-9]{13}|3(?:0[0-5]|[68][0-9])[0-9]{11}|(?:2131|1800|35[0-9]{3})[0-9]{11})/












Shortcuts help you perform some actions faster.
Star documents, tag them or, in server mode, recommend them to the project's other members.
Unexpected char 106 at (line no=1, column no=81, offset=80)
















































This page explains how to leverage Neo4j to explore your Datashare projects.



👷‍♀️ This page is currently being written by the Datashare team.
👷‍♀️ This page is currently being written by the Datashare team.







docker ps | grep neo4j # Should display your running neo4j container IDdocker cp \
<export-path> \
<neo4j-container-id>:/var/lib/neo4j/imports/datashare-graph.dumpdocker exec -it <neo4j-container-id> /bin/bash
./bin/cypher-shell -f imports/datashare-graph.dump cp <export-path> imports./bin/cypher-shell -f imports/datashare-graph.dump This can be due to some syntax errors in the way you wrote your query.
...
services:
  # Neo4j service added to the existing Compose file.
  neo4j:
    image: neo4j:5-community
    environment:
      NEO4J_AUTH: none
      # JSON array kept as a single-quoted string so YAML does not parse it
      # as a flow sequence.
      NEO4J_PLUGINS: '["apoc"]'
    ports:
      # Quoted so the host:container mappings are always read as strings.
      - "7474:7474"
      - "7687:7687"
    volumes:
      - neo4j_conf:/var/lib/neo4j/conf
      - neo4j_data:/var/lib/neo4j/data












![Screenshot of Datashare's search page with '[ikea]' in the search bar and the message 'We were unable to perform your search. This might be due to a server error or a syntax error in your query'](https://icij.gitbook.io/datashare/~gitbook/image?url=https%3A%2F%2F2881303961-files.gitbook.io%2F%7E%2Ffiles%2Fv0%2Fb%2Fgitbook-x-prod.appspot.com%2Fo%2Fspaces%252F-LWCyd3pDXO_H4jk9DgG%252Fuploads%252Fgit-blob-362831eea4e949ee23b7e3ba77622a52915944c4%252FScreenshot%25202025-06-17%2520at%252016.19.13.png%3Falt%3Dmedia&width=768&dpr=3&quality=100&sign=d762852f&sv=2)

👷‍♀️ This page is currently being written by the Datashare team.
volumes:
...
neo4j_data:
driver: local
neo4j_conf:
driver: localdocker compose up -d neo4jrm -rf ~/Library/datashare/plugins ~/Library/datashare/extensionsrm -rf ~/.local/share/datashare/plugins ~/.local/share/datashare/extensionsdel /S %APPDATA%\Datashare\Extensions %APPDATA%\Datashare\Pluginsmkdir /home/user/extensions
datashare --extensionsDir=/home/user/extensions$ datashare -m CLI --extensionList
2020-08-29 09:27:51,219 [main] INFO Main - Running datashare
extension datashare-extension-nlp-opennlp
OPENNLP Pipeline
7.0.0
https://github.com/ICIJ/datashare-extension-nlp-opennlp/releases/download/7.0.0/datashare-nlp-opennlp-7.0.0-jar-with-dependencies.jar
Extension to extract NER entities with OPENNLP
NLP
...$ datashare -m CLI --extensionInstall datashare-extension-nlp-mitie --extensionsDir "/home/user/extensions"
2020-08-29 09:34:30,927 [main] INFO Main - Running datashare
2020-08-29 09:34:32,632 [main] INFO Extension - downloading from url https://github.com/ICIJ/datashare-extension-nlp-mitie/releases/download/7.0.0/datashare-nlp-mitie-7.0.0-jar-with-dependencies.jar
2020-08-29 09:34:36,324 [main] INFO Extension - installing extension from file /tmp/tmp218535941624710718.jar into /home/user/extensions$ datashare -m CLI --extensionDelete datashare-extension-nlp-mitie --extensionsDir "/home/user/extensions/"
2020-08-29 09:40:11,033 [main] INFO Main - Running datashare
2020-08-29 09:40:11,249 [main] INFO Extension - removing extension datashare-extension-nlp-mitie jar /home/user/extensions/datashare-nlp-mitie-7.0.0-jar-with-dependencies.jarpackage org.myorg;
import net.codestory.http.annotations.Get;
import net.codestory.http.annotations.Prefix;
@Prefix("myorg")
public class FooResource {
@Get("foo")
public String getFoo() {
return "hello from foo extension";
}
}$ datashare --extensionsDir /home/user/extensions/
# ... starting logs
2020-08-29 11:03:59,776 [Thread-0] INFO ExtensionLoader - loading jar /home/user/extensions/my-extension.jar
2020-08-29 11:03:59,779 [Thread-0] INFO CorsFilter - adding Cross-Origin Request filter allows *
2020-08-29 11:04:00,314 [Thread-0] INFO Fluent - Production mode
2020-08-29 11:04:00,331 [Thread-0] INFO Fluent - Server started on port 8080$ curl localhost:8080/myorg/foo
hello from foo extension$ datashare -m CLI --extensionInstall /home/user/src/my-extension/dist/my-extension.jar --extensionsDir "/home/user/extensions"
2020-07-27 10:02:32,381 [main] INFO Main - Running datashare
2020-07-27 10:02:32,596 [main] INFO ExtensionService - installing extension from file /home/user/src/my-extension/dist/my-extension.jar into /home/user/extensions$ datashare -m CLI --extensionDelete my-extension.jar --extensionsDir "/home/user/extensions"
2020-08-29 10:45:37,363 [main] INFO Main - Running datashare
2020-08-29 10:45:37,579 [main] INFO Extension - removing extension my-extension jar /home/user/extensions/my-extension.jar


The Datashare API is fully defined using the OpenAPI 3.0 specification and automatically generated after every Datashare release.
api_keybatch_searchbatch_search_projectbatch_search_querybatch_search_resultdocumentdocument_tagdocument_user_recommendationdocument_user_starnamed_entitynoteprojecttaskuser_historyuser_history_projectuser_inventoryuser_policyDatashare Tarentula is a powerful command-line toolbelt designed to streamline bulk operations against any Datashare instance.
pip3 install --user tarentulaWhat if you want to integrate text translations into Datashare’s interface? Or make it display tweets scraped with Twint? Ask no more: there are plugins for that!
// It's usually safer to wait for the app to be ready
document.addEventListener('datashare:ready', async () => {
// This loads the ButtonIcon component asynchronously
const ButtonIcon = await datashare.findComponent('Button/ButtonIcon')
// Then we create a dummy component. For the sake of simplicity we use
// Vue 3's Options API but we strongly encourage you to build your plugins
// with Vite and use the Composition API.
const definition = {
components: {
ButtonIcon,
},
methods: {
sayHi() {
alert('Hi!')
}
},
template: `
<button-icon @click="sayHi()" icon-left="hand-waving">
Say hi
</button-icon>
`
}
// Finally, we register the component's definition in a hook.
datashare.registerHook({ target: 'app-sidebar-sections:before', definition })
})./elasticsearch/index/create.sh <temporary_index> <ds_version_number>./elasticsearch/documents/reindex.sh <original_index> <temporary_index> /docker run icij/datashare-tarentuladatashare.config.set('hooksDebug', true)../elasticsearch/index/replace.sh <temporary_index> <original_index>./elasticsearch/index/delete.sh <temporary_index>mkdir ~/Datashare\ Plugins
datashare --pluginsDir=~/Datashare\ Plugins$ datashare -m CLI --pluginList ".*"
2020-07-24 10:04:59,767 [main] INFO Main - Running datashare
plugin datashare-plugin-site-alert
Site Alert
v1.2.0
https://github.com/ICIJ/datashare-plugin-site-alert
A plugin to display an alert banner on the Datashare demo instance.
...$ datashare -m CLI --pluginInstall datashare-plugin-site-alert --pluginsDir "~/Datashare Plugins"
2020-07-24 10:15:46,732 [main] INFO Main - Running datashare
2020-07-24 10:15:50,202 [main] INFO PluginService - downloading from url https://github.com/ICIJ/datashare-plugin-site-alert/archive/v1.2.0.tar.gz
2020-07-24 10:15:50,503 [main] INFO PluginService - installing plugin from file /tmp/tmp7747128158158548092.gz into /home/dev/Datashare Plugins$ datashare -m CLI --pluginDelete datashare-plugin-site-alert --pluginsDir "~/Datashare Plugins"
2020-07-24 10:20:43,431 [main] INFO Main - Running datashare
2020-07-24 10:20:43,640 [main] INFO PluginService - removing plugin base directory /home/dev/Datashare Plugins/datashare-plugin-site-alert-1.2.0* A folder with a package.json file containing a "main" field.
* A folder with an index.js file in it.mkdir ~/Datashare\ Plugins/hello-world
echo "console.log('Welcome to %s', datashare.config.get('app.name'))" > ~/Datashare\ Plugins/hello-world/index.js$ tar tvzf ~/src/my-plugin/dist/my-plugin.tgz
drwxr-xr-x dev/dev 0 2020-07-22 11:51 my-plugin/
-rw-r--r-- dev/dev 31 2020-07-21 14:07 my-plugin/main.js
-rw-r--r-- dev/dev 19 2020-07-21 14:07 my-plugin/package.json$ datashare -m CLI --pluginInstall ~/src/my-plugin/dist/my-plugin.tgz --pluginsDir "~/Datashare Plugins"
2020-07-27 10:02:32,381 [main] INFO Main - Running datashare
2020-07-27 10:02:32,596 [main] INFO PluginService - installing plugin from file ~/src/my-plugin/dist/my-plugin.tgz into ~/Datashare Plugins$ datashare -m CLI --pluginDelete my-plugin --pluginsDir "~/Datashare Plugins"
2020-07-27 10:02:32,381 [main] INFO Main - Running datashare
2020-07-27 10:02:32,596 [main] INFO PluginService - installing plugin from file ~/src/my-plugin/dist/my-plugin.tgz into ~/Datashare Plugins// `datashare` is a global variable
datashare.registerHook({ target: 'app-sidebar.menu:before', definition: 'This is a message written with a plugin' })// It's usually safer to wait for the app to be ready
// The event's `detail` carries the `core` object whose `registerHook`
// method is called at the end of this handler.
document.addEventListener('datashare:ready', ({ detail }) => {
// Alert is a Vue component meaning it can have computed properties, methods, etc...
const Alert = {
computed: {
// Long English name of the current weekday, formatted with the 'en-US' locale.
weekday () {
const today = new Date()
return today.toLocaleDateString('en-US', { weekday: 'long' })
}
},
template: `<div class="text-center bg-info p-2 width-100">
It's {{ weekday }}, have a lovely day!
</div>`
}
// This is the most important part of this snippet:
// we register the component on a given `target`
// using the core method `registerHook`.
detail.core.registerHook({ target: 'landing.form:before', definition: Alert })
})