import React, { Component } from 'react';
import withStyles from '@material-ui/core/styles/withStyles';
import CssBaseline from '@material-ui/core/CssBaseline';
import Grid from '@material-ui/core/Grid';
import Typography from '@material-ui/core/Typography';
import Nav from './Nav';
import Topbar from '../Topbar';
import Footer from '../Footer';
import { Link } from 'react-router-dom';

// Image assets used as <img> sources in Technical.render().
const banner = require('../../images/about-data-banner.png'); // banner shown above the page heading
const img_architecture = require('../../images/recirc-architecture-2020_v1.png'); // RECIRC system architecture diagram

/**
 * Builds the style rules for the page.
 *
 * @param {object} theme - Theme object; only `theme.breakpoints.down` is used.
 * @returns {{root: object, grid: object}} Style rules keyed by class name.
 */
const styles = (theme) => {
    // Media-query key for small screens, where the fixed-width grid
    // gives way to a fluid width.
    const smallScreenQuery = theme.breakpoints.down('sm');

    const root = {
        flexGrow: 1,
        marginTop: 20,
        paddingBottom: 200,
    };

    const grid = {
        width: 1200,
        marginTop: 10,
        marginBottom: 10,
        [smallScreenQuery]: {
            width: 'calc(100% - 20px)',
        },
    };

    return { root, grid };
};

class Technical extends Component {

    render() {
        const { classes } = this.props;
        const currentPath = this.props.location.pathname

        return (
            <React.Fragment>
                <CssBaseline />
                <Topbar currentPath={currentPath} />
                <div className={classes.root}>
                    <Grid container justify="center">

                        <Grid spacing={24} alignItems="center" justify="center" container className={classes.grid}>
                            <Grid item xs={12}>
                                <img src={banner} alt="" />

                                <Typography variant="h1" className=" text-center mb-4 mt-4">About the Data</Typography>
                            </Grid>
                        </Grid>
                    </Grid>
                    <Grid container justify="center">
                        <Grid spacing={24} justify="center" container className={classes.grid}>

                            <Grid item xs={12} md={4}>
                               <Nav />
                            </Grid>

                            <Grid item xs={12} md={8}>

                                <Typography variant="h2" className="mb-3">Technical Overview</Typography>
                                
                                <ul>
                                    <li>Data access 
                                        <ul>
                                            <li>Exporting data </li>
                                            <li>API access </li>
                                        </ul>                           
                                    </li>
                                    <li>RECIRC Database system overview </li>
                                    <li>Project development overview </li>
                                </ul>

                                <Typography variant="h3" className="mb-3 mt-3">Data Access</Typography>

                                <p>In addition to data being available to view through the <Link to='/explore'>web interface</Link>, researchers have the ability to download raw data via the search system’s in-built export tools, or to access data programmatically using the system’s Application Programming Interface (API). </p>

                                <p>Please refer to the <Link to='/about-data/data-use'>Data Use and Citation section</Link> when using the project’s data.</p>

                                <Typography variant="h4" className="mb-3 pt-2">Exporting Data</Typography>

                                <p>Search results may be exported as a .csv file, which can then be opened in software such as Excel, or a standard text editor. </p>

                                <p>In cases where search results can be represented as a network graph, the results can be exported in graph format (.GEXF). This can then be imported into software such as <a href="https://gephi.org">Gephi</a>, or used with packages such as <a href="https://networkx.org/">NetworkX</a>, to conduct network analysis. </p>

                                <p>There are a range of resources available to learn more about working with network data, and the various tools that can assist you. For a helpful introduction, <a href="https://programminghistorian.org/en/lessons/?topic=network-analysis">the Programming Historian has several lessons</a> which focus on different software tools that can be used. </p>

                                <Typography variant="h4" className="mb-3 pt-2">API Access </Typography>

                                <p>Data may be accessed programatically using the system's RESTful Application Programming Interface (API). Data from the API is provided in JSON format. Further documentation on routes and responses provided by the API, is available via the project's interactive API documentation, hosted at <code>** URL to Follow **</code>. </p>

                                <p>If you plan to access data in this way, please contact david.d.kelly@nuigalway.ie. Depending on usage, this API may change to require authentication in the future, or be subject to changes to endpoints or data formats.</p>

                                <Typography variant="h3" className="mb-3 mt-3">RECIRC Database System Overview</Typography> 

                                <p>The RECIRC database is made up of a number of separate components. The database design and development was conducted at the <a href="http://mooreinstitute.ie">Moore Institute</a>, <a href="http://www.nuigalway.ie">NUI Galway</a>. </p>

                                <p>The web application was built using the <a href="https://laravel.com">Laravel</a> PHP Framework. Data is stored in a relational (MySQL) database, with the search system built on <a href="https://www.elastic.co/">Elasticsearch</a>. </p>

                                <p>The database’s public user interface was built using <a href="https://reactjs.org">React</a>, with data visualisation on the site being implemented using <a href="http://sigmajs.org/">Sigma</a>, <a href="https://leafletjs.com/">Leaflet</a> and <a href="http://recharts.org/en-US/">ReCharts</a>. </p>

                                    
                                <Typography variant="h3" className="mb-3 mt-3">Project Development Overview </Typography>

                                <p>In the course of the project's development, a number of tools and technologies were employed to make the process more efficient. In the spirit of sharing for those engaging in the development of similar projects, an overview of the various components are illustrated below. </p>

                                <figure className="clearfix" style={{ background: '#fff'}} className="p-2">
                                    <img src={img_architecture} alt="RECIRC System Architecture - further described in the sections below" title="RECIRC System Architecture" style={{maxWidth: '100%' }} />
                                    <figcaption style={{fontStyle: 'italic', fontSize: '80%', textAlign: 'right'}} className="pt-2">RECIRC System Architecture</figcaption>
                                </figure>                            

                                <Typography variant="h4" className="mb-3 pt-2">RECIRC Web Application</Typography>

                                <Typography variant="h5" className="mb-3">RESTful API</Typography>

                                <p>The RECIRC API (Application Programming Interface), provides a layer of abstraction between the various interfaces described below, and the database, in which the project’s data is stored. This approach both enables, and reduces the complexity involved in, the creation of such interfaces. </p>

                                <Typography variant="h5" className="mb-3">Email Interface </Typography>

                                <p>This interface was developed to allow researchers to email images taken in archives directly into the database, which enabled an alternative to the sometimes more time-consuming form-based file upload. This facility is built on top of Mailgun, a “transactional email API service”. </p>

                                <Typography variant="h5" className="mb-3">Researcher Web Interface </Typography>

                                <p>Development of this interface started early in the project, and evolved in-line with the needs of the research team. This interface relies on the RESTful API described above. </p>

                                <Typography variant="h5" className="mb-3">Public Web Interface </Typography>

                                <p>Developed using React, the public interface provides tools for users to search, explore, visualise and export data collected by the project. This interface relies on the RESTful API described above. </p>

                                    

                                <Typography variant="h4" className="mb-3 pt-2">Data Analysis </Typography>

                                <p>In situations where a one-off analysis of data was required, or where the incorporation of a feature into the web application wasn’t widely useful to the research team, bespoke software was written. Examples of situations where this occurred were when there was a need to extract a subset of data from the database, to process the data into a specified format for use in another application, or to perform some form of analysis on the data. In most cases, Python was used to create this software. </p>

                                    

                                <Typography variant="h4" className="mb-3 pt-2">Storage</Typography>

                                <Typography variant="h5" className="mb-3">Data</Typography> 

                                <p>Data is stored within the system in two ways. Firstly, a MySQL relational database is used as the primary data store for the research data. In total, 45 tables store both a representation of the project’s complex data model, along with aggregated and transactional data used for the smooth functioning of the web application. Following a two-day RECIRC-team workshop on SQL, a replica of this “production” database was created to allow the team to experiment with SQL queries that informed their own research.  The team maintained a shared Wiki of custom SQL queries on Github, which allowed members to review and adapt each other’s work. </p>

                                <p>Secondly, data is stored within Elasticsearch, which enables the search system on the public interface. Within Elasticsearch, data from the relational database is combined into JSON objects that reflect the logic of the data model, for example, as People, Receptions, Works, and Reception Sources. Data within Elasticsearch is viewed as transient; when changes occur to a record in the database, these changes are synched to Elasticsearch. The full search index can be rebuilt from within the web application. Early in the development of the team’s internal researcher system, Kibana was used create visualisations of the data contained in the database.</p>
                            
                                <p>During the course of the project, both the database and Elasticsearch were deployed on Amazon’s Web Services (AWS) cloud infrastructure. Because this has an on-going cost attached to it, upon completion of the project these were moved to NUI Galway’s internal hosting infrastructure.</p>

                                <Typography variant="h5" className="mb-3">Images</Typography> 

                                <p>In the course of the project, the research team took photographs of manuscripts while working in archives around the world. These image files were uploaded to the RECIRC web application, either directly, or using the email interface (described above). To avoid data loss, multiple copies of the images were stored. Upon receiving an image, the system stored it on Amazon’s S3 service, while also placing a copy on the server’s local file system, and into a shared Dropbox folder. For images that were received via email, and prior to processing, a copy of the email and its attachments were forwarded to a separate project email account. </p>

                                    
                                <Typography variant="h4" className="mb-3 pt-2">Management </Typography>

                                <Typography variant="h5" className="mb-3">Code &amp; Issue Management</Typography>

                                <p>Throughout the project the team used a dedicated private Github organization as a platform to both store version-controlled code, and to manage team interactions around bugs and feature requests. From a development perspective, training the team on how to use Github’s Issue Management system, and how to write bug reports, proved extremely valuable.  </p>                                    

                                <p>From an organizational perspective, individual repositories are used for each of the different parts of the project, for example, the public interface, web application and the interactive applications for a physical exhibition each have their own repository, with associated project management tools. This made the management of the project easier, as not all team members needed to contribute to every part of the project. In cases where an external developer was contributing to a component of the project, they could be granted access to only the repository they needed for their work. </p>

                                <p>Another useful feature of Github is that repositories provide Wiki functionality, which was used, as mentioned earlier, by the team to share internal project documentation such as SQL queries. This enabled collaboration between, what were at the outset of the project non-technical, users. </p>                                    

                                <Typography variant="h5" className="mb-3">Code Deployment</Typography>

                                <p>To streamline the process of deploying code to the testing and production servers online deployment services were used. Early in the project, <a href="https://deploybot.com/">Deploybot</a> was used, before being replaced (due to a wider internal reorganization of tools) with <a href="https://envoyer.io/">Envoyer</a>.</p>

                                    

                                <Typography variant="h5" className="mb-3">Monitoring &amp; Analytics</Typography> 

                                    <p>Monitoring of the public interface of the project takes place at a number of levels. From a system availability perspective, a free external tool (UpTimeRobot) is used to provide reports of any system downtime. </p>

                                    

                                    <p>Within the system, we use an external application monitoring and error reporting service (Sentry.io) to identify and report on any errors that happen within the system. This service requires integration with the project code, and is an extremely useful, and on-going, approach to finding software bugs that testing did not identify. </p>

                                    

                                    <p>Finally, we use Google Analytics to report on usage of the public and researcher interfaces. This free tool provides reporting on a wide range of metrics, for example, how users find the website, what content they view and interact with, and data on their location and technologies they use. </p>

                                <p>
                                    <small><em>Updated: 10/2/21</em></small>
                                </p>
                                                                
                            </Grid>
                        </Grid>
                    </Grid>




                </div>
                <Footer />
            </React.Fragment>
        )
    }
}

// Export the component wrapped by withStyles(styles); render() reads the resulting `classes` prop.
export default withStyles(styles)(Technical);